[R] non parametric algorithm npEM (library mixtools) usage

Luigi Marongiu marongiu.luigi at gmail.com
Mon Feb 15 15:36:38 CET 2016


Dear all,
I am using mixtols to find cut-off values from datasets. At the moment
I can run the parametric function normalmixEM from the library
mixtools, but I owuld like to generalize the procedure with the non
parametric version npEM. However I am confused regarding the arguments
to use in this implementation. I reckon that x should be the dataframe
of the values I have, mu0 should probably give the locations of the
mode (or modes if the data are described by overlapping distributions)
of the data, but all the other arguments are beyond me.
could you please give me a hint?
thank you
luigi

Here I am placing an example for the normalmixEM function that
provides a cut-off for the value highlighted with the arrow.

>>>
xval <- c(42.39, 44.53, 5.05, 6.9, 45, 2.35, 20.73, 3.31, 45, 4.76,
2.47, 2.37, 2.8, 3.26, 3.21, 45, 6.41, 4.77, 4.72, 4.89, 44.71, 2.8,
4.08, 4.07, 4.81, 2.93, 2.73, 44.75, 2.61, 2.56, 2.75, 2.75, 44.82,
36.59, 2.8, 43.82, 2.53, 2.75, 2.73, 3.05, 2.66, 5.61, 2.28, 4.83,
38.63, 44.23, 2.35, 2.47, 44.03, 6.33, 2.7, 2.96, 42.85, 2.47, 2,
12.76, 2.99, 2, 35.11, 2.63, 44.69, 2.96, 45, 42.13, 41.04, 3.22, 45,
45, 2.55, 4.58, 3.09, 39.98, 2, 2.97, 2.87, 2, 44.82, 45, 2.95, 45, 2,
2.82, 2.47, 2.98, 4.81, 44.53, 44.38, 2.87, 44.45, 2.9, 2.48, 44.14,
3.05, 2.76, 45, 45, 44.54, 42.85, 3.17, 2.46, 39.95, 36.96, 2.59,
2.75, 5.38, 2.8, 44.53, 45, 38.84, 4.64, 3.04, 2.59, 2.64, 45, 2.66,
44.37, 45, 26.32, 3.29, 40.44, 2, 41.51, 2, 45, 2, 5, 2.78, 2.11,
3.31, 2.61, 2.83, 2.6, 2.66, 2.95, 2.46, 2.58, 2.94, 45, 45, 2.71,
2.63, 2.81, 2, 3.29, 5.48, 45, 3.02, 2.82, 3.07, 2.65, 2.61, 2.67,
36.6, 2.08, 40.2, 45, 2.5, 45, 41.46, 45, 2.62, 2.77, 4.14, 2.63,
3.21, 4.79, 42.63, 2.66, 45, 4.69, 3.05, 45, 45, 2.97, 42.07, 2.73,
3.26, 5.17, 2.47, 44.66, 2.42, 5.14, 5.03, 2.65, 2.88, 2.69, 44.1,
3.15, 4.92, 42.02, 6.97, 2.46, 35.98, 2.95, 32.98, 2.79, 44.82, 2.84,
2.15, 44.42, 2.96, 45, 2.42, 2.75, 2.44, 4.58, 2, 45, 41.04, 4.04,
3.08, 2.46, 44.54, 3.21, 39.16, 2, 35.36, 3.08, 5.77, 2.71, 4.41,
2.46, 44.43, 2.62, 45, 2.7, 45, 41.43, 4.65, 3.05, 4.76, 40.66, 32.88,
45, 44.94, 44.67, 3.07, 2.92, 2.75, 2.63, 2.68, 34.15, 3.27, 2.47, 2,
2.63, 45, 3.06, 42.53, 35.25, 2.82, 42.62, 5.83, 4.69, 38.04, 2.47,
38.14, 3.73, 10, 4.93, 4.93, 4.65, 40.8, 2.32, 5.53, 3.01, 41.13, 4.5,
2.65, 44.85, 5.02, 2, 39.99, 2.89, 3.09, 2, 43.77, 44.53, 4.09, 6.22,
3.31, 44.64, 4.65, 45, 6.68, 39.93, 45, 2.77, 2.51, 2, 45, 4.08, 4.61,
6.11, 3.02, 44.8, 45, 44.54, 2.95, 2.77)
yval <- c(-0.002, 0.001, 0.002, 0.001, -0.001, 0.003, 0.003, 0.005, 0,
0.011, 0.003, 0.011, 0.004, 0.012, 0.004, 0.005, 0.001, 0.007, 0.006,
0.007, -0.001, 0.011, 0.005, 0.002, 0.007, 0.028, 0.01, 0.002, 0.003,
0.007, 0.033, 0.006, 0.003, 0, 0.01, 0.018, 0.01, 0.008, 0.002, 0.022,
0.02, 0.002, 0.006, 0.008, 0, -0.002, -0.001, 0.001, 0.001, 0.007,
0.005, 0.011, 0.004, 0.001, 0.005, 0.001, 0.019, 0.002, 0.001, 0.002,
-0.001, 0.003, 0.001, 0, -0.001, 0.002, 0.005, 0.001, 0, 0.007, 0.011,
-0.001, 0.002, 0.01, 0.004, 0.003, 0.001, 0, 0.015, 0.004, 0.001,
0.003, 0.003, 0.027, 0.005, 0, 0.003, 0.003, 0, 0.017, 0.004, -0.001,
0.043, 0.003, -0.001, 0.001, 0, 0, 0.019, 0.003, -0.001, 0.001, 0.009,
0.013, 0.001, 0.021, 0.001, -0.001, -0.001, 0.002, 0.008, 0.004,
0.007, 0.001, 0.007, 0.001, 0, 0.001, 0.004, -0.001, 0.001, 0, 0.007,
0.003, 0.002, 0.001, 0.028, 0.002, 0.005, 0.013, 0.017, 0.013, 0.009,
0.021, 0.01, 0.007, 0.015, 0, 0.002, 0.002, 0.013, 0.012, 0, 0.034,
0.005, 0, 0.041, 0.02, 0.036, 0.004, 0.002, 0.004, 0.001, 0.001,
0.001, 0.003, 0.016, 0.002, 0, 0, 0.002, 0.009, 0.006, 0.022, 0.002,
0.01, 0, 0.012, 0.006, 0.01, 0.005, 0.001, 0.001, 0.027, 0.003, 0.002,
0.02, 0.014, 0.003, 0.003, -0.001, 0.002, 0.008, 0.011, 0.014, 0.017,
0, 0.015, 0.008, 0.001, 0.005, 0.002, 0.001, 0.012, 0.002, 0.004, 0,
0.012, 0, 0.001, 0.005, 0.001, 0.008, 0.011, 0.006, 0.007, 0.005, 0,
0, 0.003, 0.004, 0.002, 0, 0.015, -0.001, 0.002, -0.001, 0.006, 0.005,
0.006, 0.003, 0.002, 0.001, 0.001, 0, 0.007, 0.002, -0.001, 0.033,
0.01, 0.007, 0.003, 0, 0.001, -0.001, 0.011, 0.006, 0.015, 0.013,
0.01, 0.015, 0, 0.014, 0.001, 0.001, 0.003, 0, 0.012, -0.001, 0.002,
0.027, -0.001, 0.009, 0.021, 0.001, 0.001, -0.001, 0.008, 0.001,
0.017, 0.002, 0.027, 0.003, -0.001, 0.018, 0.026, -0.001, 0.004,
0.003, 0.001, 0.019, 0.001, 0.001, 0.019, 0.005, 0.001, -0.001, 0.002,
0.001, 0.001, 0.004, 0.002, 0.007, 0.003, 0.01, 0.002, 0.003, 0.002,
0.005, 0.003, 0, 0.004, 0.032, 0.005, 0.018, 0.002, 0.001, 0.002,
0.003, 0.005)
df <- data.frame(xval,yval)

DF <- subset(df, xval>=10)
sdmult = 10 # A guess to get started
library(mixtools)
model = normalmixEM((x = DF$yval))
muneg = model$mu[1] # Mean of negative population
sdneg = model$sigma[1] # SD of negative population
mupos = model$mu[2] # Mean of positive population
sdpos = model$sigma[2] # SD of positive population
co = muneg + sdmult*sdneg # Calculate threshold based on SD multiplier
plot(DF$yval ~ DF$xval)
abline(h=co, lty=2)
arrows(37,0.018, 42, 0.018, col="red")



More information about the R-help mailing list