dataprep: data preprocessing and plots

library(dataprep)
library(ggplot2)
library(scales)

Figure 1. Line plots for variables whose names are essentially numeric and logarithmic

# Descriptive statistics
descplot(data,5,65)
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Time used by descdata: 0.0439 secs 
#> Time used by descplot: 0.101 secs
#> Warning: Removed 3 row(s) containing missing values (geom_path).

Figure 2. Line plots of selected descriptive statistics for variables whose names are essentially numeric and logarithmic

# Selected descriptive statistics, equal to descplot(data,5,65,c('na','min','max','IQR'))
descplot(data,5,65,c(2,7:9))
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Warning in min(x, na.rm = TRUE): no non-missing arguments to min; returning Inf
#> Warning in max(x, na.rm = TRUE): no non-missing arguments to max; returning -Inf
#> Time used by descdata: 0.0339 secs 
#> Time used by descplot: 0.0379 secs
#> Warning: Removed 3 row(s) containing missing values (geom_path).

Figure 3. Bar charts for variables whose names are of character type

# Descriptive statistics
descplot(data1,3,7)+
  ggplot2::theme(axis.text.x=ggplot2::element_text(angle=30,hjust=1,vjust=1.1))
#> Time used by descdata: 0.00399 secs 
#> Time used by descplot: 0.00798 secs

Figure 4. Bar charts of selected descriptive statistics for variables whose names are of character type

# Selected descriptive statistics, equal to descplot(data1,3,7,c('min','max','IQR'))
descplot(data1,3,7,7:9)+
  ggplot2::theme(axis.text.x=ggplot2::element_text(angle=30,hjust=1,vjust=1.1))
#> Time used by descdata: 0.00299 secs 
#> Time used by descplot: 0.00598 secs

Figure 5. Particle number size distributions on logarithmic scales (top and bottom percentiles)

# Top and bottom percentiles
percplot(data,5,65,4)
#> Time used by percdata: 0.0479 secs 
#> Time used by percplot: 0.0648 secs
#> Warning: Removed 144 row(s) containing missing values (geom_path).

Figure 6. Particle number size distributions on logarithmic scales, showing only the top percentiles (part=1)

# Top percentiles
percplot(data,5,65,4,part=1)
#> Time used by percdata: 0.0429 secs 
#> Time used by percplot: 0.0588 secs
#> Warning: Removed 72 row(s) containing missing values (geom_path).

Figure 7. Particle number size distributions on logarithmic scales, showing only the bottom percentiles (part=0)

# Bottom percentiles
percplot(data,5,65,4,part=0)
#> Time used by percdata: 0.0449 secs 
#> Time used by percplot: 0.0578 secs
#> Warning: Removed 72 row(s) containing missing values (geom_path).

Figure 8. Percentiles of modes on linear scales (top and bottom percentiles)

# Top and bottom percentiles
percplot(data1,3,7,2)+
  ggplot2::theme(axis.text.x=ggplot2::element_text(angle=30,hjust=1,vjust=1.1))
#> Time used by percdata: 0.00698 secs 
#> Time used by percplot: 0.017 secs

Figure 9. Percentiles of modes on linear scales, showing only the top percentiles (part=1)

# Top percentiles
percplot(data1,3,7,2,part=1)+
  ggplot2::theme(axis.text.x=ggplot2::element_text(angle=30,hjust=1,vjust=1.1))
#> Time used by percdata: 0.00499 secs 
#> Time used by percplot: 0.014 secs

Figure 10. Percentiles of modes on linear scales, showing only the bottom percentiles (part=0)

# Bottom percentiles
percplot(data1,3,7,2,part=0)+
  ggplot2::theme(axis.text.x=ggplot2::element_text(angle=30,hjust=1,vjust=1.1))
#> Time used by percdata: 0.00598 secs 
#> Time used by percplot: 0.014 secs