This vignette demonstrates how to use the {samplezoo} package to generate datasets of varying sizes (small, medium, and large) with variables from multiple probability distributions.
Each dataset offers:
- Variables (columns) drawn from common distributions such as Normal, Binomial, Poisson, and others.
- An adjustable sample size (small, medium, or large) to suit your needs.
data_small <- samplezoo("small")
head(data_small)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 52.81412 53.51319 53.67912 0 4 1 2.930778 0.6989983 0.10578491
#> 2 68.23793 59.38779 35.13537 1 0 2 0.927297 0.2805017 0.07902199
#> 3 68.65313 58.44543 40.40566 0 0 1 34.820686 0.2691659 0.39371600
#> 4 45.62340 73.49855 61.86956 0 2 5 11.391594 0.6804970 0.46589920
#> 5 43.92391 37.85506 65.47060 0 0 1 21.108217 0.3427744 0.75404320
#> 6 51.66342 46.86237 81.70720 1 1 1 8.234561 0.3648130 0.14351144
#> gamma chi_sq t_dist f_dist
#> 1 3.3267537 7.212244 -0.8529863 0.5167607
#> 2 3.6669007 9.661967 -1.0643916 0.7294443
#> 3 4.1921663 5.173578 0.5679994 0.9108190
#> 4 0.8220897 8.708574 1.4667282 1.4952712
#> 5 2.0554174 19.556774 -0.6172805 0.1470397
#> 6 2.7243905 11.227294 -3.5974447 1.1166657
data_medium <- samplezoo("medium")
head(data_medium)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 46.02367 71.25660 39.750130 0 0 2 21.772330 0.9305956 0.4539395
#> 2 49.32905 58.94355 -3.378858 0 2 1 24.235364 0.7742999 0.2503492
#> 3 58.65559 69.64089 6.842902 1 2 2 27.303474 0.7920125 0.2215807
#> 4 29.89703 56.91901 70.248972 0 3 2 53.093483 0.6174459 0.2416861
#> 5 76.22624 59.05343 30.431970 1 3 4 1.065747 0.9747066 0.4487912
#> 6 40.68722 64.01339 12.307218 0 2 1 7.177614 0.7474789 0.4006164
#> gamma chi_sq t_dist f_dist
#> 1 4.9530141 11.196222 -0.8598328 3.2969429
#> 2 1.4745625 8.721485 -0.2513822 1.1522467
#> 3 0.4714032 7.562053 1.3909277 1.1831806
#> 4 2.3675387 16.463112 0.4024442 1.1443270
#> 5 4.0108882 5.337741 0.8392667 1.1098908
#> 6 4.8259632 11.386632 -0.2674480 0.6722363
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 68.17047 43.77534 32.77409 0 0 2 7.686341 0.07423565 0.22908364
#> 2 54.50925 57.31223 19.59262 0 0 3 5.295738 0.43575854 0.28826722
#> 3 34.23543 73.95832 35.53038 0 1 1 18.730670 0.07914885 0.02256758
#> 4 71.51849 66.28091 40.40233 1 0 8 18.031923 0.90119115 0.36785600
#> 5 54.46610 68.21688 36.06081 0 1 1 8.926137 0.90411665 0.57451053
#> 6 48.68553 60.46151 53.43388 1 1 2 2.153736 0.25749653 0.43182738
#> gamma chi_sq t_dist f_dist
#> 1 3.527325 14.116512 0.1701468 1.1050098
#> 2 1.410158 4.353722 -0.8447841 2.2619767
#> 3 2.300857 10.397276 2.1414233 1.6591481
#> 4 4.877539 12.756853 0.5519434 1.0611762
#> 5 2.961266 10.642384 0.6644180 0.8904096
#> 6 3.291894 12.879280 -0.3902477 3.0987640
Call set.seed() before generating random data to control the output: reusing the same seed reproduces exactly the same dataset, while changing the seed produces a different, but still reproducible, dataset.
Reproducibility
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 1 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 1 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chi_sq t_dist f_dist
#> 1 6.9893762 10.286282 -0.3814568 0.7264343
#> 2 5.4087626 6.519658 -2.3409216 0.9698166
#> 3 1.2587867 8.011417 -0.4744159 0.4329175
#> 4 0.9871787 14.780626 0.4292511 1.0227474
#> 5 2.4021943 6.799788 -0.6692669 2.7446729
#> 6 4.2109032 17.858701 -0.3370763 1.3993853
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 1 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 1 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chi_sq t_dist f_dist
#> 1 6.9893762 10.286282 -0.3814568 0.7264343
#> 2 5.4087626 6.519658 -2.3409216 0.9698166
#> 3 1.2587867 8.011417 -0.4744159 0.4329175
#> 4 0.9871787 14.780626 0.4292511 1.0227474
#> 5 2.4021943 6.799788 -0.6692669 2.7446729
#> 6 4.2109032 17.858701 -0.3370763 1.3993853
Variation
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 1 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 1 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chi_sq t_dist f_dist
#> 1 6.9893762 10.286282 -0.3814568 0.7264343
#> 2 5.4087626 6.519658 -2.3409216 0.9698166
#> 3 1.2587867 8.011417 -0.4744159 0.4329175
#> 4 0.9871787 14.780626 0.4292511 1.0227474
#> 5 2.4021943 6.799788 -0.6692669 2.7446729
#> 6 4.2109032 17.858701 -0.3370763 1.3993853
set.seed(456)
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 29.84718 68.13494 7.9885694 0 0 5 3.4417303 0.8866347 0.05413307
#> 2 59.32663 52.32066 21.2526086 0 3 3 0.8114356 0.7976466 0.07195440
#> 3 62.01312 62.47569 38.4789563 0 2 6 46.8038907 0.6469920 0.22555129
#> 4 29.16661 53.51086 -0.8656269 0 1 5 11.6955326 0.2036753 0.71455809
#> 5 39.28465 47.19406 47.7819258 1 1 1 0.3535625 0.3653401 0.34619912
#> 6 45.13908 63.33566 53.3620528 1 1 2 4.5592136 0.7628573 0.25880522
#> gamma chi_sq t_dist f_dist
#> 1 6.7914120 4.464348 -1.0150596 2.2557295
#> 2 3.0132520 8.062120 0.3262369 1.4955877
#> 3 4.7360954 10.969593 1.5141157 1.0766901
#> 4 5.1235878 6.249247 0.6432708 1.1251542
#> 5 6.6851637 4.358815 0.2025742 0.4754946
#> 6 0.3903841 20.019575 1.6257109 0.6653886