[R] Nested for loop

Kirsten Morehouse kmoreho1 at swarthmore.edu
Sat Aug 5 15:21:28 CEST 2017


Hi! Thanks for taking the time to read this.

The code below creates a graph that takes 100 samples that are between 5%
and 15% of the population (400).

What I'd like to do, however, is add two other sections to the graph. It
would look something like this:

from 1-100 samples take 100 samples that are between 5% and 15% of the
population (400). From 101-200 take 100 samples that are between 5% and 15%
of the population (800). From 201-300 take 100 samples that are between 5%
and 15% of the population (300).

I assume this would require a nested for loop. Does anyone have advice as
to how to do this?

Thanks for your time. Kirsten

## Mark-Recapture
## Estimate popoulation from repeated sampling

## Population size
N <- 400
N

## Vector labeling each item in the population
pop <- c(1:N)
pop

## Lower and upper bounds of sample size
lower.bound <- round(x = .05 * N, digits = 0)
lower.bound ## Smallest possible sample size

upper.bound <- round(x = .15 * N, digits = 0)
upper.bound ## Largest possible sample size

## Length of sample size interval
length.ss.interval <- length(c(lower.bound:upper.bound))
length.ss.interval ## total possible sample sizes, ranging form lower.bound
to upper.bound

## Determine a sample size randomly (not a global variable...simply for
test purposes)
## Between lower and upper bounds set previously
## Give equal weight to each possible sample size in this interval
sample(x = c(lower.bound:upper.bound),
       size = 1,
       prob = c(rep(1/length.ss.interval, length.ss.interval)))

## Specify number of samples to take
n.samples <- 100

## Initiate empty matrix
## 1st column is population (item 1 thorugh item 400)
## 2nd through nth column are all rounds of sampling
dat <- matrix(data = NA,
              nrow = length(pop),
              ncol = n.samples + 1)

dat[,1] <- pop

dat

## Take samples of random sizes
## Record results in columns 2 through n
## 1 = sampled (marked)
## 0 = not sampled (not marked)
for(i in 2:ncol(dat)) {
  a.sample <- sample(x = pop,
                     size = sample(x = c(lower.bound:upper.bound),
                                   size = 1,
                                   prob = c(rep(1/length.ss.interval,
length.ss.interval))),
                     replace = FALSE)
  dat[,i] <- dat[,1] %in% a.sample
}

## How large was each sample size?
apply(X = dat, MARGIN = 2, FUN = sum)
## 1st element is irrelevant
## 2nd element through nth element: sample size for each of the 100 samples

## At this point, all computations can be done using dat

## Create Schnabel dataframe using dat
## Google the Schnabel formula

schnabel.comp <- data.frame(sample = 1:n.samples,
                            n.sampled = apply(X = dat, MARGIN = 2, FUN =
sum)[2:length(apply(X = dat, MARGIN = 2, FUN = sum))]
)

## First column: which sample, 1-100
## Second column: number selected in that sample


## How many items were previously sampled?
## For 1st sample, it's 0
## For 2nd sample, code is different than for remaning samples

n.prev.sampled <- c(0, rep(NA, n.samples-1))
n.prev.sampled

n.prev.sampled[2] <- sum(ifelse(test = dat[,3] == 1 & dat[,2] == 1,
                                yes = 1,
                                no = 0))

n.prev.sampled

for(i in 4:ncol(dat)) {
  n.prev.sampled[i-1] <- sum(ifelse(test = dat[,i] == 1 &
rowSums(dat[,2:(i-1)]) > 0,
                                    yes = 1,
                                    no = 0))
}

schnabel.comp$n.prev.sampled <- n.prev.sampled

## n.newly.sampled: in each sample, how many items were newly sampled?
## i.e., never seen before?
schnabel.comp$n.newly.sampled <- with(schnabel.comp,
                                      n.sampled - n.prev.sampled)

## cum.sampled: how many total items have you seen?
schnabel.comp$cum.sampled <- c(0,
cumsum(schnabel.comp$n.newly.sampled)[2:n.samples-1])

## numerator of schnabel formula
schnabel.comp$numerator <- with(schnabel.comp,
                                n.sampled * cum.sampled)

## denominator of schnable formula is n.prev.sampled

## pop.estimate -- after each sample (starting with 2nd -- need at least
two samples)
schnabel.comp$pop.estimate <- NA

for(i in 1:length(schnabel.comp$pop.estimate)) {
  schnabel.comp$pop.estimate[i] <- sum(schnabel.comp$numerator[1:i]) /
sum(schnabel.comp$n.prev.sampled[1:i])
}


## Plot population estimate after each sample
if (!require("ggplot2")) {install.packages("ggplot2"); require("ggplot2")}
if (!require("scales")) {install.packages("scales"); require("scales")}


small.sample.dat <- schnabel.comp

small.sample <- ggplot(data = small.sample.dat,
                       mapping = aes(x = sample, y = pop.estimate)) +
  geom_point(size = 2) +
  geom_line() +
  geom_hline(yintercept = N, col = "red", lwd = 1) +
  coord_cartesian(xlim = c(0:100), ylim = c(300:500)) +
  scale_x_continuous(breaks = pretty_breaks(11)) +
  scale_y_continuous(breaks = pretty_breaks(11)) +
  labs(x = "\nSample", y = "Population estimate\n",
       title = "Sample sizes are between 5% and 15%\nof the population") +
  theme_bw(base_size = 12) +
  theme(aspect.ratio = 1)

small.sample

	[[alternative HTML version deleted]]



More information about the R-help mailing list