[R-sig-ME] lmer(): Higher weight in case of more measurements

Ken Beath ken.beath at mq.edu.au
Fri Jun 26 13:05:46 CEST 2015


Hi Susanne,

I've fixed a few problems with your code.

With the corrected code there is a difference between the two analyses once
the group sizes differ. That is what you would expect: the first group carries
more weight in the fit when it contributes 40 extra points, and that drags the
overall line down towards it.

For the weighted analyses I've changed the weight to 5000, which seems a more
sensible value and is less likely to cause numerical problems. This does what
you would expect. A weight of 5000 tells lmer that each observation in the 1st
group is actually the mean of 5000 points, so its residual variance is taken to
be sigma^2/5000. Because the spread of the group-1 data is unchanged, the
estimated residual standard deviation of a single (weight 1) observation comes
out much higher than before. Strictly this violates the model assumptions,
since the residual variation is then not equal across groups, but you will see
the effect in the much larger residual variance estimate. It can be fixed by
reducing the residual variation in the first group to reflect the lower
variation each observation would have as a mean of 5000 points; that code is at
the end and seems to do the right thing.
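
To make the weights convention concrete, here is a minimal sketch of my own
(not part of your problem) using lm(), which uses the same prior-weights
convention as lmer(): a weight of n says the observation behaves like the mean
of n raw points, i.e. its residual variance is sigma^2/n. Fitting the raw data
and fitting the cell means with weights equal to the cell counts give identical
coefficient estimates; only the residual variance estimate differs, because the
within-cell variation is lost when you collapse to means.

# illustration only: prior weights = number of raw points behind each mean
set.seed(1)
d.raw <- data.frame(x = rep(1:5, each = 20))      # 20 replicate measurements at each x
d.raw$y <- 0.5 * d.raw$x + rnorm(nrow(d.raw), 0, 1)

fit.raw <- lm(y ~ x, data = d.raw)                # fit to all 100 raw points

d.mean <- aggregate(y ~ x, data = d.raw, FUN = mean)  # one mean per x value
d.mean$n <- as.vector(table(d.raw$x))             # raw points behind each mean

fit.mean <- lm(y ~ x, data = d.mean, weights = n) # weighted fit to the 5 means

coef(fit.raw)   # same intercept and slope ...
coef(fit.mean)  # ... as the weighted fit to the means; only sigma() differs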

Ken


## 10 groups with 10 datapoints each
# slope a and intercept b for lines
a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)

y.m <- x.m <- matrix(nrow=10,ncol=10)

# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)
  # y values
  y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}

# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <- rep(1:10, each = 10)   # row i of the matrices belongs to group i
res <- lmer(y ~ x + (x|group))

summary(res)

#####################################################################
## 10 groups; group 1 gets 40 extra datapoints (50 in total), the others have 10 each
# if having more datapoints matters, group 1 should pull the overall regression
# line towards itself, i.e. give it a smaller intercept and smaller slope
# slope a and intercept b for lines

a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)

y.m <- x.m <- matrix(nrow=10,ncol=10)
# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)

  # y values
  y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}

# append four more rows of 10 points each, all generated from group 1's line
for (j in 1:4) {
  x.m <- rbind(runif(10,0,5), x.m)
  y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01), y.m)
}

# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <- c(rep(1, 50), rep(2:10, each = 10))   # the five leading rows all belong to group 1
res <- lmer(y ~ x + (x|group))
summary(res)

###################################################################
# result:
# it's the same regression line for both
# => it doesn't matter that group 1 has more datapoints!!
##################################################################
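
If you would rather compare the two fits numerically than by eye, one option is
to save them under different names when you run each block (the script reuses
res, so res1 and res2 below are hypothetical names you would assign yourself):

# hypothetical names: res1 = fit from the equal-group-sizes block above,
# res2 = the fit from this block; compare the fixed effects side by side
# cbind(equal.groups = fixef(res1), group1.larger = fixef(res2))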

# plot:
plot(x, y, pch=16, col=1, ylab="y", xlab="x", main="plot")
s <- summary(res)
# add linear regression lines
abline(s$coefficients[1,1],s$coefficients[2,1], lty=2, col = 1)
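
If it helps, you can also overlay the group-specific lines implied by the
random effects (my addition; coef() on a fitted merMod object returns the
per-group coefficients):

# per-group intercepts and slopes = fixed effects + predicted random effects
cf <- coef(res)$group
for (g in seq_len(nrow(cf))) abline(cf[g, 1], cf[g, 2], col = "grey", lty = 3)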

###################################################################
# check with weights:
##################################################################

# same as above:
# slope a and intercept b for lines
a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
y.m <- x.m <- matrix(nrow=10,ncol=10)
# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)
  # y values
  y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}
# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <- rep(1:10, each = 10)   # row i of the matrices belongs to group i
## Add weights: group 1 has weight=5000, the others weight=1 #############
w <- c(rep(5000,10),rep(1,90))

res <- lmer(y ~ x + (x|group), weights=w)
summary(res)
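
To see the inflated residual variance I mentioned above, look at the residual
standard deviation of the weighted fit (sigma() is the usual extractor):

# residual SD of a weight-1 observation; much larger than the true 0.01, because
# the group-1 points (weight 5000) are modelled as having SD sigma/sqrt(5000)
sigma(res)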
#################################################################
# slope a and intercept b for lines
a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
y.m <- x.m <- matrix(nrow=10,ncol=10)
# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)
  # y values
  y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}

# append four more rows of 10 points each, all generated from group 1's line
for (j in 1:4) {
  x.m <- rbind(runif(10,0,5), x.m)
  y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01), y.m)
}

# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <- c(rep(1, 50), rep(2:10, each = 10))   # the five leading rows all belong to group 1

## Add weights: group 1 (50 observations) has weight=5000, the others weight=1 ####
w <- c(rep(5000,50),rep(1,90))

res <- lmer(y ~ x + (x|group), weights=w)
summary(res)

# correct the residual variance for the weights: generate each group-1
# observation with the lower SD it would have as a mean of 5000 points
# slope a and intercept b for lines
a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
y.m <- x.m <- matrix(nrow=10,ncol=10)
# i th row is for ith group
for(i in 1:10){
  # generate randomly x values
  x.m[i,] <- runif(10,0,5)
  # y values: group 1's noise SD is 0.01/sqrt(5000), consistent with its weight of 5000
  if (i==1) y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01/sqrt(5000))
  else y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
}
# convert to vectors with first row, then second row...
y <- as.vector(t(y.m))
x <- as.vector(t(x.m))
group <-
c(rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
## Add weights: group 1 has weight=5000, the others weight=1 #############
w <- c(rep(5000,10),rep(1,90))

res <- lmer(y ~ x + (x|group), weights=w)
summary(res)
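
A more literal way to generate the group-1 data, if it helps to see where the
sqrt(5000) comes from (an illustration only; it would replace the i==1 branch
in the loop above): make each group-1 value the average of 5000 raw
measurements with SD 0.01, which has SD 0.01/sqrt(5000).

# illustration only: each group-1 observation as a mean of 5000 raw points
raw <- replicate(5000, a[1]*x.m[1,] + b[1] + rnorm(10, 0, 0.01))  # 10 x 5000 matrix
y.m[1,] <- rowMeans(raw)  # means have SD 0.01/sqrt(5000), matching weight = 5000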


On 26 June 2015 at 00:54, Susanne Susanne <susanne.stat at gmx.de> wrote:

> Hello Ken and everybody,
>
>  thank you very much for your answer! I tried out another example,
> generating the datapoints randomly.
> I get different results, but they are just as hard to understand as before.
>
> I fixed the lines and chose datapoints randomly on these lines.
> A) 10 groups, each with 10 datapoints
> B) 10 groups. Group 1 has 40 datapoints, the others 10 each.
>
>  I get the same result, so at first it seems as if the number of datapoints
> in a group doesn't matter.
>
> Then I weighted group 1 with weight = 500000000 and the others with
> weight = 1.
> (If I use smaller weights, I can't see a difference, but such an extreme
> weight doesn't seem natural...)
>
> The result:
> A) with no weights = B) with no weights = B) with weights [a regression
> line which lies in the middle of my generated lines]
> !=
> A) with weights [a regression line which almost lies on group 1, so
> clearly stronger influence of group 1]
>
> So now it gets weird.  The version where group 1 has only 10 datapoints but
> the higher weight gives a regression line that is very close to group 1's own
> line, which is what I would expect from such an extreme weight.
> But if group 1 has 40 datapoints and the same extreme weight, the result is a
> regression line just like the one without weighting.
>
> Am I doing something wrong? Can someone explain this to me?
>
> Susanne
>
>
> My code:
>
>  ## 10 groups with 10 datapoints each
> # slope a and intercept b for lines
> a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
> b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
>
> y.m <- x.m <- matrix(nrow=10,ncol=10)
>
> # i th row is for ith group
> for(i in 1:10){
>   # generate randomly x values
>   x.m[i,] <- runif(10,0,5)
>   # y values
>   y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
>   }
>
> # convert to vectors with first row, then second row...
> y <- as.vector(t(y.m))
> x <- as.vector(t(x.m))
> group <-
> c(rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
> res <- lmer(y ~ x + (x|group))
>
> #####################################################################
> ## 10 groups. 1. has 40 datapoints, others have 10
> # if it matters that group 1 has more datapoints, the regression line
> # should have a smaller intercept and smaller slope
> # slope a and intercept b for lines
>
> a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
> b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
>
> y.m <- x.m <- matrix(nrow=10,ncol=10)
> # i th row is for ith group
> for(i in 1:10){
>   # generate randomly x values
>   x.m[i,] <- runif(10,0,5)
>
>   # y values
>   y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
> }
>
> x.m <- rbind(runif(10,0,5),x.m)
> y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
> x.m <- rbind(runif(10,0,5),x.m)
> y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
> x.m <- rbind(runif(10,0,5),x.m)
> y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
> x.m <- rbind(runif(10,0,5),x.m)
> y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
>
> # convert to vectors with first row, then second row...
> y <- as.vector(t(y.m))
> x <- as.vector(t(x.m))
> group <-
> c(rep(1,10),rep(1,10),rep(1,10),rep(1,10),rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
> res <- lmer(y ~ x + (x|group))
> res
>
> ###################################################################
> # result:
> # it's the same regression line for both
> # => it doesn't matter that group 1 has more datapoints!!
> ##################################################################
>
> # plot:
> plot(x,y, pch=16, col=1, ylab="y",xlab="x",main=sprintf("plot"))
> s <- summary(res)
> # add linear regression lines
> abline(s$coefficients[1,1],s$coefficients[2,1], lty=2, col = 1)
>
> ###################################################################
> # check with weights:
> ##################################################################
>
> # same as above:
> # slope a and intercept b for lines
> a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
> b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
> y.m <- x.m <- matrix(nrow=10,ncol=10)
> # i th row is for ith group
> for(i in 1:10){
>   # generate randomly x values
>   x.m[i,] <- runif(10,0,5)
>   # y values
>   y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
> }
> # convert to vectors with first row, then second row...
> y <- as.vector(t(y.m))
> x <- as.vector(t(x.m))
> group <-
> c(rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
> ## Add weights: 1. group has weight=5000, the others weight=1 #############
> w <- c(rep(500000000,10),rep(1,90))
>
> res <- lmer(y ~ x + (x|group), weights=w)
> res
> #################################################################
> # slope a and intercept b for lines
> a <- c(0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3,2.5)
> b <- c(0,0.3,0.5,0.8,1,1.3,1.5,1.8,2,2.3)
> y.m <- x.m <- matrix(nrow=10,ncol=10)
> # i th row is for ith group
> for(i in 1:10){
>   # generate randomly x values
>   x.m[i,] <- runif(10,0,5)
>   # y values
>   y.m[i,] <- a[i]*x.m[i,] + b[i] + rnorm(10,0,0.01)
> }
>
> x.m <- rbind(runif(10,0,5),x.m)
> y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
> x.m <- rbind(runif(10,0,5),x.m)
> y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
> x.m <- rbind(runif(10,0,5),x.m)
> y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
> x.m <- rbind(runif(10,0,5),x.m)
> y.m <- rbind(a[1]*x.m[1,] + b[1] + rnorm(10,0,0.01),y.m)
>
> # convert to vectors with first row, then second row...
> y <- as.vector(t(y.m))
> x <- as.vector(t(x.m))
> group <-
> c(rep(1,10),rep(1,10),rep(1,10),rep(1,10),rep(1,10),rep(2,10),rep(3,10),rep(4,10),rep(5,10),rep(6,10),rep(7,10),rep(8,10),rep(9,10),rep(10,10))
>
> ## Add weights: 1. group has weight=5000, the others weight=1 #############
> w <- c(rep(500000000,40),rep(1,90))
>
> res <- lmer(y ~ x + (x|group))
> res
>
>
>
>
>
>
>
>  *Sent:* Wednesday, 24 June 2015, 15:23
> *From:* "Ken Beath" <ken.beath at mq.edu.au>
> *To:* "Susanne Susanne" <susanne.stat at gmx.de>
> *Cc:* Rlist <r-sig-mixed-models at r-project.org>
> *Subject:* Re: [R-sig-ME] lmer(): Higher weight in case of more
> measurements
>  lmer will sort out by itself the fact that some groups are larger than
> others; weights is used when an individual point is the summary of a
> number of observations.
>
> When generating data sets to test things, 2 groups isn't enough: use a
> number similar to what would be used in the real analysis, and generate the
> data randomly, so that the results can be compared to what is expected.
>
> On 24 June 2015 at 23:10, Susanne Susanne <susanne.stat at gmx.de> wrote:
>>
>>
>> Hello everyone,
>>
>> I just saw now, that my last emails were empty. I had this question
>> regarding the lme4 package.
>> I want to fit a simple model,
>>
>>   lmer(y ~ x + (x | group)),
>>
>> but the number of datapoints varies among groups.
>>
>> So what I want is to weight groups that have more measurements than
>> others slightly higher (as they provide more information).
>>
>>
>> My question is: Is this done automatically in the lmer() function or
>> should I do it manually myself?
>>
>>
>> It seems an easy question, but I tried a small example, which I attached,
>> and the results are very contradictory.
>>
>> The upper two pictures seem to prove that lmer doesn't weight groups
>> with more datapoints any higher. I get the same regression line,
>> regardless of whether the blue group has more or fewer datapoints.
>>
>> In the lower two pictures I used the "weights=" argument to weight the
>> blue group higher. And then suddenly it seems to matter how many datapoints
>> a group has. I used the same weights in both pictures, but now I get
>> different regression lines, depending on how many datapoints the blue group
>> has.
>>
>>
>> I really need to know this for my thesis and would be very thankful if
>> someone could help me!
>>
>> Susanne
>>
>>
>>
>>
>> I add my code:
>>
>> # Data
>> # many Values
>> x <- c(2, 4, 1, 1.25, 1.5,1.75, 2,2.25, 2.5, 2.75, 3, 3.25,
>> 3.5,3.75,4,4.25, 4.5, 4.75, 5)
>> y <- c(0.2, 0.4, 0.5, 0.525, 0.57, 0.575, 0.62, 0.625, 0.65, 0.677,
>> 0.7,0.726,0.75,0.775,0.8,0.827,0.85,0.873, 0.9)
>> group <- c(1,1,rep(2,17))
>>
>> # Less values
>> x <- c(2, 4, 1, 3.25, 5)
>> y <- c(0.2, 0.4, 0.5, 0.726, 0.9)
>> group <- c(1,1,rep(2,3))
>>
>> # Weights
>> if(weights == "Default"){ w <- NULL }
>>
>> if(weights == "Disequilibrated"){
>>    w <- c(1,1, rep(5000,17))
>>   # or for less values
>>   w <- c(1,1, rep(5000,3))
>>   # scale weights to sum weights=number of values
>>   w <- length(w)*w/sum(w) }
>>
>> # Lme Model
>> lmeModel <- lmer(y ~ x + (x|group), weights=w)
>> s <- summary(lmeModel)
>>
>> # Plot
>> plot(x, y, pch=16, col=c(1,1,rep(2,17)), xlim=c(min(x),max(x)),
>>      ylim=c(min(y),max(y)), ylab="y", xlab="x", main="Random Intercept + Slope")
>> abline(s$coefficients[1,1], s$coefficients[2,1], lty=2, col=1)
>>
>>
>>



-- 

*Ken Beath*
Lecturer
Statistics Department
MACQUARIE UNIVERSITY NSW 2109, Australia

Phone: +61 (0)2 9850 8516

Level 2, AHH
http://stat.mq.edu.au/our_staff/staff_-_alphabetical/staff/beath,_ken/

