[R] cumulative sum by group and under some criteria

Tue Mar 12 02:14:40 CET 2013

HI,

maxN<-10
c11<-0.07
c12<-0.07
c1<-0.15
c2<-0.20

p0L<-0.05
p0H<-0.05
p1L<-0.25
p1H<-0.25

alpha<-0.6
beta<-0.6

bay<-function (c11,c12,c1,c2){
d<-do.call(rbind,lapply(2:(maxN-6),function(m1) 
do.call(rbind,lapply(2:(maxN-m1-4),function(n1)
do.call(rbind,lapply(0:m1,function(x1)
do.call(rbind,lapply(0:n1,function(y1)
expand.grid(m1,n1,x1,y1))))))))) 
names(d)<-c("m1","n1","x1","y1")

d<-within(d,{
term1_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)
term1_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)
})      

########## add Qm Qn ##################################
set.seed(8)
d1<-do.call(rbind,lapply(seq_len(nrow(d)),function(i){
Pm<- rbeta(1000,0.2+d[i,"x1"],0.8+d[i,"m1"]-d[i,"x1"]);
Pn<- rbeta(1000,0.2+d[i,"y1"],0.8+d[i,"n1"]-d[i,"y1"]);
Fm<- ecdf(Pm);
Fn<- ecdf(Pn);
#Fmm<- Fm(d[i,"p11"]);
#Fnn<- Fn(d[i,"p12"]);
Fmm<- Fm(p1L);
Fnn<- Fn(p1H);
R<- (Fmm+Fnn)/2; 
Fmm_f<- max(R, Fmm);
Fnn_f<- min(R, Fnn);
Qm<- 1-Fmm_f;
Qn<- 1-Fnn_f;
data.frame(Qm,Qn)}))
d2<-cbind(d,d1)

library(zoo)
lst1<- split(d2,list(d$m1,d$n1)) 
d2<-do.call(rbind,lapply(lst1[lapply(lst1,nrow)!=0],function(x){ 
x[,9:14]<-NA; 
x[,9:10][x$Qm<=c11,]<-cumsum(x[,5:6][x$Qm<=c11,]); 
x[,11:12][x$Qn<=c12,]<-cumsum(x[,5:6][x$Qn<=c12,]);
x[,13:14]<-cumsum(x[,5:6]); 
colnames(x)[9:14]<- c("cterm1_P0L","cterm1_P1L","cterm1_P0H","cterm1_P1H","sumTerm1_p0","sumTerm1_p1"); 
x1<-na.locf(x); 
x1[,9:14][is.na(x1[,9:14])]<-0; 
x1}
)) 
row.names(d2)<-1:nrow(d2)

res1<-aggregate(.~m1+n1,data=d2[,c(1:2,9:12)],max)

d3<-res1[res1[,4]<=beta & res1[,6]<beta,] 
f1<-function(dat){
stopifnot(nrow(dat)!=0)
library(plyr) 
join(dat,d2,by=c("m1","n1"),type="inner")
}###not present

d4<-f1(d3)###not run
#########################
################ 2nd stage ################################
#### this method not look at the combination of m1 and n1,
#### so need to merger the orginal data 'd2

res2<-do.call(rbind,lapply(unique(d4$m1),function(m1) 
do.call(rbind,lapply(unique(d4$n1),function(n1)
do.call(rbind,lapply(unique(d4$x1),function(x1)
do.call(rbind,lapply(unique(d4$y1),function(y1)

#do.call(rbind,lapply(0:m1,function(x1) 
#do.call(rbind,lapply(0:n1,function(y1) 
do.call(rbind,lapply((m1+2):(maxN-2-n1),function(m) 
do.call(rbind,lapply((n1+2):(maxN-m),function(n) 
do.call(rbind,lapply(x1:(x1+m-m1), function(x) 
do.call(rbind,lapply(y1:(y1+n-n1), function(y)
expand.grid(m1,n1,x1,y1,m,n,x,y)) ))))))))))))))) 

names(res2)<- c("m1","n1","x1","y1","m","n","x","y") 
attr(res2,"out.attrs")<-NULL 
res3<- join(res2,d4,by=c("m1","n1","x1","y1"),type="inner") 
res3<-res3[,c(1:16)]
############################################################# add more variables another method #############################
res3<- within(res3,{
term2_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
term2_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)}) 

#term2_p0<- term1_p0*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
#term2_p1<- term1_p1*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)}) 

res4<-do.call(rbind,lapply(seq_len(nrow(res3)),function(i){
Pm2<-rbeta(1000,0.2+res3[i,"x"],0.8+res3[i,"m"]-res3[i,"x"]);
Pn2<- rbeta(1000,0.2+res3[i,"y"],0.8+res3[i,"n"]-res3[i,"y"]);
Fm2<- ecdf(Pm2); 
Fn2<- ecdf(Pn2);
#Fmm2<- Fm2(res3[i,"p1"]);
#Fnn2<- Fn2(res3[i,"p2"]);

Fmm2<- Fm2(p1L);
Fnn2<- Fn2(p1H);

R2<- (Fmm2+Fnn2)/2; 
Fmm_f2<- max(R2, Fmm2);
Fnn_f2<- min(R2, Fnn2); 
Qm2<- 1-Fmm_f2;
Qn2<- 1-Fnn_f2;
data.frame(Qm2,Qn2)}))

res5<-cbind(res3,res4)
################################################# calculate cumulative sum #################################

lst2<- split(res5,list(res5$m1,res5$n1,res5$m,res5$n))

res5<-do.call(rbind,lapply(lst2[lapply(lst2,nrow)!=0],
function(x){ 
x[,21:26]<-NA; 
x[,21:22][x$Qm2<=c1 & x$Qm>c11,]<-cumsum(x[,17:18][x$Qm2<=c1 & x$Qm>c11,]); 
x[,23:24][x$Qn2<=c2 & x$Qn>c12,]<-cumsum(x[,17:18][x$Qn2<=c2 & x$Qn>c12,]); 
x[,25:26]<-cumsum(x[,17:18]);

colnames(x)[21:26]<- c("cterm2_P1L","cterm2_P0L","cterm2_P1H","cterm2_P0H","sumT2p0","sumT2p1"); 
x2<-na.locf(x); 
x2[,21:26][is.na(x2[,21:26])]<-0; x2}
)) 
row.names(res5)<-1:nrow(res5)
res6<-aggregate(.~m1+n1+m+n,data=res5[,c(1:6,9:12,21:24)] ,max) 
res6<-within(res6,{AL<-1-(cterm1_P0L + cterm2_P0L);
AH<-1-(cterm1_P0H + cterm2_P0H);
BL<-(cterm1_P1L + cterm2_P1L);
BH<-(cterm1_P1H + cterm2_P1H);
EN<- m1 + (m-m1)*(1-cterm1_P0L) + n1 + (n-n1)*(1-cterm1_P0H);
N<-m+n
})
res7<-res6[,c(1:4,7,9,15:20)]
res7<-res7[res7[,9]<=beta & res7[,10]<=beta &res7[,11] <= alpha & res7[,12] <= alpha,] 
}
bay(0.07,0.07,0.15,0.20)
res7

 head(res7)
#  m1 n1 m n cterm1_P0L cterm1_P0H  N       EN        BH        BL        AH
#1  2  2 4 4  0.8167625          0  8 6.366475 0.1001129 0.4702148 0.3365796
#2  2  2 5 4  0.8167625          0  9 6.549713 0.2002258 0.4405518 0.2038955
#3  3  2 5 4  0.8573750          0  9 7.285250 0.2002258 0.4218750 0.2038955
#4  2  2 6 4  0.8167625          0 10 6.732950 0.1689405 0.4558468 0.2121882
#5  3  2 6 4  0.8573750          0 10 7.427875 0.1689405 0.4781885 0.2121882
#6  4  2 6 4  0.8145062          0 10 8.370988 0.1689405 0.3914909 0.2121882
 #         AL
#1 0.10585941
#2 0.10972831
#3 0.14262500
#4 0.05037883
#5 0.04808759
#6 0.05944387

A.K.

________________________________
From: Joanna Zhang <zjoanna2013 at gmail.com>
To: arun <smartpink111 at yahoo.com> 
Sent: Monday, March 11, 2013 3:55 PM
Subject: Re: [R] cumulative sum by group and under some criteria

ok. Could you take a look at this code when you have time? I really can't figure out why the 'res 7' not found if I put the code in functions. if I take out the Bay function and f1 function, res7 should have some data.

rm(list=ls())
ls()
ls(all=true)
search()

maxN<-10
c11<-0.07
c12<-0.07
c1<-0.15
c2<-0.20

p0L<-0.05
p0H<-0.05
p1L<-0.25
p1H<-0.25

alpha<-0.6
beta<-0.6

bay<-function (c11,c12,c1,c2){
d<-do.call(rbind,lapply(2:(maxN-6),function(m1) 
do.call(rbind,lapply(2:(maxN-m1-4),function(n1)
do.call(rbind,lapply(0:m1,function(x1)
do.call(rbind,lapply(0:n1,function(y1)
expand.grid(m1,n1,x1,y1))))))))) 
names(d)<-c("m1","n1","x1","y1")

d<-within(d,{
term1_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)
term1_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)
})      

########## add Qm Qn ##################################
set.seed(8)
d1<-do.call(rbind,lapply(seq_len(nrow(d)),function(i){
Pm<- rbeta(1000,0.2+d[i,"x1"],0.8+d[i,"m1"]-d[i,"x1"]);
Pn<- rbeta(1000,0.2+d[i,"y1"],0.8+d[i,"n1"]-d[i,"y1"]);
Fm<- ecdf(Pm);
Fn<- ecdf(Pn);
#Fmm<- Fm(d[i,"p11"]);
#Fnn<- Fn(d[i,"p12"]);
Fmm<- Fm(p1L);
Fnn<- Fn(p1H);
R<- (Fmm+Fnn)/2; 
Fmm_f<- max(R, Fmm);
Fnn_f<- min(R, Fnn);
Qm<- 1-Fmm_f;
Qn<- 1-Fnn_f;
data.frame(Qm,Qn)}))
d2<-cbind(d,d1)

library(zoo)
lst1<- split(d2,list(d$m1,d$n1)) 
d2<-do.call(rbind,lapply(lst1[lapply(lst1,nrow)!=0],function(x){ 
x[,9:14]<-NA; 
x[,9:10][x$Qm<=c11,]<-cumsum(x[,5:6][x$Qm<=c11,]); 
x[,11:12][x$Qn<=c12,]<-cumsum(x[,5:6][x$Qn<=c12,]);
x[,13:14]<-cumsum(x[,5:6]); 
colnames(x)[9:14]<- c("cterm1_P0L","cterm1_P1L","cterm1_P0H","cterm1_P1H","sumTerm1_p0","sumTerm1_p1"); 
x1<-na.locf(x); 
x1[,9:14][is.na(x1[,9:14])]<-0; 
x1}
)) 
row.names(d2)<-1:nrow(d2)

res1<-aggregate(.~m1+n1,data=d2[,c(1:2,9:12)],max)

d3<-res1[res1[,4]<=beta & res1[,6]<beta,] 
d3

f1<-function(dat){

stopifnot(nrow(dat)!=0)

library(plyr) 
d4<- join(dat,d2,by=c("m1","n1"),type="inner")
d4

#########################
################ 2nd stage ################################
#### this method not look at the combination of m1 and n1,
#### so need to merger the orginal data 'd2

res2<-do.call(rbind,lapply(unique(d4$m1),function(m1) 
do.call(rbind,lapply(unique(d4$n1),function(n1)
do.call(rbind,lapply(unique(d4$x1),function(x1)
do.call(rbind,lapply(unique(d4$y1),function(y1)

#do.call(rbind,lapply(0:m1,function(x1) 
#do.call(rbind,lapply(0:n1,function(y1) 
do.call(rbind,lapply((m1+2):(maxN-2-n1),function(m) 
do.call(rbind,lapply((n1+2):(maxN-m),function(n) 
do.call(rbind,lapply(x1:(x1+m-m1), function(x) 
do.call(rbind,lapply(y1:(y1+n-n1), function(y)
expand.grid(m1,n1,x1,y1,m,n,x,y)) ))))))))))))))) 

names(res2)<- c("m1","n1","x1","y1","m","n","x","y") 
attr(res2,"out.attrs")<-NULL 

#install.packages(pkgs="plyr")
library(plyr) 

res3<- join(res2,d4,by=c("m1","n1","x1","y1"),type="inner") 

res3<-res3[,c(1:16)]
############################################################# add more variables another method #############################

res3<- within(res3,{
term2_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
term2_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)}) 

#term2_p0<- term1_p0*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
#term2_p1<- term1_p1*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)}) 

res4<-do.call(rbind,lapply(seq_len(nrow(res3)),function(i){
Pm2<-rbeta(1000,0.2+res3[i,"x"],0.8+res3[i,"m"]-res3[i,"x"]);
Pn2<- rbeta(1000,0.2+res3[i,"y"],0.8+res3[i,"n"]-res3[i,"y"]);
Fm2<- ecdf(Pm2); 
Fn2<- ecdf(Pn2);
#Fmm2<- Fm2(res3[i,"p1"]);
#Fnn2<- Fn2(res3[i,"p2"]);

Fmm2<- Fm2(p1L);
Fnn2<- Fn2(p1H);

R2<- (Fmm2+Fnn2)/2; 
Fmm_f2<- max(R2, Fmm2);
Fnn_f2<- min(R2, Fnn2); 
Qm2<- 1-Fmm_f2;
Qn2<- 1-Fnn_f2;
data.frame(Qm2,Qn2)}))

res5<-cbind(res3,res4)

################################################# calculate cumulative sum #################################

lst2<- split(res5,list(res5$m1,res5$n1,res5$m,res5$n))

res5<-do.call(rbind,lapply(lst2[lapply(lst2,nrow)!=0],
function(x){ 
x[,21:26]<-NA; 
x[,21:22][x$Qm2<=c1 & x$Qm>c11,]<-cumsum(x[,17:18][x$Qm2<=c1 & x$Qm>c11,]); 
x[,23:24][x$Qn2<=c2 & x$Qn>c12,]<-cumsum(x[,17:18][x$Qn2<=c2 & x$Qn>c12,]); 
x[,25:26]<-cumsum(x[,17:18]);

colnames(x)[21:26]<- c("cterm2_P1L","cterm2_P0L","cterm2_P1H","cterm2_P0H","sumT2p0","sumT2p1"); 
x2<-na.locf(x); 
x2[,21:26][is.na(x2[,21:26])]<-0; x2}
)) 
row.names(res5)<-1:nrow(res5)

res6<-aggregate(.~m1+n1+m+n,data=res5[,c(1:6,9:12,21:24)] ,max) 

res6<-within(res6,{AL<-1-(cterm1_P0L + cterm2_P0L);
AH<-1-(cterm1_P0H + cterm2_P0H);
BL<-(cterm1_P1L + cterm2_P1L);
BH<-(cterm1_P1H + cterm2_P1H);
EN<- m1 + (m-m1)*(1-cterm1_P0L) + n1 + (n-n1)*(1-cterm1_P0H);
N<-m+n
})
res7<-res6[,c(1:4,7,9,15:20)]

res7<-res7[res7[,9]<=beta & res7[,10]<=beta &res7[,11] <= alpha & res7[,12] <= alpha,] 
}
f1(d3)

}
bay(0.07,0.07,0.15,0.20)
res7