[R] cumulative sum by group and under some criteria
arun
smartpink111 at yahoo.com
Tue Mar 12 02:14:40 CET 2013
HI,
maxN<-10
c11<-0.07
c12<-0.07
c1<-0.15
c2<-0.20
p0L<-0.05
p0H<-0.05
p1L<-0.25
p1H<-0.25
alpha<-0.6
beta<-0.6
bay<-function (c11,c12,c1,c2){
d<-do.call(rbind,lapply(2:(maxN-6),function(m1)
do.call(rbind,lapply(2:(maxN-m1-4),function(n1)
do.call(rbind,lapply(0:m1,function(x1)
do.call(rbind,lapply(0:n1,function(y1)
expand.grid(m1,n1,x1,y1)))))))))
names(d)<-c("m1","n1","x1","y1")
d<-within(d,{
term1_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)
term1_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)
})
########## add Qm Qn ##################################
set.seed(8)
d1<-do.call(rbind,lapply(seq_len(nrow(d)),function(i){
Pm<- rbeta(1000,0.2+d[i,"x1"],0.8+d[i,"m1"]-d[i,"x1"]);
Pn<- rbeta(1000,0.2+d[i,"y1"],0.8+d[i,"n1"]-d[i,"y1"]);
Fm<- ecdf(Pm);
Fn<- ecdf(Pn);
#Fmm<- Fm(d[i,"p11"]);
#Fnn<- Fn(d[i,"p12"]);
Fmm<- Fm(p1L);
Fnn<- Fn(p1H);
R<- (Fmm+Fnn)/2;
Fmm_f<- max(R, Fmm);
Fnn_f<- min(R, Fnn);
Qm<- 1-Fmm_f;
Qn<- 1-Fnn_f;
data.frame(Qm,Qn)}))
d2<-cbind(d,d1)
library(zoo)
lst1<- split(d2,list(d$m1,d$n1))
d2<-do.call(rbind,lapply(lst1[lapply(lst1,nrow)!=0],function(x){
x[,9:14]<-NA;
x[,9:10][x$Qm<=c11,]<-cumsum(x[,5:6][x$Qm<=c11,]);
x[,11:12][x$Qn<=c12,]<-cumsum(x[,5:6][x$Qn<=c12,]);
x[,13:14]<-cumsum(x[,5:6]);
colnames(x)[9:14]<- c("cterm1_P0L","cterm1_P1L","cterm1_P0H","cterm1_P1H","sumTerm1_p0","sumTerm1_p1");
x1<-na.locf(x);
x1[,9:14][is.na(x1[,9:14])]<-0;
x1}
))
row.names(d2)<-1:nrow(d2)
res1<-aggregate(.~m1+n1,data=d2[,c(1:2,9:12)],max)
d3<-res1[res1[,4]<=beta & res1[,6]<beta,]
f1<-function(dat){
stopifnot(nrow(dat)!=0)
library(plyr)
join(dat,d2,by=c("m1","n1"),type="inner")
}###not present
d4<-f1(d3)###not run
#########################
################ 2nd stage ################################
#### this method not look at the combination of m1 and n1,
#### so need to merger the orginal data 'd2
res2<-do.call(rbind,lapply(unique(d4$m1),function(m1)
do.call(rbind,lapply(unique(d4$n1),function(n1)
do.call(rbind,lapply(unique(d4$x1),function(x1)
do.call(rbind,lapply(unique(d4$y1),function(y1)
#do.call(rbind,lapply(0:m1,function(x1)
#do.call(rbind,lapply(0:n1,function(y1)
do.call(rbind,lapply((m1+2):(maxN-2-n1),function(m)
do.call(rbind,lapply((n1+2):(maxN-m),function(n)
do.call(rbind,lapply(x1:(x1+m-m1), function(x)
do.call(rbind,lapply(y1:(y1+n-n1), function(y)
expand.grid(m1,n1,x1,y1,m,n,x,y)) )))))))))))))))
names(res2)<- c("m1","n1","x1","y1","m","n","x","y")
attr(res2,"out.attrs")<-NULL
res3<- join(res2,d4,by=c("m1","n1","x1","y1"),type="inner")
res3<-res3[,c(1:16)]
############################################################# add more variables another method #############################
res3<- within(res3,{
term2_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
term2_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)})
#term2_p0<- term1_p0*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
#term2_p1<- term1_p1*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)})
res4<-do.call(rbind,lapply(seq_len(nrow(res3)),function(i){
Pm2<-rbeta(1000,0.2+res3[i,"x"],0.8+res3[i,"m"]-res3[i,"x"]);
Pn2<- rbeta(1000,0.2+res3[i,"y"],0.8+res3[i,"n"]-res3[i,"y"]);
Fm2<- ecdf(Pm2);
Fn2<- ecdf(Pn2);
#Fmm2<- Fm2(res3[i,"p1"]);
#Fnn2<- Fn2(res3[i,"p2"]);
Fmm2<- Fm2(p1L);
Fnn2<- Fn2(p1H);
R2<- (Fmm2+Fnn2)/2;
Fmm_f2<- max(R2, Fmm2);
Fnn_f2<- min(R2, Fnn2);
Qm2<- 1-Fmm_f2;
Qn2<- 1-Fnn_f2;
data.frame(Qm2,Qn2)}))
res5<-cbind(res3,res4)
################################################# calculate cumulative sum #################################
lst2<- split(res5,list(res5$m1,res5$n1,res5$m,res5$n))
res5<-do.call(rbind,lapply(lst2[lapply(lst2,nrow)!=0],
function(x){
x[,21:26]<-NA;
x[,21:22][x$Qm2<=c1 & x$Qm>c11,]<-cumsum(x[,17:18][x$Qm2<=c1 & x$Qm>c11,]);
x[,23:24][x$Qn2<=c2 & x$Qn>c12,]<-cumsum(x[,17:18][x$Qn2<=c2 & x$Qn>c12,]);
x[,25:26]<-cumsum(x[,17:18]);
colnames(x)[21:26]<- c("cterm2_P1L","cterm2_P0L","cterm2_P1H","cterm2_P0H","sumT2p0","sumT2p1");
x2<-na.locf(x);
x2[,21:26][is.na(x2[,21:26])]<-0; x2}
))
row.names(res5)<-1:nrow(res5)
res6<-aggregate(.~m1+n1+m+n,data=res5[,c(1:6,9:12,21:24)] ,max)
res6<-within(res6,{AL<-1-(cterm1_P0L + cterm2_P0L);
AH<-1-(cterm1_P0H + cterm2_P0H);
BL<-(cterm1_P1L + cterm2_P1L);
BH<-(cterm1_P1H + cterm2_P1H);
EN<- m1 + (m-m1)*(1-cterm1_P0L) + n1 + (n-n1)*(1-cterm1_P0H);
N<-m+n
})
res7<-res6[,c(1:4,7,9,15:20)]
res7<-res7[res7[,9]<=beta & res7[,10]<=beta &res7[,11] <= alpha & res7[,12] <= alpha,]
}
bay(0.07,0.07,0.15,0.20)
res7
head(res7)
# m1 n1 m n cterm1_P0L cterm1_P0H N EN BH BL AH
#1 2 2 4 4 0.8167625 0 8 6.366475 0.1001129 0.4702148 0.3365796
#2 2 2 5 4 0.8167625 0 9 6.549713 0.2002258 0.4405518 0.2038955
#3 3 2 5 4 0.8573750 0 9 7.285250 0.2002258 0.4218750 0.2038955
#4 2 2 6 4 0.8167625 0 10 6.732950 0.1689405 0.4558468 0.2121882
#5 3 2 6 4 0.8573750 0 10 7.427875 0.1689405 0.4781885 0.2121882
#6 4 2 6 4 0.8145062 0 10 8.370988 0.1689405 0.3914909 0.2121882
# AL
#1 0.10585941
#2 0.10972831
#3 0.14262500
#4 0.05037883
#5 0.04808759
#6 0.05944387
A.K.
________________________________
From: Joanna Zhang <zjoanna2013 at gmail.com>
To: arun <smartpink111 at yahoo.com>
Sent: Monday, March 11, 2013 3:55 PM
Subject: Re: [R] cumulative sum by group and under some criteria
ok. Could you take a look at this code when you have time? I really can't figure out why the 'res 7' not found if I put the code in functions. if I take out the Bay function and f1 function, res7 should have some data.
rm(list=ls())
ls()
ls(all=true)
search()
maxN<-10
c11<-0.07
c12<-0.07
c1<-0.15
c2<-0.20
p0L<-0.05
p0H<-0.05
p1L<-0.25
p1H<-0.25
alpha<-0.6
beta<-0.6
bay<-function (c11,c12,c1,c2){
d<-do.call(rbind,lapply(2:(maxN-6),function(m1)
do.call(rbind,lapply(2:(maxN-m1-4),function(n1)
do.call(rbind,lapply(0:m1,function(x1)
do.call(rbind,lapply(0:n1,function(y1)
expand.grid(m1,n1,x1,y1)))))))))
names(d)<-c("m1","n1","x1","y1")
d<-within(d,{
term1_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)
term1_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)
})
########## add Qm Qn ##################################
set.seed(8)
d1<-do.call(rbind,lapply(seq_len(nrow(d)),function(i){
Pm<- rbeta(1000,0.2+d[i,"x1"],0.8+d[i,"m1"]-d[i,"x1"]);
Pn<- rbeta(1000,0.2+d[i,"y1"],0.8+d[i,"n1"]-d[i,"y1"]);
Fm<- ecdf(Pm);
Fn<- ecdf(Pn);
#Fmm<- Fm(d[i,"p11"]);
#Fnn<- Fn(d[i,"p12"]);
Fmm<- Fm(p1L);
Fnn<- Fn(p1H);
R<- (Fmm+Fnn)/2;
Fmm_f<- max(R, Fmm);
Fnn_f<- min(R, Fnn);
Qm<- 1-Fmm_f;
Qn<- 1-Fnn_f;
data.frame(Qm,Qn)}))
d2<-cbind(d,d1)
library(zoo)
lst1<- split(d2,list(d$m1,d$n1))
d2<-do.call(rbind,lapply(lst1[lapply(lst1,nrow)!=0],function(x){
x[,9:14]<-NA;
x[,9:10][x$Qm<=c11,]<-cumsum(x[,5:6][x$Qm<=c11,]);
x[,11:12][x$Qn<=c12,]<-cumsum(x[,5:6][x$Qn<=c12,]);
x[,13:14]<-cumsum(x[,5:6]);
colnames(x)[9:14]<- c("cterm1_P0L","cterm1_P1L","cterm1_P0H","cterm1_P1H","sumTerm1_p0","sumTerm1_p1");
x1<-na.locf(x);
x1[,9:14][is.na(x1[,9:14])]<-0;
x1}
))
row.names(d2)<-1:nrow(d2)
res1<-aggregate(.~m1+n1,data=d2[,c(1:2,9:12)],max)
d3<-res1[res1[,4]<=beta & res1[,6]<beta,]
d3
f1<-function(dat){
stopifnot(nrow(dat)!=0)
library(plyr)
d4<- join(dat,d2,by=c("m1","n1"),type="inner")
d4
#########################
################ 2nd stage ################################
#### this method not look at the combination of m1 and n1,
#### so need to merger the orginal data 'd2
res2<-do.call(rbind,lapply(unique(d4$m1),function(m1)
do.call(rbind,lapply(unique(d4$n1),function(n1)
do.call(rbind,lapply(unique(d4$x1),function(x1)
do.call(rbind,lapply(unique(d4$y1),function(y1)
#do.call(rbind,lapply(0:m1,function(x1)
#do.call(rbind,lapply(0:n1,function(y1)
do.call(rbind,lapply((m1+2):(maxN-2-n1),function(m)
do.call(rbind,lapply((n1+2):(maxN-m),function(n)
do.call(rbind,lapply(x1:(x1+m-m1), function(x)
do.call(rbind,lapply(y1:(y1+n-n1), function(y)
expand.grid(m1,n1,x1,y1,m,n,x,y)) )))))))))))))))
names(res2)<- c("m1","n1","x1","y1","m","n","x","y")
attr(res2,"out.attrs")<-NULL
#install.packages(pkgs="plyr")
library(plyr)
res3<- join(res2,d4,by=c("m1","n1","x1","y1"),type="inner")
res3<-res3[,c(1:16)]
############################################################# add more variables another method #############################
res3<- within(res3,{
term2_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
term2_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)})
#term2_p0<- term1_p0*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
#term2_p1<- term1_p1*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)})
res4<-do.call(rbind,lapply(seq_len(nrow(res3)),function(i){
Pm2<-rbeta(1000,0.2+res3[i,"x"],0.8+res3[i,"m"]-res3[i,"x"]);
Pn2<- rbeta(1000,0.2+res3[i,"y"],0.8+res3[i,"n"]-res3[i,"y"]);
Fm2<- ecdf(Pm2);
Fn2<- ecdf(Pn2);
#Fmm2<- Fm2(res3[i,"p1"]);
#Fnn2<- Fn2(res3[i,"p2"]);
Fmm2<- Fm2(p1L);
Fnn2<- Fn2(p1H);
R2<- (Fmm2+Fnn2)/2;
Fmm_f2<- max(R2, Fmm2);
Fnn_f2<- min(R2, Fnn2);
Qm2<- 1-Fmm_f2;
Qn2<- 1-Fnn_f2;
data.frame(Qm2,Qn2)}))
res5<-cbind(res3,res4)
################################################# calculate cumulative sum #################################
lst2<- split(res5,list(res5$m1,res5$n1,res5$m,res5$n))
res5<-do.call(rbind,lapply(lst2[lapply(lst2,nrow)!=0],
function(x){
x[,21:26]<-NA;
x[,21:22][x$Qm2<=c1 & x$Qm>c11,]<-cumsum(x[,17:18][x$Qm2<=c1 & x$Qm>c11,]);
x[,23:24][x$Qn2<=c2 & x$Qn>c12,]<-cumsum(x[,17:18][x$Qn2<=c2 & x$Qn>c12,]);
x[,25:26]<-cumsum(x[,17:18]);
colnames(x)[21:26]<- c("cterm2_P1L","cterm2_P0L","cterm2_P1H","cterm2_P0H","sumT2p0","sumT2p1");
x2<-na.locf(x);
x2[,21:26][is.na(x2[,21:26])]<-0; x2}
))
row.names(res5)<-1:nrow(res5)
res6<-aggregate(.~m1+n1+m+n,data=res5[,c(1:6,9:12,21:24)] ,max)
res6<-within(res6,{AL<-1-(cterm1_P0L + cterm2_P0L);
AH<-1-(cterm1_P0H + cterm2_P0H);
BL<-(cterm1_P1L + cterm2_P1L);
BH<-(cterm1_P1H + cterm2_P1H);
EN<- m1 + (m-m1)*(1-cterm1_P0L) + n1 + (n-n1)*(1-cterm1_P0H);
N<-m+n
})
res7<-res6[,c(1:4,7,9,15:20)]
res7<-res7[res7[,9]<=beta & res7[,10]<=beta &res7[,11] <= alpha & res7[,12] <= alpha,]
}
f1(d3)
}
bay(0.07,0.07,0.15,0.20)
res7
More information about the R-help
mailing list