[R] new question
arun
smartpink111 at yahoo.com
Thu Mar 28 19:47:18 CET 2013
Hi,
You also mentioned separating the significant results from the non-significant ones.
If you replace:
# Existing lines inside Spec(): for every pair of count columns (columns 4
# onward of res), run chisq.test() row by row on the two counts and keep the
# p-value (NA when the two counts sum to 0). Each pair yields one column named
# "Count_<col1><col2>"; the p-value columns are then appended to res.
Chisq1test_Count<-do.call(cbind,lapply(as.data.frame(combn(names(res)[4:ncol(res)],2),stringsAsFactors=FALSE),function(x) {x1<-data.frame(apply(cbind(res[x[1]],res[x[2]]),1,function(y){ifelse(sum(y)==0, NA, chisq.test(y)$p.value)}));colnames(x1)<- paste0("Count_",x[1],x[2]);x1}))
res1<- cbind(res,Chisq1test_Count)
with
# Replacement lines: same pairwise, row-wise chisq.test() p-values as before
# (NA when the two counts sum to 0), but each pair now also gets a second
# column named "<p-value column>_Flag" holding "S" when the p-value is < 0.05
# and "NS" otherwise (an NA p-value gives an NA flag).
# Because each per-pair data frame now has two columns, the cbind() step
# prefixes the resulting names with V1., V2., ... as seen in the output below.
Chisqtest_CountNew<-do.call(cbind,lapply(as.data.frame(combn(names(res)[4:ncol(res)],2),stringsAsFactors=FALSE),function(x) {x1<-data.frame(apply(cbind(res[x[1]],res[x[2]]),1,function(y){ifelse(sum(y)==0, NA, chisq.test(y)$p.value)}));colnames(x1)<- paste0("Count_",x[1],x[2]);x2<-within(x1,{Flag<-ifelse(x1[,1]<0.05,"S","NS")}); colnames(x2)[2]<-paste0(colnames(x2)[1],"_Flag");x2}))
res1<- cbind(res,Chisqtest_CountNew)
inside the Spec() function, the output becomes:
# Show the first two rows of the result, using an FDR cutoff of 0.05.
head(Spec(ListFacGroup,0.05),2)
# Seq Mod z a2 c2 c3 t2 V1.Count_a2c2
#1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 0.02534732
#2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 0.01430588
# V1.Count_a2c2_Flag V2.Count_a2c3 V2.Count_a2c3_Flag V3.Count_a2t2
#1 S 0.02534732 S 0.10247043
#2 S 0.01430588 S 0.05878172
# V3.Count_a2t2_Flag V4.Count_c2c3 V4.Count_c2c3_Flag V5.Count_c2t2
#1 NS NA <NA> 0.3173105
#2 NS NA <NA> 0.3173105
# V5.Count_c2t2_Flag V6.Count_c3t2 V6.Count_c3t2_Flag
#1 NS 0.3173105 NS
#2 NS 0.3173105 NS
A.K.
----- Original Message -----
From: arun <smartpink111 at yahoo.com>
To: Vera Costa <veracosta.rt at gmail.com>
Cc: R help <r-help at r-project.org>
Sent: Thursday, March 28, 2013 2:28 PM
Subject: Re: [R] new question
Hi,
The function outputs the unique rows and also runs a chi-squared test on the frequencies (row by row).
# Spec: build a table of spec counts per (Seq, Mod, z) and folder, then add
# pairwise chi-square p-values between the count columns.
#
# Arguments:
#   lista  - named list of data frames; the code reads the columns "spec",
#            "FDR", "Seq", "Mod" and "z" from each one, and uses names(lista)
#            as the folder/group names.
#   FDR_k  - cutoff; only rows with FDR < FDR_k are kept.
#
# Returns: a data frame with Seq, Mod, z, one count column per folder name,
#          and one "Count_<col1><col2>" p-value column per pair of count columns.
#
# Requires the plyr and data.table packages (attached inside the function).
Spec <- function(lista,FDR_k) {
# Make sure spec is character so it can be pasted and split later.
list.new<-lapply(lista,function(x) within(x,{spec<- as.character(spec)}))
# Group the data frames by their list names.
split.list<-split(list.new,names(lista))
# Keep only rows with FDR < FDR_k and only the Seq, Mod, z and spec columns.
seq.mod.z<-lapply(seq_along(split.list),function(i) lapply(split.list[[i]],function(x) x[x[["FDR"]]<FDR_k,c("Seq","Mod","z","spec")]))
names(seq.mod.z)<- names(split.list)
# Insert a leading column (folder_name) holding the name of the folder.
folder.name<-lapply(seq.mod.z,function(x) lapply(names(x),function(i) do.call(rbind,lapply(x[i],function(x) cbind(folder_name=i,x)))))
# Within each (Seq, Mod, z) group, replace spec with the comma-joined spec
# values of the whole group. Rows are not collapsed here, so every row of a
# group carries the same joined string.
library(plyr)
library(data.table)
merge.data<- lapply(folder.name,function(x) lapply(x,function(x1) {x1<-data.table(x1); x1[,spec:=paste(spec,collapse=","),by=c("Seq","Mod","z")]}))
# Add a column with the number of spec entries (comma-separated pieces of
# spec). It is column 6 and gets renamed to the folder name; columns 1
# (folder_name) and 5 (spec) are then dropped, leaving Seq, Mod, z and the count.
count.spec<-lapply(merge.data,function(x) lapply(x,function(x1) {x1$counts<-sapply(x1$spec, function(x2) length(gsub("\\s", "", unlist(strsplit(x2, ",")))));x3<-as.data.frame(x1);names(x3)[6]<- as.character(unique(x3$folder_name));x3[,-c(1,5)]}))
# Drop the repeated rows so each (Seq, Mod, z, count) combination appears once.
count.specUnique<-lapply(count.spec,function(x) lapply(x,unique))
# Merge the count tables within each group on Seq, Mod, z, keeping all rows.
spec.group<-lapply(count.specUnique,function(x) Reduce(function(...) merge(...,by=c("Seq","Mod","z"),all=TRUE),x))
#spec.group1<-spec.group[lapply(spec.group,length)!=0]
# Merge the groups into one data frame of spec counts (all rows kept).
res<- Reduce(function(...) merge(...,by=c("Seq","Mod","z"),all=TRUE),spec.group)
# A key missing from a folder comes out of the merge as NA; treat it as a count of 0.
res[is.na(res)] <- 0
res<- as.data.frame(res,stringsAsFactors=FALSE)
#print(res)
# For every pair of count columns (columns 4 onward), run chisq.test() row by
# row on the two counts and keep the p-value; NA when both counts are 0.
Chisq1test_Count<-do.call(cbind,lapply(as.data.frame(combn(names(res)[4:ncol(res)],2),stringsAsFactors=FALSE),function(x) {x1<-data.frame(apply(cbind(res[x[1]],res[x[2]]),1,function(y){ifelse(sum(y)==0, NA, chisq.test(y)$p.value)}));colnames(x1)<- paste0("Count_",x[1],x[2]);x1}))
#print(Chisq1test_Count)
# Append the p-value columns to the count table and return the result.
res1<- cbind(res,Chisq1test_Count)
res1
}
# ReadDir() and FacGroup are not defined in this message; presumably they come
# from earlier in the thread and produce the named list of data frames Spec() expects.
ListFacGroup<-ReadDir(FacGroup)
# Run with an FDR cutoff of 0.05, then show only the first rows.
Spec(ListFacGroup,0.05)
head(Spec(ListFacGroup,0.05))
# Seq Mod z a2 c2 c3 t2 Count_a2c2
#1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1 0.02534732
#2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1 0.01430588
#3 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 1 1 0 1 1.00000000
#4 AAAAAAALQAK 2 1 0 1 1 0.31731051
#5 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 1 2 1.00000000
#6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 1 0 0 1 0.31731051
# Count_a2c3 Count_a2t2 Count_c2c3 Count_c2t2 Count_c3t2
#1 0.02534732 0.10247043 NA 0.3173105 0.3173105
#2 0.01430588 0.05878172 NA 0.3173105 0.3173105
#3 0.31731051 1.00000000 0.3173105 1.0000000 0.3173105
#4 1.00000000 1.00000000 0.3173105 0.3173105 1.0000000
#5 0.56370286 1.00000000 0.5637029 1.0000000 0.5637029
#6 0.31731051 1.00000000 NA 0.3173105 0.3173105
A.K.
________________________________
From: arun <smartpink111 at yahoo.com>
To: Vera Costa <veracosta.rt at gmail.com>
Cc: R help <r-help at r-project.org>
Sent: Thursday, March 28, 2013 10:18 AM
Subject: Re: [R] new question
Hi,
Try this:
# Spec: build a table of spec counts per (Seq, Mod, z) and folder.
#
# Arguments:
#   lista  - named list of data frames; the code reads the columns "spec",
#            "FDR", "Seq", "Mod" and "z" from each one, and uses names(lista)
#            as the folder/group names.
#   FDR_k  - cutoff; only rows with FDR < FDR_k are kept.
#
# Prints the resulting data frame (Seq, Mod, z, one count column per folder
# name); the function's value is whatever print(res) returns.
#
# Requires the plyr and data.table packages (attached inside the function).
Spec <- function(lista,FDR_k) {
# Make sure spec is character so it can be pasted and split later.
list.new<-lapply(lista,function(x) within(x,{spec<- as.character(spec)}))
# Group the data frames by their list names.
split.list<-split(list.new,names(lista))
# Keep only rows with FDR < FDR_k and only the Seq, Mod, z and spec columns.
seq.mod.z<-lapply(seq_along(split.list),function(i) lapply(split.list[[i]],function(x) x[x[["FDR"]]<FDR_k,c("Seq","Mod","z","spec")]))
names(seq.mod.z)<- names(split.list)
# Insert a leading column (folder_name) holding the name of the folder.
folder.name<-lapply(seq.mod.z,function(x) lapply(names(x),function(i) do.call(rbind,lapply(x[i],function(x) cbind(folder_name=i,x)))))
# Within each (Seq, Mod, z) group, replace spec with the comma-joined spec
# values of the whole group. Rows are not collapsed here, so every row of a
# group carries the same joined string.
library(plyr)
library(data.table)
merge.data<- lapply(folder.name,function(x) lapply(x,function(x1) {x1<-data.table(x1); x1[,spec:=paste(spec,collapse=","),by=c("Seq","Mod","z")]}))
# Add a column with the number of spec entries (comma-separated pieces of
# spec). It is column 6 and gets renamed to the folder name; columns 1
# (folder_name) and 5 (spec) are then dropped, leaving Seq, Mod, z and the count.
count.spec<-lapply(merge.data,function(x) lapply(x,function(x1) {x1$counts<-sapply(x1$spec, function(x2) length(gsub("\\s", "", unlist(strsplit(x2, ",")))));x3<-as.data.frame(x1);names(x3)[6]<- as.character(unique(x3$folder_name));x3[,-c(1,5)]}))
# Drop the repeated rows so each (Seq, Mod, z, count) combination appears once.
count.specUnique<-lapply(count.spec,function(x) lapply(x,unique))
# Merge the count tables within each group on Seq, Mod, z, keeping all rows.
spec.group<-lapply(count.specUnique,function(x) Reduce(function(...) merge(...,by=c("Seq","Mod","z"),all=TRUE),x))
#spec.group1<-spec.group[lapply(spec.group,length)!=0]
# Merge the groups into one data frame of spec counts (all rows kept).
res<- Reduce(function(...) merge(...,by=c("Seq","Mod","z"),all=TRUE),spec.group)
# A key missing from a folder comes out of the merge as NA; treat it as a count of 0.
res[is.na(res)] <- 0
res<- as.data.frame(res,stringsAsFactors=FALSE)
# Display the table; this is the last expression, so it is also the function's value.
print(res)
}
# Run on ListFacGroup with an FDR cutoff of 0.05; the printed table follows.
Spec(ListFacGroup,0.05)
# Seq Mod z a2 c2 c3 t2
#1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1
#2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1
#3 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 1 1 0 1
#4 AAAAAAALQAK 2 1 0 1 1
#5 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 1 2
#6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 1 0 0 1
#7 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3 1 0 0 1
#8 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 2 0 1 0 0
#9 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 3 1 2 2 1
#10 AAAAAPGTAEK 2 0 1 0 0
#11 aAAAASAPQQLSDEELFSQLR 1-n_acPro/ 2 1 0 0 1
#12 aAAAAVGNAVPCGAR 1-n_acPro/ 2 1 1 1 1
#13 AAAAAWEEPSSGNGTAR 2 1 1 1 1
#14 aAAAELSLLEK 1-n_acPro/ 1 1 0 0 1
#15 aAAAELSLLEK 1-n_acPro/ 2 1 1 1 1
#16 AAAAEVLGLILR 2 1 1 1 1
#17 aAAAGAAAAAAAEGEAPAEMGALLLEK 1-n_acPro/ 3 1 1 1 1
#18 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3 0 0 1 0
#19 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-n_acPro/ 3 1 0 0 1
#20 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK 1-n_acPro/ 3 1 0 0 1
#21 AAAAAAAkAAK 8-K_ac/ 2 0 1 0 0
#22 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 2 0 1 1 0
#23 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 3 0 0 1 0
#24 aAADGDDSLYPIAVLIDELR 1-n_acPro/ 2 0 0 1 0
Regarding the 2nd question, I am a bit busy now. Will try it later.
A.K.
________________________________
From: Vera Costa <veracosta.rt at gmail.com>
To: arun <smartpink111 at yahoo.com>
Sent: Thursday, March 28, 2013 9:43 AM
Subject: Re: new question
I don't want to remove the duplicates, only to write each row once. Without "unique" I get the same row many times, but with "unique" everything is removed. I need this row written only once.
without "unique" the output is
1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1
2 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1
3 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1
4 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1
5 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1
6 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1
7 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1
8 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1
9 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1
10 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1
11 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 6 0 0 1
12 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 1 1 0 1
13 AAAAAAALQAK 2 1 0 1 1
14 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 1 2
15 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 1 0 0 1
16 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3 1 0 0 1
17 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 2 0 1 0 0
18 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 3 1 2 2 1
19 AAAAAPGTAEK 2 0 1 0 0
20 aAAAASAPQQLSDEELFSQLR 1-n_acPro/ 2 1 0 0 1
21 aAAAAVGNAVPCGAR 1-n_acPro/ 2 1 1 1 1
22 AAAAAWEEPSSGNGTAR 2 1 1 1 1
23 aAAAELSLLEK 1-n_acPro/ 1 1 0 0 1
24 aAAAELSLLEK 1-n_acPro/ 2 1 1 1 1
25 AAAAEVLGLILR 2 1 1 1 1
26 aAAAGAAAAAAAEGEAPAEMGALLLEK 1-n_acPro/ 3 1 1 1 1
27 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3 0 0 1 0
28 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-n_acPro/ 3 1 0 0 1
29 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK 1-n_acPro/ 3 1 0 0 1
30 AAAAAAAkAAK 8-K_ac/ 2 0 1 0 0
31 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 2 0 1 1 0
32 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 3 0 0 1 0
33 aAADGDDSLYPIAVLIDELR 1-n_acPro/ 2 0 0 1 0
with "unique" is
Seq Mod z a2 c2 c3 t2
1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 1 0 0 1
2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 1 0 0 1
3 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 1 1 0 1
4 AAAAAAALQAK 2 1 0 1 1
5 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 1 2
6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 1 0 0 1
7 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3 1 0 0 1
8 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 2 0 1 0 0
9 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 3 1 2 2 1
10 AAAAAPGTAEK 2 0 1 0 0
11 aAAAASAPQQLSDEELFSQLR 1-n_acPro/ 2 1 0 0 1
12 aAAAAVGNAVPCGAR 1-n_acPro/ 2 1 1 1 1
13 AAAAAWEEPSSGNGTAR 2 1 1 1 1
14 aAAAELSLLEK 1-n_acPro/ 1 1 0 0 1
15 aAAAELSLLEK 1-n_acPro/ 2 1 1 1 1
16 AAAAEVLGLILR 2 1 1 1 1
17 aAAAGAAAAAAAEGEAPAEMGALLLEK 1-n_acPro/ 3 1 1 1 1
18 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3 0 0 1 0
19 aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-n_acPro/ 3 1 0 0 1
20 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK 1-n_acPro/ 3 1 0 0 1
21 AAAAAAAkAAK 8-K_ac/ 2 0 1 0 0
22 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 2 0 1 1 0
23 aAAAVGAGHGAGGPGAASSSGGAR 1-n_acPro/ 3 0 0 1 0
24 aAADGDDSLYPIAVLIDELR 1-n_acPro/ 2 0 0 1 0
But I need the row
1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 5 0 0 1
written only once
______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
More information about the R-help
mailing list