[R] question
arun
smartpink111 at yahoo.com
Thu May 23 00:13:31 CEST 2013
HI,
# Root data folder; GetFileList() scans it for sub-folders and PepInfo files.
directory <- "/home/arunksa111/NewData"
# List the sub-folders of `directory` and the PepInfo files found below it.
#
# Args:
#   directory: Path of the root data folder (absolute or relative).
#   number:    Value spliced into the file-name pattern
#              "MSMS_<number>PepInfo.txt".
#
# Returns:
#   An unnamed list of two character vectors:
#     [[1]] names of the immediate sub-directories of `directory`;
#     [[2]] "<directory>/<relative path>" for every file below `directory`
#           (searched recursively) whose name matches the pattern.
#
# The working directory is never changed: everything is resolved through
# full paths, so relative `directory` values work and the caller's session
# is left as it was.
GetFileList <- function(directory, number) {
  if (!dir.exists(directory)) {
    stop("directory does not exist: ", directory, call. = FALSE)
  }

  # Immediate children of `directory` that are themselves directories.
  entries <- dir(directory)
  filelist1 <- entries[dir.exists(file.path(directory, entries))]

  # `pattern` is a regular expression; it is kept exactly as before so the
  # same set of files is matched.
  direct <- dir(
    directory,
    pattern = paste0("MSMS_", number, "PepInfo.txt"),
    full.names = FALSE,
    recursive = TRUE
  )
  lista <- file.path(directory, direct)

  list(filelist1, lista)
}
# Scan the data folder once and reuse the result for both pieces, instead of
# walking the directory tree a second time.
found.files <- GetFileList(directory, 23)
file.list.names <- found.files[[1]]  # sub-folder names
lista <- found.files[[2]]            # paths of the MSMS_23PepInfo.txt files
# Group code per folder, in folder order; folders coded 0 are skipped when
# the tables are read.
FacGroup <- c(0, 1, 1, 1, 0, 2, 2, 2)
# Read the tab-separated tables of the folders selected by `FacGroup`.
#
# Args:
#   FacGroup:     Vector of group codes, one per folder; entries equal to 0
#                 are skipped. Must not contain NA.
#   files:        Paths of the tables, aligned with `FacGroup`. Defaults to
#                 the global `lista`, which is what the function always used.
#   folder.names: Names given to the tables, aligned with `FacGroup`.
#                 Defaults to the global `file.list.names`.
#
# Returns:
#   A list of data frames (read with header = TRUE, sep = "\t",
#   stringsAsFactors = FALSE), named after the selected folders.
#
# Raises:
#   An error when the three vectors differ in length; previously the logical
#   index was silently recycled and tables could be mis-labelled.
ReadDir <- function(FacGroup, files = lista, folder.names = file.list.names) {
  stopifnot(
    !anyNA(FacGroup),
    length(FacGroup) == length(files),
    length(FacGroup) == length(folder.names)
  )

  keep <- FacGroup != 0
  read.list <- lapply(files[keep], function(path) {
    read.table(path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
  })
  names(read.list) <- folder.names[keep]
  read.list
}
# Load the table of every folder whose group code is non-zero.
ListFacGroup <- ReadDir(FacGroup)
# Keep only the first rows of each table (head() default) for a quick trial.
ListFacGroupSub <- lapply(ListFacGroup, function(tbl) head(tbl))
# Build a wide table of "Pro" counts: one row per (Seq, Mod, z) combination
# and one count column per name in `lista`.
#
# Args:
#   lista: Named list of data frames; each is indexed by the columns
#          FDR, Seq, Mod, z and Pro.
#   FDR_k: Threshold; only rows with FDR strictly below it are kept.
#
# Returns:
#   A data.frame with the key columns Seq, Mod, z and one column per list
#   name holding the number of comma-separated Pro entries for that key;
#   NA values produced by the joins are replaced by 0.
#
# Side effect: attaches the plyr and data.table packages.
Pro<- function(lista,FDR_k) {
# Group the tables by their list name (one sub-list per distinct name).
split.list<- split(lista,names(lista))
# Keep the rows below the FDR threshold and only the four columns used later.
seq.mod.z<- lapply(seq_along(split.list),function(i) lapply(split.list[[i]],function(x) x[x[["FDR"]]< FDR_k,c("Seq","Mod","z","Pro")]))
names(seq.mod.z)<- names(split.list)
# Prepend a folder_name column holding the list name each table came from.
folder.name<-lapply(seq.mod.z,function(x) lapply(names(x),function(i) do.call(rbind,lapply(x[i],function(x) cbind(folder_name=i,x)))))
# NOTE(review): no plyr function appears to be called in this function --
# confirm before dropping the dependency.
library(plyr)
library(data.table)
# Within each (Seq, Mod, z) group, overwrite Pro with the comma-joined Pro
# values of the whole group (data.table `:=`, by group).
merge.data<-lapply(folder.name,function(x) lapply(x,function(x1) {x1<- data.table(x1);x1[,Pro:=paste(Pro,collapse=","),by=c("Seq","Mod","z")]}))
# counts = number of pieces obtained by splitting the joined Pro string on
# commas. The 6th column (counts) is renamed to the folder name, then the
# 1st (folder_name) and 5th (Pro) columns are dropped, leaving
# Seq, Mod, z, <folder name>.
count.Pro<-lapply(merge.data,function(x) lapply(x,function(x1) { x1$counts<-sapply(x1$Pro,function(x2) length(gsub("\\s","",unlist(strsplit(x2,",")))));x3<-as.data.frame(x1);names(x3)[6]<- as.character(unique(x3$folder_name));x3[,-c(1,5)]}))
# Drop duplicated rows from each table.
count.ProUnique<-lapply(count.Pro,function(x) lapply(x,unique))
# Merge the tables inside each name group (outer join on Seq, Mod, z).
Pro.group<-lapply(count.ProUnique,function(x) Reduce(function(...) merge(...,by=c("Seq","Mod","z"),all=TRUE),x))
#Pro.group1<-Pro.group[lapply(Pro.group,length)!=0]
# Outer-join all per-name tables on Seq, Mod, z into one wide table.
res<- Reduce(function(...) merge(...,by=c("Seq","Mod","z"),all=TRUE),Pro.group)
# A key missing from a table appears as NA after the join; report it as 0.
res[is.na(res)] <- 0
res<- as.data.frame(res,stringsAsFactors=FALSE)
res
}
# Trial run on the head() subsets with an FDR cut-off of 0.05; the result is
# printed, and the output obtained is pasted below.
Pro(lista = ListFacGroupSub, FDR_k = 0.05)
# Seq Mod z c2 c3 c4 t2 t3 t4
#1 aAAAAAAAAAAAAAATATAGPR 1-n_acPro/ 2 0 0 1 1 0 1
#2 aAAAAAAAAAAASSPVGVGQR 1-n_acPro/ 2 0 0 1 1 0 1
#3 aAAAAAAAAAGAAGGR 1-n_acPro/ 2 2 0 2 2 2 2
#4 aAAAAAAAGAAGGRGSGPGRR 1-n_acPro/ 2 0 0 2 0 0 2
#5 AAAAAAAkAAK 8-K_ac/ 2 1 0 0 0 0 0
#6 AAAAAAALQAK 2 0 2 0 2 0 0
#7 aAAAAAGAGPEMVR 1-n_acPro/ 2 2 2 2 2 2 2
#8 aAAAAATAAAAASIR 1-n_acPro/ 2 0 0 0 0 1 0
#9 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2 0 0 0 18 0 0
#10 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 2 18 0 0 0 0 0
#11 aAAAAEQQQFYLLLGNLLSPDNVVR 1-n_acPro/ 3 0 18 0 0 0 0
#12 aAAAAVGNAVPCGAR 1-n_acPro/ 2 0 1 0 0 0 0
# Full run on the complete tables; keep the result and show its dimensions.
ProCt <- Pro(lista = ListFacGroup, FDR_k = 0.05)
dim(ProCt)
#[1] 29429 9
A.K.
________________________________
From: Vera Costa <veracosta.rt at gmail.com>
To: arun <smartpink111 at yahoo.com>
Sent: Thursday, May 16, 2013 1:37 PM
Subject: Re: question
Hi.
Another thing that I need (and I sent the data in a new format) is to count data like the function spec does, but for the variable "pro". The idea is exactly the same, but I'm having some difficulties because of the format of the data... The Pro is like ">sp|Q86U42|PABP2_HUMAN,>sp|Q86U42-2|PABP2_HUMAN". The comma separates 2 pro's.
More information about the R-help
mailing list