[R] faster execution of for loop in Fishers test

Adrian Johnson or|o|eb@|t|more @end|ng |rom gm@||@com
Tue Feb 12 02:26:35 CET 2019


Dear group,

I have two large matrices.

Matrix one: is 24776 x 76 (example toy1 dput object given below)

Matrix two: is 12913 x 76 (example toy2 dput object given below)

Column names of both matrices are identical.

My aim is:

a. Take each row of toy2 and transform the vector into UP (> 0) and DN
(<= 0, as coded below) categories (kc).
b. Test the association between kc and every row of toy1.

My code, given below, works but is very slow.

I gave dput objects for toy1, toy2 and result matrix.

Could you suggest how I can make this faster? Also, how can I select the
values in a result column that are less than 0.001 (p < 0.001)?

Appreciate your help. Thank you.
-Adrian

Code:
===============================================================================



# Fill `result` with Fisher's exact test p-values: entry [j, i] tests the
# association between the sign category of row i of toy2 and row j of toy1.
# Rows of `result` are named after the rows of toy1, columns after the rows
# of toy2.
result <- matrix(NA, nrow = nrow(toy1), ncol = nrow(toy2))

rownames(result) <- rownames(toy1)
colnames(result) <- rownames(toy2)

# seq_len() (rather than 1:nrow()) so an empty matrix yields zero iterations
# instead of iterating over c(1, 0).
for (i in seq_len(nrow(toy2))) {
  # The UP/DN labelling depends only on row i of toy2, so build it once per
  # outer iteration instead of once per (i, j) pair.
  kx <- toy2[i, ]
  kc <- rep("NC", length(kx))
  kc[kx > 0] <- "UP"
  kc[kx <= 0] <- "DN" # entries that are NA in kx keep the "NC" label

  for (j in seq_len(nrow(toy1))) {
    # Tests run in the same (i outer, j inner) order as before, so simulated
    # p-values under a fixed seed are unchanged.
    xpv <- fisher.test(
      table(kc, toy1[j, ]),
      simulate.p.value = TRUE
    )$p.value
    result[j, i] <- xpv
  }
}

===============================================================================


===============================================================================


> dput(toy1)
structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1), .Dim = c(10L, 7L), .Dimnames = list(c("ACAP3",
"ACTRT2", "AGRN", "ANKRD65", "ATAD3A", "ATAD3B", "ATAD3C", "AURKAIP1",
"B3GALT6", "C1orf159"), c("a", "b", "c", "d", "e", "f", "g")))



> dput(toy2)
structure(c(-0.242891119688613, -0.0514058216682132, 0.138447212993773,
-0.312576648033122, 0.271489918720452, -0.281196468299486, -0.0407160143344565,
-0.328353812845287, 0.151667836674511, 0.408596843743938, -0.049351944902924,
0.238586287349249, 0.200571558784821, -0.0737604184858411, 0.245971526254877,
0.24740263959845, -0.161528943131908, 0.197521973013793, 0.0402668125708444,
0.376323735212088, 0.0731550871764204, 0.385270176969893, 0.28953042756208,
0.062587289401188, -0.281187168932979, -0.0202298984561554, -0.0848696970309447,
0.0349676726358973, -0.520484215644868, -0.481991414222996,
-0.00698099201388211,
0.135503878341873, 0.156983081312087, 0.320223832092661, 0.34582193394074,
0.0844455960468667, -0.157825604090972, 0.204758250510969, 0.261796072978612,
-0.19510450641405, 0.43196474472874, -0.211155577453175, -0.0921641871215187,
0.420950361292263, 0.390261862151936, -0.422273930504427, 0.344653684951627,
0.0378273248838503, 0.197782027324611, 0.0963124876309569, 0.332093167080656,
0.128036554821915, -0.41338065859335, -0.409470440033177, 0.371490567256253,
-0.0912549189140141, -0.247451812684234, 0.127741739114639, 0.0856254238844557,
0.515282940316031, -0.25675759521248, 0.333943163209869, 0.604141413840881,
0.0824942299510931, -0.179605710473021, -0.275604207054643, -0.113251154591898,
0.172897837449258, -0.329808795076691, -0.239255324324506), .Dim = c(10L,
7L), .Dimnames = list(c("chr5q23", "chr16q24", "chr8q24", "chr13q11",
"chr7p21", "chr10q23", "chr13q13", "chr10q21", "chr1p13", "chrxp21"
), c("a", "b", "c", "d", "e", "f", "g")))
>


> dput(result)
structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.532733633183408,
0.511244377811094, 0.528235882058971, 0.526736631684158, 0.51424287856072,
0.530734632683658, 0.513243378310845, 0.533233383308346, 0.542228885557221,
0.517241379310345, 0.532733633183408, 0.521739130434783, 0.529235382308846,
0.530234882558721, 0.548725637181409, 0.525737131434283, 0.527236381809095,
0.532733633183408, 0.530234882558721, 0.520739630184908, 0.15592203898051,
0.142928535732134, 0.140929535232384, 0.150924537731134, 0.160419790104948,
0.139430284857571, 0.152923538230885, 0.146426786606697, 0.149425287356322,
0.145427286356822, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.282358820589705,
0.293853073463268, 0.262868565717141, 0.290854572713643, 0.276861569215392,
0.288855572213893, 0.282358820589705, 0.292853573213393, 0.286356821589205,
0.271364317841079, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1), .Dim = c(10L, 10L), .Dimnames = list(c("ACAP3",
"ACTRT2", "AGRN", "ANKRD65", "ATAD3A", "ATAD3B", "ATAD3C", "AURKAIP1",
"B3GALT6", "C1orf159"), c("chr5q23", "chr16q24", "chr8q24", "chr13q11",
"chr7p21", "chr10q23", "chr13q13", "chr10q21", "chr1p13", "chrxp21"
)))



More information about the R-help mailing list