[R] field significance test

Mon Sep 6 08:37:07 CEST 2021

Dear r-list member,

I want to plot a histogram that shows the number of stations that have a
significant statistic (positive or negative), based on the value itself
and its p-value. df3 holds the test statistic values (columns are the
stations and rows are the results from the resample matrix
(repetition/bootstrap)), and df4 holds the corresponding p-values.

#the value
dput(head(df3,10))
structure(c(0.569535339474781, 1.02925697755861, 1.08125714350978,
0.50589479161552, -0.695827095264809, 0.455608022735733, 1.2552019505074,
0.981335144120386, 1.63020923423253, -0.424613279862939, 0.429207234903993,
1.99059339634301, -1.25731480224036, 0.64293796635093, 0.0189774621961392,
0.1163965630274, -1.41756397958877, 1.58945674395921, -1.2551489541395,
-2.84122761058959, -0.72446669544026, -0.719331298629362, -0.164045813998067,
0.444120153507258, -0.0845757313567553, -0.27732982718919, -0.166982066770785,
-0.193859909749249, 0.277426534878283, -0.0430460496295642, -0.0741475736028902,
-0.017026178205196, 0.732589091697401, 0.332813962514037, -0.0860983232517636,
0.155930932436498, -0.438635444604027, 0.046881008364722, -0.704876076807635,
-0.945506782070735, 0.662399207637722, -0.860903464600488, 1.06638547921749,
-0.462184163508299, 0.442447468362937, 0.145655792120232, 0.696309974316211,
1.84692085953474, 0.00841868461519582, -1.04408256815264, -0.548599461573869,
1.22352273108675, 0.0191993545723452, 1.26090162037733, 0.192106046362172,
-1.02864978106213, -0.0712068006002629, -0.674610175422543, -0.658383381010154,
-1.52779151484935, 0.479809528798632, -0.112078644619679, -0.19482661081522,
-0.192179943664117, -0.246553759113406, -0.563554156777087, -1.0236492805268,
0.0289772842372375, -0.274878506644853, 0.95578159001869, -0.27550722692588,
-0.66586322268903, 1.24703690613745, -0.00368775734780707, -0.0766884108214613,
-1.41610325144406, 0.518897523428314, -2.12289477996499, 0.968369305561191,
0.0766656793804207, 0.470712743077857, 0.241711948576043, 0.0636131491007723,
-1.13735866614159, 0.625015831730259, -0.234696421716696, 0.358555918256736,
-0.651761882852838, -0.236796663592383, 0.0421395303375618, 0.574747610964774,
-0.730646230622174, -0.20839489662388, -1.4832025994155, -0.366841536561336,
0.621868015281511, 0.945609952617796, 0.297055307072896, 0.737974050847397,
1.49862070675738), .Dim = c(10L, 10L))

#the p-value
dput(head(df4,10))
structure(c(0.560903574193679, 0.358019718822816, 0.320136568444488,
0.721538652049639, 0.419898899237915, 0.511481779449553, 0.208829636238898,
0.535905791761543, 0.252523383923989, 0.721538652049639, 0.487651926831611,
0.0281856103410957, 0.138370395238992, 0.639104270712721, 0.98503410973661,
0.955123383216192, 0.358019718822816, 0.138370395238992, 0.252523383923989,
0.0373292396736942, 0.302215769747998, 0.302215769747998, 0.807343273858921,
0.560903574193679, 0.955123383216192, 0.836526366120417, 0.807343273858921,
0.807343273858921, 0.693640621783759, 0.895532903167044, 0.895532903167044,
0.98503410973661, 0.159470497055087, 0.560903574193679, 0.925275729900227,
0.865936215436343, 0.441845502530452, 0.98503410973661, 0.358019718822816,
0.170893484254114, 0.586452625432322, 0.268412562734209, 0.102689728987727,
0.511481779449553, 0.666151798537229, 0.925275729900227, 0.358019718822816,
0.0581501553999165, 0.98503410973661, 0.170893484254114, 0.586452625432322,
0.464434476654839, 0.98503410973661, 0.252523383923989, 0.925275729900227,
0.377977518007105, 0.98503410973661, 0.586452625432322,
0.666151798537229, 0.284975267823252, 0.560903574193679,
0.721538652049639, 0.778425914188847,
0.836526366120417, 0.778425914188847, 0.511481779449553, 0.087825095630195,
0.98503410973661, 0.693640621783759, 0.208829636238898, 0.807343273858921,
0.222740206090239, 0.222740206090239, 0.98503410973661, 0.925275729900227,
0.0373292396736942, 0.586452625432322, 0.00322938266821475, 0.222740206090239,
0.865936215436343, 0.338738311334395, 0.639104270712721, 0.895532903167044,
0.0533495868962313, 0.268412562734209, 0.721538652049639, 0.721538652049639,
0.195559652706897, 0.778425914188847, 0.880692897134707, 0.398606385377039,
0.398606385377039, 0.693640621783759, 0.102689728987727, 0.666151798537229,
0.252523383923989, 0.358019718822816, 0.778425914188847, 0.284975267823252,
0.0633043080023749), .Dim = c(10L, 10L))

# Find the positive significant stations.
# Start from a copy of the statistics so that df3 itself is left untouched.
df5 <- df3
# Blank out every cell whose p-value is above 0.05 or whose statistic is
# negative; what remains are the significant, non-negative statistics.
df5[df4 > 0.05 | df5 < 0] <- NA
# Recode the surviving positive statistics to 1 so that a row sum is a count.
df5[df5 > 0] <- 1
# Number of positive significant cells in each row; the NA cells are skipped.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
pos <- as.numeric(rowSums(df5, na.rm = TRUE))
hist(pos)

# Find the negative significant stations.
# Start from a copy of the statistics so that df3 itself is left untouched.
df6 <- df3
# Blank out every cell whose p-value is above 0.05 or whose statistic is
# positive. The mask is built from df6 itself (not from df5), so this section
# gives the same answer whether or not the positive-station code has run and
# regardless of what df5 currently contains.
df6[df4 > 0.05 | df6 > 0] <- NA
# Recode the surviving negative statistics to 1 so that a row sum is a count.
df6[df6 < 0] <- 1
# Number of negative significant cells in each row; the NA cells are skipped.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
neg <- as.numeric(rowSums(df6, na.rm = TRUE))
hist(neg)

However, the code above is not correct, because the 0-station count (the
rows in which no significant station is detected) should be the same. The
problem arises when a row produces significant positive and negative values
at the same time. Is there any way to combine the positive and negative
significant values and plot the histogram? Or can we calculate the
0-station rows separately first?

Any lead is really appreciated. Thank you.

Ani Jaya