[R] R: re: dataframe
arun
smartpink111 at yahoo.com
Wed Apr 30 15:22:00 CEST 2014
Hi James,
I guess the problem was because the columns you tried were factors.
Suppose `dat1` is your dataset (first).
library(stringr)
indx <- grep("sample", colnames(dat1))
toGrep <- read.table(text = str_trim(gsub("[sample.]", " ", colnames(dat1)[indx])),
header = FALSE, stringsAsFactors = FALSE)
dat1New <- dat1
indx2 <- grep("^o", colnames(dat1New))
dat1New[indx2] <- lapply(dat1New[indx2], as.character)
lst1 <- lapply(seq_len(nrow(toGrep)), function(i) {
indx3 <- intersect(grep(toGrep[i, 1], dat1New), grep(paste0("\\b", toGrep[i,
2], "\\b"), dat1New))
indx4 <- grep(paste(toGrep[i, ], collapse = "."), colnames(dat1New))
indx5 <- which(!seq(ncol(dat1New)) %in% c(indx, indx2))
dat1New[, sort(c(indx3, indx4, indx5)), drop = FALSE]
})
names(lst1) <- as.character(interaction(toGrep, sep = "."))
Here, I assumed that you wanted only the particular "sample...", column along with the subset.
lapply(lst1,names)[[1]]
[1] "X" "rt"
[3] "mz" "o066_010"
[5] "o066_022" "o066_029"
[7] "o066_034" "o066_068"
[9] "o066_072" "o066_079"
[11] "o066_081" "o066_086"
[13] "pspectrum" "isotopes"
[15] "adduct" "mzmin"
[17] "mzmax" "rtmin"
[19] "rtmax" "npeaks"
[21] "sample.Au5.C" "X2W.p.value.Nanoparticle"
[23] "X2W.p.value.Treatment" "X2W.adj.p.value.Nanoparticle"
[25] "X2W.adj.p.value.\nTreatment" "Metlin"
[27] "HMDBtag" "HMDBinfo"
A.K.
On Wednesday, April 30, 2014 8:29 AM, "pascalbells at libero.it" <pascalbells at libero.it> wrote:
Hi, thank you again for your help....
here is the dput:
structure(list(X = structure(c(8L, 7L, 9L, 1L), .Label = c("61/195",
"69/954", "72/300", "73/300", "74/946", "76/611", "Nanoparticle",
"SampleName", "Treatment"), class = "factor"), rt = c(NA, NA,
NA, 195.09555), mz = c(NA, NA, NA, 61.00722661), o066_010 = structure(c(9L,
7L, 8L, 2L), .Label = c("14043676.02", "14536204.77", "17652481.49",
"5212485.416", "6731535.564", "9876651.476", "Au5", "C", "sample"
), class = "factor"), o066_019 = structure(c(9L, 7L, 8L, 3L), .Label = c
("10023255.04",
"15073569.61", "15473173.22", "28708474.98", "5633388.668", "7154698.204",
"Au5", "H", "sample"), class = "factor"), o066_022 = structure(c(9L,
7L, 8L, 2L), .Label = c("10087537.1", "13846871.69", "14528006.42",
"18458758.83", "5383022.265", "6577623.782", "Au5", "C", "sample"
), class = "factor"), o066_023 = structure(c(9L, 7L, 8L, 1L), .Label = c
("15723091.09",
"17023369.3", "18462299.71", "4996777.375", "6987554.082", "9174926.395",
"Au5", "L", "sample"), class = "factor"), o066_024 = structure(c(9L,
7L, 8L, 1L), .Label = c("14367825.89", "16698747.9", "18696653.21",
"5025690.383", "6930208.708", "9736032.197", "Au32", "M", "sample"
), class = "factor"), o066_025 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14488608.39",
"17814216.17", "18698120.65", "4950983.925", "7193018.533", "8061360.256",
"Au32", "M", "sample"), class = "factor"), o066_029 = structure(c(9L,
7L, 8L, 2L), .Label = c("11915681.99", "13140449.17", "16980818.81",
"5529195.67", "7045157.096", "9792452.409", "Au5", "C", "sample"
), class = "factor"), o066_032 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14598638.42",
"17574435.2", "18133608.77", "5440291.241", "6145144.579", "6754867.22",
"Au32", "C", "sample"), class = "factor"), o066_034 = structure(c(9L,
7L, 8L, 1L), .Label = c("14979396.48", "18783422.61", "18944230.28",
"5058848.793", "6240890.066", "7253951.112", "Au5", "C", "sample"
), class = "factor"), o066_039 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14540874.05",
"17997589.68", "18808525.49", "4923318.003", "4989990.142", "6943084.118",
"Au32", "C", "sample"), class = "factor"), o066_049 = structure(c(9L,
7L, 8L, 1L), .Label = c("13904244.76", "17285116.29", "18378290.63",
"5578459.181", "6817168.77", "6956602.05", "Au32", "H", "sample"
), class = "factor"), o066_052 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14351098.41",
"16914393.93", "27503386.81", "6337197.417", "7047144.652", "8204323.852",
"Au5", "H", "sample"), class = "factor"), o066_055 = structure(c(9L,
7L, 8L, 1L), .Label = c("13906338.32", "15282046.89", "18357761.85",
"5612493.227", "5990567.16", "6541120.91", "Au32", "L", "sample"
), class = "factor"), o066_056 = structure(c(9L, 7L, 8L, 1L), .Label = c
("13198308.2",
"16943030.55", "24819426.73", "5794416.907", "6526930.135", "6965902.611",
"Au5", "M", "sample"), class = "factor"), o066_057 = structure(c(9L,
7L, 8L, 1L), .Label = c("13553932.63", "16650323.6", "23887991.99",
"4978191.783", "6498421.044", "7482551.482", "Au5", "M", "sample"
), class = "factor"), o066_060 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12757675.44",
"14478961.76", "17693417", "5688587.416", "6278360.393", "6778930.512",
"Au32", "C", "sample"), class = "factor"), o066_062 = structure(c(9L,
7L, 8L, 1L), .Label = c("12566817.06", "16230428.36", "18310198.43",
"4929894.983", "6051669.256", "6893438.406", "Au32", "C", "sample"
), class = "factor"), o066_063 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12765436.23",
"15664549.3", "18880700.09", "5012562.077", "5653516.005", "6891139.714",
"Au5", "L", "sample"), class = "factor"), o066_068 = structure(c(9L,
7L, 8L, 2L), .Label = c("10077274.11", "11533763.33", "11914610.67",
"17481826.33", "5118520.628", "5722064.277", "Au5", "C", "sample"
), class = "factor"), o066_072 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12506876.82",
"15059553.41", "17963658.12", "5147251.947", "5936120.075", "6757510.158",
"Au5", "C", "sample"), class = "factor"), o066_075 = structure(c(9L,
7L, 8L, 1L), .Label = c("12629327.74", "14157915.9", "17676966.26",
"5507872.857", "5772060.453", "6460845.336", "Au5", "L", "sample"
), class = "factor"), o066_077 = structure(c(9L, 7L, 8L, 1L), .Label = c
("11424841.46",
"13873092.31", "16264095.9", "5139882.469", "5267420.049", "6093969.775",
"Au32", "C", "sample"), class = "factor"), o066_079 = structure(c(9L,
7L, 8L, 1L), .Label = c("11310021.96", "12817107.8", "16850943.65",
"4790059.15", "5497826.813", "6272350.228", "Au5", "C", "sample"
), class = "factor"), o066_080 = structure(c(9L, 7L, 8L, 1L), .Label = c
("10962613.96",
"12999912.78", "17414255.92", "5079434.424", "5790847.752", "6285463.247",
"Au32", "C", "sample"), class = "factor"), o066_081 = structure(c(9L,
7L, 8L, 1L), .Label = c("10716085.25", "12783977.19", "17139019.04",
"5316270.963", "5477933.375", "5939018.303", "Au5", "C", "sample"
), class = "factor"), o066_082 = structure(c(9L, 7L, 8L, 1L), .Label = c
("11020219.13",
"12911640.64", "17298440.71", "5292075.669", "5608730.99", "5950511.372",
"Au32", "C", "sample"), class = "factor"), o066_084 = structure(c(9L,
7L, 8L, 1L), .Label = c("11254185.62", "14433261.34", "17602305.5",
"4220138.319", "4802246.445", "6580756.431", "Au32", "C", "sample"
), class = "factor"), o066_086 = structure(c(9L, 7L, 8L, 6L), .Label = c
("13251623.05",
"17376126.66", "4975858.829", "5877369.947", "6459614.347", "9956435.681",
"Au5", "C", "sample"), class = "factor"), o066_087 = structure(c(9L,
7L, 8L, 1L), .Label = c("11090591.08", "12481560.98", "17685195.53",
"4976028.139", "5729221.367", "5866020.444", "Au32", "H", "sample"
), class = "factor"), o066_090 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12039292.87",
"13766587.51", "17661480.62", "4105603.641", "4892418.995", "6364759.427",
"Au32", "M", "sample"), class = "factor"), o066_093 = structure(c(9L,
7L, 8L, 1L), .Label = c("11050848.78", "14300477", "17455404.65",
"4944754.379", "5314318.706", "6132171.794", "Au32", "L", "sample"
), class = "factor"), o066_101 = structure(c(9L, 7L, 8L, 1L), .Label = c
("10569492.27",
"12736422.09", "16216119.36", "4941484.876", "5716315.813", "5888895.838",
"Au32", "C", "sample"), class = "factor"), o066_102 = structure(c(9L,
7L, 8L, 1L), .Label = c("10004456.65", "13327035.46", "16746388.21",
"4074210.702", "5464246.567", "6303763.552", "Au32", "L", "sample"
), class = "factor"), o066_103 = structure(c(9L, 7L, 8L, 1L), .Label = c
("10248532.42",
"12683248.9", "25351553.76", "5103432.052", "6204859.229", "6370345.756",
"Au5", "H", "sample"), class = "factor"), o066_104 = structure(c(9L,
7L, 8L, 6L), .Label = c("14115821.79", "16849972.37", "4717878.989",
"6440223.441", "8546766.206", "9957099.225", "Au32", "H", "sample"
), class = "factor"), o066_105 = structure(c(9L, 7L, 8L, 1L), .Label = c
("11382449.23",
"15148302.74", "22676887.88", "5267156.789", "5970593.031", "6526962.522",
"Au5", "M", "sample"), class = "factor"), pspectrum = c(NA, NA,
NA, 24L), isotopes = c(NA, NA, NA, NA), adduct = structure(c(1L,
1L, 1L, 1L), .Label = c("", "[M+H]+ 72.0781", "[M+Na]+ 51"), class =
"factor"),
mzmin = c(NA, NA, NA, 61.00708832), mzmax = c(NA, NA, NA,
61.00730019), rtmin = c(NA, NA, NA, 194.0899), rtmax = c(NA,
NA, NA, 196.0904), npeaks = c(NA, NA, NA, 36L), sample.Au5.C = c(NA,
NA, NA, 9L), sample.Au5.H = c(NA, NA, NA, 9L), sample.Au5.L = c(NA,
NA, NA, 3L), sample.Au32.M = c(NA, NA, NA, 3L), sample.Au32.C = c(NA,
NA, NA, 3L), sample.Au32.H = c(NA, NA, NA, 3L), sample.Au32.L = c(NA,
NA, NA, 3L), sample.Au5.M = c(NA, NA, NA, 3L), X2W.p.value.Nanoparticle = c
(NA,
NA, NA, 0.291527766), X2W.p.value.Treatment = c(NA, NA, NA,
0.79589541), X2W.adj.p.value.Nanoparticle = c(NA, NA, NA,
0.453622043), X2W.adj.p.value.Treatment = c(NA, NA, NA, 0.870148042
), Metlin = structure(c(1L, 1L, 1L, 2L), .Label = c("", "http://metlin.
scripps.edu/metabo_list.php?mass_min=59.95&mass_max=60.05",
"http://metlin.scripps.edu/metabo_list.php?mass_min=67.936&mass_max=68.
036",
"http://metlin.scripps.edu/metabo_list.php?mass_min=71.024&mass_max=71.
124",
"http://metlin.scripps.edu/metabo_list.php?mass_min=72.027&mass_max=72.
127",
"http://metlin.scripps.edu/metabo_list.php?mass_min=72.932&mass_max=73.
032",
"http://metlin.scripps.edu/metabo_list.php?mass_min=74.982&mass_max=75.
082"
), class = "factor"), HMDBtag = structure(c(1L, 1L, 1L, 1L
), .Label = c("", "HMDB00123, HMDB00925, HMDB12136"), class = "factor"),
HMDBinfo = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
)), .Names = c("X", "rt", "mz", "o066_010", "o066_019", "o066_022",
"o066_023", "o066_024", "o066_025", "o066_029", "o066_032", "o066_034",
"o066_039", "o066_049", "o066_052", "o066_055", "o066_056", "o066_057",
"o066_060", "o066_062", "o066_063", "o066_068", "o066_072", "o066_075",
"o066_077", "o066_079", "o066_080", "o066_081", "o066_082", "o066_084",
"o066_086", "o066_087", "o066_090", "o066_093", "o066_101", "o066_102",
"o066_103", "o066_104", "o066_105", "pspectrum", "isotopes",
"adduct", "mzmin", "mzmax", "rtmin", "rtmax", "npeaks", "sample.Au5.C",
"sample.Au5.H", "sample.Au5.L", "sample.Au32.M", "sample.Au32.C",
"sample.Au32.H", "sample.Au32.L", "sample.Au5.M", "X2W.p.value.Nanoparticle",
"X2W.p.value.Treatment", "X2W.adj.p.value.Nanoparticle", "X2W.adj.p.value.
Treatment",
"Metlin", "HMDBtag", "HMDBinfo"), row.names = c(NA, 4L), class = "data.frame")
>
I would like to arrange all the columns that match "Au5" and "C", as you can
see below:
> dput(head(tabella,4))
structure(list(X = structure(c(8L, 7L, 9L, 1L), .Label = c("61/195",
"69/954", "72/300", "73/300", "74/946", "76/611", "Nanoparticle",
"SampleName", "Treatment"), class = "factor"), rt = c(NA, NA,
NA, 195.09555), mz = c(NA, NA, NA, 61.00722661), o066_010 = structure(c(9L,
7L, 8L, 2L), .Label = c("14043676.02", "14536204.77", "17652481.49",
"5212485.416", "6731535.564", "9876651.476", "Au5", "C", "sample"
), class = "factor"), o066_022 = structure(c(9L, 7L, 8L, 2L), .Label = c
("10087537.1",
"13846871.69", "14528006.42", "18458758.83", "5383022.265", "6577623.782",
"Au5", "C", "sample"), class = "factor"), o066_029 = structure(c(9L,
7L, 8L, 2L), .Label = c("11915681.99", "13140449.17", "16980818.81",
"5529195.67", "7045157.096", "9792452.409", "Au5", "C", "sample"
), class = "factor"), o066_034 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14979396.48",
"18783422.61", "18944230.28", "5058848.793", "6240890.066", "7253951.112",
"Au5", "C", "sample"), class = "factor"), o066_068 = structure(c(9L,
7L, 8L, 2L), .Label = c("10077274.11", "11533763.33", "11914610.67",
"17481826.33", "5118520.628", "5722064.277", "Au5", "C", "sample"
), class = "factor"), o066_072 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12506876.82",
"15059553.41", "17963658.12", "5147251.947", "5936120.075", "6757510.158",
"Au5", "C", "sample"), class = "factor"), o066_077 = structure(c(9L,
7L, 8L, 1L), .Label = c("11424841.46", "13873092.31", "16264095.9",
"5139882.469", "5267420.049", "6093969.775", "Au32", "C", "sample"
), class = "factor"), o066_079 = structure(c(9L, 7L, 8L, 1L), .Label = c
("11310021.96",
"12817107.8", "16850943.65", "4790059.15", "5497826.813", "6272350.228",
"Au5", "C", "sample"), class = "factor"), o066_080 = structure(c(9L,
7L, 8L, 6L), .Label = c("13251623.05", "17376126.66", "4975858.829",
"5877369.947", "6459614.347", "9956435.681", "Au5", "C", "sample"
), class = "factor"), o066_102 = structure(c(8L, 1L, 1L, 3L), .Label = c("",
"159", "24", "40", "6", "65", "91", "sample"), class = "factor"),
o066_103 = structure(c(2L, 1L, 1L, 1L), .Label = c("", "sample"
), class = "factor"), o066_104 = structure(c(4L, 1L, 1L,
1L), .Label = c("", "[M+H]+ 72.0781", "[M+Na]+ 51", "sample"
), class = "factor"), o066_105 = structure(c(8L, 1L, 1L,
2L), .Label = c("", "61.00708832", "68.9930209", "72.08063445",
"73.08397527", "73.98878773", "76.03904924", "sample"), class =
"factor"),
pspectrum = c(NA, NA, NA, 61.00730019), isotopes = c(NA,
NA, NA, 194.0899), adduct = c(NA, NA, NA, 196.0904), mzmin = c(NA,
NA, NA, 36L), mzmax = c(NA, NA, NA, 9L), rtmin = c(NA, NA,
NA, 9L), rtmax = c(NA, NA, NA, 3L), npeaks = c(NA, NA, NA,
3L), sample.Au5.C = c(NA, NA, NA, 3L), sample.Au5.H = c(NA,
NA, NA, 3L), sample.Au5.L = c(NA, NA, NA, 3L), sample.Au32.M = c(NA,
NA, NA, 3L), sample.Au32.C = c(NA, NA, NA, 0.291527766),
sample.Au32.H = c(NA, NA, NA, 0.79589541), sample.Au32.L = c(NA,
NA, NA, 0.453622043), sample.Au5.M = c(NA, NA, NA, 0.870148042
), X2W.p.value.Nanoparticle = structure(c(1L, 1L, 1L, 2L), .Label = c("",
"http://metlin.scripps.edu/metabo_list.php?mass_min=59.95&mass_max=60.
05",
"http://metlin.scripps.edu/metabo_list.php?mass_min=67.936&mass_max=68.
036",
"http://metlin.scripps.edu/metabo_list.php?mass_min=71.024&mass_max=71.
124",
"http://metlin.scripps.edu/metabo_list.php?mass_min=72.027&mass_max=72.
127",
"http://metlin.scripps.edu/metabo_list.php?mass_min=72.932&mass_max=73.
032",
"http://metlin.scripps.edu/metabo_list.php?mass_min=74.982&mass_max=75.
082"
), class = "factor"), X2W.p.value.Treatment = structure(c(1L,
1L, 1L, 1L), .Label = c("", "HMDB00123, HMDB00925, HMDB12136"
), class = "factor"), X2W.adj.p.value.Nanoparticle = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_), X2W.adj.p.value.Treatment = c(NA,
NA, NA, NA), Metlin = c(NA, NA, NA, NA), HMDBtag = c(NA,
NA, NA, NA), HMDBinfo = c(NA, NA, NA, NA)), .Names = c("X",
"rt", "mz", "o066_010", "o066_022", "o066_029", "o066_034", "o066_068",
"o066_072", "o066_077", "o066_079", "o066_080", "o066_102", "o066_103",
"o066_104", "o066_105", "pspectrum", "isotopes", "adduct", "mzmin",
"mzmax", "rtmin", "rtmax", "npeaks", "sample.Au5.C", "sample.Au5.H",
"sample.Au5.L", "sample.Au32.M", "sample.Au32.C", "sample.Au32.H",
"sample.Au32.L", "sample.Au5.M", "X2W.p.value.Nanoparticle",
"X2W.p.value.Treatment", "X2W.adj.p.value.Nanoparticle", "X2W.adj.p.value.
Treatment",
"Metlin", "HMDBtag", "HMDBinfo"), row.names = c(NA, 4L), class = "data.frame")
I want to arrange all the columns that match with Au5 and C,
then Au5 and L
then Au5 and H
then Au5 and M
and so on....
and do the same with Au32....
Just reordering the columns,matching the patterns above.
But I would be happy just with Au5 and C.
Thanks a lot
James
>----Messaggio originale----
>Da: smartpink111 at yahoo.com
>Data: 30/04/2014 11.02
>A: "pascalbells at libero.it"<pascalbells at libero.it>
>Ogg: re: [R] dataframe
>
>Could you dput the dataset?
>Also,the expected result...
>
>----------
>Sent from my Nokia
>
>------Original message------
>From: pascalbells at libero.it <pascalbells at libero.it>
>To: <smartpink111 at yahoo.com>
>Date: Wednesday, April 30, 2014 9:40:06 AM GMT+0200
>Subject: R: Re: [R] dataframe
>
>thank you for your help but it doesn't work....
>
>James
>
>
>
>>----Messaggio originale----
>>Da: smartpink111 at yahoo.com
>>Data: 29/04/2014 17.59
>>A: "r-help at r-project.org"<r-help at r-project.org>
>>Cc: "pascalbells at libero.it"<pascalbells at libero.it>
>>Ogg: Re: [R] dataframe
>>
>>Hi,
>>
>>It is better to show example data using ?dput().
>>
>>dat <- structure(list(row.names = 1:4, XYZ = c("sample", "sample2",
>>"sample3", "sample4"), `000_001` = c("sample", "Au5", "C", "C"
>>), `000_002` = c("sample", "Au32", "C", "Au4"), `000_003` = c("sample",
>>"Au5", "A", "AC")), .Names = c("row.names", "XYZ", "000_001",
>>"000_002", "000_003"), class = "data.frame", row.names = c(NA,
>>-4L))
>>
>>
>>dat[,intersect(grep("Au5", dat), grep("\\bC\\b",dat)),drop=FALSE]
>># 000_001
>>#1 sample
>>#2 Au5
>>#3 C
>>#4 C
>>
>>A.K.
>>
>>
>>
>>
>>On Tuesday, April 29, 2014 9:45 AM, "pascalbells at libero.it"
><pascalbells at libero.it> wrote:
>>Hello,
>>thank you for accepting me into the list.
>>
>>I have the following dataframe:
>>
>> row.names X Y Z 000_001 000_002 000_003
>>1 sample sample sample sample
>>2 sample2 Au5 Au32 Au5
>>3 sample3 C C
C
>>4 ..........
>>..
>>..
>n ........... .... .......
>............
>>
>>
>>I would like to select al the columns that have Au5 and C.
>>thank you in advance for your help,
>>james
>>
>>
>> [[alternative HTML version deleted]]
>>
>>______________________________________________
>>R-help at r-project.org mailing list
>>https://stat.ethz.ch/mailman/listinfo/r-help
>>PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
>>and provide commented, minimal, self-contained, reproducible code.
>>
>>
>
>
>
>
More information about the R-help
mailing list