[R] deduplication
Wu Gong
wg2f at mtmail.mtsu.edu
Fri Jun 4 11:22:43 CEST 2010
Please try this:
## Group IDs that are linked, directly or transitively, by (id1, id2) pairs.
##
## Each pair (id1[k], id2[k]) states that the two IDs refer to the same
## entity. IDs connected through any chain of pairs end up in one group.
##
## Arguments:
##   id1, id2  numeric vectors of equal length, without NA.
## Value:
##   An unnamed list with one character vector per group. IDs inside a group
##   are in increasing numeric order, and groups are ordered by their
##   smallest ID. Empty input yields an empty list.
group_linked_ids <- function(id1, id2) {
  stopifnot(
    is.numeric(id1), is.numeric(id2),
    length(id1) == length(id2),
    !anyNA(id1), !anyNA(id2)
  )

  ## Work on positions within the sorted unique IDs, so that "smaller label"
  ## and "smaller ID" mean the same thing and comparisons are exact.
  ids <- sort(unique(c(id1, id2)))
  label <- seq_along(ids)
  left <- match(id1, ids)
  right <- match(id2, ids)

  ## One pass is enough: relabelling replaces every occurrence of the larger
  ## label, so a pair that has been merged can never diverge again.
  for (k in seq_along(left)) {
    a <- label[left[k]]
    b <- label[right[k]]
    if (a != b) {
      label[label == max(a, b)] <- min(a, b)
    }
  }

  ## split() keeps single-member groups and the single-group case intact,
  ## unlike row-subsetting a table, which drops dimensions in both cases.
  unname(split(as.character(ids), label))
}

## Import data
id1 <- c(4, 17, 9, 1, 1, 1, 3, 3, 6, 15, 1, 1, 1, 1,
         3, 3, 3, 3, 4, 4, 4, 5, 5, 12, 9, 9, 10, 10)
id2 <- c(8, 18, 10, 3, 6, 7, 6, 7, 7, 16, 4, 5, 12, 18,
         4, 5, 12, 18, 5, 12, 18, 12, 18, 18, 15, 16, 15, 16)
id <- data.frame(id1 = id1, id2 = id2)

## Create results
res <- group_linked_ids(id$id1, id$id2)
res
-----
A R learner.
--
View this message in context: http://r.789695.n4.nabble.com/deduplication-tp2241637p2242921.html
Sent from the R help mailing list archive at Nabble.com.
More information about the R-help
mailing list