[R] IP-Address

Henrik Bengtsson hb at stat.berkeley.edu
Sun May 31 19:52:03 CEST 2009


library(gsubfn)
library(gtools)
library(rbenchmark)

# Simulated input for the benchmark: three columns of standard-normal noise
# plus an 'ip' column of random dotted-quad strings. Each address is made of
# four distinct values drawn from 1..255 (sample() draws without replacement).
n <- 10000
df <- data.frame(
  a = rnorm(n),
  b = rnorm(n),
  c = rnorm(n),
  ip = vapply(
    seq_len(n),
    function(i) paste(sample(255, 4), collapse = "."),
    character(1)
  )
)

# Time four ways of reordering the rows of 'df' by its dotted-quad 'ip'
# column. Each named argument below is one candidate expression handed to
# benchmark() (from rbenchmark), which is asked for 10 replications and for
# the 'test' and 'elapsed' columns only. Every candidate ends with df[o, ],
# i.e. the data frame with its rows permuted by the computed ordering 'o'.
res <- benchmark(columns=c('test', 'elapsed'), replications=10, order=NULL,
  # peda: parse the addresses with read.table(), splitting on '.', and order
  # by the resulting columns (do.call() passes each column to order() as a
  # successive sort key). Column types are left for read.table() to work out.
  peda = {
    connection <- textConnection(as.character(df$ip))
    o <- do.call(order, read.table(connection, sep='.'))
    close(connection)
    df[o, ]
  },

  # peda2: same approach as 'peda', but read.table() is told up front that
  # all four columns are integers, and quote handling, NA-string matching and
  # blank-line skipping are switched off.
  peda2 = {
    connection <- textConnection(as.character(df$ip))
    dfT <- read.table(connection, sep='.', colClasses=rep("integer",
4), quote="", na.strings=NULL, blank.lines.skip=FALSE)
    close(connection)
    o <- do.call(order, dfT)
    df[o, ]
  },

  # hb: split each address on a literal '.', flatten to one integer vector and
  # reshape it to a 4 x nrow(df) matrix (one column per address, one row per
  # octet position). The four octets are then combined into a single base-256
  # number per address so that one order() call sorts on all of them.
  hb = {
    ip <- strsplit(as.character(df$ip), split=".", fixed=TRUE)
    ip <- unlist(ip, use.names=FALSE)
    ip <- as.integer(ip)
    dim(ip) <- c(4, nrow(df))
    ip <- 256^3*ip[1,] + 256^2*ip[2,] + 256*ip[3,] + ip[4,]
    o <- order(ip)
    df[o, ]
  },

  # hb2: same parsing as 'hb', but instead of building a combined key it
  # sorts one octet at a time, from the last octet to the first, with
  # sort.list(method="radix"). Each pass reorders the permutation produced by
  # the previous pass; this relies on the radix method being stable so that
  # ties on the current octet keep the order set by the less significant ones.
  hb2 = {
    ip <- strsplit(as.character(df$ip), split=".", fixed=TRUE)
    ip <- unlist(ip, use.names=FALSE)
    ip <- as.integer(ip);
    dim(ip) <- c(4, nrow(df))
    o <- sort.list(ip[4,], method="radix", na.last=TRUE)
    for (kk in 3:1) {
      o <- o[sort.list(ip[kk,o], method="radix", na.last=TRUE)]
    }
    df[o, ]
  }
)

# Show the collected timings, one row per candidate.
print(res)

   test elapsed
1  peda    4.12
2 peda2    4.08
3    hb    0.28
4   hb2    0.25


On Sun, May 31, 2009 at 12:42 AM, Wacek Kusnierczyk
<Waclaw.Marcin.Kusnierczyk at idi.ntnu.no> wrote:
> edwin Sendjaja wrote:
>> Hi VQ,
>>
>> Thank you. It works like a charm. But I think Peter's code is faster. What is the difference?
>>
>
> i think peter's code is more r-elegant, though less generic.  here's a
> quick test, with not so surprising results.  gsubfn is implemented in r,
> not c, and it is painfully slow in this test. i also added gabor's
> suggestion.
>
>    library(gsubfn)
>    library(gtools)
>    library(rbenchmark)
>
>    n = 1000
>    df = data.frame(
>       a=rnorm(n),
>       b = rnorm(n),
>       c = rnorm(n),
>       ip = replicate(n, paste(sample(255, 4), collapse='.'),
> simplify=TRUE))
>    benchmark(columns=c('test', 'elapsed'), replications=10, order=NULL,
>       peda={
>          connection = textConnection(as.character(df$ip))
>          o = do.call(order, read.table(connection, sep='.'))
>          close(connection)
>          df[o, ] },
>       waku=df[order(gsubfn(perl=TRUE,
>          '[0-9]+',
>          ~ sprintf('%03d', as.integer(x)),
>          as.character(df$ip))), ],
>       gagr=df[mixedorder(df$ip), ] )
>
>    # peda 0.070
>    # waku 7.070
>    # gagr 4.710
>
>
> vQ
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>



More information about the R-help mailing list