[R] importing and filtering time series data

Joel Reymont joelr1 at gmail.com
Mon May 2 10:18:41 CEST 2011


My current code looks like this. Is there anything that can be improved?

#! /usr/bin/rscript

# install.packages(c('zoo','xts'))

library(zoo)
library(xts)

# Summarise request durations, optionally restricted to some request kinds.
#
# Arguments:
#   data  data frame with the columns Time (anything as.POSIXct() accepts),
#         Duration (numeric) and, when filtering, Kind.
#   type  value(s) of Kind to keep; NA (the default) or NULL keeps every row.
#
# Returns a named numeric vector of length 6, in this order:
#   count  number of requests
#   mean   mean duration
#   var    variance of the durations
#   5%     5th percentile of the durations
#   95%    95th percentile of the durations
#   rps    mean number of requests per second, over the one-second
#          buckets produced by endpoints()
# When no row matches, count is 0 and the other entries are NA.
req_stats <- function(data, type = NA) {
  keep_all <- is.null(type) || isTRUE(is.na(type))

  # Fail early with a readable message instead of a cryptic one further down.
  needed <- c("Time", "Duration", if (!keep_all) "Kind")
  absent <- setdiff(needed, names(data))
  if (length(absent) > 0L) {
    stop("missing column(s): ", paste(absent, collapse = ", "), call. = FALSE)
  }

  if (keep_all) {
    csv <- data
  } else {
    # Standard subsetting instead of subset(): inside subset() the name
    # `type` would resolve to a data column called "type" if one existed.
    # %in% never yields NA, so rows with a missing Kind are simply dropped.
    csv <- data[data[["Kind"]] %in% type, , drop = FALSE]
  }

  stat_names <- c("count", "mean", "var", "5%", "95%", "rps")
  durations <- csv[["Duration"]]
  if (length(durations) == 0L) {
    # Nothing to summarise: return early rather than building an empty
    # time series and asking for its per-second endpoints.
    return(setNames(c(0, rep(NA_real_, 5L)), stat_names))
  }

  # import into a time series
  x <- xts(durations, as.POSIXct(csv[["Time"]]))
  # Debugging aids kept from the original, for inspecting x interactively:
  # indexFormat(x) <- "%Y-%m-%d %H:%M:%OS"
  # options(digits.secs=6)

  # requests per second: number of observations in each one-second bucket
  rps <- period.apply(x, endpoints(x, "seconds"), length)

  # The summary statistics only need the values, so compute them on the plain
  # numeric vector rather than relying on xts methods for var()/quantile().
  stats <- c(
    length(durations),
    mean(durations),
    var(durations),
    quantile(durations, c(0.05, 0.95)),
    mean(rps)
  )
  setNames(stats, stat_names)
}

# Load the benchmark log. The CSV is assumed to have a header row; the code
# below and req_stats() use the columns Include, Kind, Time and Duration.
benchmarks <- read.csv("benchie.csv")

# Keep only the rows flagged for inclusion (take out the rows with "N").
# Named `included` rather than `all` so that base::all() is not masked and
# the name is not later rebound to an object of a different type.
included <- subset(benchmarks, Include == "Y")

# Kind: R = sidebar request, C = sidebar click, U = upload doc, A = create ad
# The names become the row labels of the result, so enabling or disabling a
# kind is a one-line change here.
kinds <- c(
  "sidebar req" = "R",
  # "sidebar click" = "C",
  "doc upload" = "U",
  "ad create" = "A"
)

# One row of statistics for the whole data set, then one row per kind.
per_kind <- lapply(kinds, function(kind) req_stats(included, kind))
mdat <- do.call(rbind, c(list(all = req_stats(included)), per_kind))
colnames(mdat) <- c("count", "mean", "var", "5%", "95%", "rps")

print(round(mdat, digits = 3))

--------------------------------------------------------------------------
- for hire: mac osx device driver ninja, kernel extensions and usb drivers
---------------------+------------+---------------------------------------
http://wagerlabs.com | @wagerlabs | http://www.linkedin.com/in/joelreymont
---------------------+------------+---------------------------------------


More information about the R-help mailing list