[R] importing and filtering time series data
Joel Reymont
joelr1 at gmail.com
Mon May 2 10:18:41 CEST 2011
My current code looks like this. Anything that can be improved?
#! /usr/bin/rscript
# install.packages(c('zoo','xts'))
library(zoo)
library(xts)
# Summarise request durations for one request kind, or for every row.
#
# Args:
#   data: data frame with the columns `Time` (timestamps that as.POSIXct()
#         can convert), `Duration` and `Kind`.
#   type: a single `Kind` value to keep; NA (the default) keeps all rows.
#
# Returns:
#   A vector of six numbers: number of requests, mean, variance, 5% and 95%
#   quantiles of `Duration`, and the mean number of requests per second
#   (averaged over the one-second periods produced by endpoints(), i.e. the
#   seconds in which at least one request was recorded).
#   When no row matches, the count is 0 and the other five values are NA.
req_stats <- function(data, type = NA) {
  stopifnot(length(type) == 1L)

  if (is.na(type)) {
    csv <- data
  } else {
    # subset of data matching our request type
    # Plain indexing rather than subset(): subset() evaluates its condition
    # inside `data` first, so a column named `type` would shadow the argument.
    # which() discards rows whose Kind is NA, just as subset() did.
    csv <- data[which(data$Kind == type), , drop = FALSE]
  }

  if (nrow(csv) == 0L) {
    # nothing to summarise: report a zero count instead of handing an
    # empty series to xts()/endpoints()
    return(c(0, NA_real_, NA_real_,
             "5%" = NA_real_, "95%" = NA_real_, NA_real_))
  }

  # import into a time series
  x <- xts(csv$Duration, as.POSIXct(csv$Time))

  # requests per second
  rps <- period.apply(x, endpoints(x, 'seconds'), length)

  # stats
  c(length(x), mean(x), var(x), quantile(x, c(.05, .95)), mean(rps))
  # indexFormat(x) <- "%Y-%m-%d %H:%M:%OS"
  # options(digits.secs=6)
}
# Build a table of request statistics, one row per request kind plus an
# overall row, and print it rounded to three decimals.

# assumes column headers
data <- read.csv("benchie.csv")
# take out the rows with "N"
# (kept under its own name so the data frame is never overwritten by a
# summary vector and base::all() is not masked)
included <- subset(data, Include == "Y")
# Kind: R = sidebar request, C = sidebar click, U = upload doc, A = create ad
all_stats <- req_stats(included)
sidebar_req <- req_stats(included, "R")
# sidebar_click <- req_stats(included, "C")
doc_upload <- req_stats(included, "U")
ad_create <- req_stats(included, "A")
# mdat <- rbind(all_stats, sidebar_req, sidebar_click, doc_upload, ad_create)
# rownames(mdat) <- c("all", "sidebar req", "sidebar click", "doc upload", "ad create")
mdat <- rbind(all_stats, sidebar_req, doc_upload, ad_create)
rownames(mdat) <- c("all", "sidebar req", "doc upload", "ad create")
colnames(mdat) <- c("count", "mean", "var", "5%", "95%", "rps")
print(round(mdat, digits = 3))
--------------------------------------------------------------------------
- for hire: mac osx device driver ninja, kernel extensions and usb drivers
---------------------+------------+---------------------------------------
http://wagerlabs.com | @wagerlabs | http://www.linkedin.com/in/joelreymont
---------------------+------------+---------------------------------------
More information about the R-help mailing list