################################################## # Installation of Packages Required by this Demo # ################################################## # XLConnect: Excel Connector for R (that's what this demo is about) # fImport: Rmetrics - Economical and Financial Data Import # forecast: Forecasting functions for time series # zoo: S3 Infrastructure for Regular and Irregular Time Series # ggplot2: An implementation of the Grammar of Graphics install.packages(c("XLConnect", "fImport", "forecast", "zoo", "ggplot2")) ################# # Load Packages # ################# require(XLConnect) require(fImport) require(forecast) require(zoo) require(ggplot2) #################### # Data Preparation # #################### # Currencies we're interested in compared to CHF currencies = c("EUR", "USD", "GBP", "JPY") # Fetch currency exchange rates (currency to CHF) from OANDA (last 366 days) curr = do.call("cbind", args = lapply(currencies, function(cur) oandaSeries(paste(cur, "CHF", sep = "/")))) # Make a copy for later use curr.orig = curr # Scale currencies to exchange rate on first day in the series (baseline) curr = curr * matrix(1/curr[1,], nrow = nrow(curr), ncol = ncol(curr), byrow = TRUE) - 1 # Some data transformations to bring the data into a simple data.frame curr = transform(curr, Time = time(curr)@Data) names(curr) = c(currencies, "Time") # Cyclic shift to bring the Time column to the front curr = curr[(seq(along = curr) - 2) %% ncol(curr) + 1] # Let's do some predictions ... # Number of days to predict predictDays = 20 # For each currency ... currFit = sapply(curr[, -1], function(cur) { as.numeric(forecast(cur, h = predictDays)$mean) # Note: Normally one would obviously do some model diagnostics first... }) # Add Time column to predictions currFit = cbind( Time = seq(from = curr[nrow(curr), "Time"], length.out = predictDays + 1, by = "days")[-1], as.data.frame(currFit)) # Bind actual data with predictions curr = rbind(curr, currFit) ########################################### # Writing Data to Excel / Excel Reporting # ########################################### ## Let's start simple! # Workbook filename wbFilename = "swiss_franc.xlsx" # Create a new workbook wb = loadWorkbook(wbFilename, create = TRUE) # Create a new sheet named 'Swiss_Franc' sheet = "Swiss_Franc" createSheet(wb, name = sheet) # Alternatively, in a more object-oriented style: # wb$createSheet(name = sheet) # Create a new Excel name referring to the top left corner # of the sheet 'Swiss_Franc' - this name is going to hold # our currency data dataName = "currency" createName(wb, name = dataName, formula = paste(sheet, "$A$1", sep = "!")) # Alternatively: # wb$createName(name = dataName, formula = paste(sheet, "$A$1", sep = "!")) # Write the currency data to the named region created above # Note: the named region will be automatically redefined to encompass all # written data writeNamedRegion(wb, data = curr, name = dataName, header = TRUE) # Alternatively: # wb$writeNamedRegion(data = curr, name = dataName, header = TRUE) # Save the workbook (this actually writes the file to disk) saveWorkbook(wb) # Just dumping out some data to Excel is a very common case. # Therefore, there is a simpler way of doing this: if(file.exists(wbFilename)) file.remove(wbFilename) writeNamedRegionToFile(wbFilename, data = curr, name = dataName, formula = paste(sheet, "$A$1", sep = "!"), header = TRUE) # Note: writeNamedRegionToFile automatically generates the required sheet # and Excel name! ## In the following steps we will make the currency report a bit nicer... # In addition to writing out the data, for each currency we want to # highlight the points in time when there was a change of more than 2% # compared to the previous day. # Load the workbook created above wb = loadWorkbook(wbFilename) # Create a date cell style with a custom format for the Time column # (only show year, month and day without any time fields) csDate = createCellStyle(wb, name = "date") setDataFormat(csDate, format = "yyyy-mm-dd") # Create a time/date cell style for the prediction records csPrediction = createCellStyle(wb, name = "prediction") setDataFormat(csPrediction, format = "yyyy-mm-dd") setFillPattern(csPrediction, fill = XLC$FILL.SOLID_FOREGROUND) setFillForegroundColor(csPrediction, color = XLC$COLOR.GREY_25_PERCENT) # Create a percentage cell style # Number format: 2 digits after decimal point csPercentage = createCellStyle(wb, name = "currency") setDataFormat(csPercentage, format = "0.00%") # Create a highlighting cell style csHlight = createCellStyle(wb, name = "highlight") setFillPattern(csHlight, fill = XLC$FILL.SOLID_FOREGROUND) setFillForegroundColor(csHlight, color = XLC$COLOR.CORNFLOWER_BLUE) setDataFormat(csHlight, format = "0.00%") # Index for all rows except header row allRows = seq(length = nrow(curr)) + 1 # Apply date cell style to the Time column setCellStyle(wb, sheet = sheet, row = allRows, col = 1, cellstyle = csDate) # Set column width such that the full date column is visible setColumnWidth(wb, sheet = sheet, column = 1, width = 2800) # Apply prediction cell style setCellStyle(wb, sheet = sheet, row = tail(allRows, n = predictDays), col = 1, cellstyle = csPrediction) # Apply number format to the currency columns currencyColumns = seq(along = currencies) + 1 for(col in currencyColumns) { setCellStyle(wb, sheet = sheet, row = allRows, col = col, cellstyle = csPercentage) } # Check if there was a change of more than 2% compared to the previous day # (per currency) idx = rollapply(curr.orig, width = 2, FUN = function(x) abs(x[2] / x[1] - 1), by.column = TRUE) > 0.02 idx = rbind(rep(FALSE, ncol(idx)), idx) widx = apply(idx, 2, which) # Apply highlighting cell style for(i in seq(along = currencies)) { setCellStyle(wb, sheet = sheet, row = widx[[i]] + 1, col = i + 1, cellstyle = csHlight) # Note: # +1 for row since there is a header row # +1 for column since the first column is the time column } saveWorkbook(wb) ## In a next step we are going to add a currency graph to our report wb = loadWorkbook(wbFilename) # Stack currencies into a currency variable (for use with ggplot2 below) gcurr = reshape(curr, varying = currencies, direction = "long", v.names = "Value", times = currencies, timevar = "Currency") # Also add a discriminator column to differentiate between actual and # prediction values gcurr[["Type"]] = ifelse(gcurr$Time %in% currFit$Time, "prediction", "actual") # Create a png graph showing the currencies in the context of the Swiss Franc png(filename = "swiss_franc.png", width = 800, height = 600) ggplot(gcurr, aes(Time, Value, colour = Currency, linetype = Type)) + geom_line() + stat_smooth(method = "loess") + xlab("") + scale_y_continuous("Change to baseline", formatter = "percent") + opts(title = "Currencies vs Swiss Franc", axis.title.y = theme_text(size = 10, angle = 90, vjust = 0.3)) dev.off() # Define where the image should be placed via a named region; # let's put the image two columns left to the data starting in the 5th row createName(wb, name = "graph", formula = paste(sheet, idx2cref(c(5, ncol(curr) + 2)), sep = "!")) # Note: idx2cref converts indices (row, col) to Excel cell references # Put the image created above at the corresponding location addImage(wb, filename = "swiss_franc.png", name = "graph", originalSize = TRUE) # As a short side-note: summary also works on workbooks! summary(wb) saveWorkbook(wb) ## Want to add formulas to your reports? Here we go... wb = loadWorkbook(wbFilename) # Let's add the maximum downturn (= minimum) fur the currencies at the bottom # of the data (offset by three rows) # Get reference coordinates (corners) of the named region containing our data corners = getReferenceCoordinates(wb, dataName) # Excel area references to calculate the average for areaRefs = idx2aref(t(sapply(currencyColumns, function(col) c(corners[1,1] + 1, col, corners[2,1], col)))) # Note: corners[1,1] + 1 as we do not want to include the header row in # the formula # Construct the required formulas avg = paste("MIN(", areaRefs, ")", sep = "") # Set cell formulas setCellFormula(wb, sheet = sheet, row = corners[2,1] + 3, col = currencyColumns, formula = avg) # Add some cell styling... csAvg = createCellStyle(wb, name = "min") setDataFormat(csAvg, format = "0.00%") setBorder(csAvg, side = "top", type = XLC$BORDER.MEDIUM, color = XLC$COLOR.BLACK) setCellStyle(wb, sheet = sheet, row = corners[2,1] + 3, col = currencyColumns, cellstyle = csAvg) # And, as we are already at it - let's also add an Excel auto-filter to # the columns setAutoFilter(wb, sheet = sheet, reference = aref(corners[1,], corners[2,])) saveWorkbook(wb) # Depending on the size and complexity of reports to create, starting from # scratch and coding everything in R can be cumbersome (especially cell styling). # Therefore, XLConnect supports working with templates. The application of cell # styles when writing data to Excel can be controlled via so-called "style # actions" (see function setStyleAction). ####################################### # Reading / Importing Data from Excel # ####################################### # Reading (rectangular) data from Excel is straight forward. # There is basically two ways: # - reading data from worksheets via readWorksheet # - reading data from named regions via readNamedRegion # Generally, reading from named regions is the simplest and preferred way but # requires a named region to be set up correspondingly. If this is not the case, # readWorksheet can be used. ## Reading from named regions wb = loadWorkbook(wbFilename) # Read the named region that was created in the section above data = readNamedRegion(wb, name = dataName, header = TRUE) # Alternatively: wb$readNamedRegion(name = dataName, header = TRUE) # Do some calculations ... colMeans(data[, currencyColumns]) ## Reading from worksheets data = readWorksheet(wb, sheet = sheet, header = TRUE) # Note: When not specifying any of the boundaries (startRow, startCol, endRow, # endCol) XLConnect will try to determine the boundaries automatically. # If there are several data regions on a worksheet, this may not give the desired # result and you may need to specify some boundaries. # Actually, in our example the above won't give the desired result as the data # read in also includes the mean values computed via the Excel formulas. tail(data) # Therefore we need to do the following data = readWorksheet(wb, sheet = sheet, startRow = -1, endRow = nrow(curr) + 1, startCol = -1, endCol = -1, header = TRUE) tail(data) # Note: -1 means that the correspondingly values will be automatically # determined. Currently it is still necessary to specify either none or all # boundaries. This may be changed in the future to simplify the application of # readWorksheet and to allow something like: # data = readWorksheet(wb, sheet = sheet, endRow = nrow(curr) + 1, # header = TRUE) # Also note that for simple cases there are the functions # readNamedRegionFromFile and readWorksheetFromFile that prevent you having to # go via the sequence loadWorkbook & readNamedRegion/readWorksheet: data = readNamedRegionFromFile(wbFilename, name = dataName, header = TRUE) data = readWorksheetFromFile(wbFilename, sheet = sheet, startRow = -1, endRow = nrow(curr + 1), startCol = -1, endCol = -1, header = TRUE) ## Some additional syntactic sugar: # In case a workbook contains only a few named data regions, you can use 'with' # to reference them directly (via the name of the named region) without having # to read them in manually first: with(wb, { colMeans(currency[, currencyColumns]) }) # Note: 'with' should only be used if the number of named regions in the workbook # is small, since 'with' basically reads in all named regions automatically. ########################### # Handling of Error Cells # ########################### # XLConnect supports two different schemes on how to deal with error cells # (cells that produce an error when trying to be read): # - immediately stop on the first occurrence on an error cell # - produce warnings for erroring cells and assign NA as their value # Read example workbook containing various forms of error cells errorCell <- system.file("demoFiles/errorCell.xlsx", package = "XLConnect") wb = loadWorkbook(errorCell) # Define to produce warnings on erroring cells and assume error cells to be NA # (this is the default behavior) onErrorCell(wb, XLC$ERROR.WARN) # Read named region containing error cells data = readNamedRegion(wb, name = "MyData") print(data) # Define to stop immediately when encountering error cells onErrorCell(wb, XLC$ERROR.STOP) # Read named region containing error cells data = readNamedRegion(wb, name = "MyData")