##################################################
# Installation of Packages Required by this Demo #
##################################################

# XLConnect: Excel Connector for R (that's what this demo is about)

# fImport: Rmetrics - Economical and Financial Data Import
# forecast: Forecasting functions for time series
# zoo: S3 Infrastructure for Regular and Irregular Time Series
# ggplot2: An implementation of the Grammar of Graphics

install.packages(c("XLConnect", "fImport", "forecast", "zoo", "ggplot2"))

#################
# Load Packages #
#################

require(XLConnect)
require(fImport)
require(forecast)
require(zoo)
require(ggplot2)

####################
# Data Preparation #
####################

# Currencies we're interested in compared to CHF
currencies = c("EUR", "USD", "GBP", "JPY")

# Fetch currency exchange rates (currency to CHF) from OANDA (last 366 days)
curr = do.call("cbind", args = lapply(currencies,
               function(cur) oandaSeries(paste(cur, "CHF", sep = "/"))))
# Make a copy for later use
curr.orig = curr
# Scale currencies to exchange rate on first day in the series (baseline)
curr = curr * matrix(1/curr[1,], nrow = nrow(curr),
                     ncol = ncol(curr), byrow = TRUE) - 1
# Some data transformations to bring the data into a simple data.frame
curr = transform(curr, Time = time(curr)@Data)
names(curr) = c(currencies, "Time")
# Cyclic shift to bring the Time column to the front
curr = curr[(seq(along = curr) - 2) %% ncol(curr) + 1]

# Let's do some predictions ...

# Number of days to predict
predictDays = 20
# For each currency ...
currFit = sapply(curr[, -1], function(cur) {
  as.numeric(forecast(cur, h = predictDays)$mean)
  # Note: Normally one would obviously do some model diagnostics first...
})
# Add Time column to predictions
currFit = cbind(
  Time = seq(from = curr[nrow(curr), "Time"],
             length.out = predictDays + 1, by = "days")[-1],
  as.data.frame(currFit))

# Bind actual data with predictions
curr = rbind(curr, currFit)

###########################################
# Writing Data to Excel / Excel Reporting #
###########################################

## Let's start simple!

# Workbook filename
wbFilename = "swiss_franc.xlsx"
# Create a new workbook
wb = loadWorkbook(wbFilename, create = TRUE)

# Create a new sheet named 'Swiss_Franc'
sheet = "Swiss_Franc"
createSheet(wb, name = sheet)
# Alternatively, in a more object-oriented style:
# wb$createSheet(name = sheet)

# Create a new Excel name referring to the top left corner
# of the sheet 'Swiss_Franc' - this name is going to hold
# our currency data
dataName = "currency"
createName(wb, name = dataName, formula = paste(sheet, "$A$1", sep = "!"))
# Alternatively:
# wb$createName(name = dataName, formula = paste(sheet, "$A$1", sep = "!"))

# Write the currency data to the named region created above
# Note: the named region will be automatically redefined to encompass all
# written data
writeNamedRegion(wb, data = curr, name = dataName, header = TRUE)
# Alternatively:
# wb$writeNamedRegion(data = curr, name = dataName, header = TRUE)

# Save the workbook (this actually writes the file to disk)
saveWorkbook(wb)

# Just dumping out some data to Excel is a very common case.
# Therefore, there is a simpler way of doing this:
if(file.exists(wbFilename))
  file.remove(wbFilename)
writeNamedRegionToFile(wbFilename, data = curr, name = dataName,
                       formula = paste(sheet, "$A$1", sep = "!"),
                       header = TRUE)
# Note: writeNamedRegionToFile automatically generates the required sheet
# and Excel name!

## In the following steps we will make the currency report a bit nicer...

# In addition to writing out the data, for each currency we want to
# highlight the points in time when there was a change of more than 2%
# compared to the previous day.

# Load the workbook created above
wb = loadWorkbook(wbFilename)

# Create a date cell style with a custom format for the Time column
# (only show year, month and day without any time fields)
csDate = createCellStyle(wb, name = "date")
setDataFormat(csDate, format = "yyyy-mm-dd")
# Create a time/date cell style for the prediction records
csPrediction = createCellStyle(wb, name = "prediction")
setDataFormat(csPrediction, format = "yyyy-mm-dd")
setFillPattern(csPrediction, fill = XLC$FILL.SOLID_FOREGROUND)
setFillForegroundColor(csPrediction, color = XLC$COLOR.GREY_25_PERCENT)
# Create a percentage cell style
# Number format: 2 digits after decimal point
csPercentage = createCellStyle(wb, name = "currency")
setDataFormat(csPercentage, format = "0.00%")
# Create a highlighting cell style
csHlight = createCellStyle(wb, name = "highlight")
setFillPattern(csHlight, fill = XLC$FILL.SOLID_FOREGROUND)
setFillForegroundColor(csHlight, color = XLC$COLOR.CORNFLOWER_BLUE)
setDataFormat(csHlight, format = "0.00%")

# Index for all rows except header row
allRows = seq(length = nrow(curr)) + 1

# Apply date cell style to the Time column
setCellStyle(wb, sheet = sheet, row = allRows, col = 1, cellstyle = csDate)
# Set column width such that the full date column is visible
setColumnWidth(wb, sheet = sheet, column = 1, width = 2800)
# Apply prediction cell style
setCellStyle(wb, sheet = sheet, row = tail(allRows, n = predictDays), col = 1,
             cellstyle = csPrediction)
# Apply number format to the currency columns
currencyColumns = seq(along = currencies) + 1
for(col in currencyColumns) {
  setCellStyle(wb, sheet = sheet, row = allRows, col = col,
               cellstyle = csPercentage)
}

# Check if there was a change of more than 2% compared to the previous day
# (per currency)
idx = rollapply(curr.orig, width = 2, FUN = function(x) abs(x[2] / x[1] - 1),
                by.column = TRUE) > 0.02
idx = rbind(rep(FALSE, ncol(idx)), idx)
widx = apply(idx, 2, which)
# Apply highlighting cell style
for(i in seq(along = currencies)) {
  setCellStyle(wb, sheet = sheet, row = widx[[i]] + 1, col = i + 1,
               cellstyle = csHlight)
  # Note:
  # +1 for row since there is a header row
  # +1 for column since the first column is the time column
}

saveWorkbook(wb)

## In a next step we are going to add a currency graph to our report

wb = loadWorkbook(wbFilename)

# Stack currencies into a currency variable (for use with ggplot2 below)
gcurr = reshape(curr, varying = currencies, direction = "long",
                v.names = "Value", times = currencies, timevar = "Currency")
# Also add a discriminator column to differentiate between actual and
# prediction values
gcurr[["Type"]] = ifelse(gcurr$Time %in% currFit$Time, "prediction", "actual")

# Create a png graph showing the currencies in the context of the Swiss Franc
png(filename = "swiss_franc.png", width = 800, height = 600)
ggplot(gcurr, aes(Time, Value, colour = Currency, linetype = Type)) +
  geom_line() + stat_smooth(method = "loess") + xlab("") +
  scale_y_continuous("Change to baseline", formatter = "percent") +
  opts(title = "Currencies vs Swiss Franc",
       axis.title.y = theme_text(size = 10, angle = 90, vjust = 0.3))
dev.off()

# Define where the image should be placed via a named region;
# let's put the image two columns left to the data starting in the 5th row
createName(wb, name = "graph",
           formula = paste(sheet, idx2cref(c(5, ncol(curr) + 2)), sep = "!"))
# Note: idx2cref converts indices (row, col) to Excel cell references

# Put the image created above at the corresponding location
addImage(wb, filename = "swiss_franc.png", name = "graph",
         originalSize = TRUE)

# As a short side-note: summary also works on workbooks!
summary(wb)

saveWorkbook(wb)

## Want to add formulas to your reports? Here we go...

wb = loadWorkbook(wbFilename)

# Let's add the maximum downturn (= minimum) fur the currencies at the bottom
# of the data (offset by three rows)

# Get reference coordinates (corners) of the named region containing our data
corners = getReferenceCoordinates(wb, dataName)
# Excel area references to calculate the average for
areaRefs = idx2aref(t(sapply(currencyColumns,
                    function(col) c(corners[1,1] + 1, col,
                                    corners[2,1], col))))
# Note: corners[1,1] + 1 as we do not want to include the header row in
# the formula

# Construct the required formulas
avg = paste("MIN(", areaRefs,  ")", sep = "")

# Set cell formulas
setCellFormula(wb, sheet = sheet, row = corners[2,1] + 3,
               col = currencyColumns, formula = avg)
# Add some cell styling...
csAvg = createCellStyle(wb, name = "min")
setDataFormat(csAvg, format = "0.00%")
setBorder(csAvg, side = "top", type = XLC$BORDER.MEDIUM,
          color = XLC$COLOR.BLACK)
setCellStyle(wb, sheet = sheet, row = corners[2,1] + 3,
             col = currencyColumns, cellstyle = csAvg)

# And, as we are already at it - let's also add an Excel auto-filter to
# the columns
setAutoFilter(wb, sheet = sheet, reference = aref(corners[1,], corners[2,]))

saveWorkbook(wb)

# Depending on the size and complexity of reports to create, starting from
# scratch and coding everything in R can be cumbersome (especially cell styling).
# Therefore, XLConnect supports working with templates. The application of cell
# styles when writing data to Excel can be controlled via so-called "style
# actions" (see function setStyleAction).

#######################################
# Reading / Importing Data from Excel #
#######################################

# Reading (rectangular) data from Excel is straight forward.
# There is basically two ways:
# - reading data from worksheets via readWorksheet
# - reading data from named regions via readNamedRegion

# Generally, reading from named regions is the simplest and preferred way but
# requires a named region to be set up correspondingly. If this is not the case,
# readWorksheet can be used.

## Reading from named regions

wb = loadWorkbook(wbFilename)

# Read the named region that was created in the section above
data = readNamedRegion(wb, name = dataName, header = TRUE)
# Alternatively: wb$readNamedRegion(name = dataName, header = TRUE)
# Do some calculations ...
colMeans(data[, currencyColumns])

## Reading from worksheets

data = readWorksheet(wb, sheet = sheet, header = TRUE)
# Note: When not specifying any of the boundaries (startRow, startCol, endRow,
# endCol) XLConnect will try to determine the boundaries automatically.
# If there are several data regions on a worksheet, this may not give the desired
# result and you may need to specify some boundaries.
# Actually, in our example the above won't give the desired result as the data
# read in also includes the mean values computed via the Excel formulas.
tail(data)

# Therefore we need to do the following
data = readWorksheet(wb, sheet = sheet, startRow = -1, endRow = nrow(curr) + 1,
                     startCol = -1, endCol = -1, header = TRUE)
tail(data)
# Note: -1 means that the correspondingly values will be automatically
# determined. Currently it is still necessary to specify either none or all
# boundaries. This may be changed in the future to simplify the application of
# readWorksheet and to allow something like:
# data = readWorksheet(wb, sheet = sheet, endRow = nrow(curr) + 1,
#                      header = TRUE)

# Also note that for simple cases there are the functions
# readNamedRegionFromFile and readWorksheetFromFile that prevent you having to
# go via the sequence loadWorkbook & readNamedRegion/readWorksheet:
data = readNamedRegionFromFile(wbFilename, name = dataName, header = TRUE)

data = readWorksheetFromFile(wbFilename, sheet = sheet, startRow = -1,
                             endRow = nrow(curr + 1), startCol = -1,
                             endCol = -1, header = TRUE)

## Some additional syntactic sugar:

# In case a workbook contains only a few named data regions, you can use 'with'
# to reference them directly (via the name of the named region) without having
# to read them in manually first:
with(wb, {
  colMeans(currency[, currencyColumns])
})

# Note: 'with' should only be used if the number of named regions in the workbook
# is small, since 'with' basically reads in all named regions automatically.

###########################
# Handling of Error Cells #
###########################

# XLConnect supports two different schemes on how to deal with error cells
# (cells that produce an error when trying to be read):
# - immediately stop on the first occurrence on an error cell
# - produce warnings for erroring cells and assign NA as their value

# Read example workbook containing various forms of error cells
errorCell <- system.file("demoFiles/errorCell.xlsx", package = "XLConnect")
wb = loadWorkbook(errorCell)

# Define to produce warnings on erroring cells and assume error cells to be NA
# (this is the default behavior)
onErrorCell(wb, XLC$ERROR.WARN)
# Read named region containing error cells
data = readNamedRegion(wb, name = "MyData")
print(data)

# Define to stop immediately when encountering error cells
onErrorCell(wb, XLC$ERROR.STOP)
# Read named region containing error cells
data = readNamedRegion(wb, name = "MyData")