Predictor Identifier: Nonparametric PREDiction

Ashish Sharma, Raj Mehrotra, Sanjeev Jha, Jingwan Li and Ze Jiang

14:23:19 21 June, 2023

library(NPRED)

op <- par()
require(zoo)
#> Loading required package: zoo
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
require(ggplot2)
#> Loading required package: ggplot2
#> Warning: package 'ggplot2' was built under R version 4.2.3

1 Synthetic data generation

# Other statistical models for generating synthetic data see Package - synthesis
# require(synthesis)

set.seed(2020) # set seed for reproducible results
# AR1 model from paper with 9 dummy variables
data.ar1 <- data.gen.ar1(500)

# AR4 model from paper with total 9 dimensions
data.ar4 <- data.gen.ar4(500)

# AR9 model from paper with total 9 dimensions
data.ar9 <- data.gen.ar9(500)

plot.zoo(cbind(data.ar1$x, data.ar4$x, data.ar9$x),
  xlab = NA, main = "Example of AR models",
  ylab = c("AR1", "AR4", "AR9"))
Example of AR models

Figure 1.1: Example of AR models

2 Partial informational correlation (PIC)

# R version of pic
pic.calc(data.ar9$x, data.ar9$dp)
#> $pmi
#> [1]  0.01667864 -0.01345925  0.03862148  0.33933764 -0.01281137  0.01703449
#> [7]  0.04359516  0.07521345  0.07754585
#> 
#> $pic
#> [1] 0.1811272 0.0000000 0.2726446 0.7019341 0.0000000 0.1830168 0.2889591
#> [8] 0.3737103 0.3790295

# fortran version of pic
calc.PIC(data.ar9$x, data.ar9$dp)
#> $pmi
#> [1] -0.003200178 -0.030318462  0.030197035  0.343560072 -0.019053916
#> [6]  0.010659037  0.046381753  0.068540976  0.059103240
#> 
#> $pic
#> [1] 0.0000000 0.0000000 0.2420878 0.7049662 0.0000000 0.1452324 0.2976424
#> [8] 0.3579123 0.3338973

3 Predictor identifier: stepwise.PIC

# pic <- stepwise.PIC(data.ar1$x, data.ar1$dp)
# pic <- stepwise.PIC(data.ar4$x, data.ar4$dp)
pic <- stepwise.PIC(data.ar9$x, data.ar9$dp)
pic
#> $cpy
#> [1] 4 9 1
#> 
#> $cpyPIC
#> [1] 0.7019341 0.5358898 0.3475098
#> 
#> $wt
#> [1] 0.4708906 0.3340259 0.1950835
#> 
#> $lstwet
#>  Intercept         X1         X2         X3 
#> 0.02092053 0.61892085 0.49902045 0.26565225 
#> 
#> $icpy
#> [1] 3

4 Partial weights (PW)

# R version of pw
pw.calc(data.ar9$x, data.ar9$dp, pic$cpy, pic$cpyPIC)
#> $pw
#> [1] 0.4708906 0.3340259 0.1950835

# fortran version of pw
calc.PW(data.ar9$x, data.ar9$dp, pic$cpy, pic$cpyPIC)
#> $pw
#> [1] 0.6371889 0.4519895 0.2639787

5 Nonparameteric prediction: knn

data("data3")
x <- ts(data3[, 1]) # response
z <- ts(data3[, -1]) # possible predictors
zout <- ts(data.gen.ar1(500, ndim = 15)$dp) # new input

xhat1 <- xhat2 <- x
# xhat1 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=F)
# xhat2 <- NPRED::knn(x,z,zout,k=5,reg=T,extrap=T)

for (i in 1:500) {
  xhat1[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = F)
  xhat2[i] <- NPRED::knn(x[-i], z[-i, ], z[i, ], extrap = T)
}

if (TRUE) {
  par(mfrow = c(1, 2), pty = c("s"))

  ts.plot(x, xhat1, xhat2, col = c("black","red","blue"), ylim = c(-10, 10), lwd = c(1, 1, 1))
  legend("topleft",
    bty = "n", lwd = 3, cex = 1, lty = 1,
    # inset = c(-0.5, 0),
    legend = c("OBS", "Pred", "Pred(extrap=T)"),
    x.intersp = 0, xjust = 0, yjust = 0, text.width = c(0, 50, 50), horiz = T,
    col = c("black","red","blue")
  )

  plot(xhat1, xhat2, xlim = c(-10, 10), ylim = c(-10, 10))
  abline(coef = c(0, 1), lwd = 1, col = 2)
  par(op)
}
#> Warning in par(op): graphical parameter "cin" cannot be set
#> Warning in par(op): graphical parameter "cra" cannot be set
#> Warning in par(op): graphical parameter "csi" cannot be set
#> Warning in par(op): graphical parameter "cxy" cannot be set
#> Warning in par(op): graphical parameter "din" cannot be set
#> Warning in par(op): graphical parameter "page" cannot be set
Example of KNN implemented in NPRED

Figure 5.1: Example of KNN implemented in NPRED

6 Illustration of the usage of partial weights

sample <- 500
k <- 0
u <- runif(sample, 0, 5 * pi)
z <- sin(u) + rnorm(sample, sd = 0.2)

u1 <- cbind(u, runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi), runif(sample, 0, 5 * pi))
# zhat1 <- knnregl1cv(x=z, z=u1, k=k)
zhat1 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, ], zout = u1[i, ], k = k))

sel <- stepwise.PIC(x = z, py = u1)
sel
#> $cpy
#> [1] 1
#> 
#> $cpyPIC
#> [1] 0.2559138
#> 
#> $wt
#> [1] 1
#> 
#> $lstwet
#>  Intercept          X 
#> 0.17620490 0.00959311 
#> 
#> $icpy
#> [1] 1
# zhat2 <- knnregl1cv(x=z, z=u1[,sel$cpy], k=k)
zhat2 <- sapply(1:sample, function(i) knn(x = z[-i], z = u1[-i, sel$cpy], zout = u1[i, sel$cpy], k = k))

if (TRUE) {
  plot(u, z, pch = 16)
  lines(sort(u), zhat1[order(u)], col = "green")
  lines(sort(u), zhat2[order(u)], col = "red")
  abline(a = 0, b = 0)
}
Illustration of the usage of partial weights

Figure 6.1: Illustration of the usage of partial weights

7 Application to predicting drought anomalies

# An demo example used in Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2020)
# Jiang, Z., Rashid, M. M., Johnson, F., & Sharma, A. (2021). A wavelet-based tool to modulate variance in predictors: An application to predicting drought anomalies. Environmental Modelling and Software, 135, 104907. https://doi.org/10.1016/j.envsoft.2020.104907 
require(WASP) 
#> Loading required package: WASP
#> 
#> Attaching package: 'WASP'
#> The following objects are masked from 'package:NPRED':
#> 
#>     data.gen.ar1, data.gen.ar4, data.gen.ar9, knn, knnregl1cv

#-------------------------------------------------------------------------------------
#load response and predictor variables
data(SPI.12); data(data.CI); data(Ind_AWAP.2.5)
#study grids and period
Grid <- sample(Ind_AWAP.2.5,1)
Grid = 149 #Grid used in the SI
SPI.12.ts <- window(SPI.12, start=c(1910,1),end=c(2009,12))
data.CI.ts <- window(data.CI, start=c(1910,1),end=c(2009,12))
#partition into two folds
folds <- cut(seq(1,nrow(SPI.12.ts)),breaks=2,labels=FALSE)
sub.cali <- which(folds==1, arr.ind=TRUE); sub.vali <- which(folds==2, arr.ind=TRUE)
#-------------------------------------------------------------------------------------
###calibration and selection
data <- list(x=SPI.12.ts[sub.cali,Grid],dp=data.CI.ts[sub.cali,])

#variance transformation - calibration
modwt <- modwt.vt(data, wf="haar", J=8, boundary="periodic")

#stepwise PIC selection
sel <- NPRED::stepwise.PIC(modwt$x, modwt$dp.n)
#-------------------------------------------------------------------------------------
###validation and prediction
data.val <- list(x=SPI.12.ts[sub.vali,Grid],dp=data.CI.ts[sub.vali,])

#variance transformation - validation
modwt.val <- modwt.vt.val(data.val, J=8, modwt)

#knn prediction
cpy <- sel$cpy; wt <- sel$wt
x=data$x; z=modwt$dp.n[,cpy]; zout=modwt.val$dp.n[,cpy]
mod <- knn(x, z, zout, k=5, pw=wt, extrap=T)

#-------------------------------------------------------------------------------------
###plot
start.cal <- c(1910,1); start.val <- c(1960,1)
ndim = ncol(data.CI.ts); CI.names = colnames(data.CI.ts)
par(mfcol=c(ndim+1,2),mar=c(2,4,2,2),mgp=c(1.5,0.5,0),
    bg = "white",pty="m", cex.lab=1.5, ps=8)
#----------------------------------------------
#plot before and after vt - calibration
if(TRUE){

  x <- ts(modwt$x, start=start.cal, frequency = 12)
  dp <- ts(modwt$dp, start=start.cal, frequency = 12)
  dp.n <- ts(modwt$dp.n, start=start.cal, frequency = 12)

  ts.plot(x,xlab=NA, main=paste0("Sampled Grid: ", Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}
#----------------------------------------------
#plot before and after vt - validation
if(TRUE){

  x <- ts(modwt.val$x, start=start.val, frequency = 12)
  dp <- ts(modwt.val$dp, start=start.val, frequency = 12)
  dp.n <- ts(modwt.val$dp.n, start=start.val, frequency = 12)

  ts.plot(x, xlab=NA, main=paste0("Sampled Grid: ",Grid), ylab=paste0("SPI",12), col=c("black"),lwd=c(1))
  for(nc in 1:ndim)
    ts.plot(dp[,nc],dp.n[,nc],xlab=NA,ylab=paste0(CI.names[nc]),
            col=c("red","blue"),lwd=c(1,1),lty=c(1,2))

}
Application to predicting drought anomalies

Figure 7.1: Application to predicting drought anomalies


par(op)