[R-sig-Finance] arma model results when exogenouse variables used in, ARMA(p=5, q=(1-6, 19)) (Joe Byers)
Joe Byers
joe-byers at utulsa.edu
Thu Jul 13 22:56:54 CEST 2006
All I have a simply script that replicates by error. I have include a
summary.fARMA.new function because the digits variables needed
modification and printCoefmat did not print correctly. The code is
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Example to replicate ARMA model with exogenous variables error in the
fARMA.summary functions
#set up enviroment and libraries
library(date);
library(car);library(tseries);library(Hmisc);library(fBasics);
library(R2HTML);#library(gdata);
library(partsm);library(tseries);library(pear);
library(fBasics);library(fSeries); library(cba);
summary.fARMA.new<-function (object, doplot = TRUE, ...)
{
digits <- max(5, getOption("digits") - 4) #added by joe w. byers
ans = NULL
x = object
object = x at fit
ans$call = object$call
ans$tsmodel = object$tstitle
ans$residuals = as.vector(na.omit(object$residuals))
if (length(ans$residuals) == 0) {
ans$var = 0
}
if (length(ans$residuals) > 0) {
ans$var = var(ans$residuals)
}
ans$sigma2 = object$sigma2
# the following lines replace to handle exongeous variables and
masking of paramters
# tval <- object$coef/object$se.coef
# prob <- 2 * (1 - pnorm(abs(tval)))
# ans$coefmat <- cbind(format(object$coef,digits=digits),
format(object$se.coef,digits=digits),
# format(tval,digits=digits), prob=format.pval(prob,digits=digits))
# rownames(ans$coefmat)<-ans$coefmat$Row.names
# ans$coefmat<-ans$coefmat[2:length(ans$coefmat)]
# dimnames(ans$coefmat) <- list(names(object$coef), c(" Estimate",
# " Std. Error", " t value", "Pr(>|t|)"))
# row.names(ans$coefmat)<-toupper(row.names(ans$coefmat))
tval<-subset(object$coef,object$mask)/object$se.coef
prob <- 2 * (1 - pnorm(abs(tval)))
ans$coefmat<-merge(format(object$coef,digits=digits),
cbind(format(object$se.coef,digits=digits),format(tval,digits=digits),format.pval(prob,digits=digits)),
by.x="row.names",by.y="row.names",sort=FALSE,all.x=TRUE,all.y=FALSE)
rownames(ans$coefmat)<-toupper(as.vector(ans$coefmat$Row.names))
#make the row indexes the parameter names
ans$coefmat<-ans$coefmat[2:length(ans$coefmat)] # drop first column
that merge create of row.names
names(ans$coefmat) <- c(" Estimate"," Std. Error", " t value",
"Pr(>|t|)")
if (object$tsmodel == "ar") {
ans$aic = (object$n.used * (1 + log(2 * pi)) + object$n.used *
log(ans$var) + 2 * length(object$coef))
}
if (object$tsmodel == "arma") {
ans$aic = (object$n.used * (1 + log(2 * pi)) + object$n.used *
log(ans$var) + 2 * length(object$coef))
ans$css = object$css
}
if (object$tsmodel == "arima") {
ans$aic = object$aic
ans$loglik = object$loglik
}
if (object$tsmodel == "fracdiff") {
doplot = FALSE
}
cat("\nTitle:\n ")
cat(x at title, "\n")
cat("\nCall:\n ")
cat(paste(deparse(object$call), sep = "\n", collapse = "\n"),
"\n", sep = "")
cat("\nModel:\n ", object$tstitle, "\n", sep = "")
cat("\nCoefficient(s):\n")
digits = max(5, getOption("digits") - 4) # changed 4 to 5 by joe w.
byers
print.default(format(object$coef, digits = digits), print.gap = 2,
quote = FALSE)
digits = max(5, getOption("digits") - 4) # changed 4 to 5 by joe w.
byers
if (length(object$residuals) > 2) {
cat("\nResiduals:\n")
rq = structure(quantile(ans$residuals), names = c("Min",
"1Q", "Median", "3Q", "Max"))
print(rq, digits = digits)
cat("\nMoments: \n")
skewness = sum((ans$residuals -
mean(ans$residuals))^3/sqrt(var(ans$residuals))^3)/length(ans$residuals)
kurtosis = sum((ans$residuals -
mean(ans$residuals))^4/var(ans$residuals)^2)/length(ans$residuals) -
3
stats = structure(c(skewness, kurtosis), names = c("Skewness",
"Kurtosis"))
print(stats, digits = digits)
}
cat("\nCoefficient(s):\n")
signif.stars = getOption("show.signif.stars")
print(ans$coefmat, digits = digits, signif.stars = signif.stars,
...) #changes printCoefmat to print by joe w. byers
cat("\n")
if (x at fit$tsmodel == "ar") {
cat("sigma^2 estimated as: ", format(object$var,
digits = digits), "\n")
cat("AIC Criterion: ", format(round(object$aic,
2)), "\n")
}
if (x at fit$tsmodel == "arma") {
cat("sigma^2 estimated as: ", format(object$sigma2,
digits = digits), "\n")
cat("Conditional Sum-of-Squares: ", format(round(object$css,
digits = 2)), "\n")
}
if (x at fit$tsmodel == "arima") {
cm = object$call$method
if (is.null(cm) || cm != "CSS")
cat("sigma^2 estimated as: ", format(object$sigma2,
digits = digits), "\nlog likelihood: ",
format(round(object$loglik, 2)), "\nAIC
Criterion: ",
format(round(object$aic, 2)), "\n", sep = "")
else cat("sigma^2 estimated as: ", format(object$sigma2,
digits = digits), "\npart log likelihood: ",
format(round(object$loglik,
2)), "\n", sep = "")
}
if (doplot)
plot.fARMA(x, ...)
cat("\nDescription:\n ")
cat(x at description, "\n\n")
invisible()
}
# generate white noise vector
set.seed(1) # so you can reproduce the results
v = rnorm(1000,1,1) # v contains 1000 iid N(1,1) variates
e=rnorm(1000,0,1)
x = cumsum(v) # x is a random walk with drift = 1
plot.ts(x) # have a look if you don't believe me
#generate ARMA(1,(1,3)) process with trend
#ar(1)=.5 MA(1)=.05 MA(3)=-.25 trend=.025 mean=1
trend=.025;ma1=.05; ma3=-.25; ar1=.5;
len<-length(v);
data<-ar1*v[3:(len-1)]+ma1*e[3:(len-1)]+ma3*e[1:(len-3)]
plot.ts(data)
# estimate ARMA model using fARMA()
fixed=c(NA,NA,0,NA,NA);#1 ar terms, 3 ma terms, intercept
ts.results<-armaFit(data~arma(1,3),include.mean=TRUE,optim.control=list(maxit=500),fixed=fixed);
#set the graphics to pause since I am using sciViews
par(ask=TRUE);
#produce results with error using summary.fARMA()
summary(ts.results)
#produce results without error using modified summary.fARMA.new()
summary.fARMA.new(ts.results)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Spencer Graves wrote:
> Hi, Joe:
>
> Not a problem. However, if you had 26 coefficients, you had
> more than a simple ARMA(1,1). With only 14 computed standard errors
> for 26 coefficients, I gather you fixed a dozen coefficients. Thus,
> if I had had time to experiment, I could have tried an ARMA(1,1),
> fixing one coefficient.
>
> Also, how does an arma(5, 19) produce 26 coefficients? 5 AR +
> 19 MA = 24, + 1 intercept = 25. Does this model include the standard
> deviation of the whitened residuals?
>
> If you'll permit another question: An arma(5, 19) sounds like
> overfitting to me. What kinds of plots have you made? Have you
> plotted the data vs. time? Have you made normal probability plots of
> the the data and of whitened residuals from a simple model? Have you
> tried acf and pacf of the absolute values of whitened residuals?
>
> Best Wishes,
> Spencer Graves
>
> Joe Byers wrote:
>> Spencer,
>>
>> Thank you for your reply. I knew I should include an example, but
>> using my data would not be a simple one to provide. I hope to
>> produce a short one today since I have the afternoon. A simple
>> simulated data of an ARMA(1,1) with a trend and exogenous component.
>> Hopefully my thesis advisee will not take up too much of my afternoon.
>>
>> Please bear with me.
>>
>> Thank you
>> Joe W. Byers
>>
>> PS Dirk, How was your trip to Europe?
>>
>> Spencer Graves wrote:
>>> Hi, Diethelm, Martin, Dirk, Joe:
>>>
>>> DIETHELM, MARTIN, DIRK:
>>>
>>> Joe Byers identified a problem and an apparent fix for
>>> summary.fARMA; see below. Under certain circumstances,
>>> summary.fARMA apparently computed 14 standard errors for 26
>>> coefficients. I didn't see a simple, self-contained / replicatable
>>> example in his post, and I haven't found the time to try to create
>>> one. However, his suggested code changes look plausible to me.
>>>
>>> How do you think we should react to this and similar reports?
>>>
>>>
>>> JOE:
>>>
>>> Thanks for the suggested fix. For future reference, you might
>>> increase the chances for a quick, helpful reply by including a
>>> simple, self-contained / replicatable example? It also helps to
>>> mention the name of the package you are using. I have replied to
>>> many time series questions, partly as a means of familiarizing
>>> myself with the time series capabilities available in R. Without
>>> such an example, I often try to generate one, modifying an example
>>> from a help page or using made-up numbers; if random numbers are
>>> involved, one should always include 'set.seed'. For the 'armaFit
>>> problem you reported, I have not found the time to do so. Also, I
>>> don't know why others have not responded to your question, but your
>>> "Subject" line may not have caught their eye. For other suggestions
>>> on how to increase your chances of a quicker resolution of an issue,
>>> see the posting guide! "www.R-project.org/posting-guide.html".
>>>
>>> Best Wishes,
>>> Spencer Graves
>>>
>>> Joe Byers wrote:
>>>> All,
>>>>
>>>> I have developed a modification for the summary.fARMA() to handle
>>>> the different vector sizes. I am not sure if this is the best or
>>>> most efficient coding and would appreciate your comments. The
>>>> original code is commented out and my new code follows as
>>>> # the following lines replace to handle exongeous variables and
>>>> masking of paramters
>>>> # tval <- object$coef/object$se.coef
>>>> # prob <- 2 * (1 - pnorm(abs(tval)))
>>>> # ans$coefmat <- cbind(format(object$coef,digits=digits),
>>>> format(object$se.coef,digits=digits),
>>>> # format(tval,digits=digits),
>>>> prob=format.pval(prob,digits=digits))
>>>> # rownames(ans$coefmat)<-ans$coefmat$Row.names
>>>> # ans$coefmat<-ans$coefmat[2:length(ans$coefmat)]
>>>> # dimnames(ans$coefmat) <- list(names(object$coef), c(" Estimate",
>>>> # " Std. Error", " t value", "Pr(>|t|)"))
>>>> # row.names(ans$coefmat)<-toupper(row.names(ans$coefmat))
>>>> tval<-subset(object$coef,object$mask)/object$se.coef
>>>> prob <- 2 * (1 - pnorm(abs(tval)))
>>>> ans$coefmat<-merge(format(object$coef,digits=digits),
>>>>
>>>> cbind(format(object$se.coef,digits=digits),format(tval,digits=digits),format.pval(prob,digits=digits)),
>>>>
>>>>
>>>> by.x="row.names",by.y="row.names",sort=FALSE,all.x=TRUE,all.y=FALSE)
>>>> rownames(ans$coefmat)<-toupper(as.vector(ans$coefmat$Row.names))
>>>> #make the row indexes the parameter names
>>>> ans$coefmat<-ans$coefmat[2:length(ans$coefmat)] # drop first
>>>> column that merge create of row.names
>>>> names(ans$coefmat) <- c(" Estimate"," Std. Error", " t
>>>> value", "Pr(>|t|)")
>>>>
>>>> This code worked in my modified summary.fARMA() that outputs to
>>>> html files. The only problem is that the merge creates a
>>>> data.frame of the intersected rows of the two data sets and then
>>>> adds the outer (non-intersecting) rows from object$coef
>>>> data.frame. I can not figure out how to keep the order based on
>>>> object$coef. The full method is include below for reference.
>>>>
>>>> Thank you
>>>> Joe W. Byers
>>>> Professor of Finance
>>>> The University of Tulsa
>>>>
>>>>
>>>>> 4. arma model results when exogenouse variables used in
>>>>> ARMA(p=5, q=(1-6, 19)) (Joe Byers)
>>>>> ------------------------------
>>>>>
>>>>> Message: 4
>>>>> Date: Thu, 06 Jul 2006 11:38:02 -0500
>>>>> From: Joe Byers <joe-byers at utulsa.edu>
>>>>> Subject: [R-sig-Finance] arma model results when exogenouse variables
>>>>> used in ARMA(p=5, q=(1-6, 19))
>>>>> To: r-sig-finance at stat.math.ethz.ch
>>>>> Message-ID: <44AD3C6A.9000204 at utulsa.edu>
>>>>> Content-Type: text/plain; charset="iso-8859-1"
>>>>>
>>>>> All,
>>>>>
>>>>> I posted a message earlier about fitting an ARMA(p=5,q=(1-6,19))
>>>>> with exnogenouse variables (xreg=exovars), and masking
>>>>> (fixed=fixedparms) the MA terms 7-18 to get the model to run. I am
>>>>> reposting some of the message to help with understanding the
>>>>> summary() function problem
>>>>>
>>>>> My code is
>>>>> fixed=c(rep(NA,5),rep(NA,6),rep(0,12),NA,NA);#5 ar terms, 19 ma
>>>>> terms fixed 7-18 lag ma term, intercept
>>>>>
>>>>>
>>>>> fixed<-c(fixed,NA); #add the lin.trend term
>>>>>
>>>>> ts.results.2.ma<-armaFit(datats~arma(5,19),xreg=cbind(Lin.Trend=d$factor.Lin.Trend.),
>>>>>
>>>>> include.mean=TRUE,optim.control=list(maxit=500),fixed=fixed);
>>>>> summary.fARMA.HTML(ts.results.2.ma,title="AR(5) MA(1:6,19)
>>>>> with Intercept and Linear Trend");# this function is a
>>>>> modification of summary.fARMA to work with r2HTML replacing the
>>>>> cat() functions.
>>>>>
>>>>> The problem I have is that the @fit$coef(26) and @fit$se.coef(14)
>>>>> are of different lengths causing the t-stats calculation is
>>>>> summary to issue warnings.
>>>>> Warning messages:
>>>>> 1: longer object length
>>>>> is not a multiple of shorter object length in:
>>>>> object$coef/object$se.coef
>>>>> 2: number of rows of result
>>>>> is not a multiple of vector length (arg 2) in: cbind(1,
>>>>> format(object$coef, digits = digits), format(object$se.coef, The
>>>>> tval and prob are not correct.
>>>>>
>>>>> The summary() code is
>>>>> tval <- object$coef/object$se.coef
>>>>> prob <- 2 * (1 - pnorm(abs(tval)))
>>>>> ans$coefmat <- cbind(format(object$coef,digits=digits),
>>>>> format(object$se.coef,digits=digits),
>>>>> format(tval,digits=digits),
>>>>> prob=format.pval(prob,digits=digits))
>>>>> dimnames(ans$coefmat) <- list(names(object$coef), c(" Estimate",
>>>>> " Std. Error", " t value", "Pr(>|t|)"))
>>>>> row.names(ans$coefmat)<-toupper(row.names(ans$coefmat))
>>>>>
>>>>> I can modify tval as
>>>>> tval<-subset(object$coef,object$mask)/object$se.coef
>>>>> prob <- 2 * (1 - pnorm(abs(tval)))
>>>>>
>>>>> The subset functions removes all FALSE or masked MA terms from the
>>>>> coef vector. It will return a vector of length 14. Now I have to
>>>>> expand tval, prob and se.coef out to match the length of coef to
>>>>> get the the results printed correctly.
>>>>>
>>>>> Can anyone help me with this? It would probably be a good thing to
>>>>> include in future versions of rMetrics as well.
>>>>>
>>>>> Thank you
>>>>> Joe W. Byers
>>>>>
>>>>> -------------- next part --------------
>>>>> A non-text attachment was scrubbed...
>>>>> Name: joe-byers.vcf
>>>>> Type: text/x-vcard
>>>>> Size: 104 bytes
>>>>> Desc: not available
>>>>> Url :
>>>>> https://stat.ethz.ch/pipermail/r-sig-finance/attachments/20060706/a4c15bae/attachment-0001.vcf
>>>>>
>>>>>
>>>> summary.fARMA.html()
>>>> ##Created by Joe W. Byers, The University of Tulsa
>>>> ##This code is available under current GPL Licenses of R and Rmetrics
>>>> ##This is modification of Original code from summary.fARMA from
>>>> Rmetrics
>>>> ## at http://www.itp.phys.ethz.ch/econophysics/R/
>>>>
>>>> summary.fARMA.HTML<-function (object, doplot = FALSE, ...)
>>>> {
>>>> ans <- NULL
>>>> digits <- max(5, getOption("digits") - 4)
>>>> x <- object
>>>> object <- x at fit
>>>> ans$call <- object$call
>>>> ans$tsmodel <- object$tstitle
>>>> ans$residuals <- as.vector(na.omit(object$residuals))
>>>> if (length(ans$residuals) == 0) {
>>>> ans$var <- 0
>>>> }
>>>> if (length(ans$residuals) > 0) {
>>>> ans$var <- var(ans$residuals)
>>>> }
>>>> ans$sigma2 <- object$sigma2
>>>> # the following lines replace to handle exongeous variables and
>>>> masking of paramters
>>>> # tval <- object$coef/object$se.coef
>>>> # prob <- 2 * (1 - pnorm(abs(tval)))
>>>> # ans$coefmat <- cbind(format(object$coef,digits=digits),
>>>> format(object$se.coef,digits=digits),
>>>> # format(tval,digits=digits),
>>>> prob=format.pval(prob,digits=digits))
>>>> # rownames(ans$coefmat)<-ans$coefmat$Row.names
>>>> # ans$coefmat<-ans$coefmat[2:length(ans$coefmat)]
>>>> # dimnames(ans$coefmat) <- list(names(object$coef), c(" Estimate",
>>>> # " Std. Error", " t value", "Pr(>|t|)"))
>>>> # row.names(ans$coefmat)<-toupper(row.names(ans$coefmat))
>>>> tval<-subset(object$coef,object$mask)/object$se.coef
>>>> prob <- 2 * (1 - pnorm(abs(tval)))
>>>> ans$coefmat<-merge(format(object$coef,digits=digits),
>>>>
>>>> cbind(format(object$se.coef,digits=digits),format(tval,digits=digits),format.pval(prob,digits=digits)),
>>>>
>>>>
>>>> by.x="row.names",by.y="row.names",sort=FALSE,all.x=TRUE,all.y=FALSE)
>>>> rownames(ans$coefmat)<-toupper(as.vector(ans$coefmat$Row.names))
>>>> ans$coefmat<-ans$coefmat[2:length(ans$coefmat)]
>>>> names(ans$coefmat) <- c(" Estimate"," Std. Error", " t
>>>> value", "Pr(>|t|)")
>>>> if (object$tsmodel == "ar") {
>>>> ans$aic <- (object$n.used * (1 + log(2 * pi)) + object$n.used *
>>>> log(ans$var) + 2 * length(object$coef))
>>>> }
>>>> if (object$tsmodel == "arma") {
>>>> ans$aic <- (object$n.used * (1 + log(2 * pi)) + object$n.used *
>>>> log(ans$var) + 2 * length(object$coef))
>>>> ans$css <- object$css
>>>> }
>>>> if (object$tsmodel == "arima") {
>>>> ans$aic <- object$aic
>>>> ans$loglik <- object$loglik
>>>> }
>>>> if (object$tsmodel == "fracdiff") {
>>>> doplot <- FALSE
>>>> }
>>>> HTML("Title: ")
>>>> HTML(x at title)
>>>> HTML("Call: ")
>>>> HTML(object$call)
>>>> HTML(c("Model: ", object$tstitle))#, "", sep = "")
>>>> HTML("Coefficient(s):")
>>>> digits <- max(5, getOption("digits") - 4)
>>>> t1<-data.frame(object$coef)#copy to dataframe
>>>> t1<-data.frame(t(t1)) #traspose for reporting
>>>> names(t1)<-toupper(names(t1))
>>>> row.names(t1)<-" " # rename row name
>>>> HTML(t1,digits=digits)
>>>> #HTML(print.default(format(object$coef, digits = digits),
>>>> print.gap = 2, quote = FALSE))
>>>> digits <- max(5, getOption("digits") - 4)
>>>> if (length(object$residuals) > 2) {
>>>> HTML("Residuals:")
>>>> rq <- as.data.frame(t(structure(quantile(ans$residuals),
>>>> names = c("Min",
>>>> "1Q", "Median", "3Q", "Max"))))
>>>> row.names(rq)<-' '
>>>> HTML(rq,digits=digits)
>>>> HTML("Moments: ")
>>>> skewness <- sum((ans$residuals -
>>>> mean(ans$residuals))^3/sqrt(var(ans$residuals))^3)/length(ans$residuals)
>>>>
>>>> kurtosis <- sum((ans$residuals -
>>>> mean(ans$residuals))^4/var(ans$residuals)^2)/length(ans$residuals) -
>>>> 3
>>>> stats <- as.data.frame(t(structure(c(skewness, kurtosis),
>>>> names = c("Skewness",
>>>> "Kurtosis"))))
>>>> row.names(stats)<-" "
>>>> HTML(stats,digits=digits)
>>>> }
>>>> HTML("Coefficient(s):")
>>>> signif.stars <- getOption("show.signif.stars")
>>>> #HTML(printCoefmat(ans$coefmat, digits=digits, signif.stars =
>>>> signif.stars, ...))
>>>> HTML(ans$coefmat, digits=digits, signif.stars = signif.stars)
>>>> if (x at fit$tsmodel == "ar") {
>>>> t1<-data.frame(c(format(object$sigma2, digits =
>>>> digits),format(round(object$aic, digits))),
>>>> row.names=c("sigma^2 estimated as: ","AIC
>>>> Criterion: "))
>>>> names(t1)<-" "
>>>> HTML(t1)
>>>> }
>>>> if (x at fit$tsmodel == "arma") {
>>>> t1<-data.frame(c(format(object$sigma2, digits =
>>>> digits),format(round(object$css, digits = digits))),
>>>> row.names=c("sigma^2 estimated as: ", "Conditional
>>>> Sum-of-Squares: "))
>>>> names(t1)<-" "
>>>> HTML(t1)
>>>> }
>>>> if (x at fit$tsmodel == "arima") {
>>>> cm <- object$call$method
>>>> if (is.null(cm) || cm != "CSS") {
>>>> t1<-data.frame(c(format(object$sigma2, digits = digits),
>>>> format(round(object$loglik, digits)),
>>>> format(round(object$aic, digits))),
>>>> row.names=c("sigma^2 estimated as: ", "log
>>>> likelihood: ",
>>>> "AIC Criterion: ",))
>>>> names(t1)<-" "
>>>> HTML(t1)
>>>> }
>>>> else {
>>>> t1<-data.frame(c(format(object$sigma2, digits = digits),
>>>> format(round(object$loglik, digits))),
>>>> row.names=c("sigma^2 estimated as: ", "log
>>>> likelihood: "))
>>>> names(t1)<-" "
>>>> HTML(t1)
>>>> }
>>>> }
>>>> if (doplot)
>>>> plot.fARMA(x, ...)
>>>> HTML(c("Description: ",x at description))
>>>> invisible()
>>>> }
>>>>
>>>>
>>>>
>>>> _______________________________________________
>>>> R-SIG-Finance at stat.math.ethz.ch mailing list
>>>> https://stat.ethz.ch/mailman/listinfo/r-sig-finance
-------------- next part --------------
A non-text attachment was scrubbed...
Name: joe-byers.vcf
Type: text/x-vcard
Size: 295 bytes
Desc: not available
Url : https://stat.ethz.ch/pipermail/r-sig-finance/attachments/20060713/705147a8/attachment.vcf
More information about the R-SIG-Finance
mailing list