Search This Blog

Wednesday, April 18, 2012

R: FORECASTING AND TIME SERIES # 10




# Example 10.1.
# US milk production data
# Page 268
data(milk)
plot(milk,ylab="Amount of milk produced",xlab="Year",type="o")
# Page 269
plot(diff(milk),ylab="Amount of milk produced: First differences",xlab="Year",type="o")
points(y=diff(milk),x=time(diff(milk)),pch=as.vector(season(diff(milk))),cex=1) 

# Example 10.2.
# MA(1) simulation with s=12
# Page 271
seasonal.ma1.sim <- arima.sim(list(order = c(0,0,12), ma = c(rep(0,11),0.9)), n = 200)
plot(seasonal.ma1.sim,ylab=expression(Y[t]),xlab="Time",type="o")
# ACF/PACF: Population and sample
# ARMAacf function includes the k=0 lag for ACF
# Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF
# Page 272
par(mfrow=c(2,2))
y = ARMAacf(ma = c(rep(0,11),0.9), lag.max = 50)
y = y[2:51]
plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",
      ylab = "Autocorrelation", main = "Population ACF")
abline(h = 0)
y = ARMAacf(ma = c(rep(0,11),0.9), lag.max = 50, pacf=TRUE)
plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",
      ylab = "Partial autocorrelation", main = "Population PACF")
abline(h = 0)
acf(seasonal.ma1.sim,main="Sample ACF",lag.max=50)
pacf(seasonal.ma1.sim,main="Sample PACF",lag.max=50)

# Example 10.3.
# AR(1) simulation with s=12
# Page 275
seasonal.ar1.sim <- arima.sim(list(order = c(12,0,0), ar = c(rep(0,11),0.9)), n = 200)
plot(seasonal.ar1.sim,ylab=expression(Y[t]),xlab="Time",type="o")
# ACF/PACF: Population and sample
# ARMAacf function includes the k=0 lag for ACF
# Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF
# Page 276
par(mfrow=c(2,2))
y = ARMAacf(ar = c(rep(0,11),0.8), lag.max = 50)
y = y[2:51]
plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",
      ylab = "Autocorrelation", main = "Population ACF")
abline(h = 0)
y = ARMAacf(ar = c(rep(0,11),0.8), lag.max = 50, pacf=TRUE)
plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",
      ylab = "Partial autocorrelation", main = "Population PACF")
abline(h = 0)
acf(seasonal.ar1.sim,main="Sample ACF",lag.max=50)
pacf(seasonal.ar1.sim,main="Sample PACF",lag.max=50)

# MA(1) x MA(1)_12 process
# MA(1) x AR(1)_12 process
# Theoretical ACF and PACF
# Page 282
par(mfrow=c(2,2))
phi.ar12=c(rep(0,11),0.9)
theta.Theta=c(-0.5,rep(0,10),-0.9,0.45)
# ARMAacf function includes the k=0 lag for ACF
# Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF
y = acf.ma.ma = ARMAacf(ma=theta.Theta,lag.max=50)
y = y[2:51]
plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",
      ylab = "Autocorrelation", main = "MA1*MA1(12) ACF")
abline(h=0)
pacf.ma.ma = ARMAacf(ma=theta.Theta,lag.max=50,pacf=T)
plot(pacf.ma.ma,type='h',xlab="k",ylab="PACF",main = "MA1*MA1(12) PACF")
abline(h=0)
y = acf.ma.ar = ARMAacf(ar=phi.ar12,ma=-0.5,lag.max=50)
y = y[2:51]
plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",
      ylab = "Autocorrelation", main = "MA1*AR1(12) ACF")
abline(h=0)
pacf.ma.ar = ARMAacf(ar=phi.ar12,ma=-0.5,lag.max=50,pacf=T)
plot(pacf.ma.ar,type='h',xlab="k",ylab="PACF",main = "MA1*AR1(12) PACF")
abline(h=0)

# Example 10.4
# Denver, CO Bus/Rail boarding data
# Log-transformed
# Page 283
data(boardings)
# "boardings" is a multivariate dataset (2 series)
# Number of boardings is the first series
boardings = boardings[,1]
plot(boardings,ylab="Number of boardings (log-transformed)",xlab="Year",type="o")
points(y=boardings,x=time(boardings),pch=as.vector(season(boardings)))
# Sample ACF and PACF
# Plots were constructed separately
# Page 285
acf(as.vector(boardings),main="Sample ACF",lag.max=40)
pacf(as.vector(boardings),main="Sample PACF",lag.max=40)
# Fit ARMA(0,3) x ARMA(1,0)_{12}
boardings.arma03.arma10 = arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12))
boardings.arma03.arma10
# Residual analysis
hist(rstandard(boardings.arma03.arma10),xlab="Standardised residuals",main="")
qqnorm(rstandard(boardings.arma03.arma10),main="")
qqline(rstandard(boardings.arma03.arma10))
# Shapiro-Wilk and runs tests
shapiro.test(rstandard(boardings.arma03.arma10))
runs(rstandard(boardings.arma03.arma10))
tsdiag(boardings.arma03.arma10,gof=20,omit.initial=F)
# Overfitting
arima(boardings,order=c(1,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12))
arima(boardings,order=c(0,0,4),method='ML',seasonal=list(order=c(1,0,0),period=12))
arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(2,0,0),period=12))
arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,1),period=12))
# Forecasting
# Full data set: 8/00-3/06
boardings.arma03.arma10.fit = arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12))
# MMSE forecasts on log scale
boardings.arma03.arma10.predict <- predict(boardings.arma03.arma10.fit,n.ahead=12)
round(boardings.arma03.arma10.predict$pred,3)
round(boardings.arma03.arma10.predict$se,3)
# Display prediction intervals (12 months ahead) on log scale
year.temp = c(2006.250,2006.333,2006.416,2006.500,2006.583,2006.666,2006.750,2006.833,2006.916,2007,2007.083,2007.166)
# Compute prediction intervals (on log scale)
lower.pi<-boardings.arma03.arma10.predict$pred-qnorm(0.975,0,1)*boardings.arma03.arma10.predict$se
upper.pi<-boardings.arma03.arma10.predict$pred+qnorm(0.975,0,1)*boardings.arma03.arma10.predict$se
data.frame(Month=year.temp,lower.pi,upper.pi)
# Original series starts at Month = Aug 2000
# Note: Argument n1=c(2003,1) starts plot at Time = Jan 2003
# Note: Argument pch=16 produces a small black circle (MMSE prediction)
plot(boardings.arma03.arma10.fit,n.ahead=12,col='red',type='b',pch=16,n1=c(2003,1),ylab="Number of boardings (log-transformed)",xlab="Year")
# Put horizontal line at ML estimate of overall mean
abline(h=coef(boardings.arma03.arma10.fit)[names(coef(boardings.arma03.arma10.fit))=='intercept'])
# Put prediction interval lines on plot (darker than default)
lines(y=lower.pi,x=year.temp,lwd=2,col="red",lty="dashed")
lines(y=upper.pi,x=year.temp,lwd=2,col="red",lty="dashed")
# MMSE forecasts back-transformed (to original scale)
denver.boardings.predict <- round(exp(boardings.arma03.arma10.predict$pred + (1/2)*(boardings.arma03.arma10.predict$se)^2),3)
denver.boardings.predict
# Compute prediction intervals (on original scale)
data.frame(Month=year.temp,lower.pi=exp(lower.pi),upper.pi=exp(upper.pi))

# Example 10.5.
# US milk production data
# Plots were constructed separately
# Page 292
data(milk)
plot(milk,ylab="Amount of milk produced",xlab="Year",type="o")
plot(diff(milk),ylab="First differences",xlab="Year",type="o")
plot(diff(milk,lag=12),ylab="First seasonal differences (s=12)",xlab="Year",type="o")
plot(diff(diff(milk),lag=12),ylab="Combined first differences",xlab="Year",type="o")
# Model specification
# d=1, D=1, and s=12
# Plots were constructed separately
# Page 295
# Combined differences
milk.diff = diff(diff(milk,lag=12))
acf(as.vector(milk.diff),main="Sample ACF",lag.max = 48)
pacf(as.vector(milk.diff),main="Sample PACF",lag.max = 48)
# Fit and diagnosis
# This model was considered but not displayed in the notes.
# ARIMA(0,1,0) x ARIMA(0,1,1)_{12}
milk.arima010.arima011 = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(0,1,1),period=12))
milk.arima010.arima011
# ARIMA(0,1,0) x ARIMA(3,1,0)_{12}
milk.arima010.arima310 = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,0),period=12))
milk.arima010.arima310
# Residual analysis
hist(rstandard(milk.arima010.arima310),xlab="Standardised residuals",main="")
qqnorm(rstandard(milk.arima010.arima310),main="")
qqline(rstandard(milk.arima010.arima310))
# Shapiro-Wilk and runs tests
shapiro.test(rstandard(milk.arima010.arima310))
runs(rstandard(milk.arima010.arima310))
tsdiag(milk.arima010.arima310,gof=30,omit.initial=T)
# Overfitting
arima(milk,order=c(1,1,0),method='CLS',seasonal=list(order=c(3,1,0),period=12))
arima(milk,order=c(0,1,1),method='ML',seasonal=list(order=c(3,1,0),period=12))
arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,0),period=12))
arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,1),period=12))
# Forecasting
# Full data set: 1/94-12/05
milk.arima010.arima310.fit = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,0),period=12))
# MMSE forecasts
milk.arima010.arima310.predict <- predict(milk.arima010.arima310.fit,n.ahead=24)
round(milk.arima010.arima310.predict$pred,3)
round(milk.arima010.arima310.predict$se,3)
# Display prediction intervals (24 months ahead)
year.temp = c(2006,2006.083,2006.166,2006.250,2006.333,2006.416,2006.500,2006.583,2006.666,2006.750,2006.833,2006.916)
year.temp.2 = c(year.temp,year.temp+1)
# Compute prediction intervals
lower.pi<-milk.arima010.arima310.predict$pred-qnorm(0.975,0,1)*milk.arima010.arima310.predict$se
upper.pi<-milk.arima010.arima310.predict$pred+qnorm(0.975,0,1)*milk.arima010.arima310.predict$se
data.frame(Month=year.temp.2,lower.pi,upper.pi)
# Original series starts at Month = Jan 1994
# Note: Argument n1=c(2004,1) starts plot at Time = Jan 2004
# Note: Argument pch=16 produces a small black circle (MMSE prediction)
plot(milk.arima010.arima310.fit,n.ahead=24,col='red',type='b',pch=16,n1=c(2004,1),ylab="Amount of milk produced",xlab="Year")
# Put prediction interval lines on plot (darker than default)
lines(y=lower.pi,x=year.temp.2,lwd=2,col="red",lty="dashed")
lines(y=upper.pi,x=year.temp.2,lwd=2,col="red",lty="dashed")

# Example 10.6
# Australian clay brick production
# Page 301
brick <- ts(read.table(file = "C:\\Users\\Tebbs\\My Documents\\texfiles\\Classes\\USC\\stat520\\f11\\data\\brick.txt"),start=1956,freq=4)
plot(brick,ylab="Australian clay brick production (in millions)",xlab="Time",type="o")
# Box Cox analysis
BoxCox.ar(brick)
# Square root transformation
sqrt.brick = sqrt(brick)
# Plots were constructed separately
plot(sqrt.brick,ylab="Brick production (Square-root transformed)",xlab="Time",type="o")
plot(diff(sqrt.brick),ylab="First differences",xlab="Year",type="o")
plot(diff(sqrt.brick,lag=4),ylab="First seasonal differences (s=4)",xlab="Year",type="o")
plot(diff(diff(sqrt.brick),lag=4),ylab="Combined first differences",xlab="Year",type="o")
# Combined differences of sqrt.brick
sqrt.brick.diff = diff(diff(sqrt.brick),lag=4)
# Plots were constructed separately
acf(as.vector(sqrt.brick.diff),main="Sample ACF",lag.max=32)
pacf(as.vector(sqrt.brick.diff),main="Sample PACF",lag.max=32)
# Initial model choice
sqrt.brick.arima010.arima410 = arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,0),period=4))
sqrt.brick.arima010.arima410
# Residual analysis
hist(rstandard(sqrt.brick.arima010.arima410),xlab="Standardised residuals",main="")
qqnorm(rstandard(sqrt.brick.arima010.arima410),main="")
qqline(rstandard(sqrt.brick.arima010.arima410))
# Shapiro-Wilk and runs tests
shapiro.test(rstandard(sqrt.brick.arima010.arima410))
runs(rstandard(sqrt.brick.arima010.arima410))
tsdiag(sqrt.brick.arima010.arima410,gof=20,omit.initial=T)
# Overfitting
arima(sqrt.brick,order=c(1,1,0),method='ML',seasonal=list(order=c(4,1,0),period=4))
arima(sqrt.brick,order=c(0,1,1),method='ML',seasonal=list(order=c(4,1,0),period=4))
arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(5,1,0),period=4))
arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,1),period=4))

No comments:

Post a Comment

Thank you