First we read in the data, which can be downloaded from yahoo finance. It is SPX 500 historical time series data in recent 5 years. The data set consists of 1553 observations and 7 variables. It records SPX 500 index changes from 2011 to 2016. I am interested in SPX 500 index’s closed price every business day, so I mainly focus on closed price.
Then we can plot the closed price over time to study its pattern. Since \(y_t\) denotes the log return at time t, we plot the log form of SPX500 index.
N <- nrow(dat)
SPX_dat <- dat$Close[N:1] # data are in reverse order in spx500.csv
par(mfrow=c(1,2))
plot(SPX_dat,type="l")
plot(log(SPX_dat),type="l")
SPX_demean_dat <- log(SPX_dat[2:N])-log(SPX_dat[1:N-1])
rproc1 <- "
double eta_t = rnorm(0,sigma_eta);
H = mu + phi * (H-mu) + eta_t;
"
rproc2.sim <-"
Y_state = rnorm(0,exp(H/2));
"
rproc2.filt <-"
Y_state = covarft;
"
SPX_rproc.sim <- paste(rproc1,rproc2.sim)
SPX_rproc.filt <- paste(rproc1,rproc2.filt)
SPX_initializer <-"
H = H_0;
Y_state = rnorm(0,exp(H_0/2));
"
SPX_rmeasure <- "
y = Y_state;
"
SPX_dmeasure <- "
lik = dnorm(y,0,exp(H/2),give_log);
"
SPX_toEstimationScale <- "
Tmu = mu;
Tphi = logit(phi);
Tsigma_eta = logit(sigma_eta);
"
SPX_fromEstimationScale <- "
Tmu = mu;
Tphi = expit(phi);
Tsigma_eta = expit(sigma_eta);
"
expit<-function(real){1/(1+exp(-real))}
logit<-function(p.arg){log(p.arg/(1-p.arg))}
require(pomp)
## Loading required package: pomp
SPX.filt <- pomp(data=data.frame(y=SPX_demean_dat,
time=1:length(SPX_demean_dat)),
statenames=SPX_statenames,
paramnames=SPX_paramnames,
covarnames=SPX_covarnames,
times="time",
t0=0,
covar=data.frame(covarft=c(0,SPX_demean_dat),
time=0:length(SPX_demean_dat)),
tcovar="time",
rmeasure=Csnippet(SPX_rmeasure),
dmeasure=Csnippet(SPX_dmeasure),
rprocess=discrete.time.sim(step.fun=Csnippet(SPX_rproc.filt),delta.t=1),
initializer=Csnippet(SPX_initializer),
toEstimationScale=Csnippet(SPX_toEstimationScale),
fromEstimationScale=Csnippet(SPX_fromEstimationScale)
)
plot(SPX.filt)
params_test <- c(
mu = -9,
phi = 1,
sigma_eta = 0.1,
H_0 = 0
)
sim1.sim <- pomp(SPX.filt,
statenames=SPX_statenames,
paramnames=SPX_paramnames,
covarnames=SPX_covarnames,
rprocess=discrete.time.sim(step.fun=Csnippet(SPX_rproc.sim),delta.t=1)
)
sim1.sim <- simulate(sim1.sim,seed=493536993,params=params_test)
plot(sim1.sim)
sim1.filt <- pomp(sim1.sim,
covar=data.frame(
covarft=c(obs(sim1.sim),NA),
time=c(timezero(sim1.sim),time(sim1.sim))),
tcovar="time",
statenames=SPX_statenames,
paramnames=SPX_paramnames,
covarnames=SPX_covarnames,
rprocess=discrete.time.sim(step.fun=Csnippet(SPX_rproc.filt),delta.t=1)
)
plot(sim1.filt)
run_level <- 3
sp500_Np <- c(100,1e3,2e3)
sp500_Nmif <- c(10, 100,200)
sp500_Nreps_eval <- c(4, 10, 20)
sp500_Nreps_local <- c(10, 20, 20)
sp500_Nreps_global <-c(10, 20, 100)
## ----parallel-setup,cache=FALSE------------------------------------------
require(doParallel)
cl <- makeCluster(4)
registerDoParallel(cl)
clusterExport(cl,c("sim1.sim","sim1.filt","run_level","sp500_Nreps_eval","sp500_Nmif", "params_test", "sp500_Np","SPX.filt"))
cores <- 4 # The number of cores on this machine
# registerDoParallel(cores)
mcopts <- list(set.seed=TRUE)
set.seed(396658101,kind="L'Ecuyer")
## ----pf1-----------------------------------------------------------------
stew(file=sprintf("pf1_level3.rda",run_level),{
t.pf1 <- system.time(
pf1 <- foreach(i=1:sp500_Nreps_eval[run_level],.packages='pomp',
.options.multicore=list(set.seed=TRUE)) %dopar% try(
pfilter(sim1.filt,Np=sp500_Np[run_level])
)
)
},seed=493536993,kind="L'Ecuyer")
(L.pf1 <- logmeanexp(sapply(pf1,logLik),se=TRUE))
## se
## -4.704765e+03 6.855144e-02
## ----mif1-----------------
stew("mif1_level3.rda",{
t.if1 <- system.time({
if1 <- foreach(i=1:sp500_Nreps_local[run_level],
.packages='pomp', .combine=c,
.options.multicore=list(set.seed=TRUE)) %dopar% try(
mif2(SPX.filt,
start=params_test,
Np=sp500_Np[run_level],
Nmif=sp500_Nmif[run_level],
cooling.type="geometric",
cooling.fraction.50=0.5,
transform=TRUE,
rw.sd = rw.sd(
mu = 0.02,
phi = 0.02,
sigma_eta = 0.02,
H_0 = ivp(0.1)
)
)
)
L.if1 <- foreach(i=1:sp500_Nreps_local[run_level],.packages='pomp',
.combine=rbind,.options.multicore=list(set.seed=TRUE)) %dopar%
{
logmeanexp(
replicate(sp500_Nreps_eval[run_level],
logLik(pfilter(SPX.filt,params=coef(if1[[i]]),Np=sp500_Np[1]))
),
se=TRUE)
}
})
},seed=318817883,kind="L'Ecuyer")
r.if1 <- data.frame(logLik=L.if1[,1],logLik_se=L.if1[,2],t(sapply(if1,coef)))
if (run_level>1)
write.table(r.if1,file="sp500_params.csv",append=TRUE,col.names=FALSE,row.names=FALSE)
summary(r.if1$logLik,digits=5)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5063.7 5077.6 5081.6 5081.4 5086.2 5103.3
pairs(~logLik+mu+phi+sigma_eta,data=subset(r.if1,logLik>max(logLik)-500))
stopCluster(cl)
require(doParallel)
cl <- makeCluster(4)
registerDoParallel(cl)
sp500_box <- rbind(
mu =c(-15,0),
phi = c(0.75,1.05),
sigma_eta = c(0.05,0.5),
H_0 = c(-5,5)
)
clusterExport(cl,c("sim1.sim","sim1.filt","run_level","sp500_box", "if1","sp500_Nreps_eval","sp500_Nmif", "params_test", "sp500_Np","SPX.filt"))
cores <- 4 # The number of cores on this machine
# registerDoParallel(cores)
mcopts <- list(set.seed=TRUE)
set.seed(396658101,kind="L'Ecuyer")
stew(file="box_eval_level3_new_para.rda",{
t.box <- system.time({
if.box <- foreach(i=1:sp500_Nreps_global[run_level],.packages='pomp',.combine=c,
.options.multicore=list(set.seed=TRUE)) %dopar%
mif2(
if1[[1]],
start=apply(sp500_box,1,function(x)runif(1,x))
)
L.box <- foreach(i=1:sp500_Nreps_global[run_level],.packages='pomp',.combine=rbind,
.options.multicore=list(set.seed=TRUE)) %dopar% {
set.seed(87932+i)
logmeanexp(
replicate(sp500_Nreps_eval[run_level],
logLik(pfilter(SPX.filt,params=coef(if.box[[i]]),Np=sp500_Np[run_level]))
),
se=TRUE)
}
})
},seed=290860873,kind="L'Ecuyer")
r.box <- data.frame(logLik=L.box[,1],logLik_se=L.box[,2],t(sapply(if.box,coef)))
if(run_level>1) write.table(r.box,file="sp500_params.csv",append=TRUE,col.names=FALSE,row.names=FALSE)
summary(r.box$logLik,digits=5)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5073.6 5113.2 5118.5 5111.6 5121.1 5126.2
plot(r.box$logLik)
index <- which.max(r.box$logLik)
print(index)
## [1] 31
print(r.box[index,])
## logLik logLik_se mu phi sigma_eta H_0
## result.31 5126.167 0.1080819 -9.643573 0.9150861 0.3614072 -3.497375
## ----pairs_global--------------------------------------------------------
pairs(~logLik+mu+phi+sigma_eta+H_0,data=subset(r.box,logLik>max(logLik)-250))
plot(if.box)
## ----garch_benchmark-----------------------------------------------------
require(tseries)
fit.garch.benchmark <- garch(SPX_demean_dat,grad = "numerical", trace = FALSE)
L.garch.benchmark <- logLik(fit.garch.benchmark)
L.garch.benchmark
## 'log Lik.' 4946.96 (df=3)
[1] Discrete-Time Stochastic Volatility Models and MCMC-Based Statistical Inference, Nikolaus Hautsch, 2008
[2] Roman Liesenfeld & Robert C. Jung, 2000. “Stochastic volatility models: conditional normality versus heavy-tailed distributions,” Journal of Applied Econometrics, John Wiley & Sons, Ltd., vol. 15(2), pages 137-160.
[3] [wikipedia: Stochastic Volatility](https://en.wikipedia.org/wiki/Stochastic_volatility)
[4] Markov Chain Monte Carlo Methods for Generalized Stochastic Volatility Models (with Siddhartha Chib and Neil Shephard), Journal of Econometrics 2002, 108, 281-316.