First we read in the data, which can be downloaded from yahoo finance. It is SPX 500 historical time series data in recent 5 years. The data set consists of 1553 observations and 7 variables. It records SPX 500 index changes from 2011 to 2016. I am interested in SPX 500 index’s closed price every business day, so I mainly focus on closed price.
Then we can plot the closed price over time to study its pattern. Since \(y_t\) denotes the log return at time t, we plot the log form of SPX500 index.
N <- nrow(dat)
SPX_dat <- dat$Close[N:1] # data are in reverse order in spx500.csv
par(mfrow=c(1,2))
plot(SPX_dat,type="l")
plot(log(SPX_dat),type="l")
SPX_demean_dat <- log(SPX_dat[2:N])-log(SPX_dat[1:N-1])
rproc1 <- "
  double eta_t = rnorm(0,sigma_eta);
  H = mu + phi * (H-mu) + eta_t;
"
rproc2.sim <-"
  Y_state = rnorm(0,exp(H/2));
"
rproc2.filt <-"
  Y_state = covarft;
"
SPX_rproc.sim <- paste(rproc1,rproc2.sim)
SPX_rproc.filt <- paste(rproc1,rproc2.filt)
SPX_initializer <-"
  H = H_0;
  Y_state = rnorm(0,exp(H_0/2));
"
SPX_rmeasure <- "
  y = Y_state;
"
SPX_dmeasure <- "
  lik = dnorm(y,0,exp(H/2),give_log);
"
SPX_toEstimationScale <- "
 Tmu = mu;
 Tphi = logit(phi);
 Tsigma_eta = logit(sigma_eta);
"
SPX_fromEstimationScale <- "
 Tmu = mu;
 Tphi = expit(phi);
 Tsigma_eta = expit(sigma_eta);
"
expit<-function(real){1/(1+exp(-real))}
logit<-function(p.arg){log(p.arg/(1-p.arg))}
require(pomp)
## Loading required package: pomp
SPX.filt <- pomp(data=data.frame(y=SPX_demean_dat,
                                   time=1:length(SPX_demean_dat)),
                   statenames=SPX_statenames,
                   paramnames=SPX_paramnames,
                   covarnames=SPX_covarnames,
                   times="time",
                   t0=0,
                   covar=data.frame(covarft=c(0,SPX_demean_dat),
                                    time=0:length(SPX_demean_dat)),
                   tcovar="time",
                   rmeasure=Csnippet(SPX_rmeasure),
                   dmeasure=Csnippet(SPX_dmeasure),
                   rprocess=discrete.time.sim(step.fun=Csnippet(SPX_rproc.filt),delta.t=1),
                   initializer=Csnippet(SPX_initializer),
                   toEstimationScale=Csnippet(SPX_toEstimationScale), 
                   fromEstimationScale=Csnippet(SPX_fromEstimationScale)
)
plot(SPX.filt)
params_test <- c(
  mu = -9,
  phi = 1,       
  sigma_eta = 0.1,
  H_0 = 0
)
sim1.sim <- pomp(SPX.filt, 
                 statenames=SPX_statenames,
                 paramnames=SPX_paramnames,
                 covarnames=SPX_covarnames,
                 rprocess=discrete.time.sim(step.fun=Csnippet(SPX_rproc.sim),delta.t=1)
)
sim1.sim <- simulate(sim1.sim,seed=493536993,params=params_test)
plot(sim1.sim)
sim1.filt <- pomp(sim1.sim, 
                  covar=data.frame(
                    covarft=c(obs(sim1.sim),NA),
                    time=c(timezero(sim1.sim),time(sim1.sim))),
                  tcovar="time",
                  statenames=SPX_statenames,
                  paramnames=SPX_paramnames,
                  covarnames=SPX_covarnames,
                  rprocess=discrete.time.sim(step.fun=Csnippet(SPX_rproc.filt),delta.t=1)
)
plot(sim1.filt)
run_level <- 3
sp500_Np <-          c(100,1e3,2e3)
sp500_Nmif <-        c(10, 100,200)
sp500_Nreps_eval <-  c(4,  10,  20)
sp500_Nreps_local <- c(10, 20, 20)
sp500_Nreps_global <-c(10, 20, 100)
## ----parallel-setup,cache=FALSE------------------------------------------
require(doParallel)
cl <- makeCluster(4)
registerDoParallel(cl)
clusterExport(cl,c("sim1.sim","sim1.filt","run_level","sp500_Nreps_eval","sp500_Nmif", "params_test", "sp500_Np","SPX.filt"))
cores <- 4  # The number of cores on this machine 
# registerDoParallel(cores)
mcopts <- list(set.seed=TRUE)
set.seed(396658101,kind="L'Ecuyer")
## ----pf1-----------------------------------------------------------------
stew(file=sprintf("pf1_level3.rda",run_level),{
  t.pf1 <- system.time(
    pf1 <- foreach(i=1:sp500_Nreps_eval[run_level],.packages='pomp',
                   .options.multicore=list(set.seed=TRUE)) %dopar% try(
                     pfilter(sim1.filt,Np=sp500_Np[run_level])
                   )
  )
},seed=493536993,kind="L'Ecuyer")
(L.pf1 <- logmeanexp(sapply(pf1,logLik),se=TRUE))
##                          se 
## -4.704765e+03  6.855144e-02
## ----mif1-----------------
stew("mif1_level3.rda",{
  t.if1 <- system.time({
    if1 <- foreach(i=1:sp500_Nreps_local[run_level],
                   .packages='pomp', .combine=c,
                   .options.multicore=list(set.seed=TRUE)) %dopar% try(
                     mif2(SPX.filt,
                          start=params_test,
                          Np=sp500_Np[run_level],
                          Nmif=sp500_Nmif[run_level],
                          cooling.type="geometric",
                          cooling.fraction.50=0.5,
                          transform=TRUE,
                          rw.sd = rw.sd(
                            mu  = 0.02,
                            phi      = 0.02,
                            sigma_eta = 0.02,
                            H_0       = ivp(0.1)
                          )
                     )
                   )
    
    L.if1 <- foreach(i=1:sp500_Nreps_local[run_level],.packages='pomp',
                     .combine=rbind,.options.multicore=list(set.seed=TRUE)) %dopar% 
                     {
                       logmeanexp(
                         replicate(sp500_Nreps_eval[run_level],
                                   logLik(pfilter(SPX.filt,params=coef(if1[[i]]),Np=sp500_Np[1]))
                         ),
                         se=TRUE)
                     }
    
  })
},seed=318817883,kind="L'Ecuyer")
r.if1 <- data.frame(logLik=L.if1[,1],logLik_se=L.if1[,2],t(sapply(if1,coef)))
if (run_level>1) 
  write.table(r.if1,file="sp500_params.csv",append=TRUE,col.names=FALSE,row.names=FALSE)
summary(r.if1$logLik,digits=5)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  5063.7  5077.6  5081.6  5081.4  5086.2  5103.3
pairs(~logLik+mu+phi+sigma_eta,data=subset(r.if1,logLik>max(logLik)-500))
stopCluster(cl)
require(doParallel)
cl <- makeCluster(4)
registerDoParallel(cl)
sp500_box <- rbind(
  mu    =c(-15,0),
  phi = c(0.75,1.05),
  sigma_eta = c(0.05,0.5),
  H_0 = c(-5,5)
)
clusterExport(cl,c("sim1.sim","sim1.filt","run_level","sp500_box", "if1","sp500_Nreps_eval","sp500_Nmif", "params_test", "sp500_Np","SPX.filt"))
cores <- 4  # The number of cores on this machine 
# registerDoParallel(cores)
mcopts <- list(set.seed=TRUE)
set.seed(396658101,kind="L'Ecuyer")
stew(file="box_eval_level3_new_para.rda",{
  t.box <- system.time({
    if.box <- foreach(i=1:sp500_Nreps_global[run_level],.packages='pomp',.combine=c,
                      .options.multicore=list(set.seed=TRUE)) %dopar%  
      mif2(
        if1[[1]],
        start=apply(sp500_box,1,function(x)runif(1,x))
      )
    
    L.box <- foreach(i=1:sp500_Nreps_global[run_level],.packages='pomp',.combine=rbind,
                     .options.multicore=list(set.seed=TRUE)) %dopar% {
                       set.seed(87932+i)
                       logmeanexp(
                         replicate(sp500_Nreps_eval[run_level],
                                   logLik(pfilter(SPX.filt,params=coef(if.box[[i]]),Np=sp500_Np[run_level]))
                         ), 
                         se=TRUE)
                     }
  })
},seed=290860873,kind="L'Ecuyer")
r.box <- data.frame(logLik=L.box[,1],logLik_se=L.box[,2],t(sapply(if.box,coef)))
if(run_level>1) write.table(r.box,file="sp500_params.csv",append=TRUE,col.names=FALSE,row.names=FALSE)
summary(r.box$logLik,digits=5)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  5073.6  5113.2  5118.5  5111.6  5121.1  5126.2
plot(r.box$logLik)
index <- which.max(r.box$logLik)
print(index)
## [1] 31
print(r.box[index,])
##             logLik logLik_se        mu       phi sigma_eta       H_0
## result.31 5126.167 0.1080819 -9.643573 0.9150861 0.3614072 -3.497375
## ----pairs_global--------------------------------------------------------
pairs(~logLik+mu+phi+sigma_eta+H_0,data=subset(r.box,logLik>max(logLik)-250))
plot(if.box)
## ----garch_benchmark-----------------------------------------------------
require(tseries)
fit.garch.benchmark <- garch(SPX_demean_dat,grad = "numerical", trace = FALSE)
L.garch.benchmark <- logLik(fit.garch.benchmark)
L.garch.benchmark
## 'log Lik.' 4946.96 (df=3)
[1] Discrete-Time Stochastic Volatility Models and MCMC-Based Statistical Inference, Nikolaus Hautsch, 2008
[2] Roman Liesenfeld & Robert C. Jung, 2000. “Stochastic volatility models: conditional normality versus heavy-tailed distributions,” Journal of Applied Econometrics, John Wiley & Sons, Ltd., vol. 15(2), pages 137-160.
[3] [wikipedia: Stochastic Volatility](https://en.wikipedia.org/wiki/Stochastic_volatility)
[4] Markov Chain Monte Carlo Methods for Generalized Stochastic Volatility Models (with Siddhartha Chib and Neil Shephard), Journal of Econometrics 2002, 108, 281-316.