Mumps is one of the contagious diseases, usually spreading through saliva or mucus from the mouth, nose, or throat (Gupta et al. 2005). outbreaks usually occur among people living in close-contact settings (Centers for disease contol and prevention website). Children are easily attacked by mumps since they are usually living in a crowded environmet. Thus outbreaks of mumps usually happen in winter and spring when students are attending classes.
For the data part, I get weekly cases of mumps in Michigan from the website Project Tycho http://www.tycho.pitt.edu. This mumps data has been thoroughly standardized. Besides, I download the population data from US census Bureau http://www.census.gov. In this project, I want to get some insight of the whole process of mumps transmission throught constructing a partial observed markov process.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 36.75 129.50 295.10 468.00 1962.00
\[\mu_{SE}(t) = \tfrac{\beta(t)}{P(t)}\,(I+\iota)^\alpha\,\zeta(t)\]
During school,\[ {\beta(t)}=(1+2(1-p)a)\beta_0\] During holiday,\[ {\beta(t)}=(1-2pa)\beta_0\]
The cases reported given true incidence are modelled as an overdispersed binomial distribution (He et al. 2009). The distribution is like
\[\mathbb{E}[\text{cases}|C] = \rho\,C\]
\[\mathrm{Var}[\text{cases}|C] = \rho\,(1-\rho)\,C + (\psi\,\rho\,C)^2\]
\[f(c|\rho,\psi,C) = \Phi(c+\tfrac{1}{2},\rho\,C,\rho\,(1-\rho)\,C+(\psi\,\rho\,C)^2)-\Phi(c-\tfrac{1}{2},\rho\,C,\rho\,(1-\rho)\,C+(\psi\,\rho\,C)^2),\]
## ----mumps_obsnames------------------------------------------------------
colnames(mumps_data)[2]=c('B')
## ----rprocess------------------------------------------------------------
mumps_rprocess <- Csnippet("
double beta, br, seas, foi, dw, births;
double rate[3], trans[3];
// cohort effect
if (fabs(t-floor(t)-251.0/365.0) < 0.5*dt)
br = cohort*birthrate/dt + (1-cohort)*birthrate;
else
br = (1.0-cohort)*birthrate;
// term-time seasonality
t = (t-floor(t))*365.25;
if ((t>=10&&t<=66) || (t>=74&&t<=169) || (t>=251&&t<=327) || (t>=332&&t<=357))
seas = 1.0+amplitude*0.3205/0.6794;
else
seas = 1.0-amplitude;
// transmission rate
beta = R0*gamma*seas;
// expected force of infection
foi = beta*pow(I+iota,alpha)/P;
// white noise (extrademographic stochasticity)
dw = rgammawn(sigmaSE,dt);
rate[0] = foi*dw/dt; // stochastic force of infection
rate[1] = sigma; // rate of ending of latent stage
rate[2] = gamma; // recovery
// Poisson births
births = rpois(0.2*br*dt);
// transitions between classes
reulermultinom(2,S,&rate[0],dt,&trans[0]);
reulermultinom(2,E,&rate[1],dt,&trans[1]);
reulermultinom(2,I,&rate[2],dt,&trans[2]);
S += births - trans[0];
E += trans[0] - trans[1];
I += trans[1] - trans[2];
R = P - S - E - I;
W += (dw - dt)/sigmaSE; // standardized i.i.d. white noise
H += trans[2]; // true incidence
")
## ----initializer------------------------------------------------------------
mumps_initializer <- Csnippet("
double m = P/(S_0+E_0+I_0+R_0);
S = nearbyint(m*S_0);
E = nearbyint(m*E_0);
I = nearbyint(m*I_0);
R = nearbyint(m*R_0);
W = 0;
H = 0;
")
## ----dmeasure------------------------------------------------------------
mumps_dmeasure <- Csnippet("
double m = rho*H;
double v = m*(1.0-rho+psi*psi*m);
double tol = 1.0e-18;
if (B > 0.0) {
lik = pnorm(B+0.5,m,sqrt(v)+tol,1,0)-pnorm(B-0.5,m,sqrt(v)+tol,1,0)+tol;
} else {
lik = pnorm(B+0.5,m,sqrt(v)+tol,1,0)+tol;
}
")
## ----rmeasure------------------------------------------------------------
mumps_rmeasure <- Csnippet("
double m = rho*H;
double v = m*(1.0-rho+psi*psi*m);
double tol = 1.0e-18;
B = rnorm(m,sqrt(v)+tol);
if (B > 0.0) {
B = nearbyint(B);
} else {
B = 0.0;
}
")
## ----scale------------------------------------------------------------
mumps_toEstimationScale <- Csnippet("
TR0 = log(R0);
Tsigma = log(sigma);
Tgamma = log(gamma);
Talpha = log(alpha);
Tiota = log(iota);
Trho = logit(rho);
Tcohort = logit(cohort);
Tamplitude = logit(amplitude);
TsigmaSE = log(sigmaSE);
Tpsi = log(psi);
to_log_barycentric (&TS_0, &S_0, 4);
")
mumps_fromEstimationScale <- Csnippet("
TR0 = exp(R0);
Tsigma = exp(sigma);
Tgamma = exp(gamma);
Talpha = exp(alpha);
Tiota = exp(iota);
Trho = expit(rho);
Tcohort = expit(cohort);
Tamplitude = expit(amplitude);
TsigmaSE = exp(sigmaSE);
Tpsi = exp(psi);
from_log_barycentric (&TS_0, &S_0, 4);
")
## ----construct-pomp-model------------------------------------------------------
mumps2 <- pomp(
data=subset(mumps_data,select = c("B","Date")),
times="Date",
t0=1969+1/13,
rprocess=euler.sim(
step.fun=mumps_rprocess,
delta.t=1/365
),
rmeasure=mumps_rmeasure,
dmeasure=mumps_dmeasure,
covar=covartable,
tcovar="Date",
#covarnames=c("P"),
fromEstimationScale=mumps_fromEstimationScale,
toEstimationScale=mumps_toEstimationScale,
zeronames=c("H","W"),
statenames=c("S","E","I","R","H","W"),
paramnames=c("R0","sigma","gamma","alpha","iota",
"rho","sigmaSE","psi","cohort","amplitude",
"S_0","E_0","I_0","R_0"),
#obsnames = mumps_obsnames,
initializer=mumps_initializer
)
run_level <- 2
switch(run_level,
{mumps_Np=1000; mumps_Nmif=10; mumps_Neval=10; mumps_Nglobal=8; mumps_Nlocal=10},
{mumps_Np=20000; mumps_Nmif=100; mumps_Neval=10; mumps_Nglobal=20; mumps_Nlocal=10},
{mumps_Np=60000; mumps_Nmif=300; mumps_Neval=10; mumps_Nglobal=100; mumps_Nlocal=20}
)
cores <- 4 # The number of cores on this machine
registerDoParallel(cores)
mcopts <- list(set.seed=TRUE)
set.seed(396658101,kind="L'Ecuyer")
## ----box_eval------------------------------------------------------------
stew(file=sprintf("box_eval-%d.rda",run_level),{
t_global <- system.time({
mifs_global <- foreach(i=1:mumps_Nglobal,.packages='pomp', .combine=c, .options.multicore=mcopts) %dopar%
mif2(
mumps2,
start=c(apply(mumps_box,1,function(x)runif(1,x[1],x[2]))),
Np=mumps_Np,
Nmif=mumps_Nmif,
cooling.type="geometric",
cooling.fraction.50=0.5,
transform=TRUE,
rw.sd=rw.sd(
R0=0.02,
sigma=0.02,
gamma=0.02,
alpha=0.02,
iota=0.02,
rho=0.02,
sigmaSE=0.02,
psi=0.02,
cohort=0.02,
amplitude=0.02,
S_0=ivp(0.02),
E_0=ivp(0.02),
I_0=ivp(0.02),
R_0=ivp(0.02))
)
})
},seed=1270401374,kind="L'Ecuyer")
## ----lik_global_eval-----------------------------------------------------
stew(file=sprintf("lik_global_eval-%d.rda",run_level),{
t_global_eval <- system.time({
liks_global <- foreach(i=1:mumps_Nglobal,.packages='pomp',.combine=rbind, .options.multicore=mcopts) %dopar% {
evals <- replicate(mumps_Neval, logLik(pfilter(mumps2,params=coef(mifs_global[[i]]),Np=mumps_Np)))
logmeanexp(evals, se=TRUE)
}
})
},seed=442141592,kind="L'Ecuyer")
results_global <- data.frame(logLik=liks_global[,1],logLik_se=liks_global[,2],t(sapply(mifs_global,coef)))
summary(results_global$logLik,digits=5)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3390.4 -1776.5 -1411.1 -1674.9 -1315.0 -1239.1
plot(mifs_global)
There are 14 parameters in total. We find the best parameter sets through global search with mifs. The diagnostic plot is shown above. In general, log likelihood achieves relatively high value, max log likelihood is -1239. From the convergence diagnostics, we can see loglik shows good convergence.
From the last iteration diagnostics, most of effective sampling size are larger than 1000. But it seems the particle filter fails to match the data at the begining of the time period. The effective sampling size are mainly smaller than hundreds around 1970. Increasing the number of particles may improve the performance. Different start points produce similar I and H process, but different parallel processes of S and R.
For the diagnostics plot of parameters, psi alpha, R0, sigma and sigmaSE seems to converge. gamma, iota, rho, amplitude and cohort seem need more iterations to coverge. It is interesting for the values of S, E, I, R initials, different start points all achieve its own convergence value. These parameters show similar trend as S and R state diagnostics. We can see similar pattern in pairs plot. S, E, I, R initials have little influence on log likelihood. This suggests that S, E, I, R initial parameters are not well identified by the model and data. We may need other supplementary information.
pairs(~logLik+S_0+E_0+I_0+R_0,data=subset(results_global,logLik>max(logLik)-550))
We get our parameter estimate from maximum log likelihood. Now, we simulate the mumps cases from the mle. Simulation data caputures the seasonality pattern as the original data. In addition, it is intended to reproduce the decreasing trend of mumps cases. In general, the model caputures the main feature of original data. However, the model tends to estimate the cases in 1969-1970 much higher than the original data. It also ignores two peaks pattern.
A reasonal hypothesis may be insufficient process of the model. One thing to notice is that, significant changes happen in 1967, vaccine against mumps starts. It is just two years before this time period. In our model, we only specify the new birth who are not immune to mumps entering into susceptible according to current vaccination rate. We do not specify the concrete process of vaccination. It is highly possible that when vaccination begins, there is a sharp decrease in mumps cases due to the decrease of susceptibles. However, due to the low vaccination rate at the begining, the mumps still transmitted and resulted in new infecious individuals. Therefore, we can specify the vaccination process to improve the model performance.
## ----sims1,fig.height=8--------------------------------------------------
a=which.max(results_global$logLik)
mumps2 %>%
simulate(params=coef(mifs_global[[a]]),nsim=9,as.data.frame=TRUE,include.data=TRUE) %>%
ggplot(aes(x=time,y=B,group=sim,color=(sim=="data")))+
guides(color=FALSE)+
geom_line()+facet_wrap(~sim,ncol=2)
## ----sims2---------------------------------------------------------------
mumps2 %>%
simulate(params=coef(mifs_global[[a]]),nsim=100,as.data.frame=TRUE,include.data=TRUE) %>%
subset(select=c(time,sim,B)) %>%
mutate(data=sim=="data") %>%
ddply(~time+data,summarize,
p=c(0.05,0.5,0.95),q=quantile(B,prob=p,names=FALSE)) %>%
mutate(p=mapvalues(p,from=c(0.05,0.5,0.95),to=c("lo","med","hi")),
data=mapvalues(data,from=c(TRUE,FALSE),to=c("data","simulation"))) %>%
dcast(time+data~p,value.var='q') %>%
ggplot(aes(x=time,y=med,color=data,fill=data,ymin=lo,ymax=hi))+
geom_ribbon(alpha=0.2)
\[LP=\tfrac{\delta}{1-exp(-\delta\mu_{EI})}\] \[IP=\tfrac{\delta}{1-exp(-\delta\mu_{IR})}\]
LP is 1.27 days, which means the latent period is rather short. Once exposed, the individual will be infectious. IP is 21.12 days, which is consistent with the data from Centers for disease contol and prevention website, saying people show symptoms about 16-18 days after infection.
The initial value of S_0 is rather high, about 47.8%. It may be part of the reason of very high mumps cases around 1970. Amplitude is seasonal pattern of mumps, high value at 0.87, suggesting the crowded environment is important reason for mumps transmission. The extra-demographic stochasticity parameter sigmaSE of 0.083, compared with measles case study (He et al. 2009), is relatively high value. It means extra-demographic stochasticity plays a role in mumps as well.
mle=results_global[which.max(results_global$logLik),3:16]
(mle)
## R0 sigma gamma alpha iota rho
## result.14 431.5988 557.0366 17.72854 0.5705136 3.661483 0.01554675
## sigmaSE psi cohort amplitude S_0 I_0
## result.14 0.08342795 0.3386664 0.9438985 0.8709689 0.4776558 0.006453151
## E_0 R_0
## result.14 0.002683586 0.5132074