1. Experience of air quality and temperature difference



2. Data and fitted models

2.1 ARMA model with no transformation on data

x <- read.csv(file="temp_pm25_2010_2012.csv",header=T)
head(x)
##         Time     pm2.5 TMAX TMIN  TD
## 1 2010-01-01 129.00000  -16 -102  86
## 2 2010-01-02 144.33333  -21  -63  42
## 3 2010-01-03  78.37500  -44  -99  55
## 4 2010-01-04  29.29167  -85 -134  49
## 5 2010-01-05  43.54167  -75 -156  81
## 6 2010-01-06  59.37500  -59 -167 108
t <- x$Time
p <- x$pm2.5
d <- x$TD
plot(ts(cbind(p,d)),main = "PM2.5(pm25) and temperature difference(td) for Beijing",xlab="Day")

spectrum(p,method='ar',main="Spectrum of p")

spectrum(d,method='ar',main="Spectrum of d")

arima(d,order=c(2,0,1), xreg=p)
## 
## Call:
## arima(x = d, order = c(2, 0, 1), xreg = p)
## 
## Coefficients:
##          ar1      ar2      ma1  intercept        p
##       1.3275  -0.3445  -0.9335    99.9070  -0.0384
## s.e.  0.0367   0.0328   0.0201     4.0202   0.0152
## 
## sigma^2 estimated as 998.2:  log likelihood = -5164.43,  aic = 10340.86
log_lik_ratio <- as.numeric(
   logLik(arima(d,xreg=p,order=c(2,0,1))) -
   logLik(arima(d,order=c(2,0,1)))
)
LRT_pval <- 1-pchisq(2*log_lik_ratio,df=1)
p2012 <- p[706:1060]
d2012 <- d[706:1060]
t2012 <- as.Date(t[706:1060])
arima(d2012,c(2,0,1),xreg=p2012)
## 
## Call:
## arima(x = d2012, order = c(2, 0, 1), xreg = p2012)
## 
## Coefficients:
##          ar1      ar2      ma1  intercept    p2012
##       1.3374  -0.3551  -0.9214    99.9762  -0.0475
## s.e.  0.0622   0.0575   0.0305     7.7602   0.0288
## 
## sigma^2 estimated as 1014:  log likelihood = -1732.6,  aic = 3477.21
plot(t2012,p2012,type="l",xlab="Year",ylab="")
par(new=TRUE)
plot(t2012,d2012,col="red",type="l",axes=FALSE,xlab="",ylab="")
axis(side=4, col="red")



2.2 Analysis of temporal differences

delta_p <- p[2:1060] - x$pm2.5[as.numeric(x$Time) %in% (as.numeric(t)-1)]
t2 <- as.Date(t[2:1060])
d2 <- d[2:1060]
plot(t2,d2,type="l",xlab="Day",ylab="")
par(new=TRUE)
plot(t2,delta_p,col="red",type="l",axes=FALSE,xlab="",ylab="")
axis(side=4,col="red")

log_lik_ratio <- as.numeric(
   logLik(arima(d2,xreg=delta_p,order=c(2,0,1))) -
   logLik(arima(d2,order=c(2,0,1)))
)
LRT_pval2 <- 1-pchisq(2*log_lik_ratio,df=1)
arima(d2,xreg=delta_p,order=c(2,0,1))
## 
## Call:
## arima(x = d2, order = c(2, 0, 1), xreg = delta_p)
## 
## Coefficients:
##          ar1      ar2      ma1  intercept  delta_p
##       1.3200  -0.3371  -0.9308    95.7360   0.0398
## s.e.  0.0375   0.0335   0.0207     3.8717   0.0124
## 
## sigma^2 estimated as 995.1:  log likelihood = -5157.9,  aic = 10327.79


3. Conclusions



4. Some supplementary analysis

4.1 Model selection by AIC

low_aic_table <- aic_table(d,5,6,xreg=p)
require(knitr)
kable(low_aic_table,digits=2)
MA0 MA1 MA2 MA3 MA4 MA5 MA6
AR0 10593.24 10403.03 10382.67 10365.41 10366.99 10362.64 10363.23
AR1 10363.26 10362.35 10352.36 10349.43 10345.73 10346.45 10347.71
AR2 10363.23 10345.17 10357.60 10353.82 10345.31 10346.36 10348.23
AR3 10360.08 10359.07 10343.19 10344.94 10346.65 10348.19 10350.14
AR4 10362.07 10359.67 10344.91 10336.20 10338.77 10350.29 10351.43
AR5 10357.49 10348.15 10346.67 10337.71 10339.39 10340.68 10347.48
low_aic_table <- aic_table(d,5,6,xreg=delta_p)
require(knitr)
kable(low_aic_table,digits=2)
MA0 MA1 MA2 MA3 MA4 MA5 MA6
AR0 10593.24 10403.03 10382.67 10365.41 10366.99 10362.64 10363.23
AR1 10363.26 10362.35 10352.36 10349.43 10345.73 10346.45 10347.71
AR2 10363.23 10345.17 10357.60 10353.82 10345.31 10346.36 10348.23
AR3 10360.08 10359.07 10343.19 10344.94 10346.65 10348.19 10350.14
AR4 10362.07 10359.67 10344.91 10336.20 10338.77 10350.29 10351.43
AR5 10357.49 10348.15 10346.67 10337.71 10339.39 10340.68 10347.48
arima(d,xreg=p,order=c(4,0,3))
## 
## Call:
## arima(x = d, order = c(4, 0, 3), xreg = p)
## 
## Coefficients:
##          ar1     ar2      ar3      ar4     ma1      ma2      ma3
##       0.1860  0.9442  -0.0856  -0.0848  0.2190  -0.8632  -0.1981
## s.e.  0.8132  0.3350   0.6876   0.2340  0.8139   0.0432   0.7051
##       intercept        p
##         99.8480  -0.0401
## s.e.     4.0311   0.0152
## 
## sigma^2 estimated as 991.7:  log likelihood = -5160.98,  aic = 10341.96


4.2 Residual analysis

r <- resid(arima(d,xreg=p,order=c(2,0,1)))
plot(r)

acf(r)

5. References

The PM2.5 data is collected from U.S. Department of State Air Quality Monitoring Program http://www.stateair.net/web/historical/1/1.html The temperature data is collected from National Climite Data Center.