1.Introduction and Data Overview

##       year          month           crash          snowfall     
##  Min.   :2004   Min.   : 1.00   Min.   :17752   Min.   :  0.00  
##  1st Qu.:2007   1st Qu.: 3.75   1st Qu.:21926   1st Qu.:  0.00  
##  Median :2010   Median : 6.50   Median :24776   Median :  2.00  
##  Mean   :2010   Mean   : 6.50   Mean   :25688   Mean   : 15.71  
##  3rd Qu.:2013   3rd Qu.: 9.25   3rd Qu.:28182   3rd Qu.: 25.56  
##  Max.   :2016   Max.   :12.00   Max.   :41560   Max.   :105.50

2.Data Analysis and Model Choosing

par(mfrow=c(2,1),cex=0.8)
spectrum(mydata$crash,sub="",main="Unsmoothed Periodogram")
smoothed_periodogram = spectrum(mydata$crash,method="pgram",spans=c(5,5),main="Smoothed Periodogram for Traffic Crash",sub="")

periodogram_freq = smoothed_periodogram$freq[which.max(smoothed_periodogram$spec)]
## The estimated frequency through AR fit is :  0.08125 cycles per month
## The corrsponding period is : 12.30769  months, or  1.025641  years
n = seq(1,156)
Crash = mydata$crash
Trend = ts(loess(Crash~n,span=0.5)$fitted,start=2004,frequency=12)
Noise = ts(Crash-loess(Crash~n,span=0.1)$fitted,start=2004,frequency=12)
Cycles = Crash - Trend - Noise
plot(ts.union(Crash,Trend,Noise,Cycles),main="Decomposition of Traffic Crash as Trend + Noise + Cycles",xlab="Year")

\[ Y_{n} = \alpha +\beta z_{n}+\epsilon_{n} \] * \(\epsilon_{1:N}\) is a stationary, causal, invertable, Gaussian ARMA(p,q) model satisfying

\[ \phi(B)\epsilon_{n}=\psi(B)\omega_{n} \] * \(\omega_{n}\) a Gaussian white noise and

\[ \omega_{n} \sim N(0,\sigma^{2})\\ \phi(x) = 1 - \phi_{1}x -...- \phi_{p}x^{p}\\ \psi(x) = 1 + \psi_{1}x +...+ \psi_{q}x^{q} \]

## 
## Call:
## arima(x = mydata$crash, order = c(1, 0, 1), seasonal = list(order = c(1, 0, 
##     1), period = 12), xreg = mydata$snowfall)
## 
## Coefficients:
##          ar1      ma1    sar1     sma1  intercept  mydata$snowfall
##       0.9535  -0.7099  0.9903  -0.7896  25526.828          90.6088
## s.e.  0.0412   0.1144  0.0105   0.1027   5912.013          13.5145
## 
## sigma^2 estimated as 4744590:  log likelihood = -1432.31,  aic = 2878.62

\[ Crash_{n} = 25526.8 + 90.6\times Snowfall_{n} + \epsilon_{n} \]

\[ (1-0.95B)(1-0.99B^{12})\epsilon_{n}=(1 - 0.71B)(1 - 0.79B^{12})\omega_{n} \]

\[ \omega_{n} \sim N(0,\sigma^{2})\\ \]

3.Diagonistic

AR_roots = polyroot(c(1,-fit2$coef["ar1"],rep(0,10),-fit2$coef["sar1"],fit2$coef["sar1"]*fit2$coef["ar1"]))
MA_roots = polyroot(c(1,fit2$coef["ma1"],rep(0,10),fit2$coef["sma1"],fit2$coef["sma1"]*fit2$coef["ma1"]))
min(abs(AR_roots))
## [1] 1.000814
min(abs(MA_roots))
## [1] 1.019881

4.Result and Conclusion

library(forecast)
plot(time,mydata$crash,type="l",ylab="Crash",xlab="Year",main="Traffic Crash of Original Value and Fitted Value")
lines(time,fitted(fit2),col="red",lty="dashed")
legend("topright",c("Original Value","Fitted Value"),lty=c(1,2),col=c("black","red"))

5.Resource and Reference