####### Sample Code for Problem #1 #######

### Read your file
sample_file<-"/Users/acspearot/Documents/Classes/Econ 217/Project ANS 19-2/Sample_Q1.csv"
sample_file_lags<-"/Users/acspearot/Documents/Classes/Econ 217/Project ANS 19-2/Sample_Q1_Lags.csv"
y<-as.numeric(read.csv(sample_file,header=TRUE)$x)
phis<-read.csv(sample_file_lags,header=TRUE)

### For this question, the dataset is generated by a time series with the following lag coefficients
# phi1 phi2 phi3 phi4 phi5
#  0.4    0  0.1  0.1 0.05

# Plot the series and its autocorrelation function
par(mfrow=c(1,2))
plot(y,type='l')
acf(y,lag.max=10,type="correlation",cex.lab=2,cex.axis=2,lwd=3,cex.main=2)

## The autocorrelation function suggests that the first lag is strongest, and that the 3rd and 4th are also reasonably strong. After the 4th, the correlation begins to fall. So it seems as if all are significant, but since the ACF is not a rigorous test, we will use a formal procedure to evaluate things further.

# Since we're looking for a specification in which some lags are zero and others are not, we will use the lasso procedure to find the solution. Other methods might also work, so we will grade carefully if other methods are attempted.

len<-length(y)

# Generate the dependent variable (starting at observation 6) and the five lags to test
dy<-y[6:(len)]
dy1<-y[5:(len-1)]
dy2<-y[4:(len-2)]
dy3<-y[3:(len-3)]
dy4<-y[2:(len-4)]
dy5<-y[1:(len-5)]

### Combine the lags into a matrix, and run the lasso
d<-cbind(dy1,dy2,dy3,dy4,dy5)
library(lars)
lasso.reg<-lars(d,dy,type="lasso",normalize=FALSE)

### Run a cross-validation
CVlasso<-cv.lars(d,dy,type="lasso",normalize=FALSE)

### Find the best value of the constraint that limits the absolute size of the coefficients
opt<-CVlasso$index[which.min(CVlasso$cv)]

### Generate the coefficient estimates at the optimal constraint, and bind them to the original parameters for easy comparison
phis<-rbind(phis,predict(lasso.reg,s=opt,type="coef",mode="fraction")$coefficients)

### Compare the lasso estimates with the true parameters
#> phis
#       phi1 phi2      phi3      phi4        phi5
#1 0.4000000    0 0.1000000 0.1000000 0.050000000
#2 0.3763152    0 0.1099917 0.1096376 0.009126586

####### Sample Code for Problem #2a #######

### Load the test file
y<-read.csv("/Users/acspearot/Documents/Classes/Econ 217/Project ANS 19-2/Sample_Q2.csv",header=TRUE)

## For the one-lag VAR, we simply regress each variable in period t on its own lag and the lag of the other variable.
n<-length(y$lusa)
summary(lm(y$lusa[2:n]~y$lusa[1:(n-1)]+y$luk[1:(n-1)]))
summary(lm(y$luk[2:n]~y$lusa[1:(n-1)]+y$luk[1:(n-1)]))

#> summary(lm(y$lusa[2:n]~y$lusa[1:(n-1)]+y$luk[1:(n-1)]))
#
#Call:
#lm(formula = y$lusa[2:n] ~ y$lusa[1:(n - 1)] + y$luk[1:(n - 1)])
#
#Residuals:
#      Min        1Q    Median        3Q       Max
#-0.053465 -0.010693  0.003997  0.017579  0.045019
#
#Coefficients:
#                  Estimate Std. Error t value Pr(>|t|)
#(Intercept)       0.040369   0.008443   4.782 2.76e-05 ***
#y$lusa[1:(n - 1)] 0.952913   0.101185   9.418 2.29e-11 ***
#y$luk[1:(n - 1)]  0.048923   0.127218   0.385    0.703
#---
#Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
#Residual standard error: 0.0253 on 37 degrees of freedom
#Multiple R-squared: 0.9959, Adjusted R-squared: 0.9956
#F-statistic: 4464 on 2 and 37 DF, p-value: < 2.2e-16
#
#> summary(lm(y$luk[2:n]~y$lusa[1:(n-1)]+y$luk[1:(n-1)]))
#
#Call:
#lm(formula = y$luk[2:n] ~ y$lusa[1:(n - 1)] + y$luk[1:(n - 1)])
#
#Residuals:
#      Min        1Q    Median        3Q       Max
#-0.096556 -0.009096  0.003137  0.014254  0.047854
#
#Coefficients:
#                  Estimate Std. Error t value Pr(>|t|)
#(Intercept)        0.02311    0.00877   2.635   0.0122 *
#y$lusa[1:(n - 1)]  0.10639    0.10511   1.012   0.3180
#y$luk[1:(n - 1)]   0.87263    0.13215   6.603 9.62e-08 ***
#---
#Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
#Residual standard error: 0.02628 on 37 degrees of freedom
#Multiple R-squared: 0.9932, Adjusted R-squared: 0.9928
#F-statistic: 2692 on 2 and 37 DF, p-value: < 2.2e-16

## In the results for this file, the only significant predictor of each variable in the next period is the lag of the same series. The coefficient on lagged US GDP in the UK equation is positive but falls short of significance at conventional levels (p = 0.318).
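### Optional cross-check (a sketch, not part of the original answer): the 'vars'
### package, assuming it is installed, fits the same one-lag VAR equation-by-equation
### via OLS, so its coefficient estimates should match the two lm() fits above.
library(vars)
summary(VAR(y[,c("lusa","luk")],p=1,type="const"))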
####### Sample Code for Problem #2b #######

library(tseries)

### First, let's use the augmented Dickey-Fuller test to evaluate whether we can reject a unit root in either the US or UK series.
adf.test(y$lusa,k=0)$p.value
adf.test(y$luk,k=0)$p.value

#> adf.test(y$lusa,k=0)$p.value
#[1] 0.5311834
#> adf.test(y$luk,k=0)$p.value
#[1] 0.5636095

## For both series, a unit root cannot be rejected, so we need to first-difference the data to remove it. But first, let's check whether the two series are cointegrated, in which case we could use the Engle-Granger technique and an error-correction model.
errors<-as.numeric(resid(lm(lusa~luk,data=y)))
adf.test(errors,k=0)$p.value

#> adf.test(errors,k=0)$p.value
#[1] 0.757459

# Since a unit root cannot be rejected in the residuals, the series are not cointegrated. We can only proceed if both differenced series reject a unit root; in that case we can regress one difference on the other and evaluate short-run changes.
adf.test(diff(y$lusa),k=0)$p.value
adf.test(diff(y$luk),k=0)$p.value

#> adf.test(diff(y$lusa),k=0)$p.value
#[1] 0.01
#Warning message:
#In adf.test(diff(y$lusa), k = 0) : p-value smaller than printed p-value
#> adf.test(diff(y$luk),k=0)$p.value
#[1] 0.01899603

## A unit root can be rejected in both differenced series. Thus, we can estimate the model in first differences.
summary(lm(diff(y$lusa)~diff(y$luk)))

#> summary(lm(diff(y$lusa)~diff(y$luk)))
#
#Call:
#lm(formula = diff(y$lusa) ~ diff(y$luk))
#
#Residuals:
#      Min        1Q    Median        3Q       Max
#-0.048307 -0.014519 -0.000249  0.012167  0.053633
#
#Coefficients:
#            Estimate Std. Error t value Pr(>|t|)
#(Intercept) 0.019739   0.005131   3.847 0.000443 ***
#diff(y$luk) 0.493459   0.133113   3.707 0.000666 ***
#---
#Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
#Residual standard error: 0.02162 on 38 degrees of freedom
#Multiple R-squared: 0.2656, Adjusted R-squared: 0.2463
#F-statistic: 13.74 on 1 and 38 DF, p-value: 0.0006663

## The results indicate that a one-unit change in log UK GDP is significantly associated with a 0.49 change in log US GDP.
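### Aside (a sketch for reference only): had the Engle-Granger residuals rejected a
### unit root above, the natural next step would have been an error-correction model,
### which regresses the short-run change in one series on the lagged equilibrium
### error and the short-run change in the other. Since cointegration was rejected
### here, the line below is purely illustrative of the form of that regression.
summary(lm(diff(y$lusa)~errors[1:(n-1)]+diff(y$luk)))

### As a cross-check on the residual-based cointegration step, 'tseries' also
### provides the Phillips-Ouliaris test, which should agree with the Engle-Granger
### conclusion for these data.
po.test(cbind(y$lusa,y$luk))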