# Scott Merrill # Oct 29 # linear and non-linear models rm (list = ls(all = TRUE)) require(datasets) # co2 data is monthly co2 concentration data in ppm from 1959 to 1997 data(co2) plot(co2, main = "Mauna Loa Atmospheric CO2 Concentration") co2 # 468 data points length(co2) # extract the January data from these data jan.co2 = co2[seq(from = 1, to = 468, by = 12)] length(jan.co2) # check co2 jan.co2 year = seq(1959,1997) length(year) plot(year,jan.co2) mod1=lm(jan.co2~year) summary(mod1) abline(mod1$coefficients) mtext("How does that fit?") plot(mod1$residuals) sum(mod1$residuals^2) # Change year to 1 to 39 year = seq(from =1, to = 39) # Doesn't change anything other than the x axis plot(year,jan.co2) mod1=lm(jan.co2~year) summary(mod1) abline(mod1$coefficients) mtext("Doesn't change anything other than the x-axis") ####### # Standard operating procedure is to linearize the data ###### # power transforms can be optimized using a boxcox tranformation # found in library (MASS) # but I couldn't get my transformation to work so... for (lambda in seq(from = 1.3, to = 1.5, by = .01)) { trans.year = year^lambda mod.fit=lm(jan.co2~trans.year) SSE = sum(mod.fit$residuals^2) print(c("SSE = ", SSE, "lambda = ", lambda)) } trans.year = year^1.37 plot(trans.year,jan.co2) mod2=lm(jan.co2~trans.year) summary(mod2) abline(mod2$coefficients) plot(mod2$residuals) sum(mod2$residuals^2) # Interpretation of this linear model is... plot(trans.year,jan.co2, main = "linear fit to transformed data is exceptional") abline(mod2$coefficients) mtext("Adjusted R-squared is psyck!", line = -1) mtext(summary(mod2)[8],line = -2) ######## # What if you were interested in the non-linear nature of the model? ######## # I think it is exponential mod.nls = nls(jan.co2~co2.base.value + b*year^gamma, start = list(co2.base.value = 0, b = 1, gamma = 1)) summary(mod.nls) coef(mod.nls) # This tells us that the best fit is: fit.y = 315.19326 + 0.32334 *year^1.36955 plot(year,fit.y, lwd=3, type = "l") points(year,jan.co2, lwd=2, col = "red") # What is the sum of squared error in this model sum((fit.y-jan.co2)^2) ################## # very similar fit. So why not just use the simple linear transformation? ############## rm(list = ls(all = TRUE)) require(datasets) data(co2) co2 # making a column of evenly spaced dates from 1959-1997 (actually year 1 to end of year 39) # each month equals 1/12th of a year so jan of year 1 equals 1, february of year 1 is 1+ 1/12, # march of year 1 = 1 + 2/12, on through december of year 1 = 1 + 11/12 z = seq(1,40-1/12, by = 1/12) length(z) # Checking to confirm that my date vector is of the same length as my co2 vector # creating and filling in a data matrix all.co2 = matrix(nrow = 468, ncol = 2, data = 0) all.co2[,1] = co2[seq(from = 1, to = 468, by = 1)] all.co2[,2] = z # Looking at the co2 data in a couple of different ways plot(all.co2[,2],all.co2[,1], type = "l", main = "Mauna Loa Atmospheric CO2 Concentration") plot(all.co2[,2],all.co2[,1], main = "Mauna Loa Atmospheric CO2 Concentration") # creating a simple linear model mod.lm.all= lm(all.co2[,1]~all.co2[,2]) # adding the model predictions to the plot abline(mod.lm.all$coefficients, lwd = 3, col = "red") sum(mod.lm.all$residuals^2) #### # SSE = 3194.08 #### # creating an exponential model mod.nls.exp = nls(all.co2[,1]~co2.base.value + b*all.co2[,2]^gamma, start = list(co2.base.value = 310, b = 1.3, gamma = 1)) summary(mod.nls.exp) fit.line.exp=315.26545+ 0.33431*all.co2[,2]^1.35882 # adding the exponential model predictions to the plot points(all.co2[,2],fit.line.exp, type = "l", col = "blue", lwd = 3) sum((fit.line.y - all.co2[,1])^2) #### # SSE = 2138.026 #### mod.nls.complex = nls(all.co2[,1]~co2.base.value + b*all.co2[,2]^gamma + zeta*sin(all.co2[,2]*pi*2) , start = list(co2.base.value = 315, b = .3, gamma = 1, zeta = 7)) summary(mod.nls.complex) fit.line.complex = 315.19481 + 0.33702*all.co2[,2]^ 1.35733 + 2.76377*sin(all.co2[,2]*pi*2) # adding the complex non-linear model predictions to the plot plot(all.co2[,2],fit.line.complex, type = "l", col = "green", lwd = 1) sum((fit.line.complex - all.co2[,1])^2) #### # SSE = 351.2988 #### # revisualizing the data as a line on the plot points(all.co2[,2],all.co2[,1], type = "l", col = "black", lwd = 2) # AIC values lower is better # AIC uses a measure of fit (how much error exists) # and penalizes models for increasing the number of parameters AIC(mod.lm.all) AIC(mod.nls.exp) AIC(mod.nls.complex)