# Simple linear regression walkthrough using the built-in 'cars' data.
# The data give the speed of cars and the distances taken to stop.
data(cars)

# Scatterplot ----
plot(cars$speed, cars$dist)        # plot(x, y) syntax
plot(dist ~ speed, data = cars)    # same plot using model (~) syntax [PREFERRED SYNTAX]
# plot(speed, dist, data = cars)   # NB: this syntax does not work ('speed' is
#                                  # not found), so it is left commented out

# Fitting the model ----
# lm() is a function for linear models
lm(cars$dist ~ cars$speed)

# Attributes of the model ----
mod <- lm(dist ~ speed, data = cars)
mod
attributes(mod)      # lists properties of the model
# Component names are spelled out in full; abbreviations such as mod$fitted
# only work through partial matching of `$`.
mod$fitted.values    # predicted values
mod$residuals        # residuals
mod$coefficients     # estimated coefficients

# Plotting the regression line ----
# Intercept and slope are taken from the fit instead of being typed in by hand.
abline(coef(mod)[["(Intercept)"]], coef(mod)[["speed"]], col = 2)  # regression line in red
abline(coef(mod), col = 3)   # alternate syntax in green
abline(mod, col = 4)         # alternate syntax in blue

# Assessing the residuals ----
cars$speed[4]     # x.4
cars$dist[4]      # y.4
fitted(mod)[4]    # y_hat.4
resid(mod)[4]     # resid.4 = y.4 - y_hat.4 (observed minus fitted)
cars$dist[4] - fitted(mod)[4]

hist(resid(mod))      # Histogram of the residuals
qqnorm(resid(mod))    # Normal QQ plot of the residuals
qqline(resid(mod))    # add the reference line

plot(resid(mod) ~ fitted(mod))   # Residuals versus fitted (predicted)
abline(0, 0)                     # Add a zero line to a residual plot

# Inference for the regression parameters ----
summary(mod)
anova(mod)

# CI for Beta_1 (95% --> alpha = .05)
# NOTE: the name `df` is kept for compatibility with later code; it masks
# stats::df() as a variable only, calls to df(...) still find the function.
df <- df.residual(mod)        # n - 2 residual degrees of freedom
t.crit <- qt(.975, df)        # .975 = 1 - alpha/2
b1.hat <- coef(summary(mod))["speed", "Estimate"]
se.b1 <- coef(summary(mod))["speed", "Std. Error"]
lwr.95 <- b1.hat - t.crit * se.b1   # lower endpoint of CI
upr.95 <- b1.hat + t.crit * se.b1   # upper endpoint of CI
c(lwr.95, upr.95)                   # 95% CI for Beta_1
confint(mod, "speed", level = 0.95) # built-in cross-check of the same interval

# Prediction at new x-values & CI vs. PI ----
x.new.df <- data.frame(speed = 12.5)   # NOTE: the var.name must match (speed)
x.new.df
predict(mod, x.new.df, interval = "confidence")
predict(mod, x.new.df, interval = "prediction")
predict(mod, x.new.df, se.fit = TRUE)

# Extrapolation ----
# Note: The 'cars' data was recorded in the 1920s & includes speeds up to 25 mph.
# Problem "ch05q07.txt" has recent data with speeds over 60 mph.
# Extrapolation: compare the straight-line fit from the 'cars' data with fits
# to the newer data set "ch05q07.txt".
mod <- lm(dist ~ speed, data = cars)
plot(dist ~ speed, data = cars)

# New data from chapter 5-7 ----
# NOTE(review): source() on a plain-http URL executes remote code unverified;
# consider keeping a reviewed local copy of this script instead.
# bookdata() and multifig() are not defined in this file; presumably they
# come from the sourced script -- confirm.
source("http://www.uvm.edu/~rsingle/stat221/data/scripts-221.R")
dat2 <- bookdata("ch05q07.txt")

plot(dat2$MPH, dat2$DIST)
mod2 <- lm(DIST ~ MPH, data = dat2)
mod     # older 'cars' data
mod2    # newer data from "ch05q07.txt"
abline(mod2, col = 2)   # red line for the SLR from new data
abline(mod, col = 3)    # green line for SLR from 'cars' data

# Same data with a square-root transform of the response
plot(dat2$MPH, sqrt(dat2$DIST))
mod3 <- lm(sqrt(DIST) ~ MPH, data = dat2)
mod3
abline(mod3)

# Side-by-side comparison ----
multifig(2, 2)   # presumably sets up a 2 x 2 panel layout -- confirm

# Page 1: data with fitted lines; plot(0, 0) is a filler for the 4th panel
plot(dist ~ speed, data = cars); abline(mod, col = 3)
plot(dat2$MPH, dat2$DIST); abline(mod2, col = 2)
plot(dat2$MPH, sqrt(dat2$DIST)); abline(mod3)
plot(0, 0)

# Page 2: residuals versus fitted values for each model.
# The previous code passed `mod$pred`, which does not exist on an lm object
# (it is NULL), so the residuals were plotted against their index instead.
plot(fitted(mod), resid(mod)); abline(h = 0)
plot(fitted(mod2), resid(mod2)); abline(h = 0)
plot(fitted(mod3), resid(mod3)); abline(h = 0)
plot(0, 0)