# # Resampling Regression # This program looks at two separate multiple regression equations for predicting # combined SAT scores for Gruber's data. # It finds the empirical cutoff points for the 95% confidence interval. # It plots the results and prints out the confidence limits. dat <- read.table("http://www.uvm.edu/~dhowell/methods8/DataFiles/Tab15-1.dat", sep = "\t", header = TRUE) attach(dat) nreps <- 1000 #Set number of replications b1 <- numeric(nreps) b2 <- numeric(nreps) # Using Expend and LogPcxtSat model1 <- lm(SATcombined~Expend + LogPctSAT) b1obs <- model1$coeff[2] b2obs <- model1$coeff[3] # Start resampling for (i in 1:nreps) { sample.50 <- sort(sample(nrow(dat),50,replace = TRUE)) #Create a sample of row nmumbers sampdata <- dat[sample.50,] #Sample those rows model2 <- lm( sampdata$SATcombined~sampdata$Expend + sampdata$LogPctSAT) b1[i] <- model2$coeff[2] b2[i] <- model2$coeff[3] } seb1 <- sd(b1) #Compute standard errors--not necessarily needed seb2 <- sd(b2) cat("Results using Expend and LogPctSAT as predictors \n") cat("The standard errors of b1 and b2 are: ",seb1, seb2, "\n") par(mfrow=c(2,2)) hist(b1, breaks = 100, main = "b1 for Expenditure") hist(b2, breaks = 100, main = "b2 for Log Percent Taking SAT") #Compute percentage of resamples that exceed limits. lower.limit <- .025*nreps; upper.limit <- .975*nreps b1 <- sort(b1) b2 <- sort(b2) cat(" The lower limit of b1 = ", b1[lower.limit], "\n") cat(" The upper limit of b1 = ", b1[upper.limit], "\n") cat(" The lower limit of b2 = ", b2[lower.limit], "\n") cat(" The upper limit of b2 = ", b2[upper.limit], "\n \n \n") #******************************************************************* cat("Results using Expend and PTratio as predictors \n") # Now replacing LogPctSAT with PTratio to get a non-significant variable b1 <- numeric(nreps) b2 <- numeric(nreps) model3 <- lm(SATcombined~Expend + PTratio) b1obs <- model3$coeff[2] b2obs <- model3$coeff[3] # Start resampling for (i in 1:nreps) { sample.50 <- sort(sample(nrow(dat),50,replace = TRUE)) #Create a sample of row nmumbers sampdata <- dat[sample.50,] #Sample those rows model4 <- lm( sampdata$SATcombined~sampdata$Expend + sampdata$PTratio) b1[i] <- model4$coeff[2] b2[i] <- model4$coeff[3] } seb1 <- sd(b1) #Compute standard errors--not necessarily needed seb2 <- sd(b2) cat("The standard errors of b1 and b2 are: ",seb1, seb2, "\n") hist(b1, breaks = 100, main = "b1 for Expenditure") hist(b2, breaks = 100, main = "b2 for Pupil/Teacher Ratio") #Compute confidence limits on b1 and b2 lower.limit <- .025*nreps; upper.limit <- .975*nreps b1 <- sort(b1) b2 <- sort(b2) cat(" The lower limit of b1 = ", b1[lower.limit], "\n") cat(" The upper limit of b1 = ", b1[upper.limit], "\n") cat(" The lower limit of b2 = ", b2[lower.limit], "\n") cat(" The upper limit of b2 = ", b2[upper.limit], "\n")