# Sampling distributions in Chapter 7
# Looks at the sampling distribution as you vary n and as you vary the underlying population

# Plots the sampling distribution of the mean when drawing from a normal distribution
#               By changing "n" in the commands below you can see how the shape
#               of the distribution depends on sample size.
#               Removing "#" allows for sampling from other distributions
#               The axes on the graph change when you change sample size, so the
#               curve doesn't APPEAR to get any steeper, though it really does.

# The first set of graphs shows what happens when we sample from different distributions,
# including non-normal ones (the normal, the chi-square, and the exponential).
# The last two plots show the distribution when the null is true and when it is false.

# Run SampDistMeansVaryN.R if you want to see how the distribution changes as
# you change the sample size.

n <- 20                        # Sample size for each replication
nreps <- 10000                 # Number of replications
sample.mean <- numeric(nreps)  # Variable to store means
par(mfrow = c(3, 2))           # 3 x 2 grid of plots, filled row by row

# One large draw from the population, just to see what the distribution looks like
sample.normal <- rnorm(n = 1000, mean = 35, sd = 15)

# Draw nreps samples of n normal observations (mean = 35, sd = 15) and store
# the mean of each one. The draw is called "obs" rather than "sample" so the
# script does not mask base::sample(), and seq_len() keeps the loop empty if
# nreps is ever set to 0.
for (i in seq_len(nreps)) {
  obs <- rnorm(n = n, mean = 35, sd = 15)
  sample.mean[i] <- mean(obs)
}

# Now plot out the large sample and the means
hist(sample.normal, breaks = 50, main = "Distribution of Normal Population", xlab = "")
hist(sample.mean, breaks = 50, main = "Distribution of Sample Mean \n for Normal", xlab = "Mean")

# Chi-square population on 6 df -- a noticeably skewed distribution
sample.chisq <- rchisq(n = 1000, df = 6)   # large draw, only used to show the population shape

# Mean of each of nreps chi-square samples. vapply() calls the function once per
# replication, in order, so the random draws are made in the same sequence as
# an explicit loop would make them.
# NOTE(review): each sample here has a fixed size of 100 rather than "n" --
# confirm that this is intended.
sample.mean <- vapply(
  seq_len(nreps),
  function(rep) mean(rchisq(n = 100, df = 6)),
  numeric(1)
)

hist(sample.chisq, breaks = 50, main = "Distribution of Chi-square Population", xlab = "")
hist(sample.mean, breaks = 50, main = "Distribution of Sample Mean \n for Chi-Square", xlab = "Mean")
    
# Exponential population (rate = 1.5), which is very skewed.
# Start with a histogram of a large draw from the exponential itself.
sample.expon <- rexp(n = 10000, rate = 1.5)
hist(sample.expon, breaks = 50, main = "Exponential Distribution")

# Then the distribution of means, using samples of only 10 observations each
means.exp <- vapply(
  seq_len(nreps),
  function(rep) mean(rexp(n = 10, rate = 1.5)),
  numeric(1)
)
hist(means.exp, breaks = 50, main = "Distribution of Means  \n for Exponential", xlab = "Mean ")

################################################################################

#  Distribution of the difference between two sample means when H0 is true:
#  both samples (100 observations each) come from the same normal population.
mean.diff <- vapply(
  seq_len(nreps),
  function(rep) {
    group.one <- rnorm(n = 100, mean = 35, sd = 15)   # first sample is drawn first
    group.two <- rnorm(n = 100, mean = 35, sd = 15)
    mean(group.one) - mean(group.two)
  },
  numeric(1)
)

# Ask for confirmation before moving on to a new graphics page
par(ask = TRUE)
print("You will be asked if you want to go on to the next graphics screen.")
print("Click on 'Next' in upper left of graphics screen.")
hist(mean.diff, breaks = 50, main = "Distribution of Mean \n Differences", xlab = "Mean Difference H0 true")
  # Notice that the distribution is centered on 0.0
  
#  Now plot distribution of mean differences when H0 is false:
#  the second population has mean 45 instead of 35.
mean.diff <- numeric(nreps)
for (i in seq_len(nreps)) {
  sample.first <- rnorm(n = 100, mean = 35, sd = 15)
  sample.second <- rnorm(n = 100, mean = 45, sd = 15)
  mean.diff[i] <- mean(sample.first) - mean(sample.second)
}
# The axis label spells the null hypothesis "H0" (with a zero), the same way
# the H0-true histogram does.
hist(mean.diff, breaks = 50, main = "Distribution of Mean \n Differences", xlab = "Mean Difference H0 false")
  # Notice that the distribution is centered on -10.0 (35 - 45)
  
  
  # You can sample from other distributions --some of which we have used.
    # sample <- rexp(n = 100, rate = 1)           # for exponential
    # sample <- rchisq(n = 100, df = 1, ncp = 0)  # for chi-sq on 1 df
    # sample <- runif(n = 100, min = 0, max = 1)  # for uniform (rectangular) distrib.
    # sample <- rpois(n = 100, lambda =  1)       # for poisson with mean = lambda
    # sample <- rbinom(n = 100, size = 10, prob = .7) # Draw 100 samples of 10 items with p(success) = .70