Chapter 6.1.3 Exercises
In [7]:
# If you want to save some time and get the APA-style conclusion quickly, just use the apa function. Here’s how it works:
# Consider the following two-sample t-test on the pirates dataset that compares whether or not there is a significant age difference between pirates who wear headbands and those who do not:
(test.result <- t.test(age ~ headband,
                       data = pirates))
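The APA-style summary itself comes from passing the stored htest object to apa(); this call appears to be what the following commentary refers to (a minimal sketch, assuming the yarrr package is installed):

```r
library(yarrr)
# Print an APA-style conclusion from the stored t-test object
yarrr::apa(test.result)
```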
In [10]:
# As you can see, the apa function got the values we wanted and reported them in proper APA style. The apa function will even automatically adapt the output for Chi-Square and correlation tests if you enter such a test object. Let’s see how it works on a correlation test where we correlate a pirate’s age with the number of parrots she has owned:
# Print an APA style conclusion of the correlation between a pirate's age and # of parrots
(age.parrots.ctest <- cor.test(formula = ~ age + parrots,
                               data = pirates))
# Print the apa style conclusion!
yarrr::apa(age.parrots.ctest)
# The apa function has a few optional arguments that control things like the number of significant digits in the output, and the number of tails in the test. Run ?apa to see all the options.
In [13]:
# You can read normtemp from a file as follows, assuming the file is stored in the same directory as is returned when you type getwd(), or get it from the UsingR package.
library(UsingR)
#normtemp <- read.csv("normtemp.csv")
str(normtemp)
In [23]:
# The R command that finds a confidence interval for the mean in this way is
library(UsingR)
t.test(normtemp$hr, conf.level = .98)
In [24]:
library(UsingR)
t.test(normtemp$temperature, mu = 98.6)
In [33]:
t.test(daily.intake, mu = 7725, conf.level = .99)
# We see that the 99% confidence interval is (5662,7845), and the p-value is .01814. So, there is about a 1.8% chance of getting data that is this compelling or more against H0 even if H0 is true.
In [30]:
anorexia <- MASS::anorexia
weightdifference <- anorexia$Postwt - anorexia$Prewt
t.test(weightdifference, mu = 0, alternative = "greater", conf.level = .98)
# There is pretty strong evidence that there is a positive mean difference in weight of patients.
In [3]:
library(yarrr)
# Let’s conduct a correlation test on the pirates dataset to see if there is a relationship between a pirate’s age and number of parrots they’ve had in their lifetime. Because the variables (age and parrots) are in a dataframe, we can do this in formula notation:
# Is there a correlation between a pirate's age and the number of parrots (s)he's owned?
# Method 1: Formula notation
age.parrots.ctest <- cor.test(formula = ~ age + parrots,
                              data = pirates)
# Print result
age.parrots.ctest
In [12]:
# Just like the t.test() function, we can use the subset argument in the cor.test() function to conduct a test on a subset of the entire dataframe. For example, to run the same correlation test between a pirate’s age and the number of parrots she’s owned, but only for female pirates, I can add the subset = sex == "female" argument:
# Is there a correlation between age and number of parrots ONLY for female pirates?
cor.test(formula = ~ age + parrots,
         data = pirates,
         subset = sex == "female")
# The results look nearly identical. In other words, the strength of the relationship between a pirate’s age and the number of parrots owned is about the same for female pirates as for pirates in general.
In [1]:
# If the results of the preceding section didn’t convince you that you need to understand your underlying population before applying a t-test, then this section will. A typical “heavy tail” distribution is the t random variable. Indeed, the t rv with 1 or 2 degrees of freedom doesn’t even have a finite standard deviation!
# Estimate the effective type I error rate in a t-test of H0:μ=0 versus Ha:μ≠0 at the α=0.1 level when the underlying population is t with 3 degrees of freedom when we take a sample of size n=30.
mean(replicate(20000, t.test(rt(30,3), mu = 0)$p.value < .1))
# Not too bad. The t-test's effective type I error rate comes out somewhat below the nominal level the test was designed for.
In [3]:
# Permutation test for two binary variables
library(dplyr)
library(infer)
mtcars %>%
  dplyr::mutate(am = factor(am), vs = factor(vs)) %>%
  specify(am ~ vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in props", order = c("1", "0"))
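The pipeline above stops at the permuted statistics; to finish the test, compare them against the observed difference in proportions. A sketch using infer's get_p_value() (reps raised to 1000 here for a steadier estimate):

```r
library(dplyr)
library(infer)

# Observed difference in proportions (no permutation step)
obs_stat <- mtcars %>%
  dplyr::mutate(am = factor(am), vs = factor(vs)) %>%
  specify(am ~ vs, success = "1") %>%
  calculate(stat = "diff in props", order = c("1", "0"))

# Null distribution, then a two-sided p-value
mtcars %>%
  dplyr::mutate(am = factor(am), vs = factor(vs)) %>%
  specify(am ~ vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "diff in props", order = c("1", "0")) %>%
  get_p_value(obs_stat = obs_stat, direction = "both")
```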
In [5]:
# Permutation test similar to ANOVA
library(dplyr)
library(infer)
mtcars %>%
  dplyr::mutate(cyl = factor(cyl)) %>%
  specify(mpg ~ cyl) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "F")
In [14]:
# Permutation test for two binary variables
mtcars %>%
  dplyr::mutate(am = factor(am), vs = factor(vs)) %>%
  specify(am ~ vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute")
In [5]:
library(yarrr)
# R stores hypothesis tests in special object classes called htest. htest objects contain all the major results from a hypothesis test, from the test statistic (e.g.; a t-statistic for a t-test, or a correlation coefficient for a correlation test), to the p-value, to a confidence interval.
# To show you how this works, let’s create an htest object called height.htest containing the results from a two-sample t-test comparing the heights of male and female pirates:
# T-test comparing male and female heights stored in a new htest object called height.htest
height.htest <- t.test(formula = height ~ sex,
                       data = pirates,
                       subset = sex %in% c("male", "female"))
In [6]:
# Once you’ve created an htest object, you can view a print-out of the main results by just evaluating the object name:
# Print main results from height.htest
height.htest
In [9]:
# Now, if we want to access the test statistic or p-value directly, we can just use $:
# Get the test statistic
height.htest$statistic
# Get the p-value
height.htest$p.value
# Get a confidence interval for the mean
height.htest$conf.int
str(height.htest$conf.int)
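Because an htest object is just a named list, those pieces can be glued into a report string. A minimal sketch with simulated data (the format string is my own, not an APA standard):

```r
set.seed(1)
# Toy two-sample t-test; any htest object works the same way
htest <- t.test(rnorm(50, mean = 1), rnorm(50))
# parameter holds the degrees of freedom, statistic the t value
with(htest, sprintf("t(%.1f) = %.2f, p = %.3f", parameter, statistic, p.value))
```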
In [2]:
str(t.test(c(1,2,3)))
In [3]:
# It’s a list of 9 things, and we can pull out the p-value by using
t.test(c(1,2,3))$p.value
In [4]:
# Symmetric, light tailed:
# Estimate the effective type I error rate in a t-test of H0:μ=0 versus Ha:μ≠0 at the α=0.1 level when the underlying population is uniform on the interval (−1,1) when we take a sample size of n=10. Note that H0 is definitely true in this case. So, we are going to estimate the probability of rejecting H0 under the scenario above, and compare it to 10%, which is what the test is designed to give. Let’s build up our replicate function step by step.
set.seed(1)
runif(10,-1,1) #Sample of size 10 from uniform rv
In [5]:
t.test(runif(10,-1,1))$p.value #p-value of test
In [7]:
mean(replicate(10000, t.test(runif(10,-1,1))$p.value < .1))
# Our result is 10.07%. That is really close to the designed value of 10%.
In [8]:
# Estimate the effective type I error rate in a t-test of H0:μ=1 versus Ha:μ≠1 at the α=0.05 level when the underlying population is exponential with rate 1 when we take a sample of size n=20.
set.seed(2)
mean(replicate(20000, t.test(rexp(20,1), mu = 1)$p.value < .05))
In [42]:
# Pretty similar each time. That’s reason enough for me to believe the true type I error rate is relatively close to what we computed. We summarize the simulated effective Type I error rates for H0:μ=1 versus Ha:μ≠1 at α=.1,.05,.01 and .001 levels for sample sizes N=10,20,50,100 and 200.
effectiveError <- sapply(c(.1, .05, .01, .001), function(y)
  sapply(c(10, 20, 50, 100, 200), function(x)
    mean(replicate(20000, t.test(rexp(x, 1), mu = 1)$p.value < y))))
colnames(effectiveError) <- c(".1", ".05", ".01", ".001")
rownames(effectiveError) <- c("10", "20", "50", "100", "200")
effectiveError
In [35]:
# An example selecting a sample of flights traveling to Boston and to San Francisco from New York City in the flights data frame in the nycflights13 package. (We will remove flights with missing data first using na.omit and then sample 100 flights going to each of the two airports.)
library(dplyr)
library(nycflights13)
bos_sfo <- flights %>%
  na.omit() %>%
  filter(dest %in% c("BOS", "SFO")) %>%
  group_by(dest) %>%
  sample_n(100)
bos_sfo
In [36]:
# Suppose we were interested in seeing if the air_time to SFO in San Francisco was statistically greater than the air_time to BOS in Boston. As suggested, let’s begin with some exploratory data analysis to get a sense for how the two variables of air_time and dest relate for these two destination airports:
bos_sfo_summary <- bos_sfo %>%
  group_by(dest) %>%
  summarize(mean_time = mean(air_time),
            sd_time = sd(air_time))
bos_sfo_summary
In [41]:
# Looking at these results, we can clearly see that SFO air_time is much larger than BOS air_time. The standard deviation is also extremely informative here. To further understand just how different the air_time variable is for BOS and SFO, let’s look at a boxplot:
library(ggplot2)
ggplot(data = bos_sfo, mapping = aes(x = dest, y = air_time)) +
  geom_boxplot()
# Since there is no overlap at all, we can conclude that the air_time for San Francisco flights is statistically greater (at any level of significance) than the air_time for Boston flights.
In [38]:
# The movies dataset in the ggplot2movies package contains information on a large number of movies that have been rated by users of IMDB.com (Wickham 2015). We are interested in the question here of whether Action movies are rated higher on IMDB than Romance movies. We will first need to do a little bit of data wrangling using the ideas from Chapter 5 to get the data in the form that we would like:
library(dplyr)
library(ggplot2movies)
movies_trimmed <- movies %>%
  select(title, year, rating, Action, Romance)
# Note that Action and Romance are binary variables here. To remove any overlap of movies (and potential confusion) that are both Action and Romance, we will remove them from our population:
movies_trimmed <- movies_trimmed %>%
  filter(!(Action == 1 & Romance == 1))
# We will now create a new variable called genre that specifies whether a movie in our movies_trimmed data frame is an "Action" movie, a "Romance" movie, or "Neither". We aren’t really interested in the "Neither" category here so we will exclude those rows as well. Lastly, the Action and Romance columns are not needed anymore since they are encoded in the genre column.
movies_trimmed <- movies_trimmed %>%
  mutate(genre = case_when(Action == 1 ~ "Action",
                           Romance == 1 ~ "Romance",
                           TRUE ~ "Neither")) %>%
  filter(genre != "Neither") %>%
  select(-Action, -Romance)
# The case_when function is useful for assigning values in a new variable based on the values of another variable. The last step of TRUE ~ "Neither" is used when a particular movie is not set to either Action or Romance.
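As a quick standalone illustration of case_when's top-to-bottom matching (toy vectors, not the movies data):

```r
library(dplyr)
x <- c(1, 0, 0)
y <- c(0, 1, 0)
# The first condition that is TRUE wins; TRUE ~ ... is the catch-all
case_when(x == 1 ~ "Action",
          y == 1 ~ "Romance",
          TRUE   ~ "Neither")
# "Action" "Romance" "Neither"
```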
# We are left with 8878 movies in our population dataset that focuses on only "Action" and "Romance" movies. Let’s now visualize the distributions of rating across both levels of genre. Think about what type(s) of plot is/are appropriate here before you proceed:
ggplot(data = movies_trimmed, aes(x = genre, y = rating)) +
  geom_boxplot()
In [39]:
# We can see that the middle 50% of ratings for "Action" movies is more spread out than that of "Romance" movies in the population, though "Romance" has outliers at both the top and bottom of the scale. We are initially interested in comparing the mean rating across these two groups, so a faceted histogram may also be useful:
ggplot(data = movies_trimmed, mapping = aes(x = rating)) +
  geom_histogram(binwidth = 1, color = "white") +
  facet_grid(genre ~ .)
In [17]:
# Let’s select a random sample of 34 action movies and a random sample of 34 romance movies. (The number 34 was chosen somewhat arbitrarily here.)
set.seed(2017)
movies_genre_sample <- movies_trimmed %>%
  group_by(genre) %>%
  sample_n(34) %>%
  ungroup()
# Note the addition of the ungroup() function here. This will be useful shortly in allowing us to permute the values of rating across genre. Our analysis does not work without this ungroup() function since the data stays grouped by the levels of genre without it.
In [18]:
# We can now observe the distributions of our two sample ratings for both groups. Remember that these plots should be rough approximations of our population distributions of movie ratings for "Action" and "Romance" in our population of all movies in the movies data frame.
ggplot(data = movies_genre_sample, aes(x = genre, y = rating)) +
  geom_boxplot()
In [19]:
ggplot(data = movies_genre_sample, mapping = aes(x = rating)) +
  geom_histogram(binwidth = 1, color = "white") +
  facet_grid(genre ~ .)
In [20]:
# it’s often useful to calculate the mean and standard deviation as well, conditioned on the two levels.
summary_ratings <- movies_genre_sample %>%
  group_by(genre) %>%
  summarize(mean = mean(rating),
            std_dev = sd(rating),
            n = n())
summary_ratings
In [21]:
obs_diff <- movies_genre_sample %>%
  specify(formula = rating ~ genre) %>%
  calculate(stat = "diff in means", order = c("Romance", "Action"))
obs_diff
# Our goal next is to figure out a random process with which to simulate the null hypothesis being true. Recall that H0:μr−μa=0 corresponds to us assuming that the population means are the same. We would like to assume this is true and perform a random process to generate() data in the model of the null hypothesis.
In [22]:
shuffled_ratings_old <- movies_genre_sample %>%  # (movies_trimmed would also work)
  mutate(genre = mosaic::shuffle(genre)) %>%
  group_by(genre) %>%
  summarize(mean = mean(rating))
diff(shuffled_ratings_old$mean)
In [43]:
generated_samples <- movies_genre_sample %>%
  specify(formula = rating ~ genre) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 5000)
# A null distribution of simulated differences in sample means is created with the specification of stat = "diff in means" for the calculate() step.
#The null distribution is similar to the bootstrap distribution we saw in Chapter 9, but remember that it consists of statistics generated assuming the null hypothesis is true.
null_distribution_two_means <- generated_samples %>%
  calculate(stat = "diff in means", order = c("Romance", "Action"))
# We can now plot the distribution of these simulated differences in means:
null_distribution_two_means %>% visualize()
In [44]:
# The p-value: Remember that we are interested in seeing where our observed sample mean difference of 0.95 falls on this null/randomization distribution. We are interested in simply a difference here so “more extreme” corresponds to values in both tails on the distribution. Let’s shade our null distribution to show a visual representation of our p-value:
null_distribution_two_means %>%
  visualize(obs_stat = obs_diff, direction = "both")
# Remember that the observed difference in means was 0.95. We have shaded red all values at or above that value, and also those at or below its negative (since this is a two-tailed test). Passing obs_stat = obs_diff also draws a darker vertical line at 0.95.
# Shaded histogram to show p-value:
In [46]:
# To better estimate how large the p-value will be, we also increase the number of bins to 100 here from 20:
null_distribution_two_means %>%
  visualize(bins = 100, obs_stat = obs_diff, direction = "both")
# Histogram with vertical lines corresponding to observed statistic.
In [51]:
# At this point, it is important to take a guess as to what the p-value may be. We can see that there are only a few permuted differences as extreme or more extreme than our observed effect (in both directions). Maybe we guess that this p-value is somewhere around 2%, or maybe 3%, but certainly not 30% or more. Lastly, we calculate the p-value directly using infer:
pvalue <- null_distribution_two_means %>%
  get_pvalue(obs_stat = obs_diff, direction = "both")
pvalue
# We have around 0.46% of values as extreme or more extreme than our observed statistic in both directions. Assuming we are using a 5% significance level for α, we have evidence supporting the conclusion that the mean rating for romance movies is different from that of action movies. The next important idea is to understand just how much higher a mean rating we can expect romance movies to have compared to action movies.
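Note that a permutation p-value is nothing more than the proportion of permuted statistics at least as extreme as the observed one, which can be computed by hand (toy values, not the movies data):

```r
# Two-sided p-value as a proportion, computed directly
null_stats <- c(-0.8, -0.3, 0.1, 0.2, 0.9)
obs <- 0.85
mean(abs(null_stats) >= abs(obs))  # 0.2: one of five permuted stats is as extreme
```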
In [49]:
# To get the corresponding bootstrap distribution with which we can compute a confidence interval, we can just remove or comment out the hypothesize() step since we are no longer assuming the null hypothesis is true when we bootstrap:
percentile_ci_two_means <- movies_genre_sample %>%
  specify(formula = rating ~ genre) %>%
  # hypothesize(null = "independence") %>%
  generate(reps = 5000) %>%
  calculate(stat = "diff in means", order = c("Romance", "Action")) %>%
  get_ci()
percentile_ci_two_means
# Thus, we can expect the true mean of Romance movies on IMDB to have a rating 0.333 to 1.593 points higher than that of Action movies. Remember that this is based on bootstrapping using movies_genre_sample as our original sample and the confidence interval process being 95% reliable.
In [29]:
# We have already built an approximation for what we think the distribution of δ=¯x1−¯x2 looks like using randomization above. Recall this distribution:
ggplot(data = null_distribution_two_means, aes(x = stat)) +
  geom_histogram(color = "white", bins = 20)
In [ ]:
# The infer package also includes some built-in theory-based statistics as well, so instead of going through the process of trying to transform the difference into a standardized form, we can just provide a different value for stat in calculate(). Recall the generated_samples data frame created via:
generated_samples <- movies_genre_sample %>%
  specify(formula = rating ~ genre) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 5000)
# We can now create a null distribution of t statistics:
null_distribution_t <- generated_samples %>%
  calculate(stat = "t", order = c("Romance", "Action"))
null_distribution_t %>% visualize()
In [31]:
# We see that the shape of this stat = "t" distribution is the same as that of stat = "diff in means". The scale has changed though with the t values having less spread than the difference in means.
# A traditional t-test doesn’t look at this simulated distribution, but instead it looks at the t-curve with degrees of freedom equal to 62.029. We can overlay this distribution over the top of our permuted t statistics using the method = "both" setting in visualize().
null_distribution_t %>%
  visualize(method = "both")
In [47]:
# We can see that the curve does a good job of approximating the randomization distribution here. (More on when to expect this to be the case when we discuss conditions for the t-test in a bit.) To calculate the p-value in this case, we need to figure out how much of the total area under the t-curve lies at or above our observed T-statistic, plus the area at or below the negative of the observed T-statistic. (Remember this is a two-tailed test, so we are looking for a difference in either direction.)
# Just as we converted all of the simulated values to T-statistics, we must also do so for our observed effect δ∗ :
obs_t <- movies_genre_sample %>%
  specify(formula = rating ~ genre) %>%
  calculate(stat = "t", order = c("Romance", "Action"))
# So graphically we are interested in finding the percentage of values that are at or above 2.945 or at or below -2.945.
null_distribution_t %>%
  visualize(method = "both", obs_stat = obs_t, direction = "both")
# As we might have expected with this just being a standardization of the difference in means statistic that produced a small p-value, we also have a very small one here.
In [11]:
# 100 Bernoulli trials -- no need for replace=TRUE
library(dplyr)
library(infer)
library(mosaic)
resample(0:1, 100)
tally(resample(0:1, 100))
if (require(mosaicData)) {
  Small <- sample(KidsFeet, 10)
  resample(Small)
  tally(~ sex, data = resample(Small))
  tally(~ sex, data = resample(Small))
  # shuffled can be used to reshuffle some variables within groups
  # orig.id shows where the values were in the original data frame
  Small <- mutate(Small,
                  id1 = paste(sex, 1:10, sep = ":"),
                  id2 = paste(sex, 1:10, sep = ":"))
  resample(Small, groups = sex, shuffled = c("id1", "id2"))
}
deal(Cards, 13) # A Bridge hand
shuffle(Cards)
In [12]:
# fixed marginals for sex
tally(~ sex, data=Small)
tally(~ sex, data=resample(Small, groups=sex))
In [3]:
library(yarrr)
# Here, I’ll conduct a t-test to see if the average number of tattoos owned by pirates is different from 5:
tattoo.ttest <- t.test(x = pirates$tattoos, # Vector of data
                       mu = 5)              # Null: Mean is 5
# Print the result
tattoo.ttest
# As you can see, the function printed lots of information: the sample mean was 9.43, the test statistic (41.59), and the p-value was less than 2.2e-16 (virtually 0). Because this p-value is less than 0.05, we would reject the null hypothesis that the true mean is equal to 5.
In [4]:
# Now, what happens if I change the null hypothesis to a mean of 9.5? Because the sample mean was 9.43, quite close to 9.5, the test statistic should decrease, and the p-value should increase:
tattoo.ttest <- t.test(x = pirates$tattoos,
                       mu = 9.5) # Null: Mean is 9.5
tattoo.ttest
# Just as we predicted! The test statistic decreased to just -0.67, and the p-value increased to 0.51. In other words, our sample mean of 9.43 is reasonably consistent with the hypothesis that the true population mean is 9.50.
In [6]:
# For example, let’s test a prediction that pirates who wear eye patches have fewer tattoos on average than those who don’t wear eye patches. Because the data are in the pirates dataframe, we can do this using the formula method:
# 2-sample t-test
# IV = eyepatch (0 or 1)
# DV = tattoos
(tat.patch.htest <- t.test(formula = tattoos ~ eyepatch,
                           data = pirates))
# This test gave us a test statistic of 1.22 and a p-value of 0.22. Because the p-value is greater than 0.05, we would fail to reject the null hypothesis.
# Show me all of the elements in the htest object
names(tat.patch.htest)
In [9]:
# To fix this, I need to tell the t.test() function which two values of age I want to test. To do this, use the subset argument and indicate which values of the IV you want to test using the %in% operator. For example, to compare the number of tattoos between pirates of age 29 and 30, I would add the subset = age %in% c(29, 30) argument like this:
# Compare the tattoos of pirates aged 29 and 30:
tatage.htest <- t.test(formula = tattoos ~ age,
                       data = pirates,
                       subset = age %in% c(29, 30)) # Compare age of 29 to 30
tatage.htest
# Looks like we got a p-value of 0.79 which is pretty high and suggests that we should fail to reject the null hypothesis.
In [10]:
# You can select any subset of data in the subset argument to the t.test() function – not just the primary independent variable. For example, if I wanted to compare the number of tattoos between pirates who went to college or not, but only for female pirates, I would do the following:
# Is there an effect of college on # of tattoos only for female pirates?
t.test(formula = tattoos ~ college,
       data = pirates,
       subset = sex == "female")
In [1]:
# Consider the igf1 levels of girls in tanner puberty levels I and V. Test whether the mean igf1 level is the same in both populations.
# Let μ1 be the mean igf1 level of all girls in tanner puberty level I, and let μ2 be the mean igf1 level of all girls in tanner puberty level V. We wish to test H0:μ1=μ2 versus Ha:μ1≠μ2.
# Let’s think about the data that we have for a minute. Do we expect the igf1 level of girls in the two puberty levels to be approximately normal? Do we expect symmetry? How many samples do we have?
library(ISwR)
juul <- ISwR::juul
# We need to clean up the juul data:
juul$sex <- factor(juul$sex, levels = c(1,2), labels = c("Boy", "Girl"))
juul$menarche <- factor(juul$menarche, levels = c(1,2), labels = c("No", "Yes"))
juul$tanner <- factor(juul$tanner, levels = c(1,2,3,4,5), labels = c("I", "II", "III", "IV", "V"))
# Now let’s see what our sample sizes are:
table(juul$sex, juul$tanner)
In [2]:
# And we can see that we have 224 girls in tanner I and 204 in tanner V. I doubt that the data would be so skewed that we would need to worry about using a t-test in this case, and I also don’t think there will be outliers. Let’s create a data frame consisting only of the girls in tanner I or tanner V, and boxplot their igf1 levels.
library(ggplot2)
smallJuul <- dplyr::filter(juul, tanner %in% c("I","V"), sex == "Girl")
ggplot(smallJuul, aes(x = tanner, y = igf1)) + geom_boxplot()
# Looks pretty symmetric, and the outliers aren’t too far away from the whiskers. It looks like we were correct to assume that a t-test will be applicable to this data.
# Moreover, we had no reason before collecting the data to think that the variances were equal, so we will not make that assumption in t.test.
# Finally, before collecting the data, we had no reason to believe that the igf1 levels of the two populations would differ in a particular way, so we do a 2-sided test.
In [3]:
# Now, we run a t-test:
t.test(igf1~tanner, data = smallJuul, var.equal = FALSE)
# OK, that’s crazy. We definitely reject H0. Looking back at the boxplots, notice that the 25th percentile of the tanner V girls is almost above the whisker of the tanner I girls. That’s a pretty huge difference, and we almost didn’t even need to run a t-test.
In [4]:
anorexia <- MASS::anorexia
weightdifference <- anorexia$Postwt - anorexia$Prewt
t.test(weightdifference, mu = 0, alternative = "greater", conf.level = .98)
In [12]:
# Looks good. Let’s run the test.
t.test(anorexia$Prewt,anorexia$Postwt, paired = TRUE, alternative = "less")
In [13]:
# We see that we get a p-value of 0.01751. That is evidence that the treatment has been effective. Note that if we didn’t pair the data, we would get
t.test(anorexia$Prewt,anorexia$Postwt,alternative = "less")
In [14]:
# which has a p-value of .05024. That is, the p-value would be nearly three times as large.
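The gain from pairing can be seen directly in the standard errors: pre- and post-treatment weights are positively correlated within patients, so the standard error of the within-patient differences is smaller than the unpaired standard error (a sketch; MASS ships with R):

```r
anorexia <- MASS::anorexia
d <- anorexia$Postwt - anorexia$Prewt
n <- nrow(anorexia)
sd(d) / sqrt(n)                                           # paired standard error
sqrt(var(anorexia$Prewt) / n + var(anorexia$Postwt) / n)  # unpaired standard error
```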
# Finally, let’s look at the data to make sure no outliers snuck in.
ggplot(anorexia, aes(x = "", y = Prewt)) + geom_boxplot()
In [16]:
# It is interesting that the pre-treatment weights are nice and symmetric, while the post-treatment weights are skewed. This leads me to wonder whether the treatment was effective for only some of the patients, and maybe there is some subset of patients for which this treatment tends to work. Without further information, we won’t be able to tell.
ggplot(anorexia, aes(x = Postwt - Prewt)) + geom_histogram(binwidth = 5)
# Thus it seems that the treatment was totally ineffective for some girls, but almost unrealistically effective for others. I would really like to understand which patients are more likely to be helped by this treatment. But we don’t have any data that would help us understand that.