Chapter 2.2.1.1 Exercises

dplyr
In [5]:
# Keep only the rows of the chicago data frame where PM2.5 is greater than 30
# (a reasonably high level), then inspect the structure of the result.

library(dplyr)
chicago <- readRDS("./chicago.rds")

chic.f <- chicago %>%
  filter(pm25tmean2 > 30)
str(chic.f)
'data.frame':	194 obs. of  8 variables:
 $ city      : chr  "chic" "chic" "chic" "chic" ...
 $ tmpd      : num  23 28 55 59 57 57 75 61 73 78 ...
 $ dptp      : num  21.9 25.8 51.3 53.7 52 56 65.8 59 60.3 67.1 ...
 $ date      : Date, format: "1998-01-17" "1998-01-23" ...
 $ pm25tmean2: num  38.1 34 39.4 35.4 33.3 ...
 $ pm10tmean2: num  32.5 38.7 34 28.5 35 ...
 $ o3tmean2  : num  3.18 1.75 10.79 14.3 20.66 ...
 $ no2tmean2 : num  25.3 29.4 25.3 31.4 26.8 ...
In [9]:
# Sort by date in descending order so the most recent observations come
# first. The sort is done explicitly here so this cell does not rely on
# `chicago` having been reordered by some earlier step; `chicago` itself is
# left unchanged.
by_date_desc <- arrange(chicago, desc(date))

# The first three and last three rows show the dates in descending order.

head(select(by_date_desc, date, pm25tmean2), 3)
tail(select(by_date_desc, date, pm25tmean2), 3)
datepm25tmean2
2005-12-3115.00000
2005-12-3015.05714
2005-12-29 7.45000
datepm25tmean2
69381987-01-03NA
69391987-01-02NA
69401987-01-01NA
In [13]:
# Derive a year variable from the date; the `year` field of a POSIXlt value
# is offset by 1900, hence the addition.

chicago <- chicago %>%
  mutate(year = as.POSIXlt(date)$year + 1900)

# Build a grouped version of the data frame, with one group per year.

years <- chicago %>%
  group_by(year)
chicago
citytmpddewpointdatepm25pm10tmean2o3tmean2no2tmean2year
chic 31.5 31.500 1987-01-01NA 34.00000 4.250000 19.98810 1987
chic 33.0 29.875 1987-01-02NA NA 3.304348 23.19099 1987
chic 33.0 27.375 1987-01-03NA 34.16667 3.333333 23.81548 1987
chic 29.0 28.625 1987-01-04NA 47.00000 4.375000 30.43452 1987
chic 32.0 28.875 1987-01-05NA NA 4.750000 30.33333 1987
chic 40.0 35.125 1987-01-06NA 48.00000 5.833333 25.77233 1987
chic 34.5 26.750 1987-01-07NA 41.00000 9.291667 20.58171 1987
chic 29.0 22.000 1987-01-08NA 36.00000 11.291667 17.03723 1987
chic 26.5 29.000 1987-01-09NA 33.28571 4.500000 23.38889 1987
chic 32.5 27.750 1987-01-10NA NA 4.958333 19.54167 1987
chic 29.5 20.125 1987-01-11NA 22.00000 17.541667 13.70139 1987
chic 34.5 26.000 1987-01-12NA 26.00000 8.000000 33.02083 1987
chic 34.0 32.250 1987-01-13NA 53.00000 4.958333 38.06142 1987
chic 37.5 36.375 1987-01-14NA 43.00000 4.208333 32.19444 1987
chic 32.5 24.250 1987-01-15NA 28.83333 4.458333 18.87131 1987
chic 25.0 21.500 1987-01-16NA 19.00000 7.916667 19.46667 1987
chic 27.0 24.750 1987-01-17NA NA 5.833333 20.70833 1987
chic 17.5 11.125 1987-01-18NA 39.00000 6.375000 21.03333 1987
chic 23.0 15.750 1987-01-19NA 32.00000 14.875000 17.17409 1987
chic 20.5 11.500 1987-01-20NA 38.00000 7.250000 21.61021 1987
chic 22.0 20.625 1987-01-21NA 32.85714 8.913043 24.52083 1987
chic 19.5 7.375 1987-01-22NA 52.00000 10.500000 16.98798 1987
chic 2.5 -12.250 1987-01-23NA 55.00000 14.625000 14.66250 1987
chic 2.0 -5.625 1987-01-24NA 38.00000 10.083333 18.69167 1987
chic 9.5 -5.250 1987-01-25NA NA 6.666667 26.30417 1987
chic 16.0 4.750 1987-01-26NA 71.00000 4.583333 32.42143 1987
chic 17.5 17.750 1987-01-27NA 39.33333 6.000000 30.69306 1987
chic 29.5 18.250 1987-01-28NA 47.00000 6.875000 29.12943 1987
chic 29.5 32.875 1987-01-29NA 35.00000 2.916667 28.14529 1987
chic 32.5 24.125 1987-01-30NA 59.00000 8.791667 19.79861 1987
chic 19 8.5 2005-12-02 NA 19.50 9.156250 23.29167 2005
chic 25 19.0 2005-12-0313.34286 20.00 10.333333 25.19444 2005
chic 20 15.8 2005-12-0415.30000 15.50 13.177083 21.70833 2005
chic 11 2.8 2005-12-05 NA 30.00 6.447917 28.38889 2005
chic 11 3.2 2005-12-0624.61667 33.00 4.701540 29.08333 2005
chic 8 -1.8 2005-12-0737.80000 39.00 3.916214 34.30952 2005
chic 16 15.6 2005-12-0824.30000 31.00 5.995265 34.22222 2005
chic 20 10.9 2005-12-0925.45000 22.00 5.958333 31.41667 2005
chic 22 16.2 2005-12-1018.20000 30.00 9.135417 28.70833 2005
chic 20 17.2 2005-12-1110.60000 14.00 11.333333 22.55556 2005
chic 19 11.6 2005-12-1219.22500 28.75 5.031250 39.74621 2005
chic 26 19.8 2005-12-1326.50000 21.00 6.628623 29.56944 2005
chic 32 27.4 2005-12-1426.90000 16.00 3.802083 30.63384 2005
chic 30 27.9 2005-12-1514.40000 16.50 4.895833 25.43056 2005
chic 21 14.7 2005-12-1611.00000 22.00 11.166667 16.87500 2005
chic 16 7.3 2005-12-1713.80000 20.00 8.593750 20.73611 2005
chic 10 1.9 2005-12-1812.20000 17.50 13.552083 19.11111 2005
chic 5 -0.3 2005-12-1921.15000 21.00 8.058877 31.79167 2005
chic 13 7.7 2005-12-2025.75000 32.00 3.849185 32.89773 2005
chic 12 7.7 2005-12-2137.92857 59.50 3.663949 34.86111 2005
chic 22 23.3 2005-12-2236.65000 42.50 5.385417 33.73026 2005
chic 41 32.6 2005-12-2332.90000 34.50 6.906250 29.08333 2005
chic 37 35.2 2005-12-2430.77143 25.20 1.770833 31.98611 2005
chic 35 32.1 2005-12-25 6.70000 8.00 14.354167 13.79167 2005
chic 35 29.6 2005-12-26 8.40000 8.50 14.041667 16.81944 2005
chic 40 33.6 2005-12-2723.56000 27.00 4.468750 23.50000 2005
chic 37 34.5 2005-12-2817.75000 27.50 3.260417 19.28563 2005
chic 35 29.4 2005-12-29 7.45000 23.50 6.794837 19.97222 2005
chic 36 31.0 2005-12-3015.05714 19.20 3.034420 22.80556 2005
chic 35 30.1 2005-12-3115.00000 23.50 2.531250 13.25000 2005
In [16]:
# Collapse each year's rows into a single row of summary statistics with
# summarize().

years %>%
  summarize(
    pm25 = mean(pm25, na.rm = TRUE),
    o3 = max(o3tmean2, na.rm = TRUE),
    no2 = median(no2tmean2, na.rm = TRUE)
  )

# summarize() returns a data frame with year as the first column, followed by
# the annual mean of pm25, the annual maximum of o3, and the annual median of
# no2.
yearpm25o3no2
1987 NaN62.9696623.49369
1988 NaN61.6770824.52296
1989 NaN59.7272726.14062
1990 NaN52.2291722.59583
1991 NaN63.1041721.38194
1992 NaN50.8287024.78921
1993 NaN44.3009325.76993
1994 NaN52.1784428.47500
1995 NaN66.5875027.26042
1996 NaN58.3958326.38715
1997 NaN56.5416725.48143
1998 18.2646750.6625024.58649
1999 18.4964657.4886424.66667
2000 16.9380655.7610323.46082
2001 16.9263251.8198425.06522
2002 15.2733554.8804322.73750
2003 15.2318356.1660824.62500
2004 14.6286444.4824023.39130
2005 16.1855658.8412622.62387
In [19]:
# A slightly more involved question: what are the average levels of ozone (o3)
# and nitrogen dioxide (no2) within quintiles of pm25? A regression model
# would be a slicker way to answer it, but group_by() and summarize() get
# there quickly.

# Step one: turn pm25 into a categorical variable by cutting it at its
# quintile break points.
# NOTE(review): cut() defaults to include.lowest = FALSE, so a value equal to
# the lowest break (the minimum pm25) is assigned NA instead of the first
# bin -- confirm whether that is intended.

qq <- quantile(chicago[["pm25"]], probs = seq(0, 1, 0.2), na.rm = TRUE)
chicago <- chicago %>%
  mutate(pm25.quint = cut(pm25, qq))
chicago
citytmpddewpointdatepm25pm10tmean2o3tmean2no2tmean2yearpm25.quint
chic 31.5 31.500 1987-01-01NA 34.00000 4.250000 19.98810 1987 NA
chic 33.0 29.875 1987-01-02NA NA 3.304348 23.19099 1987 NA
chic 33.0 27.375 1987-01-03NA 34.16667 3.333333 23.81548 1987 NA
chic 29.0 28.625 1987-01-04NA 47.00000 4.375000 30.43452 1987 NA
chic 32.0 28.875 1987-01-05NA NA 4.750000 30.33333 1987 NA
chic 40.0 35.125 1987-01-06NA 48.00000 5.833333 25.77233 1987 NA
chic 34.5 26.750 1987-01-07NA 41.00000 9.291667 20.58171 1987 NA
chic 29.0 22.000 1987-01-08NA 36.00000 11.291667 17.03723 1987 NA
chic 26.5 29.000 1987-01-09NA 33.28571 4.500000 23.38889 1987 NA
chic 32.5 27.750 1987-01-10NA NA 4.958333 19.54167 1987 NA
chic 29.5 20.125 1987-01-11NA 22.00000 17.541667 13.70139 1987 NA
chic 34.5 26.000 1987-01-12NA 26.00000 8.000000 33.02083 1987 NA
chic 34.0 32.250 1987-01-13NA 53.00000 4.958333 38.06142 1987 NA
chic 37.5 36.375 1987-01-14NA 43.00000 4.208333 32.19444 1987 NA
chic 32.5 24.250 1987-01-15NA 28.83333 4.458333 18.87131 1987 NA
chic 25.0 21.500 1987-01-16NA 19.00000 7.916667 19.46667 1987 NA
chic 27.0 24.750 1987-01-17NA NA 5.833333 20.70833 1987 NA
chic 17.5 11.125 1987-01-18NA 39.00000 6.375000 21.03333 1987 NA
chic 23.0 15.750 1987-01-19NA 32.00000 14.875000 17.17409 1987 NA
chic 20.5 11.500 1987-01-20NA 38.00000 7.250000 21.61021 1987 NA
chic 22.0 20.625 1987-01-21NA 32.85714 8.913043 24.52083 1987 NA
chic 19.5 7.375 1987-01-22NA 52.00000 10.500000 16.98798 1987 NA
chic 2.5 -12.250 1987-01-23NA 55.00000 14.625000 14.66250 1987 NA
chic 2.0 -5.625 1987-01-24NA 38.00000 10.083333 18.69167 1987 NA
chic 9.5 -5.250 1987-01-25NA NA 6.666667 26.30417 1987 NA
chic 16.0 4.750 1987-01-26NA 71.00000 4.583333 32.42143 1987 NA
chic 17.5 17.750 1987-01-27NA 39.33333 6.000000 30.69306 1987 NA
chic 29.5 18.250 1987-01-28NA 47.00000 6.875000 29.12943 1987 NA
chic 29.5 32.875 1987-01-29NA 35.00000 2.916667 28.14529 1987 NA
chic 32.5 24.125 1987-01-30NA 59.00000 8.791667 19.79861 1987 NA
chic 19 8.5 2005-12-02 NA 19.50 9.156250 23.29167 2005 NA
chic 25 19.0 2005-12-03 13.34286 20.00 10.333333 25.19444 2005 (12.4,16.7]
chic 20 15.8 2005-12-04 15.30000 15.50 13.177083 21.70833 2005 (12.4,16.7]
chic 11 2.8 2005-12-05 NA 30.00 6.447917 28.38889 2005 NA
chic 11 3.2 2005-12-06 24.61667 33.00 4.701540 29.08333 2005 (22.6,61.5]
chic 8 -1.8 2005-12-07 37.80000 39.00 3.916214 34.30952 2005 (22.6,61.5]
chic 16 15.6 2005-12-08 24.30000 31.00 5.995265 34.22222 2005 (22.6,61.5]
chic 20 10.9 2005-12-09 25.45000 22.00 5.958333 31.41667 2005 (22.6,61.5]
chic 22 16.2 2005-12-10 18.20000 30.00 9.135417 28.70833 2005 (16.7,22.6]
chic 20 17.2 2005-12-11 10.60000 14.00 11.333333 22.55556 2005 (8.7,12.4]
chic 19 11.6 2005-12-12 19.22500 28.75 5.031250 39.74621 2005 (16.7,22.6]
chic 26 19.8 2005-12-13 26.50000 21.00 6.628623 29.56944 2005 (22.6,61.5]
chic 32 27.4 2005-12-14 26.90000 16.00 3.802083 30.63384 2005 (22.6,61.5]
chic 30 27.9 2005-12-15 14.40000 16.50 4.895833 25.43056 2005 (12.4,16.7]
chic 21 14.7 2005-12-16 11.00000 22.00 11.166667 16.87500 2005 (8.7,12.4]
chic 16 7.3 2005-12-17 13.80000 20.00 8.593750 20.73611 2005 (12.4,16.7]
chic 10 1.9 2005-12-18 12.20000 17.50 13.552083 19.11111 2005 (8.7,12.4]
chic 5 -0.3 2005-12-19 21.15000 21.00 8.058877 31.79167 2005 (16.7,22.6]
chic 13 7.7 2005-12-20 25.75000 32.00 3.849185 32.89773 2005 (22.6,61.5]
chic 12 7.7 2005-12-21 37.92857 59.50 3.663949 34.86111 2005 (22.6,61.5]
chic 22 23.3 2005-12-22 36.65000 42.50 5.385417 33.73026 2005 (22.6,61.5]
chic 41 32.6 2005-12-23 32.90000 34.50 6.906250 29.08333 2005 (22.6,61.5]
chic 37 35.2 2005-12-24 30.77143 25.20 1.770833 31.98611 2005 (22.6,61.5]
chic 35 32.1 2005-12-25 6.70000 8.00 14.354167 13.79167 2005 (1.7,8.7]
chic 35 29.6 2005-12-26 8.40000 8.50 14.041667 16.81944 2005 (1.7,8.7]
chic 40 33.6 2005-12-27 23.56000 27.00 4.468750 23.50000 2005 (22.6,61.5]
chic 37 34.5 2005-12-28 17.75000 27.50 3.260417 19.28563 2005 (16.7,22.6]
chic 35 29.4 2005-12-29 7.45000 23.50 6.794837 19.97222 2005 (1.7,8.7]
chic 36 31.0 2005-12-30 15.05714 19.20 3.034420 22.80556 2005 (12.4,16.7]
chic 35 30.1 2005-12-31 15.00000 23.50 2.531250 13.25000 2005 (12.4,16.7]
In [20]:
# Group the rows of the data frame by their pm25.quint value.

quint <- chicago %>%
  group_by(pm25.quint)

# Then average o3 and no2 within each pm25 quintile.

quint %>%
  summarize(
    o3 = mean(o3tmean2, na.rm = TRUE),
    no2 = mean(no2tmean2, na.rm = TRUE)
  )
# A tibble: 6 × 3

# From the table, it seems there isn’t a strong relationship between pm25 and o3, but there appears to be a positive correlation between pm25 and no2. More sophisticated statistical modeling can help to provide precise answers to these questions, but a simple application of dplyr functions can often get you most of the way there.
pm25.quinto3no2
(1.7,8.7] 21.66401 17.99129
(8.7,12.4] 20.38248 22.13004
(12.4,16.7]20.66160 24.35708
(16.7,22.6]19.88122 27.27132
(22.6,61.5]20.31775 29.64427
NA 18.79044 25.77585
In [7]:
# group_by() accepts expressions: this is shorthand for a mutate() that adds
# the column, followed by a plain group_by() on it.

group_by(mtcars, vsam = vs + am)
mpgcyldisphpdratwtqsecvsamgearcarbvsam
21.0 6 160.0110 3.90 2.62016.460 1 4 4 1
21.0 6 160.0110 3.90 2.87517.020 1 4 4 1
22.8 4 108.0 93 3.85 2.32018.611 1 4 1 2
21.4 6 258.0110 3.08 3.21519.441 0 3 1 1
18.7 8 360.0175 3.15 3.44017.020 0 3 2 0
18.1 6 225.0105 2.76 3.46020.221 0 3 1 1
14.3 8 360.0245 3.21 3.57015.840 0 3 4 0
24.4 4 146.7 62 3.69 3.19020.001 0 4 2 1
22.8 4 140.8 95 3.92 3.15022.901 0 4 2 1
19.2 6 167.6123 3.92 3.44018.301 0 4 4 1
17.8 6 167.6123 3.92 3.44018.901 0 4 4 1
16.4 8 275.8180 3.07 4.07017.400 0 3 3 0
17.3 8 275.8180 3.07 3.73017.600 0 3 3 0
15.2 8 275.8180 3.07 3.78018.000 0 3 3 0
10.4 8 472.0205 2.93 5.25017.980 0 3 4 0
10.4 8 460.0215 3.00 5.42417.820 0 3 4 0
14.7 8 440.0230 3.23 5.34517.420 0 3 4 0
32.4 4 78.7 66 4.08 2.20019.471 1 4 1 2
30.4 4 75.7 52 4.93 1.61518.521 1 4 2 2
33.9 4 71.1 65 4.22 1.83519.901 1 4 1 2
21.5 4 120.1 97 3.70 2.46520.011 0 3 1 1
15.5 8 318.0150 2.76 3.52016.870 0 3 2 0
15.2 8 304.0150 3.15 3.43517.300 0 3 2 0
13.3 8 350.0245 3.73 3.84015.410 0 3 4 0
19.2 8 400.0175 3.08 3.84517.050 0 3 2 0
27.3 4 79.0 66 4.08 1.93518.901 1 4 1 2
26.0 4 120.3 91 4.43 2.14016.700 1 5 2 1
30.4 4 95.1113 3.77 1.51316.901 1 5 2 2
15.8 8 351.0264 4.22 3.17014.500 1 5 4 1
19.7 6 145.0175 3.62 2.77015.500 1 5 6 1
15.0 8 301.0335 3.54 3.57014.600 1 5 8 1
21.4 4 121.0109 4.11 2.78018.601 1 4 2 2
In [5]:
# A variable created in a mutate() call can be used by later arguments of the
# same call
mtcars %>%
  as_tibble() %>%
  mutate(
    cyl2 = cyl * 2,
    cyl4 = cyl2 * 2
  )

# mutate() can also remove variables (assign NULL) and
# overwrite existing ones
mtcars %>%
  as_tibble() %>%
  mutate(
    mpg = NULL,
    disp = disp * 0.0163871 # convert to litres
  )


# Window functions pair well with grouped mutates
# (see `vignette("window-functions")` for more details)
mtcars %>%
  group_by(cyl) %>%
  mutate(rank = min_rank(desc(mpg)))

# Setting a variable to NULL drops it
mtcars %>%
  mutate(cyl = NULL)

# mutate() vs transmute --------------------------
# mutate() retains every existing variable
mtcars %>%
  mutate(displ_l = disp / 61.0237)

# transmute() returns only the variables it creates
mtcars %>%
  transmute(displ_l = disp / 61.0237)


# mutate() supports quasiquotation: an unquoted quosure may refer to both
# contextual variables (`var`) and variable names (`cyl`)
var <- 100
mtcars %>%
  as_tibble() %>%
  mutate(cyl = !!quo(cyl * var))
mpgcyldisphpdratwtqsecvsamgearcarbcyl2cyl4
21.0 6 160.0110 3.90 2.62016.460 1 4 4 12 24
21.0 6 160.0110 3.90 2.87517.020 1 4 4 12 24
22.8 4 108.0 93 3.85 2.32018.611 1 4 1 8 16
21.4 6 258.0110 3.08 3.21519.441 0 3 1 12 24
18.7 8 360.0175 3.15 3.44017.020 0 3 2 16 32
18.1 6 225.0105 2.76 3.46020.221 0 3 1 12 24
14.3 8 360.0245 3.21 3.57015.840 0 3 4 16 32
24.4 4 146.7 62 3.69 3.19020.001 0 4 2 8 16
22.8 4 140.8 95 3.92 3.15022.901 0 4 2 8 16
19.2 6 167.6123 3.92 3.44018.301 0 4 4 12 24
17.8 6 167.6123 3.92 3.44018.901 0 4 4 12 24
16.4 8 275.8180 3.07 4.07017.400 0 3 3 16 32
17.3 8 275.8180 3.07 3.73017.600 0 3 3 16 32
15.2 8 275.8180 3.07 3.78018.000 0 3 3 16 32
10.4 8 472.0205 2.93 5.25017.980 0 3 4 16 32
10.4 8 460.0215 3.00 5.42417.820 0 3 4 16 32
14.7 8 440.0230 3.23 5.34517.420 0 3 4 16 32
32.4 4 78.7 66 4.08 2.20019.471 1 4 1 8 16
30.4 4 75.7 52 4.93 1.61518.521 1 4 2 8 16
33.9 4 71.1 65 4.22 1.83519.901 1 4 1 8 16
21.5 4 120.1 97 3.70 2.46520.011 0 3 1 8 16
15.5 8 318.0150 2.76 3.52016.870 0 3 2 16 32
15.2 8 304.0150 3.15 3.43517.300 0 3 2 16 32
13.3 8 350.0245 3.73 3.84015.410 0 3 4 16 32
19.2 8 400.0175 3.08 3.84517.050 0 3 2 16 32
27.3 4 79.0 66 4.08 1.93518.901 1 4 1 8 16
26.0 4 120.3 91 4.43 2.14016.700 1 5 2 8 16
30.4 4 95.1113 3.77 1.51316.901 1 5 2 8 16
15.8 8 351.0264 4.22 3.17014.500 1 5 4 16 32
19.7 6 145.0175 3.62 2.77015.500 1 5 6 12 24
15.0 8 301.0335 3.54 3.57014.600 1 5 8 16 32
21.4 4 121.0109 4.11 2.78018.601 1 4 2 8 16
cyldisphpdratwtqsecvsamgearcarb
6 2.621936110 3.90 2.620 16.46 0 1 4 4
6 2.621936110 3.90 2.875 17.02 0 1 4 4
4 1.769807 93 3.85 2.320 18.61 1 1 4 1
6 4.227872110 3.08 3.215 19.44 1 0 3 1
8 5.899356175 3.15 3.440 17.02 0 0 3 2
6 3.687098105 2.76 3.460 20.22 1 0 3 1
8 5.899356245 3.21 3.570 15.84 0 0 3 4
4 2.403988 62 3.69 3.190 20.00 1 0 4 2
4 2.307304 95 3.92 3.150 22.90 1 0 4 2
6 2.746478123 3.92 3.440 18.30 1 0 4 4
6 2.746478123 3.92 3.440 18.90 1 0 4 4
8 4.519562180 3.07 4.070 17.40 0 0 3 3
8 4.519562180 3.07 3.730 17.60 0 0 3 3
8 4.519562180 3.07 3.780 18.00 0 0 3 3
8 7.734711205 2.93 5.250 17.98 0 0 3 4
8 7.538066215 3.00 5.424 17.82 0 0 3 4
8 7.210324230 3.23 5.345 17.42 0 0 3 4
4 1.289665 66 4.08 2.200 19.47 1 1 4 1
4 1.240503 52 4.93 1.615 18.52 1 1 4 2
4 1.165123 65 4.22 1.835 19.90 1 1 4 1
4 1.968091 97 3.70 2.465 20.01 1 0 3 1
8 5.211098150 2.76 3.520 16.87 0 0 3 2
8 4.981678150 3.15 3.435 17.30 0 0 3 2
8 5.735485245 3.73 3.840 15.41 0 0 3 4
8 6.554840175 3.08 3.845 17.05 0 0 3 2
4 1.294581 66 4.08 1.935 18.90 1 1 4 1
4 1.971368 91 4.43 2.140 16.70 0 1 5 2
4 1.558413113 3.77 1.513 16.90 1 1 5 2
8 5.751872264 4.22 3.170 14.50 0 1 5 4
6 2.376130175 3.62 2.770 15.50 0 1 5 6
8 4.932517335 3.54 3.570 14.60 0 1 5 8
4 1.982839109 4.11 2.780 18.60 1 1 4 2
mpgcyldisphpdratwtqsecvsamgearcarbrank
21.0 6 160.0110 3.90 2.62016.460 1 4 4 2
21.0 6 160.0110 3.90 2.87517.020 1 4 4 2
22.8 4 108.0 93 3.85 2.32018.611 1 4 1 8
21.4 6 258.0110 3.08 3.21519.441 0 3 1 1
18.7 8 360.0175 3.15 3.44017.020 0 3 2 2
18.1 6 225.0105 2.76 3.46020.221 0 3 1 6
14.3 8 360.0245 3.21 3.57015.840 0 3 4 11
24.4 4 146.7 62 3.69 3.19020.001 0 4 2 7
22.8 4 140.8 95 3.92 3.15022.901 0 4 2 8
19.2 6 167.6123 3.92 3.44018.301 0 4 4 5
17.8 6 167.6123 3.92 3.44018.901 0 4 4 7
16.4 8 275.8180 3.07 4.07017.400 0 3 3 4
17.3 8 275.8180 3.07 3.73017.600 0 3 3 3
15.2 8 275.8180 3.07 3.78018.000 0 3 3 7
10.4 8 472.0205 2.93 5.25017.980 0 3 4 13
10.4 8 460.0215 3.00 5.42417.820 0 3 4 13
14.7 8 440.0230 3.23 5.34517.420 0 3 4 10
32.4 4 78.7 66 4.08 2.20019.471 1 4 1 2
30.4 4 75.7 52 4.93 1.61518.521 1 4 2 3
33.9 4 71.1 65 4.22 1.83519.901 1 4 1 1
21.5 4 120.1 97 3.70 2.46520.011 0 3 1 10
15.5 8 318.0150 2.76 3.52016.870 0 3 2 6
15.2 8 304.0150 3.15 3.43517.300 0 3 2 7
13.3 8 350.0245 3.73 3.84015.410 0 3 4 12
19.2 8 400.0175 3.08 3.84517.050 0 3 2 1
27.3 4 79.0 66 4.08 1.93518.901 1 4 1 5
26.0 4 120.3 91 4.43 2.14016.700 1 5 2 6
30.4 4 95.1113 3.77 1.51316.901 1 5 2 3
15.8 8 351.0264 4.22 3.17014.500 1 5 4 5
19.7 6 145.0175 3.62 2.77015.500 1 5 6 4
15.0 8 301.0335 3.54 3.57014.600 1 5 8 9
21.4 4 121.0109 4.11 2.78018.601 1 4 2 11
mpgdisphpdratwtqsecvsamgearcarb
21.0 160.0110 3.90 2.62016.460 1 4 4
21.0 160.0110 3.90 2.87517.020 1 4 4
22.8 108.0 93 3.85 2.32018.611 1 4 1
21.4 258.0110 3.08 3.21519.441 0 3 1
18.7 360.0175 3.15 3.44017.020 0 3 2
18.1 225.0105 2.76 3.46020.221 0 3 1
14.3 360.0245 3.21 3.57015.840 0 3 4
24.4 146.7 62 3.69 3.19020.001 0 4 2
22.8 140.8 95 3.92 3.15022.901 0 4 2
19.2 167.6123 3.92 3.44018.301 0 4 4
17.8 167.6123 3.92 3.44018.901 0 4 4
16.4 275.8180 3.07 4.07017.400 0 3 3
17.3 275.8180 3.07 3.73017.600 0 3 3
15.2 275.8180 3.07 3.78018.000 0 3 3
10.4 472.0205 2.93 5.25017.980 0 3 4
10.4 460.0215 3.00 5.42417.820 0 3 4
14.7 440.0230 3.23 5.34517.420 0 3 4
32.4 78.7 66 4.08 2.20019.471 1 4 1
30.4 75.7 52 4.93 1.61518.521 1 4 2
33.9 71.1 65 4.22 1.83519.901 1 4 1
21.5 120.1 97 3.70 2.46520.011 0 3 1
15.5 318.0150 2.76 3.52016.870 0 3 2
15.2 304.0150 3.15 3.43517.300 0 3 2
13.3 350.0245 3.73 3.84015.410 0 3 4
19.2 400.0175 3.08 3.84517.050 0 3 2
27.3 79.0 66 4.08 1.93518.901 1 4 1
26.0 120.3 91 4.43 2.14016.700 1 5 2
30.4 95.1113 3.77 1.51316.901 1 5 2
15.8 351.0264 4.22 3.17014.500 1 5 4
19.7 145.0175 3.62 2.77015.500 1 5 6
15.0 301.0335 3.54 3.57014.600 1 5 8
21.4 121.0109 4.11 2.78018.601 1 4 2
mpgcyldisphpdratwtqsecvsamgearcarbdispl_l
21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 2.621932
21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 2.621932
22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 1.769804
21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 4.227866
18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 5.899347
18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 3.687092
14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 5.899347
24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 2.403984
22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 2.307300
19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 2.746474
17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 2.746474
16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 4.519556
17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 4.519556
15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 4.519556
10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 7.734700
10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 7.538055
14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 7.210313
32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 1.289663
30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 1.240502
33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 1.165121
21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 1.968088
15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 5.211090
15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 4.981671
13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 5.735477
19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 6.554830
27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 1.294579
26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 1.971365
30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 1.558411
15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 5.751864
19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 2.376126
15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 4.932510
21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 1.982836
displ_l
2.621932
2.621932
1.769804
4.227866
5.899347
3.687092
5.899347
2.403984
2.307300
2.746474
2.746474
4.519556
4.519556
4.519556
7.734700
7.538055
7.210313
1.289663
1.240502
1.165121
1.968088
5.211090
4.981671
5.735477
6.554830
1.294579
1.971365
1.558411
5.751864
2.376126
4.932510
1.982836
mpgcyldisphpdratwtqsecvsamgearcarb
21.0 600 160.0110 3.90 2.62016.460 1 4 4
21.0 600 160.0110 3.90 2.87517.020 1 4 4
22.8 400 108.0 93 3.85 2.32018.611 1 4 1
21.4 600 258.0110 3.08 3.21519.441 0 3 1
18.7 800 360.0175 3.15 3.44017.020 0 3 2
18.1 600 225.0105 2.76 3.46020.221 0 3 1
14.3 800 360.0245 3.21 3.57015.840 0 3 4
24.4 400 146.7 62 3.69 3.19020.001 0 4 2
22.8 400 140.8 95 3.92 3.15022.901 0 4 2
19.2 600 167.6123 3.92 3.44018.301 0 4 4
17.8 600 167.6123 3.92 3.44018.901 0 4 4
16.4 800 275.8180 3.07 4.07017.400 0 3 3
17.3 800 275.8180 3.07 3.73017.600 0 3 3
15.2 800 275.8180 3.07 3.78018.000 0 3 3
10.4 800 472.0205 2.93 5.25017.980 0 3 4
10.4 800 460.0215 3.00 5.42417.820 0 3 4
14.7 800 440.0230 3.23 5.34517.420 0 3 4
32.4 400 78.7 66 4.08 2.20019.471 1 4 1
30.4 400 75.7 52 4.93 1.61518.521 1 4 2
33.9 400 71.1 65 4.22 1.83519.901 1 4 1
21.5 400 120.1 97 3.70 2.46520.011 0 3 1
15.5 800 318.0150 2.76 3.52016.870 0 3 2
15.2 800 304.0150 3.15 3.43517.300 0 3 2
13.3 800 350.0245 3.73 3.84015.410 0 3 4
19.2 800 400.0175 3.08 3.84517.050 0 3 2
27.3 400 79.0 66 4.08 1.93518.901 1 4 1
26.0 400 120.3 91 4.43 2.14016.700 1 5 2
30.4 400 95.1113 3.77 1.51316.901 1 5 2
15.8 800 351.0264 4.22 3.17014.500 1 5 4
19.7 600 145.0175 3.62 2.77015.500 1 5 6
15.0 800 301.0335 3.54 3.57014.600 1 5 8
21.4 400 121.0109 4.11 2.78018.601 1 4 2
In [3]:
library(dplyr)
chicago <- readRDS("./chicago.rds") %>%
  rename(dewpoint = dptp, pm25 = pm25tmean2)

# Add a pm25detrend variable: pm25 minus its overall mean (missing values are
# ignored when the mean is computed).

chicago <- chicago %>%
  mutate(pm25detrend = pm25 - mean(pm25, na.rm = TRUE))
head(chicago)
Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

citytmpddewpointdatepm25pm10tmean2o3tmean2no2tmean2pm25detrend
chic 31.5 31.500 1987-01-01NA 34.00000 4.250000 19.98810 NA
chic 33.0 29.875 1987-01-02NA NA 3.304348 23.19099 NA
chic 33.0 27.375 1987-01-03NA 34.16667 3.333333 23.81548 NA
chic 29.0 28.625 1987-01-04NA 47.00000 4.375000 30.43452 NA
chic 32.0 28.875 1987-01-05NA NA 4.750000 30.33333 NA
chic 40.0 35.125 1987-01-06NA 48.00000 5.833333 25.77233 NA
In [4]:
# Add `new` (the negated Ozone column) and overwrite Temp with
# (Temp - 32) / 1.8.
airquality %>%
  mutate(new = -Ozone, Temp = (Temp - 32) / 1.8)
OzoneSolar.RWindTempMonthDaynew
41 190 7.4 19.444445 1 -41
36 118 8.0 22.222225 2 -36
12 149 12.6 23.333335 3 -12
18 313 11.5 16.666675 4 -18
NA NA 14.3 13.333335 5 NA
28 NA 14.9 18.888895 6 -28
23 299 8.6 18.333335 7 -23
19 99 13.8 15.000005 8 -19
8 19 20.1 16.111115 9 -8
NA 194 8.6 20.555565 10 NA
7 NA 6.9 23.333335 11 -7
16 256 9.7 20.555565 12 -16
11 290 9.2 18.888895 13 -11
14 274 10.9 20.000005 14 -14
18 65 13.2 14.444445 15 -18
14 334 11.5 17.777785 16 -14
34 307 12.0 18.888895 17 -34
6 78 18.4 13.888895 18 -6
30 322 11.5 20.000005 19 -30
11 44 9.7 16.666675 20 -11
1 8 9.7 15.000005 21 -1
11 320 16.6 22.777785 22 -11
4 25 9.7 16.111115 23 -4
32 92 12.0 16.111115 24 -32
NA 66 16.6 13.888895 25 NA
NA 266 14.9 14.444445 26 NA
NA NA 8.0 13.888895 27 NA
23 13 12.0 19.444445 28 -23
45 252 14.9 27.222225 29 -45
115 223 5.7 26.111115 30 -115
96 167 6.9 32.777789 1 -96
78 197 5.1 33.333339 2 -78
73 183 2.8 33.888899 3 -73
91 189 4.6 33.888899 4 -91
47 95 7.4 30.555569 5 -47
32 92 15.5 28.888899 6 -32
20 252 10.9 26.666679 7 -20
23 220 10.3 25.555569 8 -23
21 230 10.9 23.888899 9 -21
24 259 9.7 22.777789 10 -24
44 236 14.9 27.222229 11 -44
21 259 15.5 24.444449 12 -21
28 238 6.3 25.000009 13 -28
9 24 10.9 21.666679 14 -9
13 112 11.5 21.666679 15 -13
46 237 6.9 25.555569 16 -46
18 224 13.8 19.444449 17 -18
13 27 10.3 24.444449 18 -13
24 238 10.3 20.000009 19 -24
16 201 8.0 27.777789 20 -16
13 238 12.6 17.777789 21 -13
23 14 9.2 21.666679 22 -23
36 139 10.3 27.222229 23 -36
7 49 10.3 20.555569 24 -7
14 20 16.6 17.222229 25 -14
30 193 6.9 21.111119 26 -30
NA 145 13.2 25.000009 27 NA
14 191 14.3 23.888899 28 -14
18 131 8.0 24.444449 29 -18
20 223 11.5 20.000009 30 -20
In [5]:
library(dplyr)
chicago <- rename(
  readRDS("./chicago.rds"),
  dewpoint = dptp,
  pm25 = pm25tmean2
)

# Quintile break points of pm25, then the quintile bin each row falls in.
qq <- quantile(chicago[["pm25"]], probs = seq(0, 1, 0.2), na.rm = TRUE)
chicago <- chicago %>%
  mutate(pm25.quint = cut(pm25, qq))
In [6]:
# Quintile summary as a single pipeline: bin pm25, group by bin, then average
# o3 and no2 within each bin.
chicago %>%
  mutate(pm25.quint = cut(pm25, qq)) %>%
  group_by(pm25.quint) %>%
  summarize(
    o3 = mean(o3tmean2, na.rm = TRUE),
    no2 = mean(no2tmean2, na.rm = TRUE)
  )
pm25.quinto3no2
(1.7,8.7] 21.66401 17.99129
(8.7,12.4] 20.38248 22.13004
(12.4,16.7]20.66160 24.35708
(16.7,22.6]19.88122 27.27132
(22.6,61.5]20.31775 29.64427
NA 18.79044 25.77585
In [7]:
# Another example: summarise pollutant levels by calendar month (mean pm25,
# maximum o3, median no2), which helps reveal seasonal trends in the data.
# The `mon` field of a POSIXlt value is zero-based, hence the + 1.

chicago %>%
  mutate(month = as.POSIXlt(date)$mon + 1) %>%
  group_by(month) %>%
  summarize(
    pm25 = mean(pm25, na.rm = TRUE),
    o3 = max(o3tmean2, na.rm = TRUE),
    no2 = median(no2tmean2, na.rm = TRUE)
  )
# A tibble: 12 × 4

# Here we can see that o3 tends to be low in the winter months and high in the summer while no2 is higher in the winter and lower in the summer.
monthpm25o3no2
1 17.7699628.2222225.35417
2 20.3751337.3750026.78034
3 17.4081839.0500026.76984
4 13.8587947.9490725.03125
5 14.0742052.7500024.22222
6 15.8646166.5875025.01140
7 16.5708759.5416722.38442
8 16.9338053.9670122.98333
9 15.9127957.4886424.47917
10 14.2355747.0927524.15217
11 15.1579429.4583323.56537
12 17.5222127.7083324.45773
In [2]:
# Show the names (and first three rows) of the first five variables in the
# chicago data frame.

library(dplyr)
chicago <- readRDS("./chicago.rds")

chicago[, 1:5] %>%
  head(3)

# The dptp column is supposed to represent the dew point temperature and the
# pm25tmean2 column provides the PM2.5 data. These names are pretty obscure or
# awkward, so they should probably be renamed to something more sensible.

chicago <- chicago %>%
  rename(dewpoint = dptp, pm25 = pm25tmean2)
chicago[, 1:5] %>%
  head(3)

# Inside rename(), the new name goes on the left-hand side of the = sign and
# the old name on the right-hand side.
citytmpddptpdatepm25tmean2
chic 31.5 31.500 1987-01-01NA
chic 33.0 29.875 1987-01-02NA
chic 33.0 27.375 1987-01-03NA
citytmpddewpointdatepm25
chic 31.5 31.500 1987-01-01NA
chic 33.0 29.875 1987-01-02NA
chic 33.0 27.375 1987-01-03NA
In [13]:
# select() also supports helper functions that pick variables by name
# pattern. For example, ends_with("2") keeps every variable whose name ends
# with a "2".

subset <- chicago %>%
  select(ends_with("2"))
str(subset)
'data.frame':	6940 obs. of  4 variables:
 $ pm25tmean2: num  NA NA NA NA NA NA NA NA NA NA ...
 $ pm10tmean2: num  34 NA 34.2 47 NA ...
 $ o3tmean2  : num  4.25 3.3 3.33 4.38 4.75 ...
 $ no2tmean2 : num  20 23.2 23.8 30.4 30.3 ...
In [22]:
# Store each car's name in a `model` column, then take two overlapping row
# slices of mtcars: rows 1-20 and rows 10-32.
mtcars[["model"]] <- rownames(mtcars)
first <- mtcars[seq_len(20), ]
second <- mtcars[seq(10, 32), ]
first
mpgcyldisphpdratwtqsecvsamgearcarbmodel
Mazda RX421.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4
Mazda RX4 Wag21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 Mazda RX4 Wag
Datsun 71022.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 Datsun 710
Hornet 4 Drive21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 Hornet 4 Drive
Hornet Sportabout18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 Hornet Sportabout
Valiant18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 Valiant
Duster 36014.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 Duster 360
Merc 240D24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 Merc 240D
Merc 23022.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 Merc 230
Merc 28019.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 Merc 280
Merc 280C17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 Merc 280C
Merc 450SE16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 Merc 450SE
Merc 450SL17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 Merc 450SL
Merc 450SLC15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 Merc 450SLC
Cadillac Fleetwood10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Cadillac Fleetwood
Lincoln Continental10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 Lincoln Continental
Chrysler Imperial14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 Chrysler Imperial
Fiat 12832.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 Fiat 128
Honda Civic30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 Honda Civic
Toyota Corolla33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 Toyota Corolla
In [8]:
# Display the `second` data frame.
second
mpgcyldisphpdratwtqsecvsamgearcarbmodel
Merc 28019.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 Merc 280
Merc 280C17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 Merc 280C
Merc 450SE16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 Merc 450SE
Merc 450SL17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 Merc 450SL
Merc 450SLC15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 Merc 450SLC
Cadillac Fleetwood10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Cadillac Fleetwood
Lincoln Continental10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 Lincoln Continental
Chrysler Imperial14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 Chrysler Imperial
Fiat 12832.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 Fiat 128
Honda Civic30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 Honda Civic
Toyota Corolla33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 Toyota Corolla
Toyota Corona21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 Toyota Corona
Dodge Challenger15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 Dodge Challenger
AMC Javelin15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 AMC Javelin
Camaro Z2813.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 Camaro Z28
Pontiac Firebird19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 Pontiac Firebird
Fiat X1-927.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 Fiat X1-9
Porsche 914-226.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 Porsche 914-2
Lotus Europa30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 Lotus Europa
Ford Pantera L15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 Ford Pantera L
Ferrari Dino19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 Ferrari Dino
Maserati Bora15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 Maserati Bora
Volvo 142E21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 Volvo 142E
In [27]:
# Rows that appear in both `first` and `second` (dplyr's data-frame method,
# which masks base::intersect once dplyr is attached).
dplyr::intersect(first, second)
mpgcyldisphpdratwtqsecvsamgearcarbmodel
19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 Merc 280
17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 Merc 280C
16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 Merc 450SE
17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 Merc 450SL
15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 Merc 450SLC
10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Cadillac Fleetwood
10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 Lincoln Continental
14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 Chrysler Imperial
32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 Fiat 128
30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 Honda Civic
33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 Toyota Corolla
In [29]:
# Rows that appear in `first`, in `second`, or in both, without duplicates
# (dplyr's data-frame method, which masks base::union once dplyr is attached).
dplyr::union(first, second)
mpgcyldisphpdratwtqsecvsamgearcarbmodel
21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 Volvo 142E
15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 Maserati Bora
19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 Ferrari Dino
15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4 Ford Pantera L
30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2 Lotus Europa
26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 Porsche 914-2
27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 Fiat X1-9
19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2 Pontiac Firebird
13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4 Camaro Z28
15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2 AMC Javelin
15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2 Dodge Challenger
21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 Toyota Corona
33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 Toyota Corolla
30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 Honda Civic
32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 Fiat 128
14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 Chrysler Imperial
10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 Lincoln Continental
10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Cadillac Fleetwood
15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3 Merc 450SLC
17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3 Merc 450SL
16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3 Merc 450SE
17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4 Merc 280C
19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 Merc 280
22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 Merc 230
24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 Merc 240D
14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 Duster 360
18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 Valiant
18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 Hornet Sportabout
21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 Hornet 4 Drive
22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 Datsun 710
21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 Mazda RX4 Wag
21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4
In [30]:
# Rows of `first` that do not appear in `second` (dplyr's data-frame method,
# which masks base::setdiff once dplyr is attached).
dplyr::setdiff(first, second)
mpgcyldisphpdratwtqsecvsamgearcarbmodel
21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 Mazda RX4
21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 Mazda RX4 Wag
22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 Datsun 710
21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 Hornet 4 Drive
18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 Hornet Sportabout
18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 Valiant
14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 Duster 360
24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 Merc 240D
22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 Merc 230
In [15]:
# Every summarise() call peels off one grouping level, because each group at
# that level has been reduced to a single row; group_vars() reports what is
# still grouped afterwards.

mtcars %>%
  group_by(cyl, vs) %>%
  summarise(cyl_n = n()) %>%
  group_vars()
'cyl'
In [16]:
# With data frames, a newly created summary immediately replaces the existing
# variable of the same name. Here `disp` is already the per-group mean (a
# single value) by the time sd(disp) is evaluated, so `sd` comes out as NA.

mtcars %>%
  group_by(cyl) %>%
  summarise(disp = mean(disp), sd = sd(disp))
cyldispsd
4 105.1364NA
6 183.3143NA
8 353.1000NA