Chapter 4.2.1 Exercises

ggpl-f
In [3]:
library(ggplot2)

# There are two ways of zooming the plot display: with scales or with coordinate systems. They work in two rather different ways.

# Scatterplot of weight against displacement with a smoothed trend line;
# stored as `p` so the following cells can zoom it in different ways.
p <- ggplot(data = mtcars, mapping = aes(x = disp, y = wt)) +
  geom_point() +
  geom_smooth()
p
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
In [5]:
# Setting the limits on a scale converts all values outside the range to NA.

p + scale_x_continuous(limits = c(325, 500))
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
Warning message:
“Removed 24 rows containing non-finite values (stat_smooth).”Warning message:
“Removed 24 rows containing missing values (geom_point).”
In [6]:
# Setting the limits on the coordinate system performs a visual zoom. The data is unchanged, and we just view a small portion of the original plot. Note how smooth continues past the points visible on this plot.

p + coord_cartesian(xlim = c(325, 500))
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
In [7]:
# By default, the same expansion factor is applied as when setting scale limits. You can set the limits precisely by setting expand = FALSE

p + coord_cartesian(xlim = c(325, 500), expand = FALSE)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
In [10]:
# Similarly, we can use expand = FALSE to turn off expansion with the default limits

p + coord_cartesian(expand = FALSE)
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
In [4]:
library(ggplot2)
# coord_fixed() ensures that the ranges of axes are equal to the specified ratio by adjusting the plot aspect ratio

p <- ggplot(mtcars, aes(mpg, wt)) + geom_point()
p + coord_fixed(ratio = 1)
p + coord_fixed(ratio = 5)
p + coord_fixed(ratio = 1/5)
p + coord_fixed(xlim = c(15, 30))

# Resize the plot to see that the specified aspect ratio is maintained
In [4]:
# Very useful for creating boxplots, and other interval geoms in the horizontal instead of vertical position.

# Price distribution per cut as box plots, turned sideways by coord_flip().
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  geom_boxplot() +
  coord_flip()
In [5]:
# Histogram of carat with the default binning, shown three ways:
# as drawn, flipped, and flipped with the x scale reversed.
h <- ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_histogram()
h
h + coord_flip()
h + coord_flip() + scale_x_reverse()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
In [6]:
# You can also use it to flip line and area plots:

# Five points on y = x^2 drawn as an area plot, then the same plot flipped.
df <- data.frame(x = 1:5, y = (1:5)^2)
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_area()
last_plot() + coord_flip()
In [3]:
library(ggplot2)
# Draw New Zealand in three coordinate systems. The map data comes from the
# optional "maps" package, so the whole cell is skipped when it is missing.
# requireNamespace() checks availability without attaching the package.
if (requireNamespace("maps", quietly = TRUE)) {
  nz <- map_data("nz")

  # Prepare a map of NZ
  nzmap <- ggplot(nz, aes(x = long, y = lat, group = group)) +
    geom_polygon(fill = "white", colour = "black")

  # Inside a braced block only the value of the last expression is
  # auto-printed, so each plot is printed explicitly; otherwise the first
  # two maps would never be displayed.

  # Plot it in cartesian coordinates
  print(nzmap)

  # With correct mercator projection
  print(nzmap + coord_map())

  # With the aspect ratio approximation
  print(nzmap + coord_quickmap())
}
In [13]:
# Other projections

nzmap + coord_map("cylindrical")
nzmap + coord_map("azequalarea", orientation = c(-36.92, 174.6, 0))
nzmap + coord_map("lambert", parameters = c(-37, -44))
In [14]:
# Outline map of the US states, reused for the projection examples.
states <- map_data("state")
usamap <- ggplot(
  data = states,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(fill = "white", colour = "black")

# Use cartesian coordinates
usamap

# With mercator projection
usamap + coord_map()

# With the aspect ratio approximation
usamap + coord_quickmap()

# See ?mapproject for coordinate systems and their parameters
usamap + coord_map(projection = "gilbert")
usamap + coord_map(projection = "lagrange")
In [15]:
# For most projections, you'll need to set the orientation yourself as the automatic selection done by mapproject is not available to ggplot.

usamap + coord_map("orthographic")
usamap + coord_map("stereographic")
usamap + coord_map("conic", lat0 = 30)
usamap + coord_map("bonne", lat0 = 50)
In [20]:
# World map, using geom_path instead of geom_polygon

# World outline drawn with paths, with graticule breaks every 30 degrees of
# latitude (-60..60) and every 45 degrees of longitude (-180..180).
world <- map_data("world")
worldmap <- ggplot(
  data = world,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_path() +
  scale_y_continuous(breaks = seq(-60, 60, by = 30)) +
  scale_x_continuous(breaks = seq(-180, 180, by = 45))

# Orthographic projection with default orientation (looking down at North pole)
worldmap + coord_map(projection = "ortho")
In [21]:
# Looking up at South Pole

worldmap + coord_map("ortho", orientation = c(-90, 0, 0))
In [25]:
# Centered on New York (currently has issues with closing polygons)

worldmap + coord_map("ortho", orientation = c(41, -74, 0))
In [5]:
library(ggplot2)

# NOTE: Use these plots with caution - polar coordinates have major perceptual problems. The main point of these examples is to demonstrate how these common plots can be described in the grammar. Use with EXTREME caution.
# A pie chart = stacked bar chart + polar coordinates

# One stacked bar (constant x) filled by cylinder count; mapping the bar
# height to the angle turns the stack into a pie.
pie <- ggplot(
  data = mtcars,
  mapping = aes(x = factor(1), fill = factor(cyl))
) +
  geom_bar(width = 1)
pie + coord_polar(theta = "y")
In [6]:
# A coxcomb plot = bar chart + polar coordinates

# One full-width, black-outlined bar per cylinder count, drawn in polar
# coordinates with the default theta.
cxc <- ggplot(data = mtcars, mapping = aes(x = factor(cyl))) +
  geom_bar(width = 1, colour = "black")
cxc + coord_polar()
In [7]:


cxc + coord_polar(theta = "y")
In [14]:
# Hadley's favourite pie chart

# Two-slice data set: a 20/80 split between the two labels.
df <- data.frame(
  variable = c("does not resemble", "resembles"),
  value = c(20, 80)
)

ggplot(data = df, mapping = aes(x = "", y = value, fill = variable)) +
  # single stacked bar whose segment heights are taken from `value`
  geom_col(width = 1) +
  # manual fill colours, supplied in the order red, yellow
  scale_fill_manual(values = c("red", "yellow")) +
  # map the stacked heights to the angle, starting a quarter turn round
  coord_polar(theta = "y", start = pi / 2) +
  labs(title = "Pac man")
In [53]:
# Windrose + doughnut plot

# Wind rose and race track plots of movie ratings split by budget group.
# The `movies` data lives in the optional ggplot2movies package, so the cell
# is skipped when that package is not installed.
if (requireNamespace("ggplot2movies", quietly = TRUE)) {
  # Work on a copy in the workspace so the derived columns can be added.
  movies <- ggplot2movies::movies
  # Rating cut into intervals of length 1.
  movies$rrating <- cut_interval(movies$rating, length = 1)
  # Budget cut into 4 groups with cut_number().
  movies$budgetq <- cut_number(movies$budget, 4)

  doh <- ggplot(movies, aes(x = rrating, fill = budgetq))

  # Inside a braced block only the value of the last expression is
  # auto-printed, so each plot is printed explicitly; otherwise the wind
  # rose would never be displayed.

  # Wind rose
  print(doh + geom_bar(width = 1) + coord_polar())

  # Race track plot
  print(
    doh +
      geom_bar(width = 0.9, position = "fill") +
      coord_polar(theta = "y")
  )
}
In [13]:
library(ggplot2)
p <- ggplot(mpg, aes(displ, cty)) + geom_point()

# Use vars() to supply variables from the dataset:

p + facet_grid(rows = vars(drv))
In [3]:
p + facet_grid(cols = vars(cyl))
p + facet_grid(vars(drv), vars(cyl))
In [9]:
# The historical formula interface is also available:

p + facet_grid(. ~ cyl)
p + facet_grid(drv ~ .)
p + facet_grid(drv ~ cyl)
In [15]:
# To change plot order of facet grid, change the order of variable levels with factor(). If you combine a facetted dataset with a dataset that lacks those faceting variables, the data will be repeated across the missing combinations:

# A single point at the overall means of displ and cty. It carries no `cyl`
# column, so the red point is repeated in every panel.
df <- data.frame(
  displ = mean(mpg$displ),
  cty = mean(mpg$cty)
)
p +
  facet_grid(cols = vars(cyl)) +
  geom_point(data = df, colour = "red", size = 2)
In [16]:
# Free scales
# You can also choose whether the scales should be constant across all panels (the default), or whether they should be allowed to vary

# Weight against mpg coloured by cylinder count, then one column per
# cylinder count with free scales.
mt <- ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = wt, colour = factor(cyl))
) +
  geom_point()
mt + facet_grid(. ~ cyl, scales = "free")
In [17]:
# If scales and space are free, then the mapping between position and values in the data will be the same across all panels. This is particularly useful for categorical axes.

# Models by drive type, one row of panels per manufacturer. Free scales and
# free space are both requested; strip labels are drawn unrotated.
ggplot(data = mpg, mapping = aes(x = drv, y = model)) +
  geom_point() +
  facet_grid(manufacturer ~ ., scales = "free", space = "free") +
  theme(strip.text.y = element_text(angle = 0))
In [22]:
# Margins can be requested for specific faceting variables by name.
# `mg` is built here because nothing earlier in the notebook defines it:
# a scatterplot of weight against mpg for mtcars.
mg <- ggplot(mtcars, aes(x = mpg, y = wt)) + geom_point()
mg + facet_grid(vs + am ~ gear, margins = "am")
In [10]:
mpg
manufacturer model displ year cyl trans drv cty hwy fl class
audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
audi a4 2.0 2008 4 auto(av) f 21 30 p compact
audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
audi a4 2.8 1999 6 manual(m5) f 18 26 p compact
audi a4 3.1 2008 6 auto(av) f 18 27 p compact
audi a4 quattro 1.8 1999 4 manual(m5) 4 18 26 p compact
audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25 p compact
audi a4 quattro 2.0 2008 4 manual(m6) 4 20 28 p compact
audi a4 quattro 2.0 2008 4 auto(s6) 4 19 27 p compact
audi a4 quattro 2.8 1999 6 auto(l5) 4 15 25 p compact
audi a4 quattro 2.8 1999 6 manual(m5) 4 17 25 p compact
audi a4 quattro 3.1 2008 6 auto(s6) 4 17 25 p compact
audi a4 quattro 3.1 2008 6 manual(m6) 4 15 25 p compact
audi a6 quattro 2.8 1999 6 auto(l5) 4 15 24 p midsize
audi a6 quattro 3.1 2008 6 auto(s6) 4 17 25 p midsize
audi a6 quattro 4.2 2008 8 auto(s6) 4 16 23 p midsize
chevrolet c1500 suburban 2wd 5.3 2008 8 auto(l4) r 14 20 r suv
chevrolet c1500 suburban 2wd 5.3 2008 8 auto(l4) r 11 15 e suv
chevrolet c1500 suburban 2wd 5.3 2008 8 auto(l4) r 14 20 r suv
chevrolet c1500 suburban 2wd 5.7 1999 8 auto(l4) r 13 17 r suv
chevrolet c1500 suburban 2wd 6.0 2008 8 auto(l4) r 12 17 r suv
chevrolet corvette 5.7 1999 8 manual(m6) r 16 26 p 2seater
chevrolet corvette 5.7 1999 8 auto(l4) r 15 23 p 2seater
chevrolet corvette 6.2 2008 8 manual(m6) r 16 26 p 2seater
chevrolet corvette 6.2 2008 8 auto(s6) r 15 25 p 2seater
chevrolet corvette 7.0 2008 8 manual(m6) r 15 24 p 2seater
chevrolet k1500 tahoe 4wd 5.3 2008 8 auto(l4) 4 14 19 r suv
chevrolet k1500 tahoe 4wd 5.3 2008 8 auto(l4) 4 11 14 e suv
toyota toyota tacoma 4wd 3.4 1999 6 auto(l4) 4 15 19 r pickup
toyota toyota tacoma 4wd 4.0 2008 6 manual(m6) 4 15 18 r pickup
toyota toyota tacoma 4wd 4.0 2008 6 auto(l5) 4 16 20 r pickup
volkswagen gti 2.0 1999 4 manual(m5) f 21 29 r compact
volkswagen gti 2.0 1999 4 auto(l4) f 19 26 r compact
volkswagen gti 2.0 2008 4 manual(m6) f 21 29 p compact
volkswagen gti 2.0 2008 4 auto(s6) f 22 29 p compact
volkswagen gti 2.8 1999 6 manual(m5) f 17 24 r compact
volkswagen jetta 1.9 1999 4 manual(m5) f 33 44 d compact
volkswagen jetta 2.0 1999 4 manual(m5) f 21 29 r compact
volkswagen jetta 2.0 1999 4 auto(l4) f 19 26 r compact
volkswagen jetta 2.0 2008 4 auto(s6) f 22 29 p compact
volkswagen jetta 2.0 2008 4 manual(m6) f 21 29 p compact
volkswagen jetta 2.5 2008 5 auto(s6) f 21 29 r compact
volkswagen jetta 2.5 2008 5 manual(m5) f 21 29 r compact
volkswagen jetta 2.8 1999 6 auto(l4) f 16 23 r compact
volkswagen jetta 2.8 1999 6 manual(m5) f 17 24 r compact
volkswagen new beetle 1.9 1999 4 manual(m5) f 35 44 d subcompact
volkswagen new beetle 1.9 1999 4 auto(l4) f 29 41 d subcompact
volkswagen new beetle 2.0 1999 4 manual(m5) f 21 29 r subcompact
volkswagen new beetle 2.0 1999 4 auto(l4) f 19 26 r subcompact
volkswagen new beetle 2.5 2008 5 manual(m5) f 20 28 r subcompact
volkswagen new beetle 2.5 2008 5 auto(s6) f 20 29 r subcompact
volkswagen passat 1.8 1999 4 manual(m5) f 21 29 p midsize
volkswagen passat 1.8 1999 4 auto(l5) f 18 29 p midsize
volkswagen passat 2.0 2008 4 auto(s6) f 19 28 p midsize
volkswagen passat 2.0 2008 4 manual(m6) f 21 29 p midsize
volkswagen passat 2.8 1999 6 auto(l5) f 16 26 p midsize
volkswagen passat 2.8 1999 6 manual(m5) f 18 26 p midsize
volkswagen passat 3.6 2008 6 auto(s6) f 17 26 p midsize
In [5]:
library(ggplot2)
p <- ggplot(mpg, aes(displ, hwy)) + geom_point(); p
In [6]:
# Use vars() to supply faceting variables:

p + facet_wrap(vars(class))
In [7]:
# The historical interface with formulas is also available:

p + facet_wrap(~class)
In [8]:
# Control the number of rows and columns with nrow and ncol

p + facet_wrap(vars(class), nrow = 4)
In [9]:
# You can facet by multiple variables

# One panel per combination of cyl and drv.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(facets = vars(cyl, drv))
In [14]:
# Use the `labeller` option to control how labels are printed:

# Facet by cyl and drv given as a character vector, with the "label_both"
# labeller selected by name.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(facets = c("cyl", "drv"), labeller = "label_both")
In [22]:
# Use `strip.position` to display the facet labels at the side of your choice. Setting it to `bottom` makes it act as a subtitle for the axis. This is typically used with free scales and a theme without boxes around strip labels.

# One line panel per economic variable, each with its own y scale, laid out
# in two rows; strip labels sit at the bottom, outside the axes, with no
# background box.
ggplot(data = economics_long, mapping = aes(x = date, y = value)) +
  geom_line() +
  facet_wrap(
    ~variable,
    scales = "free_y",
    nrow = 2,
    strip.position = "bottom"
  ) +
  theme(
    strip.background = element_blank(),
    strip.placement = "outside"
  )
Warning message:
“Suppressing axis rendering when strip.position = 'bottom' and strip.placement == 'outside'”
In [8]:
# The regression below is mpg ~ wt on mtcars, so the reference line must be
# drawn on a plot of those same variables. `p` is rebuilt here because the
# `p` left over from earlier cells plots displ against hwy from the mpg data.
p <- ggplot(mtcars, aes(wt, mpg)) + geom_point()

# Calculate slope and intercept of line of best fit
coef(lm(mpg ~ wt, data = mtcars))

# Draw the line by hand; 37 and -5 are the printed coefficients, rounded.
p + geom_abline(intercept = 37, slope = -5)

# But this is easier to do with geom_smooth:
p + geom_smooth(method = "lm", se = FALSE)
(Intercept)
37.285126167342
wt
-5.34447157272267
In [4]:
# Depth distribution per cut, restricted to depths between 55 and 70:
# first as coloured outlines only ...
ggplot(data = diamonds, mapping = aes(x = depth, colour = cut)) +
  geom_density() +
  xlim(55, 70)

# ... then with a translucent fill in the matching colour.
ggplot(
  data = diamonds,
  mapping = aes(x = depth, fill = cut, colour = cut)
) +
  geom_density(alpha = 0.1) +
  xlim(55, 70)
Warning message:
“Removed 45 rows containing non-finite values (stat_density).”
Warning message:
“Removed 45 rows containing non-finite values (stat_density).”
In [4]:
# Re-run of the preceding cell: the same two density plots of depth per cut,
# first as outlines, then with translucent fills (alpha = 0.1).
# xlim(55, 70) discards observations outside that range, which is what the
# "Removed 45 rows" warnings below report.
ggplot(diamonds, aes(depth, colour = cut)) +
geom_density() +
xlim(55, 70)
ggplot(diamonds, aes(depth, fill = cut, colour = cut)) +
geom_density(alpha = 0.1) +
xlim(55, 70)
Warning message:
“Removed 45 rows containing non-finite values (stat_density).”
Warning message:
“Removed 45 rows containing non-finite values (stat_density).”
In [8]:
# You can use position="fill" to produce a conditional density estimate

# y is mapped to the computed `count` statistic instead of the default, and
# position = "fill" stacks the per-cut curves.
# NOTE(review): newer ggplot2 releases deprecate stat() in favour of
# after_stat(); switch once the installed version is confirmed to provide it.
ggplot(diamonds, aes(carat, stat(count), fill = cut)) +
geom_density(position = "fill")
In [10]:
# Lake Huron water level with a band of +/- 1 around it.
# The data frame and base plot are built here: the `h` left over from the
# histogram cell is based on diamonds and has no `year` or `level` column.
huron <- data.frame(year = 1875:1972, level = as.vector(LakeHuron))
h <- ggplot(huron, aes(year))

h +
  geom_ribbon(aes(ymin = level - 1, ymax = level + 1), fill = "grey70") +
  geom_line(aes(y = level))
In [4]:
# Plot every row of luv_colours at its (u, v) position, coloured with the
# value stored in its `col` column (identity scale), on equal-unit axes.
ggplot(data = luv_colours, mapping = aes(x = u, y = v)) +
  geom_point(mapping = aes(colour = col), size = 3) +
  scale_colour_identity() +
  coord_equal()
In [10]:
library(ggplot2)

# Rescaled economic series over time: one line per variable, first
# distinguished only by grouping, then by line type.
base <- ggplot(data = economics_long, mapping = aes(x = date, y = value01))
base + geom_line(mapping = aes(group = variable))
base + geom_line(mapping = aes(linetype = variable))

# See scale_manual for more flexibility
In [18]:
# You can set color and fill aesthetics at the same time

# Named colour vector keyed by cylinder count. It is defined here because
# nothing earlier in the notebook creates `cols`.
cols <- c("4" = "blue", "6" = "darkgreen", "8" = "red")

# One manual scale drives both the outline colour and the fill of the
# points (shape 21 has a separate border and interior).
ggplot(
  mtcars,
  aes(mpg, wt, colour = factor(cyl), fill = factor(cyl))
) +
  geom_point(shape = 21, alpha = 0.5, size = 2) +
  scale_colour_manual(
    values = cols,
    aesthetics = c("colour", "fill")
  )
In [4]:
# Manipulating the default position scales lets you change the axis labels

# `p1` is built here because nothing earlier in the notebook defines it:
# highway mileage against engine displacement from the mpg data, matching
# the axis titles set below.
p1 <- ggplot(mpg, aes(displ, hwy)) + geom_point()

p1 +
  scale_x_continuous("Engine displacement (L)") +
  scale_y_continuous("Highway MPG")
In [4]:
# Permutations to create a simulation-based null distribution for one numerical response and one categorical predictor using t statistic

library(dplyr)
library(infer)

# Permutation null distribution of the t statistic for mpg by transmission.
mtcars %>%
  dplyr::mutate(am = factor(am)) %>%
  # alt: response = mpg, explanatory = am
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "t", order = c("1", "0")) %>%
  # "simulation" is the default method
  visualize(method = "simulation")
In [6]:
# Theoretical t distribution for one numerical response and one categorical predictor using t statistic

# Theoretical t distribution for mpg by transmission.
# generate() is not needed since we are not doing simulation.
mtcars %>%
  dplyr::mutate(am = factor(am)) %>%
  # alt: response = mpg, explanatory = am
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  calculate(stat = "t", order = c("1", "0")) %>%
  visualize(method = "theoretical")
Warning message:
“Check to make sure the conditions have been met for the theoretical method. {infer} currently does not check these for you.”
In [7]:
# Overlay theoretical distribution on top of randomized t-statistics

# Permuted t statistics with the theoretical curve drawn on top.
mtcars %>%
  dplyr::mutate(am = factor(am)) %>%
  # alt: response = mpg, explanatory = am
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "t", order = c("1", "0")) %>%
  visualize(method = "both")
Warning message:
“Check to make sure the conditions have been met for the theoretical method. {infer} currently does not check these for you.”
In [ ]: