Chapter 4.2.1 Exercises
In [3]:
library(ggplot2)
# A plot can be zoomed either through its scales or through its coordinate
# system; the two approaches behave quite differently.
p <- ggplot(data = mtcars, mapping = aes(x = disp, y = wt)) +
  geom_point() +
  geom_smooth()
p
In [5]:
# Zoom via the scale: setting limits on a scale converts every value that
# falls outside the range to NA.
p + scale_x_continuous(limits = c(325, 500))
In [6]:
# Zoom via the coordinate system: this is a purely visual zoom. The data is
# unchanged and we only view a small portion of the original plot. Note how
# the smooth continues past the points visible on this plot.
p + coord_cartesian(xlim = c(325, 500))
In [7]:
# By default the same expansion factor is applied as when setting scale
# limits. Use expand = FALSE to make the limits exact.
p + coord_cartesian(xlim = c(325, 500), expand = FALSE)
In [10]:
# Similarly, expand = FALSE turns off the expansion when the default limits
# are used.
p + coord_cartesian(expand = FALSE)
In [4]:
library(ggplot2)
# coord_fixed() keeps the ranges of the axes at the specified ratio by
# adjusting the aspect ratio of the plot.
p <- ggplot(data = mtcars, mapping = aes(x = mpg, y = wt)) +
  geom_point()
p + coord_fixed(ratio = 1)
p + coord_fixed(ratio = 5)
p + coord_fixed(ratio = 1 / 5)
p + coord_fixed(xlim = c(15, 30))
# Resize the plot to check that the requested aspect ratio is maintained.
In [4]:
# coord_flip() is handy for drawing boxplots (and other interval geoms)
# horizontally instead of vertically.
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
  geom_boxplot() +
  coord_flip()
In [5]:
# Histogram of carat: as drawn, flipped, and flipped with the x scale
# reversed.
h <- ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_histogram()
h
h + coord_flip()
h + coord_flip() + scale_x_reverse()
In [6]:
# Line and area plots can be flipped in the same way.
df <- data.frame(x = 1:5, y = (1:5)^2)
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_area()
# Re-use the plot that was just drawn and flip it.
last_plot() + coord_flip()
In [3]:
library(ggplot2)
# New Zealand map drawn with three coordinate systems. The example needs the
# "maps" package; when require() cannot load it the block is skipped.
if (require("maps")) {
  nz <- map_data("nz")
  # Prepare a map of NZ
  nzmap <- ggplot(nz, aes(x = long, y = lat, group = group)) +
    geom_polygon(fill = "white", colour = "black")
  # Inside `{ }` only the value of the last expression is auto-printed, so
  # every plot is printed explicitly; otherwise the first two never appear.
  # Plot it in cartesian coordinates
  print(nzmap)
  # With correct mercator projection
  print(nzmap + coord_map())
  # With the aspect ratio approximation
  print(nzmap + coord_quickmap())
}
In [13]:
# The same NZ map under other projections.
nzmap + coord_map(projection = "cylindrical")
nzmap + coord_map(
  projection = "azequalarea",
  orientation = c(-36.92, 174.6, 0)
)
nzmap + coord_map(projection = "lambert", parameters = c(-37, -44))
In [14]:
# Outline map of the US states.
states <- map_data(map = "state")
usamap <- ggplot(
  data = states,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_polygon(fill = "white", colour = "black")
# Cartesian coordinates
usamap
# Mercator projection, then the quick aspect-ratio approximation
usamap + coord_map()
usamap + coord_quickmap()
# See ?mapproject for coordinate systems and their parameters
usamap + coord_map(projection = "gilbert")
usamap + coord_map(projection = "lagrange")
In [15]:
# For most projections the orientation has to be set by hand, because the
# automatic selection done by mapproject is not available to ggplot.
usamap + coord_map(projection = "orthographic")
usamap + coord_map(projection = "stereographic")
usamap + coord_map(projection = "conic", lat0 = 30)
usamap + coord_map(projection = "bonne", lat0 = 50)
In [20]:
# World map drawn with geom_path rather than geom_polygon.
world <- map_data(map = "world")
worldmap <- ggplot(
  data = world,
  mapping = aes(x = long, y = lat, group = group)
) +
  geom_path() +
  scale_y_continuous(breaks = seq(-60, 60, by = 30)) +
  scale_x_continuous(breaks = seq(-180, 180, by = 45))
# Orthographic projection with the default orientation (looking down at the
# North pole)
worldmap + coord_map(projection = "ortho")
In [21]:
# Looking up at the South Pole
worldmap + coord_map(projection = "ortho", orientation = c(-90, 0, 0))
In [25]:
# Centred on New York (closing polygons currently has issues)
worldmap + coord_map(projection = "ortho", orientation = c(41, -74, 0))
In [5]:
library(ggplot2)
# NOTE: polar coordinates have major perceptual problems. These examples only
# demonstrate how such common plots can be described in the grammar. Use with
# EXTREME caution.
# A pie chart = stacked bar chart + polar coordinates
pie <- ggplot(
  data = mtcars,
  mapping = aes(x = factor(1), fill = factor(cyl))
) +
  geom_bar(width = 1)
pie + coord_polar(theta = "y")
In [6]:
# A coxcomb plot = bar chart + polar coordinates
cxc <- ggplot(data = mtcars, mapping = aes(x = factor(cyl))) +
  geom_bar(width = 1, colour = "black")
cxc + coord_polar()
In [7]:
# The same bar chart in polar coordinates, this time with theta set to "y".
cxc + coord_polar(theta = "y")
In [14]:
# Hadley's favourite pie chart
df <- data.frame(
  variable = c("does not resemble", "resembles"),
  value = c(20, 80)
)
ggplot(data = df, mapping = aes(x = "", y = value, fill = variable)) +
  # one full-width bar whose segment heights come from `value`
  geom_col(width = 1) +
  # fill colours for the two segments
  scale_fill_manual(values = c("red", "yellow")) +
  # wrap the bar around the y direction to turn it into a pie
  coord_polar(theta = "y", start = pi / 2) +
  labs(title = "Pac man")
In [53]:
# Windrose + doughnut plot. The `movies` data comes from the ggplot2movies
# package; when require() cannot load it the block is skipped.
if (require("ggplot2movies")) {
  movies$rrating <- cut_interval(movies$rating, length = 1)
  movies$budgetq <- cut_number(movies$budget, 4)
  doh <- ggplot(movies, aes(x = rrating, fill = budgetq))
  # Inside `{ }` only the value of the last expression is auto-printed, so
  # both plots are printed explicitly; otherwise the wind rose never appears.
  # Wind rose
  print(doh + geom_bar(width = 1) + coord_polar())
  # Race track plot
  print(
    doh + geom_bar(width = 0.9, position = "fill") + coord_polar(theta = "y")
  )
}
In [13]:
library(ggplot2)
p <- ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
  geom_point()
# Faceting variables from the dataset are supplied with vars().
p + facet_grid(rows = vars(drv))
In [3]:
# Facet by columns only, then by rows (drv) and columns (cyl) together.
p + facet_grid(cols = vars(cyl))
p + facet_grid(rows = vars(drv), cols = vars(cyl))
In [9]:
# The historical formula interface is also available. The left-hand side
# gives the row variable and the right-hand side the column variable; `.`
# stands for "no faceting in this direction".
p + facet_grid(. ~ cyl)
p + facet_grid(drv ~ .)
p + facet_grid(drv ~ cyl)
In [15]:
# To change the order of the panels in a facet grid, change the order of the
# variable's levels with factor().
# When a faceted dataset is combined with a dataset that lacks the faceting
# variables, that data is repeated across the missing combinations.
df <- with(mpg, data.frame(displ = mean(displ), cty = mean(cty)))
p +
  facet_grid(cols = vars(cyl)) +
  geom_point(data = df, colour = "red", size = 2)
In [16]:
# Free scales
# Scales can either be constant across all panels (the default) or be
# allowed to vary from panel to panel.
mt <- ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = wt, colour = factor(cyl))
) +
  geom_point()
mt + facet_grid(cols = vars(cyl), scales = "free")
In [17]:
# With both scales and space free, the mapping between position and data
# values is the same across all panels. This is particularly useful for
# categorical axes.
ggplot(data = mpg, mapping = aes(x = drv, y = model)) +
  geom_point() +
  facet_grid(rows = vars(manufacturer), scales = "free", space = "free") +
  theme(strip.text.y = element_text(angle = 0))
In [22]:
# Margins: show the facets for vs + am by gear and add margin panels for the
# `am` variable only.
# `mg` was not defined anywhere in this notebook, so the cell failed with
# "object 'mg' not found"; define the base scatter plot here.
mg <- ggplot(mtcars, aes(x = mpg, y = wt)) +
  geom_point()
mg + facet_grid(vs + am ~ gear, margins = "am")
In [10]:
# Display the mpg dataset used by the faceting examples.
mpg
In [5]:
library(ggplot2)
# Base scatter plot for the facet_wrap() examples.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()
p
In [6]:
# Faceting variables are supplied with vars().
p + facet_wrap(facets = vars(class))
In [7]:
# The historical formula interface still works as well.
p + facet_wrap(facets = ~class)
In [8]:
# nrow and ncol control the number of rows and columns of panels.
p + facet_wrap(facets = vars(class), nrow = 4)
In [9]:
# Several variables can be used for faceting at once.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(facets = vars(cyl, drv))
In [14]:
# The `labeller` option controls how the facet labels are printed.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(facets = c("cyl", "drv"), labeller = "label_both")
In [22]:
# `strip.position` places the facet labels on the side of your choice.
# Setting it to `bottom` makes the label act as a subtitle for the axis. This
# is typically combined with free scales and a theme without boxes around the
# strip labels.
ggplot(data = economics_long, mapping = aes(x = date, y = value)) +
  geom_line() +
  facet_wrap(
    facets = ~variable,
    nrow = 2,
    scales = "free_y",
    strip.position = "bottom"
  ) +
  theme(
    strip.background = element_blank(),
    strip.placement = "outside"
  )
In [8]:
# Reference line from a linear fit of mpg on wt (mtcars).
# The `p` left over from the earlier cells plots hwy against displ from the
# mpg data, which does not match this model, so build the mtcars scatter plot
# the line actually belongs to.
p <- ggplot(mtcars, aes(wt, mpg)) +
  geom_point()
# Calculate slope and intercept of line of best fit
coef(lm(mpg ~ wt, data = mtcars))
# Draw the line using hand-entered, rounded values for intercept and slope
p + geom_abline(intercept = 37, slope = -5)
# But this is easier to do with geom_smooth:
p + geom_smooth(method = "lm", se = FALSE)
In [4]:
# Density of depth for each cut, drawn as outlines only.
ggplot(data = diamonds, mapping = aes(x = depth, colour = cut)) +
  geom_density() +
  xlim(55, 70)
# The same densities with a translucent fill added.
ggplot(
  data = diamonds,
  mapping = aes(x = depth, fill = cut, colour = cut)
) +
  geom_density(alpha = 0.1) +
  xlim(55, 70)
In [4]:
# Density of depth for each cut, outlines only.
ggplot(data = diamonds, mapping = aes(x = depth, colour = cut)) +
  geom_density() +
  xlim(55, 70)
# With a translucent fill per cut.
ggplot(
  data = diamonds,
  mapping = aes(x = depth, fill = cut, colour = cut)
) +
  geom_density(alpha = 0.1) +
  xlim(55, 70)
In [8]:
# position = "fill" produces a conditional density estimate.
# NOTE(review): newer ggplot2 releases deprecate stat() in favour of
# after_stat(); switch once the installed ggplot2 version is confirmed.
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = stat(count), fill = cut)
) +
  geom_density(position = "fill")
In [10]:
# Ribbon of +/- 1 around the Lake Huron level, with the level drawn on top.
# The `h` left over from the earlier cells is the diamonds histogram, which
# has no `level` column, so build the data and base plot this example needs.
huron <- data.frame(year = 1875:1972, level = as.vector(LakeHuron))
h <- ggplot(huron, aes(year))
h +
  geom_ribbon(aes(ymin = level - 1, ymax = level + 1), fill = "grey70") +
  geom_line(aes(y = level))
In [4]:
# Plot each colour at its (u, v) position, using the value stored in `col`
# directly as the point colour.
ggplot(data = luv_colours, mapping = aes(x = u, y = v)) +
  geom_point(mapping = aes(colour = col), size = 3) +
  scale_colour_identity() +
  coord_fixed()
In [10]:
library(ggplot2)
# One line per variable: first only grouped, then told apart by linetype.
base <- ggplot(data = economics_long, mapping = aes(x = date, y = value01))
base + geom_line(mapping = aes(group = variable))
base + geom_line(mapping = aes(linetype = variable))
# See scale_manual for more flexibility
In [18]:
# You can set colour and fill aesthetics at the same time.
# `cols` was not defined anywhere in this notebook, so the cell failed with
# "object 'cols' not found"; define the named palette here. The names are the
# factor levels the colours apply to.
cols <- c("8" = "red", "4" = "blue", "6" = "darkgreen", "10" = "orange")
ggplot(
  mtcars,
  aes(mpg, wt, colour = factor(cyl), fill = factor(cyl))
) +
  geom_point(shape = 21, alpha = 0.5, size = 2) +
  scale_colour_manual(
    values = cols,
    aesthetics = c("colour", "fill")
  )
In [4]:
# Manipulating the default position scales lets you change the axis labels.
# `p1` was not defined anywhere in this notebook, so the cell failed with
# "object 'p1' not found"; define the displ/hwy scatter plot the labels
# describe.
p1 <- ggplot(mpg, aes(displ, hwy)) +
  geom_point()
p1 +
  scale_x_continuous("Engine displacement (L)") +
  scale_y_continuous("Highway MPG")
In [4]:
# Simulation-based null distribution (via permutation) of the t statistic for
# one numerical response and one categorical predictor.
library(dplyr)
library(infer)
mtcars %>%
  dplyr::mutate(am = factor(am)) %>%
  # formula alternative: specify(mpg ~ am)
  specify(response = mpg, explanatory = am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "t", order = c("1", "0")) %>%
  # "simulation" is the default method
  visualize(method = "simulation")
In [6]:
# Theoretical t distribution for one numerical response and one categorical
# predictor, using the t statistic.
mtcars %>%
  dplyr::mutate(am = factor(am)) %>%
  # formula alternative: specify(mpg ~ am)
  specify(response = mpg, explanatory = am) %>%
  hypothesize(null = "independence") %>%
  # no generate() step: nothing is simulated here
  calculate(stat = "t", order = c("1", "0")) %>%
  visualize(method = "theoretical")
In [7]:
# Theoretical distribution overlaid on the permuted t statistics.
mtcars %>%
  dplyr::mutate(am = factor(am)) %>%
  # formula alternative: specify(mpg ~ am)
  specify(response = mpg, explanatory = am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "t", order = c("1", "0")) %>%
  visualize(method = "both")
In [ ]: