#################
# Estimates of interprovincial migrants by province or territory of origin and destination, annual
# https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710002201
#
# Load Data
tbl_number <- ’17-10-0022′
move_prov <-
cansim::get_cansim(tbl_number) |>
janitor::clean_names() |>
mutate(from = str_remove_all(geo, ‘, province of origin$’),
to = str_remove_all(geography_province_of_destination, ‘, province of destination$’) ) |>
select(
date,
from,
to,
value
)
#################
# province names and regions
provinces <- read_csv(I(
‘iso2 , region , name
AB , Prairies , Alberta
BC , BC , British Columbia
MB , Prairies , Manitoba
NB , Atlantic, New Brunswick
NL , Atlantic, Newfoundland and Labrador
NT , North , Northwest Territories
NT , North , Northwest Territories including Nunavut
NS , Atlantic, Nova Scotia
NU , North , Nunavut
ON , Ontario, Ontario
PE , Atlantic, Prince Edward Island
QC , Quebec , Quebec
SK , Prairies , Saskatchewan
YU , North , Yukon’
)
)
###############
# where to put the graphs for
geo_regions <- read_csv(I(
‘row , col , code
2 , 1 , BC
2 , 2 , Prairies
2 , 3 , Ontario
2 , 4 , Quebec
1 , 4 , Atlantic
1 , 2 , North’
)
) |> mutate(name = code)
#’ join_replace
#’
#’ Helper function to join and replace
join_replace <- function(a, b, old_name, joiner = ‘name’, new_name = ‘region’){
a |>
inner_join(b |> select(all_of(c(new_name, joiner))),
by = setNames(c(joiner), old_name)
) |>
select(-!!sym(old_name)) |>
rename(!!sym(old_name) := new_name)
}
#####################
# population by region not province
pop_region <-
pop |>
join_replace(provinces, old_name = ‘geo’) |>
summarise(value = sum(value), .by = c(date, geo)) |>
rename(pop := value)
###################
# big center label for each facet
center_lbl <-
plot_df |>
summarise(
across(
c(date, y_val), ~{mean(range(.x))}
), .by = geo
)
##########################
# facet_geo seems to have a bug when scale = ‘free_y’ so I am making a custom label as the y scale and setting the y scale to element_blank
line_lbl <-
bind_rows(
plot_df |> slice_max(y_val, with_ties = FALSE, by = geo),
plot_df |> slice_min(y_val, with_ties = FALSE, by = geo)
) |> mutate(lbl = paste0(
#format(round(y_val*100), 1), ‘%’
prettyNum(signif(y_val, 2), big.mark = ‘,’)
)) |>
mutate(date = min(date)-(365*4))
###############
# Range of years in the data
yr_rng <- plot_df$date |> range() |> year()
################
# Make the plot
plot_df |>
ggplot(aes(x = date, y = y_val)) +
geom_text(aes(label = geo, color = geo),
data = center_lbl, size = 15,
#color = ‘grey’
) +
geom_label(aes(label = lbl, fill = geo),
data = line_lbl,
hjust = 0,
nudge_x = -3000,
color = ‘white’, alpha = 0.7 ) +
geom_line(aes(color = geo), color = ‘grey’) +
stat_difference(aes(ymin = 0, ymax = y_val, fill = geo), alpha = 0.25) +
#geom_point(aes(color = geo), color = ‘grey’) +
geom_hline(yintercept = 0, color = ‘black’) +
scale_x_date(labels = range(year(plot_df$date)), breaks = range(plot_df$date)) +
guides(color = ‘none’, fill = ‘none’) +
labs (y =”, x =”, caption = glue(‘Source Cansim tables {tbl_number}, {pop_tbl_number}’),
title =str_to_title(glue(‘Net Interprovincial Migration in Canada’)),
subtitle = str_to_title(glue(‘by Region over the last {yr_rng|> diff()} years [{yr_rng[1]}-{yr_rng[2]}]’)),
) +
1 Comment
Data Comes from StatsCan table [17-10-0022](https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710002201) Code to reproduce is below
#################
# LOAD LIBRARIES
library(cansim)
library(tidyverse)
library(ggplot2)
library(janitor)
library(lubridate)
library(glue)
library(geofacet)
library(ggrepel)
library(ggh4x)
#################
# Estimates of interprovincial migrants by province or territory of origin and destination, annual
# https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710002201
#
# Load Data
tbl_number <- ’17-10-0022′
move_prov <-
cansim::get_cansim(tbl_number) |>
janitor::clean_names() |>
mutate(from = str_remove_all(geo, ‘, province of origin$’),
to = str_remove_all(geography_province_of_destination, ‘, province of destination$’) ) |>
select(
date,
from,
to,
value
)
########################
# Population estimates, quarterly
# https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710000901
pop_tbl_number <- ’17-10-0009′
pop <-
cansim::get_cansim(pop_tbl_number) |>
janitor::clean_names() |>
select(geo, value , date)
#################
# Set GGPlot theme
theme_set(
theme_minimal() +
theme(
axis.text = element_text(size = 15, color = ‘grey’),
strip.background = element_blank(),
strip.text = element_blank(),
panel.grid = element_blank(),
panel.border = element_blank(),
plot.caption = element_text(color = ‘grey’),
plot.title = element_text(color = ‘grey’, size = 30, hjust = 0.5),
plot.subtitle = element_text(color = ‘grey’, size = 20, hjust = 0.5)
)
)
#################
# province names and regions
provinces <- read_csv(I(
‘iso2 , region , name
AB , Prairies , Alberta
BC , BC , British Columbia
MB , Prairies , Manitoba
NB , Atlantic, New Brunswick
NL , Atlantic, Newfoundland and Labrador
NT , North , Northwest Territories
NT , North , Northwest Territories including Nunavut
NS , Atlantic, Nova Scotia
NU , North , Nunavut
ON , Ontario, Ontario
PE , Atlantic, Prince Edward Island
QC , Quebec , Quebec
SK , Prairies , Saskatchewan
YU , North , Yukon’
)
)
###############
# where to put the graphs for
geo_regions <- read_csv(I(
‘row , col , code
2 , 1 , BC
2 , 2 , Prairies
2 , 3 , Ontario
2 , 4 , Quebec
1 , 4 , Atlantic
1 , 2 , North’
)
) |> mutate(name = code)
#’ join_replace
#’
#’ Helper function to join and replace
join_replace <- function(a, b, old_name, joiner = ‘name’, new_name = ‘region’){
a |>
inner_join(b |> select(all_of(c(new_name, joiner))),
by = setNames(c(joiner), old_name)
) |>
select(-!!sym(old_name)) |>
rename(!!sym(old_name) := new_name)
}
########################
# Movements by Region not province
move_region <-
move_prov |>
join_replace(provinces, old_name = ‘from’) |>
join_replace(provinces, old_name = ‘to’) |>
summarise(value = sum(value), .by = c(date, from, to)) |>
filter(from != to) %>%
inner_join(., ., by = c(‘date’ = ‘date’, ‘from’ = ‘to’, ‘to’ = ‘from’), suffix = c(‘.from’,’.to’)) |>
mutate(value.net = value.to – value.from)
#####################
# population by region not province
pop_region <-
pop |>
join_replace(provinces, old_name = ‘geo’) |>
summarise(value = sum(value), .by = c(date, geo)) |>
rename(pop := value)
############################
# the dataframe to plot
plot_df <-
move_region |>
summarise(
across(
starts_with(‘value.’),
~ sum(.x)
), .by = c(‘date’, ‘from’)
) |>
rename(geo := from) |>
inner_join(pop_region, by = c(‘geo’, ‘date’) ) |>
inner_join(pop|> filter(geo == ‘Canada’) |> select(-geo) |> rename(can := value), by = c(‘date’)) |>
mutate(across(
.cols = starts_with(‘value.’),
.fns = c(f_pop = ~{ .x / pop }, f_can = ~{ .x / can }),
.names = ‘{.fn}_{.col}’
)) |>
mutate(y_val = value.net)
###################
# big center label for each facet
center_lbl <-
plot_df |>
summarise(
across(
c(date, y_val), ~{mean(range(.x))}
), .by = geo
)
##########################
# facet_geo seems to have a bug when scale = ‘free_y’ so I am making a custom label as the y scale and setting the y scale to element_blank
line_lbl <-
bind_rows(
plot_df |> slice_max(y_val, with_ties = FALSE, by = geo),
plot_df |> slice_min(y_val, with_ties = FALSE, by = geo)
) |> mutate(lbl = paste0(
#format(round(y_val*100), 1), ‘%’
prettyNum(signif(y_val, 2), big.mark = ‘,’)
)) |>
mutate(date = min(date)-(365*4))
###############
# Range of years in the data
yr_rng <- plot_df$date |> range() |> year()
################
# Make the plot
plot_df |>
ggplot(aes(x = date, y = y_val)) +
geom_text(aes(label = geo, color = geo),
data = center_lbl, size = 15,
#color = ‘grey’
) +
geom_label(aes(label = lbl, fill = geo),
data = line_lbl,
hjust = 0,
nudge_x = -3000,
color = ‘white’, alpha = 0.7 ) +
geom_line(aes(color = geo), color = ‘grey’) +
stat_difference(aes(ymin = 0, ymax = y_val, fill = geo), alpha = 0.25) +
#geom_point(aes(color = geo), color = ‘grey’) +
geom_hline(yintercept = 0, color = ‘black’) +
scale_x_date(labels = range(year(plot_df$date)), breaks = range(plot_df$date)) +
guides(color = ‘none’, fill = ‘none’) +
labs (y =”, x =”, caption = glue(‘Source Cansim tables {tbl_number}, {pop_tbl_number}’),
title =str_to_title(glue(‘Net Interprovincial Migration in Canada’)),
subtitle = str_to_title(glue(‘by Region over the last {yr_rng|> diff()} years [{yr_rng[1]}-{yr_rng[2]}]’)),
) +
#facet_wrap(~geo)
facet_geo(~geo, grid = geo_regions, scale= ‘free_y’) +
theme(
axis.text.y = element_blank()
)