[OC] Net Interprovincial Migration In Canada By Region Over last 50+ years 1972-2023

    by hswerdfe_2

    1 Comment

    1. Data Comes from StatsCan table [17-10-0022](https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710002201) Code to reproduce is below

      #################
      # LOAD LIBRARIES
      library(cansim)
      library(tidyverse)
      library(ggplot2)
      library(janitor)
      library(lubridate)
      library(glue)
      library(geofacet)
      library(ggrepel)
      library(ggh4x)

      #################
      # Estimates of interprovincial migrants by province or territory of origin and destination, annual
      # https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710002201
      #
      # Load Data
      tbl_number <- ’17-10-0022′
      move_prov <-
      cansim::get_cansim(tbl_number) |>
      janitor::clean_names() |>
      mutate(from = str_remove_all(geo, ‘, province of origin$’),
      to = str_remove_all(geography_province_of_destination, ‘, province of destination$’) ) |>
      select(
      date,
      from,
      to,
      value
      )

      ########################
      # Population estimates, quarterly
      # https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1710000901
      pop_tbl_number <- ’17-10-0009′
      pop <-
      cansim::get_cansim(pop_tbl_number) |>
      janitor::clean_names() |>
      select(geo, value , date)

      #################
      # Set GGPlot theme
      theme_set(
      theme_minimal() +
      theme(
      axis.text = element_text(size = 15, color = ‘grey’),
      strip.background = element_blank(),
      strip.text = element_blank(),
      panel.grid = element_blank(),
      panel.border = element_blank(),
      plot.caption = element_text(color = ‘grey’),
      plot.title = element_text(color = ‘grey’, size = 30, hjust = 0.5),
      plot.subtitle = element_text(color = ‘grey’, size = 20, hjust = 0.5)
      )
      )

      #################
      # province names and regions
      provinces <- read_csv(I(
      ‘iso2 , region , name
      AB , Prairies , Alberta
      BC , BC , British Columbia
      MB , Prairies , Manitoba
      NB , Atlantic, New Brunswick
      NL , Atlantic, Newfoundland and Labrador
      NT , North , Northwest Territories
      NT , North , Northwest Territories including Nunavut
      NS , Atlantic, Nova Scotia
      NU , North , Nunavut
      ON , Ontario, Ontario
      PE , Atlantic, Prince Edward Island
      QC , Quebec , Quebec
      SK , Prairies , Saskatchewan
      YU , North , Yukon’
      )
      )

      ###############
      # where to put the graphs for
      geo_regions <- read_csv(I(
      ‘row , col , code
      2 , 1 , BC
      2 , 2 , Prairies
      2 , 3 , Ontario
      2 , 4 , Quebec
      1 , 4 , Atlantic
      1 , 2 , North’
      )
      ) |> mutate(name = code)

      #’ join_replace
      #’
      #’ Helper function to join and replace
      join_replace <- function(a, b, old_name, joiner = ‘name’, new_name = ‘region’){

      a |>
      inner_join(b |> select(all_of(c(new_name, joiner))),
      by = setNames(c(joiner), old_name)
      ) |>
      select(-!!sym(old_name)) |>
      rename(!!sym(old_name) := new_name)
      }

      ########################
      # Movements by Region not province
      move_region <-
      move_prov |>
      join_replace(provinces, old_name = ‘from’) |>
      join_replace(provinces, old_name = ‘to’) |>
      summarise(value = sum(value), .by = c(date, from, to)) |>
      filter(from != to) %>%
      inner_join(., ., by = c(‘date’ = ‘date’, ‘from’ = ‘to’, ‘to’ = ‘from’), suffix = c(‘.from’,’.to’)) |>
      mutate(value.net = value.to – value.from)

      #####################
      # population by region not province
      pop_region <-
      pop |>
      join_replace(provinces, old_name = ‘geo’) |>
      summarise(value = sum(value), .by = c(date, geo)) |>
      rename(pop := value)

      ############################
      # the dataframe to plot
      plot_df <-
      move_region |>
      summarise(
      across(
      starts_with(‘value.’),
      ~ sum(.x)
      ), .by = c(‘date’, ‘from’)
      ) |>
      rename(geo := from) |>
      inner_join(pop_region, by = c(‘geo’, ‘date’) ) |>
      inner_join(pop|> filter(geo == ‘Canada’) |> select(-geo) |> rename(can := value), by = c(‘date’)) |>
      mutate(across(
      .cols = starts_with(‘value.’),
      .fns = c(f_pop = ~{ .x / pop }, f_can = ~{ .x / can }),
      .names = ‘{.fn}_{.col}’
      )) |>
      mutate(y_val = value.net)

      ###################
      # big center label for each facet
      center_lbl <-
      plot_df |>
      summarise(
      across(
      c(date, y_val), ~{mean(range(.x))}
      ), .by = geo
      )

      ##########################
      # facet_geo seems to have a bug when scale = ‘free_y’ so I am making a custom label as the y scale and setting the y scale to element_blank
      line_lbl <-
      bind_rows(
      plot_df |> slice_max(y_val, with_ties = FALSE, by = geo),
      plot_df |> slice_min(y_val, with_ties = FALSE, by = geo)
      ) |> mutate(lbl = paste0(
      #format(round(y_val*100), 1), ‘%’
      prettyNum(signif(y_val, 2), big.mark = ‘,’)
      )) |>
      mutate(date = min(date)-(365*4))

      ###############
      # Range of years in the data
      yr_rng <- plot_df$date |> range() |> year()

      ################
      # Make the plot
      plot_df |>
      ggplot(aes(x = date, y = y_val)) +
      geom_text(aes(label = geo, color = geo),
      data = center_lbl, size = 15,
      #color = ‘grey’
      ) +

      geom_label(aes(label = lbl, fill = geo),
      data = line_lbl,
      hjust = 0,
      nudge_x = -3000,
      color = ‘white’, alpha = 0.7 ) +
      geom_line(aes(color = geo), color = ‘grey’) +
      stat_difference(aes(ymin = 0, ymax = y_val, fill = geo), alpha = 0.25) +
      #geom_point(aes(color = geo), color = ‘grey’) +
      geom_hline(yintercept = 0, color = ‘black’) +
      scale_x_date(labels = range(year(plot_df$date)), breaks = range(plot_df$date)) +
      guides(color = ‘none’, fill = ‘none’) +
      labs (y =”, x =”, caption = glue(‘Source Cansim tables {tbl_number}, {pop_tbl_number}’),
      title =str_to_title(glue(‘Net Interprovincial Migration in Canada’)),
      subtitle = str_to_title(glue(‘by Region over the last {yr_rng|> diff()} years [{yr_rng[1]}-{yr_rng[2]}]’)),
      ) +

      #facet_wrap(~geo)
      facet_geo(~geo, grid = geo_regions, scale= ‘free_y’) +
      theme(
      axis.text.y = element_blank()
      )

    Leave A Reply