A Distribuição Racial do Brasil em Regiões

Code
# Setup -------------------------------------------------------------------

library(geobr)
library(dplyr)
library(ggplot2)
library(stringr)
library(sf)
library(ggnewscale)
library(showtext)

import::from(janitor, clean_names)
import::from(readxl, read_excel)
import::from(sidrar, get_sidra)
import::from(stringi, stri_trans_general)
import::from(tidyr, separate_wider_delim)

font_add_google("Lato", "Lato")
showtext_auto()

# Data --------------------------------------------------------------------

# Download data from SIDRA (Brazilian Institute of Geography and Statistics database)
# Table 9605: Population by race/color
sidra9605 <- get_sidra(9605, variable = 93, geo = "City")

url <- "https://geoftp.ibge.gov.br/organizacao_do_territorio/divisao_regional/divisao_regional_do_brasil/divisao_regional_do_brasil_em_regioes_geograficas_2017/tabelas/regioes_geograficas_composicao_por_municipios_2017_20180911.xlsx"

nmfile <- tempfile(fileext = "xlsx")
download.file(url, nmfile)
dim_inter <- readxl::read_excel(nmfile)

dim_inter <- dim_inter |>
  rename(
    code_muni = CD_GEOCODI,
    code_intermediate = cod_rgint,
    code_immediate = cod_rgi,
    name_muni = nome_mun,
    name_intermediate = nome_rgi
  ) |>
  mutate(across(starts_with("code"), as.numeric))

# Helper function to standardize text: remove accents and convert to lowercase
str_simplify <- function(x) {
  y <- stringi::stri_trans_general(x, "latin-ascii")
  y <- stringr::str_to_lower(y)
  return(y)
}

pop <- sidra9605 |>
  as_tibble() |>
  janitor::clean_names() |>
  mutate(
    code_muni = municipio_codigo,
    code_region = str_sub(code_muni, 1, 1),
    code_state = str_sub(code_muni, 1, 2),
    race = str_simplify(cor_ou_raca),
    race_label = cor_ou_raca,
    pop = valor
  ) |>
  mutate(across(starts_with("code"), as.numeric))

pop <- left_join(
  pop,
  select(dim_inter, code_muni, code_intermediate, code_immediate),
  by = "code_muni"
)

pop <- pop |>
  # Split municipality name and state abbreviation
  separate_wider_delim(
    cols = municipio,
    delim = " - ",
    names = c("name_muni", "abbrev_state")
  ) |>
  # Select and reorder relevant columns
  select(starts_with("code"), name_muni, abbrev_state, race, race_label, pop) |>
  # Remove totals and calculate proportions by municipality
  filter(race != "total") |>
  mutate(prop = pop / sum(pop, na.rm = TRUE), .by = "code_muni")

# Define grouping variables for different geographic levels (id variables)
pop_summary <- list(
  region = c("code_region"),
  state = c("code_state"),
  intermediate = c("code_intermediate"),
  immediate = c("code_immediate"),
  brasil = c("race")
)

# Function to create summary tables
summarise_tables <- function(x) {
  pop |>
    # Sums population by id variables + race
    summarise(
      total = sum(pop, na.rm = TRUE),
      .by = c(all_of(x), "race", "race_label")
    ) |>
    # Computes population shares by id variables
    mutate(
      prop = total / sum(total),
      .by = all_of(x)
    )
}

# Create all summary tables at once
tbls <- lapply(pop_summary, summarise_tables)

# Find the highest share within each intermediate region

top_inter <- tbls$intermediate |>
  group_by(code_intermediate) |>
  slice_max(prop, n = 1) |>
  ungroup()

geo_inter_full <- read_intermediate_region(showProgress = FALSE, simplified = FALSE)
geo_inter <- read_intermediate_region(showProgress = FALSE)

geo_inter_full <- left_join(geo_inter_full, top_inter, by = "code_intermediate")
inter <- left_join(geo_inter, top_inter, by = "code_intermediate")

# Map ---------------------------------------------------------------------

legend_breaks <- seq(0.5, 0.8, 0.1)
legend_labels <- legend_breaks * 100
legend_limits <- c(0.4, 0.85)

base_map <- ggplot() +
  geom_sf(
    data = filter(inter, race != "branca"),
    aes(fill = prop),
    lwd = 0.1,
    color = "white"
  ) +
  scale_fill_distiller(
    name = "% Pardos",
    palette = "Purples",
    direction = 1,
    breaks = legend_breaks,
    labels = legend_labels,
    limits = legend_limits
  ) +
  # Creates a secondary scale using ggnewscale::new_scale_fill
  new_scale_fill() +
  geom_sf(
    data = filter(inter, race == "branca"),
    aes(fill = prop),
    lwd = 0.1,
    color = "white"
  ) +
  scale_fill_distiller(
    name = "% Brancos",
    palette = "Greens",
    direction = 1,
    breaks = legend_breaks,
    labels = legend_labels,
    limits = legend_limits
  )

# Find the region with the highest share for each race
sf_top <- inter |>
  group_by(race) |>
  slice_max(prop, n = 1, na_rm = TRUE) |>
  ungroup()

# Get the position of the centroids (for the arrow)
coords_centroid_top <- sf_top |>
  st_centroid() |>
  st_coordinates()

# Auxiliar data.frame to position the text labels
df_annotation <- tibble(
  x = c(-40, -65, -43, -44),
  y = c(-27, -18, -32, 2),
  label = c(
    "Regiões em verde\nindicam que\nbrancos são maioria",
    "Regiões em roxo\nindicam que\npardos são maioria",
    "Santa Cruz é a RI\ncom maior percentual\nde brancos (82%)",
    "Parintins é a RI\ncom maior percentual\nde pardos (80%)"
  )
)

# Add text labels to plot
map_annotations <- base_map +
  geom_label(
    data = df_annotation,
    aes(x = x, y = y, label = label),
    family = "Lato",
    size = c(4, 4, 3, 3),
    fill = "white",      # background color
    alpha = 0.8,         # transparency
    label.padding = unit(0.2, "lines"),  # padding around text
    label.size = 0.1     # border thickness
  ) +
  # Top %share -- Santa Cruz - Lajeado  | Parintins-- dark border
  geom_sf(
    data = filter(inter, code_intermediate %in% c(1304, 4308)),
    fill = NA,
    color = "black",
    lwd = 0.15
  ) +
  # arrow/curve segment: Santa Cruz - Lajeado
  geom_curve(
    data = data.frame(x = coords_centroid_top[1, 1], y = coords_centroid_top[1, 2]),
    aes(x = x, y = y + 0.25, xend = x + 5, yend = y - 2.25),
    linewidth = 0.3,
    angle = 45,
    alpha = 0.8,
    arrow = arrow(length = unit(2.5, "pt"))
  ) +
  # arrow/curve segment: Parintins
  geom_curve(
    data = data.frame(x = coords_centroid_top[2, 1], y = coords_centroid_top[2, 2]),
    aes(x = x, y = y, xend = x + 10, yend = y + 5),
    linewidth = 0.3,
    angle = 45,
    curvature = -0.1,
    alpha = 0.8,
    arrow = arrow(length = unit(2.5, "pt"))
  ) +
  # Remove excess white space from map
  coord_sf(xlim = c(-72.2, -35.5))

# Add title and thematic elements
final_map <- map_annotations +
  labs(
    title = "A Divisão Racial do Brasil",
    subtitle = "Percentual do grupo racial majoritário por Região Intermediária* (RI). Em todos as RIs, ou brancos ou pardos são a maioria.\nOs dados são do Censo Demográfico de 2022 do IBGE e mostram um padrão norte-sul no país.\nExceções ao padrão incluem a região do Rio de Janeiro, no sudeste, e de Caicó, no nordeste.",
    caption = "Fonte: IBGE (Censo Demográfico 2022). @viniciusoike\n(*) Regiões intermediárias agrupam municípios que compartilham relações econômicas e sociais em torno de um mesmo centro urbano principal.") +
  ggthemes::theme_map(base_family = "Lato") +
  theme(
    plot.title = element_text(
      hjust = 0,
      size = 22
    ),
    plot.subtitle = element_text(
      hjust = 0,
      size = 10
    ),
    plot.caption = element_text(hjust = 0),
    legend.position =  "inside",
    legend.position.inside = c(0.01, 0.01),
    legend.box = "horizontal",
    legend.key.size = unit(1, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    plot.background = element_rect(fill = "#F6EEE3", color = "#F6EEE3"),
    legend.background = element_rect(fill = "#F6EEE3", color = "#F6EEE3")
  )

ggsave(
  here::here("static/images/censo_mapa_raca.svg"),
  final_map,
  width = 8,
  height = 9
)
O mapa abaixo mostra o grupo racial predominante em cada região do Brasil. A divisão escolhida tem o objetivo de facilitar a leitura dos dados. No agregado, 45,3% da população do Brasil é parda, 43,5% é branca, 10,2% é preta, 0,6% é indígena e 0,4% é amarela.
Tecnicamente, uma região intermediária é uma agregação de regiões imediatas, que são uma divisão territorial do Brasil que agrupa municípios com base em centros urbanos. Na prática, uma região intermediária é como uma região metropolitana, um grupo de cidades com laços econômicos significativos.
Dados: IBGE, Censo (2022)
Tipografia: Lato
Paleta: Greens e Purple (MetBrewer)