import geoquetzal as gq
# With no argument: list all variables in the household dataset
gq.describe_hogares()
# With a variable name: show the details for that specific variable
gq.describe_hogares("PCH4")
# Returns label, type, and coded values
# The same kind of lookup for person-level variables
gq.describe_personas("PCP12")
# Ethnic self-identification: Maya, Garífuna, Xinka, Ladino...
Examples: Census Data
This section shows how to explore and analyze the 2018 Census microdata without maps. To combine census data with geographic visualizations, see Census + Maps.
Explore available variables
Use the describe_* functions to learn what variables are available and what values they take.
library(geoquetzal)
# With no argument: list all variables in the household dataset
describe_hogares()
# With a variable name: show the details for that specific variable
describe_hogares("PCH4")
# Returns label, type, and coded values
# The same kind of lookup for person-level variables
describe_personas("PCP12")
# Ethnic self-identification: Maya, Garífuna, Xinka, Ladino...
Basic services access by department
What percentage of households has access to electricity in each department?
import geoquetzal as gq
# Fetch the full household table (~38 MB download)
df = gq.hogares()


# PCH8 codes the lighting type; code 1 is the electric grid
def share_on_grid(codes):
    """Percentage of entries in `codes` equal to 1 (electric grid)."""
    return (codes == 1).mean() * 100


# One percentage per department, rounded, highest share first
by_department = df.groupby("departamento")["PCH8"].apply(share_on_grid)
electricidad = by_department.round(1).reset_index(name="pct_electricity")
electricidad = electricidad.sort_values("pct_electricity", ascending=False)
print(electricidad.to_string(index=False))
library(geoquetzal)
library(dplyr)
# Fetch the full household table (~38 MB download)
df <- hogares()

# PCH8 codes the lighting type; code 1 is the electric grid.
# Share of households on the grid per department, highest first.
por_departamento <- group_by(df, departamento)
electricidad <- summarise(
  por_departamento,
  pct_electricity = round(mean(PCH8 == 1, na.rm = TRUE) * 100, 1)
)
electricidad <- arrange(electricidad, desc(pct_electricity))
print(electricidad)
Ethnic self-identification in Sacatepéquez
import geoquetzal as gq
df = gq.personas(departamento="Sacatepequez")

# PCP12 holds the ethnic self-identification code; map each code to its label
etiquetas_pcp12 = {
    1: "Maya",
    2: "Garífuna",
    3: "Xinka",
    4: "Afrodescendiente",
    5: "Ladino",
    6: "Extranjero",
}

# Relative frequency of each code as a percentage, then swap codes for labels
porcentajes = df["PCP12"].value_counts(normalize=True) * 100
etnicidad = porcentajes.round(1).rename(etiquetas_pcp12)
print(etnicidad)
library(geoquetzal)
library(dplyr)
df <- personas(departamento = "Sacatepequez")

# PCP12 holds the ethnic self-identification code:
# 1=Maya, 2=Garífuna, 3=Xinka, 4=Afrodescendiente, 5=Ladino, 6=Extranjero

# Number of persons per code
conteos <- count(df, PCP12)

# Turn counts into percentages and codes into labels
conteos <- mutate(
  conteos,
  ethnicity = recode(
    as.character(PCP12),
    "1" = "Maya",
    "2" = "Garífuna",
    "3" = "Xinka",
    "4" = "Afrodescendiente",
    "5" = "Ladino",
    "6" = "Extranjero"
  ),
  pct = round(n / sum(n) * 100, 1)
)

# Keep only label and share, largest share first
etnicidad <- arrange(select(conteos, ethnicity, pct), desc(pct))
print(etnicidad)
Dominant mother tongue per municipality
import geoquetzal as gq
df = gq.personas(departamento="Huehuetenango")


# PCP15: language learned to speak in
def most_frequent_code(codes):
    """Most frequent non-missing code as an int, or None if all are missing."""
    observed = codes.dropna()
    if observed.empty:
        return None
    return int(observed.mode()[0])


# Mode per municipality = most frequent language
idioma_dominante = (
    df.groupby("MUNICIPIO")["PCP15"]
    .agg(most_frequent_code)
    .reset_index(name="idioma_dominante")
)

# Replace codes with labels
valores = gq.describe_personas("PCP15")["valores"]
codigos = idioma_dominante["idioma_dominante"]
idioma_dominante["idioma_dominante"] = codigos.map(valores)
print(idioma_dominante.to_string(index=False))
library(geoquetzal)
library(dplyr)
df <- personas(departamento = "Huehuetenango")

# PCP15: language learned to speak in

# Code -> label lookup table, fetched once instead of once per column
valores <- describe_personas("PCP15")$valores
etiquetas <- data.frame(
  idioma_dominante = as.integer(names(valores)),
  label = unlist(valores)
)

# Most frequent non-missing code in `x`, as an integer.
# Returns NA when every value is missing, so summarise() always gets exactly
# one value per municipality (which.max() on an empty table is zero-length).
moda <- function(x) {
  x <- x[!is.na(x)]
  if (length(x) == 0) {
    return(NA_integer_)
  }
  as.integer(names(which.max(table(x))))
}

# Mode per municipality = most frequent language, then attach its label
idioma_dominante <- df |>
  group_by(MUNICIPIO) |>
  summarise(idioma_dominante = moda(PCP15)) |>
  left_join(etiquetas, by = "idioma_dominante")
print(idioma_dominante)
Average years of education per municipality
import geoquetzal as gq
import pandas as pd
df = gq.personas(departamento="Sacatepequez")

# ANEDUCA is stored as VARCHAR: convert it to numbers before aggregating
# (errors="coerce" turns entries that cannot be parsed into NaN)
df["ANEDUCA"] = pd.to_numeric(df["ANEDUCA"], errors="coerce")

# Mean per municipality, rounded, highest average first
promedios = df.groupby("MUNICIPIO")["ANEDUCA"].mean()
aneduca = promedios.round(1).reset_index(name="aneduca_promedio")
aneduca = aneduca.sort_values("aneduca_promedio", ascending=False)
print(aneduca.to_string(index=False))
library(geoquetzal)
library(dplyr)
df <- personas(departamento = "Sacatepequez")

# ANEDUCA (years of education) is stored as character: convert it to numeric
# before any aggregation
con_anios <- mutate(df, ANEDUCA = as.numeric(ANEDUCA))

# Mean per municipality (missing values ignored), highest average first
aneduca <- con_anios |>
  group_by(MUNICIPIO) |>
  summarise(
    avg_years_education = round(mean(ANEDUCA, na.rm = TRUE), 1)
  ) |>
  arrange(desc(avg_years_education))
print(aneduca)
Digital divide — technology access
What percentage of households has computer and internet access in Quiché?
import geoquetzal as gq
import pandas as pd
df = gq.hogares(departamento="Quiche")

# Indicator label -> household column (each column is coded 1=Yes, 2=No)
columnas = {
    "Computer": "PCH9_H",
    "Internet": "PCH9_I",
    "Car": "PCH9_M",
}

# Share of households answering "Yes" (code 1) for each indicator
tecnologia = pd.DataFrame({
    "Indicator": list(columnas),
    "% households": [
        (df[columna] == 1).mean() * 100 for columna in columnas.values()
    ],
}).round(1)
print(tecnologia.to_string(index=False))
library(geoquetzal)
df <- hogares(departamento = "Quiche")

# Indicator label -> household column (each column is coded 1=Yes, 2=No)
columnas <- c(Computer = "PCH9_H", Internet = "PCH9_I", Car = "PCH9_M")

# Share of households answering "Yes" (code 1), ignoring missing answers
porcentajes <- vapply(
  columnas,
  \(columna) mean(df[[columna]] == 1, na.rm = TRUE) * 100,
  numeric(1),
  USE.NAMES = FALSE
)

tecnologia <- data.frame(
  Indicator = names(columnas),
  pct_households = round(porcentajes, 1)
)
print(tecnologia)
Sub-municipal data with lugares poblados
Pre-aggregated lugar poblado data enables sub-municipal analysis without processing microdata.
import geoquetzal as gq
# Pre-aggregated indicators for Antigua Guatemala
lp = gq.lugares_poblados(municipio="Antigua Guatemala")

# % of households with internet per lugar poblado:
# "yes" count divided by the sum of the "yes" and "no" counts
con_internet = lp["pch9_i_si"]
total_respuestas = con_internet + lp["pch9_i_no"]
lp["pct_internet"] = (con_internet / total_respuestas * 100).round(1)

# Top 5 lugares by internet access
columnas = ["nombre", "poblacion_total", "pct_internet"]
top5 = lp[columnas].sort_values("pct_internet", ascending=False).head(5)
print(top5.to_string(index=False))

# Look up what a column means
gq.describe_lugares_poblados("pch9_i_si")
# {'variable': 'pch9_i_si',
# 'etiqueta': 'Conteo: tiene internet',
# 'tipo': 'equipamiento',
# 'fuente': 'hogar'}
library(geoquetzal)
library(dplyr)
# Pre-aggregated indicators for Antigua Guatemala
lp <- lugares_poblados(municipio = "Antigua Guatemala")

# % of households with internet per lugar poblado:
# "yes" count divided by the sum of the "yes" and "no" counts
lp[["pct_internet"]] <- with(
  lp,
  round(pch9_i_si / (pch9_i_si + pch9_i_no) * 100, 1)
)

# Top 5 lugares by internet access
ranking <- lp |>
  select(nombre, poblacion_total, pct_internet) |>
  arrange(desc(pct_internet))
head(ranking, 5)

# Look up what a column means
describe_lugares_poblados("pch9_i_si")
# $variable: "pch9_i_si"
# $etiqueta: "Conteo: tiene internet"
# $tipo: "equipamiento"
# $fuente: "hogar"