Package 'AgePopDenom' reference manual

Title:	Model Fine-Scale Age-Structured Population Data using Open-Source Data
Description:	Automate the modelling of age-structured population data using survey data, grid population estimates and urban-rural extents.
Authors:	Mohamed A. Yusuf [aut, cre] , Victor A. Alegana [aut] , Chris Nnanatu [aut] , Andrew Tatem [aut]
Maintainer:	Mohamed A. Yusuf <[email protected]>
License:	MIT + file LICENSE
Version:	0.4.0
Built:	2025-02-26 07:21:15 UTC
Source:	https://github.com/truenomad/agepopdenom

Aggregate Individual Survey Data and Extract Gamma Parameters by Location

Description

This script aggregates the individual Survey data to location level and extracts the gamma parameters for the locations.

Usage

aggregate_and_extract_gamma(
  data,
  lat_column = "lat",
  long_column = "long",
  age_column = "ageyrs",
  urban_column = "urban"
)
aggregate_and_extract_gamma(
  data,
  lat_column = "lat",
  long_column = "long",
  age_column = "ageyrs",
  urban_column = "urban"
)

Arguments

`data`	Data frame containing individual-level age data with coordinates and urban/rural classification.
`lat_column`	Column name for latitude coordinates (default: "lat")
`long_column`	Column name for longitude coordinates (default: "long")
`age_column`	Column name for age values (default: "ageyrs")
`urban_column`	Column name for urban/rural classification (default: "urban")

Value

List containing:

outlier_free_data: Original data with added spatial coordinates, outliers removed, and clusters with less than 10 samples removed
age_param_data: Location-level gamma parameters with columns: lon, lat, web_x, web_y, log_scale, log_shape, urban, b1, c, b2, nsampled

Compute Covariance Matrix for Spatial Model

Description

This function computes a block covariance matrix for a bivariate spatial model with age-structured parameters.

Usage

compute_cov(
  gamma,
  sigma2,
  phi,
  u_dist,
  n_x,
  tau2_1 = 1,
  tau2_2 = 1,
  age_param_data
)
compute_cov(
  gamma,
  sigma2,
  phi,
  u_dist,
  n_x,
  tau2_1 = 1,
  tau2_2 = 1,
  age_param_data
)

Arguments

`gamma`	Correlation parameter between the two spatial processes
`sigma2`	Variance parameter for the spatial processes
`phi`	Range parameter for the spatial correlation
`u_dist`	Distance matrix between locations
`n_x`	Number of spatial locations
`tau2_1`	Variance parameter for first process (default = 1)
`tau2_2`	Variance parameter for second process (default = 1)
`age_param_data`	List containing age-structured parameters: b1: Vector of age parameters for first process b2: Vector of age parameters for second process c: Vector of cross-process age parameters

Value

A sparse symmetric matrix of dimension 2n_x × 2n_x

Generate or Load Cached Predictors Data

Description

This function creates predictors data based on spatial inputs or loads cached predictors data if the file already exists. It saves the generated data to a specified directory for reuse and provides progress updates.

Usage

create_prediction_data(
  country_code,
  country_shape,
  pop_raster,
  ur_raster,
  adm2_shape,
  cell_size = 5000,
  ignore_cache = FALSE,
  output_dir = here::here("03_outputs", "3a_model_outputs")
)
create_prediction_data(
  country_code,
  country_shape,
  pop_raster,
  ur_raster,
  adm2_shape,
  cell_size = 5000,
  ignore_cache = FALSE,
  output_dir = here::here("03_outputs", "3a_model_outputs")
)

Arguments

`country_code`	A string representing the country code (e.g., "KEN").
`country_shape`	An ‘sf' object representing the country’s administrative boundaries.
`pop_raster`	A 'terra' raster object representing the population raster.
`ur_raster`	A 'terra' raster object representing the urban extent raster.
`adm2_shape`	An 'sf' object representing the administrative level 2 boundaries.
`cell_size`	An integer specifying the cell size for the prediction grid in meters (default is 5000).
`ignore_cache`	A boolean input which is set to determine whether to ignore the existing cache and write over it. Default is set to FALSE.
`output_dir`	A string specifying the directory where the predictors data file should be saved (default is "03_outputs/3a_model_outputs").

Value

A data object ('predictor_data') containing the generated predictors.

Examples


tf <- file.path(tempdir(), "test_env")

# Initialize with normalized path
dir.create(tf, recursive = TRUE, showWarnings = FALSE)

init(
  r_script_name = "full_pipeline.R",
  cpp_script_name = "model.cpp",
  path = tf,
  open_r_script = FALSE
)

# Download shapefiles
download_shapefile(
  country_codes =  "COM",
  dest_file = file.path(
    tf, "01_data", "1c_shapefiles",
    "district_shape.gpkg"
  )
)

# Download population rasters from worldpop
download_pop_rasters(
  country_codes = "COM",
  dest_dir = file.path(tf, "01_data", "1b_rasters", "pop_raster")
)

# Extract urban extent raster
extract_afurextent(
  dest_dir = file.path(tf, "01_data", "1b_rasters", "urban_extent")
)


urban_raster <-  terra::rast(
  file.path(tf, "01_data", "1b_rasters",
            "urban_extent", "afurextent.asc"))

pop_raster <-  terra::rast(
  file.path(tf, "01_data", "1b_rasters", "pop_raster",
            "com_ppp_2020_constrained.tif")

)

adm2_sf <- sf::read_sf(
 file.path(tf, "01_data", "1c_shapefiles",
            "district_shape.gpkg"))

country_sf <- sf::st_union(adm2_sf)

predictors <- create_prediction_data(
  country_code =  "COM",
  country_shape = country_sf,
  pop_raster = pop_raster,
  ur_raster = urban_raster,
  adm2_shape = adm2_sf,
  cell_size = 5000,
  output_dir = file.path(
    tf, "03_outputs/3a_model_outputs"
  )
)


tf <- file.path(tempdir(), "test_env")

# Initialize with normalized path
dir.create(tf, recursive = TRUE, showWarnings = FALSE)

init(
  r_script_name = "full_pipeline.R",
  cpp_script_name = "model.cpp",
  path = tf,
  open_r_script = FALSE
)

# Download shapefiles
download_shapefile(
  country_codes =  "COM",
  dest_file = file.path(
    tf, "01_data", "1c_shapefiles",
    "district_shape.gpkg"
  )
)

# Download population rasters from worldpop
download_pop_rasters(
  country_codes = "COM",
  dest_dir = file.path(tf, "01_data", "1b_rasters", "pop_raster")
)

# Extract urban extent raster
extract_afurextent(
  dest_dir = file.path(tf, "01_data", "1b_rasters", "urban_extent")
)


urban_raster <-  terra::rast(
  file.path(tf, "01_data", "1b_rasters",
            "urban_extent", "afurextent.asc"))

pop_raster <-  terra::rast(
  file.path(tf, "01_data", "1b_rasters", "pop_raster",
            "com_ppp_2020_constrained.tif")

)

adm2_sf <- sf::read_sf(
 file.path(tf, "01_data", "1c_shapefiles",
            "district_shape.gpkg"))

country_sf <- sf::st_union(adm2_sf)

predictors <- create_prediction_data(
  country_code =  "COM",
  country_shape = country_sf,
  pop_raster = pop_raster,
  ur_raster = urban_raster,
  adm2_shape = adm2_sf,
  cell_size = 5000,
  output_dir = file.path(
    tf, "03_outputs/3a_model_outputs"
  )
)

Create a Standardized Project Folder Structure

Description

This function creates a standardized folder structure for organizing data, scripts, and outputs within a project directory. It ensures consistency and reproducibility for data-related workflows.

Usage

create_project_structure(base_path = here::here())
create_project_structure(base_path = here::here())

Arguments

base_path

A character string specifying the root directory where the folder structure will be created. Defaults to 'here::here()' to use the current project directory.

Details

The function generates the following folder structure:

# 01_data/
# +-- 1a_survey_data/
# |    +-- processed/
# |    \-- raw/
# +-- 1b_rasters/
# |    +-- urban_extent/
# |    \-- pop_raster/
# +-- 1c_shapefiles/
# 02_scripts/
# 03_outputs/
# +-- 3a_model_outputs/
# +-- 3b_visualizations/
# +-- 3c_table_outputs/
# \-- 3d_compiled_results/

Value

Creates directories under the specified 'base_path'. Returns invisible 'NULL' and prints messages about folder creation status.

Examples


# Create temp directory with normalized path
tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)

#  Initialize with normalized path
cpp_path <- file.path(tf, "02_scripts", "model")
dir.create(cpp_path, recursive = TRUE, showWarnings = FALSE)
cpp_path <- normalizePath(cpp_path, winslash = "/", mustWork = FALSE)

create_project_structure(base_path = tf)

# Create temp directory with normalized path
tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)

#  Initialize with normalized path
cpp_path <- file.path(tf, "02_scripts", "model")
dir.create(cpp_path, recursive = TRUE, showWarnings = FALSE)
cpp_path <- normalizePath(cpp_path, winslash = "/", mustWork = FALSE)

create_project_structure(base_path = tf)

Main Function to Download DHS Datasets

Description

Downloads the latest PR (household) and GE (geographic) DHS datasets for specified countries.

Usage

download_dhs_datasets(
  country_codes,
  cache_path = here::here("01_data", "1a_survey_data", "raw"),
  output_dir_root = here::here("01_data", "1a_survey_data", "raw"),
  survey_id = NULL,
  email,
  project,
  verbose = TRUE,
  clear_cache = TRUE
)
download_dhs_datasets(
  country_codes,
  cache_path = here::here("01_data", "1a_survey_data", "raw"),
  output_dir_root = here::here("01_data", "1a_survey_data", "raw"),
  survey_id = NULL,
  email,
  project,
  verbose = TRUE,
  clear_cache = TRUE
)

Arguments

`country_codes`	A character vector of ISO3 country codes.
`cache_path`	A character string specifying the cache path for RDHS.
`output_dir_root`	A character string specifying the root directory for output.
`survey_id`	A character vector of survey IDs. If NULL, uses latest survey.
`email`	A character string. Email registered with DHS.
`project`	A character string. Project name as registered with DHS.
`verbose`	Logical for rdhs setup and messages to be printed.
`clear_cache`	Logical whether to clear cache before downloading.

Value

Invisibly returns a list of downloaded dataset filenames.

Download population rasters for given country codes.

Description

This function attempts to download population rasters from WorldPop for the specified country codes. If 'dest_dir' is not provided, file paths will be generated based on the given country codes and saved into the current project directory (using 'here::here()'). It first checks if a local file already exists, and if so, it will skip downloading.

Usage

download_pop_rasters(
  country_codes,
  dest_dir = here::here("01_data", "1b_rasters", "pop_raster"),
  quiet = FALSE
)
download_pop_rasters(
  country_codes,
  dest_dir = here::here("01_data", "1b_rasters", "pop_raster"),
  quiet = FALSE
)

Arguments

`country_codes`	A character vector of ISO3 country codes.
`dest_dir`	A character vector of file paths for saving rasters. If NULL, defaults to "<cc>_ppp_2020_constrained2.tif" in the project dir.
`quiet`	Logical; if TRUE, suppress status messages.

Details

The function tries a baseline URL (BSGM) for each country code. If the file is not found there, it then tries a secondary URL (maxar_v1). If neither location provides the file, it returns NA and prompts you to check the WorldPop website directly.

Value

Invisibly returns a vector of downloaded file paths (or NA if not found).

Examples


download_pop_rasters(country_codes = "COM", dest_dir = tempdir())


download_pop_rasters(country_codes = "COM", dest_dir = tempdir())

Download WHO ADM2 Boundaries with Partial Update

Description

This function ensures the specified shapefile ('dest_file') contains boundaries for all requested ISO3 codes ('country_codes'). It dynamically Downloades only the missing codes and appends them to the file, ensuring no duplication.

Usage

download_shapefile(
  country_codes,
  dest_file = here::here("01_data", "1c_shapefiles", "district_shape.gpkg")
)
download_shapefile(
  country_codes,
  dest_file = here::here("01_data", "1c_shapefiles", "district_shape.gpkg")
)

Arguments

`country_codes`	Character vector of ISO3 country codes (e.g. c("KEN","UGA")).
`dest_file`	File path where data is saved (default: "01_data/1c_shapefiles/district_shape.gpkg").

Value

An 'sf' object of combined boundaries for all requested country codes.

Examples




tf <- file.path(tempdir(), "test_env")

# Download population rasters from worldpop
download_shapefile(
  country_codes = "COM",
  dest_file = here::here(tf, "district_shape.gpkg")
)

tf <- file.path(tempdir(), "test_env")

# Download population rasters from worldpop
download_shapefile(
  country_codes = "COM",
  dest_file = here::here(tf, "district_shape.gpkg")
)

Extract Urban/Rural Extent Raster

Description

Extracts the ‘afurextent.asc' raster file from the package’s 'inst/extdata' directory to a specified destination.

Usage

extract_afurextent(
  dest_dir = here::here("01_data", "1b_rasters", "urban_extent"),
  overwrite = FALSE
)
extract_afurextent(
  dest_dir = here::here("01_data", "1b_rasters", "urban_extent"),
  overwrite = FALSE
)

Arguments

`dest_dir`	A character string specifying the directory to save the extracted raster file.
`overwrite`	Logical. Whether to overwrite an existing file in the destination directory. Default is FALSE.

Details

This function extracts the ‘afurextent.asc' file from the package’s 'extdata' directory, where it is stored as a compressed '.zip' file. It requires the 'raster' package to load the raster file.

Value

A character string representing the full path to the extracted raster file.

Examples


 extract_afurextent(tempdir(), overwrite = TRUE)

extract_afurextent(tempdir(), overwrite = TRUE)

Extract Parameters and Optimization Details with Log-Likelihood

Description

Reads files matching the pattern "age_param_spatial_urban" in a specified directory, extracts the country name, parameter values, and optimization details, combines the results into a data frame, and optionally saves the output to a file.

Usage

extract_age_param(
  dir_path = here::here("03_outputs", "3a_model_outputs"),
  output_file = here::here("03_outputs", "3d_compiled_results", "model_params.csv")
)
extract_age_param(
  dir_path = here::here("03_outputs", "3a_model_outputs"),
  output_file = here::here("03_outputs", "3d_compiled_results", "model_params.csv")
)

Arguments

`dir_path`	A character string specifying the directory containing the files.
`output_file`	A character string specifying the path to save the output data frame. If NULL, the output will not be saved.

Value

A data frame with the extracted parameters, log-likelihood, and optimization details.

Examples



# Create temporary directory for dummy parameter files
dummy_dir <- tempdir()

dummy_params <- list(
  par = c(0.5, 1.2, 0.8, log(2), log(3), log(4)),
  objective = -123.45,
  convergence = 0,
  iterations = 10,
  evaluations = c("function = 20, gradient = 15"),
  message = "Converged"
)

saveRDS(dummy_params, file = file.path(dummy_dir,
                                       "abc_age_param_spatial_urban.rds"))

params_df <- extract_age_param(dir_path = dummy_dir,
                               output_file = tempdir())


# Create temporary directory for dummy parameter files
dummy_dir <- tempdir()

dummy_params <- list(
  par = c(0.5, 1.2, 0.8, log(2), log(3), log(4)),
  objective = -123.45,
  convergence = 0,
  iterations = 10,
  evaluations = c("function = 20, gradient = 15"),
  message = "Converged"
)

saveRDS(dummy_params, file = file.path(dummy_dir,
                                       "abc_age_param_spatial_urban.rds"))

params_df <- extract_age_param(dir_path = dummy_dir,
                               output_file = tempdir())

Extract Beta Parameters from Model Output

Description

This function extracts beta coefficients from a model parameter object, separating them into beta1 and beta2 components.

Usage

extract_betas(
  params_result,
  params = c("gamma", "log_sigma2", "log_phi", "log_tau1")
)
extract_betas(
  params_result,
  params = c("gamma", "log_sigma2", "log_phi", "log_tau1")
)

Arguments

`params_result`	A model parameter object containing parameter estimates
`params`	A character vector specifying parameter names, defaults to c("gamma", "log_sigma2", "log_phi", "log_tau1")

Details

The function assumes the parameter vector contains beta coefficients followed by other model parameters. It splits the betas into two equal groups after removing the last 4 parameters.

Value

A list with two components:

beta1: First set of beta coefficients
beta2: Second set of beta coefficients

Fit a Spatial Model for Age Parameters using TMB

Description

Fits a spatial model for age parameters using Template Model Builder (TMB) and C++. The model incorporates spatial correlation through distance matrices and handles both scale and shape parameters simultaneously. Progress is tracked with clear status updates. Can optionally load from cache.

Usage

fit_spatial_model(
  data,
  country_code = NULL,
  scale_outcome,
  shape_outcome,
  covariates,
  cpp_script_name,
  verbose = TRUE,
  control_params = list(trace = 2),
  manual_params = NULL,
  output_dir = NULL,
  ignore_cache = FALSE
)
fit_spatial_model(
  data,
  country_code = NULL,
  scale_outcome,
  shape_outcome,
  covariates,
  cpp_script_name,
  verbose = TRUE,
  control_params = list(trace = 2),
  manual_params = NULL,
  output_dir = NULL,
  ignore_cache = FALSE
)

Arguments

`data`	A data frame containing the response variables, covariates, and spatial coordinates (web_x, web_y)
`country_code`	Optional country code to save/load cached model. Default NULL runs model without caching.
`scale_outcome`	Character string specifying the column name for the scale parameter response variable
`shape_outcome`	Character string specifying the column name for the shape parameter response variable
`covariates`	Character vector of covariate names to include in both scale and shape models
`cpp_script_name`	Character string specifying the name of the C++ file (without extension) containing the TMB model definition
`verbose`	Logical indicating whether to show progress updates. Default TRUE
`control_params`	List of control parameters passed to nlminb optimizer. Default: list(trace = 2)
`manual_params`	Optional list of manual parameter values. If NULL (default), initial parameters are estimated from linear regression. The list should contain: beta1: Vector of coefficients for scale model beta2: Vector of coefficients for shape model gamma: Scalar value (default 1.0) log_sigma2: Log of sigma squared (default log(1.0)) log_phi: Log of phi (estimated from variogram) log_tau2_1: Log of tau squared (default log(1.0))
`output_dir`	Directory to save cached models. Only used if country_code is provided.
`ignore_cache`	Whether to ignore existing cache. Default FALSE.

Details

The function performs the following steps with progress tracking: 1. Fits initial linear models for scale and shape parameters 2. Calculates spatial distance matrix from web coordinates 3. Estimates optimal phi parameter using variogram: - Computes empirical variogram using automap - Automatically selects best theoretical variogram model - Range parameter is used to initialize spatial correlation - Default range of 100 used if estimation fails 4. Compiles and loads the TMB C++ template 5. Optimizes the joint likelihood using nlminb

The spatial correlation is modeled using an exponential variogram with parameters estimated from the data. The distance matrix is computed from the web coordinates (web_x, web_y) and used in the spatial covariance structure.

The C++ template should implement the joint spatial model for both parameters.

Value

An object of class 'nlminb' containing:

par - Optimized parameter values
objective - Final value of objective function
convergence - Convergence code
message - Convergence message
iterations - Number of iterations
evaluations - Number of function/gradient evaluations
scale_formula - Formula used for scale model
shape_formula - Formula used for shape model
variogram - Fitted variogram model from automap containing:
- range - Spatial correlation range parameter
- psill - Partial sill (structured variance)
- nugget - Nugget effect (unstructured variance)
- kappa - Smoothness parameter for Matern models

Note

Requires TMB package and a working C++ compiler. The C++ template must be properly structured for TMB. The automap package is required for variogram fitting.

Examples



set.seed(123)
# Set parameters for simulation
total_population <- 266
urban_proportion <- 0.602
total_coords <- 266
lon_range <- c(-16.802, -13.849)
lat_range <- c(13.149, 13.801)
mean_web_x <- -1764351
mean_web_y <- 1510868

# Simulate processed survey dataset for Gambia
df_gambia <- NULL
df_gambia$age_param_data <- dplyr::tibble(
  country = "Gambia",
  country_code_iso3 = "GMB",
  country_code_dhs = "GM",
  year_of_survey = 2024,
  id_coords = rep(1:total_coords, length.out = total_population),
  lon = runif(total_population, lon_range[1], lon_range[2]),
  lat = runif(total_population, lat_range[1], lat_range[2]),
  web_x = rnorm(total_population, mean_web_x, 50000),
  web_y = rnorm(total_population, mean_web_y, 50000),
  log_scale = rnorm(total_population, 2.82, 0.2),
  log_shape = rnorm(total_population, 0.331, 0.1),
  urban = rep(c(1, 0), c(
    round(total_population * urban_proportion),
    total_population - round(total_population * urban_proportion)
 )),
 b1 = rnorm(total_population, 0.0142, 0.002),
 c = rnorm(total_population, -0.00997, 0.001),
  b2 = rnorm(total_population, 0.00997, 0.002),
  nsampled = sample(180:220, total_population, replace = TRUE)
)


tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)

#initialise files and key scripts
init(
  r_script_name = "full_pipeline.R",
  cpp_script_name = "model.cpp",
  path = tf,
  open_r_script = FALSE
)

mod <- fit_spatial_model(
  df_gambia$age_param_data,
  scale_outcome = "log_scale",
  shape_outcome = "log_shape",
  covariates = "urban",
  cpp_script_name = file.path(tf, "02_scripts/model"),
  country_code = "GMB",
  output_dir = file.path(tf, "03_outputs/3a_model_outputs")
)


set.seed(123)
# Set parameters for simulation
total_population <- 266
urban_proportion <- 0.602
total_coords <- 266
lon_range <- c(-16.802, -13.849)
lat_range <- c(13.149, 13.801)
mean_web_x <- -1764351
mean_web_y <- 1510868

# Simulate processed survey dataset for Gambia
df_gambia <- NULL
df_gambia$age_param_data <- dplyr::tibble(
  country = "Gambia",
  country_code_iso3 = "GMB",
  country_code_dhs = "GM",
  year_of_survey = 2024,
  id_coords = rep(1:total_coords, length.out = total_population),
  lon = runif(total_population, lon_range[1], lon_range[2]),
  lat = runif(total_population, lat_range[1], lat_range[2]),
  web_x = rnorm(total_population, mean_web_x, 50000),
  web_y = rnorm(total_population, mean_web_y, 50000),
  log_scale = rnorm(total_population, 2.82, 0.2),
  log_shape = rnorm(total_population, 0.331, 0.1),
  urban = rep(c(1, 0), c(
    round(total_population * urban_proportion),
    total_population - round(total_population * urban_proportion)
 )),
 b1 = rnorm(total_population, 0.0142, 0.002),
 c = rnorm(total_population, -0.00997, 0.001),
  b2 = rnorm(total_population, 0.00997, 0.002),
  nsampled = sample(180:220, total_population, replace = TRUE)
)


tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)

#initialise files and key scripts
init(
  r_script_name = "full_pipeline.R",
  cpp_script_name = "model.cpp",
  path = tf,
  open_r_script = FALSE
)

mod <- fit_spatial_model(
  df_gambia$age_param_data,
  scale_outcome = "log_scale",
  shape_outcome = "log_shape",
  covariates = "urban",
  cpp_script_name = file.path(tf, "02_scripts/model"),
  country_code = "GMB",
  output_dir = file.path(tf, "03_outputs/3a_model_outputs")
)

Generate Age Population Raster

Description

Creates age-stratified population raster layers from predictor data and gamma distribution parameters. Supports parallel processing and caching of results. The output is a multi-layer raster stack with each layer representing the population proportion for a specific age interval.

Usage

generate_age_pop_raster(
  predictor_data,
  scale_pred,
  shape_pred,
  age_range = c(0, 10),
  age_interval = 1,
  country_code,
  ignore_cache = FALSE,
  output_dir,
  n_cores = parallel::detectCores() - 2
)
generate_age_pop_raster(
  predictor_data,
  scale_pred,
  shape_pred,
  age_range = c(0, 10),
  age_interval = 1,
  country_code,
  ignore_cache = FALSE,
  output_dir,
  n_cores = parallel::detectCores() - 2
)

Arguments

`predictor_data`	Data frame containing population and spatial data with columns: country, region, district, pop, web_x, web_y
`scale_pred`	Matrix of scale parameters for gamma distribution predictions
`shape_pred`	Matrix of shape parameters for gamma distribution predictions
`age_range`	Numeric vector of length 2 specifying min and max ages, default c(0,99)
`age_interval`	Numeric interval size between age groups in years, default 1
`country_code`	Character ISO3 country code
`ignore_cache`	Logical whether to ignore cached results, default FALSE
`output_dir`	Character path to output directory
`n_cores`	Integer number of cores for parallel processing, default detectCores()-2

Details

The function processes age intervals sequentially, computing population proportions using parallel processing. Results are cached as a GeoTIFF file for future use. The output raster maintains spatial properties of the input data and is suitable for GIS analysis and visualization.

Value

SpatRaster object (terra package) containing multiple layers, where each layer represents the population proportion for an age interval. Layer names indicate the age range (e.g., "Age 0 to 1 years"). The raster uses EPSG:3857 projection with 5000m resolution.

Examples


predictor_data <- data.frame(
 country = rep("CountryX", 100),
 region = rep("RegionA", 100),
 district = rep("District1", 100),
 pop = sample(100:1000, 100, replace = TRUE),
 web_x = runif(100, -100, 100),
 web_y = runif(100, -50, 50)
)

scale_pred <- matrix(runif(100 * 10, 1, 5), nrow = 100, ncol = 10)
shape_pred <- matrix(runif(100 * 10, 1, 5), nrow = 100, ncol = 10)

res <- generate_age_pop_raster(predictor_data,
                       scale_pred,
                       shape_pred,
                       country_code = "COD",
                       output_dir = file.path(tempdir()),
                       n_cores = 1)


predictor_data <- data.frame(
 country = rep("CountryX", 100),
 region = rep("RegionA", 100),
 district = rep("District1", 100),
 pop = sample(100:1000, 100, replace = TRUE),
 web_x = runif(100, -100, 100),
 web_y = runif(100, -50, 50)
)

scale_pred <- matrix(runif(100 * 10, 1, 5), nrow = 100, ncol = 10)
shape_pred <- matrix(runif(100 * 10, 1, 5), nrow = 100, ncol = 10)

res <- generate_age_pop_raster(predictor_data,
                       scale_pred,
                       shape_pred,
                       country_code = "COD",
                       output_dir = file.path(tempdir()),
                       n_cores = 1)

Generate Age Population Tables

Description

Creates age-stratified population tables from predictor data and gamma distribution parameters. Supports parallel processing and caching of results.

Usage

generate_age_pop_table(
  predictor_data,
  scale_pred,
  shape_pred,
  age_range = c(0, 99),
  age_interval = 1,
  country_code,
  ignore_cache = FALSE,
  output_dir,
  n_cores = parallel::detectCores() - 2
)
generate_age_pop_table(
  predictor_data,
  scale_pred,
  shape_pred,
  age_range = c(0, 99),
  age_interval = 1,
  country_code,
  ignore_cache = FALSE,
  output_dir,
  n_cores = parallel::detectCores() - 2
)

Arguments

`predictor_data`	Data frame containing population data with columns: country, region, district, pop
`scale_pred`	Matrix of scale parameters for gamma distribution predictions
`shape_pred`	Matrix of shape parameters for gamma distribution predictions
`age_range`	Numeric vector of length 2 specifying min and max ages, default c(0,99)
`age_interval`	Numeric interval size between age groups in years, default 1
`country_code`	Character ISO3 country code
`ignore_cache`	Logical whether to ignore cached results, default FALSE
`output_dir`	Character path to output directory
`n_cores`	Integer number of cores for parallel processing, default detectCores()-2

Value

List containing two data frames: - prop_df: Age-stratified population proportions with uncertainty intervals - pop_df: Age-stratified population counts with uncertainty intervals

Examples



predictor_data <- data.frame(
 country = rep("ABC", 1100),
  region = rep("Region1", 1100),
  district = rep("District1", 1100),
  pop = rep(1000, 1100)
)
scale_pred <- matrix(rep(1:10, 1100), nrow = 1100, ncol = 10)
shape_pred <- matrix(rep(1:10, 1100), nrow = 1100, ncol = 10)
output <- generate_age_pop_table(
  predictor_data, scale_pred, shape_pred, age_range = c(0, 99),
  age_interval = 10, country_code = "ABC", ignore_cache = TRUE,
  output_dir = tempdir(), n_cores = 1
)


predictor_data <- data.frame(
 country = rep("ABC", 1100),
  region = rep("Region1", 1100),
  district = rep("District1", 1100),
  pop = rep(1000, 1100)
)
scale_pred <- matrix(rep(1:10, 1100), nrow = 1100, ncol = 10)
shape_pred <- matrix(rep(1:10, 1100), nrow = 1100, ncol = 10)
output <- generate_age_pop_table(
  predictor_data, scale_pred, shape_pred, age_range = c(0, 99),
  age_interval = 10, country_code = "ABC", ignore_cache = TRUE,
  output_dir = tempdir(), n_cores = 1
)

Generate and Save Age Pyramid Plot

Description

This function processes an input dataset to compute age distribution, generates age pyramid plots by region showing both proportions and counts, and saves the plots to a specified directory.

Usage

generate_age_pyramid_plot(
  dataset,
  country_code,
  output_dir,
  line_color = "#67000d",
  fill_high = "#fee0d2",
  fill_low = "#a50f15",
  break_axis_by = 10,
  caption = paste0("Note: Total population includes ",
    "ages 99+, pyramid shows ages 0-99")
)
generate_age_pyramid_plot(
  dataset,
  country_code,
  output_dir,
  line_color = "#67000d",
  fill_high = "#fee0d2",
  fill_low = "#a50f15",
  break_axis_by = 10,
  caption = paste0("Note: Total population includes ",
    "ages 99+, pyramid shows ages 0-99")
)

Arguments

`dataset`	A list containing two data frames: - prop_df: Population proportions data frame - pop_df: Population counts data frame Each with columns for 'country', 'region', 'district', and columns ending with "mean"
`country_code`	A string representing the country code (e.g., "ken").
`output_dir`	A string specifying the directory where plots should be saved.
`line_color`	A string specifying the color of the plot's lines. Default is '"#67000d"'.
`fill_high`	A string specifying the fill color for high values. Default is '"#fee0d2"'.
`fill_low`	A string specifying the fill color for low values. Default is '"#a50f15"'
`break_axis_by`	break axis to show less cluttered age groups. Default is 10
`caption`	A string specifying the caption text. Default is "Note: Total population includes ages 99+, pyramid shows ages 0-99"

Value

A list containing both proportion and count plots.

Examples


set.seed(123)
prop_df <- data.frame(
 country = rep("COD", 10),
 region = rep("RegionA", 10),
 district = paste("District", 1:10),
 popsize = runif(10, 2340, 28761),
 `0-4_mean` = runif(10, 0.1, 0.5),
 `5-9_mean` = runif(10, 0.05, 0.4),
 `10-14_mean` = runif(10, 0.03, 0.3)
)

pop_df <- data.frame(
 country = rep("COD", 10),
 region = rep("RegionA", 10),
 district = paste("District", 1:10),
 popsize = runif(10, 2340, 28761),
 `0-4_mean` = runif(10, 1000, 5000),
 `5-9_mean` = runif(10, 800, 4500),
 `10-14_mean` = runif(10, 700, 4000)
)

dataset <- list(prop_df = prop_df, pop_df = pop_df)

res <- generate_age_pyramid_plot(
         dataset = dataset,
         country_code = "COD",
         output_dir = file.path(tempdir()))


set.seed(123)
prop_df <- data.frame(
 country = rep("COD", 10),
 region = rep("RegionA", 10),
 district = paste("District", 1:10),
 popsize = runif(10, 2340, 28761),
 `0-4_mean` = runif(10, 0.1, 0.5),
 `5-9_mean` = runif(10, 0.05, 0.4),
 `10-14_mean` = runif(10, 0.03, 0.3)
)

pop_df <- data.frame(
 country = rep("COD", 10),
 region = rep("RegionA", 10),
 district = paste("District", 1:10),
 popsize = runif(10, 2340, 28761),
 `0-4_mean` = runif(10, 1000, 5000),
 `5-9_mean` = runif(10, 800, 4500),
 `10-14_mean` = runif(10, 700, 4000)
)

dataset <- list(prop_df = prop_df, pop_df = pop_df)

res <- generate_age_pyramid_plot(
         dataset = dataset,
         country_code = "COD",
         output_dir = file.path(tempdir()))

Predict Gamma Distribution Parameters for Spatial Grid

Description

This function predicts the scale and shape parameters of a Gamma distribution across a spatial grid using a bivariate spatial model. It can either generate new predictions or load cached results if available.

Usage

generate_gamma_predictions(
  country_code,
  age_param_data,
  model_params,
  predictor_data,
  shapefile,
  cell_size = 5000,
  n_sim = 5000,
  ignore_cache = FALSE,
  save_file = FALSE,
  output_dir = here::here("03_outputs", "3a_model_outputs")
)
generate_gamma_predictions(
  country_code,
  age_param_data,
  model_params,
  predictor_data,
  shapefile,
  cell_size = 5000,
  n_sim = 5000,
  ignore_cache = FALSE,
  save_file = FALSE,
  output_dir = here::here("03_outputs", "3a_model_outputs")
)

Arguments

`country_code`	A string representing the country code (e.g., "KEN").
`age_param_data`	A data frame containing: web_x, web_y: Spatial coordinates urban: Urban/rural indicator log_scale: Log of scale parameter at observed locations log_shape: Log of shape parameter at observed locations
`model_params`	A list containing model parameters: par: Named vector with gamma, log_sigma2, log_phi, log_tau1 Additional parameters for extracting beta coefficients
`predictor_data`	A data object containing the predictors data.
`shapefile`	An sf object defining the boundary for predictions
`cell_size`	Numeric. Grid cell size in meters (default: 5000)
`n_sim`	Integer. Number of simulations for prediction (default: 5000)
`ignore_cache`	A boolean input which is set to determine whether to ignore the existing cache and write over it. Default is set to FALSE.
`save_file`	A boolean to determine whether to save prediction or not. Default is FALSE as this will require lots of space.
`output_dir`	A string specifying the directory where the predictions file should be saved (default is "03_outputs/3a_model_outputs").

Value

A list containing:

scale_pred: Matrix of simulated scale parameters
shape_pred: Matrix of simulated shape parameters

Generate and Save Raster Plot for Gamma Predictions

Description

This function creates rasters from prediction data, combines them into a stack, and saves the faceted plot to a specified file.

Usage

generate_gamma_raster_plot(
  predictor_data,
  pred_list,
  country_code,
  output_dir,
  save_raster = TRUE,
  file_name_suffix = "gamma_prediction_rasters",
  width = 2500,
  height = 2000,
  png_resolution = 300
)
generate_gamma_raster_plot(
  predictor_data,
  pred_list,
  country_code,
  output_dir,
  save_raster = TRUE,
  file_name_suffix = "gamma_prediction_rasters",
  width = 2500,
  height = 2000,
  png_resolution = 300
)

Arguments

`predictor_data`	A data frame containing 'web_x' and 'web_y' coordinates and associated prediction values.
`pred_list`	A list containing predictions ('shape_hat', 'scale_hat', 'mean_age_pred') for creating rasters.
`country_code`	A string representing the lowercase country code, used for naming the output file.
`output_dir`	A string specifying the directory where the plot should be saved.
`save_raster`	A logical input specifying whether to save output or not. Default is TRUE.
`file_name_suffix`	A string specifying the suffix for the file name (default is "gamma_prediction_rasters").
`width`	Numeric. Width of output plot in pixels (default: 2500).
`height`	Numeric. Height of output plot in pixels (default: 2000).
`png_resolution`	An integer specifying the resolution of the plot in DPI (default: 300).

Value

The path to the saved raster plot.

Examples



predictor_data <- data.frame(
  web_x = runif(100, -100, 100),
  web_y = runif(100, -50, 50)
)

pred_list <- list(
  shape_hat = rnorm(100, mean = 2, sd = 0.5),
  scale_hat = rnorm(100, mean = 10, sd = 2),
  mean_age_pred = rnorm(100, mean = 30, sd = 5)
)

generate_gamma_raster_plot(predictor_data,
                           pred_list,
                           country_code = "COD",
                           output_dir = file.path(tempdir()))


predictor_data <- data.frame(
  web_x = runif(100, -100, 100),
  web_y = runif(100, -50, 50)
)

pred_list <- list(
  shape_hat = rnorm(100, mean = 2, sd = 0.5),
  scale_hat = rnorm(100, mean = 10, sd = 2),
  mean_age_pred = rnorm(100, mean = 30, sd = 5)
)

generate_gamma_raster_plot(predictor_data,
                           pred_list,
                           country_code = "COD",
                           output_dir = file.path(tempdir()))

Generate Variogram Plot

Description

Creates a variogram plot showing the spatial dependence structure of the data. The plot includes both the empirical variogram points and the fitted theoretical variogram line. The empirical variogram points show the actual semivariance values at different distances, while the red line shows the fitted exponential variogram model.

Usage

generate_variogram_plot(
  age_param_data,
  fit_vario,
  country_code,
  scale_outcome = "log_scale",
  output_dir,
  width = 12,
  height = 9,
  png_resolution = 300
)
generate_variogram_plot(
  age_param_data,
  fit_vario,
  country_code,
  scale_outcome = "log_scale",
  output_dir,
  width = 12,
  height = 9,
  png_resolution = 300
)

Arguments

`age_param_data`	Data frame containing the age parameter data. Must include columns 'web_x' and 'web_y' for spatial coordinates and the response variable specified in scale_outcome.
`fit_vario`	Fitted variogram object from automap package. Should contain components $psill (partial sill) and $range (range parameter) for the exponential variogram model.
`country_code`	Character string of the country code (e.g. "TZA") used for plot title and output filename.
`scale_outcome`	Character string specifying the column name for the scale parameter response variable (default: "log_scale"). This is the variable for which the variogram is computed.
`output_dir`	Character string specifying the directory path where the plot will be saved as a PNG file.
`width`	Plot width in pixels (default: 2000). Controls the output image width.
`height`	Plot height in pixels (default: 1500). Controls the output image height.
`png_resolution`	PNG resolution in DPI (dots per inch, default: 300). Higher values create larger, higher quality images.

Details

The function creates a variogram plot with the following elements: - Points showing empirical semivariance values at different distances - A red line showing the fitted exponential variogram model - Clear axis labels and title - Comma-formatted distance values on x-axis - Clean theme with black and white style

The output filename is constructed as lowercase country code + "_variogram.png"

Value

Invisibly returns the ggplot object containing the variogram plot. The plot is also saved as a PNG file in the specified output directory.

Examples


set.seed(123)  # For reproducibility
age_param_data <- data.frame(
  country = rep("TZA", 100),
  web_x = runif(100, 0, 100),
  web_y = runif(100, 0, 100),
  log_scale = rnorm(100, mean = 5, sd = 2)
)

# Create a dummy fitted variogram object
fit_vario <- list(
  psill = c(0.1, 0.5),
  range = c(0, 50)
)

vario_plot <- generate_variogram_plot(
   age_param_data = age_param_data,
   fit_vario = fit_vario,
   country_code = "TZA",
   output_dir = file.path(tempdir()))


set.seed(123)  # For reproducibility
age_param_data <- data.frame(
  country = rep("TZA", 100),
  web_x = runif(100, 0, 100),
  web_y = runif(100, 0, 100),
  log_scale = rnorm(100, mean = 5, sd = 2)
)

# Create a dummy fitted variogram object
fit_vario <- list(
  psill = c(0.1, 0.5),
  range = c(0, 50)
)

vario_plot <- generate_variogram_plot(
   age_param_data = age_param_data,
   fit_vario = fit_vario,
   country_code = "TZA",
   output_dir = file.path(tempdir()))

Initialize Full Pipeline Script and Model Script

Description

Creates a full pipeline R script and a model C++ script, saving them to the appropriate folders within the project directory structure. The folder structure is created using 'AgePopDenom::create_project_structure()'. The scripts contain example code for downloading and processing DHS data, shapefiles, and running models.

Usage

init(
  r_script_name = "full_pipeline.R",
  cpp_script_name = "model.cpp",
  path = here::here(),
  open_r_script = TRUE,
  setup_rscript = TRUE
)
init(
  r_script_name = "full_pipeline.R",
  cpp_script_name = "model.cpp",
  path = here::here(),
  open_r_script = TRUE,
  setup_rscript = TRUE
)

Arguments

`r_script_name`	Character. The name of the R script file to be created. Defaults to '"full_pipeline.R"'.
`cpp_script_name`	Character. The name of the C++ script file to be created. Defaults to '"model.cpp"'.
`path`	Character. The directory in which to create the scripts. Defaults to 'here::here()'.
`open_r_script`	Logical. Whether to open the R script automatically in RStudio (if available). Defaults to 'TRUE'.
`setup_rscript`	Logical. Whether to setup the R script with example code. Defaults to 'TRUE'.

Value

Invisibly returns a list containing:

r_script_path: Path to the created R script
cpp_script_path: Path to the created C++ script

The function also prints success messages upon script creation.

Examples


# Create temp directory with normalized path
tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)

#  Initialize with normalized path
cpp_path <- file.path(tf, "02_scripts", "model")
dir.create(cpp_path, recursive = TRUE, showWarnings = FALSE)
cpp_path <- normalizePath(cpp_path, winslash = "/", mustWork = FALSE)

init(
r_script_name = "full_pipeline.R",
cpp_script_name = "model.cpp",
path = tf,
open_r_script = FALSE
)

# Create temp directory with normalized path
tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)

#  Initialize with normalized path
cpp_path <- file.path(tf, "02_scripts", "model")
dir.create(cpp_path, recursive = TRUE, showWarnings = FALSE)
cpp_path <- normalizePath(cpp_path, winslash = "/", mustWork = FALSE)

init(
r_script_name = "full_pipeline.R",
cpp_script_name = "model.cpp",
path = tf,
open_r_script = FALSE
)

Log-Likelihood Function for Spatial Model

Description

Computes the log-likelihood for a spatial statistical model with a covariance structure determined by parameters including spatial decay and variance.

Usage

log_lik(
  par,
  p1,
  p2,
  d1,
  d2,
  y,
  u_dist,
  n_x,
  tau2_1 = 1,
  tau2_2 = 1,
  age_param_data
)
log_lik(
  par,
  p1,
  p2,
  d1,
  d2,
  y,
  u_dist,
  n_x,
  tau2_1 = 1,
  tau2_2 = 1,
  age_param_data
)

Arguments

`par`	A numeric vector of parameters to estimate. The vector contains: `par[1:p1]`: Coefficients for fixed effects in dataset 1 ( $\beta_1$ ). `par[(p1 + 1):(p1 + p2)]`: Coefficients for fixed effects in dataset 2 ( $\beta_2$ ). `par[p1 + p2 + 1]`: Spatial decay parameter ( $\gamma$ ). `par[p1 + p2 + 2]`: Log of the variance parameter ( $\sigma^2$ ). `par[p1 + p2 + 3]`: Log of the range parameter ( $\phi$ ).
`p1`	An integer. The number of fixed-effect parameters in dataset 1.
`p2`	An integer. The number of fixed-effect parameters in dataset 2.
`d1`	A numeric matrix. Design matrix for dataset 1 used to model the mean structure.
`d2`	A numeric matrix. Design matrix for dataset 2 used to model the mean structure.
`y`	A numeric vector. Observed response variable, including both datasets.
`u_dist`	A numeric matrix. Distance matrix for spatial locations.
`n_x`	An integer. The number of unique spatial locations.
`tau2_1`	Variance parameter for first process (default = 1)
`tau2_2`	Variance parameter for second process (default = 1)
`age_param_data`	A numeric matrix or vector. Additional parameters specific to age-based modeling.

Details

The log-likelihood is computed as:

$-0.5 \left[ \log(\det(M)) + (y - \mu)^T M^{-1} (y - \mu) \right]$

where:

$M$ is the covariance matrix, computed using compute_cov.
$\mu$ is the mean structure, determined by the design matrices d1, d2 and coefficients $\beta_1, \beta_2$ .

The covariance matrix $M$ is computed using spatial parameters ( $\gamma, \sigma^2, \phi$ ) and the distance matrix u_dist.

Value

A numeric scalar. The computed log-likelihood value.

Note

This function requires a helper function, compute_cov, to compute the covariance matrix based on spatial parameters.

Process DHS Data: Merge RDS Files with Shapefiles and Extract Gamma Parameters

Description

This function processes DHS (Demographic and Health Survey) data by: 1. Reading RDS files and shapefiles for each country. 2. Merging demographic data with geographic information. 3. Cleaning and aggregating the data. 4. Extracting gamma parameters for age-related analysis.

Usage

process_dhs_data(
  rds_dir = here::here("01_data", "1a_survey_data", "raw", "pr_records"),
  shp_dir = here::here("01_data", "1a_survey_data", "raw", "shapefiles"),
  output_path = here::here("01_data", "1a_survey_data", "processed",
    "dhs_pr_records_combined.rds")
)
process_dhs_data(
  rds_dir = here::here("01_data", "1a_survey_data", "raw", "pr_records"),
  shp_dir = here::here("01_data", "1a_survey_data", "raw", "shapefiles"),
  output_path = here::here("01_data", "1a_survey_data", "processed",
    "dhs_pr_records_combined.rds")
)

Arguments

`rds_dir`	Character. Path to the directory containing raw RDS files.
`shp_dir`	Character. Path to the directory containing shapefiles.
`output_path`	Character. Path to save the final processed dataset as an RDS file.

Details

The function loops through RDS files, processes each country's data by merging demographic information with shapefile data, and computes gamma parameters for age-related analysis. The progress is tracked and displayed for each country.

The function also filters out incomplete data (e.g., age values of '98') and handles labelled data using the 'haven::zap_labels' function.

The final output includes two datasets: 1. Outlier-free data. 2. Aggregated age parameter data.

Value

None. Saves the final combined dataset to the specified output path.

Examples


tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)
tmp_rds_dir <- file.path(tf, "rds")
tmp_shp_dir <- file.path(tf, "shp")
tmp_output <- file.path(tf, "output.rds")

dir.create(tmp_rds_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(tmp_shp_dir, recursive = TRUE, showWarnings = FALSE)

# Create fake DHS data
create_fake_dhs_data <- function(country_code) {
  set.seed(123) # For reproducibility
  n <- 100

  # Create labelled vectors
  hv007 <- haven::labelled(
    sample(c(2015, 2016), n, replace = TRUE),
    labels = c("2015" = 2015, "2016" = 2016)
  )

  hv001 <- haven::labelled(
    sample(1:20, n, replace = TRUE),
    labels = setNames(1:20, paste("Cluster", 1:20))
  )

  hv105 <- haven::labelled(
    sample(c(1:97, 98), n, replace = TRUE),
    labels = c(setNames(1:97, paste("Age", 1:97)), "Don't know" = 98)
  )

  # Combine into data frame
  data.frame(
    hv007 = hv007,
    hv001 = hv001,
    hv105 = hv105
  )
}

# Create fake shapefile data
# Create fake shapefile data with explicit CRS
create_fake_shapefile <- function(country_code) {
  set.seed(123)
  n_clusters <- 20

  # Create spatial data frame with explicit CRS
  sf_data <- sf::st_as_sf(
    dplyr::tibble(
      DHSCLUST = 1:n_clusters,
      URBAN_RURA = sample(c("R", "U"), n_clusters, replace = TRUE),
      LATNUM = runif(n_clusters, -10, 10),
    LONGNUM = runif(n_clusters, -10, 10)
  ),
    coords = c("LONGNUM", "LATNUM"),
    crs = 4326 # WGS84
  ) |>
    dplyr::mutate(
      LATNUM = runif(n_clusters, -10, 10),
      LONGNUM = runif(n_clusters, -10, 10)
    )
}

# Save test data for two countries
countries <- c("KE", "TZ")
for (country in countries) {
  saveRDS(
    create_fake_dhs_data(country),
    file = file.path(tmp_rds_dir, paste0(country, "HR71FL.rds"))
  )
  saveRDS(
    create_fake_shapefile(country),
    file = file.path(tmp_shp_dir, paste0(country, "HR7SHP.rds"))
  )
}

# Run the function
process_dhs_data(
  rds_dir = tmp_rds_dir,
  shp_dir = tmp_shp_dir,
  output_path = tmp_output
)


tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)
tmp_rds_dir <- file.path(tf, "rds")
tmp_shp_dir <- file.path(tf, "shp")
tmp_output <- file.path(tf, "output.rds")

dir.create(tmp_rds_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(tmp_shp_dir, recursive = TRUE, showWarnings = FALSE)

# Create fake DHS data
create_fake_dhs_data <- function(country_code) {
  set.seed(123) # For reproducibility
  n <- 100

  # Create labelled vectors
  hv007 <- haven::labelled(
    sample(c(2015, 2016), n, replace = TRUE),
    labels = c("2015" = 2015, "2016" = 2016)
  )

  hv001 <- haven::labelled(
    sample(1:20, n, replace = TRUE),
    labels = setNames(1:20, paste("Cluster", 1:20))
  )

  hv105 <- haven::labelled(
    sample(c(1:97, 98), n, replace = TRUE),
    labels = c(setNames(1:97, paste("Age", 1:97)), "Don't know" = 98)
  )

  # Combine into data frame
  data.frame(
    hv007 = hv007,
    hv001 = hv001,
    hv105 = hv105
  )
}

# Create fake shapefile data
# Create fake shapefile data with explicit CRS
create_fake_shapefile <- function(country_code) {
  set.seed(123)
  n_clusters <- 20

  # Create spatial data frame with explicit CRS
  sf_data <- sf::st_as_sf(
    dplyr::tibble(
      DHSCLUST = 1:n_clusters,
      URBAN_RURA = sample(c("R", "U"), n_clusters, replace = TRUE),
      LATNUM = runif(n_clusters, -10, 10),
    LONGNUM = runif(n_clusters, -10, 10)
  ),
    coords = c("LONGNUM", "LATNUM"),
    crs = 4326 # WGS84
  ) |>
    dplyr::mutate(
      LATNUM = runif(n_clusters, -10, 10),
      LONGNUM = runif(n_clusters, -10, 10)
    )
}

# Save test data for two countries
countries <- c("KE", "TZ")
for (country in countries) {
  saveRDS(
    create_fake_dhs_data(country),
    file = file.path(tmp_rds_dir, paste0(country, "HR71FL.rds"))
  )
  saveRDS(
    create_fake_shapefile(country),
    file = file.path(tmp_shp_dir, paste0(country, "HR7SHP.rds"))
  )
}

# Run the function
process_dhs_data(
  rds_dir = tmp_rds_dir,
  shp_dir = tmp_shp_dir,
  output_path = tmp_output
)

Process Final Population Data

Description

Reads population and proportion data from RDS files, processes the data to generate age-group-based population summaries and proportions at different administrative levels (Country, Region, District), and writes the results to an Excel file with separate sheets for each level and metric.

Usage

process_final_population_data(
  input_dir = here::here("03_outputs", "3c_table_outputs"),
  excel_output_file = here::here("03_outputs", "3d_compiled_results",
    "age_pop_denom_compiled.xlsx")
)
process_final_population_data(
  input_dir = here::here("03_outputs", "3c_table_outputs"),
  excel_output_file = here::here("03_outputs", "3d_compiled_results",
    "age_pop_denom_compiled.xlsx")
)

Arguments

`input_dir`	A character string specifying the directory containing RDS files. Default is "03_outputs/3c_table_outputs" in the project directory.
`excel_output_file`	A character string specifying the output Excel file path. Default is "03_outputs/3d_compiled_results/age_pop_denom_compiled.xlsx" in the project directory.

Value

None. The function writes an Excel file to the specified output location with six sheets containing population counts and proportions at different administrative levels.

Process Gamma Prediction Results

Description

This function processes gamma prediction results to calculate the mean age predictions, scale, and shape parameters efficiently.

Usage

process_gamma_predictions(gamma_prediction)
process_gamma_predictions(gamma_prediction)

Arguments

gamma_prediction

A list containing 'scale_pred' and 'shape_pred' matrices from the gamma prediction model.

Value

A list containing the following elements: - 'mean_age_pred': A vector of mean age predictions. - 'scale_hat': A vector of mean scale parameters. - 'shape_hat': A vector of mean shape parameters.

Rasterize Spatial Data

Description

This function converts spatial data with x, y coordinates and a value field into a raster using a specified resolution and CRS.

Usage

rasterize_data(x_coords, y_coords, values, cell_size = 5000, crs, fun = mean)
rasterize_data(x_coords, y_coords, values, cell_size = 5000, crs, fun = mean)

Arguments

`x_coords`	Numeric vector of x-coordinates (e.g., longitude).
`y_coords`	Numeric vector of y-coordinates (e.g., latitude).
`values`	Numeric vector of values associated with each point.
`cell_size`	Numeric. Grid cell size in meters (default: 5000).
`crs`	Character, the coordinate reference system in EPSG format (e.g., "EPSG:3857").
`fun`	Function to aggregate values in cells (default is 'mean').

Value

A 'terra::SpatRaster' object.

Examples



x_coords <- runif(100, -100, 100)
y_coords <- runif(100, -50, 50)
values <- rnorm(100, mean = 10, sd = 5)

rasterize_data(x_coords, y_coords, values,
               cell_size = 5000, crs = "EPSG:3857", fun = mean)


x_coords <- runif(100, -100, 100)
y_coords <- runif(100, -50, 50)
values <- rnorm(100, mean = 10, sd = 5)

rasterize_data(x_coords, y_coords, values,
               cell_size = 5000, crs = "EPSG:3857", fun = mean)

Run Country-Specific Spatial Modeling Workflow with Logging

Description

This function runs the entire spatial modeling workflow for a given country code and logs the results. It processes Survey data, fits a spatial model, generates predictions, creates population tables, and produces raster outputs. The function is modular and can be reused for different countries with minimal adjustments.

Usage

run_full_workflow(
  country_code,
  survey_data_path = here::here("01_data", "1a_survey_data", "processed"),
  survey_data_suffix = "dhs_pr_records_combined.rds",
  shape_path = here::here("01_data", "1c_shapefiles"),
  shape_suffix = "district_shape.gpkg",
  pop_raster_path = here::here("01_data", "1b_rasters", "pop_raster"),
  pop_raster_suffix = "_ppp_2020_constrained.tif",
  ur_raster_path = here::here("01_data", "1b_rasters", "urban_extent"),
  ur_raster_suffix = "afurextent.asc",
  pred_save_file = FALSE,
  raster_width = 2500,
  raster_height = 2000,
  raster_resolution = 300,
  save_raster = TRUE,
  generate_pop_raster = FALSE,
  pyramid_line_color = "#67000d",
  pyramid_fill_high = "#fee0d2",
  pyramid_fill_low = "#a50f15",
  pyramid_caption = paste0("Note: Total population includes ",
    "ages 99+, pyramid shows ages 0-99"),
  output_paths = list(),
  model_params = list(),
  return_results = FALSE,
  n_cores = parallel::detectCores() - 2,
  ...
)
run_full_workflow(
  country_code,
  survey_data_path = here::here("01_data", "1a_survey_data", "processed"),
  survey_data_suffix = "dhs_pr_records_combined.rds",
  shape_path = here::here("01_data", "1c_shapefiles"),
  shape_suffix = "district_shape.gpkg",
  pop_raster_path = here::here("01_data", "1b_rasters", "pop_raster"),
  pop_raster_suffix = "_ppp_2020_constrained.tif",
  ur_raster_path = here::here("01_data", "1b_rasters", "urban_extent"),
  ur_raster_suffix = "afurextent.asc",
  pred_save_file = FALSE,
  raster_width = 2500,
  raster_height = 2000,
  raster_resolution = 300,
  save_raster = TRUE,
  generate_pop_raster = FALSE,
  pyramid_line_color = "#67000d",
  pyramid_fill_high = "#fee0d2",
  pyramid_fill_low = "#a50f15",
  pyramid_caption = paste0("Note: Total population includes ",
    "ages 99+, pyramid shows ages 0-99"),
  output_paths = list(),
  model_params = list(),
  return_results = FALSE,
  n_cores = parallel::detectCores() - 2,
  ...
)

Arguments

`country_code`	Character. The ISO3 country code (e.g., "TZA").
`survey_data_path`	Character. Path to Survey data. Default: "01_data/1a_survey_data/processed".
`survey_data_suffix`	Character. Suffix for Survey data files. Default: "dhs_pr_records_combined.rds".
`shape_path`	Character. Path to shapefile data. Default: "01_data/1c_shapefiles".
`shape_suffix`	Character. Suffix for shapefile data. Default: "district_shape.gpkg".
`pop_raster_path`	Character. Path to population raster data. Default: "01_data/1b_rasters/pop_raster".
`pop_raster_suffix`	Character. Suffix for population raster files. Default: "_ppp_2020_constrained.tif".
`ur_raster_path`	Character. Path to urban-rural extent data. Default: "01_data/1b_rasters/urban_extent".
`ur_raster_suffix`	Character. Suffix for urban-rural raster. Default: "afurextent.asc".
`pred_save_file`	Logical. Whether to save prediction files. Default: FALSE
`raster_width`	Integer. Width of raster plots in pixels. Default: 2500
`raster_height`	Integer. Height of raster plots in pixels. Default: 2000
`raster_resolution`	Integer. Resolution of PNG outputs. Default: 300
`save_raster`	Logical. Whether to save raster outputs to disk. Default: TRUE
`generate_pop_raster`	Logical. Whether to generate population raster. Default: FALSE
`pyramid_line_color`	Character. Hex color code for the age pyramid's outline. Default: "#67000d"
`pyramid_fill_high`	Character. Hex color code for the age pyramid's higher values fill. Default: "#fee0d2"
`pyramid_fill_low`	Character. Hex color code for the age pyramid's lower values fill. Default: "#a50f15"
`pyramid_caption`	Character. Caption text for the age pyramid plot. Default: "Note: Total population includes ages 99+, pyramid shows ages 0-99"
`output_paths`	List of output paths: model: Path for model outputs. Default: "03_outputs/3a_model_outputs" plot: Path for plots. Default: "03_outputs/3b_visualizations" raster: Path for rasters. Default: "03_outputs/3c_raster_outputs" table: Path for tables. Default: "03_outputs/3c_table_outputs" compiled: Path for compiled results. Default: "03_outputs/3d_compiled_results" excel: Path for Excel outputs. Default: "03_outputs/3d_compiled_results/age_pop_denom_compiled.xlsx" log: Path for logs. Default: "03_outputs/3a_model_outputs/modelling_log.rds"
`model_params`	List of model parameters: cell_size: Cell size in meters. Default: 5000 n_sim: Number of simulations. Default: 5000 ignore_cache: Whether to ignore cache. Default: FALSE age_range: Age range vector. Default: c(0, 99) age_interval: Age interval. Default: 1 return_prop: Return proportions. Default: TRUE scale_outcome: Scale outcome variable. Default: "log_scale" shape_outcome: Shape outcome variable. Default: "log_shape" covariates: Model covariates. Default: "urban" cpp_script: C++ script path. Default: "02_scripts/model" control_params: Control parameters. Default: list(trace = 2) manual_params: Manual parameters. Default: NULL verbose: Verbose output. Default: TRUE age_range_raster: Age range for raster output. Default: c(0, 10) age_interval_raster: Age interval for raster output. Default: 1
`return_results`	Logical. Whether to return results. Default: FALSE.
`n_cores`	Integer number of cores for parallel processing for age population table, default detectCores()-2
`...`	Additional arguments passed to subfunctions.

Value

If return_results is TRUE, a list containing:

spat_model_param: Fitted spatial model parameters
predictor_data: Predictor dataset
gamma_prediction: Generated gamma predictions
pred_list: Processed gamma prediction results
final_age_pop_table: Age-population table data
final_pop: Compiled population data
all_mod_params: Compiled model parameters

If return_results is FALSE, the function saves all outputs to disk and returns NULL invisibly.

Examples



# set country code
country_codeiso <- "GMB"

set.seed(123)
# Set parameters for simulation
total_population <- 266
urban_proportion <- 0.602
total_coords <- 266
lon_range <- c(-16.802, -13.849)
lat_range <- c(13.149, 13.801)
mean_web_x <- -1764351
mean_web_y <- 1510868

# Simulate processed survey dataset for Gambia
df_gambia <- NULL
df_gambia$age_param_data <- dplyr::tibble(
  country = "Gambia",
  country_code_iso3 = "GMB",
  country_code_dhs = "GM",
  year_of_survey = 2024,
  id_coords = rep(1:total_coords, length.out = total_population),
  lon = runif(total_population, lon_range[1], lon_range[2]),
  lat = runif(total_population, lat_range[1], lat_range[2]),
  web_x = rnorm(total_population, mean_web_x, 50000),
  web_y = rnorm(total_population, mean_web_y, 50000),
  log_scale = rnorm(total_population, 2.82, 0.2),
  log_shape = rnorm(total_population, 0.331, 0.1),
  urban = rep(c(1, 0), c(
    round(total_population * urban_proportion),
    total_population - round(total_population * urban_proportion)
  )),
  b1 = rnorm(total_population, 0.0142, 0.002),
  c = rnorm(total_population, -0.00997, 0.001),
  b2 = rnorm(total_population, 0.00997, 0.002),
 nsampled = sample(180:220, total_population, replace = TRUE)
)


# Create temp directory with normalized path
tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)
tf <- normalizePath(tf, winslash = "/", mustWork = FALSE)

AgePopDenom::init(
  r_script_name = "full_pipeline.R",
  cpp_script_name = "model.cpp",
  path = tf,
  open_r_script = FALSE
)

# save as processed dhs data
saveRDS(
  df_gambia,
  file = file.path(
    tf, "01_data", "1a_survey_data", "processed",
    "dhs_pr_records_combined.rds"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE)
)

# Download shapefiles
download_shapefile(
  country_codes = country_codeiso,
  dest_file = file.path(
    tf, "01_data", "1c_shapefiles",
    "district_shape.gpkg"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE)
)

# Download population rasters from worldpop
download_pop_rasters(
 country_codes = country_codeiso,
  dest_dir = file.path(tf, "01_data", "1b_rasters", "pop_raster") |>
    normalizePath(winslash = "/", mustWork = FALSE)
)
# Extract urban extent raster
extract_afurextent(
  dest_dir = file.path(tf, "01_data", "1b_rasters", "urban_extent") |>
    normalizePath(winslash = "/", mustWork = FALSE)
)

# Modelling --------------------------------------------------------------

run_full_workflow(
  country_code = country_codeiso,
  survey_data_path = file.path(
    tf, "01_data", "1a_survey_data", "processed"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE),
  survey_data_suffix = "dhs_pr_records_combined.rds",
  shape_path = file.path(
    tf, "01_data", "1c_shapefiles"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE),
  shape_suffix = "district_shape.gpkg",
  pop_raster_path = file.path(
    tf, "01_data", "1b_rasters", "pop_raster"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE),
  pop_raster_suffix = "_ppp_2020_constrained.tif",
  ur_raster_path = file.path(
    tf, "01_data", "1b_rasters", "urban_extent"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE),
  ur_raster_suffix = "afurextent.asc",
  pred_save_file = FALSE,
  raster_width = 2500,
  raster_height = 2000,
  raster_resolution = 300,
  save_raster = TRUE,
  pyramid_line_color = "#67000d",
  pyramid_fill_high = "#fee0d2",
  pyramid_fill_low = "#a50f15",
  pyramid_caption = paste0(
    "Note: Total population includes ",
    "ages 99+, pyramid shows ages 0-99"
  ),
  generate_pop_raster = TRUE,
  output_paths = list(
    model = file.path(tf, "03_outputs", "3a_model_outputs"),
    plot = file.path(tf, "03_outputs", "3b_visualizations"),
    raster = file.path(tf, "03_outputs", "3c_raster_outputs"),
    table = file.path(tf, "03_outputs", "3c_table_outputs"),
    compiled = file.path(tf, "03_outputs", "3d_compiled_results"),
    excel = file.path(
      tf, "03_outputs", "3d_compiled_results",
      "age_pop_denom_compiled.xlsx"
    ),
    log = file.path(
      tf, "03_outputs", "3a_model_outputs", "modelling_log.rds"
    )
  ) |> lapply(\(x) normalizePath(x, winslash = "/", mustWork = FALSE)),
  model_params = list(
    cell_size = 5000,
    n_sim = 10,
    ignore_cache = FALSE,
    age_range = c(0, 1),
    age_interval = 1,
    return_prop = TRUE,
    scale_outcome = "log_scale",
    shape_outcome = "log_shape",
    covariates = "urban",
    cpp_script = file.path(tf, "02_scripts", "model") |>
      normalizePath(winslash = "/", mustWork = FALSE),
    control_params = list(trace = 2),
    manual_params = NULL,
    verbose = TRUE
  ),
  return_results = FALSE,
  n_cores = 1
)


# set country code
country_codeiso <- "GMB"

set.seed(123)
# Set parameters for simulation
total_population <- 266
urban_proportion <- 0.602
total_coords <- 266
lon_range <- c(-16.802, -13.849)
lat_range <- c(13.149, 13.801)
mean_web_x <- -1764351
mean_web_y <- 1510868

# Simulate processed survey dataset for Gambia
df_gambia <- NULL
df_gambia$age_param_data <- dplyr::tibble(
  country = "Gambia",
  country_code_iso3 = "GMB",
  country_code_dhs = "GM",
  year_of_survey = 2024,
  id_coords = rep(1:total_coords, length.out = total_population),
  lon = runif(total_population, lon_range[1], lon_range[2]),
  lat = runif(total_population, lat_range[1], lat_range[2]),
  web_x = rnorm(total_population, mean_web_x, 50000),
  web_y = rnorm(total_population, mean_web_y, 50000),
  log_scale = rnorm(total_population, 2.82, 0.2),
  log_shape = rnorm(total_population, 0.331, 0.1),
  urban = rep(c(1, 0), c(
    round(total_population * urban_proportion),
    total_population - round(total_population * urban_proportion)
  )),
  b1 = rnorm(total_population, 0.0142, 0.002),
  c = rnorm(total_population, -0.00997, 0.001),
  b2 = rnorm(total_population, 0.00997, 0.002),
 nsampled = sample(180:220, total_population, replace = TRUE)
)


# Create temp directory with normalized path
tf <- file.path(tempdir(), "test_env")
dir.create(tf, recursive = TRUE, showWarnings = FALSE)
tf <- normalizePath(tf, winslash = "/", mustWork = FALSE)

AgePopDenom::init(
  r_script_name = "full_pipeline.R",
  cpp_script_name = "model.cpp",
  path = tf,
  open_r_script = FALSE
)

# save as processed dhs data
saveRDS(
  df_gambia,
  file = file.path(
    tf, "01_data", "1a_survey_data", "processed",
    "dhs_pr_records_combined.rds"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE)
)

# Download shapefiles
download_shapefile(
  country_codes = country_codeiso,
  dest_file = file.path(
    tf, "01_data", "1c_shapefiles",
    "district_shape.gpkg"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE)
)

# Download population rasters from worldpop
download_pop_rasters(
 country_codes = country_codeiso,
  dest_dir = file.path(tf, "01_data", "1b_rasters", "pop_raster") |>
    normalizePath(winslash = "/", mustWork = FALSE)
)
# Extract urban extent raster
extract_afurextent(
  dest_dir = file.path(tf, "01_data", "1b_rasters", "urban_extent") |>
    normalizePath(winslash = "/", mustWork = FALSE)
)

# Modelling --------------------------------------------------------------

run_full_workflow(
  country_code = country_codeiso,
  survey_data_path = file.path(
    tf, "01_data", "1a_survey_data", "processed"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE),
  survey_data_suffix = "dhs_pr_records_combined.rds",
  shape_path = file.path(
    tf, "01_data", "1c_shapefiles"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE),
  shape_suffix = "district_shape.gpkg",
  pop_raster_path = file.path(
    tf, "01_data", "1b_rasters", "pop_raster"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE),
  pop_raster_suffix = "_ppp_2020_constrained.tif",
  ur_raster_path = file.path(
    tf, "01_data", "1b_rasters", "urban_extent"
  ) |>
    normalizePath(winslash = "/", mustWork = FALSE),
  ur_raster_suffix = "afurextent.asc",
  pred_save_file = FALSE,
  raster_width = 2500,
  raster_height = 2000,
  raster_resolution = 300,
  save_raster = TRUE,
  pyramid_line_color = "#67000d",
  pyramid_fill_high = "#fee0d2",
  pyramid_fill_low = "#a50f15",
  pyramid_caption = paste0(
    "Note: Total population includes ",
    "ages 99+, pyramid shows ages 0-99"
  ),
  generate_pop_raster = TRUE,
  output_paths = list(
    model = file.path(tf, "03_outputs", "3a_model_outputs"),
    plot = file.path(tf, "03_outputs", "3b_visualizations"),
    raster = file.path(tf, "03_outputs", "3c_raster_outputs"),
    table = file.path(tf, "03_outputs", "3c_table_outputs"),
    compiled = file.path(tf, "03_outputs", "3d_compiled_results"),
    excel = file.path(
      tf, "03_outputs", "3d_compiled_results",
      "age_pop_denom_compiled.xlsx"
    ),
    log = file.path(
      tf, "03_outputs", "3a_model_outputs", "modelling_log.rds"
    )
  ) |> lapply(\(x) normalizePath(x, winslash = "/", mustWork = FALSE)),
  model_params = list(
    cell_size = 5000,
    n_sim = 10,
    ignore_cache = FALSE,
    age_range = c(0, 1),
    age_interval = 1,
    return_prop = TRUE,
    scale_outcome = "log_scale",
    shape_outcome = "log_shape",
    covariates = "urban",
    cpp_script = file.path(tf, "02_scripts", "model") |>
      normalizePath(winslash = "/", mustWork = FALSE),
    control_params = list(trace = 2),
    manual_params = NULL,
    verbose = TRUE
  ),
  return_results = FALSE,
  n_cores = 1
)

Package 'AgePopDenom'

Help Index

Aggregate Individual Survey Data and Extract Gamma Parameters by Location

Description

Usage

Arguments

Value

Compute Covariance Matrix for Spatial Model

Description

Usage

Arguments

Value

Generate or Load Cached Predictors Data

Description

Usage

Arguments

Value

Examples

Create a Standardized Project Folder Structure

Description

Usage

Arguments

Details

Value

Examples

Main Function to Download DHS Datasets

Description

Usage

Arguments

Value

Download population rasters for given country codes.

Description

Usage

Arguments

Details

Value

Examples

Download WHO ADM2 Boundaries with Partial Update

Description

Usage

Arguments

Value

Examples

Extract Urban/Rural Extent Raster

Description

Usage

Arguments

Details

Value

Examples

Extract Parameters and Optimization Details with Log-Likelihood

Description

Usage

Arguments

Value

Examples

Extract Beta Parameters from Model Output

Description

Usage

Arguments

Details

Value

Fit a Spatial Model for Age Parameters using TMB

Description

Usage

Arguments

Details

Value

Note

Examples

Generate Age Population Raster

Description

Usage

Arguments

Details

Value

Examples

Generate Age Population Tables

Description

Usage