Exposome Scores

Load Data and Libraries

# Load Libraries
library(tidyverse)
library(tidyexposomics)

We will start off with our example dataset pulled from the ISGlobal Exposome Data Challenge 2021 (Maitre et al., 2022).

# Bring the packaged example dataset into the workspace
data("tidyexposomics_example")

# Omics assays and their row-level annotation, keyed by the same assay names
assay_matrices <- list(
    "Gene Expression" = tidyexposomics_example$exp_filt,
    "Methylation" = tidyexposomics_example$methyl_filt
)
assay_row_data <- list(
    "Gene Expression" = tidyexposomics_example$exp_fdata,
    "Methylation" = tidyexposomics_example$methyl_fdata
)

# Combine the codebook, the exposure table and the omics layers into a
# single exposomic set object
expom <- create_exposomicset(
    codebook = tidyexposomics_example$annotated_cb,
    exposure = tidyexposomics_example$meta,
    omics = assay_matrices,
    row_data = assay_row_data
)

## Ensuring all omics datasets are matrices with column names.

## Creating SummarizedExperiment objects.

## Creating MultiAssayExperiment object.

## MultiAssayExperiment created successfully.

We will focus on a few exposure variable categories.

# Keep only the codebook rows belonging to the exposure categories of
# interest ...
codebook_subset <- dplyr::filter(
    tidyexposomics_example$annotated_cb,
    category %in% c(
        "aerosol",
        "main group molecular entity",
        "polyatomic entity"
    )
)

# ... and extract their variable names as a plain character vector
exp_vars <- as.character(dplyr::pull(codebook_subset, variable))

Quality Control

As in the main vignette, we will impute missing values in the exposure data using the missForest algorithm (exposure_impute_method = "missforest").

# Impute missing values
# missForest imputation is built on random forests and is therefore
# stochastic. Fix the RNG seed first so that reruns of this vignette start
# from the same random state and the downstream scores and associations
# can be reproduced.
set.seed(1234)
expom <- run_impute_missing(
    exposomicset = expom,
    exposure_impute_method = "missforest",
    exposure_cols = exp_vars
)

## Imputing exposure data using method: missforest

We will then transform the exposure data with the boxcox_best method so that the variables are closer to normally distributed.

# Apply the "boxcox_best" transformation to the selected exposure columns
expom <- expom |>
    transform_exposure(
        exposomicset = _,
        transform_method = "boxcox_best",
        exposure_cols = exp_vars
    )

## Applying the boxcox_best transformation.

Exposome scores are summary measures of exposure computed across a set of exposure variables. They are calculated with the run_exposome_score function: the exposure_cols argument selects the columns that contribute to the score, and the score_type argument selects how the score is calculated. The available score types are:

median: Calculates the median of the exposure variables.
mean: Calculates the mean of the exposure variables.
sum: Calculates the sum of the exposure variables.
pca: Calculates the first principal component of the exposure variables.
irt: Uses Item Response Theory to calculate the exposome score.
quantile: Calculates the quantile of the exposure variables.
var: Calculates the variance of the exposure variables.

The score_column_name argument is used to set the name of the column to store the exposome score in. Here we will define a score for aerosols using a variety of different methods and demonstrate their use in association with asthma status.

# Determine which aerosol variables to use: every score below is computed
# from these two exposure columns
aerosols <- c("h_pm25_ratio_preg_None", "h_pm10_ratio_preg_None")

# Create exposome scores. Each run_exposome_score() call takes the same
# exposure columns, applies a different score_type, and stores the result
# under its own score_column_name. The calls are chained with the native
# pipe, so each step receives the object returned by the previous one.
expom <- expom |>
    # Median of the aerosol variables
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "median",
        score_column_name = "exposome_median_score"
    ) |>
    # First principal component of the aerosol variables
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "pca",
        score_column_name = "exposome_pca_score"
    ) |>
    # Item Response Theory based score
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "irt",
        score_column_name = "exposome_irt_score"
    ) |>
    # Quantile-based score
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "quantile",
        score_column_name = "exposome_quantile_score"
    ) |>
    # Variance of the aerosol variables
    run_exposome_score(
        exposure_cols = aerosols,
        score_type = "var",
        score_column_name = "exposome_var_score"
    )

## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...
## Extracting exposure data...

## Calculating median exposure scores...

## Calculating PCA exposure scores...

## Calculating IRT exposure scores...

## Warning: EM cycles terminated after 500 iterations.

## Calculating quantile exposure scores...

## Calculating variance exposure scores...

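Note that the IRT fit above stopped at the EM iteration limit (see the warning), so the IRT-based score should be interpreted with caution. We can then associate these exposome scores with asthma status using the run_association function, as in the main vignette. This time, however, we set feature_set to the exposome scores we just calculated.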

# Score columns created in the previous step; these are the features to
# test against the outcome
score_columns <- c(
    "exposome_median_score",
    "exposome_pca_score",
    "exposome_irt_score",
    "exposome_quantile_score",
    "exposome_var_score"
)

# Associate each exposome score with asthma status (binomial family)
expom <- run_association(
    exposomicset = expom,
    outcome = "hs_asthma",
    source = "exposures",
    feature_set = score_columns,
    action = "add",
    family = "binomial"
)

## Running GLMs.

# Exposome score terms to show in the forest plot
score_terms <- c(
    "exposome_median_score",
    "exposome_pca_score",
    "exposome_irt_score",
    "exposome_quantile_score",
    "exposome_var_score"
)

# Draw the association forest plot for the exposure-level results.
# NOTE(review): filter_col / filter_thresh presumably mark or restrict terms
# by p.value at the 0.05 level; confirm against the plot_association() docs.
plot_association(
    exposomicset = expom,
    source = "exposures",
    terms = score_terms,
    filter_col = "p.value",
    filter_thresh = 0.05,
    r2_col = "r2"
)

Associations of aerosol exposome scores with asthma status. The variance-based score has the strongest association with asthma status.

Session Info

See Session Info


# Print the R version, platform and package versions used for this run
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] tidyexposomics_0.99.16      MultiAssayExperiment_1.36.1
##  [3] SummarizedExperiment_1.40.0 Biobase_2.70.0             
##  [5] GenomicRanges_1.62.1        Seqinfo_1.0.0              
##  [7] IRanges_2.44.0              S4Vectors_0.48.0           
##  [9] BiocGenerics_0.56.0         generics_0.1.4             
## [11] MatrixGenerics_1.22.0       matrixStats_1.5.0          
## [13] lubridate_1.9.5             forcats_1.0.1              
## [15] stringr_1.6.0               dplyr_1.1.4                
## [17] purrr_1.2.0                 readr_2.2.0                
## [19] tidyr_1.3.2                 tibble_3.3.0               
## [21] ggplot2_4.0.2               tidyverse_2.0.0            
## [23] BiocStyle_2.38.0           
## 
## loaded via a namespace (and not attached):
##   [1] fs_2.1.0             naniar_1.1.0         httr_1.4.8          
##   [4] RColorBrewer_1.1-3   tools_4.5.1          doRNG_1.8.6.3       
##   [7] backports_1.5.0      R6_2.6.1             DT_0.34.0           
##  [10] vegan_2.7-3          mgcv_1.9-3           permute_0.9-10      
##  [13] withr_3.0.2          gridExtra_2.3        progressr_0.18.0    
##  [16] cli_3.6.5            textshaping_1.0.4    factoextra_2.0.0    
##  [19] RGCCA_3.0.3          labeling_0.4.3       sass_0.4.10         
##  [22] S7_0.2.1             randomForest_4.7-1.2 proxy_0.4-29        
##  [25] pbapply_1.7-4        pkgdown_2.2.0        systemfonts_1.3.2   
##  [28] foreign_0.8-90       R.utils_2.13.0       parallelly_1.46.1   
##  [31] sessioninfo_1.2.3    itertools_0.1-3      limma_3.66.0        
##  [34] rstudioapi_0.18.0    RSQLite_2.4.6        car_3.1-5           
##  [37] Matrix_1.7-4         clipr_0.8.0          abind_1.4-8         
##  [40] R.methodsS3_1.8.2    lifecycle_1.0.5      yaml_2.3.12         
##  [43] carData_3.0-6        recipes_1.3.1        SparseArray_1.10.8  
##  [46] BiocFileCache_3.0.0  grid_4.5.1           blob_1.3.0          
##  [49] promises_1.5.0       crayon_1.5.3         lattice_0.22-7      
##  [52] pillar_1.11.1        knitr_1.51           corpcor_1.6.10      
##  [55] future.apply_1.20.2  mixOmics_6.34.0      codetools_0.2-20    
##  [58] glue_1.8.0           beepr_2.0            data.table_1.18.2.1 
##  [61] vctrs_0.6.5          Rdpack_2.6.6         testthat_3.3.2      
##  [64] gtable_0.3.6         assertthat_0.2.1     cachem_1.1.0        
##  [67] gower_1.0.2          xfun_0.54            rbibutils_2.4.1     
##  [70] S4Arrays_1.10.1      mime_0.13            prodlim_2025.04.28  
##  [73] survival_3.8-3       timeDate_4052.112    audio_0.1-12        
##  [76] iterators_1.0.14     hardhat_1.4.2        lava_1.8.2          
##  [79] statmod_1.5.1        ipred_0.9-15         nlme_3.1-168        
##  [82] fenr_1.8.1           bit64_4.6.0-1        filelock_1.0.3      
##  [85] splines2_0.5.4       bslib_0.10.0         Deriv_4.2.0         
##  [88] otel_0.2.0           rpart_4.1.24         colorspace_2.1-2    
##  [91] DBI_1.3.0            Hmisc_5.2-5          nnet_7.3-20         
##  [94] tidyselect_1.2.1     bit_4.6.0            compiler_4.5.1      
##  [97] curl_7.0.0           httr2_1.2.2          htmlTable_2.4.3     
## [100] desc_1.4.3           DelayedArray_0.36.0  bookdown_0.46       
## [103] checkmate_2.3.4      scales_1.4.0         rappdirs_0.3.4      
## [106] digest_0.6.39        rmarkdown_2.30       XVector_0.50.0      
## [109] htmltools_0.5.9      pkgconfig_2.0.3      base64enc_0.1-6     
## [112] SimDesign_2.24       dbplyr_2.5.2         fastmap_1.2.0       
## [115] rlang_1.1.7          htmlwidgets_1.6.4    shiny_1.13.0        
## [118] farver_2.1.2         jquerylib_0.1.4      jsonlite_2.0.0      
## [121] BiocParallel_1.44.0  dcurver_0.9.3        ModelMetrics_1.2.2.2
## [124] R.oo_1.27.1          magrittr_2.0.4       Formula_1.2-5       
## [127] patchwork_1.3.2      Rcpp_1.1.1           visdat_0.6.0        
## [130] stringi_1.8.7        pROC_1.19.0.1        brio_1.1.5          
## [133] MASS_7.3-65          plyr_1.8.9           parallel_4.5.1      
## [136] listenv_0.10.1       ggrepel_0.9.7        splines_4.5.1       
## [139] hms_1.1.4            igraph_2.2.2         ggpubr_0.6.3        
## [142] ranger_0.18.0        ggsignif_0.6.4       rngtools_1.5.2      
## [145] reshape2_1.4.5       GPArotation_2025.3-1 tidybulk_2.0.1      
## [148] evaluate_1.0.5       BiocManager_1.30.27  tzdb_0.5.0          
## [151] foreach_1.5.2        missForest_1.6.1     httpuv_1.6.16       
## [154] future_1.70.0        mirt_1.46.1          BiocBaseUtils_1.12.0
## [157] broom_1.0.12         xtable_1.8-8         e1071_1.7-17        
## [160] RSpectra_0.16-2      rstatix_0.7.3        later_1.4.8         
## [163] class_7.3-23         ragg_1.5.0           rARPACK_0.11-0      
## [166] memoise_2.0.1        ellipse_0.5.0        cluster_2.1.8.2     
## [169] timechange_0.4.0     globals_0.19.1       caret_7.0-1

Exposome Scores

Jason Laird

Load Data and Libraries

Quality Control

Exposome Scores

Session Info