Main functions: binary data • biomod2

Complete code example

This page presents, in one complete example, all the main functions of biomod2 (those whose names start with BIOMOD_[...]).

The data set used is DataSpecies, which contains presence/absence data.
Similar examples are presented for count data on the Main functions (abund) webpage.

Load dataset and variables

library(biomod2)
library(terra)

# Load species occurrences (6 species available)
# and preview the first rows of the table
data('DataSpecies')
head(DataSpecies)

# Select the name of the studied species
# (used below as a column name of DataSpecies and as the response name)
myRespName <- 'GuloGulo'

# Get corresponding presence/absence data
# (the species column, coerced to a numeric vector)
myResp <- as.numeric(DataSpecies[, myRespName])

# Get corresponding XY coordinates
# (the X_WGS84 and Y_WGS84 columns, in that order)
myRespXY <- DataSpecies[, c('X_WGS84', 'Y_WGS84')]

# Load environmental variables extracted from BIOCLIM (bio_3, bio_4, bio_7, bio_11 & bio_12)
# and pass them through rast(); myExpl is reused below as explanatory
# variables and as the environment for the current projections
data('bioclim_current')
myExpl <- rast(bioclim_current)

Prepare data & parameters

Format data (observations & explanatory variables)

# Build the formatted dataset from the observed response (true absences kept)
myBiomodData <- BIOMOD_FormatingData(
  resp.var = myResp,
  expl.var = myExpl,
  resp.xy = myRespXY,
  resp.name = myRespName
)

# Print the formatted object, then plot it (the plot result is kept in myPlot)
myBiomodData
myPlot <- plot(myBiomodData)

Pseudo-absences extraction

A single set or multiple sets of pseudo-absences can be selected with the BIOMOD_FormatingData function, which calls the bm_PseudoAbsences function to do so. More examples are presented on the Auxiliary functions webpage.

# # Transform true absences into potential pseudo-absences
# myResp.PA <- ifelse(myResp == 1, 1, NA)
# 
# # Format data with pseudo-absences : random method
# myBiomodData.r <- BIOMOD_FormatingData(resp.var = myResp.PA,
#                                        expl.var = myExpl,
#                                        resp.xy = myRespXY,
#                                        resp.name = myRespName,
#                                        PA.nb.rep = 4,
#                                        PA.nb.absences = 1000,
#                                        PA.strategy = 'random')
# 
# myBiomodData.r
# myPlot <- plot(myBiomodData.r)

# # Select multiple sets of pseudo-absences
#
# # Transform true absences into potential pseudo-absences
# myResp.PA <- ifelse(myResp == 1, 1, NA)
# 
# # Format Data with pseudo-absences : random method
# myBiomodData.multi <- BIOMOD_FormatingData(resp.var = myResp.PA,
#                                            expl.var = myExpl,
#                                            resp.xy = myRespXY,
#                                            resp.name = myRespName,
#                                            PA.nb.rep = 4,
#                                            PA.nb.absences = c(1000, 500, 500, 200),
#                                            PA.strategy = 'random')
# myBiomodData.multi
# summary(myBiomodData.multi)
# myPlot <- plot(myBiomodData.multi)

Cross-validation datasets

Several cross-validation methods are available and can be selected with the BIOMOD_Modeling function, which calls the bm_CrossValidation function to do so. More examples are presented on the Auxiliary functions webpage.

# # k-fold selection
# cv.k <- bm_CrossValidation(bm.format = myBiomodData,
#                            strategy = 'kfold',
#                            nb.rep = 2,
#                            k = 3)
# 
# # stratified selection (geographic)
# cv.s <- bm_CrossValidation(bm.format = myBiomodData,
#                            strategy = 'strat',
#                            k = 3,
#                            balance = 'presences',
#                            strat = 'x')
#
# head(cv.k)
# head(cv.s)
# myPlot <- plot(myBiomodData, plot.eval = TRUE, calib.lines = cv.k)
# myPlot <- plot(myBiomodData, plot.eval = TRUE, calib.lines = cv.s)

Retrieve modeling options

Modeling options are automatically retrieved for the selected models within the BIOMOD_Modeling function, which calls the bm_ModelingOptions function to do so. Model parameters can also be automatically tuned to a specific dataset by calling the bm_Tuning function, although this can take quite a long time. More examples are presented on the Auxiliary functions webpage.

# # bigboss parameters
# opt.b <- bm_ModelingOptions(data.type = 'binary',
#                             models = c('RF', 'XGBOOST'),
#                             strategy = 'bigboss')
# 
# # tuned parameters with formatted data
# opt.t <- bm_ModelingOptions(data.type = 'binary',
#                             models = c('RF', 'XGBOOST'),
#                             strategy = 'tuned',
#                             bm.format = myBiomodData)
# 
# opt.b
# opt.t

Run modeling

Single models

# Fit the single models on the formatted dataset
# (two optional arguments are kept commented out at the end of the call)
myBiomodSM <- BIOMOD_Modeling(
  bm.format = myBiomodData,
  modeling.id = 'AllModels',
  CV.strategy = 'random',
  CV.nb.rep = 2,
  CV.perc = 0.8,
  OPT.strategy = 'bigboss',
  metric.eval = c('AUCroc', 'AUCprg', 'BOYCE', 'TSS'),
  var.import = 3
  # seed.val = 123
  # nb.cpu = 8
)
myBiomodSM

# Extract evaluation scores and variable importance
get_evaluations(myBiomodSM)
get_variables_importance(myBiomodSM)

# Plot evaluation scores: mean view for the calibration and validation
# datasets, then boxplots with two different groupings
myPlot <- bm_PlotEvalMean(bm.out = myBiomodSM, dataset = 'calibration')
myPlot <- bm_PlotEvalMean(bm.out = myBiomodSM, dataset = 'validation')
myPlot <- bm_PlotEvalBoxplot(
  bm.out = myBiomodSM,
  dataset = 'calibration',
  group.by = c('algo', 'algo')
)
myPlot <- bm_PlotEvalBoxplot(
  bm.out = myBiomodSM,
  dataset = 'calibration',
  group.by = c('algo', 'run')
)

# Plot variable importance with three different groupings
myPlot <- bm_PlotVarImpBoxplot(
  bm.out = myBiomodSM,
  group.by = c('expl.var', 'algo', 'algo')
)
myPlot <- bm_PlotVarImpBoxplot(
  bm.out = myBiomodSM,
  group.by = c('expl.var', 'algo', 'run')
)
myPlot <- bm_PlotVarImpBoxplot(
  bm.out = myBiomodSM,
  group.by = c('algo', 'expl.var', 'run')
)
# List the elements returned by the last plotting call
names(myPlot)

# Pick model subsets by position among the built models:
# five models for mySet1, a single one (the 4th) for mySet2
mySet1 <- get_built_models(myBiomodSM)[c(1, 4, 8, 10, 13)]
mySet2 <- get_built_models(myBiomodSM)[4]

# Response curves: other variables fixed at their median, then at their
# minimum, for mySet1; bivariate curves for the single model in mySet2
myPlot <- bm_PlotResponseCurves(
  bm.out = myBiomodSM,
  models.chosen = mySet1,
  fixed.var = 'median'
)
myPlot <- bm_PlotResponseCurves(
  bm.out = myBiomodSM,
  models.chosen = mySet1,
  fixed.var = 'min'
)
myPlot <- bm_PlotResponseCurves(
  bm.out = myBiomodSM,
  models.chosen = mySet2,
  do.bivariate = TRUE
)

# Explore outliers and residuals of the models in mySet1
myPlot <- bm_ModelAnalysis(bm.mod = myBiomodSM, models.chosen = mySet1)
names(myPlot)

Ensemble models

# Build the ensemble models from the single-model output
myBiomodEM <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodSM,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmedian', 'EMmean', 'EMwmean', 'EMca', 'EMci', 'EMcv'),
  metric.select = 'BOYCE',
  metric.select.thresh = c(0.8),
  metric.eval = c('AUCroc', 'AUCprg', 'BOYCE', 'TSS'),
  var.import = 3,
  EMci.alpha = 0.05,
  EMwmean.decay = 'proportional'
)
myBiomodEM

# Extract evaluation scores and variable importance
get_evaluations(myBiomodEM)
get_variables_importance(myBiomodEM)

# Plot evaluation scores (calibration dataset), grouped by full model name
myPlot <- bm_PlotEvalMean(
  bm.out = myBiomodEM,
  dataset = 'calibration',
  group.by = 'full.name'
)
myPlot <- bm_PlotEvalBoxplot(
  bm.out = myBiomodEM,
  dataset = 'calibration',
  group.by = c('full.name', 'full.name')
)

# Plot variable importance with three different groupings
myPlot <- bm_PlotVarImpBoxplot(
  bm.out = myBiomodEM,
  group.by = c('expl.var', 'full.name', 'full.name')
)
myPlot <- bm_PlotVarImpBoxplot(
  bm.out = myBiomodEM,
  group.by = c('expl.var', 'algo', 'merged.by.run')
)
myPlot <- bm_PlotVarImpBoxplot(
  bm.out = myBiomodEM,
  group.by = c('algo', 'expl.var', 'merged.by.run')
)

# Pick ensemble-model subsets by position among the built models:
# three models for mySet1, a single one (the 7th) for mySet2
# (this replaces the mySet1 / mySet2 values of the single-model section)
mySet1 <- get_built_models(myBiomodEM)[c(1, 6, 7)]
mySet2 <- get_built_models(myBiomodEM)[7]

# Response curves: other variables fixed at their median, then at their
# minimum, for mySet1; bivariate curves for the single model in mySet2
myPlot <- bm_PlotResponseCurves(
  bm.out = myBiomodEM,
  models.chosen = mySet1,
  fixed.var = 'median'
)
myPlot <- bm_PlotResponseCurves(
  bm.out = myBiomodEM,
  models.chosen = mySet1,
  fixed.var = 'min'
)
myPlot <- bm_PlotResponseCurves(
  bm.out = myBiomodEM,
  models.chosen = mySet2,
  do.bivariate = TRUE
)

Project models

Single models

# Project every single model onto the current environmental variables,
# requesting binary and filtered outputs for all metrics plus a clamping mask
myBiomodProj <- BIOMOD_Projection(
  bm.mod = myBiomodSM,
  proj.name = 'Current',
  new.env = myExpl,
  models.chosen = 'all',
  metric.binary = 'all',
  metric.filter = 'all',
  build.clamping.mask = TRUE
)

# Print the projection object, then plot it
myBiomodProj
plot(myBiomodProj)

Ensemble models

# Option 1: project the ensemble models by reusing the single-model
# projections already stored in myBiomodProj
myBiomodEMProj <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM,
  bm.proj = myBiomodProj,
  models.chosen = 'all',
  metric.binary = 'all',
  metric.filter = 'all'
)

# Option 2: project the ensemble models directly from the environmental
# variables (single projections are built along the way).
# Note: this assignment overwrites the myBiomodEMProj object created above.
myBiomodEMProj <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM,
  proj.name = 'CurrentEM',
  new.env = myExpl,
  models.chosen = 'all',
  metric.binary = 'all',
  metric.filter = 'all'
)

# Print the ensemble projection object, then plot it
myBiomodEMProj
plot(myBiomodEMProj)

Compare range sizes

# Load the future environmental variables extracted from BIOCLIM
# (bio_3, bio_4, bio_7, bio_11 & bio_12)
data('bioclim_future')
myExplFuture <- rast(bioclim_future)

# Project the single models onto future conditions
# (binary outputs are requested for TSS only, the metric compared below)
myBiomodProjFuture <- BIOMOD_Projection(bm.mod = myBiomodSM,
                                        proj.name = 'Future',
                                        new.env = myExplFuture,
                                        models.chosen = 'all',
                                        metric.binary = 'TSS',
                                        build.clamping.mask = TRUE)

# Compute differences between the current and future projections,
# using the TSS-based binary outputs
myBiomodRangeSize <- BIOMOD_RangeSize(proj.current = myBiomodProj,
                                      proj.future = myBiomodProjFuture,
                                      metric.binary = 'TSS')

# Inspect the two result slots: print Compt.By.Models, plot Diff.By.Pixel
myBiomodRangeSize@Compt.By.Models
plot(myBiomodRangeSize@Diff.By.Pixel)

# Represent main results (counts, percentages, maps and mean all enabled)
myPlot <- bm_PlotRangeSize(bm.range = myBiomodRangeSize,
                           do.count = TRUE,
                           do.perc = TRUE,
                           do.maps = TRUE,
                           do.mean = TRUE)

Export a report

# The three calls below all take the ensemble modeling output (myBiomodEM)
# and differ only in the 'strategy' argument.

# Get a summary report
BIOMOD_Report(bm.out = myBiomodEM, strategy = 'report')

# Get a pre-filled ODMAP
BIOMOD_Report(bm.out = myBiomodEM, strategy = 'ODMAP')

# Get a code report
BIOMOD_Report(bm.out = myBiomodEM, strategy = 'code')