Main functions: abundance data • biomod2

Complete code example

Here are presented, in a full and complete example, all main functions (starting with BIOMOD_[...]) of biomod2.

The data set used is the DataSTOC containing abundance data.
Similar examples are presented for binary data on the Main functions (bin) webpage.

Load dataset and variables

library(biomod2)
library(terra)

# Load species abundances (20 species available)
data('DataSTOC')
head(DataSTOC)

# Divide into calibration/validation and evaluation
period1 <- which(DataSTOC[, 'period'] == '2006-2011')
period2 <- which(DataSTOC[, 'period'] == '2012-2017')

# Select the name of the studied species
myRespName <- 'Periparus.ater'

# Get corresponding count data
myResp <- as.numeric(DataSTOC[, myRespName])

# Get corresponding XY coordinates
myRespXY <- DataSTOC[, c('X_WGS84', 'Y_WGS84')]

# Get corresponding environmental variables
myExpl <- DataSTOC[, c('temp', 'precip', 'sdiv_hab',
                       'cover_agri', 'cover_water', 'cover_wet')]

Prepare data & parameters

Format data (observations & explanatory variables)

Main difference when providing abundance data instead of binary is to define the data.type parameter.
The distribution used in the selected algorithms afterwards will be set accordingly, for example :

Type	Data	Distribution
`binary`	Only 1, and 0 or NA	`binomial`
`count`	Positive integer values	`poisson`
`multiclass`	Categorical classes	classification
`ordinal`	Ordered categorical classes	classification
`relative`	Continuous values between 0 and 1	beta
`abundance`	Positive continuous values	`gaussian`

# Format data with count data type
myBiomodData <- BIOMOD_FormatingData(resp.name = myRespName, 
                                     resp.var = myResp[period1],
                                     resp.xy = myRespXY[period1, ],
                                     expl.var = myExpl[period1, ],
                                     eval.resp.var = myResp[period2],
                                     eval.resp.xy = myRespXY[period2, ],
                                     eval.expl.var = myExpl[period2, ],
                                     data.type = 'count')
myBiomodData
summary(myBiomodData)
plot(myBiomodData, plot.eval = TRUE)

Note that pseudo-absence selection is only possible when data.type = 'binary'.

Cross-validation datasets

Several cross-validation methods are available and can be selected with the BIOMOD_Modeling function, which calls the bm_CrossValidation function to do so. More examples are presented on the Auxiliary functions webpage.

When using the balance parameter with multiclass or ordinal data, proportions are balanced within each class as much as possible.

# # k-fold selection
# cv.k <- bm_CrossValidation(bm.format = myBiomodData,
#                            strategy = 'kfold',
#                            nb.rep = 2,
#                            k = 3)
# 
# # stratified selection (geographic)
# cv.s <- bm_CrossValidation(bm.format = myBiomodData,
#                            strategy = 'strat',
#                            k = 2,
#                            balance = 'presences',
#                            strat = 'x')
# head(cv.k)
# head(cv.s)

Retrieve modeling options

Modeling options are automatically retrieved from selected models within the BIOMOD_Modeling function, which calls the bm_ModelingOptions function to do so. Model parameters can also be automatically tuned to a specific dataset, by calling the bm_Tuning function, however it can be quite long. More examples are presented on the Auxiliary functions webpage.

# # bigboss parameters
# opt.b <- bm_ModelingOptions(data.type = 'count',
#                             models = c('DNN', 'XGBOOST'),
#                             strategy = 'bigboss')
# 
# # tuned parameters with formated data
# opt.t <- bm_ModelingOptions(data.type = 'count',
#                             models = c('DNN', 'XGBOOST'),
#                             strategy = 'tuned',
#                             bm.format = myBiomodData)
# 
# opt.b
# opt.t

Run modeling

Not all algorithms are available when it comes to data type other than binary.

	ANN	CTA	DNN	FDA	GAM	GBM	GLM	MARS	MAXENT	MAXNET	RF	RFd	SRE	XGBOOST
binary	x	x	x	x	x	x	x	x	x	x	x	x	x	x
multiclass		x	x	x				x			x			x
ordinal		x	x	x	x		x	x			x			x
count / relative / abundance		x	x		x	x	x	x			x			x

New evaluation metrics have been added :

Accuracy, Recall, Precision and F1 for multiclass and ordinal
RMSE, MSE, MAE, Max_error, Rsquared and Rsquared_aj for count, relative and abundance

as well as new ensemble models :

EMmode and EMfreq for multiclass and ordinal

The selection of single models for the ensemble modeling is different for RMSE, MSE, MAE and Max_error metrics : are kept all models whose evaluation value is < (best value + the given threshold).

Single models

# Model single models
myBiomodModelOut <- BIOMOD_Modeling(bm.format = myBiomodData,
                                    modeling.id = 'AllModels',
                                    models = c('DNN', 'GBM', 'GLM', 'MARS', 'RF', 'XGBOOST'),
                                    CV.strategy = 'block',
                                    OPT.strategy = 'bigboss',
                                    metric.eval = c('Rsquared', 'Rsquared_aj', 'RMSE'),
                                    var.import = 3)
                                    # seed.val = 123)
                                    # nb.cpu = 8)
myBiomodModelOut

# Get evaluation scores & variables importance
get_evaluations(myBiomodModelOut)
get_variables_importance(myBiomodModelOut)

# Represent evaluation scores & variables importance
bm_PlotEvalMean(bm.out = myBiomodModelOut, dataset = 'calibration')
bm_PlotEvalMean(bm.out = myBiomodModelOut, dataset = 'validation')
bm_PlotEvalBoxplot(bm.out = myBiomodModelOut, dataset = 'calibration', group.by = c('algo', 'algo'))
bm_PlotEvalBoxplot(bm.out = myBiomodModelOut, dataset = 'calibration', group.by = c('algo', 'run'))
bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('expl.var', 'algo', 'algo'))
bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('expl.var', 'algo', 'run'))
bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut, group.by = c('algo', 'expl.var', 'run'))

# Represent response curves
bm_PlotResponseCurves(bm.out = myBiomodModelOut, 
                      models.chosen = get_built_models(myBiomodModelOut)[c(1:3, 7:9)],
                      fixed.var = 'median')
bm_PlotResponseCurves(bm.out = myBiomodModelOut, 
                      models.chosen = get_built_models(myBiomodModelOut)[c(1:3, 7:9)],
                      fixed.var = 'min')
bm_PlotResponseCurves(bm.out = myBiomodModelOut, 
                      models.chosen = get_built_models(myBiomodModelOut)[3],
                      fixed.var = 'median',
                      do.bivariate = TRUE)
                      
# Explore models' outliers & residuals
bm_ModelAnalysis(bm.mod = myBiomodModelOut,
                 models.chosen = get_built_models(myBiomodModelOut)[c(1:3, 7:9)])

Ensemble models

# Model ensemble models
myBiomodEM <- BIOMOD_EnsembleModeling(bm.mod = myBiomodModelOut,
                                      models.chosen = 'all',
                                      em.by = 'all',
                                      em.algo = c('EMmedian', 'EMmean', 'EMwmean'),
                                      metric.select = c('Rsquared_aj', 'RMSE'),
                                      metric.select.thresh = c(0.4, 2),
                                      metric.eval = c('Rsquared', 'Rsquared_aj', 'RMSE'),
                                      var.import = 3)
myBiomodEM

# Get evaluation scores & variables importance
get_evaluations(myBiomodEM)
get_variables_importance(myBiomodEM)

# Represent evaluation scores & variables importance
bm_PlotEvalMean(bm.out = myBiomodEM, dataset = 'calibration', group.by = 'full.name')
bm_PlotEvalBoxplot(bm.out = myBiomodEM, dataset = 'calibration', group.by = c('full.name', 'full.name'))
bm_PlotVarImpBoxplot(bm.out = myBiomodEM, group.by = c('expl.var', 'full.name', 'full.name'))
bm_PlotVarImpBoxplot(bm.out = myBiomodEM, group.by = c('expl.var', 'algo', 'merged.by.run'))
bm_PlotVarImpBoxplot(bm.out = myBiomodEM, group.by = c('algo', 'expl.var', 'merged.by.run'))

# Represent response curves
bm_PlotResponseCurves(bm.out = myBiomodEM, 
                      models.chosen = get_built_models(myBiomodEM)[c(2, 5)],
                      fixed.var = 'median')
bm_PlotResponseCurves(bm.out = myBiomodEM, 
                      models.chosen = get_built_models(myBiomodEM)[c(2, 5)],
                      fixed.var = 'min')
bm_PlotResponseCurves(bm.out = myBiomodEM, 
                      models.chosen = get_built_models(myBiomodEM)[5],
                      fixed.var = 'median',
                      do.bivariate = TRUE)

Project models

The digits parameter is used to define the number of digits after the decimal point for the predicted values.
For relative data, the on_0_1000 parameter can be used in the same way than with binary data.

Note that integer values are lighter when saved in memory than float (decimal values).

Single models

# Project single models
myBiomodProj <- BIOMOD_Projection(bm.mod = myBiomodModelOut,
                                  proj.name = 'Period1',
                                  new.env = myExpl[period1, ],
                                  new.env.xy = myRespXY[period1, ],
                                  models.chosen = 'all',
                                  build.clamping.mask = TRUE, 
                                  digits = 1)
myBiomodProj
plot(myBiomodProj, coord = myRespXY[period1, ])

Ensemble models

# Project ensemble models (from single projections)
myBiomodEMProj <- BIOMOD_EnsembleForecasting(bm.em = myBiomodEM, 
                                             bm.proj = myBiomodProj,
                                             models.chosen = 'all')

# Project ensemble models (building single projections)
myBiomodEMProj <- BIOMOD_EnsembleForecasting(bm.em = myBiomodEM,
                                             proj.name = 'Period1EM',
                                             new.env = myExpl[period1, ],
                                             models.chosen = 'all')
myBiomodEMProj
plot(myBiomodEMProj, coord = myRespXY[period1, ])

Compare range sizes

# Project onto future conditions
myBiomodProjectionFuture <- BIOMOD_Projection(bm.mod = myBiomodModelOut,
                                              proj.name = 'Period2',
                                              new.env = myExpl[period2, ],
                                              new.env.xy = myRespXY[period2, ],
                                              models.chosen = 'all',
                                              build.clamping.mask = TRUE)

# Compute differences
myBiomodRangeSize <- BIOMOD_RangeSize(proj.current = myBiomodProj, 
                                      proj.future = myBiomodProjectionFuture,
                                      metric.binary = 'TSS')

myBiomodRangeSize@Compt.By.Models

Export a report

# Get a summary report
BIOMOD_Report(bm.out = myBiomodEM, strategy = 'report')

# Get a pre-filled ODMAP
BIOMOD_Report(bm.out = myBiomodEM, strategy = 'ODMAP')

# Get a code report
BIOMOD_Report(bm.out = myBiomodEM, strategy = 'code')