vignettes/examples_2_secundaryFunctions.Rmd
examples_2_secundaryFunctions.Rmd
Here are presented, in small and unlinked examples, secundary
functions (starting with bm_[...]
) of biomod2
.
Most of these functions are internally called by some main functions
(starting with BIOMOD_[...]
) of biomod2
, but
can be easily used as such.
## Generate a binary vector -------------------------------------------------------------
vec.a <- sample(c(0, 1), 100, replace = TRUE)
## Generate a 0-1000 vector (random drawing) --------------------------------------------
vec.b <- runif(100, min = 0, max = 1000)
## Generate a 0-1000 vector (biased drawing) --------------------------------------------
BiasedDrawing <- function(x, m1 = 300, sd1 = 200, m2 = 700, sd2 = 200) {
return(ifelse(x < 0.5, rnorm(1, m1, sd1), rnorm(1, m2, sd2)))
}
vec.c <- sapply(vec.a, BiasedDrawing)
vec.c[which(vec.c < 0)] <- 0
vec.c[which(vec.c > 1000)] <- 1000
## Generate a 0-1000 vector (normal distribution) ---------------------------------------
vec.d <- rnorm(100, 500, 100)
## Create simple simulated data ---------------------------------------------------------
myResp.sim <- sample(c(0, 1), 20, replace = TRUE)
myExpl.sim <- data.frame(var1 = sample(c(0, 1), 100, replace = TRUE),
var2 = rnorm(100),
var3 = 1:100)
## Create raster data -------------------------------------------------------------------
ras.1 <- ras.2 <- mask.out <- rast(nrows = 10, ncols = 10)
ras.1[] <- as.factor(rep(c(1, 2, 3, 4, 5), each = 20))
ras.1 <- as.factor(ras.1)
ras.2[] <- rnorm(100)
stk <- c(ras.1, ras.2)
names(stk) <- c('varFact', 'varNorm')
## define a mask for already sampled points
mask.out[1:40] <- 1
## define a list of masks where we want to sample in priority
mask.in <- list(ras.1, ras.1)
mask.in[[1]][1:80] <- NA ## only level 5 should be sampled in this mask
mask.in[[1]][21:80] <- NA ## only levels 1 and 5 should be sampled in this mask
## Load real data ---------------------------------------------------------
data(DataSpecies)
myResp.real <- as.numeric(DataSpecies[, 'GuloGulo'])
data(bioclim_current)
myExpl.real <- rast(bioclim_current)
myCoord.real <- DataSpecies[, c('X_WGS84', 'Y_WGS84')]
myCoord.real.1 <- DataSpecies[which(myResp.real == 1), c('X_WGS84', 'Y_WGS84')]
myResp.real.raster <- classify(subset(myExpl.real, 1),
matrix(c(-Inf, Inf, 0), ncol = 3))
myResp.real.raster[cellFromXY(myResp.real.raster, myCoord.real.1)] <- 1
bm_SampleBinaryVector(obs = vec.a, ratio = 0.7)
# varying the number of thresholds to test
bm_FindOptimStat(metric.eval = 'TSS', obs = vec.a, fit = vec.b, nb.thresh = 5)
bm_FindOptimStat(metric.eval = 'TSS', obs = vec.a, fit = vec.b, nb.thresh = 100)
# different metrics
bm_FindOptimStat(metric.eval = 'TSS', obs = vec.a, fit = vec.c)
bm_FindOptimStat(metric.eval = 'ROC', obs = vec.a, fit = vec.c)
bm_FindOptimStat(metric.eval = 'BOYCE', obs = vec.a, fit = vec.c)
vec.d_bin <- bm_BinaryTransformation(data = vec.d, threshold = 500)
vec.d_filt <- bm_BinaryTransformation(data = vec.d, threshold = 500, do.filtering = TRUE)
cbind(vec.d, vec.d_bin, vec.d_filt)
bm_MakeFormula(resp.name = 'myResp.sim',
expl.var = head(myExpl.sim),
type = 'quadratic',
interaction.level = 0)
samp1 <- bm_SampleFactorLevels(expl.var = stk, mask.out = mask.out)
samp2 <- bm_SampleFactorLevels(expl.var = stk, mask.in = mask.in)
samp3 <- bm_SampleFactorLevels(expl.var = stk, mask.out = mask.out, mask.in = mask.in)
# Compute SRE for several quantile values
sre.100 <- bm_SRE(resp.var = myResp.real.raster,
expl.var = myExpl.real,
new.env = myExpl.real,
quant = 0)
sre.095 <- bm_SRE(resp.var = myResp.real.raster,
expl.var = myExpl.real,
new.env = myExpl.real,
quant = 0.025)
sre.090 <- bm_SRE(resp.var = myResp.real.raster,
expl.var = myExpl.real,
new.env = myExpl.real,
quant = 0.05)
# Visualize results
res <- c(myResp.real.raster, sre.100, sre.095, sre.090)
names(res) <- c("Original distribution", "Full data calibration", "Over 95 percent", "Over 90 percent")
plot(res, zlim = c(0, 1))
mod <- glm(var1 ~ var2 + var3, data = myExpl.sim)
bm_VariablesImportance(bm.model = mod,
expl.var = myExpl.sim[, c('var2', 'var3')],
method = 'full_rand',
nb.rep = 3)
biomod2
data
# Format Data with true absences
myBiomodData <- BIOMOD_FormatingData(resp.var = myResp.real,
expl.var = myExpl.real,
resp.xy = myCoord.real,
resp.name = 'Alien')
# Transform true absences into potential pseudo-absences
myResp.real.PA <- ifelse(myResp.real == 1, 1, NA)
myResp.real.PA.vect <- vect(cbind(myCoord.real, myResp.real.PA), geom = c('X_WGS84','Y_WGS84'))
# random method
PA.r <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
nb.rep = 4,
nb.absences = 1000,
strategy = 'random')
# disk method
PA.d <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
nb.rep = 4,
nb.absences = 500,
strategy = 'disk',
dist.min = 5,
dist.max = 35)
# SRE method
PA.s <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
nb.rep = 4,
nb.absences = 1000,
strategy = 'sre',
sre.quant = 0.025)
# user.defined method
myPAtable <- data.frame(PA1 = ifelse(myResp.real == 1, TRUE, FALSE),
PA2 = ifelse(myResp.real == 1, TRUE, FALSE))
for (i in 1:ncol(myPAtable)) myPAtable[sample(which(myPAtable[, i] == FALSE), 500), i] = TRUE
PA.u <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
strategy = 'user.defined',
user.table = myPAtable)
str(PA.r)
head(PA.r$pa.tab)
apply(PA.r$pa.tab, 2, table)
head(PA.d$pa.tab)
apply(PA.d$pa.tab, 2, table)
head(PA.s$pa.tab)
apply(PA.s$pa.tab, 2, table)
tail(PA.u$pa.tab)
apply(PA.u$pa.tab, 2, table)
# random method : different number of PA
PA.r_mult <- bm_PseudoAbsences(resp.var = myResp.real.PA.vect,
expl.var = myExpl.real,
nb.rep = 4,
nb.absences = c(1000, 500, 500, 200),
strategy = 'random')
str(PA.r_mult)
head(PA.r_mult$pa.tab)
apply(PA.r_mult$pa.tab, 2, table)
# random selection
cv.r <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'random',
nb.rep = 3,
k = 0.8)
# k-fold selection
cv.k <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'kfold',
nb.rep = 2,
k = 3)
# block selection
cv.b <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'block')
# stratified selection (geographic)
cv.s <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'strat',
k = 2,
balance = 'presences',
strat = 'x')
# stratified selection (environmental)
cv.e <- bm_CrossValidation(bm.format = myBiomodData,
strategy = 'env',
k = 2,
balance = 'presences')
head(cv.r)
apply(cv.r, 2, table)
head(cv.k)
apply(cv.k, 2, table)
head(cv.b)
apply(cv.b, 2, table)
head(cv.s)
apply(cv.s, 2, table)
head(cv.e)
apply(cv.e, 2, table)
All models currently available in biomod2
(and their
related package and function) are listed within a small dataset. Some of
them can be tuned through the train
function of the
caret
package and corresponding training function to be
used is also indicated.
data(ModelsTable)
ModelsTable
allModels <- c('ANN', 'CTA', 'FDA', 'GAM', 'GBM', 'GLM'
, 'MARS', 'MAXENT', 'MAXNET', 'RF', 'SRE', 'XGBOOST')
# default parameters
opt.d <- bm_ModelingOptions(data.type = 'binary',
models = allModels,
strategy = 'default')
# providing formated data
opt.df <- bm_ModelingOptions(data.type = 'binary',
models = allModels,
strategy = 'default',
bm.format = myBiomodData,
calib.lines = cv.k)
opt.d
opt.d@models
opt.d@options$ANN.binary.nnet.nnet
names(opt.d@options$ANN.binary.nnet.nnet@args.values)
opt.df@options$ANN.binary.nnet.nnet
names(opt.df@options$ANN.binary.nnet.nnet@args.values)
# bigboss parameters
opt.b <- bm_ModelingOptions(data.type = 'binary',
models = allModels,
strategy = 'bigboss')
# user defined parameters
user.SRE <- list('_allData_allRun' = list(quant = 0.01))
user.XGBOOST <- list('_allData_allRun' = list(nrounds = 10))
user.val <- list(SRE.binary.biomod2.bm_SRE = user.SRE
, XGBOOST.binary.xgboost.xgboost = user.XGBOOST)
opt.u <- bm_ModelingOptions(data.type = 'binary',
models = c('SRE', 'XGBOOST'),
strategy = 'user.defined',
user.val = user.val)
opt.b
opt.u
# tune parameters for Random Forest model
tuned.rf <- bm_Tuning(model = 'RF',
tuning.fun = 'rf', ## see in ModelsTable
do.formula = TRUE,
bm.options = opt.d@options$RF.binary.randomForest.randomForest,
bm.format = myBiomodData)
tuned.rf ## list object
# tune parameters for GAM (from mgcv package) model
tuned.gam <- bm_Tuning(model = 'GAM',
tuning.fun = 'gam', ## see in ModelsTable
do.formula = TRUE,
do.stepAIC = TRUE,
bm.options = opt.d@options$GAM.binary.mgcv.gam,
bm.format = myBiomodData)
tuned.gam ##list object
# tuned parameters with formated data
opt.t <- bm_ModelingOptions(data.type = 'binary',
models = c('SRE', 'XGBOOST'),
strategy = 'tuned',
bm.format = myBiomodData)
opt.t ## BIOMOD.modeling.options object