Warning this is more of a code dump instead of a blogpost but I'm still putting it out there hoping it's still useful for someone else as it has been sitting in my draft folder for far too long. Anyways the paper by Robert Hijmans is really worth reading if you're interested in the evaluation of species distribution models. If you have questions, don't hesitate to contact me.
Geographic null model from paper by Robert Hijsmans (
link to paper).
package
dismo (maintained by
Robert J. Hijmans). and the package
BIOMOD2.
Load libraries:
library("biomod2")
library("dismo")
The null_model_evaluation function takes as input a BIOMOD2 model.
null_model_evaluation <- function(models) {
## flow
## 1) create geographic null model from the points used as training data
## 2) compare AUC of the model with AUC of the geographic null model (use same presence/absence data)
## extract test data from models
data <- get_formal_data(models)
calib.lines <- get(load(models@calib.lines@link))
nbOfRuns <- (length(calib.lines) / length(data@data.species))
calib.lines.mat <- as.matrix(calib.lines, nrow=length(data@data.species), ncol=nbOfRuns)
evaluations <- get_evaluations(models)
dims <- dimnames(evaluations)
rocIndex <- which(dims[1][[1]] == "ROC")
testingDataIndex <- which( dims[2][[1]] == "Testing.data")
species.data <- data@coord[which(data@data.species == 1),]
background.data <- data@coord[is.na(data@data.species),]
result <- array(dim=c(nbOfRuns, 2, length(dims[3][[1]])), dimnames=list(dims[4][[1]],c("model", "geo_null_model"), dims[3][[1]]))
## calculate geographic null models
for( methodIndex in 1:length(dims[3][[1]])){
for (runIndex in 1:nbOfRuns){
model.auc <- evaluations[rocIndex, testingDataIndex,methodIndex,runIndex,1]
result[runIndex,1,methodIndex] <- model.auc
train.species.data <- species.data[calib.lines[1:nrow(species.data),runIndex],]
test.species.data <- species.data[!calib.lines[1:nrow(species.data),runIndex],]
if (nrow(test.species.data) > 0 & !is.na(model.auc)){
train.background.data <- background.data[calib.lines[nrow(species.data)+1:nrow(calib.lines)-nrow(species.data),runIndex],]
test.background.data <- background.data[!calib.lines[nrow(species.data)+1:nrow(calib.lines)-nrow(species.data),runIndex],]
gd <- geoDist(train.species.data, lonlat=TRUE)
#p <- predict(gd, env)
#plot(p)
#points(train.background.data, pch=16, col="purple")
#points(test.background.data, pch=16, col="blue")
#points(train.species.data, pch=16, col="green")
#points(test.species.data, pch=16, col="red")
gd.evaluation <- evaluate(gd, p=test.species.data, a=test.background.data)
result[runIndex,2,methodIndex] <- gd.evaluation@auc
}
}
}
result
}