Customize Consent Preferences

We use cookies to help you navigate efficiently and perform certain functions. You will find detailed information about all cookies under each consent category below.

The cookies that are categorized as "Necessary" are stored on your browser as they are essential for enabling the basic functionalities of the site. ... 

Always Active

Necessary cookies are required to enable the basic features of this site, such as providing secure log-in or adjusting your consent preferences. These cookies do not store any personally identifiable data.

No cookies to display.

Functional cookies help perform certain functionalities like sharing the content of the website on social media platforms, collecting feedback, and other third-party features.

No cookies to display.

Analytical cookies are used to understand how visitors interact with the website. These cookies help provide information on metrics such as the number of visitors, bounce rate, traffic source, etc.

No cookies to display.

Performance cookies are used to understand and analyze the key performance indexes of the website which helps in delivering a better user experience for the visitors.

No cookies to display.

Advertisement cookies are used to provide visitors with customized advertisements based on the pages you visited previously and to analyze the effectiveness of the ad campaigns.

No cookies to display.

R Code for Mapping Dissolved Oxygen Using Random Forest (RF) and Support Vector Machine (SVM)

First, I will give credit to the Soil Organic Carbon Mapping Cookbook for making the original R code available for use to the public.

In this paper, I slightly tweaked the R code so that it could be applied to mapping dissolved oxygen using Random Forest (RF) and Support Vector Machine (SVM).

Here is the compiled R-code. You may need to tweak the code further, depending on your dataset.

# Install only the packages that are not already present; unconditionally
# calling install.packages() on every run is slow and fails offline.
pkgs <- c("aqp", "automap", "car", "caret", "e1071", "GSIF", "htmlwidgets",
          "leaflet", "mapview", "Metrics", "openair", "plotKML", "psych",
          "quantregForest", "randomForest", "raster", "rasterVis", "reshape",
          "rgdal", "RSQLite", "snow", "soiltexture", "sf", "sp")
install.packages(setdiff(pkgs, rownames(installed.packages())))

# Clear the console on Windows only; shell() does not exist on other
# platforms, so the original unconditional call would error there.
if (.Platform$OS.type == "windows") shell("cls")

# Data-splitting for training and test dataset

library(caret)
dat <- read.csv(file = "data/site-level.csv")
# Fix the RNG seed so the 70/30 split is reproducible across runs.
set.seed(2023)
# Partition on the response (DO) so train and test have similar DO
# distributions; the original partitioned on 1:nrow(dat), which is just an
# unstratified random split.
train.ind <- createDataPartition(dat$DO, p = 0.70, list = FALSE)
train <- dat[train.ind, ]
test  <- dat[-train.ind, ]

# Overlay the log(DO) densities of both subsets to confirm the split is
# balanced.
plot(density(log(train$DO)), col = "red", main = "")
lines(density(log(test$DO)), col = "blue")
legend("topright", legend = c("train", "test"),
       col = c("red", "blue"), lty = 1, cex = 1.5)

write.csv(train, file = "data/dat_train.csv", row.names = FALSE)
write.csv(test, file = "data/dat_test.csv", row.names = FALSE)

# Data Loading and Preprocessing

library(sp)      # coordinates()
library(raster)  # stack(), extract()

# BUG FIX: the original paths contained stray spaces ("data/dat_train.csv ",
# " data/covariates.RData", "data /MKD_RegMatrix.csv"), which point at files
# that do not exist / would be written under wrong names.
dat <- read.csv(file = "data/dat_train.csv")
files <- list.files(path = "data/covs", pattern = "tif$", full.names = TRUE)
covs <- stack(files)
save(covs, file = "data/covariates.RData")

# Upgrade points data frame to SpatialPointsDataFrame, then extract the
# covariate values at each observation point (sp = TRUE keeps the result
# as a SpatialPointsDataFrame with the extracted columns appended).

coordinates(dat) <- ~ X + Y
dat <- extract(x = covs, y = dat, sp = TRUE)
summary(dat@data)
dat <- as.data.frame(dat)

# Remove rows with any missing value, then export the regression matrix.

dat <- dat[complete.cases(dat), ]
write.csv(dat, "data/MKD_RegMatrix.csv", row.names = FALSE)



# METHOD 1: RANDOM FOREST
# Load data

dat <- read.csv(file = "data/MKD_RegMatrix.csv")
str(dat)
library(sp)
coordinates(dat) <- ~ X + Y
class(dat)

# CRS of the point data and covariates must be the same

dat@proj4string <- CRS(projargs = "+init=epsg:4326")
dat@proj4string
load(file = "data/covariates.RData")
names(covs)
# Model formula: log-transformed DO against every covariate layer except
# layer 14 (dropped via the negative [[ index).
fm <- as.formula(paste("log(DO) ~",
                       paste0(names(covs[[-14]]), collapse = "+")))

# Tuning parameters: k-fold cross-validation, keeping the fold predictions.

library(randomForest)
library(caret)
ctrl <- trainControl(method = "cv", savePredictions = TRUE)
rfmodel <- train(fm, data = dat@data, method = "rf", trControl = ctrl,
                 importance = TRUE)
rfmodel

# Variable importance plot, compare with the correlation matrix.
# Use $finalModel instead of the fragile positional index rfmodel[11][[1]],
# which silently breaks if caret reorders the train-object slots.

varImpPlot(rfmodel$finalModel)
plot(rfmodel$finalModel)
# Predict over the covariate stack, then back-transform from the log scale.
pred <- predict(covs, rfmodel)
pred <- exp(pred)
writeRaster(pred, filename = "data/DO_rf.tif", overwrite = TRUE)
plot(pred)

# METHOD 2: SUPPORT VECTOR MACHINE
# Load data

dat <- read.csv("data/MKD_RegMatrix.csv")
library(sp)
coordinates(dat) <- ~ X + Y
class(dat)

# Import and load covariates

load(file = "data/covariates.RData")
names(covs)
names(dat@data)
# Correlate the response (column 2) with the extracted covariate columns
# (everything after the first 15 site-level columns).
selectedCovs <- cor(x = as.matrix(dat@data[, 2]),
                    y = as.matrix(dat@data[, -c(1:15)]))
selectedCovs

# Select the top five covariates

library(reshape)
# BUG FIX: the original filter was `value != 1 | value != NA`; comparing with
# `!= NA` always yields NA, so that clause never filtered anything. Keep the
# off-diagonal, non-missing correlations instead.
x <- subset(melt(selectedCovs), value != 1 & !is.na(value))
# Rank by absolute correlation, strongest first.
x <- x[order(-abs(x$value)), ]
idx <- as.character(x$X2[1:5])
COV <- covs[[idx]]
names(COV)
# NOTE: the original `dat@data <- cbind(dat@data)` was a no-op (cbind of a
# single data frame) and has been removed; the covariate values were already
# extracted into dat@data upstream.
names(dat@data)

# Fitting a SVM model: grid-search epsilon and cost over the selected
# covariates, keeping the best cross-validated model.

library(e1071)
library(caret)
tuneResult <- tune(svm, DO ~ ., data = dat@data[, c("DO", names(COV))],
                   ranges = list(epsilon = seq(0.1, 0.2, 0.02),
                                 cost = c(5, 7, 15, 20)))
plot(tuneResult)
tunedModel <- tuneResult$best.model
print(tunedModel)

# Predict the DO surface from the selected covariate rasters.

DOsvm <- predict(COV, tunedModel)

# Root-mean-square error of a vector of residuals.
rmse <- function(errval) {
  sqrt(mean(errval^2))
}

errval <- tunedModel$residuals  # same as observed - fitted
svr_RMSE <- rmse(errval)
print(paste("svr RMSE = ", svr_RMSE))
writeRaster(DOsvm, filename = "data/DO_svm.tif", overwrite = TRUE)
plot(DOsvm)

# Evaluate the contribution of each covariate to the model: the norm of the
# primal weight vector per covariate, recovered from the support vectors.

w <- t(tunedModel$coefs) %*% tunedModel$SV
w <- apply(w, 2, function(v) sqrt(sum(v^2)))
w <- sort(w, decreasing = TRUE)
print(w)

# Validation

library(sp)
library(raster)
dat <- read.csv(file = "data/dat_test.csv")
coordinates(dat) <- ~ X + Y

DO_rf <- raster("data/DO_rf.tif")
DO_svm <- raster("data/DO_svm.tif")

# Extract the predicted values at the held-out test locations.
dat <- extract(x = DO_rf, y = dat, sp = TRUE)
dat <- extract(x = DO_svm, y = dat, sp = TRUE)
# Prediction error = predicted - observed.
dat$PE_rf <- dat$DO_rf - dat$DO
dat$PE_svm <- dat$DO_svm - dat$DO
# BUG FIX: the original wrote to "data /validation.csv" (stray space in the
# path), so the later read of "data/validation.csv" could never find it.
write.csv(dat, "data/validation.csv", row.names = FALSE)

# Estimating the map quality measures

ME_rf   <- mean(dat$PE_rf, na.rm = TRUE)      # mean (bias) error
ME_svm  <- mean(dat$PE_svm, na.rm = TRUE)
MAE_rf  <- mean(abs(dat$PE_rf), na.rm = TRUE) # mean absolute error
MAE_svm <- mean(abs(dat$PE_svm), na.rm = TRUE)
MSE_rf  <- mean(dat$PE_rf^2, na.rm = TRUE)    # mean squared error
MSE_svm <- mean(dat$PE_svm^2, na.rm = TRUE)
# BUG FIX: the original divided an NA-excluded sum by length(), which counts
# the NA rows too and deflates the RMSE. Take sqrt of the NA-aware MSE.
RMSE_rf  <- sqrt(MSE_rf)
RMSE_svm <- sqrt(MSE_svm)
# Amount of variance explained: 1 - SSE / SStot.
AVE_rf <- 1 - sum(dat$PE_rf^2, na.rm = TRUE) /
  sum((dat$DO - mean(dat$DO, na.rm = TRUE))^2,
      na.rm = TRUE)

AVE_svm <- 1 - sum(dat$PE_svm^2, na.rm = TRUE) /
  sum((dat$DO - mean(dat$DO, na.rm = TRUE))^2,
      na.rm = TRUE)

# Graphical map quality measures: predicted vs observed with the 1:1 line
# (dashed black) and the fitted regression line (dashed blue).

par(mfrow = c(2, 2))
plot(dat$DO_rf, dat$DO, main = "rf", xlab = "predicted",
     ylab = "observed")
abline(0, 1, lty = 2, col = "black")
abline(lm(dat$DO ~ dat$DO_rf), col = "blue", lty = 2)

plot(dat$DO_svm, dat$DO, main = "svm", xlab = "predicted",
     ylab = "observed")
abline(0, 1, lty = 2, col = "black")
abline(lm(dat$DO ~ dat$DO_svm), col = "blue", lty = 2)
par(mfrow = c(1, 1))

# Spatial bubbles for prediction errors

bubble(dat[!is.na(dat$PE_rf), ], "PE_rf", pch = 21,
       col = c("red", "green"))
bubble(dat[!is.na(dat$PE_svm), ], "PE_svm", pch = 21,
       col = c("red", "green"))

# Model correlations and spatial differences and evaluation

library(raster)
rf_map  <- raster("data/DO_rf.tif")
svm_map <- raster("data/DO_svm.tif")
models <- stack(rf_map, svm_map)

# Compare the statistical distribution and correlation between the models

library(psych)
model_df <- na.omit(as.data.frame(models))
pairs.panels(model_df,
             method = "pearson",  # correlation method
             hist.col = "#00AFBB",
             density = TRUE,      # show density plots
             ellipses = TRUE)     # show correlation ellipses

library(rasterVis)
library(mapview)
densityplot(models)
# Per-cell standard deviation between the two prediction surfaces.
SD <- calc(models, sd)
mapview(SD)
# Per-cell difference between the RF and SVM surfaces.
RFSVM <- calc(models[[c(1, 2)]], diff)
names(RFSVM) <- c("RFvsSVM")
X <- raster::cellStats(RFSVM, mean)
# Centre the difference map on its mean before plotting.
levelplot(RFSVM - X, at = seq(-0.5, 0.5, length.out = 10),
          par.settings = RdBuTheme)

# Model evaluation

dat <- read.csv("data/validation.csv")
# Build a long-format table: one row per (observation, model) pair, with a
# "model" label so openair can group by it.
modRF <- data.frame(obs = dat$DO, mod = dat$DO_rf,
                    model = "RF")
modSVM <- data.frame(obs = dat$DO, mod = dat$DO_svm,
                     model = "SVM")
modData <- rbind(modRF, modSVM)
summary(modData)

# Calculate statistical evaluation metrics, one set per model

library(openair)
modsts <- modStats(modData, obs = "obs", mod = "mod", type = "model")
modsts

# Taylor diagram and conditional quantile plots comparing both models.
TaylorDiagram(modData, obs = "obs", mod = "mod", group = "model",
              cols = c("green", "yellow", "red", "blue"),
              cor.col = "brown", rms.col = "black")
conditionalQuantile(modData, obs = "obs", mod = "mod", type = "model")

Leave a Reply

Your email address will not be published. Required fields are marked *

3 + 15 =

This site uses Akismet to reduce spam. Learn how your comment data is processed.