diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/src/000_set_environment.R b/src/000_set_environment.R new file mode 100644 index 0000000..015ddd4 --- /dev/null +++ b/src/000_set_environment.R @@ -0,0 +1,80 @@ +# Set path --------------------------------------------------------------------- +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/" +} + +filepath_source = paste0(filepath_base, "hySpecVisKili/src/functions/001_functions.R") +path_data = paste0(filepath_base, "/data/") +path_biodiv = paste0(path_data, "/biodiv/") + +path_hyp_org = paste0(path_data, "/020_hypspec_org/") +path_hyp_aio = paste0(path_data, "/025_hypspec_aio/") +path_hyp_nrm = paste0(path_data, "/030_hypspec_nrm/") +path_hyp_vegidcs = paste0(path_data, "/040_hypspec_vegidcs/") +path_hyp_kmdc = paste0(path_data, "/050_hypspec_kmdc/") +path_hyp_raoq = paste0(path_data, "/060_hypspec_raoq/") +path_hyp_glcm = paste0(path_data, "/070_hypspec_glcm/") +path_hyp_pred = paste0(path_data, "/090_hypspec_pred/") +path_ldr_pred = paste0(path_data, "/095_lidar_pred/") +path_comb_gpm_sr = paste0(path_data, "/100_comb_gpm_sr/") +path_model_gpm_sr = paste0(path_data, "/110_model_gpm_sr/") +path_compile_analysis_sr = paste0(path_data, "/120_compile_analysis_sr/") +path_comb_gpm_sr_res = paste0(path_data, "/200_comb_gpm_sr_res/") +path_model_gpm_sr_res = paste0(path_data, "/210_model_gpm_sr_res/") +path_compile_analysis_sr_elev_res = paste0(path_data, "/220_compile_analysis_sr_elev_res/") +path_model_gpm_sr_indp = paste0(path_data, "/300_model_gpm_sr_indp/") +path_comb_gpm_sr_elev_res_indp = paste0(path_data, "/310_comb_gpm_sr_elev_res_indp/") +path_model_gpm_sr_elev_res_indp = paste0(path_data, "/320_model_gpm_sr_elev_res_indp/") +path_analysis_sr = paste0(path_data, "/500_analysis_sr/") +path_analysis_sr_elev_res = paste0(path_data, "/510_analysis_sr_elev_res/") + +path_plots = paste0(path_data, "/plots/") +path_rdata = paste0(path_data, "/rdata/") +path_meta = paste0(path_data, "/meta/") +path_temp = paste0(path_data, "/temp/") +path_output = paste0(path_data, "/output/") +path_vis = paste0(path_data, "/vis/") + + +# Set libraries ---------------------------------------------------------------- +library(biodivTools) # devtools::install_github("environmentalinformatics-marburg/biodivTools") +library(CAST) +library(corrplot) +library(doParallel) +library(grid) +library(gridExtra) +library(gpm) # devtools::install_github("environmentalinformatics-marburg/gpm") +library(ggplot2) +# library(ggbiplot) +library(hsdar) +# library(lavaan) +# library(rPointDB) +library(rgeos) +library(ggplot2) +library(mapview) +# library(metTools) # devtools::install_github("environmentalinformatics-marburg/metTools") +library(raster) +library(RStoolbox) +library(reshape2) +library(rgdal) +# library(satellite) +library(satelliteTools) # devtools::install_github("environmentalinformatics-marburg/satelliteTools") +# library(semPlot) +library(sp) +library(spacetime) +library(vegan) +# library(yaml) + + +# Other settings --------------------------------------------------------------- +source(filepath_source) + +rasterOptions(tmpdir = path_temp) + +saga_cmd = "C:/OSGeo4W64/apps/saga-ltr/saga_cmd.exe" +# initOTB("C:/OSGeo4W64/bin/") +# initOTB("C:/OSGeo4W64/OTB-6.2.0-Win64/bin/") + + diff --git a/src/000_set_environment_linux.R b/src/000_set_environment_linux.R new file mode 100644 index 0000000..3c06631 --- /dev/null +++ b/src/000_set_environment_linux.R @@ -0,0 +1,77 @@ +# Set path --------------------------------------------------------------------- +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/" +} + +filepath_source = paste0(filepath_base, "hySpecVisKili/src/functions/001_functions.R") +path_data = paste0(filepath_base, "/data/") +path_biodiv = paste0(path_data, "/biodiv/") + +path_hyp_org = paste0(path_data, "/020_hypspec_org/") +path_hyp_aio = paste0(path_data, "/025_hypspec_aio/") +path_hyp_nrm = paste0(path_data, "/030_hypspec_nrm/") +path_hyp_vegidcs = paste0(path_data, "/040_hypspec_vegidcs/") +path_hyp_kmdc = paste0(path_data, "/050_hypspec_kmdc/") +path_hyp_raoq = paste0(path_data, "/060_hypspec_raoq/") +path_hyp_glcm = paste0(path_data, "/070_hypspec_glcm/") +path_hyp_pred = paste0(path_data, "/090_hypspec_pred/") +path_ldr_pred = paste0(path_data, "/095_lidar_pred/") +path_comb_gpm_sr = paste0(path_data, "/100_comb_gpm_sr/") +path_model_gpm_sr = paste0(path_data, "/110_model_gpm_sr/") +path_compile_analysis_sr = paste0(path_data, "/120_compile_analysis_sr/") +path_comb_gpm_sr_res = paste0(path_data, "/200_comb_gpm_sr_res/") +path_model_gpm_sr_res = paste0(path_data, "/210_model_gpm_sr_res/") +path_compile_analysis_sr_elev_res = paste0(path_data, "/220_compile_analysis_sr_elev_res/") +path_model_gpm_sr_indp = paste0(path_data, "/300_model_gpm_sr_indp/") +path_comb_gpm_sr_elev_res_indp = paste0(path_data, "/310_comb_gpm_sr_elev_res_indp/") +path_model_gpm_sr_elev_res_indp = paste0(path_data, "/320_model_gpm_sr_elev_res_indp/") +path_analysis_sr = paste0(path_data, "/500_analysis_sr/") +path_analysis_sr_elev_res = paste0(path_data, "/510_analysis_sr_elev_res/") + +path_plots = paste0(path_data, "/plots/") +path_rdata = paste0(path_data, "/rdata/") +path_meta = paste0(path_data, "/meta/") +path_temp = paste0(path_data, "/temp/") +path_output = paste0(path_data, "/output/") +path_vis = paste0(path_data, "/vis/") + + +# Set libraries ---------------------------------------------------------------- +#library(biodivTools) # devtools::install_github("environmentalinformatics-marburg/biodivTools") +library(CAST) +# library(corrplot) +library(doParallel) +library(grid) +library(gridExtra) +library(gpm) # devtools::install_github("environmentalinformatics-marburg/gpm") +# library(hsdar) +# library(lavaan) +# library(rPointDB) +# library(rgeos) +# library(ggplot2) +# library(mapview) +# library(metTools) # devtools::install_github("environmentalinformatics-marburg/metTools") +library(raster) +# library(RStoolbox) +library(reshape2) +# library(rgdal) +# library(satellite) +# library(satelliteTools) # devtools::install_github("environmentalinformatics-marburg/satelliteTools") +# library(semPlot) +# library(sp) +# library(spacetime) +# library(vegan) +# library(yaml) + +# Other settings --------------------------------------------------------------- +source(filepath_source) + +rasterOptions(tmpdir = path_temp) + +# saga_cmd = "C:/OSGeo4W64/apps/saga-ltr/saga_cmd.exe" +# initOTB("C:/OSGeo4W64/bin/") +# initOTB("C:/OSGeo4W64/OTB-6.2.0-Win64/bin/") + + diff --git a/src/000_setup_windows.R b/src/000_setup_windows.R new file mode 100644 index 0000000..f89a830 --- /dev/null +++ b/src/000_setup_windows.R @@ -0,0 +1,54 @@ +# Set environment for environmental information systems analysis +require(envimaR) + +root_folder = path.expand("~/plygrnd/hySpecVisKili/") +fcts_folder = file.path(root_folder, "hySpecVisKili/src/functions/") + +project_folders = c("data/", + "data/biodiv", + "data/020_hypspec_org/", + "data/025_hypspec_aio/", + "data/030_hypspec_nrm/", + "data/040_hypspec_vegidcs/", + "data/050_hypspec_kmdc/", + "data/060_hypspec_raoq/", + "data/070_hypspec_glcm/", + "data/090_hypspec_pred/", + "data/100_comb_gpm_sr/", + "data/110_model_gpm_sr/", + "data/120_compile_analysis_sr/", + "data/200_comb_gpm_sr_res/", + "data/210_model_gpm_sr_res/", + "data/220_compile_analysis_sr_elev_res/", + "data/300_model_gpm_sr_indp/", + "data/310_comb_gpm_sr_elev_res_indp/", + "data/320_model_gpm_sr_elev_res_indp/", + "data/500_analysis_sr/", + "data/510_analysis_sr_elev_res/", + "data/plots/", + "data/rdata/", + "data/meta/", + "data/output/", + "data/vis/", + "data/temp/") + +libs = c("biodivTools", "CAST", "corrplot", "doParallel", "grid", "gridExtra", + "gpm", "ggplot2", "hsdar", "rgeos", "ggplot2", "mapview", + "raster", "RStoolbox", "reshape2", "rgdal", "satelliteTools", "sp", + "spacetime", "vegan") + +envrmt = createEnvi(root_folder = root_folder, + fcts_folder = fcts_folder, + folders = project_folders, + path_prefix = "path_", libs = libs, + alt_env_id = "COMPUTERNAME", alt_env_value = "PCRZP", + alt_env_root_folder = "F:\\BEN\\edu") + +# More settings +rasterOptions(tmpdir = envrmt$path_temp) +mapviewOptions(basemaps = mapviewGetOption("basemaps")[c(3, 1:2, 4:5)]) +saga_cmd = "C:/OSGeo4W64/apps/saga-ltr/saga_cmd.exe" +# initOTB("C:/OSGeo4W64/bin/") +initOTB("C:/OSGeo4W64/OTB-6.2.0-Win64/bin/") + + diff --git a/src/010_biodiv_preprocessing.R b/src/010_biodiv_preprocessing.R new file mode 100644 index 0000000..38c8dd9 --- /dev/null +++ b/src/010_biodiv_preprocessing.R @@ -0,0 +1,179 @@ +# Preprocess biodiversity observations. + +source("D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R") + + +# Read species richness dataset (Peters et al. 2016) +bd = read.table(paste0(path_biodiv, "Biodiversity_Data_Marcel.csv"), + header = TRUE, sep = ";", dec = ",") + +saveRDS(as.character(bd$plotID), file = paste0(path_biodiv, "biodiv_plots.rds")) +saveRDS(bd, file = paste0(path_biodiv, "biodiv.rds")) + +# Read animal species dataset +ad = read.table(paste0(path_biodiv, "animals_plotIDcomplete_Syn1.csv"), + header = TRUE, sep = ";", dec = ",") + +ad_diet = lapply(unique(ad$taxon), function(t){ + data.frame(taxon = t, + ad$diet) +}) + + + +# Set species number to 0/1 and reduce to complete cases +ad[, 14:73][!is.na(ad[, 14:73]) & ad[, 14:73]>1] = 1 +adc = ad[complete.cases(ad[, 14:73]),] + +# Split into taxon groups +adc_taxl = lapply(unique(adc$taxon), function(t){ + act_level = adc[adc$taxon == t, ] + act_grp = act_level[, 14:73] + rownames(act_grp) = act_level$species + return(t(act_grp)) +}) + +adc_taxl_all = adc[, 14:73] +rownames(adc_all) = adc$species + +adc_taxl = c(list(t(adc_taxl_all)), adc_taxl) +names(adc_taxl) = c("Animals", as.character(unique(adc$taxon))) + +saveRDS(adc_taxl, file = paste0(path_biodiv, "adc_taxl.rds")) + + + +# Split into trophic levels +adc_tlevels = lapply(unique(adc$diet), function(d){ + act_level = adc[adc$diet == d, ] + act_grp = act_level[, 14:73] + rownames(act_grp) = act_level$species + return(t(act_grp)) +}) + +adc_all =adc[, 14:73] +rownames(adc_all) = adc$species + +adc_tlevels = c(list(t(adc_all)), adc_tlevels) +names(adc_tlevels) = c("Animals", as.character(unique(adc$diet))) + +saveRDS(adc_tlevels, file = paste0(path_biodiv, "adc_tlevels.rds")) + + + +# Compute species richness from taxon groups and tropich levels +adc_taxl_sr = data.frame(plotID = rownames(adc_taxl[[1]])) +for(i in seq(length(adc_taxl))){ + adc_taxl_sr[, i+1] = rowSums(adc_taxl[[i]]) + names(adc_taxl_sr)[i+1] = paste0("SR", tolower(names(adc_taxl[i]))) +} + +adc_tlevels_sr = data.frame(plotID = rownames(adc_tlevels[[1]])) +for(i in seq(length(adc_tlevels))){ + adc_tlevels_sr[, i+1] = rowSums(adc_tlevels[[i]]) + names(adc_tlevels_sr)[i+1] = paste0("SR", tolower(names(adc_tlevels[i]))) +} + +adc_sr = merge(adc_tlevels_sr, + adc_taxl_sr[, -grep("SRanimals", colnames(adc_taxl_sr))], + by = "plotID") + +saveRDS(adc_sr, file = paste0(path_biodiv, "adc_sr.rds")) + + + +# Merge species richness data +s_adc_sr = colnames(adc_sr[-grep("plotID", colnames(adc_sr))]) + +colnames(bd)[which("SRsyrphids" == colnames(bd))] = "SRsyrphid_flies" +colnames(bd)[which("SRbats" == colnames(bd))] = "SRinsectivorous_bats" +colnames(bd)[which("SRmammals" == colnames(bd))] = "SRlarge_mammals" +colnames(bd)[which("SRparasitoids" == colnames(bd))] = "SRparasitoid_wasps" +colnames(bd)[which("SRotheraculeata" == colnames(bd))] = "SRaculeate_wasps" +colnames(bd)[which("SRmillipedes" == colnames(bd))] = "SRmilipeds" +colnames(bd)[which("SRsnails" == colnames(bd))] = "SRgastropods" + +s_bd = colnames(bd[, grep("SR", colnames(bd))]) +# which(tolower(s_adc_taxl_sr) %in% tolower(s_bd)) + +species_richness = merge(bd, adc_sr, by = "plotID") +species_richness = species_richness[, -grep("\\.x", colnames(species_richness))] +colnames(species_richness)[grep("\\.y", colnames(species_richness))] = + substr(colnames(species_richness)[grep("\\.y", colnames(species_richness))], 1, + (nchar(colnames(species_richness)[grep("\\.y", colnames(species_richness))])-2)) + +saveRDS(species_richness, file = paste0(path_biodiv, "species_richness.rds")) + + + +# Compute community composition using detrended correspondence analysis +species_composition_dcor = lapply(adc_tlevels, function(l){ + l = l[rowSums(l) > 0, ] + decorana(l) +}) +names(species_composition_dcor) = names(adc_tlevels) +# for(i in seq(5)) plot(species_composition_dcor[[i]], display = "sites") + +saveRDS(species_composition_dcor, file = paste0(path_biodiv, "species_composition_dcor.rds")) + + + +# Compute relative network using PCA +tlevels = colnames(species_richness)[ + seq(grep("SRanimals", colnames(species_richness)), + grep("SRanimals", colnames(species_richness))+4)] + +adn_matrix = matrix(ncol = 5, nrow = 60) +for(i in seq(5)){ + adn_matrix[, i] = species_richness[, tlevels[i]] +} +rownames(adn_matrix) <- species_richness$plotID +colnames(adn_matrix ) <- tlevels + +species_network_pca <- princomp(adn_matrix[,-1], cor=T) + +# biplot(species_network_pca, choices = 2:3) +# summary(species_network_pca) + +saveRDS(species_network_pca, file = paste0(path_biodiv, "species_network_pca.rds")) + + +# comb_grigusovaine diversity for Grigusova et al. 2019 +comb_grigusova = data.frame(species_network_pca$scores) +colnames(comb_grigusova) = paste0("sn_pca", seq(4)) +comb_grigusova$plotID = rownames(species_network_pca$scores) + +comb_grigusova = merge(adc_sr, comb_grigusova, by = c("plotID"), all.x = TRUE, all.y = TRUE) + +for(i in seq(length(species_composition_dcor))){ + act = data.frame(species_composition_dcor[[i]]$rproj) + colnames(act) = paste0("sn_dca", seq(4), "_", names(species_composition_dcor[i])) + act$plotID = rownames(act) + comb_grigusova = merge(comb_grigusova, act, by = c("plotID"), all.x = TRUE, all.y = TRUE) +} + +comb_grigusova = droplevels(comb_grigusova) + +saveRDS(comb_grigusova, file = paste0(path_biodiv, "comb_grigusova.rds")) + +# Cross check +# sort(colnames(adc_taxl_sr)) +# sort(colnames(bd[, c(1, grep("SR", colnames(bd)))])) +# +# test = merge(adc_taxl_sr, bd, by = "plotID", all = TRUE) +# +# test_df = lapply(grep("\\.x", colnames(test)), function(t){ +# x = colnames(test)[t] +# y = paste0(substr(x, 1, (nchar(x)-1)), "y") +# act_df = data.frame(plotID = test$plotID, test[, x], test[, y]) +# colnames(act_df) = c("plotId", x, y) +# return(act_df) +# }) +# test_df + + + + + + + diff --git a/src/020_rasterdb_hyperspectral_processing.R b/src/020_rasterdb_hyperspectral_processing.R new file mode 100644 index 0000000..33a044a --- /dev/null +++ b/src/020_rasterdb_hyperspectral_processing.R @@ -0,0 +1,79 @@ +# Extract hyperspectral data from database using a buffer of 100 m in diameter +# arround the center of each observation plot. + +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") + +# Set account +userpwd <- "thomas.nauss:cd7dLfgm" # use this account (if not loaded from file) + +# Open remote sensing database +remotesensing <- RemoteSensing$new("http://137.248.191.215:8081", userpwd) # remote server + +# Get rasterdb +dbs = c("kili_campaign1_hyperspectral_lvl3_2015", "kili_campaign2_hyperspectral_lvl3_2016") +for(db in dbs){ + dir.create(paste0(path_hyp_org, db), + showWarnings = FALSE) + rasterdb <- remotesensing$rasterdb(db) + bands = rasterdb$bands + saveRDS(bands, + file = paste0(path_hyp_org, db, "/bands_", db, ".rds")) + + # Get data + rg = "kili_poi_plots" + pois = remotesensing$poi_group(rg) + for(n in pois$name){ + radius = 100 + if(n == "fer3"){ + radius = 150 + } + poi <- remotesensing$poi(group_name=rg, poi_name=n) + ext <- extent_diameter(poi$x, poi$y, radius) + r <- rasterdb$raster(ext, product="gap_filling(full_spectrum)") + saveRDS(r, file = paste0(path_hyp_org, db, "/", n, ".rds")) + } +} + +# Combine from first and second flight based on filesize +# (non-observed areas have considerably smaller file sizes) +# Use foc1 and foc6 from second flight. +bd_plots = readRDS(paste0(path_biodiv, "biodiv_plots.rds")) +hd_files = list.files(path_hyp_org, recursive = TRUE, full.names = TRUE) +hd_files = hd_files[nchar(basename(hd_files)) == 8] +hd_size = lapply(hd_files, function(f){ + c = if(grepl(dbs[[1]], f)){ + c = dbs[[1]] + } else { + c = dbs[[2]] + } + data.frame(f = f, + s = file.size(f), + c = c, + plotID = substr(basename(f), 1, 4)) +}) +hd_size = do.call("rbind", hd_size) +hd_size_valid = hd_size[hd_size$s > 300000, ] +hd_size_valid$f = as.character(hd_size_valid$f) + +hd_size_valid = hd_size_valid[-grep(paste0(dbs[[1]], "_fill/foc1.rds"), hd_size_valid$f),] +hd_size_valid = hd_size_valid[-grep(paste0(dbs[[1]], "_fill/foc6.rds"), hd_size_valid$f),] + +hd_size_valid = hd_size_valid[substr(basename(hd_size_valid$f), 1, 4) %in% bd_plots,] + +for(f in hd_size_valid$f){ + file.copy(f, paste0(path_hyp_org, "/", basename(f))) +} + + +# Save metadata +# Combine metadata +meta = list(data.frame(hd_size_valid[, c("c", "plotID")], list = 1)) +meta[[1]]$list[meta[[1]]$c == dbs[[2]]] = 2 +meta[[2]] = list(meta_01 = readRDS(paste0(path_hyp_org, dbs[[1]], "_fill/bands_", dbs[[1]], ".rds")), + meta_02 = readRDS(paste0(path_hyp_org, dbs[[2]], "_fill/bands_", dbs[[2]], ".rds"))) +dir.create(path_meta, showWarnings = FALSE) +saveRDS(meta, file = paste0(path_meta, "hyp_meta.rds")) + + +# Visually check data +visCheck(datapath = path_hyp_org, polygonfile = paste0(path_plots, "BPolygon.shp")) diff --git a/src/025_extract_aoi.R b/src/025_extract_aoi.R new file mode 100644 index 0000000..fe4eb7f --- /dev/null +++ b/src/025_extract_aoi.R @@ -0,0 +1,28 @@ +# Extract plot area (50 x 50 m) from hyperspectral data ans mask all NAs + +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") + +hd_files = list.files(path_hyp_org, recursive = FALSE, full.names = TRUE) +hd_files = hd_files[-grep("kili_campaign", hd_files)] +pb = shapefile(paste0(path_plots, "BPolygon.shp")) + +dir.create(paste0(path_hyp_aio), showWarnings = FALSE) + +reproj = TRUE +for(f in hd_files){ + r = readRDS(f) + if(reproj){ + pb = spTransform(pb, projection(r)) + reproj = FALSE + } + pid = substr(basename(f), 1, 4) + aoi = pb[grep(pid, pb$PlotID),] + r = raster::mask(crop(r, aoi), aoi) + # r = raster::mask(r, calc(r, fun=sum)) # We will do that later in the workflow + names(r) = paste0(pid, "_", seq(nlayers(r))) + saveRDS(r, file = paste0(path_hyp_aio, pid, ".rds")) +} + + +# Visually check data +visCheck(datapath = path_hyp_aio, polygonfile = paste0(path_plots, "BPolygon.shp")) diff --git a/src/030_noise_removal.R b/src/030_noise_removal.R new file mode 100644 index 0000000..0e35f78 --- /dev/null +++ b/src/030_noise_removal.R @@ -0,0 +1,93 @@ +# Compute noise removal on a per plot basis + +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") +if(length(showConnections()) == 0){ + cores = 3 + cl = parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) +} + +hd_files = list.files(path_hyp_aio, recursive = FALSE, full.names = TRUE) +h_meta = readRDS(paste0(path_meta, "hyp_meta.rds")) + +pb = shapefile(paste0(path_plots, "BPolygon.shp")) + +dir.create(paste0(path_hyp_nrm), showWarnings = FALSE) + +# for(f in hd_files){ +# r = readRDS(f) +# +# m = mnf(as(r, "SpatialGridDataFrame"), use = "complete.obs") +# +# # thv = 1-m$values +# # set_mean = which(thv < -0.10) +# use = seq(2, length(m$values)) +# mi = as.matrix(m$x@data[, use]) %*% solve(m$rotation)[use, ] +# +# tmp = r[[1]] +# mir = stack(lapply(seq(ncol(mi)), function(i){ +# setValues(tmp, mi[, i]) +# })) +# +# saveRDS(mir, file = paste0(path_hyp_nrm, substr(basename(f), 1, 4), "_mnfi.rds")) +# } + + +log = foreach (i = seq(length(hd_files)), .packages = c("raster", "RStoolbox")) %dopar% { + f = hd_files[i] + plotid = substr(basename(f), 1, 4) + r = readRDS(f) + nl = nlayers(r) + + all_na = grep(ncell(r), summary(r)[6,]) + if(length(all_na) > 0){ + r = r[[-all_na]] + } else { + all_na = -1 + } + + pca = rasterPCA(r) + v = pca$model$sdev**2 + + # Continuous Significant Dimensionality + csd = round(sum(sapply(v, function(x){min(x,1)})), 0) + use = seq(csd) + + log = list(file = basename(f), all_na = all_na, csd = csd) + + pcai = t(t(as.matrix(pca$map)[, use] %*% t(pca$model$loadings)[use, ]) + pca$model$center) + + tmp = r[[1]] + pcair = stack(lapply(seq(ncol(pcai)), function(i){ + setValues(tmp, pcai[, i]) + })) + + if(all_na == 1){ + pcair = stack(setValues(tmp, rep(NA, ncell(tmp))), pcair) + } else if(all_na > 1){ + pcair = stack(pcair[[1:(all_na-1)]], + setValues(tmp, rep(NA, ncell(tmp))), + pcair[[(all_na):nlayers(pcair)]]) + } + names(pcair) = paste0(plotid, "_pcai_", seq(nl)) + + saveRDS(pcair, file = paste0(path_hyp_nrm, plotid, "_pcai.rds")) + + return(log) +} + +saveRDS(log, file = paste0(path_meta, "030_noise_removal_log.rds")) + +stopCluster(cl) + + +# Cross check +log = readRDS(file = paste0(path_meta, "030_noise_removal_log.rds")) + +csd_stat = sapply(log, function(l){l$csd}) +summary(csd_stat) + +all_na_stat = sapply(log, function(l){l$all_na}) +summary(all_na_stat) + + \ No newline at end of file diff --git a/src/040_comp_vegidcs.R b/src/040_comp_vegidcs.R new file mode 100644 index 0000000..ca23930 --- /dev/null +++ b/src/040_comp_vegidcs.R @@ -0,0 +1,59 @@ +# Compute vegetation indicies + +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") +if(length(showConnections()) == 0){ + cores = 3 + cl = parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) +} + +hd_files = list.files(path_hyp_nrm, recursive = FALSE, full.names = TRUE) +h_meta = readRDS(paste0(path_meta, "hyp_meta.rds")) + +dir.create(paste0(path_hyp_vegidcs), showWarnings = FALSE) + +vis = c("CARI", + "Carter", "Carter2", "Carter3", "Carter4", "Carter5", "Carter6", + "CI", "CI2", "ClAInt", + "CRI1", "CRI2", "CRI3", "CRI4", + "Datt", "Datt2", "Datt4", "Datt5", "Datt6", + "DD", "DDn", "DWSI4", + "EVI", "GDVI_2", "GDVI_3", "GDVI_4", "GI", "Gitelson", "Gitelson2", + "GMI1", "GMI2", "Maccioni", + "MCARI", "MCARI/OSAVI", "MCARI2", "MCARI2/OSAVI2", + "mND705", "mNDVI", "MPRI", "MSAVI", "mSR", "mSR2", "mSR705", + "MTCI", "MTVI", "NDVI", "NDVI2", "NDVI3", "NPCI", + "OSAVI", "OSAVI2", "PARS", "PRI", "PRI*CI2", "PRI_norm", "PSND", + "PSRI", "PSSR", "PWI", "RDVI", "REP_Li", "SAVI", "SIPI", "SPVI", + "SR", "SR1", "SR2", "SR3", "SR4", "SR5", "SR6", "SR7", "SR8", + "SRPI", "TCARI", "TCARI/OSAVI", "TCARI2", "TCARI2/OSAVI2", + "TGI", "TVI", "Vogelmann", "Vogelmann2", "Vogelmann4") + +foreach(i = seq(length(hd_files)), .packages = c("hsdar", "raster")) %do% { + plotid = substr(basename(hd_files[[i]]), 1, 4) + m = h_meta[[2]][[h_meta[[1]]$list[grep(plotid, h_meta[[1]]$plotID)]]] + r = hsdar::speclib(brick(readRDS(hd_files[[i]])), + wavelength = m$wavelength, + fwhm = m$fwhm, + continuousdata = "auto") + v = vegindex(r, index = vis) + vr = readAll(v@spectra@spectra_ra) + names(vr) = paste0(plotid, "_", vis) + saveRDS(vr, file = paste0(path_hyp_vegidcs, plotid, "_vegidcs.rds")) +} + +stopCluster(cl) + + +# Visually check data +visCheck(datapath = path_hyp_vegidcs, polygonfile = paste0(path_plots, "BPolygon.shp"), band = 47) + +files = list.files(path_hyp_vegidcs, full.names = TRUE) +nacheck = do.call("rbind", lapply(files, function(f){ + df = readRDS(f) + nasum = lapply(seq(nlayers(df)), function(i){ + sum(is.na(getValues(df[[i]]))) + }) + return(data.frame(f = basename(f), nacheck = min(unlist(nasum)) == max(unlist(nasum)))) +})) +nacheck diff --git a/src/050_comp_kmdc.R b/src/050_comp_kmdc.R new file mode 100644 index 0000000..6551cab --- /dev/null +++ b/src/050_comp_kmdc.R @@ -0,0 +1,60 @@ +# Compute mean distance from centroid on original band stack and +# scaled vegetation inidces stack + +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") + +if(length(showConnections()) == 0){ + cores = 3 + cl = parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) +} + + +dir.create(paste0(path_hyp_kmdc), showWarnings = FALSE) + + +hd_files = c(list.files(path_hyp_nrm, recursive = FALSE, full.names = TRUE), + list.files(path_hyp_vegidcs, recursive = FALSE, full.names = TRUE)) + +h_meta = readRDS(paste0(path_meta, "hyp_meta.rds")) + +foreach (i = seq(length(hd_files)), .packages = c("raster")) %dopar% { + filename = basename(hd_files[i]) + productid = paste0(substr(filename, 1, nchar(filename)-4), "_kmdc") + + r = readRDS(hd_files[[i]]) + rds = getValues(r) + + all_na = grep(nrow(rds), colSums(is.na(rds))) + if(length(all_na) > 0){ + rds = rds[,-all_na] + } else { + all_na = -1 + } + + cc = which(complete.cases(rds)) + rds_cc = rds[cc, ] + + # Scale vegetation indicies + if(grepl("vegidcs", filename)){ + rds_cc = scale(rds_cc, center = TRUE, scale = TRUE) + } + + km = kmeans(rds_cc, center = 1) + kmd = sqrt(rowSums(rds_cc - fitted(km))**2) + + rds_kmd = rds[, 1] + rds_kmd[cc] = kmd + rds_kmd = setValues(r[[1]], rds_kmd) + + names(rds_kmd) = productid + + saveRDS(rds_kmd, file=paste0(path_hyp_kmdc, productid, ".rds")) +} + +stopCluster(cl) + + +# Visually check data +visCheck(datapath = path_hyp_kmdc, polygonfile = paste0(path_plots, "BPolygon.shp"), band = 1) + diff --git a/src/060_comp_raoq.R b/src/060_comp_raoq.R new file mode 100644 index 0000000..9ff31f8 --- /dev/null +++ b/src/060_comp_raoq.R @@ -0,0 +1,52 @@ +# Compute Rao's Q on original bands stack, scaled vegetation indices stack, +# and mean distance from centroid band and vegetation index data. + +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") +# if(length(showConnections()) == 0){ +# cores = 2 +# cl = parallel::makeCluster(cores) +# doParallel::registerDoParallel(cl) +# } + +dir.create(paste0(path_hyp_raoq), showWarnings = FALSE) +windows = c(3) + +hd_files = c(list.files(path_hyp_nrm, recursive = FALSE, full.names = TRUE), + list.files(path_hyp_vegidcs, recursive = FALSE, full.names = TRUE), + list.files(path_hyp_kmdc, recursive = FALSE, full.names = TRUE)) + +h_meta = readRDS(paste0(path_meta, "hyp_meta.rds")) + +foreach (i = seq(length(hd_files))) %do% { + filename = basename(hd_files[i]) + productid = paste0(substr(filename, 1, nchar(filename)-4), "_raoq") + + r = readRDS(hd_files[[i]]) + + # Scale vegetation indicies + if(grepl("vegidcs", filename)){ + r = scale(r, center = TRUE, scale = TRUE) + } + + # ra = aggregate(r, fact=2, fun=mean) + for(w in windows){ + raomatrix <- spectralrao(as.list(r), + mode="multidimension", + distance_m="euclidean", + window=w, + shannon=FALSE, + debugging=TRUE, + simplify=3) + raor = setValues(r[[1]], raomatrix[[1]]) + names(raor) = productid + saveRDS(raor, file = paste0(path_hyp_raoq, + productid, "_", w, ".rds")) + } +} + + +# stopCluster(cl) + +# Visually check data +visCheck(datapath = path_hyp_raoq, polygonfile = paste0(path_plots, "BPolygon.shp"), band = 1) + diff --git a/src/070_comp_txtr.R b/src/070_comp_txtr.R new file mode 100644 index 0000000..1ffe0b6 --- /dev/null +++ b/src/070_comp_txtr.R @@ -0,0 +1,37 @@ +# Compute texture metrics on mean distance from centroid datasets. + +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") + +dir.create(paste0(path_hyp_glcm), showWarnings = FALSE) + +hd_files = c(list.files(path_hyp_kmdc, recursive = FALSE, full.names = TRUE)) + +h_meta = readRDS(paste0(path_meta, "hyp_meta.rds")) + + +windows = c(3, 11, 31) +n_grey = c(32) + +foreach (i = seq(length(hd_files))) %do% { + filename = basename(hd_files[i]) + productid = substr(filename, 1, nchar(filename)-4) + + r = readRDS(hd_files[[i]]) + + txtr_wg = lapply(windows, function(w){ + txtr_g = lapply(n_grey, function(g){ + txtr = glcmTextures(r, kernel_size = w, + stats = c("entropy", "homogeneity", "second_moment"), + n_grey = g, parallel = FALSE) + names(txtr[[1]]) = paste0(productid, "_", names(txtr[[1]]), "_w", sprintf("%02d", w), "_g", sprintf("%02d", g)) + return(txtr) + }) + }) + txtr_wg = stack(unlist(txtr_wg, recursive = TRUE)) + + saveRDS(txtr_wg, file = paste0(path_hyp_glcm, productid, "_glcm.rds")) +} + +# Visually check data +visCheck(datapath = path_hyp_glcm, polygonfile = paste0(path_plots, "BPolygon.shp"), band = 1) + diff --git a/src/090_combine_predictors.R b/src/090_combine_predictors.R new file mode 100644 index 0000000..fd8a4e6 --- /dev/null +++ b/src/090_combine_predictors.R @@ -0,0 +1,63 @@ +# Compute mean and sd of final predictor sets. + +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") +if(length(showConnections()) == 0){ + cores = 3 + cl = parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) +} + +dir.create(paste0(path_hyp_pred), showWarnings = FALSE) + +hd_files = c(list.files(path_hyp_vegidcs, recursive = FALSE, full.names = TRUE), + list.files(path_hyp_kmdc, recursive = FALSE, full.names = TRUE), + list.files(path_hyp_glcm, recursive = FALSE, full.names = TRUE), + list.files(path_hyp_raoq, recursive = FALSE, full.names = TRUE)) + +h_meta = readRDS(paste0(path_meta, "hyp_meta.rds")) + +preds = foreach (i = seq(length(hd_files))) %do% { + + print(i) + + r = readRDS(hd_files[[i]]) + + nms = names(r) + plotid = substr(nms[1], 1, 4) + productid = substr(nms, 6, nchar(nms)) + + l = lapply(seq(nlayers(r)), function(l){ + df = data.frame(mean(getValues(r[[l]]), na.rm = TRUE), sd(getValues(r[[l]]), na.rm = TRUE)) + colnames(df) = c(paste0(productid[l], c("_mean", "_sd"))) + return(df) + }) + df = do.call("cbind", l) + df = data.frame(plotID = plotid, df) + return(df) +} + +grp = list(grep("vegidcs.rds", hd_files), + grep("vegidcs_raoq_3.rds", hd_files), + grep("vegidcs_kmdc.rds", hd_files), + grep("vegidcs_kmdc_glcm.rds", hd_files), + grep("vegidcs_kmdc_raoq_3.rds", hd_files), + grep("pcai_raoq_3.rds", hd_files), + grep("pcai_kmdc.rds", hd_files), + grep("pcai_kmdc_glcm.rds", hd_files), + grep("pcai_kmdc_raoq_3.rds", hd_files)) + +if(length(unlist(grp)) == length(preds)){ + df = lapply(grp, function(g){ + do.call("rbind", preds[g]) + }) + df = do.call("cbind", df) + df = df[, -grep("plotID", colnames(df))[-1]] +} + +saveRDS(df, file = paste0(path_hyp_pred, "hyperspec_preds.rds")) + +stopCluster(cl) + +# Visually check data +corrplot(cor(df[, -1])) + diff --git a/src/100_combine_predictores_biodiv_sr.R b/src/100_combine_predictores_biodiv_sr.R new file mode 100644 index 0000000..832c0c6 --- /dev/null +++ b/src/100_combine_predictores_biodiv_sr.R @@ -0,0 +1,66 @@ +# Combine hyperspectral predictores and biodiversity variables in gpm class. + +source("D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R") + + +preds_hyspec = readRDS(paste0(path_hyp_pred, "hyperspec_preds.rds")) +preds_lidar = readRDS(file.path(path_ldr_pred, "lidar_preds.rds")) +preds_lidar = preds_lidar[, c(which(colnames(preds_lidar) == "plotID"), + seq(which(colnames(preds_lidar) == "AGB"), + which(colnames(preds_lidar) == "qntl_rng")))] + +# bd = readRDS(paste0(path_biodiv, "biodiv.rds")) +species_richness = readRDS(paste0(path_biodiv, "species_richness.rds")) +species_composition_dcor = readRDS(paste0(path_biodiv, "species_composition_dcor.rds")) +species_network_pca = readRDS(paste0(path_biodiv, "species_network_pca.rds")) + +comb = data.frame(species_network_pca$scores) +colnames(comb) = paste0("sn_pca", seq(4)) +comb$plotID = rownames(species_network_pca$scores) + +comb = merge(species_richness, comb, by = c("plotID"), all.x = TRUE, all.y = TRUE) + +for(i in seq(length(species_composition_dcor))){ + act = data.frame(species_composition_dcor[[i]]$rproj) + colnames(act) = paste0("sn_dca", seq(4), "_", names(species_composition_dcor[i])) + act$plotID = rownames(act) + comb = merge(comb, act, by = c("plotID"), all.x = TRUE, all.y = TRUE) +} + +comb = merge(comb, preds_hyspec, by = c("plotID")) +comb = merge(comb, preds_lidar, by = c("plotID")) + +comb = droplevels(comb) + +comb$SelCat = substr(as.character(comb$plotID), 1, 3) + +selnbr = lapply(table(comb$SelCat), function(c){ + seq(c) +}) +comb$SelNbr = unlist(selnbr) + + +col_selector = which(names(comb) %in% c("SelCat", "SelNbr")) + +col_diversity = seq(which("SRspiders" == colnames(comb)), + which("sn_dca4_Decomposer" == colnames(comb))) + +col_precitors = c(which("elevation" == colnames(comb)), + which("lui" == colnames(comb)), + seq(which("CARI_mean" == colnames(comb)), + which("qntl_rng" == colnames(comb)))) + +col_meta = which(!seq(ncol(comb)) %in% c(col_selector, col_diversity, col_precitors)) + + +meta <- createGPMMeta(comb, type = "input", + selector = col_selector, + response = col_diversity, + predictor = col_precitors, + meta = col_meta) + +comb <- gpm(comb, meta, scale = FALSE) + +dir.create(paste0(path_comb_gpm_sr), showWarnings = FALSE) + +saveRDS(comb, file = paste0(path_comb_gpm_sr, "ki_hyperspec_lidar_biodiv_non_scaled.rds")) diff --git a/src/110_predict_biodiv_sr_rf.R b/src/110_predict_biodiv_sr_rf.R new file mode 100644 index 0000000..8a819b9 --- /dev/null +++ b/src/110_predict_biodiv_sr_rf.R @@ -0,0 +1,88 @@ +# Predict species richness using different models and predictor sets +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/hySpecVisKili/src/000_set_environment_linux.R" +} +source(filepath_base) + +if(length(showConnections()) == 0){ + cores = 20 + cl = parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) +} + + +dir.create(paste0(path_model_gpm_sr), showWarnings = FALSE) + +comb = readRDS(paste0(path_comb_gpm_sr, "ki_hyperspec_lidar_biodiv_non_scaled.rds")) + + +# Predict with all elevation and lui information, hyperspectral data only, +# all data, and kmdc and raoq only using gam, pls and rf models. +mtypes = c("gam", "pls", "rf") +mtypes = c("rf") +ptypes = c("*elev*", "*elui*", + "*spec*", "*elsp*", + "*lidr*", "*eldr*", + "*splr*", "*esld*", + "*kmra*") + +mt = mtypes[1] +pt = ptypes[1] + + +elev_cols = seq(which(comb@meta$input$PREDICTOR == "elevation")) + +elui_cols = seq(which(comb@meta$input$PREDICTOR == "elevation"), + which(comb@meta$input$PREDICTOR == "lui")) + +spec_cols = seq(which(comb@meta$input$PREDICTOR == "CARI_mean"), + which(comb@meta$input$PREDICTOR == "pcai_kmdc_raoq_sd")) + +ldr_cols = seq(which(comb@meta$input$PREDICTOR == "AGB"), + which(comb@meta$input$PREDICTOR == "qntl_rng")) + +for(mt in mtypes){ + for(pt in ptypes){ + + if(pt == "*elev*"){ + comb@meta$input$PREDICTOR_FINAL = comb@meta$input$PREDICTOR[elev_cols] + + } else if(pt == "*elui*"){ + comb@meta$input$PREDICTOR_FINAL = comb@meta$input$PREDICTOR[elui_cols] + + } else if(pt == "*spec*"){ + comb@meta$input$PREDICTOR_FINAL = comb@meta$input$PREDICTOR[spec_cols] + + } else if(pt == "*elsp*"){ + comb@meta$input$PREDICTOR_FINAL = comb@meta$input$PREDICTOR[c(elui_cols, + spec_cols)] + + } else if(pt == "*lidr*"){ + comb@meta$input$PREDICTOR_FINAL = comb@meta$input$PREDICTOR[ldr_cols] + + } else if(pt == "*eldr*"){ + comb@meta$input$PREDICTOR_FINAL = comb@meta$input$PREDICTOR[c(elui_cols, + ldr_cols)] + + } else if(pt == "*splr*"){ + comb@meta$input$PREDICTOR_FINAL = comb@meta$input$PREDICTOR[c(spec_cols, + ldr_cols)] + + } else if(pt == "*esld*"){ + comb@meta$input$PREDICTOR_FINAL = comb@meta$input$PREDICTOR[c(elui_cols, + spec_cols, + ldr_cols)] + } else if(pt == "*kmra*"){ + comb@meta$input$PREDICTOR_FINAL = unique(comb@meta$input$PREDICTOR[ + c(grep("kmdc", comb@meta$input$PREDICTOR), + grep("raoq", comb@meta$input$PREDICTOR))]) + } + + compModels(model = comb, pt = pt, mt = mt, outpath = path_model_gpm_sr) + } +} + + +stopCluster(cl) diff --git a/src/120_compile_analyse_biodiv_sr.R b/src/120_compile_analyse_biodiv_sr.R new file mode 100644 index 0000000..f08dcca --- /dev/null +++ b/src/120_compile_analyse_biodiv_sr.R @@ -0,0 +1,44 @@ +# Combine species richness model results in one variable. + +require(envimaR) +root_folder = path.expand("~/plygrnd/hySpecVisKili/") +source(file.path(root_folder, "hySpecVisKili/src/000_setup_windows.R")) +dir.create(envrmt$path_120_compile_analysis_sr, showWarnings = FALSE) + + +# Combine all models into one gpm object +# mtypes = c("gam", "pls", "rf") +mtypes = c("gam", "pls", "rf") + + + +all_models = lapply(mtypes, function(mt){ + ptypes = c("*elui*", + "*spec*", "*elsp*", + "*lidr*", "*eldr*", + "*splr*", "*esld*", + "*kmra*") + if(mt == "gam"){ + ptypes = c("*elev*", ptypes) + } + all_pmodels = lapply(ptypes, function(pt){ + model_files = list.files(envrmt$path_110_model_gpm_sr, full.names = TRUE, + pattern = glob2rx(paste0(pt, mt, "*"))) + + all_models = readRDS(model_files[[1]]) + + for(i in (seq(2, length(model_files)))){ + all_models@model[[1]][[i]] = readRDS(model_files[[i]])@model[[1]][[1]] + } + + return(all_models) + }) + names(all_pmodels) = gsub("[*]", "", ptypes) + return(all_pmodels) +}) +names(all_models) = gsub("[*]", "", gsub("_", "", mtypes)) + +saveRDS(all_models, file = file.path(envrmt$path_120_compile_analysis_sr, + "models_sr.rds")) + + diff --git a/src/200_combine_predictores_biodiv_sr_residuals.R b/src/200_combine_predictores_biodiv_sr_residuals.R new file mode 100644 index 0000000..6a0cf1c --- /dev/null +++ b/src/200_combine_predictores_biodiv_sr_residuals.R @@ -0,0 +1,55 @@ +# Compile species richness dataset containing residuals from some previous modelling +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/HySpec_KiLi/src/000_set_environment_linux.R" +} + +source(filepath_base) + + +dir.create(paste0(path_comb_gpm_sr_res), showWarnings = FALSE) + + +# 1st version - not used anymore +# Compile elevation residuals for gam model using eleveation as only predictor +# pt = "*elev*" +# mt = "*gam*" +# +# comb_sr = readRDS(paste0(path_comb_gpm_sr, "ki_hyperspec_biodiv_non_scaled.rds")) +# comb_sr_elev_res = compResData(comb_sr, pt, mt) +# +# saveRDS(comb_sr_elev_res, +# file = file.path(path_comb_gpm_sr_res, +# paste0("ki_hyperspec_biodiv_non_scaled", +# gsub("[*]", "", paste0("_", mt, "_", pt, "_res.rds"))))) + + + +# 1st version - not used anymore +# Compile elevation residuals for pls model using elevation and LUI as only predictor +# pt = "*elui*" +# mt = "*pls*" +# +# comb_sr = readRDS(paste0(path_comb_gpm_sr, "ki_hyperspec_biodiv_non_scaled.rds")) +# comb_sr_elev_res = compResData(comb_sr, pt, mt) +# +# saveRDS(comb_sr_elev_res, +# file = file.path(path_comb_gpm_sr_res, +# paste0("ki_hyperspec_biodiv_non_scaled", +# gsub("[*]", "", paste0("_", mt, "_", pt, "_res.rds"))))) + + + +# Compile elevation residuals for rf model using elevation and LUI as only predictor +pt = "*elui*" +mt = "*rf*" + +comb_sr = readRDS(paste0(path_comb_gpm_sr, "ki_hyperspec_biodiv_non_scaled.rds")) +comb_sr_elev_res = compResData(comb_sr, pt, mt) + +saveRDS(comb_sr_elev_res, + file = file.path(path_comb_gpm_sr_res, + paste0("ki_hyperspec_biodiv_non_scaled", + gsub("[*]", "", paste0("_", mt, "_", pt, "_res.rds"))))) + diff --git a/src/210_predict_biodiv_sr_res_rf.R b/src/210_predict_biodiv_sr_res_rf.R new file mode 100644 index 0000000..ac3a905 --- /dev/null +++ b/src/210_predict_biodiv_sr_res_rf.R @@ -0,0 +1,45 @@ +# comb_elev_resine hyperspectral predictores and biodiversity variables in gpm class. +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/HySpec_KiLi/src/000_set_environment_linux.R" +} +source(filepath_base) + +if(length(showConnections()) == 0){ + cores = 30 + cl = parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) +} + +dir.create(paste0(path_model_gpm_sr_res), showWarnings = FALSE) + +# Predict gam, pls and rf elevation and elevation/lui based residuals using +# pls and rf models with hyperspectral data only +# res_suffixes = c("_gam_elev_res", "_pls_elui_res", "_rf_elui_res") +res_suffixes = c("_rf_elui_res") +# mtypes = c("pls", "rf") +mtypes = c("rf") +ptypes = c("*spec*", "*kmra*") + + +for(res_suffix in res_suffixes){ + comb_elev_res = readRDS(paste0(path_comb_gpm_sr_res, "ki_hyperspec_biodiv_non_scaled", + res_suffix, ".rds")) + for(mt in mtypes){ + for(pt in ptypes){ + + if(pt == "*spec*"){ + comb_elev_res@meta$input$PREDICTOR_FINAL = comb_elev_res@meta$input$PREDICTOR[-c(1,2)] + } else if(pt == "*kmra*"){ + comb_elev_res@meta$input$PREDICTOR_FINAL = unique(comb_elev_res@meta$input$PREDICTOR[ + c(grep("kmdc", comb_elev_res@meta$input$PREDICTOR), + grep("raoq", comb_elev_res@meta$input$PREDICTOR))]) + } + + compModels(model = comb_elev_res, pt = pt, mt = mt, outpath = path_model_gpm_sr_res) + } + } +} + +stopCluster(cl) \ No newline at end of file diff --git a/src/220_compile_analyse_biodiv_sr_elev_res.R b/src/220_compile_analyse_biodiv_sr_elev_res.R new file mode 100644 index 0000000..17bed1d --- /dev/null +++ b/src/220_compile_analyse_biodiv_sr_elev_res.R @@ -0,0 +1,45 @@ +# Combine species richness residual model results in one variable. +source("D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R") + + +dir.create(path_compile_analysis_sr_elev_res, showWarnings = FALSE) + + +# Combine all models into one gpm object +pt = "*spec*" +mtypes = c("*rf*") +rtypes = c("*gam_elev_res*", "*pls_elui_res*", "*rf_elui_res*") +rtypes = c("*pls_elui_res*", "*rf_elui_res*") + +all_models = lapply(mtypes, function(mt){ + all_pmodels = lapply(rtypes, function(rt){ + model_files = list.files(path_model_gpm_sr_res, full.names = TRUE, + pattern = glob2rx(paste0(mt, rt))) + + l = 0 + i = 0 + while(l == 0){ + i = i + 1 + all_models = readRDS(model_files[[i]]) + l = length(all_models@model) + } + + + for(i in (seq(2, length(model_files)))){ + act_model = readRDS(model_files[[i]]) + if(length(act_model@model) > 0){ + all_models@model[[1]][[i]] = act_model@model[[1]][[1]] + } else { + all_models@model[[1]][[i]] = NA + } + } + + return(all_models) + }) + names(all_pmodels) = gsub("[*]", "", rtypes) + return(all_pmodels) +}) +names(all_models) = gsub("[*]", "", gsub("_", "", mtypes)) + +saveRDS(all_models, file = file.path(path_compile_analysis_sr_elev_res, + "models_sr_elev_res.rds")) diff --git a/src/300_predict_biodiv_sr_rf_indp.R b/src/300_predict_biodiv_sr_rf_indp.R new file mode 100644 index 0000000..42abafa --- /dev/null +++ b/src/300_predict_biodiv_sr_rf_indp.R @@ -0,0 +1,38 @@ +# Recompute models using best variable subset and completely independent validation + +# Predict species richness using different models and predictor sets +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/HySpec_KiLi/src/000_set_environment_linux.R" +} +source(filepath_base) + +if(length(showConnections()) == 0){ + cores = 3 + cl = parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) +} + +dir.create(path_model_gpm_sr_indp, showWarnings = FALSE) + +all_models = readRDS(file.path(path_compile_analysis_sr, "models_sr.rds")) + +mt = "rf" + +# Predict variables again using best predictors and independent validation. +for(pt in names(all_models[[mt]])){ + comb = all_models[[mt]][[pt]] + var_imp <- compVarImp(comb@model[[1]], scale = FALSE) + + for(rs in seq(length(var_imp))){ + comb@meta$input$RESPONSE = as.character(var_imp[[rs]]$RESPONSE[1]) + comb@meta$input$PREDICTOR_FINAL = as.character(var_imp[[rs]]$VARIABLE) + + compModels(model = comb, pt = pt, mt = mt, rs = rs, outpath = path_model_gpm_sr_indp, nested_cv = TRUE) + } +} + + +stopCluster(cl) + diff --git a/src/310_combine_predictores_biodiv_sr_residuals_indp.R b/src/310_combine_predictores_biodiv_sr_residuals_indp.R new file mode 100644 index 0000000..253e66a --- /dev/null +++ b/src/310_combine_predictores_biodiv_sr_residuals_indp.R @@ -0,0 +1,25 @@ +# Compile species richness dataset containing residuals from some previous modelling +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/HySpec_KiLi/src/000_set_environment_linux.R" +} + +source(filepath_base) + + +dir.create(paste0(path_comb_gpm_sr_res), showWarnings = FALSE) + +# Compile elevation residuals for rf model using elevation and LUI as only predictor +pt = "*elui*" +mt = "*rf*" +suf = "_res_indp" + +comb_sr = readRDS(paste0(path_comb_gpm_sr, "ki_hyperspec_biodiv_non_scaled.rds")) +comb_sr_elev_res = compResData(comb_sr, pt, mt, model_path = path_model_gpm_sr_indp, suf = suf) + +saveRDS(comb_sr_elev_res, + file = file.path(path_comb_gpm_sr_elev_res_indp, + paste0("ki_hyperspec_biodiv_non_scaled", + gsub("[*]", "", paste0("_", mt, "_", pt, suf, ".rds"))))) + diff --git a/src/320_predict_biodiv_sr_rf_indp.R b/src/320_predict_biodiv_sr_rf_indp.R new file mode 100644 index 0000000..db24516 --- /dev/null +++ b/src/320_predict_biodiv_sr_rf_indp.R @@ -0,0 +1,41 @@ +# Recompute elevation residual models using best variable subset and completely independent validation + +# Predict species richness using different models and predictor sets +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/HySpec_KiLi/src/000_set_environment_linux.R" +} +source(filepath_base) + +if(length(showConnections()) == 0){ + cores = 3 + cl = parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) +} + +dir.create(path_model_gpm_sr_elev_res_indp, showWarnings = FALSE) + +elev_res_indp = readRDS(file.path(path_comb_gpm_sr_elev_res_indp, "ki_hyperspec_biodiv_non_scaled_rf_elui_res_indp.rds")) +all_models_res = readRDS(file.path(path_compile_analysis_sr_elev_res, + "models_sr_elev_res.rds")) + +mt = "rf" + +# Predict variables again using best predictors and independent validation. +for(pt in names(all_models_res[[mt]])){ + comb_res = all_models_res[[mt]][[pt]] + var_imp <- compVarImp(comb_res@model[[1]], scale = FALSE) + com_res_indp = elev_res_indp + + for(rs in seq(length(var_imp))){ + com_res_indp@meta$input$RESPONSE = paste0(as.character(var_imp[[rs]]$RESPONSE[1]), "_indp") + com_res_indp@meta$input$PREDICTOR_FINAL = as.character(var_imp[[rs]]$VARIABLE) + + compModels(model = com_res_indp, pt = pt, mt = mt, rs = rs, outpath = path_model_gpm_sr_elev_res_indp, nested_cv = TRUE) + } +} + + +stopCluster(cl) + diff --git a/src/500_analyse_biodiv_sr.Rmd b/src/500_analyse_biodiv_sr.Rmd new file mode 100644 index 0000000..e72905e --- /dev/null +++ b/src/500_analyse_biodiv_sr.Rmd @@ -0,0 +1,244 @@ +--- +title: "500 Analyse Biodiv-RS" +output: html_notebook +editor_options: + chunk_output_type: console +--- + +```{r, include = FALSE} +# Set up working environment and defaults -------------------------------------- +if(Sys.info()["sysname"] == "Windows"){ + filepath_base = "D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R" +} else { + filepath_base = "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/hySpecVisKili/src/000_set_environment_linux.R" +} +source(filepath_base) + +all_models = readRDS(file.path(path_compile_analysis_sr, + "models_sr.rds")) + +# Collect model performance +gam_sr = modelPerformance(all_models[["gam"]]) +pls_sr = modelPerformance(all_models[["pls"]]) +rf_sr = modelPerformance(all_models[["rf"]]) + +summary(gam_sr) +summary(pls_sr) +summary(rf_sr) + +# Get trophic levels +tl = read.table(file.path(path_meta, "trophic_levels.csv"), header = TRUE, sep = ";") +gam_sr = merge(gam_sr, tl, by.x = "resp", by.y = "Species") +pls_sr = merge(pls_sr, tl, by.x = "resp", by.y = "Species") +rf_sr = merge(rf_sr, tl, by.x = "resp", by.y = "Species") + +# Arrange levels and species names +gam_sr$Level = factor(gam_sr$Level, levels(gam_sr$Level)[c(1, 5, 4, 3, 6, 2)] ) +gam_sr$resp = as.character(gam_sr$resp) +gam_sr$resp = substr(gam_sr$resp, 3, nchar(gam_sr$resp)) +gam_sr$resp = gsub("(^[[:alpha:]])", "\\U\\1", gam_sr$resp, perl=TRUE) +gam_sr$resp = factor(gam_sr$resp, unique(gam_sr$resp[order(gam_sr$Level, gam_sr$resp)])) + +pls_sr$Level = factor(pls_sr$Level, levels(pls_sr$Level)[c(1, 5, 4, 3, 6, 2)] ) +pls_sr$resp = as.character(pls_sr$resp) +pls_sr$resp = substr(pls_sr$resp, 3, nchar(pls_sr$resp)) +pls_sr$resp = gsub("(^[[:alpha:]])", "\\U\\1", pls_sr$resp, perl=TRUE) +pls_sr$resp = factor(pls_sr$resp, unique(pls_sr$resp[order(pls_sr$Level, pls_sr$resp)])) + + +rf_sr$Level = factor(rf_sr$Level, levels(rf_sr$Level)[c(1, 5, 4, 3, 6, 2)] ) +rf_sr$resp = as.character(rf_sr$resp) +rf_sr$resp = substr(rf_sr$resp, 3, nchar(rf_sr$resp)) +rf_sr$resp = gsub("(^[[:alpha:]])", "\\U\\1", rf_sr$resp, perl=TRUE) +rf_sr$resp = factor(rf_sr$resp, unique(rf_sr$resp[order(rf_sr$Level, rf_sr$resp)])) + +gam_sr$mptype = paste0(gam_sr$mtype, "_", gam_sr$ptype) +pls_sr$mptype = paste0(pls_sr$mtype, "_", pls_sr$ptype) +rf_sr$mptype = paste0(rf_sr$mtype, "_", rf_sr$ptype) + +model_sr_m = rbind(gam_sr[, c("resp", "mtype", "mptype", "Resample", "RMSE", "Rsquared", "RMSE_normSD")], + pls_sr[, c("resp", "mtype", "mptype", "Resample", "RMSE", "Rsquared", "RMSE_normSD")], + rf_sr[, c("resp", "mtype", "mptype", "Resample", "RMSE", "Rsquared", "RMSE_normSD")]) + +model_sr_m = model_sr_m[model_sr_m$Resample != "Mean", ] + +ggplot(data = rbind(model_sr_m[model_sr_m$mptype == "gam_elui" | model_sr_m$mptype == "gam_elev" | model_sr_m$mptype == "pls_spec" | model_sr_m$mptype == "pls_lidr" | model_sr_m$mptype == "pls_elui" | model_sr_m$mptype == "rf_spec" | model_sr_m$mptype == "rf_lidr" | model_sr_m$mptype == "rf_elui", ]), aes(x = resp, y = RMSE_normSD, fill = mptype)) + + geom_boxplot() + + geom_hline(yintercept=c(0.5,1), linetype="dashed", color = "black") + + scale_fill_brewer(palette="Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) + + + +best_model = lapply(unique(model_sr_m$)) + + + + +label_colors = "black" + + +pls_sr$RMSE_normSD[pls_sr$] + +label_colors <- ifelse(pls_sr$RMSE_normSD == 0, "red", "blue") + +pls_sr_tmp = pls_sr[pls_sr$mtype == "pls" & + (pls_sr$mptype == "pls_spec" | + pls_sr$mptype == "pls_lidr" | + pls_sr$mptype == "pls_elui"),] + +label_colors = rep("black", length(unique(pls_sr_tmp$resp))) +if() + +ggplot(data = pls_sr[pls_sr$mtype == "pls" & + (pls_sr$mptype == "pls_spec" | + pls_sr$mptype == "pls_lidr" | + pls_sr$mptype == "pls_elui"),], + aes(x = resp, y = RMSE_normSD, fill = mptype)) + + geom_boxplot() + + geom_hline(yintercept=c(0.5,1), linetype="dashed", color = "black") + + scale_fill_brewer(palette="Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) + + +ggplot(data = rf_sr[rf_sr$mtype == "rf" & + (rf_sr$mptype == "rf_spec" | + rf_sr$mptype == "rf_lidr" | + rf_sr$mptype == "rf_elui"),], + aes(x = resp, y = RMSE_normSD, fill = mptype)) + + geom_boxplot() + + geom_hline(yintercept=c(0.5,1), linetype="dashed", color = "black") + + scale_fill_brewer(palette="Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) + + + + + +``` + +# Compare PLS and RF +```{r, echo=FALSE} +models_sr = rbind(pls_sr[, -4], rf_sr[, -4]) +models_sr$mptype = paste0(models_sr$mtype, "_", models_sr$ptype) +models_sr$mptype = factor(models_sr$mptype, levels = c("pls_elsp", "rf_elsp", + "pls_elui", "rf_elui", + "pls_kmra", "rf_kmra", + "pls_spec", "rf_spec")) + +ggplot(data = models_sr[models_sr$mtype == "pls" & + (models_sr$ptype == "elui" | models_sr$ptype == "spec"),], + aes(x = resp, y = RMSE_normSD, fill = mptype)) + + geom_boxplot() + + geom_hline(yintercept=c(0.5,1), linetype="dashed", color = "black") + + scale_fill_brewer(palette="Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) + + + +ggplot(data = models_sr[models_sr$mtype == "rf" & + (models_sr$ptype == "elui" | models_sr$ptype == "spec"),], + aes(x = resp, y = RMSE_normSD, fill = mptype)) + + geom_boxplot() + + geom_hline(yintercept=c(0.5,1), linetype="dashed", color = "black") + + scale_fill_brewer(palette="Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) + + +ggplot(data = models_sr[models_sr$mtype == "rf" & models_sr$ptype == "spec",], + aes(x = resp, y = RMSE_normSD, fill = Level)) + + geom_boxplot() + + geom_hline(yintercept=c(0.5,1), linetype="dashed", color = "black") + + scale_fill_brewer(palette="Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species richness", y = "RMSEn", fill = "Trophic level")) +``` + +```{r, echo=FALSE} +pls_rf_sr = merge(pls_sr, rf_sr, by = c("ptype", "resp", "Resample"), all.y = TRUE) +colnames(pls_rf_sr)[grep("\\.x", colnames(pls_rf_sr))] = + gsub("\\.x", "_pls", colnames(pls_rf_sr)[grep("\\.x", colnames(pls_rf_sr))]) +colnames(pls_rf_sr)[grep("\\.y", colnames(pls_rf_sr))] = + gsub("\\.y", "_rf", colnames(pls_rf_sr)[grep("\\.y", colnames(pls_rf_sr))]) +# nrow(pls_rf_sr) + +ptypes = c("elui", "kmra", "spec", "elsp") +perf_check = lapply(ptypes, function(pt){ + subdf = pls_rf_sr[!is.na(pls_rf_sr$RMSE_pls) & + pls_rf_sr$ptype == pt & + pls_rf_sr$Resample == "Mean", ] + rownames(subdf[subdf$RMSE_pls < subdf$RMSE_rf, ]) +}) +names(perf_check) = ptypes +``` + +# Check performance of PLS and RF +```{r, echo = FALSE} +for(i in seq(length(perf_check))){ + rmse_perf = sort(round(1-pls_rf_sr[as.numeric(perf_check[[i]]), "RMSE_pls"] / + pls_rf_sr[as.numeric(perf_check[[i]]), "RMSE_rf"],2)) + var_rf_prct = sort(round(pls_rf_sr[as.numeric(perf_check[[i]]), "nvars_rf"] / + pls_rf_sr[as.numeric(perf_check[[i]]), "nvars_pls"],2)) + level_pls = sort(table(pls_rf_sr[as.numeric(perf_check[[i]]), "Level_pls"])) + print(names(perf_check[i])) + print(pls_rf_sr[as.numeric(perf_check[[i]]),]) + cat("RMSE (1 - PLS/RF):", rmse_perf, "\n") + cat("Var number (RF/PLS):", var_rf_prct, "\n") + cat("Levels with PLS is better:", level_pls, "\n") + cat("\n\n") +} +``` + +# Collect variable importance +## Number of variables +```{r} +pls_rf_sr_long = melt(pls_rf_sr[pls_rf_sr$Resample == "Mean", c(1, 2, 6, 13)], id.vars = c("ptype", "resp")) +ggplot(data = pls_rf_sr_long, aes(x = variable, y = value, fill = ptype)) + + geom_boxplot() + + labs(list(x = "Models", y = "Number of variables" , + fill = "Predictor Set")) + + theme_bw() +``` + + +# Variable importance for PLS +```{r, echo=FALSE} +var_imp <- compVarImp(all_models[["pls"]][["spec"]]@model[[1]], scale = FALSE) +# plotVarImp(var_imp) +plotVarImpHeatmap(var_imp, xlab = "Species", ylab = "Band") +``` + +# Variable importance for RF +```{r, echo=FALSE} +var_imp <- compVarImp(all_models[["rf"]][["spec"]]@model[[1]], scale = FALSE) +# plotVarImp(var_imp) +plotVarImpHeatmap(var_imp, xlab = "Species", ylab = "Band") +``` + + +# Trophic levels +```{r} +var_imp_levels = var_imp +for(i in seq(length(var_imp_levels))){ + var_imp_levels[[i]]$RESPONSE = tl$Level[grep(var_imp_levels[[i]]$RESPONSE[1], tl$Species)] +} +plotVarImpHeatmap(var_imp_levels, xlab = "Species", ylab = "Band") +``` + + + + +When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file). + +The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed. diff --git a/src/500_analyse_biodiv_sr.nb.html b/src/500_analyse_biodiv_sr.nb.html new file mode 100644 index 0000000..1be7136 --- /dev/null +++ b/src/500_analyse_biodiv_sr.nb.html @@ -0,0 +1,1971 @@ + + + + + + + + + + + + + +500 Analyse Biodiv-RS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + +
+

Compare PLS and RF

+ + + +

+ + + + + + + +
+
+

Check performance of PLS and RF

+ + + +
[1] \elui\
+ [1] ptype           resp            Resample        mtype_pls       ncomp           nvars_pls       RMSE_pls       
+ [8] Rsquared_pls    MAE_pls         RMSE_normSD_pls Level_pls       mtype_rf        mtry            nvars_rf       
+[15] RMSE_rf         Rsquared_rf     MAE_rf          RMSE_normSD_rf  Level_rf       
+<0 Zeilen> (oder row.names mit Länge 0)
+RMSE (1 - PLS/RF):  
+Var number (RF/PLS):  
+Levels with PLS is better: 0 0 0 0 0 0 
+
+
+[1] \kmra\
+    ptype       resp Resample mtype_pls ncomp nvars_pls RMSE_pls Rsquared_pls  MAE_pls RMSE_normSD_pls  Level_pls mtype_rf mtry
+241  kmra SRasterids     Mean       pls     8        14 4.379693    0.5013369 3.558558       0.7824208     Plants       rf    6
+305  kmra    SRferns     Mean       pls    11        12 4.701912    0.7137181 3.686251       0.5388445     Plants       rf    2
+329  kmra  SRmammals     Mean       pls     5         7 1.676879    0.2334310 1.422120       0.9741508 Generalist       rf    2
+345  kmra SRmonocots     Mean       pls    13        14 4.060297    0.7540859 2.972480       0.5711398     Plants       rf    8
+    nvars_rf  RMSE_rf Rsquared_rf   MAE_rf RMSE_normSD_rf   Level_rf
+241        8 4.876534   0.2983567 3.992938      0.8711802     Plants
+305        4 4.973091   0.6784929 4.025896      0.5699220     Plants
+329        3 1.686781   0.3031130 1.388244      0.9799032 Generalist
+345        3 4.378556   0.7194768 3.490220      0.6159075     Plants
+RMSE (1 - PLS/RF): 0.01 0.05 0.07 0.1 
+Var number (RF/PLS): 0.21 0.33 0.43 0.57 
+Levels with PLS is better: 0 0 0 0 1 3 
+
+
+[1] \spec\
+    ptype              resp Resample mtype_pls ncomp nvars_pls   RMSE_pls Rsquared_pls   MAE_pls RMSE_normSD_pls
+433  spec       SRallplants     Mean       pls    10        20 12.1677951    0.6404985 9.8793737       0.6038114
+449  spec            SRants     Mean       pls    16        23  1.1402304    0.8933173 0.9233010       0.3360080
+465  spec            SRbats     Mean       pls    12        22  1.2893364    0.8664830 1.0101511       0.3942589
+473  spec            SRbees     Mean       pls     6        19  2.4459734    0.8413755 2.0174785       0.4138771
+481  spec           SRbirds     Mean       pls     6        14  4.2239376    0.7781512 3.1281718       0.4910325
+489  spec      SRcollembola     Mean       pls     8        24  1.0518208    0.7729542 0.7911845       0.4280729
+513  spec        SReudicots     Mean       pls    11        17  1.4665003    0.5199711 1.1955748       0.6671803
+537  spec      SRmagnoliids     Mean       pls    13        21  0.6450509    0.7977228 0.5350513       0.4503717
+545  spec         SRmammals     Mean       pls    12        19  1.3371388    0.4846824 1.0720648       0.7767851
+593  spec SRothercoleoptera     Mean       pls    10        17  3.1074301    0.6235629 2.4441677       0.6358466
+609  spec          SRrosids     Mean       pls    10        21  5.1116419    0.7113718 3.9698183       0.5027286
+617  spec          SRsnails     Mean       pls    16        30  2.7158186    0.7472981 1.8932602       0.4878470
+           Level_pls mtype_rf mtry nvars_rf    RMSE_rf Rsquared_rf     MAE_rf RMSE_normSD_rf         Level_rf
+433           Plants       rf    7        9 13.0671744   0.6428939 10.7297639      0.6484419           Plants
+449       Generalist       rf    8        8  1.7206668   0.7564282  1.1918567      0.5070535       Generalist
+465 Flyingpredatores       rf    2        7  1.7621145   0.7801090  1.3642665      0.5388270 Flyingpredatores
+473        Herbivore       rf    8        6  2.4714365   0.8242960  1.8320004      0.4181857        Herbivore
+481 Flyingpredatores       rf    8        7  4.4892308   0.7962765  3.3893444      0.5218728 Flyingpredatores
+489       Decomposer       rf    7        8  1.0924602   0.8015405  0.8882414      0.4446124       Decomposer
+513           Plants       rf    4        4  1.5758132   0.4602090  1.2865897      0.7169120           Plants
+537           Plants       rf    6        8  0.6691076   0.8298201  0.4836353      0.4671680           Plants
+545       Generalist       rf    8        3  1.3750537   0.5186430  1.1302886      0.7988110       Generalist
+593        Predators       rf    7        8  3.3653395   0.6095221  2.7642226      0.6886204        Predators
+609           Plants       rf    2        5  5.6763627   0.7203280  4.3897044      0.5582687           Plants
+617       Generalist       rf    3        3  2.8439540   0.7518932  2.1876421      0.5108641       Generalist
+RMSE (1 - PLS/RF): 0.01 0.03 0.04 0.04 0.05 0.06 0.07 0.07 0.08 0.1 0.27 0.34 
+Var number (RF/PLS): 0.1 0.16 0.24 0.24 0.32 0.32 0.33 0.35 0.38 0.45 0.47 0.5 
+Levels with PLS is better: 1 1 1 2 3 4 
+
+
+[1] \elsp\
+    ptype              resp Resample mtype_pls ncomp nvars_pls RMSE_pls Rsquared_pls  MAE_pls RMSE_normSD_pls  Level_pls
+649  elsp       SRallplants     Mean       pls    12        21 9.762681    0.7556949 7.467871       0.4804236     Plants
+673  elsp        SRasterids     Mean       pls    15        25 3.880332    0.5859014 3.304183       0.6880815     Plants
+809  elsp SRothercoleoptera     Mean       pls     8        14 3.298003    0.5622939 2.618039       0.6748419  Predators
+849  elsp        SRsyrphids     Mean       pls     7        24 1.939375    0.6371102 1.518929       0.5864162 Generalist
+    mtype_rf mtry nvars_rf   RMSE_rf Rsquared_rf    MAE_rf RMSE_normSD_rf   Level_rf
+649       rf    7        6 13.646908   0.5863973 11.277148      0.6715672     Plants
+673       rf    7        4  4.196627   0.5075030  3.459266      0.7441684     Plants
+809       rf    4        6  3.386110   0.5924158  2.858294      0.6928704  Predators
+849       rf    7        5  2.099933   0.5995575  1.712869      0.6349646 Generalist
+RMSE (1 - PLS/RF): 0.03 0.08 0.08 0.28 
+Var number (RF/PLS): 0.16 0.21 0.29 0.43 
+Levels with PLS is better: 0 0 0 1 1 2 
+ + + +
+
+

Collect variable importance

+
+

Number of variables

+ + + +
```r
+```r
+```r
+```r
+pls_rf_sr_long = melt(pls_rf_sr[pls_rf_sr$Resample == \Mean\, c(1, 2, 6, 13)], id.vars = c(\ptype\, \resp\))
+
+
+
+ + +
Error in melt(pls_rf_sr[pls_rf_sr$Resample == \Mean\, c(1, 2, 6, 13)],  : 
+  object 'pls_rf_sr' not found
+ + + +
+
+
+

Variable importance for PLS

+ + + +

+ + + +
+
+

Variable importance for RF

+ + + +

+ + + +
+
+

Trophic levels

+ + + +

+ + + +

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

+

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

+ +
+ +
LS0tDQp0aXRsZTogIjUwMCBBbmFseXNlIEJpb2Rpdi1SUyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyLCBpbmNsdWRlID0gRkFMU0V9DQojIFNldCB1cCB3b3JraW5nIGVudmlyb25tZW50IGFuZCBkZWZhdWx0cyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KcmVxdWlyZShlbnZpbWFSKQ0Kcm9vdF9mb2xkZXIgPSBwYXRoLmV4cGFuZCgifi9wbHlncm5kL2h5U3BlY1Zpc0tpbGkvIikNCnNvdXJjZShmaWxlLnBhdGgocm9vdF9mb2xkZXIsICJoeVNwZWNWaXNLaWxpL3NyYy8wMDBfc2V0dXBfd2luZG93cy5SIikpDQoNCmFsbF9tb2RlbHMgPSByZWFkUkRTKGZpbGUucGF0aChlbnZybXQkcGF0aF8xMjBfY29tcGlsZV9hbmFseXNpc19zciwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIm1vZGVsc19zci5yZHMiKSkNCg0KIyBDb2xsZWN0IG1vZGVsIHBlcmZvcm1hbmNlDQpnYW1fc3IgPSBtb2RlbFBlcmZvcm1hbmNlKGFsbF9tb2RlbHNbWyJnYW0iXV0pDQpwbHNfc3IgPSBtb2RlbFBlcmZvcm1hbmNlKGFsbF9tb2RlbHNbWyJwbHMiXV0pDQpyZl9zciA9IG1vZGVsUGVyZm9ybWFuY2UoYWxsX21vZGVsc1tbInJmIl1dKQ0KDQpzdW1tYXJ5KGdhbV9zcikNCnN1bW1hcnkocGxzX3NyKQ0Kc3VtbWFyeShyZl9zcikNCg0KIyBHZXQgdHJvcGhpYyBsZXZlbHMNCnRsID0gcmVhZC50YWJsZShmaWxlLnBhdGgocGF0aF9tZXRhLCAidHJvcGhpY19sZXZlbHMuY3N2IiksIGhlYWRlciA9IFRSVUUsIHNlcCA9ICI7IikNCmdhbV9zciA9IG1lcmdlKGdhbV9zciwgdGwsIGJ5LnggPSAicmVzcCIsIGJ5LnkgPSAiU3BlY2llcyIpDQpwbHNfc3IgPSBtZXJnZShwbHNfc3IsIHRsLCBieS54ID0gInJlc3AiLCBieS55ID0gIlNwZWNpZXMiKQ0KcmZfc3IgPSBtZXJnZShyZl9zciwgdGwsIGJ5LnggPSAicmVzcCIsIGJ5LnkgPSAiU3BlY2llcyIpDQoNCiMgQXJyYW5nZSBsZXZlbHMgYW5kIHNwZWNpZXMgbmFtZXMNCnBsc19zciRMZXZlbCA9IGZhY3RvcihwbHNfc3IkTGV2ZWwsIGxldmVscyhwbHNfc3IkTGV2ZWwpW2MoMSwgNSwgNCwgMywgNiwgMildICkNCnBsc19zciRyZXNwID0gYXMuY2hhcmFjdGVyKHBsc19zciRyZXNwKQ0KcGxzX3NyJHJlc3AgPSBzdWJzdHIocGxzX3NyJHJlc3AsIDMsIG5jaGFyKHBsc19zciRyZXNwKSkNCnBsc19zciRyZXNwID0gZ3N1YigiKF5bWzphbHBoYTpdXSkiLCAiXFxVXFwxIiwgcGxzX3NyJHJlc3AsIHBlcmw9VFJVRSkNCnBsc19zciRyZXNwID0gZmFjdG9yKHBsc19zciRyZXNwLCB1bmlxdWUocGxzX3NyJHJlc3Bbb3JkZXIocGxzX3NyJExldmVsLCBwbHNfc3IkcmVzcCldKSkNCg0KDQpyZl9zciRMZXZlbCA9IGZhY3RvcihyZl9zciRMZXZlbCwgbGV2ZWxzKHJmX3NyJExldmVsKVtjKDEsIDUsIDQsIDMsIDYsIDIpXSApDQpyZl9zciRyZXNwID0gYXMuY2hhcmFjdGVyKHJmX3NyJHJlc3ApDQpyZl9zciRyZXNwID0gc3Vic3RyKHJmX3NyJHJlc3AsIDMsIG5jaGFyKHJmX3NyJHJlc3ApKQ0KcmZfc3IkcmVzcCA9IGdzdWIoIiheW1s6YWxwaGE6XV0pIiwgIlxcVVxcMSIsIHJmX3NyJHJlc3AsIHBlcmw9VFJVRSkNCnJmX3NyJHJlc3AgPSBmYWN0b3IocmZfc3IkcmVzcCwgdW5pcXVlKHJmX3NyJHJlc3Bbb3JkZXIocmZfc3IkTGV2ZWwsIHJmX3NyJHJlc3ApXSkpDQpgYGANCg0KIyBDb21wYXJlIFBMUyBhbmQgUkYNCmBgYHtyLCBlY2hvPUZBTFNFfQ0KbW9kZWxzX3NyID0gcmJpbmQocGxzX3NyWywgLTRdLCByZl9zclssIC00XSkNCm1vZGVsc19zciRtcHR5cGUgPSBwYXN0ZTAobW9kZWxzX3NyJG10eXBlLCAiXyIsIG1vZGVsc19zciRwdHlwZSkNCm1vZGVsc19zciRtcHR5cGUgPSBmYWN0b3IobW9kZWxzX3NyJG1wdHlwZSwgbGV2ZWxzID0gYygicGxzX2Vsc3AiLCAicmZfZWxzcCIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInBsc19lbHVpIiwgInJmX2VsdWkiLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJwbHNfa21yYSIsICJyZl9rbXJhIiwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAicGxzX3NwZWMiLCAicmZfc3BlYyIpKQ0KDQpnZ3Bsb3QoZGF0YSA9IG1vZGVsc19zclttb2RlbHNfc3IkbXR5cGUgPT0gInBscyIgJiANCiAgICAgICAgICAgICAgICAgICAgICAgICAgKG1vZGVsc19zciRwdHlwZSA9PSAiZWx1aSIgfCBtb2RlbHNfc3IkcHR5cGUgPT0gInNwZWMiKSxdLCANCiAgICAgICBhZXMoeCA9IHJlc3AsIHkgPSBSTVNFX25vcm1TRCwgZmlsbCA9IG1wdHlwZSkpICsgDQogIGdlb21fYm94cGxvdCgpICsNCiAgZ2VvbV9obGluZSh5aW50ZXJjZXB0PWMoMC41LDEpLCBsaW5ldHlwZT0iZGFzaGVkIiwgY29sb3IgPSAiYmxhY2siKSArIA0KICBzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlPSJEYXJrMiIpICsgDQogIHRoZW1lX2J3KCkgKyANCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkgKyANCiAgbGFicyhsaXN0KHggPSAiU3BlY2llcyBncm91cHMiLCB5ID0gIlJNU0VuIiwgZmlsbCA9ICJNb2RlbCBzZXQiKSkNCg0KDQoNCmdncGxvdChkYXRhID0gbW9kZWxzX3NyW21vZGVsc19zciRtdHlwZSA9PSAicmYiICYgDQogICAgICAgICAgICAgICAgICAgICAgICAgIChtb2RlbHNfc3IkcHR5cGUgPT0gImVsdWkiIHwgbW9kZWxzX3NyJHB0eXBlID09ICJzcGVjIiksXSwgDQogICAgICAgYWVzKHggPSByZXNwLCB5ID0gUk1TRV9ub3JtU0QsIGZpbGwgPSBtcHR5cGUpKSArIA0KICBnZW9tX2JveHBsb3QoKSArDQogIGdlb21faGxpbmUoeWludGVyY2VwdD1jKDAuNSwxKSwgbGluZXR5cGU9ImRhc2hlZCIsIGNvbG9yID0gImJsYWNrIikgKyANCiAgc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZT0iRGFyazIiKSArIA0KICB0aGVtZV9idygpICsgDQogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0ID0gMSkpICsgDQogIGxhYnMobGlzdCh4ID0gIlNwZWNpZXMgZ3JvdXBzIiwgeSA9ICJSTVNFbiIsIGZpbGwgPSAiTW9kZWwgc2V0IikpDQoNCg0KZ2dwbG90KGRhdGEgPSBtb2RlbHNfc3JbbW9kZWxzX3NyJG10eXBlID09ICJyZiIgJiBtb2RlbHNfc3IkcHR5cGUgPT0gInNwZWMiLF0sIA0KICAgICAgIGFlcyh4ID0gcmVzcCwgeSA9IFJNU0Vfbm9ybVNELCBmaWxsID0gTGV2ZWwpKSArIA0KICBnZW9tX2JveHBsb3QoKSArDQogIGdlb21faGxpbmUoeWludGVyY2VwdD1jKDAuNSwxKSwgbGluZXR5cGU9ImRhc2hlZCIsIGNvbG9yID0gImJsYWNrIikgKyANCiAgc2NhbGVfZmlsbF9icmV3ZXIocGFsZXR0ZT0iRGFyazIiKSArIA0KICB0aGVtZV9idygpICsgDQogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0ID0gMSkpICsgDQogIGxhYnMobGlzdCh4ID0gIlNwZWNpZXMgcmljaG5lc3MiLCB5ID0gIlJNU0VuIiwgZmlsbCA9ICJUcm9waGljIGxldmVsIikpDQpgYGANCg0KYGBge3IsIGVjaG89RkFMU0V9DQpwbHNfcmZfc3IgPSBtZXJnZShwbHNfc3IsIHJmX3NyLCBieSA9IGMoInB0eXBlIiwgInJlc3AiLCAiUmVzYW1wbGUiKSwgYWxsLnkgPSBUUlVFKQ0KY29sbmFtZXMocGxzX3JmX3NyKVtncmVwKCJcXC54IiwgY29sbmFtZXMocGxzX3JmX3NyKSldID0gDQogIGdzdWIoIlxcLngiLCAiX3BscyIsIGNvbG5hbWVzKHBsc19yZl9zcilbZ3JlcCgiXFwueCIsIGNvbG5hbWVzKHBsc19yZl9zcikpXSkNCmNvbG5hbWVzKHBsc19yZl9zcilbZ3JlcCgiXFwueSIsIGNvbG5hbWVzKHBsc19yZl9zcikpXSA9IA0KICBnc3ViKCJcXC55IiwgIl9yZiIsIGNvbG5hbWVzKHBsc19yZl9zcilbZ3JlcCgiXFwueSIsIGNvbG5hbWVzKHBsc19yZl9zcikpXSkNCiMgbnJvdyhwbHNfcmZfc3IpDQoNCnB0eXBlcyA9IGMoImVsdWkiLCAia21yYSIsICJzcGVjIiwgImVsc3AiKQ0KcGVyZl9jaGVjayA9IGxhcHBseShwdHlwZXMsIGZ1bmN0aW9uKHB0KXsNCiAgc3ViZGYgPSBwbHNfcmZfc3JbIWlzLm5hKHBsc19yZl9zciRSTVNFX3BscykgJiANCiAgICAgICAgICAgICAgICAgICAgICBwbHNfcmZfc3IkcHR5cGUgPT0gcHQgJg0KICAgICAgICAgICAgICAgICAgICAgIHBsc19yZl9zciRSZXNhbXBsZSA9PSAiTWVhbiIsIF0NCiAgcm93bmFtZXMoc3ViZGZbc3ViZGYkUk1TRV9wbHMgPCBzdWJkZiRSTVNFX3JmLCBdKQ0KfSkNCm5hbWVzKHBlcmZfY2hlY2spID0gcHR5cGVzDQpgYGANCg0KIyBDaGVjayBwZXJmb3JtYW5jZSBvZiBQTFMgYW5kIFJGDQpgYGB7ciwgZWNobyA9IEZBTFNFfQ0KZm9yKGkgaW4gc2VxKGxlbmd0aChwZXJmX2NoZWNrKSkpew0KICBybXNlX3BlcmYgPSBzb3J0KHJvdW5kKDEtcGxzX3JmX3NyW2FzLm51bWVyaWMocGVyZl9jaGVja1tbaV1dKSwgIlJNU0VfcGxzIl0gLyANCiAgICAgICAgICAgICAgICAgICAgICAgICAgIHBsc19yZl9zclthcy5udW1lcmljKHBlcmZfY2hlY2tbW2ldXSksICJSTVNFX3JmIl0sMikpDQogIHZhcl9yZl9wcmN0ID0gc29ydChyb3VuZChwbHNfcmZfc3JbYXMubnVtZXJpYyhwZXJmX2NoZWNrW1tpXV0pLCAibnZhcnNfcmYiXSAvIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwbHNfcmZfc3JbYXMubnVtZXJpYyhwZXJmX2NoZWNrW1tpXV0pLCAibnZhcnNfcGxzIl0sMikpDQogIGxldmVsX3BscyA9IHNvcnQodGFibGUocGxzX3JmX3NyW2FzLm51bWVyaWMocGVyZl9jaGVja1tbaV1dKSwgIkxldmVsX3BscyJdKSkNCiAgcHJpbnQobmFtZXMocGVyZl9jaGVja1tpXSkpDQogIHByaW50KHBsc19yZl9zclthcy5udW1lcmljKHBlcmZfY2hlY2tbW2ldXSksXSkNCiAgY2F0KCJSTVNFICgxIC0gUExTL1JGKToiLCBybXNlX3BlcmYsICJcbiIpDQogIGNhdCgiVmFyIG51bWJlciAoUkYvUExTKToiLCB2YXJfcmZfcHJjdCwgIlxuIikNCiAgY2F0KCJMZXZlbHMgd2l0aCBQTFMgaXMgYmV0dGVyOiIsIGxldmVsX3BscywgIlxuIikNCiAgY2F0KCJcblxuIikNCn0NCmBgYA0KDQojIENvbGxlY3QgdmFyaWFibGUgaW1wb3J0YW5jZQ0KIyMgTnVtYmVyIG9mIHZhcmlhYmxlcw0KYGBge3J9DQpwbHNfcmZfc3JfbG9uZyA9IG1lbHQocGxzX3JmX3NyW3Bsc19yZl9zciRSZXNhbXBsZSA9PSAiTWVhbiIsIGMoMSwgMiwgNiwgMTMpXSwgaWQudmFycyA9IGMoInB0eXBlIiwgInJlc3AiKSkNCmdncGxvdChkYXRhID0gcGxzX3JmX3NyX2xvbmcsIGFlcyh4ID0gdmFyaWFibGUsIHkgPSB2YWx1ZSwgZmlsbCA9IHB0eXBlKSkgKw0KICBnZW9tX2JveHBsb3QoKSArIA0KICBsYWJzKGxpc3QoeCA9ICJNb2RlbHMiLCB5ID0gIk51bWJlciBvZiB2YXJpYWJsZXMiICwNCiAgICAgICAgICAgIGZpbGwgPSAiUHJlZGljdG9yIFNldCIpKSArDQogIHRoZW1lX2J3KCkNCmBgYA0KDQoNCiMgVmFyaWFibGUgaW1wb3J0YW5jZSBmb3IgUExTDQpgYGB7ciwgZWNobz1GQUxTRX0NCnZhcl9pbXAgPC0gY29tcFZhckltcChhbGxfbW9kZWxzW1sicGxzIl1dW1sic3BlYyJdXUBtb2RlbFtbMV1dLCBzY2FsZSA9IEZBTFNFKQ0KIyBwbG90VmFySW1wKHZhcl9pbXApDQpwbG90VmFySW1wSGVhdG1hcCh2YXJfaW1wLCB4bGFiID0gIlNwZWNpZXMiLCB5bGFiID0gIkJhbmQiKQ0KYGBgDQoNCiMgVmFyaWFibGUgaW1wb3J0YW5jZSBmb3IgUkYNCmBgYHtyLCBlY2hvPUZBTFNFfQ0KdmFyX2ltcCA8LSBjb21wVmFySW1wKGFsbF9tb2RlbHNbWyJyZiJdXVtbInNwZWMiXV1AbW9kZWxbWzFdXSwgc2NhbGUgPSBGQUxTRSkNCiMgcGxvdFZhckltcCh2YXJfaW1wKQ0KcGxvdFZhckltcEhlYXRtYXAodmFyX2ltcCwgeGxhYiA9ICJTcGVjaWVzIiwgeWxhYiA9ICJCYW5kIikNCmBgYA0KDQoNCiMgVHJvcGhpYyBsZXZlbHMNCmBgYHtyfQ0KdmFyX2ltcF9sZXZlbHMgPSB2YXJfaW1wDQpmb3IoaSBpbiBzZXEobGVuZ3RoKHZhcl9pbXBfbGV2ZWxzKSkpew0KICB2YXJfaW1wX2xldmVsc1tbaV1dJFJFU1BPTlNFID0gdGwkTGV2ZWxbZ3JlcCh2YXJfaW1wX2xldmVsc1tbaV1dJFJFU1BPTlNFWzFdLCB0bCRTcGVjaWVzKV0NCn0NCnBsb3RWYXJJbXBIZWF0bWFwKHZhcl9pbXBfbGV2ZWxzLCB4bGFiID0gIlNwZWNpZXMiLCB5bGFiID0gIkJhbmQiKQ0KYGBgDQoNCg0KDQoNCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ3RybCtTaGlmdCtLKiB0byBwcmV2aWV3IHRoZSBIVE1MIGZpbGUpLg0KDQpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuDQo=
+ + + +
+ + + + + + + + + + + + + + + + diff --git a/src/510_analyse_biodiv_sr_elev_res.Rmd b/src/510_analyse_biodiv_sr_elev_res.Rmd new file mode 100644 index 0000000..bc9ebf7 --- /dev/null +++ b/src/510_analyse_biodiv_sr_elev_res.Rmd @@ -0,0 +1,93 @@ +--- +title: "510 Analyse Biodiv-RS" +output: html_notebook +--- + +```{r, include = FALSE} +root_folder = path.expand("~/plygrnd/hySpecVisKili/") +source(file.path(root_folder, "hySpecVisKili/src/000_setup_windows.R")) + +all_models_res = readRDS(file.path(envrmt$path_220_compile_analysis_sr_elev_res, + "models_sr_elev_res.rds")) + + +# Collect model performance +models_res = modelPerformance(all_models_res[["rf"]]) + +models_res$resmodel = models_res$resp +models_res$resp = gsub("_pls_elui_res", "", models_res$resp) +models_res$resp = gsub("_rf_elui_res", "", models_res$resp) + +rf_pls_res = models_res[models_res$ptype == "pls_elui_res",] +rf_rf_res = models_res[models_res$ptype == "rf_elui_res",] + +summary(rf_pls_res) +summary(rf_rf_res) + +# Get trophic levels +tl = read.table(file.path(path_meta, "trophic_levels.csv"), header = TRUE, sep = ";") +rf_pls_res = merge(rf_pls_res, tl, by.x = "resp", by.y = "Species") +rf_rf_res = merge(rf_rf_res, tl, by.x = "resp", by.y = "Species") +``` + +# Compare PLS and RF +```{r, echo=FALSE} +models_res$mptype = paste0(models_res$mtype, "_", models_res$ptype) +models_res$mptype = factor(models_res$mptype) + + +ggplot(data = models_res, aes(x = resp, y = RMSE_normSD, fill = mptype)) + + geom_boxplot() + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) +``` + +# Collect variable importance +## Number of variables +```{r} +rf_pls_res_long = melt(models_res[models_res$Resample == "Mean", c(2,3,5)], id.vars = c("ptype", "resp")) +ggplot(data = rf_pls_res_long, aes(x = variable, y = value, fill = ptype)) + + geom_boxplot() + + labs(list(x = "Models", y = "Number of variables" , + fill = "Predictor Set")) + + theme_bw() +``` + + +# Variable importance for RF +```{r} +var_imp <- compVarImp(all_models_res$rf$rf_elui_res@model[[1]], scale = FALSE) +# plotVarImp(var_imp) +plotVarImpHeatmap(var_imp, xlab = "Species", ylab = "Band") +``` + + +# Trophic levels +```{r} +var_imp_levels = var_imp +for(i in seq(length(var_imp_levels))){ + act_species = gsub("_rf_elui_res", "", var_imp_levels[[i]]$RESPONSE[1]) + var_imp_levels[[i]]$RESPONSE = tl$Level[grep(act_species, tl$Species)] +} +plotVarImpHeatmap(var_imp_levels, xlab = "Species", ylab = "Band") +``` + +```{r} +t = do.call("rbind", var_imp) +t = t[t$mean>=0.6,] +t$RESPONSE = gsub("_rf_elui_res", "", t$RESPONSE) +tt = table(t$VARIABLE, t$RESPONSE) +pca_tt <- prcomp(tt, scale = TRUE, center = TRUE) +pca_var <- as.data.frame(pca_tt$rotation) +pca_obs <- as.data.frame(pca_tt$x) +ggplot(pca_var, aes(PC1, PC2))+ + geom_point()+ +geom_point(data = pca_obs, aes(PC1, PC2), color = "red") +ggbiplot(pca_tt, labels = rownames(pca_tt$x), choices = 1:2) +biplot(pca_tt) +``` + +When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file). + +The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed. diff --git a/src/510_analyse_biodiv_sr_elev_res.nb.html b/src/510_analyse_biodiv_sr_elev_res.nb.html new file mode 100644 index 0000000..ca9c7f4 --- /dev/null +++ b/src/510_analyse_biodiv_sr_elev_res.nb.html @@ -0,0 +1,287 @@ + + + + + + + + + + + + + +510 Analyse Biodiv-RS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+

Compare PLS and RF

+ + + +

+ + + +
+
+

Collect variable importance

+
+

Number of variables

+ + + +

+ + + +
+
+
+

Variable importance for RF

+ + + +

+ + + +
+
+

Trophic levels

+ + + +

+ + + + + + +

+ + +

+ + +

rr t = do.call(, var_imp) t = t[t$mean>=0.6,] t\(RESPONSE = gsub(\_rf_elui_res\, \\, t\)RESPONSE) tt = table(t\(VARIABLE, t\)RESPONSE) pca_tt <- prcomp(tt, scale = TRUE, center = TRUE) pca_var <- as.data.frame(pca_tt\(rotation) pca_obs <- as.data.frame(pca_tt\)x) ggplot(pca_var, aes(PC1, PC2))+ geom_point()+ geom_point(data = pca_obs, aes(PC1, PC2), color = ) ggbiplot(pca_tt, labels = rownames(pca_tt$x), choices = 1:2)

+

biplot(pca_tt)

+
+ + +

+ + + +

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

+

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

+ +
+ +
LS0tDQp0aXRsZTogIjUxMCBBbmFseXNlIEJpb2Rpdi1SUyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyLCBpbmNsdWRlID0gRkFMU0V9DQpyb290X2ZvbGRlciA9IHBhdGguZXhwYW5kKCJ+L3BseWdybmQvaHlTcGVjVmlzS2lsaS8iKQ0Kc291cmNlKGZpbGUucGF0aChyb290X2ZvbGRlciwgImh5U3BlY1Zpc0tpbGkvc3JjLzAwMF9zZXR1cF93aW5kb3dzLlIiKSkNCg0KYWxsX21vZGVsc19yZXMgPSByZWFkUkRTKGZpbGUucGF0aChlbnZybXQkcGF0aF8yMjBfY29tcGlsZV9hbmFseXNpc19zcl9lbGV2X3JlcywgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIm1vZGVsc19zcl9lbGV2X3Jlcy5yZHMiKSkNCg0KDQojIENvbGxlY3QgbW9kZWwgcGVyZm9ybWFuY2UNCm1vZGVsc19yZXMgPSBtb2RlbFBlcmZvcm1hbmNlKGFsbF9tb2RlbHNfcmVzW1sicmYiXV0pDQoNCm1vZGVsc19yZXMkcmVzbW9kZWwgPSBtb2RlbHNfcmVzJHJlc3ANCm1vZGVsc19yZXMkcmVzcCA9IGdzdWIoIl9wbHNfZWx1aV9yZXMiLCAiIiwgbW9kZWxzX3JlcyRyZXNwKQ0KbW9kZWxzX3JlcyRyZXNwID0gZ3N1YigiX3JmX2VsdWlfcmVzIiwgIiIsIG1vZGVsc19yZXMkcmVzcCkNCg0KcmZfcGxzX3JlcyA9IG1vZGVsc19yZXNbbW9kZWxzX3JlcyRwdHlwZSA9PSAicGxzX2VsdWlfcmVzIixdDQpyZl9yZl9yZXMgPSBtb2RlbHNfcmVzW21vZGVsc19yZXMkcHR5cGUgPT0gInJmX2VsdWlfcmVzIixdDQoNCnN1bW1hcnkocmZfcGxzX3JlcykNCnN1bW1hcnkocmZfcmZfcmVzKQ0KDQojIEdldCB0cm9waGljIGxldmVscw0KdGwgPSByZWFkLnRhYmxlKGZpbGUucGF0aChwYXRoX21ldGEsICJ0cm9waGljX2xldmVscy5jc3YiKSwgaGVhZGVyID0gVFJVRSwgc2VwID0gIjsiKQ0KcmZfcGxzX3JlcyA9IG1lcmdlKHJmX3Bsc19yZXMsIHRsLCBieS54ID0gInJlc3AiLCBieS55ID0gIlNwZWNpZXMiKQ0KcmZfcmZfcmVzID0gbWVyZ2UocmZfcmZfcmVzLCB0bCwgYnkueCA9ICJyZXNwIiwgYnkueSA9ICJTcGVjaWVzIikNCmBgYA0KDQojIENvbXBhcmUgUExTIGFuZCBSRg0KYGBge3IsIGVjaG89RkFMU0V9DQptb2RlbHNfcmVzJG1wdHlwZSA9IHBhc3RlMChtb2RlbHNfcmVzJG10eXBlLCAiXyIsIG1vZGVsc19yZXMkcHR5cGUpDQptb2RlbHNfcmVzJG1wdHlwZSA9IGZhY3Rvcihtb2RlbHNfcmVzJG1wdHlwZSkNCg0KDQpnZ3Bsb3QoZGF0YSA9IG1vZGVsc19yZXMsIGFlcyh4ID0gcmVzcCwgeSA9IFJNU0Vfbm9ybVNELCBmaWxsID0gbXB0eXBlKSkgKyANCiAgZ2VvbV9ib3hwbG90KCkgKw0KICB0aGVtZV9idygpICsgDQogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0ID0gMSkpICsgDQogIGxhYnMobGlzdCh4ID0gIlNwZWNpZXMgZ3JvdXBzIiwgeSA9ICJSTVNFbiIsIGZpbGwgPSAiTW9kZWwgc2V0IikpDQpgYGANCg0KIyBDb2xsZWN0IHZhcmlhYmxlIGltcG9ydGFuY2UNCiMjIE51bWJlciBvZiB2YXJpYWJsZXMNCmBgYHtyfQ0KcmZfcGxzX3Jlc19sb25nID0gbWVsdChtb2RlbHNfcmVzW21vZGVsc19yZXMkUmVzYW1wbGUgPT0gIk1lYW4iLCBjKDIsMyw1KV0sIGlkLnZhcnMgPSBjKCJwdHlwZSIsICJyZXNwIikpDQpnZ3Bsb3QoZGF0YSA9IHJmX3Bsc19yZXNfbG9uZywgYWVzKHggPSB2YXJpYWJsZSwgeSA9IHZhbHVlLCBmaWxsID0gcHR5cGUpKSArDQogIGdlb21fYm94cGxvdCgpICsgDQogIGxhYnMobGlzdCh4ID0gIk1vZGVscyIsIHkgPSAiTnVtYmVyIG9mIHZhcmlhYmxlcyIgLA0KICAgICAgICAgICAgZmlsbCA9ICJQcmVkaWN0b3IgU2V0IikpICsNCiAgdGhlbWVfYncoKQ0KYGBgDQoNCg0KIyBWYXJpYWJsZSBpbXBvcnRhbmNlIGZvciBSRg0KYGBge3J9DQp2YXJfaW1wIDwtIGNvbXBWYXJJbXAoYWxsX21vZGVsc19yZXMkcmYkcmZfZWx1aV9yZXNAbW9kZWxbWzFdXSwgc2NhbGUgPSBGQUxTRSkNCiMgcGxvdFZhckltcCh2YXJfaW1wKQ0KcGxvdFZhckltcEhlYXRtYXAodmFyX2ltcCwgeGxhYiA9ICJTcGVjaWVzIiwgeWxhYiA9ICJCYW5kIikNCmBgYA0KDQoNCiMgVHJvcGhpYyBsZXZlbHMNCmBgYHtyfQ0KdmFyX2ltcF9sZXZlbHMgPSB2YXJfaW1wDQpmb3IoaSBpbiBzZXEobGVuZ3RoKHZhcl9pbXBfbGV2ZWxzKSkpew0KICBhY3Rfc3BlY2llcyA9IGdzdWIoIl9yZl9lbHVpX3JlcyIsICIiLCB2YXJfaW1wX2xldmVsc1tbaV1dJFJFU1BPTlNFWzFdKQ0KICB2YXJfaW1wX2xldmVsc1tbaV1dJFJFU1BPTlNFID0gdGwkTGV2ZWxbZ3JlcChhY3Rfc3BlY2llcywgdGwkU3BlY2llcyldDQp9DQpwbG90VmFySW1wSGVhdG1hcCh2YXJfaW1wX2xldmVscywgeGxhYiA9ICJTcGVjaWVzIiwgeWxhYiA9ICJCYW5kIikNCmBgYA0KDQpgYGB7cn0NCnQgPSBkby5jYWxsKCJyYmluZCIsIHZhcl9pbXApDQp0ID0gdFt0JG1lYW4+PTAuNixdDQp0JFJFU1BPTlNFID0gZ3N1YigiX3JmX2VsdWlfcmVzIiwgIiIsIHQkUkVTUE9OU0UpDQp0dCA9IHRhYmxlKHQkVkFSSUFCTEUsIHQkUkVTUE9OU0UpDQpwY2FfdHQgPC0gcHJjb21wKHR0LCBzY2FsZSA9IFRSVUUsIGNlbnRlciA9IFRSVUUpDQpwY2FfdmFyIDwtIGFzLmRhdGEuZnJhbWUocGNhX3R0JHJvdGF0aW9uKQ0KcGNhX29icyA8LSBhcy5kYXRhLmZyYW1lKHBjYV90dCR4KQ0KZ2dwbG90KHBjYV92YXIsIGFlcyhQQzEsIFBDMikpKw0KICBnZW9tX3BvaW50KCkrDQpnZW9tX3BvaW50KGRhdGEgPSBwY2Ffb2JzLCBhZXMoUEMxLCBQQzIpLCBjb2xvciA9ICJyZWQiKQ0KZ2diaXBsb3QocGNhX3R0LCBsYWJlbHMgPSByb3duYW1lcyhwY2FfdHQkeCksIGNob2ljZXMgPSAxOjIpDQpiaXBsb3QocGNhX3R0KQ0KYGBgDQoNCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ3RybCtTaGlmdCtLKiB0byBwcmV2aWV3IHRoZSBIVE1MIGZpbGUpLg0KDQpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuDQo=
+ + + +
+ + + + + + + + diff --git a/src/520_analyse_biodiv_sr_two_step.Rmd b/src/520_analyse_biodiv_sr_two_step.Rmd new file mode 100644 index 0000000..8c38f70 --- /dev/null +++ b/src/520_analyse_biodiv_sr_two_step.Rmd @@ -0,0 +1,139 @@ +--- +title: "520 Analyse Biodiv-RS Two Step" +output: html_notebook +--- + +```{r, include = FALSE} +source("C:/Users/tnauss/permanent/plygrnd/KI-Hyperspec/HySpec_KiLi/src/000_set_environment.R") + +dir.create(path_analysis_sr_elev_res, showWarnings = FALSE) + +comb = readRDS(paste0(path_comb_gpm_sr, "ki_hyperspec_biodiv_non_scaled.rds")) +all_models = readRDS(file.path(path_compile_analysis_sr, "models_sr.rds")) +all_models_res = readRDS(file.path(path_compile_analysis_sr_elev_res, + "models_sr_elev_res.rds")) + + + +comb_sr_two_step = comb +model = all_models$gam$elev@model$gam_none +model_res = all_models_res$pls$gam_elev_res@model$rf_ffs + +gam2rf = comp2StepPred(comb_sr_two_step, model, model_res) +gam2rf[gam2rf$RMSE_normSD1 > gam2rf$RMSE_normSD2,] + + + +# Collect model performance +pls_res = modelPerformance(all_models_res[["pls"]]) +rf_res = modelPerformance(all_models_res[["rf"]]) + +pls_res$resmodel = pls_res$resp +pls_res$resp = gsub("_gam_elev_res", "", pls_res$resp) + +rf_res$resmodel = rf_res$resp +rf_res$resp = gsub("_gam_elev_res", "", rf_res$resp) + +summary(pls_res) +summary(rf_res) + +# Get trophic levels +tl = read.table(file.path(path_meta, "trophic_levels.csv"), header = TRUE, sep = ";") +pls_res = merge(pls_res, tl, by.x = "resp", by.y = "Species") +rf_res = merge(rf_res, tl, by.x = "resp", by.y = "Species") + +# Arrange levels and species names +rf_res$Level = factor(rf_res$Level, levels(rf_res$Level)[c(1, 5, 4, 3, 6, 2)] ) +rf_res$resp = substr(rf_res$resp, 3, nchar(rf_res$resp)) +rf_res$resp = gsub("(^[[:alpha:]])", "\\U\\1", rf_res$resp, perl=TRUE) +rf_res$resp = factor(rf_res$resp, unique(rf_res$resp[order(rf_res$Level, rf_res$resp)])) +``` + +# Compare PLS and RF +```{r, echo=FALSE} +ggplot(data = rf_res, aes(x = resp, y = RMSE_normSD, fill = Level)) + + geom_boxplot() + + geom_hline(yintercept=1, linetype="dashed", color = "black") + + scale_fill_brewer(palette="Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species richness", y = "RMSEn", fill = "Trophic level")) +``` + +```{r, echo=FALSE} +pls_rf_res = merge(pls_res, rf_res, by = c("ptype", "resp", "Resample"), all.y = TRUE) +colnames(pls_rf_res)[grep("\\.x", colnames(pls_rf_res))] = + gsub("\\.x", "_pls", colnames(pls_rf_res)[grep("\\.x", colnames(pls_rf_res))]) +colnames(pls_rf_res)[grep("\\.y", colnames(pls_rf_res))] = + gsub("\\.y", "_rf", colnames(pls_rf_res)[grep("\\.y", colnames(pls_rf_res))]) +# nrow(pls_rf_res) + +ptypes = c("elui", "kmra", "spec", "elsp") +perf_check = lapply(ptypes, function(pt){ + subdf = pls_rf_res[!is.na(pls_rf_res$RMSE_pls) & + pls_rf_res$ptype == pt & + pls_rf_res$Resample == "Mean", ] + rownames(subdf[subdf$RMSE_pls < subdf$RMSE_rf, ]) +}) +names(perf_check) = ptypes +``` + +# Check performance of PLS and RF +```{r, echo = FALSE} +for(i in seq(length(perf_check))){ +rmse_perf = sort(round(1-pls_rf_res[as.numeric(perf_check[[i]]), "RMSE_pls"] / + pls_rf_res[as.numeric(perf_check[[i]]), "RMSE_rf"],2)) +var_rf_prct = sort(round(pls_rf_res[as.numeric(perf_check[[i]]), "nvars_rf"] / + pls_rf_res[as.numeric(perf_check[[i]]), "nvars_pls"],2)) +level_pls = sort(table(pls_rf_res[as.numeric(perf_check[[i]]), "Level_pls"])) +print(names(perf_check[i])) +print(pls_rf_res[as.numeric(perf_check[[i]]),]) +cat("RMSE (1 - PLS/RF):", rmse_perf, "\n") +cat("Var number (RF/PLS):", var_rf_prct, "\n") +cat("Levels with PLS is better:", level_pls, "\n") +cat("\n\n") +} +``` + +# Collect variable importance +## Number of variables +```{r} +pls_rf_res_long = melt(pls_rf_res[pls_rf_res$Resample == "Mean", c(1, 2, 6, 13)], id.vars = c("ptype", "resp")) +ggplot(data = pls_rf_res_long, aes(x = variable, y = value, fill = ptype)) + + geom_boxplot() + + labs(list(x = "Models", y = "Number of variables" , + fill = "Predictor Set")) + + theme_bw() +``` + + +# Variable importance for PLS +```{r, echo=FALSE} +var_imp <- compVarImp(all_models_res[["pls"]][["spec"]]@model[[1]], scale = FALSE) +# plotVarImp(var_imp) +plotVarImpHeatmap(var_imp, xlab = "Species", ylab = "Band") +``` + +# Variable importance for RF +```{r, echo=FALSE} +var_imp <- compVarImp(all_models_res[["rf"]][["spec"]]@model[[1]], scale = FALSE) +# plotVarImp(var_imp) +plotVarImpHeatmap(var_imp, xlab = "Species", ylab = "Band") +``` + + +# Trophic levels +```{r} +var_imp_levels = var_imp +for(i in seq(length(var_imp_levels))){ + var_imp_levels[[i]]$RESPONSE = tl$Level[grep(var_imp_levels[[i]]$RESPONSE[1], tl$Species)] +} +plotVarImpHeatmap(var_imp_levels, xlab = "Species", ylab = "Band") +``` + + + + +When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file). + +The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed. diff --git a/src/520_analyse_biodiv_sr_two_step.nb.html b/src/520_analyse_biodiv_sr_two_step.nb.html new file mode 100644 index 0000000..fb44f45 --- /dev/null +++ b/src/520_analyse_biodiv_sr_two_step.nb.html @@ -0,0 +1,293 @@ + + + + + + + + + + + + + +520 Analyse Biodiv-RS Two Step + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+

Compare PLS and RF

+ + + +

+ + + + + + + +
+
+

Check performance of PLS and RF

+ + + + +
+
+

Collect variable importance

+
+

Number of variables

+ + + +
pls_rf_res_long = melt(pls_rf_res[pls_rf_res$Resample == "Mean", c(1, 2, 6, 13)], id.vars = c("ptype", "resp"))
+ggplot(data = pls_rf_res_long, aes(x = variable, y = value, fill = ptype)) +
+  geom_boxplot() + 
+  labs(list(x = "Models", y = "Number of variables" ,
+            fill = "Predictor Set")) +
+  theme_bw()
+ + + +
+
+
+

Variable importance for PLS

+ + + + +
+
+

Variable importance for RF

+ + + + +
+
+

Trophic levels

+ + + +
var_imp_levels = var_imp
+for(i in seq(length(var_imp_levels))){
+  var_imp_levels[[i]]$RESPONSE = tl$Level[grep(var_imp_levels[[i]]$RESPONSE[1], tl$Species)]
+}
+plotVarImpHeatmap(var_imp_levels, xlab = "Species", ylab = "Band")
+ + + +

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

+

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

+ +
+ +
LS0tDQp0aXRsZTogIjUyMCBBbmFseXNlIEJpb2Rpdi1SUyBUd28gU3RlcCINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyLCBpbmNsdWRlID0gRkFMU0V9DQpzb3VyY2UoIkM6L1VzZXJzL3RuYXVzcy9wZXJtYW5lbnQvcGx5Z3JuZC9LSS1IeXBlcnNwZWMvSHlTcGVjX0tpTGkvc3JjLzAwMF9zZXRfZW52aXJvbm1lbnQuUiIpDQoNCmRpci5jcmVhdGUocGF0aF9hbmFseXNpc19zcl9lbGV2X3Jlcywgc2hvd1dhcm5pbmdzID0gRkFMU0UpDQoNCmNvbWIgPSByZWFkUkRTKHBhc3RlMChwYXRoX2NvbWJfZ3BtX3NyLCAia2lfaHlwZXJzcGVjX2Jpb2Rpdl9ub25fc2NhbGVkLnJkcyIpKQ0KYWxsX21vZGVscyA9IHJlYWRSRFMoZmlsZS5wYXRoKHBhdGhfY29tcGlsZV9hbmFseXNpc19zciwgIm1vZGVsc19zci5yZHMiKSkNCmFsbF9tb2RlbHNfcmVzID0gcmVhZFJEUyhmaWxlLnBhdGgocGF0aF9jb21waWxlX2FuYWx5c2lzX3NyX2VsZXZfcmVzLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAibW9kZWxzX3NyX2VsZXZfcmVzLnJkcyIpKQ0KDQoNCg0KY29tYl9zcl90d29fc3RlcCA9IGNvbWINCm1vZGVsID0gYWxsX21vZGVscyRnYW0kZWxldkBtb2RlbCRnYW1fbm9uZQ0KbW9kZWxfcmVzID0gYWxsX21vZGVsc19yZXMkcGxzJGdhbV9lbGV2X3Jlc0Btb2RlbCRyZl9mZnMNCg0KZ2FtMnJmID0gY29tcDJTdGVwUHJlZChjb21iX3NyX3R3b19zdGVwLCBtb2RlbCwgbW9kZWxfcmVzKQ0KZ2FtMnJmW2dhbTJyZiRSTVNFX25vcm1TRDEgPiBnYW0ycmYkUk1TRV9ub3JtU0QyLF0NCg0KDQoNCiMgQ29sbGVjdCBtb2RlbCBwZXJmb3JtYW5jZQ0KcGxzX3JlcyA9IG1vZGVsUGVyZm9ybWFuY2UoYWxsX21vZGVsc19yZXNbWyJwbHMiXV0pDQpyZl9yZXMgPSBtb2RlbFBlcmZvcm1hbmNlKGFsbF9tb2RlbHNfcmVzW1sicmYiXV0pDQoNCnBsc19yZXMkcmVzbW9kZWwgPSBwbHNfcmVzJHJlc3ANCnBsc19yZXMkcmVzcCA9IGdzdWIoIl9nYW1fZWxldl9yZXMiLCAiIiwgcGxzX3JlcyRyZXNwKQ0KDQpyZl9yZXMkcmVzbW9kZWwgPSByZl9yZXMkcmVzcA0KcmZfcmVzJHJlc3AgPSBnc3ViKCJfZ2FtX2VsZXZfcmVzIiwgIiIsIHJmX3JlcyRyZXNwKQ0KDQpzdW1tYXJ5KHBsc19yZXMpDQpzdW1tYXJ5KHJmX3JlcykNCg0KIyBHZXQgdHJvcGhpYyBsZXZlbHMNCnRsID0gcmVhZC50YWJsZShmaWxlLnBhdGgocGF0aF9tZXRhLCAidHJvcGhpY19sZXZlbHMuY3N2IiksIGhlYWRlciA9IFRSVUUsIHNlcCA9ICI7IikNCnBsc19yZXMgPSBtZXJnZShwbHNfcmVzLCB0bCwgYnkueCA9ICJyZXNwIiwgYnkueSA9ICJTcGVjaWVzIikNCnJmX3JlcyA9IG1lcmdlKHJmX3JlcywgdGwsIGJ5LnggPSAicmVzcCIsIGJ5LnkgPSAiU3BlY2llcyIpDQoNCiMgQXJyYW5nZSBsZXZlbHMgYW5kIHNwZWNpZXMgbmFtZXMNCnJmX3JlcyRMZXZlbCA9IGZhY3RvcihyZl9yZXMkTGV2ZWwsIGxldmVscyhyZl9yZXMkTGV2ZWwpW2MoMSwgNSwgNCwgMywgNiwgMildICkNCnJmX3JlcyRyZXNwID0gc3Vic3RyKHJmX3JlcyRyZXNwLCAzLCBuY2hhcihyZl9yZXMkcmVzcCkpDQpyZl9yZXMkcmVzcCA9IGdzdWIoIiheW1s6YWxwaGE6XV0pIiwgIlxcVVxcMSIsIHJmX3JlcyRyZXNwLCBwZXJsPVRSVUUpDQpyZl9yZXMkcmVzcCA9IGZhY3RvcihyZl9yZXMkcmVzcCwgdW5pcXVlKHJmX3JlcyRyZXNwW29yZGVyKHJmX3JlcyRMZXZlbCwgcmZfcmVzJHJlc3ApXSkpDQpgYGANCg0KIyBDb21wYXJlIFBMUyBhbmQgUkYNCmBgYHtyLCBlY2hvPUZBTFNFfQ0KZ2dwbG90KGRhdGEgPSByZl9yZXMsIGFlcyh4ID0gcmVzcCwgeSA9IFJNU0Vfbm9ybVNELCBmaWxsID0gTGV2ZWwpKSArIA0KICBnZW9tX2JveHBsb3QoKSArDQogIGdlb21faGxpbmUoeWludGVyY2VwdD0xLCBsaW5ldHlwZT0iZGFzaGVkIiwgY29sb3IgPSAiYmxhY2siKSArIA0KICBzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlPSJEYXJrMiIpICsgDQogIHRoZW1lX2J3KCkgKyANCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkgKyANCiAgbGFicyhsaXN0KHggPSAiU3BlY2llcyByaWNobmVzcyIsIHkgPSAiUk1TRW4iLCBmaWxsID0gIlRyb3BoaWMgbGV2ZWwiKSkNCmBgYA0KDQpgYGB7ciwgZWNobz1GQUxTRX0NCnBsc19yZl9yZXMgPSBtZXJnZShwbHNfcmVzLCByZl9yZXMsIGJ5ID0gYygicHR5cGUiLCAicmVzcCIsICJSZXNhbXBsZSIpLCBhbGwueSA9IFRSVUUpDQpjb2xuYW1lcyhwbHNfcmZfcmVzKVtncmVwKCJcXC54IiwgY29sbmFtZXMocGxzX3JmX3JlcykpXSA9IA0KICBnc3ViKCJcXC54IiwgIl9wbHMiLCBjb2xuYW1lcyhwbHNfcmZfcmVzKVtncmVwKCJcXC54IiwgY29sbmFtZXMocGxzX3JmX3JlcykpXSkNCmNvbG5hbWVzKHBsc19yZl9yZXMpW2dyZXAoIlxcLnkiLCBjb2xuYW1lcyhwbHNfcmZfcmVzKSldID0gDQogIGdzdWIoIlxcLnkiLCAiX3JmIiwgY29sbmFtZXMocGxzX3JmX3JlcylbZ3JlcCgiXFwueSIsIGNvbG5hbWVzKHBsc19yZl9yZXMpKV0pDQojIG5yb3cocGxzX3JmX3JlcykNCg0KcHR5cGVzID0gYygiZWx1aSIsICJrbXJhIiwgInNwZWMiLCAiZWxzcCIpDQpwZXJmX2NoZWNrID0gbGFwcGx5KHB0eXBlcywgZnVuY3Rpb24ocHQpew0KICBzdWJkZiA9IHBsc19yZl9yZXNbIWlzLm5hKHBsc19yZl9yZXMkUk1TRV9wbHMpICYgDQogICAgICAgICAgICAgICAgICAgICAgcGxzX3JmX3JlcyRwdHlwZSA9PSBwdCAmDQogICAgICAgICAgICAgICAgICAgICAgcGxzX3JmX3JlcyRSZXNhbXBsZSA9PSAiTWVhbiIsIF0NCiAgcm93bmFtZXMoc3ViZGZbc3ViZGYkUk1TRV9wbHMgPCBzdWJkZiRSTVNFX3JmLCBdKQ0KfSkNCm5hbWVzKHBlcmZfY2hlY2spID0gcHR5cGVzDQpgYGANCg0KIyBDaGVjayBwZXJmb3JtYW5jZSBvZiBQTFMgYW5kIFJGDQpgYGB7ciwgZWNobyA9IEZBTFNFfQ0KZm9yKGkgaW4gc2VxKGxlbmd0aChwZXJmX2NoZWNrKSkpew0Kcm1zZV9wZXJmID0gc29ydChyb3VuZCgxLXBsc19yZl9yZXNbYXMubnVtZXJpYyhwZXJmX2NoZWNrW1tpXV0pLCAiUk1TRV9wbHMiXSAvIA0KICAgICAgICAgICAgICAgICAgICAgICAgIHBsc19yZl9yZXNbYXMubnVtZXJpYyhwZXJmX2NoZWNrW1tpXV0pLCAiUk1TRV9yZiJdLDIpKQ0KdmFyX3JmX3ByY3QgPSBzb3J0KHJvdW5kKHBsc19yZl9yZXNbYXMubnVtZXJpYyhwZXJmX2NoZWNrW1tpXV0pLCAibnZhcnNfcmYiXSAvIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgcGxzX3JmX3Jlc1thcy5udW1lcmljKHBlcmZfY2hlY2tbW2ldXSksICJudmFyc19wbHMiXSwyKSkNCmxldmVsX3BscyA9IHNvcnQodGFibGUocGxzX3JmX3Jlc1thcy5udW1lcmljKHBlcmZfY2hlY2tbW2ldXSksICJMZXZlbF9wbHMiXSkpDQpwcmludChuYW1lcyhwZXJmX2NoZWNrW2ldKSkNCnByaW50KHBsc19yZl9yZXNbYXMubnVtZXJpYyhwZXJmX2NoZWNrW1tpXV0pLF0pDQpjYXQoIlJNU0UgKDEgLSBQTFMvUkYpOiIsIHJtc2VfcGVyZiwgIlxuIikNCmNhdCgiVmFyIG51bWJlciAoUkYvUExTKToiLCB2YXJfcmZfcHJjdCwgIlxuIikNCmNhdCgiTGV2ZWxzIHdpdGggUExTIGlzIGJldHRlcjoiLCBsZXZlbF9wbHMsICJcbiIpDQpjYXQoIlxuXG4iKQ0KfQ0KYGBgDQoNCiMgQ29sbGVjdCB2YXJpYWJsZSBpbXBvcnRhbmNlDQojIyBOdW1iZXIgb2YgdmFyaWFibGVzDQpgYGB7cn0NCnBsc19yZl9yZXNfbG9uZyA9IG1lbHQocGxzX3JmX3Jlc1twbHNfcmZfcmVzJFJlc2FtcGxlID09ICJNZWFuIiwgYygxLCAyLCA2LCAxMyldLCBpZC52YXJzID0gYygicHR5cGUiLCAicmVzcCIpKQ0KZ2dwbG90KGRhdGEgPSBwbHNfcmZfcmVzX2xvbmcsIGFlcyh4ID0gdmFyaWFibGUsIHkgPSB2YWx1ZSwgZmlsbCA9IHB0eXBlKSkgKw0KICBnZW9tX2JveHBsb3QoKSArIA0KICBsYWJzKGxpc3QoeCA9ICJNb2RlbHMiLCB5ID0gIk51bWJlciBvZiB2YXJpYWJsZXMiICwNCiAgICAgICAgICAgIGZpbGwgPSAiUHJlZGljdG9yIFNldCIpKSArDQogIHRoZW1lX2J3KCkNCmBgYA0KDQoNCiMgVmFyaWFibGUgaW1wb3J0YW5jZSBmb3IgUExTDQpgYGB7ciwgZWNobz1GQUxTRX0NCnZhcl9pbXAgPC0gY29tcFZhckltcChhbGxfbW9kZWxzX3Jlc1tbInBscyJdXVtbInNwZWMiXV1AbW9kZWxbWzFdXSwgc2NhbGUgPSBGQUxTRSkNCiMgcGxvdFZhckltcCh2YXJfaW1wKQ0KcGxvdFZhckltcEhlYXRtYXAodmFyX2ltcCwgeGxhYiA9ICJTcGVjaWVzIiwgeWxhYiA9ICJCYW5kIikNCmBgYA0KDQojIFZhcmlhYmxlIGltcG9ydGFuY2UgZm9yIFJGDQpgYGB7ciwgZWNobz1GQUxTRX0NCnZhcl9pbXAgPC0gY29tcFZhckltcChhbGxfbW9kZWxzX3Jlc1tbInJmIl1dW1sic3BlYyJdXUBtb2RlbFtbMV1dLCBzY2FsZSA9IEZBTFNFKQ0KIyBwbG90VmFySW1wKHZhcl9pbXApDQpwbG90VmFySW1wSGVhdG1hcCh2YXJfaW1wLCB4bGFiID0gIlNwZWNpZXMiLCB5bGFiID0gIkJhbmQiKQ0KYGBgDQoNCg0KIyBUcm9waGljIGxldmVscw0KYGBge3J9DQp2YXJfaW1wX2xldmVscyA9IHZhcl9pbXANCmZvcihpIGluIHNlcShsZW5ndGgodmFyX2ltcF9sZXZlbHMpKSl7DQogIHZhcl9pbXBfbGV2ZWxzW1tpXV0kUkVTUE9OU0UgPSB0bCRMZXZlbFtncmVwKHZhcl9pbXBfbGV2ZWxzW1tpXV0kUkVTUE9OU0VbMV0sIHRsJFNwZWNpZXMpXQ0KfQ0KcGxvdFZhckltcEhlYXRtYXAodmFyX2ltcF9sZXZlbHMsIHhsYWIgPSAiU3BlY2llcyIsIHlsYWIgPSAiQmFuZCIpDQpgYGANCg0KDQoNCg0KV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDdHJsK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuDQoNClRoZSBwcmV2aWV3IHNob3dzIHlvdSBhIHJlbmRlcmVkIEhUTUwgY29weSBvZiB0aGUgY29udGVudHMgb2YgdGhlIGVkaXRvci4gQ29uc2VxdWVudGx5LCB1bmxpa2UgKktuaXQqLCAqUHJldmlldyogZG9lcyBub3QgcnVuIGFueSBSIGNvZGUgY2h1bmtzLiBJbnN0ZWFkLCB0aGUgb3V0cHV0IG9mIHRoZSBjaHVuayB3aGVuIGl0IHdhcyBsYXN0IHJ1biBpbiB0aGUgZWRpdG9yIGlzIGRpc3BsYXllZC4NCg==
+ + + +
+ + + + + + + + diff --git a/src/600_analyse_biodiv_sr.Rmd b/src/600_analyse_biodiv_sr.Rmd new file mode 100644 index 0000000..560ace5 --- /dev/null +++ b/src/600_analyse_biodiv_sr.Rmd @@ -0,0 +1,186 @@ +--- +title: "600 Analyse Biodiv-RS" +output: + html_document: + df_print: paged +editor_options: + chunk_output_type: console +--- + +```{r, include = FALSE} +# Set up working environment and defaults -------------------------------------- +if (Sys.info()["sysname"] == "Windows") { + filepath_base <- "D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R" +} else { + filepath_base <- "/mnt/sd19006/data/users/tnauss/KI-Hyperspec/hySpecVisKili/src/000_set_environment_linux.R" +} +source(filepath_base) + +all_models <- readRDS(file.path( + path_compile_analysis_sr, + "models_sr.rds" +)) + +# Collect model performance +gam_sr <- modelPerformance(all_models[["gam"]]) +pls_sr <- modelPerformance(all_models[["pls"]]) +rf_sr <- modelPerformance(all_models[["rf"]]) + +summary(gam_sr) +summary(pls_sr) +summary(rf_sr) + +# Get trophic levels +tl <- read.table(file.path(path_meta, "trophic_levels.csv"), header = TRUE, sep = ";") +gam_sr <- merge(gam_sr, tl, by.x = "resp", by.y = "Species") +pls_sr <- merge(pls_sr, tl, by.x = "resp", by.y = "Species") +rf_sr <- merge(rf_sr, tl, by.x = "resp", by.y = "Species") + +# Arrange levels and species names +gam_sr$Level <- factor(gam_sr$Level, levels(gam_sr$Level)[c(1, 5, 4, 3, 6, 2)]) +gam_sr$resp <- as.character(gam_sr$resp) +gam_sr$resp <- substr(gam_sr$resp, 3, nchar(gam_sr$resp)) +gam_sr$resp <- gsub("(^[[:alpha:]])", "\\U\\1", gam_sr$resp, perl = TRUE) +gam_sr$resp <- factor(gam_sr$resp, unique(gam_sr$resp[order(gam_sr$Level, gam_sr$resp)])) + +pls_sr$Level <- factor(pls_sr$Level, levels(pls_sr$Level)[c(1, 5, 4, 3, 6, 2)]) +pls_sr$resp <- as.character(pls_sr$resp) +pls_sr$resp <- substr(pls_sr$resp, 3, nchar(pls_sr$resp)) +pls_sr$resp <- gsub("(^[[:alpha:]])", "\\U\\1", pls_sr$resp, perl = TRUE) +pls_sr$resp <- factor(pls_sr$resp, unique(pls_sr$resp[order(pls_sr$Level, pls_sr$resp)])) + + +rf_sr$Level <- factor(rf_sr$Level, levels(rf_sr$Level)[c(1, 5, 4, 3, 6, 2)]) +rf_sr$resp <- as.character(rf_sr$resp) +rf_sr$resp <- substr(rf_sr$resp, 3, nchar(rf_sr$resp)) +rf_sr$resp <- gsub("(^[[:alpha:]])", "\\U\\1", rf_sr$resp, perl = TRUE) +rf_sr$resp <- factor(rf_sr$resp, unique(rf_sr$resp[order(rf_sr$Level, rf_sr$resp)])) + +gam_sr$mptype <- paste0(gam_sr$mtype, "_", gam_sr$ptype) +pls_sr$mptype <- paste0(pls_sr$mtype, "_", pls_sr$ptype) +rf_sr$mptype <- paste0(rf_sr$mtype, "_", rf_sr$ptype) + +# Create merged model +model_sr_m <- rbind( + gam_sr[, c("resp", "mtype", "ptype", "mptype", "Resample", "RMSE", "Rsquared", "RMSE_normSD")], + pls_sr[, c("resp", "mtype", "ptype", "mptype", "Resample", "RMSE", "Rsquared", "RMSE_normSD")], + rf_sr[, c("resp", "mtype", "ptype", "mptype", "Resample", "RMSE", "Rsquared", "RMSE_normSD")] +) + + +# Some preliminary results +best_model <- function(data) { + best_model <- lapply(unique(data$resp), function(r) { + tmp <- data[data$resp == r & data$Resample == "Mean", ] + tmp[which(min(tmp$RMSE) == tmp$RMSE), ] + }) + best_model <- do.call("rbind", best_model) + + for (r in unique(best_model$resp[duplicated(best_model$resp) | duplicated(best_model$resp, fromLast = TRUE)])) { + tmp <- best_model[best_model$resp == r, ] + if (any("e" != substr(tmp$ptype, 1, 1))) { + use <- which("e" != substr(tmp$ptype, 1, 1)) + } else { + use <- 1 + } + rmv <- seq(nrow(tmp)) + rmv <- rmv[use != rmv] + best_model[best_model$resp == r, "Resample"][rmv] <- "remove" + } + return(list(best_model = best_model, best_model_table = table(best_model$mptype[best_model$Resample != "remove"]))) +} + + +gam_models = c("gam_elev", "gam_elui") +pls_models = sort(unique(model_sr_m$mptype[model_sr_m$mtype == "pls"])) +pls_models_single = pls_models[c(3, 5, 6, 7)] +rf_models = sort(unique(model_sr_m$mptype[model_sr_m$mtype == "rf"])) +rf_models_single = rf_models[c(3, 5, 6, 7)] + + +best_model_gam <- best_model(model_sr_m[model_sr_m$mptype %in% gam_models, ]) +print(best_model_gam$best_model_table) + +best_model_pls_incl_gam <- best_model(model_sr_m[model_sr_m$mptype %in% pls_models | model_sr_m$mptype %in% gam_models, ]) +best_model_pls_incl_gam$best_model_table + +best_model_pls_single <- best_model(model_sr_m[model_sr_m$mptype %in% pls_models_single, ]) +best_model_pls_single$best_model_table + +best_model_pls_single_incl_gam <- best_model(model_sr_m[model_sr_m$mptype %in% pls_models_single| model_sr_m$mptype %in% gam_models, ]) +best_model_pls_single_incl_gam$best_model_table + +best_model_rf_incl_gam <- best_model(model_sr_m[model_sr_m$mptype %in% rf_models | model_sr_m$mptype %in% gam_models, ]) +best_model_rf_incl_gam$best_model_table + +best_model_rf_single <- best_model(model_sr_m[model_sr_m$mptype %in% rf_models_single, ]) +best_model_rf_single$best_model_table + +best_model_rf_single_incl_gam <- best_model(model_sr_m[model_sr_m$mptype %in% rf_models_single| model_sr_m$mptype %in% gam_models, ]) +best_model_rf_single_incl_gam$best_model_table + + + +``` + + +Lable colors in the first tow boxplots have the following meaning: blue = LiDAR is better than hyperspectral observations, green = hyperspectral is better, black = prediction using elevation is better. +```{r} +best_model_gam$best_model_table + +best_model_pls_incl_gam$best_model_table + +best_model_pls_single$best_model_table + +best_model_pls_single_incl_gam$best_model_table + +best_model_rf_incl_gam$best_model_table + +best_model_rf_single$best_model_table + +best_model_rf_single_incl_gam$best_model_table + + +lbl_color <- best_model_pls_single_incl_gam$best_model[, c("resp", "ptype")] +lbl_color$color = "black" +lbl_color$color[lbl_color$ptype == "lidr"] = "blue" +lbl_color$color[lbl_color$ptype == "spec"] = "darkgreen" + +ggplot( + data = model_sr_m[model_sr_m$mptype %in% pls_models_single| model_sr_m$mptype %in% gam_models, ], + aes(x = resp, y = RMSE_normSD, fill = mptype) +) + + geom_boxplot() + + geom_hline(yintercept = c(0.5, 1), linetype = "dashed", color = "black") + + scale_fill_brewer(palette = "Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1, colour = lbl_color$color)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) + + +lbl_color <- best_model_rf_single_incl_gam$best_model[, c("resp", "ptype")] +lbl_color$color = "black" +lbl_color$color[lbl_color$ptype == "lidr"] = "blue" +lbl_color$color[lbl_color$ptype == "spec"] = "darkgreen" + +ggplot( + data = model_sr_m[model_sr_m$mptype %in% rf_models_single| model_sr_m$mptype %in% gam_models, ], + aes(x = resp, y = RMSE_normSD, fill = mptype) +) + + geom_boxplot() + + geom_hline(yintercept = c(0.5, 1), linetype = "dashed", color = "black") + + scale_fill_brewer(palette = "Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1, colour = lbl_color$color)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) + + +ggplot(data = rbind(model_sr_m[(model_sr_m$mptype == "gam_elui" | model_sr_m$mptype == "gam_elev" | model_sr_m$mptype == "pls_spec" | model_sr_m$mptype == "pls_lidr" | model_sr_m$mptype == "pls_elui" | model_sr_m$mptype == "rf_spec" | model_sr_m$mptype == "rf_lidr" | model_sr_m$mptype == "rf_elui") & model_sr_m$Resample != "Mean", ]), aes(x = resp, y = RMSE_normSD, fill = mptype)) + + geom_boxplot() + + geom_hline(yintercept = c(0.5, 1), linetype = "dashed", color = "black") + + scale_fill_brewer(palette = "Dark2") + + theme_bw() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + + labs(list(x = "Species groups", y = "RMSEn", fill = "Model set")) + +``` \ No newline at end of file diff --git a/src/600_analyse_biodiv_sr.html b/src/600_analyse_biodiv_sr.html new file mode 100644 index 0000000..d229841 --- /dev/null +++ b/src/600_analyse_biodiv_sr.html @@ -0,0 +1,1760 @@ + + + + + + + + + + + + + +600 Analyse Biodiv-RS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +

Lable colors in the first tow boxplots have the following meaning: blue = LiDAR is better than hyperspectral observations, green = hyperspectral is better, black = prediction using elevation is better.

+
best_model_gam$best_model_table
+
## 
+## gam_elev gam_elui 
+##       10       17
+
best_model_pls_incl_gam$best_model_table
+
## 
+## pls_eldr pls_elsp pls_esld pls_lidr pls_spec pls_splr 
+##        2        2        2        1        3       17
+
best_model_pls_single$best_model_table
+
## 
+## pls_kmra pls_lidr pls_spec 
+##        1        9       17
+
best_model_pls_single_incl_gam$best_model_table
+
## 
+## gam_elev gam_elui pls_lidr pls_spec 
+##        1        3        8       15
+
best_model_rf_incl_gam$best_model_table
+
## 
+## rf_eldr rf_elsp rf_esld rf_spec rf_splr 
+##       1       3       7       3      13
+
best_model_rf_single$best_model_table
+
## 
+## rf_lidr rf_spec 
+##       9      18
+
best_model_rf_single_incl_gam$best_model_table
+
## 
+## gam_elev gam_elui  rf_lidr  rf_spec 
+##        1        1        8       17
+
lbl_color <- best_model_pls_single_incl_gam$best_model[, c("resp", "ptype")]
+lbl_color$color = "black"
+lbl_color$color[lbl_color$ptype == "lidr"] = "blue"
+lbl_color$color[lbl_color$ptype == "spec"] = "darkgreen"
+
+ggplot(
+  data = model_sr_m[model_sr_m$mptype %in% pls_models_single| model_sr_m$mptype %in% gam_models, ],
+  aes(x = resp, y = RMSE_normSD, fill = mptype)
+) +
+  geom_boxplot() +
+  geom_hline(yintercept = c(0.5, 1), linetype = "dashed", color = "black") +
+  scale_fill_brewer(palette = "Dark2") +
+  theme_bw() +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1, colour  = lbl_color$color)) +
+  labs(list(x = "Species groups", y = "RMSEn", fill = "Model set"))
+
## Warning: Vectorized input to `element_text()` is not officially supported.
+## Results may be unexpected or may change in future versions of ggplot2.
+

+
lbl_color <- best_model_rf_single_incl_gam$best_model[, c("resp", "ptype")]
+lbl_color$color = "black"
+lbl_color$color[lbl_color$ptype == "lidr"] = "blue"
+lbl_color$color[lbl_color$ptype == "spec"] = "darkgreen"
+
+ggplot(
+  data = model_sr_m[model_sr_m$mptype %in% rf_models_single| model_sr_m$mptype %in% gam_models, ],
+  aes(x = resp, y = RMSE_normSD, fill = mptype)
+) +
+  geom_boxplot() +
+  geom_hline(yintercept = c(0.5, 1), linetype = "dashed", color = "black") +
+  scale_fill_brewer(palette = "Dark2") +
+  theme_bw() +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1, colour  = lbl_color$color)) +
+  labs(list(x = "Species groups", y = "RMSEn", fill = "Model set"))
+
## Warning: Vectorized input to `element_text()` is not officially supported.
+## Results may be unexpected or may change in future versions of ggplot2.
+

+
ggplot(data = rbind(model_sr_m[(model_sr_m$mptype == "gam_elui" | model_sr_m$mptype == "gam_elev" | model_sr_m$mptype == "pls_spec" | model_sr_m$mptype == "pls_lidr" | model_sr_m$mptype == "pls_elui" | model_sr_m$mptype == "rf_spec" | model_sr_m$mptype == "rf_lidr" | model_sr_m$mptype == "rf_elui") & model_sr_m$Resample != "Mean", ]), aes(x = resp, y = RMSE_normSD, fill = mptype)) +
+  geom_boxplot() +
+  geom_hline(yintercept = c(0.5, 1), linetype = "dashed", color = "black") +
+  scale_fill_brewer(palette = "Dark2") +
+  theme_bw() +
+  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
+  labs(list(x = "Species groups", y = "RMSEn", fill = "Model set"))
+

+ + + + +
+ + + + + + + + + + + + + + + diff --git a/src/Neues Textdokument.txt b/src/Neues Textdokument.txt new file mode 100644 index 0000000..e996ee2 --- /dev/null +++ b/src/Neues Textdokument.txt @@ -0,0 +1,6 @@ +1. PCA +2. PCA inverse (95%) +3. VegIndices +4. Haralick (8, 16, x), 1, 3, 9 15 30 m, welches Band? PCA 1? nur die, die gut sind +5. Openness, closeness +5. Mean, SD, Rao Q pro Datenlayer und einmal über alle, Rao Q ggf. auf Fenstern diff --git a/src/findings.txt b/src/findings.txt new file mode 100644 index 0000000..bf1bd47 --- /dev/null +++ b/src/findings.txt @@ -0,0 +1,20 @@ +ELUI +RF always performs better than PLS for elui. + +KMRA +PLS better for: SRasterids, SRferns, SRmammals, SRmonocots +RMSE percentage difference: 0.01 0.05 0.07 0.10 +Variables used by RF/PLS: 0.21 0.33 0.43 0.57 +RF performs worse than PLS for kmra for 4 groups. Differences in RMSE range between 1% and 10%. RF requires only between 21% and 57% of the variables. + +SPEC +PLS better for: SRallplants, SRants, SRbats, SRbees, SRbirds, SRcollembola, SReudicots, SRmagnoliids, SRmammals, SRothercoleoptera, SRrosids, SRsnails +RMSE percentage difference: 0.01 0.03 0.04 0.04 0.05 0.06 0.07 0.07 0.08 0.10 0.27 0.34 +Variables used by RF/PLS: 0.10 0.16 0.24 0.24 0.32 0.32 0.33 0.35 0.38 0.45 0.47 0.50 +RF performs worse than PLS for spec for 12 groups. Differences in RMSE range between 1% and 34%. RF requires only between 10% and 50% of the variables. + +ELSP +PLS better for: SRallplants, SRasterids, SRothercoleoptera, SRsyrphids +RMSE percentage difference: 0.03 0.08 0.08 0.28 +Variables used by RF/PLS: 0.16 0.21 0.29 0.43 +RF performs worse than PLS for elsp for 4 groups. Differences in RMSE range between 3% and 28%. RF requires only between 16% and 43% of the variables. \ No newline at end of file diff --git a/src/functions/001_functions.R b/src/functions/001_functions.R new file mode 100644 index 0000000..6455422 --- /dev/null +++ b/src/functions/001_functions.R @@ -0,0 +1,791 @@ +# Visually check data ---------------------------------------------------------- +visCheck = function(datapath, polygonfile, band = 109){ + ds = list.files(datapath, full.names = TRUE) + pb = shapefile(polygonfile) + + reproj = TRUE + for(d in ds){ + r = readRDS(d) + if(reproj){ + pb = spTransform(pb, projection(r)) + reproj = FALSE + } + plot(r[[band]], main = substr(basename(d), 1, 4)) + plot(pb[grep(substr(basename(d), 1, 4), pb$PlotID),], add = TRUE) + } +} + + + +# Compute predictions ---------------------------------------------------------- +compPredictions = function(model, input){ + if(inherits(model, "try-error")){ + predictions = NA + } else { + non_na_pos = which( + complete.cases( + input[, model$selectedvars])) + + predictions = NA + predictions[non_na_pos] = predict(model, input[non_na_pos,]) + } + return(predictions) +} + + + +# Compile residual datasets ---------------------------------------------------- +compResData = function(comb_sr, pt, mt, model_path = path_model_gpm_sr, suf = "_res"){ + comb_sr_elev_res = comb_sr + model_files = list.files(model_path, full.names = TRUE, + pattern = glob2rx(paste0(pt, mt))) + + for(m in model_files){ + act_model = readRDS(m)@model[[1]][[1]][[1]] + + if(inherits(act_model$model, "try-error")){ + act_predictions = NA + } else { + non_na_pos = which( + complete.cases( + comb_sr_elev_res@data$input[, act_model$model$selectedvars])) + + act_predictions = NA + act_predictions[non_na_pos] = predict(act_model$model, comb_sr@data$input[non_na_pos,]) + } + comb_sr_elev_res@data$input[, act_model$response] = + comb_sr_elev_res@data$input[, act_model$response] - + act_predictions + + colname_pos = grep(act_model$response, colnames(comb_sr_elev_res@data$input)) + colnames(comb_sr_elev_res@data$input)[colname_pos] = + paste0(colnames(comb_sr_elev_res@data$input)[colname_pos], + gsub("[*]", "", paste0("_", mt, "_", pt, suf))) + } + + comb_sr_elev_res@meta$input$RESPONSE = + paste0(comb_sr_elev_res@meta$input$RESPONSE, + gsub("[*]", "", paste0("_", mt, "_", pt, suf))) + + comb_sr_elev_res@meta$input$RESPONSE_FINAL = comb_sr_elev_res@meta$input$RESPONSE + return(comb_sr_elev_res) +} + + + +# Train and tune models -------------------------------------------------------- +compModels = function(model, pt, mt, rs = NULL, outpath, nested_cv = FALSE){ + foreach (i = seq(length(model@meta$input$RESPONSE)), .packages = c("gpm", "caret", "randomForest", "CAST")) %dopar% { + + model@meta$input$RESPONSE_FINAL = model@meta$input$RESPONSE[i] + model@data$input = model@data$input[complete.cases(model@data$input[, c(model@meta$input$RESPONSE_FINAL, model@meta$input$PREDICTOR_FINAL)]), ] + if(length(model@meta$input$PREDICTOR_FINAL) < 3 | !is.null(rs)){ + var_mode = "none" + } else { + var_mode = "ffs" + } + if(nrow(model@data$input) > 0){ + model = createIndexFolds(x = model, nested_cv = nested_cv) + model = trainModel(x = model, + metric = "RMSE", + n_var = NULL, + mthd = mt, + mode = var_mode, + seed_nbr = 11, + cv_nbr = NULL, + var_selection = "indv", + filepath_tmp = NULL) + } + + if(is.null(rs)){ + outfile_name = gsub("[*]", "", paste0(outpath, + "ki_sr_", pt, "_non_scaled_", mt, "_", + model@meta$input$RESPONSE_FINAL, + ".rds")) + } else { + outfile_name = gsub("[*]", "", paste0(outpath, + "ki_sr_", pt, "_non_scaled_", mt, "_", + model@meta$input$RESPONSE_FINAL, + "_iv.rds")) + } + + print(outfile_name) + saveRDS(model, file = outfile_name) + } +} + + + +# Collect model performance ---------------------------------------------------- +modelPerformance = function(model){ + smr_all = lapply(names(model), function(pt){ + print(pt) + smr_pt = lapply(model[[pt]]@model[[1]], function(mi){ + if(is.na(mi[[1]])){ + df = NULL + } else if(inherits(mi[[1]]$model, "try-error")){ + df = NULL + } else { + if(ncol(mi[[1]]$model$resample) == 6){ + temp = rbind(mi[[1]]$model$resample[ + mi[[1]]$model$resample$method == mi[[1]]$model$bestTune$method & + mi[[1]]$model$resample$select == mi[[1]]$model$bestTune$select, c(1:3, 6)], + data.frame( + t(colMeans(mi[[1]]$model$resample[ + mi[[1]]$model$resample$method == mi[[1]]$model$bestTune$method & + mi[[1]]$model$resample$select == mi[[1]]$model$bestTune$select, 1:3], na.rm = TRUE)), + Resample = "Mean")) + temp$mtry = NA + } else if(mi[[1]]$model$method == "pls") { + temp = mi[[1]]$model$resample + + if("ncomp" %in% colnames(temp)){ + temp[which(colnames(temp) == "ncomp")] = NULL + } + temp = rbind(temp, + data.frame(t(colMeans(mi[[1]]$model$resample[, 1:3], na.rm = TRUE)), + Resample = "Mean")) + + } else if(ncol(mi[[1]]$model$resample) == 4) { + temp = rbind(mi[[1]]$model$resample, + data.frame(t(colMeans(mi[[1]]$model$resample[, 1:3], na.rm = TRUE)), + Resample = "Mean")) + temp$mtry = NA + + } else { + temp = rbind(mi[[1]]$model$resample, + data.frame(t(colMeans(mi[[1]]$model$resample[, 1:3], na.rm = TRUE)), + mtry = NA, Resample = "Mean")) + + } + temp$RMSE_normSD = temp$RMSE/sd(mi[[1]]$model$trainingData$.outcome) + df = data.frame(mtype = mi[[1]]$model$method, + ptype = pt, + resp = mi[[1]]$response, + mi[[1]]$model$bestTune, + nvars = length(mi[[1]]$model$selectedvars), + temp) + # for(i in seq(df$nvars)){ + # df[paste0("V",i)] = mi[[1]]$model$selectedvars[i] + # } + } + }) + smr_pt = do.call("rbind", smr_pt) + return(smr_pt) + }) + + smr_all = do.call("rbind", smr_all) + return(smr_all) +} + + + +# Compile two step prediction datasets ----------------------------------------- +comp2StepPred = function(comb_sr_two_step, model, model_res){ + + smr = lapply(seq(length(model)), function(i){ + mi = model[[i]][[1]] + mi_res = model_res[[i]][[1]] + + mi_pred = compPredictions(mi$model, comb_sr_two_step@data$input) + mi_res_pred = compPredictions(mi_res$model, comb_sr_two_step@data$input) + + rmse_1step = sqrt(mean((comb_sr_two_step@data$input[, mi$response]-(mi_pred))**2, na.rm = TRUE)) + rmse_2step = sqrt(mean((comb_sr_two_step@data$input[, mi$response]-(mi_pred + mi_res_pred))**2, na.rm = TRUE)) + pmean = mean(comb_sr_two_step@data$input[, mi$response], na.rm = TRUE) + psd = sd(comb_sr_two_step@data$input[, mi$response], na.rm = TRUE) + rmse_1step_nd = rmse_1step/psd + rmse_2step_nd = rmse_2step/psd + + data.frame(mtype1 = mi$model$method, + mtype2 = mi_res$model$method, + resp = mi$response, + RMSE1 = rmse_1step, + RMSE2 = rmse_2step, + RMSE_normSD1 = rmse_1step_nd, + RMSE_normSD2 = rmse_2step_nd) + + }) + smr = do.call("rbind", smr) + return(smr) +} + + +# Spectral rao ----------------------------------------------------------------- +######### SPECTRALRAO ############################# +## Developed by Matteo Marcantonio +## Latest update: 04th October 2018 +## ------------------------------------------------- +## Code to calculate Rao's quadratic entropy on a +## numeric matrix, RasterLayer object (or lists) +## using a moving window algorithm. +## The function also calculates Shannon-Wiener index. +## ------------------------------------------------- +## Rao's Q Min = 0, if all pixel classes have +## distance 0. If the chosen distance ranges between +## 0 and 1, Rao's Max = 1-1/S (Simpson Diversity, +## where S is the number of pixel classes). +## ------------------------------------------------- +## Find more info and application here: +## 1) https://doi.org/10.1016/j.ecolind.2016.07.039 Titel anhand dieser DOI in Citavi-Projekt ?bernehmen +## 2) https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12941 %CITAVIPICKER?10.1111/2041-210X.12941?Titel anhand dieser DOI in Citavi-Projekt ?bernehmen?% +##################################################### +# Function +spectralrao <- function(input, distance_m="euclidean", p=NULL, window=9, mode="classic", lambda=0, shannon=FALSE, rescale=FALSE, na.tolerance=0.0, simplify=3, nc.cores=1, cluster.type="MPI", debugging=FALSE, ...) +{ + # + ## Load required packages + # + require(raster) + require(svMisc) + require(proxy) + # + ## Define function to check if a number is an integer + # + is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol + # + ## Initial checks + # + if( !(is(input,"matrix") | is(input,"SpatialGridDataFrame") | is(input,"RasterLayer") | is(input,"list")) ) { + stop("\nNot a valid input object.") + } + if( is(input,"SpatialGridDataFrame") ) { + input <- raster(input) # Change input matrix/ces names + } + if( is(input,"matrix") | is(input,"RasterLayer")) { + rasterm<-input + } else if( is(input,"list") ) { + rasterm<-input[[1]] + } + if(na.tolerance>1){ + stop("na.tolerance must be in the [0-1] interval. Exiting...") + } + # Deal with matrices and RasterLayer in a different way + # If data are raster layers + if( is(input[[1]],"RasterLayer") ) { + if( mode=="classic" ){ + isfloat<-FALSE # If data are float numbers, transform them in integer + if( !is.wholenumber(rasterm@data@min) | !is.wholenumber(rasterm@data@max) | is.infinite(rasterm@data@min) ){ + message("Converting input data in an integer matrix...") + isfloat<-TRUE + mfactor<-100^simplify + rasterm<-getValues(rasterm)*mfactor + gc() + rasterm<-as.integer(rasterm) + gc() + rasterm<-matrix(rasterm,nrow(input),ncol(input),byrow=TRUE) + gc() + }else{ + rasterm<-matrix(getValues(rasterm),ncol=ncol(input),nrow=nrow(input),byrow=T) + } + } + #Print user messages + if( mode=="classic" & shannon ){ + message("Matrix check OK: \nRao and Shannon output matrices will be returned") + }else if( mode=="classic" & !shannon ){ + message("Matrix check OK: \nRao output matrix will be returned") + }else if( mode=="multidimension" & !shannon ){ + message(("Matrix check OK: \nA matrix with multimension RaoQ will be returned")) + }else if( mode=="multidimension" & shannon ){ + stop("Matrix check failed: \nMultidimension and Shannon not compatible, set shannon=FALSE") + }else{ + stop("Matrix check failed: \nNot a valid input | method | distance, please check all these options...") + } + # If data are a matrix or a list + }else if( is(input,"matrix") | is(input,"list") ) { + if( mode=="classic" ){ + isfloat<-FALSE # If data are float numbers, transform them in integer + if( !is.integer(rasterm) ){ + message("Converting input data in an integer matrix...") + isfloat<-TRUE + mfactor<-100^simplify + rasterm<-as.integer(rasterm*mfactor) + rasterm<-matrix(rasterm,nrow(input),ncol(input),byrow=TRUE) + gc() + }else{ + rasterm<-as.matrix(rasterm) + } + } + if( mode=="classic" & shannon ){ + message("Matrix check OK: \nRao and Shannon output matrices will be returned") + }else if( mode=="classic" & !shannon ){ + message("Matrix check OK: \nRao output matrix will be returned") + }else if( mode=="multidimension" & shannon ){ + stop("Matrix check failed: \nMultidimension and Shannon not compatible, set shannon=FALSE") + }else if( mode=="multidimension" & !shannon ){ + message(("Matrix check OK: \nA matrix with multimension RaoQ will be returned")) + }else{ + stop("Matrix check failed: \nNot a valid input | method | distance, please check all these options") + } + } + + if(nc.cores>1) { + if(mode=="multidimension"){ + message( + "Multi-core is not supported for multidimensional Rao, proceding with 1 core...") + nc.cores=1 + }else{ + message(" + ##################### Starting parallel calculation #######################") + } + } + # + ## Derive operational moving window + # + if( window%%2==1 ){ + w <- (window-1)/2 + } else { + stop("Moving window size must be an odd number.") + } + # + ## Preparation of output matrices + # + if(nc.cores==1) { + raoqe<-matrix(rep(NA,dim(rasterm)[1]*dim(rasterm)[2]),nrow=dim(rasterm)[1],ncol=dim(rasterm)[2]) + } + if(shannon){ + shannond<-matrix(rep(NA,dim(rasterm)[1]*dim(rasterm)[2]),nrow=dim(rasterm)[1],ncol=dim(rasterm)[2]) + } + # + ## If mode is classic Rao + # + if(mode=="classic") { + # + # If classic RaoQ is parallelized + # + if(nc.cores>1) { + # + ## Required packages for parallel calculation + # + require(foreach) + require(doSNOW) + require(parallel) + if( cluster.type=="MPI" ){ + require(Rmpi) + } + # + ## Reshape values + # + values<-as.numeric(as.factor(rasterm)) + rasterm_1<-matrix(data=values,nrow=dim(rasterm)[1],ncol=dim(rasterm)[2]) + # + ## Add fake columns and rows for moving window + # + hor<-matrix(NA,ncol=dim(rasterm)[2],nrow=w) + ver<-matrix(NA,ncol=w,nrow=dim(rasterm)[1]+w*2) + trasterm<-cbind(ver,rbind(hor,rasterm_1,hor),ver) + rm(hor,ver,rasterm_1,values); gc() + if(debugging){cat("#check: RaoQ parallel function.")} + # + ## Derive distance matrix + # + d1<-proxy::dist(as.numeric(levels(as.factor(rasterm))),method=distance_m) + gc() + # + ## Export variables in the global environment + # + if(isfloat) { + sapply(c("mfactor"), function(x) {assign(x,get(x),envir= .GlobalEnv)}) + } + # + ## Create cluster object with given number of slaves + # + plr<<-TRUE + if( cluster.type=="SOCK" || cluster.type=="FORK" ) { + cls <- parallel::makeCluster(nc.cores,type=cluster.type, outfile="",useXDR=FALSE,methods=FALSE,output="") + } else if( cluster.type=="MPI" ) { + cls <- makeMPIcluster(nc.cores,outfile="",useXDR=FALSE,methods=FALSE,output="") + } + registerDoSNOW(cls) + clusterCall(cl=cls, function() library("parallel")) + if(isfloat) { + parallel::clusterExport(cl=cls, varlist=c("mfactor")) + } + on.exit(stopCluster(cls)) # Close the clusters on exit + gc() + # + ## Start the parallelized loop over iter + # + pb <- txtProgressBar(min = (1+w), max = dim(rasterm)[2], style = 3) + progress <- function(n) setTxtProgressBar(pb, n) + opts <- list(progress = progress) + raop <- foreach(cl=(1+w):(dim(rasterm)[2]+w),.options.snow = opts,.verbose = F) %dopar% { + if(debugging) { + cat(paste(cl)) + } + raout <- sapply((1+w):(dim(rasterm)[1]+w), function(rw) { + if( length(!which(!trasterm[c(rw-w):c(rw+w),c(cl-w):c(cl+w)]%in%NA)) < window^2-((window^2)*na.tolerance) ) { + vv<-NA + return(vv) + } + else { + tw<-summary(as.factor(trasterm[c(rw-w):c(rw+w),c(cl-w):c(cl+w)]),maxsum=10000) + if( "NA's"%in%names(tw) ) { + tw<-tw[-length(tw)] + } + if( debugging ) { + message("Working on coords ",rw,",",cl,". classes length: ",length(tw),". window size=",window) + } + tw_labels<-names(tw) + tw_values<-as.vector(tw) + if( length(tw_values) <=2 ) { + vv<-NA + return(vv) + } + else { + p <- tw_values/sum(tw_values) + p1 <- diag(0,length(tw_values)) + p1[upper.tri(p1)] <- c(combn(p,m=2,FUN=prod)) + p1[lower.tri(p1)] <- c(combn(p,m=2,FUN=prod)) + d2 <- unname(as.matrix(d1)[as.numeric(tw_labels),as.numeric(tw_labels)]) + vv <- sum(p1*d2) + return(vv) + } + } + }) + return(raout) + } # End classic RaoQ - parallelized + message(("\n\nCalculation of Rao's index complete.\n")) + # + ## If classic RaoQ is sequential + # + } else if(nc.cores==1) { + # Reshape values + values<-as.numeric(as.factor(rasterm)) + rasterm_1<-matrix(data=values,nrow=dim(rasterm)[1],ncol=dim(rasterm)[2]) + # Add fake columns and rows for moving window + hor<-matrix(NA,ncol=dim(rasterm)[2],nrow=w) + ver<-matrix(NA,ncol=w,nrow=dim(rasterm)[1]+w*2) + trasterm<-cbind(ver,rbind(hor,rasterm_1,hor),ver) + # Derive distance matrix + classes<-levels(as.factor(rasterm)) + d1<-proxy::dist(x=as.numeric(classes),method=distance_m) + # Loop over each pixel + for (cl in (1+w):(dim(rasterm)[2]+w)) { + for(rw in (1+w):(dim(rasterm)[1]+w)) { + if( length(!which(!trasterm[c(rw-w):c(rw+w),c(cl-w):c(cl+w)]%in%NA)) < window^2-((window^2)*na.tolerance) ) { + raoqe[rw-w,cl-w]<-NA + } else { + tw<-summary(as.factor(trasterm[c(rw-w):c(rw+w),c(cl-w):c(cl+w)]),maxsum=10000) + if( "NA's"%in%names(tw) ) { + tw<-tw[-length(tw)] + } + if(debugging) { + message("Working on coords ",rw ,",",cl,". classes length: ",length(tw),". window size=",window) + } + tw_labels<-names(tw) + tw_values<-as.vector(tw) + if(length(tw_values) <= 2) { + raoqe[rw-w,cl-w]<-NA + } else { + p <- tw_values/sum(tw_values) + p1 <- diag(0,length(tw_values)) + p1[upper.tri(p1)] <- c(combn(p,m=2,FUN=prod)) + p1[lower.tri(p1)] <- c(combn(p,m=2,FUN=prod)) + d2 <- unname(as.matrix(d1)[as.numeric(tw_labels),as.numeric(tw_labels)]) + if(isfloat) { + raoqe[rw-w,cl-w]<-sum(p1*d2)/mfactor + } else { + raoqe[rw-w,cl-w]<-sum(p1*d2) + } + } + } + progress(value=cl, max.value=c((dim(rasterm)[2]+w)+(dim(rasterm)[1]+w))/2, progress.bar = FALSE) + } + } # End of for loop + message(("\nCalculation of Rao's index complete.\n")) + } + } # End classic RaoQ - sequential + else if( mode=="multidimension" ){ + if(debugging) { + message("#check: Into multidimensional clause.") + } + #----------------------------------------------------# + # + ## If multimensional RaoQ + # + # Check if there are NAs in the matrices + if ( is(rasterm,"RasterLayer") ){ + if(any(sapply(lapply(unlist(input),length),is.na)==TRUE)) + message("\n Warning: One or more RasterLayers contain NA which will be threated as 0") + } else if ( is(rasterm,"matrix") ){ + if(any(sapply(input, is.na)==TRUE) ) { + message("\n Warning: One or more matrices contain NA which will be threated as 0") + } + } + # + ## Check whether the chosen distance metric is valid or not + # + if( distance_m=="euclidean" | distance_m=="manhattan" | distance_m=="canberra" | distance_m=="minkowski" | distance_m=="mahalanobis" ) { + # + ## Define distance functions + # + #euclidean + multieuclidean <- function(x) { + tmp <- lapply(x, function(y) { + (y[[1]]-y[[2]])^2 + }) + return(sqrt(Reduce(`+`,tmp))) + } + #manhattan + multimanhattan <- function(x) { + tmp <- lapply(x, function(y) { + abs(y[[1]]-y[[2]]) + }) + return(Reduce(`+`,tmp)) + } + #canberra + multicanberra <- function(x) { + tmp <- lapply(x, function(y) { + abs(y[[1]] - y[[2]]) / (abs(y[[1]]) + abs(y[[2]])) + }) + return(Reduce(`+`,tmp)) + } + #minkowski + multiminkowski <- function(x) { + tmp <- lapply(x, function(y) { + abs((y[[1]]-y[[2]])^lambda) + }) + return(Reduce(`+`,tmp)^(1/lambda)) + } + #mahalanobis + multimahalanobis <- function(x){ + tmp <- matrix(unlist(lapply(x,function(y) as.vector(y))),ncol=2) + tmp <- tmp[!is.na(tmp[,1]),] + if( length(tmp)==0 | is.null(dim(tmp)) ) { + return(NA) + } else if(rcond(cov(tmp)) <= 0.001) { + return(NA) + } else { + #return the inverse of the covariance matrix of tmp; aka the precision matrix + inverse<-solve(cov(tmp)) + if(debugging){ + print(inverse) + } + tmp<-scale(tmp,center=T,scale=F) + tmp<-as.numeric(t(tmp[1,])%*%inverse%*%tmp[1,]) + return(sqrt(tmp)) + } + } + # + ## Decide what function to use + # + if( distance_m=="euclidean") { + distancef <- get("multieuclidean") + } else if( distance_m=="manhattan" ) { + distancef <- get("multimanhattan") + } else if( distance_m=="canberra" ) { + distancef <- get("multicanberra") + } else if( distance_m=="minkowski" ) { + if( lambda==0 ) { + stop("The Minkowski Distance for lambda = 0 is Infinity; please choose another value for lambda.") + } else { + distancef <- get("multiminkowski") + } + } else if( distance_m=="mahalanobis" ) { + distancef <- get("multimahalanobis") + warning("Multimahalanobis distance is not fully supported...") + } + } else { + stop("Distance function not defined for multidimensional Rao's Q; please choose among euclidean, manhattan, canberra, minkowski, mahalanobis!") + } + # + ## Reshape values + # + vls<-lapply(input, function(x) {raster::as.matrix(x)}) + # + ## Rescale and add fake columns and rows for moving w + # + hor<-matrix(NA,ncol=dim(vls[[1]])[2],nrow=w) + ver<-matrix(NA,ncol=w,nrow=dim(vls[[1]])[1]+w*2) + if(rescale) { + trastersm<-lapply(vls, function(x) { + t1 <- raster::scale(raster(cbind(ver,rbind(hor,x,hor),ver))) + t2 <- as.matrix(t1) + return(t2) + }) + } else { + trastersm<-lapply(vls, function(x) { + cbind(ver,rbind(hor,x,hor),ver) + }) + } + # + ## Loop over all the pixels in the matrices + # + if( (ncol(vls[[1]])*nrow(vls[[1]]))> 10000) { + message("\n Warning: ",ncol(vls[[1]])*nrow(vls[[1]])*length(vls), " cells to be processed, may take some time... \n") + } + + + cores = 4 + clp = parallel::makeCluster(cores) + doParallel::registerDoParallel(clp) + on.exit(stopCluster(clp)) + + + t = foreach (cl = (1+w):(dim(vls[[1]])[2]+w), .combine="rbind", .packages="foreach") %dopar% { + foreach (rw = (1+w):(dim(vls[[1]])[1]+w), .combine="rbind") %dopar% { + if( length(!which(!trastersm[[1]][c(rw-w):c(rw+w),c(cl-w):c(cl+w)]%in%NA)) < window^2-((window^2)*na.tolerance) ) { + # raoqe[rw-w,cl-w] <- NA + return(data.frame(row=rw-w, col=cl-w, value=NA)) + } else { + tw<-lapply(trastersm, function(x) { x[(rw-w):(rw+w),(cl-w):(cl+w)] + }) + # + ## Vectorize the matrices in the list and calculate + #Among matrix pairwase distances + lv <- lapply(tw, function(x) {as.vector(t(x))}) + vcomb <- combn(length(lv[[1]]),2) + vout <- c() + for(p in 1:ncol(vcomb) ) { + lpair <- lapply(lv, function(chi) { + c(chi[vcomb[1,p]],chi[vcomb[2,p]]) + }) + vout[p] <- distancef(lpair) + } + # raoqe[rw-w,cl-w] <- sum(rep(vout,2) * (1/(window)^4),na.rm=TRUE) + return(data.frame(row=rw-w, col=cl-w, value=sum(rep(vout,2) * (1/(window)^4),na.rm=TRUE))) + } + + } + + + # do.call("rbind", lapply((1+w):(dim(vls[[1]])[1]+w), function(rw){ + # if( length(!which(!trastersm[[1]][c(rw-w):c(rw+w),c(cl-w):c(cl+w)]%in%NA)) < window^2-((window^2)*na.tolerance) ) { + # # raoqe[rw-w,cl-w] <- NA + # return(data.frame(row=rw-w, col=cl-w, value=NA)) + # } else { + # tw<-lapply(trastersm, function(x) { x[(rw-w):(rw+w),(cl-w):(cl+w)] + # }) + # # + # ## Vectorize the matrices in the list and calculate + # #Among matrix pairwase distances + # lv <- lapply(tw, function(x) {as.vector(t(x))}) + # vcomb <- combn(length(lv[[1]]),2) + # vout <- c() + # for(p in 1:ncol(vcomb) ) { + # lpair <- lapply(lv, function(chi) { + # c(chi[vcomb[1,p]],chi[vcomb[2,p]]) + # }) + # vout[p] <- distancef(lpair) + # } + # # raoqe[rw-w,cl-w] <- sum(rep(vout,2) * (1/(window)^4),na.rm=TRUE) + # return(data.frame(row=rw-w, col=cl-w, value=sum(rep(vout,2) * (1/(window)^4),na.rm=TRUE))) + # } + # })) + + + + # progress(value=cl, max.value=dim(rasterm)[2]+w, progress.bar = FALSE) + } + raoqe<-matrix(rep(NA,dim(rasterm)[1]*dim(rasterm)[2]),nrow=dim(rasterm)[1],ncol=dim(rasterm)[2]) + for(i in seq(nrow(t))){ + raoqe[t[i,"row"], t[i,"col"]] = t[i,"value"] + } + + if(exists("pb")) { + close(pb) + } + } else{ + message("Something went wrong when trying to calculate Rao's indiex.") + } # end of multimensional RaoQ + message("\nCalculation of Multidimensional Rao's index complete.\n") + + #----------------------------------------------------# + + # + ## Shannon + # + if( shannon==T ) { + message("\nStarting Shannon-Wiener index calculation:\n") + # Reshape values + values<-as.numeric(as.factor(rasterm)) + rasterm_1<-matrix(data=values,nrow=dim(rasterm)[1],ncol=dim(rasterm)[2]) + # + ## Add "fake" columns and rows for moving window + # + hor<-matrix(NA,ncol=dim(rasterm)[2],nrow=w) + ver<-matrix(NA,ncol=w,nrow=dim(rasterm)[1]+w*2) + trasterm<-cbind(ver,rbind(hor,rasterm_1,hor),ver) + # + ## Loop over all the pixels + # + for (cl in (1+w):(dim(rasterm)[2]+w)) { + for(rw in (1+w):(dim(rasterm)[1]+w)) { + if( length(!which(!trasterm[c(rw-w):c(rw+w),c(cl-w):c(cl+w)]%in%NA)) < window^2-((window^2)*na.tolerance) ) { + shannond[rw-w,cl-w]<-NA + } else { + tw<-summary(as.factor(trasterm[c(rw-w):c(rw+w),c(cl-w):c(cl+w)])) + if( "NA's"%in%names(tw) ) { + tw<-tw[-length(tw)] + } + tw[tw>1]<-1 + tw_values<-as.vector(tw) + p<-tw_values/length(tw_values) + p_log<-log(p) + shannond[rw-w,cl-w]<-(-(sum(p*p_log))) + } + } + svMisc::progress(value=cl, max.value=(c((dim(rasterm)[2]+w)+(dim(rasterm)[1]+w))/2), progress.bar = FALSE) + } + message(("\nCalculation of Shannon's index is also complete!\n")) + } # End ShannonD + + #----------------------------------------------------# + + # + ## Return multiple outputs + # + if(debugging){ + message( "#check: return function." ) + } + + if( shannon ) { + if( nc.cores>1 ) { + outl<-list(do.call(cbind,raop),shannond) + names(outl)<-c("Rao","Shannon") + return(outl) + } else if( nc.cores==1 ){ + outl<-list(raoqe,shannond) + names(outl)<-c("Rao","Shannon") + return(outl) + } + } else if( !shannon & mode=="classic" ) { + if( isfloat & nc.cores>1 ) { + #return(raop) + return(do.call(cbind,raop)/mfactor) + if(debugging){ + message("#check: return function - classic.") + } + } else if( !isfloat & nc.cores>1 ) { + outl<-list(do.call(cbind,raop)) + names(outl)<-c("Rao") + return(outl) + } else if( sfloat & nc.cores==1 ) { + outl<-list(raoqe/mfactor) + names(outl)<-c("Rao") + return(outl) + } else if( !isfloat & nc.cores==1 ) { + outl<-list(raoqe) + names(outl)<-c("Rao") + return(outl) + } else if( !isfloat & nc.cores>1 ) { + outl<-list(do.call(cbind,raoqe)) + names(outl)<-c("Rao") + return(outl) + } + } else if( !shannon & mode=="multidimension" ) { + outl<-list(raoqe) + names(outl)<-c("Multidimension_Rao") + return(outl) + } +} + + + + + + + + + diff --git a/src/hySpecVisKili.Rproj b/src/hySpecVisKili.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/src/hySpecVisKili.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/src/old_120_combine_predictores_biodiv_tlevel.R b/src/old_120_combine_predictores_biodiv_tlevel.R new file mode 100644 index 0000000..74fddbe --- /dev/null +++ b/src/old_120_combine_predictores_biodiv_tlevel.R @@ -0,0 +1,60 @@ +# Combine hyperspectral predictores and biodiversity variables in gpm class +# aggregated by trophic level. + +source("D:/plygrnd/hySpecVisKili/hySpecVisKili/src/000_set_environment.R") + + +preds = readRDS(paste0(path_hyp_pred, "hyperspec_preds.rds")) +bd = readRDS(paste0(path_biodiv, "biodiv.rds")) + +comb = merge(bd, preds, by = c("plotID"), all.x = TRUE, all.y = TRUE) + +trophic_levels = rbind(data.frame(tlevel = "Plants", + groups = c("SRallplants", "SRasterids", "SRconifers", "SReudicots", "SRferns", "SRlycopodiopsida", "SRmagnoliids", "SRmonocots", "SRrosids")), + data.frame(tlevel = "Herbivore", + groups = c("SRbees", "SRmoths", "SRorthoptera")), + data.frame(tlevel = "", + groups = c("SRdungbeetles", "SRmillipedes", "SRcollembola")), + data.frame(tlevel = "Predators", + groups = c("SRspiders", "SRheteroptera", "SRotheraculeata", "SRparasitoids", "SRothercoleoptera")), + data.frame(tlevel = "Flying predatores", + groups = c("SRbats", "SRbirds")), + data.frame(tlevel = "Generalist", + groups = c("SRmammals", "SRanimals", "SRsyrphids", "SRants", "SSRsnails"))) + +head(comb) + + + + + + + +comb$SelCat = substr(as.character(comb$plotID), 1, 3) +comb$SelNbr = substr(as.character(comb$plotID), 4, 4) + +col_selector = which(names(comb) %in% c("SelCat", "SelNbr")) + +col_diversity = seq(which("SRmammals" == colnames(comb)), + which("SRallplants" == colnames(comb))) + +col_precitors = c(which("elevation" == colnames(comb)), + seq(which("lui_biomass_removal" == colnames(comb)), + which("lui" == colnames(comb))), + seq(which("CARI_mean" == colnames(comb)), + which("pcai_kmdc_raoq_sd" == colnames(comb)))) + +col_meta = which(!seq(ncol(comb)) %in% c(col_selector, col_diversity, col_precitors)) + + +meta <- createGPMMeta(comb, type = "input", + selector = col_selector, + response = col_diversity, + predictor = col_precitors, + meta = col_meta) + +comb <- gpm(comb, meta, scale = FALSE) + +dir.create(paste0(path_comb_gpm), showWarnings = FALSE) + +saveRDS(comb, file = paste0(path_comb_gpm, "ki_hyperspec_biodiv_non_scaled.rds"))