diff --git a/evaluations/pipeline_results.Rmd b/evaluations/pipeline_results.Rmd new file mode 100644 index 0000000..566f7b2 --- /dev/null +++ b/evaluations/pipeline_results.Rmd @@ -0,0 +1,841 @@ +--- +output: + html_document: + theme: spacelab + df_print: paged + toc: yes + toc_float: yes + pdf_document: + toc: yes +params: + proj: null + t0: null + eval_year: 2022 + input_dir: null + output_dir: null + fullname: null + shapefile_path: null + country_path: null + pairs_path: null + carbon_density_path: null + additionality_path: null + verbose: FALSE + branch: null +--- + +```{r include=FALSE} + +# TO KNIT THIS NOTEBOOK, RUN THE FOLLOWING LINE IN A POWERSHELL TERMINAL: + +# Rscript -e "rmarkdown::render(input='evaluations/pipeline_results.Rmd', output_file='/maps/aew85/tmf_pipe_out/example_project.html', params=list(proj='example_project', t0=2010, ...))" + +# Mandatory args: proj, t0, eval year +# Optional args: input dir, output dir, fullname, country_path, shapefile path, pairs_path, carbon density path, verbose +# You must either specify input dir and output dir OR provide absolute paths to each of the objects required (pairs, shapefile, carbon density, additionality, country list) +# Verbose option includes additional descriptive text. Defaults to false. +# Include name of pipeline branch with branch parameter for reproducibility. + +``` + +```{r settings, include=FALSE} +knitr::opts_chunk$set( + echo = FALSE, warning=FALSE,message=FALSE) + +library(tidyverse) +library(sf) +library(reshape2) +library(maps) +library(mapdata) +library(ggspatial) +library(arrow) +library(rnaturalearth) +library(rnaturalearthdata) +library(stringr) +library(jsonlite) +library(countrycode) +library(here) + +``` + +```{r inputs, echo=FALSE,warning=FALSE, message=FALSE} + +# Extract params + +project_name <- params$proj +start_year <- as.numeric(params$t0) +evaluation_year <- as.numeric(params$eval_year) +verbose <- params$verbose + +``` + +--- +title: "`r paste0('4C Ex-Post Evaluation: ', if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() })`" +subtitle: "`r format(Sys.Date(), "%B %Y")`" +--- + +```{r set_paths, echo=FALSE,warning=FALSE, message=FALSE} + +# get script path + +script_path <- here('evaluations/scripts') # change as appropriate + +# get data path + +if (!is.null(params$output_dir)) { + data_path <- paste0(params$output_dir,'/',project_name) +} + +# get input path + +if (!is.null(params$input_dir)) { + input_dir <- params$input_dir +} + +# get branch + +if (!is.null(params$branch)) { + branch <- params$branch +} + +``` + +```{r shapefile_path, echo=FALSE, message=FALSE} + +# add error message for shapefile + +if (is.null(params$input_dir) && is.null(params$shapefile_path)) { + warning("Error: insufficient information to read shapefile. To map the shapefile, you must provide either input_dir OR shapefile_path in params.")} + +# read shapefile + +if (!is.null(params$shapefile_path)) { + shapefile_path <- params$shapefile_path +} else if(file.exists(input_dir)) { shapefile_path <- paste0(input_dir,'/',project_name,'.geojson') } +if(file.exists(shapefile_path)) {shapefile <- read_sf(shapefile_path)} + +``` + +```{r pairs_path, echo=FALSE, message=FALSE} + +# add error message for pairs + +if (is.null(params$output_dir) && is.null(params$pairs_path)) { + warning("Error: insufficient information to read pairs. To analyse pairs, you must provide either output_dir OR pairs_path in params.")} + +# get path to pairs + +if (!is.null(params$pairs_path)) { + pairs_path <- params$pairs_path +} else if(file.exists(data_path)) {pairs_path <- file.path(data_path,'pairs') } + +``` + +```{r carbon_density_path, echo=FALSE, message=FALSE} + +# add error message for carbon density + +if (is.null(params$output_dir) && is.null(params$carbon_density_path)) { + } + +# read carbon density + +if (!is.null(params$carbon_density_path)) { + carbon_density_path <- params$carbon_density_path +} else if(file.exists(data_path)) {carbon_density_path <- list.files(data_path,pattern='carbon',full.names=T)[1] } + +if(file.exists(carbon_density_path)) { + carbon_density <- read.csv(carbon_density_path) + } else { + warning("Error: insufficient information to read carbon density. To print carbon density information, you must provide either output_dir OR carbon_density_path in params.")} + +``` + +```{r country_path, echo=FALSE, message=FALSE} + +# add error message for country + +if (is.null(params$output_dir) && is.null(params$country_path)) { + warning("Error: insufficient information to read country. To print country information, must provide either output_dir OR country_path in params.")} + +# read country path + +if (!is.null(params$country_path)) { + country_path <- params$country_path +} else if(file.exists(data_path)) {country_path <- list.files(path = data_path, pattern = "country-list", full.names = TRUE)[1]} + +``` + +```{r additionality_path, echo=FALSE, message=FALSE} + +# add error message for additionality + +if (is.null(params$output_dir) && is.null(params$additionality_path)) { + warning("Error: insufficient information to read additionality. To print additionality information, you must provide either output_dir OR additionality_path in params.")} + +if (!is.null(params$additionality_path)) { + additionality_path <- params$additionality_path +} else if(file.exists(data_path)) {additionality_path <- list.files(path = data_path, pattern = "additionality", full.names = TRUE)[1]} +if(file.exists(additionality_path)) {additionality <- read.csv(additionality_path)} + +``` + +```{r read_pairs, echo=FALSE} + +# get filenames and filter for matched points + +files_full_raw <- list.files(pairs_path, + pattern='*.parquet',full.names=T,recursive=F) +files_full <- files_full_raw[!grepl('matchless',files_full_raw)] +files_short_raw <- list.files(path=pairs_path, + pattern='*.parquet',full.names=F,recursive=F) +files_short <- files_short_raw[!grepl('matchless',files_short_raw)] + +# initialise dfs + +vars <- c(colnames(read_parquet(files_full[1])),'pair') +paired_data_raw <- data.frame(matrix(ncol=length(vars),nrow=0)) +colnames(paired_data_raw) <- vars +paired_data_raw$pair <- as.factor(paired_data_raw$pair) + +for(j in 1:length(files_full)){ + + # read in all parquet files for a given project + + f <- data.frame(read_parquet(files_full[j])) + + # add identity column + + f$pair <- as.factor(c(replicate(nrow(f),files_short[j]))) + + # append data to bottom of df + + paired_data_raw <- bind_rows(paired_data_raw,f) + +} + +# generate separate datasets for project and counterfactual + +project <- paired_data_raw %>% dplyr::select(starts_with('k'),pair) +cf <- paired_data_raw %>% dplyr::select(starts_with('s'),pair) + +# create project-counterfactual merged dataset + +colnames(cf) <- colnames(project) +pair_merged <- bind_rows(project,cf) +names(pair_merged) <- str_sub(names(pair_merged),3) +names(pair_merged)[names(pair_merged) == "ir"] <- "pair" +data <- pair_merged %>% select_if(~ !any(is.na(.))) + +# merge reference data to cf-counterfactual merged dataset with type column + +type <- c(replicate(nrow(project),'Project'),replicate(nrow(cf),'Counterfactual')) +data$type <- type + +``` + +```{r get_shapefile_area, echo=FALSE} + +if(exists("shapefile")){ + project_area_ha <- as.numeric(sum(st_area(st_make_valid(shapefile)))/10000) +} + +``` + +```{r get_country_names} + +if(exists("country_path")){ + + # define function for extracting country names + + get_country_names <- function(country_codes_path) { + codes <- as.character(fromJSON(country_codes_path)) + country_names <- countrycode(codes, 'iso2c', 'country.name') + country_names[country_names == 'Congo - Kinshasa'] <- 'Democratic Republic of the Congo' + return(country_names) + } + + # read in country json and get names + country_vec <- get_country_names(country_path) + + # define function for printing the country names if there are multiple + + if (length(country_vec) > 1) { + country_string <- paste(country_vec[-length(country_vec)], collapse = ", ") + country_string <- paste(country_string, "and", country_vec[length(country_vec)]) + } else { + country_string <- country_vec[1] + } + +} + + +``` + +\ + +# About the project + +`r project_name %>% str_replace_all("_", " ") %>% str_to_title()` is located in `r if(exists("country_string")) country_string`. The project started in `r start_year` and has an area of `if (exists("project_area_ha")) r format(project_area_ha, big.mark = ",", scientific = FALSE, digits = 3)` hectares. + +```{r echo=FALSE} + +# ________________ FILL IN PROJECT-SPECIFIC INFORMATION __________________ + +# Replace this chunk with a short narrative about the context of the project and why it was chosen for evaluation by 4C. Include any other details relevant to the interpretation of this document. + +``` + + + +\ + +# Introduction to the 4C methodology + +`r if(verbose){" + +The 4C method for calculating the additionality of a project is based on pixel matching. This approach allows us to identify places which are similar to the project but are not protected. We can then measure deforestation in these places and use this as our counterfactual scenario (what would have happened in the absence of the project) against which we measure the impact that the project has had. + +More information about 4C's approach can be found below. + +[Quantify Earth, an interactive platform which demonstrates our pixel-matching approach in action](https://quantify.earth/#/globe/1) + +[4C explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence) + +[Cambridge Carbon Impact: an evaluation of some existing carbon projects](https://www.cambridge.org/engage/coe/article-details/6409c345cc600523a3e778ae) + +[The full PACT methodology](https://www.cambridge.org/engage/coe/article-details/657c8b819138d23161bb055f) + +"}` + + + +\ + +# Additionality summary + +The graph below shows the annual trend in additionality from 10 years before the project start to the present day. The solid grey vertical line represents the project start, whilst the dashed grey horizontal line represents 0 additionality (i.e. no avoided deforestation). Above this line, the project has experienced less forest carbon loss than the counterfactual; below this line, it has experienced more forest carbon loss than the counterfactual. + +\ + +```{r additionality_summary, echo=FALSE} + +if(exists("additionality")) { + additionality %>% ggplot(aes(x = year, y = additionality)) + + geom_vline(xintercept = start_year, alpha = 0.4) + + geom_hline(yintercept = 0, alpha = 0.4, linetype = 'dashed') + + #annotate(geom='text',x=start_year,y=10000,label='Project start',size=5,colour='grey50')+ + geom_line() + + xlab('Year') + + ylab(expression(paste('Additionality (Mg ', CO[2], "e)", sep = '')))+ + theme_classic() +} else {print("Additionality information not available")} + + + +``` + +\ + +Raw values are also presented below. + +\ + +```{r echo=FALSE} + +if(exists("additionality")) { + additionality %>% rename(Additionality = additionality, Year = year) +} else {print("Additionality information not available")} + +``` + +\ + +# Detailed explanation of results + +The following sections will detail how we arrived at the additionality results, including the location and quality of the matched points, the deforestation rates in each set, and the carbon density values used to convert these deforestation rates to carbon emissions reductions. + +\ + +### Location of matched points + +We sampled `r format((nrow(data)/2), big.mark = ",", scientific = FALSE, digits = 3)` points within the project and an equal number of matching points from outside of the project. These matching points serve as our counterfactual scenario for deforestation. + +Below we show the location of the counterfactual matching points (shown in blue) relative to the project (shown in red), both at the country and project scale. + +`r if(nrow(data) > 20000){"Note that, for clarity and computational efficiency, we show only 10% of the points."}` + +\ + +```{r match_locations, warning=FALSE, echo=FALSE, message=FALSE} + +if(exists("shapefile") & exists("country_vec") & exists("data")) { + + # downsample by 90% + + if(nrow(data) > 20000){ + data_forplot <- data %>% sample_frac(0.1) + } else { + data_forplot <- data + } + + # plot location of matching points + + country_map <- ne_countries(country = country_vec, returnclass = "sf", scale= "large") + + # transform crs + + shapefile <- st_transform(shapefile, st_crs(country_map)) + + ggplot(data=country_map) + + geom_sf(colour=NA,fill='grey80')+ + geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5) + + geom_sf(data=shapefile,fill='red',colour=NA,inherit.aes=F)+ + scale_color_manual(values=c('blue','red'))+ + coord_sf()+ + theme_void()+ + annotation_scale(text_cex=1.5,location='tl')+ + theme(legend.title = element_blank(), + text=element_text(size=20)) + + xmin <- filter(data, type=='Project') %>% select(lng) %>% min() + xmax <- filter(data, type=='Project') %>% select(lng) %>% max() + ymin <- filter(data, type=='Project') %>% select(lat) %>% min() + ymax <- filter(data, type=='Project') %>% select(lat) %>% max() + + ggplot(data=country_map) + + geom_sf(colour=NA,fill='grey80')+ + geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5) + + geom_sf(data=shapefile,fill='red',colour=NA,inherit.aes=F)+ + scale_color_manual(values=c('blue','red'))+ + coord_sf(xlim=c(xmin-0.5,xmax+0.5),ylim=c(ymin-0.5,ymax+0.5))+ + theme_void()+ + annotation_scale(text_cex=1.5,location='tl')+ + theme(legend.title = element_blank(), + text=element_text(size=16), + legend.position='none') + +} else {print("Insufficient information available to map points")} + + +``` + +### Quality of matches + +Here we show how well the matching points align with the project in terms of our matching variables. Correspondence between the project (shown in red in the plots below) and the counterfactual (shown in blue) indicates that the counterfactual will faithfully represent the business-as-usual scenario for places like the project. + +- Inaccessibility (motorized travel time to healthcare, minutes) + +- Slope ($^\circ$) + +- Elevation (meters) + +- Forest cover at t0 (project start, %) + +- Deforestation at t0 (%) + +- Forest cover at t-5 (5 years prior to project start, %) + +- Deforestation at t-5 (%) + +- Forest cover at t-10 (10 years prior to project start, %) + +- Deforestation at t-10 (%) + +Forest cover and deforestation are measured as the proportion of pixels within a 1km radius of a particular point which are classified either as undisturbed forest (in the case of forest cover) or deforested (in the case of deforestation) by the JRC Tropical Moist Forest dataset. + +More information about the datasets we use can be found below: + +[MAP access to healthcare](https://malariaatlas.org/project-resources/accessibility-to-healthcare/) + +[SRTM elevation data](https://www.earthdata.nasa.gov/sensors/srtm) + +[JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF) + +\ + +```{r match_quality, warning=FALSE,message=FALSE, echo=FALSE} + +if(exists("data")) { + + # plot matches + + source(file.path(script_path,'plot_matchingvars.R')) + + plot_matching_variables(data) + +} else {print("Insufficient information available to evaluate match quality")} + + + +``` + +\ + +### Standardised mean differences + +We can quantify the similarity in matching variables in terms of their standardised mean difference (SMD). An SMD of \< 0.25 indicates that points are well-matched for that particular variable. + +In the below plot, the blue points indicate the SMD value (i.e. the amount of difference between the project and counterfactual) for each variable. The grey dotted lines at (-0.25, +0.25) represent the boundary within which our differences would ideally fall in order for the project and counterfactual to be considered well-matched. + +\ + +```{r smd} + +if(exists("data")) { + + source(file.path(script_path,'std_mean_diff.R')) + + results <- std_mean_diff(pairs_path) + + # changing sign for interpretation + + results$smd <- (-1)*results$smd + + # changing order of variables + + variables <- c('cpc10_d','cpc5_d','cpc0_d', + 'cpc10_u','cpc5_u','cpc0_u', + 'access','slope','elevation') + + # plotting + + ggplot(results,aes(x=smd,y=variable))+ + #geom_boxplot(outlier.shape=NA,colour='blue')+ + geom_point(colour='blue',fill='blue',alpha=0.3,size=4)+ + geom_vline(xintercept=0)+ + geom_vline(xintercept=0.25,lty=2,colour='grey30')+ + geom_vline(xintercept=-0.25,lty=2,colour='grey30')+ + scale_y_discrete(labels=c(bquote(Deforestation~t[-10]~("%")), + bquote(Deforestation~t[-5]~("%")), + bquote(Deforestation~t[0]~("%")), + bquote(Forest~cover~t[-10]~("%")), + bquote(Forest~cover~t[-5]~("%")), + bquote(Forest~cover~t[0]~("%")), + 'Inaccessibility (mins)',paste0('Slope (',intToUtf8(176),')'),'Elevation (m)'))+ + xlab('Standardised mean difference')+ + xlim(-1,1)+ + theme_classic()+ + theme(axis.title.y=element_blank(), + legend.title=element_blank(), + legend.box.background=element_rect(), + legend.position='none', + text=element_text(size=20), + axis.text.y=element_text(size=20)) + +} else {print("Insufficient information available to evaluate match quality")} + +``` + +\ + +### Deforestation within the project + +Now focusing on deforestation within the project, we can examine the spatial distribution of the following 4 processes pertinent to forest carbon stock changes: + +- Undisturbed forest to degraded forest + +- Degraded forest to deforested land + +- Undisturbed forest to deforested land + +- Undisturbed land to reforested land + +\ + +These transitions are shown in the plot below for the `r evaluation_year-start_year`-year period between `r start_year` and `r evaluation_year`. They are overlaid on the project area, shown in grey. If a transition is not shown, it did not occur in the period examined. + +Data from [JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF). + +\ + +```{r deforestation_spatial_distribution, echo=FALSE, warning=FALSE} + + +if(exists("data")) { + + # plot deforestation within project + + source(file.path(script_path,'plot_transitions.R')) + + plot_transitions(data=data,t0=start_year,period_length=evaluation_year-start_year,shapefile=shapefile) + +} else {print("Insufficient information available to evaluate deforestation")} + + +``` + +\ + +### Deforestation and degradation rates within project and counterfactual + +Here we compare various deforestation processes between the project and counterfactual. We present average annual rates measured between `r start_year` and `r evaluation_year`. + +\ + +```{r proportions_undisturbed_degraded, echo=FALSE} + +if(exists("data")) { + +# obtaining the area of undisturbed and degraded forest at t0, for use later + +source(file.path(script_path,'def_rate.R')) + +proj_data <- data %>% filter(type=='Project') + +prop_und <- get_prop_class(data=proj_data,t0=start_year-10,class=1) +prop_deg <- get_prop_class(data=proj_data,t0=start_year-10,class=2) + +} else {print("Insufficient information available to evaluate deforestation")} + +``` + +***Rate of forest loss, %/year*** + +First we can calculate the average annual rate at which undisturbed forest is lost. This refers to the % loss of undisturbed tropical moist forest per year, i.e. it is relative to the amount of tropical moist forest present at the beginning of the project. + +```{r rate_of_forest_loss_percent, echo=FALSE} + +if(exists("data")) { + + source(file.path(script_path,'def_rate.R')) + + df <- def_rate(data=data,t0=start_year,period_length=evaluation_year-start_year) + + df %>% t() %>% data.frame() %>% rename('Rate of forest loss (%/year)' = 1) + +} else {print("Insufficient information available to evaluate deforestation")} + + +``` + +\ + +***Separate deforestation and degradation processes, %/year*** + +The rate of forest loss can be broken down into more specific processes, presented in the table below: + +- degradation of undisturbed forest + +- deforestation of undisturbed forest + +- deforestation of degraded forest + +- reforestation of undisturbed forest + + +\ + +```{r separate_deforestation_processes_percent, echo=FALSE} + +if(exists("data")) { + + source(file.path(script_path,'def_rate.R')) + + df_sep <- def_rate_seperate(data=data,t0=start_year,period_length=evaluation_year-start_year) + + df_sep + +} else {print("Insufficient information available to evaluate deforestation")} + +``` + +\ + +We can also convert these deforestation rates to hectares per using the following formula: + +\ + +$$ {\text{Deforestation rate (hectares/year)}} = +\left( \frac{\text{Deforestation rate (%/year)}}{100} \right) \times \text{Project area (hectares)} \times \text{Proportion of forest type present at } t_0 +$$ + +\ + +It is necessary to correct for the amount of each forest type (undisturbed or degraded) present at the beginning of the project. For this project these proportions are as follows: + +- Undisturbed forest: `r format(100*prop_und, digits = 3)`% + +- Degraded forest: `r format(100*prop_deg, digits = 3)`% + +The rates of overall forest loss and individual deforestation processes are shown in hectares in the tables below. + +\ + +***Rate of forest loss, hectares/year*** + +```{r rate_of_forest_loss_ha, echo=FALSE} + +if(exists("data")) { + + df_ha <- df + + df_ha[1,1:2] <- (df_ha[1,1:2]/100)*project_area_ha*prop_und + + colnames(df_ha) <- c('Project','Counterfactual') + + df_ha %>% t() %>% data.frame() %>% rename('Rate of forest loss (ha/year)' = 1) + +} else {print("Insufficient information available to evaluate deforestation")} + +``` + +\ + +***Separate deforestation and degradation processes, hectares/year*** + +```{r separate_deforestation_processes_ha, echo=FALSE} + +if(exists("data")) { + + source(file.path(script_path,'def_rate.R')) + + df_sep_ha <- df_sep + + df_sep_ha[df_sep_ha$`Forest type`=='Undisturbed forest',4] <- (df_sep_ha[df_sep_ha$`Forest type`=='Undisturbed forest',4]/100)*project_area_ha*prop_und + + df_sep_ha[df_sep_ha$`Forest type`=='Disturbed forest',4] <- (df_sep_ha[df_sep_ha$`Forest type`=='Disturbed forest',4]/100)*project_area_ha*prop_deg + + df_sep_ha %>% rename('Rate (ha/year)' = 4) + +} else {print("Insufficient information available to evaluate deforestation")} + +``` + +\ + +***Land cover changes over time*** + +Presenting the above data in another way, we can visualise the year-on-year change in different land cover classes (undisturbed forest, degraded forest, deforested land and regrowth) for both the project and the counterfactual. + +In the below plots, the vertical grey dashed line represents the start year of the project. + +```{r luc_timeseries_all, echo=FALSE} + +if(exists("data")) { + + source(file.path(script_path,'land_cover_timeseries.R')) + + df <- get_luc_timeseries(data,t0=start_year,tend=evaluation_year) + + df %>% mutate( + luc = as.factor(luc), + year = as.numeric(year)) %>% + ggplot(aes(x=year,y=percentage,colour=luc,lty=type))+ + geom_line(linewidth=1.5,alpha=0.5)+ + geom_vline(xintercept=start_year,lty=2,colour='grey30')+ + scale_colour_manual(values=c('darkgreen','gold2','orange3','steelblue2'), + name='Land Use Class',labels=c('Undisturbed forest','Degraded forest','Deforested land','Regrowth'))+ + scale_linetype_manual(name='Location',values=c('solid','dotted'),breaks=c('Project','Counterfactual'))+ + xlab('Year')+ + ylab('% cover')+ + theme_classic() + +} else {print("Insufficient information available to evaluate deforestation")} + +``` + +\ + +***Land cover changes over time: undisturbed forest only*** + +Zooming in on the trend in **undisturbed forest** cover for project and counterfactual (shown in green in the plot above), we expect the trends in forest cover to be parallel prior to the project start (indicating that they are well-matched) but diverging after the project start, indicating a measurable reduction in deforestation under the project scenario. + +Here the trajectories are shown in red (project) and blue (counterfactual). They show the mean and 95% confidence intervals of % undisturbed tropical moist forest cover, calculated across the 100 sets of points we generate as part of our bootstrapping procedure. More details about this algorithm are available in the [PACT Methodology](https://www.cambridge.org/engage/api-gateway/coe/assets/orp/resource/item/647a14a14f8b1884b7b97b55/original/pact-tropical-moist-forest-accreditation-methodology.pdf). + +\ + + +```{r luc_timeseries, echo=FALSE} + +if(exists("data")) { + + source(file.path(script_path,'land_cover_timeseries.R')) + + # caching result of land cover time series + result <- luc_class1_uncertainty(data=data, t0=start_year, tend=evaluation_year) + + # calculating stats + plotting + + result %>% + group_by(type,year) %>% + summarise(mean=mean(percent_class1), + se = sd(percent_class1) / sqrt(n()), # Standard error + t_critical = qt(0.975, df = n() - 1), # Critical t-value for 95% CI + ci_lower = mean - t_critical * se, + ci_upper = mean + t_critical * se, + .groups = 'drop') %>% + ggplot(aes(x = year, y = mean, color = type)) + + geom_line(size = 1) + # Line for the mean + geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper, fill = type), alpha = 0.2) + # Confidence interval ribbon + labs( + x = "Year", + y = "% undisturbed forest") + + theme_classic() + + geom_vline(xintercept=start_year,lty=2,colour='grey30')+ + scale_color_manual(values = c("Project" = "red", "Counterfactual" = "blue")) + + scale_fill_manual(values = c("Project" = "red", "Counterfactual" = "blue")) + + theme(legend.title = element_blank()) + +} else {print("Insufficient information available to evaluate deforestation")} + +``` + +\ + +### Carbon densities + +In order to calculate additionality, the deforestation rates are converted to carbon emissions rates using regional carbon density values generated through NASA GEDI data. These are presented in the table below for each land use class, each of which is associated with a different carbon density value. + +More information on GEDI data is available [here](https://www.earthdata.nasa.gov/sensors/gedi). + +\ + +```{r carbon_density, echo=FALSE} + +if(exists("carbon_density")) { + + carbon_density <- carbon_density %>% mutate( + land.use.class = case_when( + land.use.class == 1 ~ 'Undisturbed', + land.use.class == 2 ~ 'Degraded', + land.use.class == 3 ~ 'Deforested', + land.use.class == 4 ~ 'Reforested', + land.use.class == 5 ~ 'Water', + land.use.class == 6 ~ 'Other') + ) + + + colnames(carbon_density) <- c('Land Use Class', 'Carbon Density (Mg/Ha)') + + carbon_density + +} else {print("Carbon density not available")} + +``` + +\ + +The additionality summary presented at the top of this document is based on the difference in the carbon loss estimates between the project and the counterfactual scenario. + +\ + +`r if(verbose){" + + +# Statement on leakage and permanence + +Leakage and permanence are two factors that affect the long-term emissions reductions contributed by a project but **have not been included in this evaluation**. + +**Leakage** is the displacement of activities which deplete forest carbon stocks from the project to other areas due to the implementation of the project. In the worst case scenario, 100% of these activities are displaced, effectively nullifying the additionality of the project. Leakage can be reduced by interventions which remove the incentive to continue activities which deplete forest carbon stocks in areas outside of the project. + +**Permanence** is the ability of a project to protect carbon stocks long-term. Carbon stored in forests is inherently impermanent, given the finite lifespan of trees and the potential for deforestation and catastrophic events such as wildfires. The estimates given in this evaluation assume that all carbon stored is permanent, but in reality this is unlikely to be the case. + +\ + +You can find out more about our plans to deal with leakage and permanence in our [explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence), and in [the full PACT methodology](https://www.cambridge.org/engage/api-gateway/coe/assets/orp/resource/item/647a14a14f8b1884b7b97b55/original/pact-tropical-moist-forest-accreditation-methodology.pdf). + +"}` + +--- + +This report was generated on `r format(Sys.Date(), "%B %d, %Y")` using the `r branch` of the [tmf-implementation code](https://github.com/quantifyearth/tmf-implementation). diff --git a/evaluations/scripts/def_rate.R b/evaluations/scripts/def_rate.R new file mode 100644 index 0000000..6a4c417 --- /dev/null +++ b/evaluations/scripts/def_rate.R @@ -0,0 +1,328 @@ + + + +def_rate <- function(data,t0,period_length,process='all'){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and match + + proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() + cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # choosing processes to measure + + if(process=='def_only'){ + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + } else if(process=='deg_only'){ + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + } else { + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 1, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + } + + + data_filtered$response <- response + + # count up number of pixels where there have been changes for each type + + proj_changes <- data_filtered %>% filter(response==1 & type=='Project') %>% + nrow() + cf_changes <- data_filtered %>% filter(response==1 & type=='Counterfactual') %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + proj_rate <- 100*(proj_changes/proj_1s)/period_length + cf_rate <- 100*(cf_changes/cf_1s)/period_length + + # make df + + df <- data.frame(matrix(ncol=2,nrow=1)) + colnames(df) <- c('Project','Counterfactual') + df[1,1] <- proj_rate + df[1,2] <- cf_rate + + return(df) + +} + + + +def_rate_seperate <- function(data,t0,period_length){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and cf + + proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() + cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # measuring responses + + def_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + deg_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + ref_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 0, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + data_filtered$def_response <- def_response + data_filtered$deg_response <- deg_response + data_filtered$ref_response <- ref_response + + # count up number of pixels where there have been changes for each type + + proj_def_changes <- data_filtered %>% filter(def_response==1 & type=='Project') %>% + nrow() + cf_def_changes <- data_filtered %>% filter(def_response==1 & type=='Counterfactual') %>% + nrow() + + proj_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Project') %>% + nrow() + cf_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Counterfactual') %>% + nrow() + + proj_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Project') %>% + nrow() + cf_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Counterfactual') %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + proj_def <- 100*(proj_def_changes/proj_1s)/period_length + cf_def <- 100*(cf_def_changes/cf_1s)/period_length + + proj_deg <- 100*(proj_deg_changes/proj_1s)/period_length + cf_deg <- 100*(cf_deg_changes/cf_1s)/period_length + + proj_ref <- 100*(proj_ref_changes/proj_1s)/period_length + cf_ref <- 100*(cf_ref_changes/cf_1s)/period_length + + # adding the degraded-to-deforested transition + + data_filtered_2 <- data[data[,t0_index]==2,] + + # count 2s at t0 in project and cf + + proj_2s <- data_filtered_2 %>% filter(type=='Project') %>% nrow() + cf_2s <- data_filtered_2 %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + luc_tend_2 <- data_filtered_2 %>% + select(paste0('luc_',tend)) + + def_response_2 <- case_when( + luc_tend_2==1 ~ 0, + luc_tend_2==2 ~ 0, + luc_tend_2==3 ~ 1, + luc_tend_2==4 ~ 0, + luc_tend_2>4 ~ 0) + + data_filtered_2$def_response_2 <- def_response_2 + + proj_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Project') %>% + nrow() + cf_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Counterfactual') %>% + nrow() + + proj_deg_to_def <- 100*(proj_def_changes_2/proj_2s)/period_length + cf_deg_to_def <- 100*(cf_def_changes_2/cf_2s)/period_length + + # make df + + df <- data.frame(matrix(ncol=4,nrow=8)) + + colnames(df) <- c('Process','Forest type','Location','Rate (%/year)') + + df[1] <- c(rep(c('Degradation','Deforestation','Deforestation','Reforestation'),each=2)) + df[2] <- c(rep(c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest'),each=2)) + df[3] <- c(rep(c('Project','Counterfactual'),times=4)) + df[4] <- c(proj_deg,cf_deg,proj_def,cf_def,proj_deg_to_def,cf_deg_to_def,proj_ref,cf_ref) + + return(df) + +} + +get_prop_class <- function(data,t0,class){ + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + data_filtered <- data[data[,t0_index]==class,] + + total_count <- data %>% nrow() + class_count <- data_filtered %>% nrow() + prop <- class_count/total_count + + return(prop) + +} + + +def_rate_single <- function(data,t0,period_length){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and cf + + no_1s <- nrow(data_filtered) + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # measuring responses + + def_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + deg_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + ref_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 0, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + data_filtered$def_response <- def_response + data_filtered$deg_response <- deg_response + data_filtered$ref_response <- ref_response + + # count up number of pixels where there have been changes for each type + + def_changes <- data_filtered %>% filter(def_response==1) %>% + nrow() + + deg_changes <- data_filtered %>% filter(deg_response==1) %>% + nrow() + + ref_changes <- data_filtered %>% filter(ref_response==1) %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + def <- 100*(def_changes/no_1s)/period_length + + deg <- 100*(deg_changes/no_1s)/period_length + + ref <- 100*(ref_changes/no_1s)/period_length + + # adding the degraded-to-deforested transition + + data_filtered_2 <- data[data[,t0_index]==2,] + + # count 2s at t0 in project and cf + + no_2s <- data_filtered_2 %>% nrow() + + # identify where there have been changes during the evaluation period + + luc_tend_2 <- data_filtered_2 %>% + select(paste0('luc_',tend)) + + def_response_2 <- case_when( + luc_tend_2==1 ~ 0, + luc_tend_2==2 ~ 0, + luc_tend_2==3 ~ 1, + luc_tend_2==4 ~ 0, + luc_tend_2>4 ~ 0) + + data_filtered_2$def_response_2 <- def_response_2 + + def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1) %>% + nrow() + + deg_to_def <- 100*(def_changes_2/no_2s)/period_length + + # make df + + df <- data.frame(matrix(ncol=3,nrow=4)) + + colnames(df) <- c('Process','Forest type','Rate (%/year)') + + df[1] <- c('Degradation','Deforestation','Deforestation','Reforestation') + df[2] <- c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest') + df[3] <- c(deg,def,deg_to_def,ref) + + return(df) + +} \ No newline at end of file diff --git a/evaluations/scripts/land_cover_timeseries.R b/evaluations/scripts/land_cover_timeseries.R new file mode 100644 index 0000000..6490bf1 --- /dev/null +++ b/evaluations/scripts/land_cover_timeseries.R @@ -0,0 +1,111 @@ + +get_luc_timeseries <- function(data,t0,tend,type='both'){ + + years_list <- seq(t0,tend) + + if(type=='both'){ + + df <- data.frame(matrix(ncol=4,nrow=8*length(years_list))) + + colnames(df) <- c('year','type','luc','percentage') + + counter <- 1 + + for(year in years_list) { + + for(i in seq (1:4)) { + + for(type_value in c('Project','Counterfactual')) { + + total <- data %>% filter(type == type_value) %>% nrow() + + no_class_i <- data %>% filter(type == type_value & .data[[paste0('luc_',year)]]==i) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- type_value + df[counter,3] <- i + df[counter,4] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + } else if(type=='single'){ + + df <- data.frame(matrix(ncol=3,nrow=4*length(years_list))) + + colnames(df) <- c('year','luc','percentage') + + counter <- 1 + + for(year in years_list) { + + for(i in seq (1:4)) { + + total <- data %>% nrow() + + no_class_i <- data %>% filter(.data[[paste0('luc_',year)]]==i) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- i + df[counter,3] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + return(drop_na(df)) + +} + +luc_class1_uncertainty <- function(data,t0,tend) { + + years_list <- seq(t0-10,tend) + + df <- data.frame(matrix(ncol=4,nrow=2*length(unique(data$pair))*length(years_list))) + + colnames(df) <- c('year','type','pair','percent_class1') + + counter <- 1 + + for(year in years_list) { + + for(type_value in c('Project','Counterfactual')) { + + for(pair_id in unique(data$pair)) { + + total <- data %>% filter(type == type_value & pair == pair_id) %>% nrow() + + no_class_i <- data %>% filter(type == type_value & pair == pair_id & .data[[paste0('luc_',year)]]==1) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- type_value + df[counter,3] <- pair_id + df[counter,4] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + return(drop_na(df)) + +} + diff --git a/evaluations/scripts/plot_matchingvars.R b/evaluations/scripts/plot_matchingvars.R new file mode 100644 index 0000000..ec47f01 --- /dev/null +++ b/evaluations/scripts/plot_matchingvars.R @@ -0,0 +1,42 @@ +plot_matching_variables <- function(data, ex_ante = 'false') { + + cont_data <- data %>% dplyr::select(type, elevation, slope, access, starts_with('cpc')) + cont_data[, 5:length(cont_data)] <- 100 * cont_data[, 5:length(cont_data)] # cpcs as percentages + cont_data <- melt(cont_data) + + # rename labels + cont_data$variable <- factor(cont_data$variable, + levels = c('access', 'cpc0_u', 'cpc0_d', + 'slope', 'cpc5_u', 'cpc5_d', + 'elevation', 'cpc10_u', 'cpc10_d')) + + levels(cont_data$variable) <- c('Inaccessibility', + 'Forest~cover~t[0]', + 'Deforestation~t[0]', + 'Slope', + 'Forest~cover~t[-5]', + 'Deforestation~t[-5]', + 'Elevation', + 'Forest~cover~t[-10]', + 'Deforestation~t[-10]') + + # determine labels based on ex_ante + if (ex_ante == 'false') { + plot_labels <- c('Counterfactual', 'Project') + } else if (ex_ante == 'true') { + plot_labels <- c('Matched points', 'Project')} + + # plot + matchingvars <- ggplot(data = cont_data, mapping = aes(x = value, colour = type)) + + geom_density(adjust = 10, size = 1) + + facet_wrap(~variable, scales = 'free', nrow = 3, labeller = label_parsed) + + ylab('Density') + + scale_colour_manual(values = c('blue', 'red'), labels = plot_labels) + + theme_classic() + + theme(legend.title = element_blank(), + axis.title.x = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank()) + + return(matchingvars) +} \ No newline at end of file diff --git a/evaluations/scripts/plot_transitions.R b/evaluations/scripts/plot_transitions.R new file mode 100644 index 0000000..2931a60 --- /dev/null +++ b/evaluations/scripts/plot_transitions.R @@ -0,0 +1,63 @@ +library(ggspatial) + +plot_transitions <- function(data,t0,period_length,shapefile){ + + # count number of 1s at project start + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + data_filtered <- data[data[,t0_index]==1,] + + # identify where there have been changes + + tend <- t0 + period_length + + luc_tend <- data_filtered[[paste0('luc_', tend)]] + + response <- case_when( + luc_tend==1 ~ NA, + luc_tend==2 ~ 'deg', + luc_tend==3 ~ 'def', + luc_tend==4 ~ 'ref', + luc_tend>4 ~ NA) + + data_filtered$response <- as.factor(response) + data_filtered <- data_filtered %>% filter(!is.na(response)) + + # adding deg --> def transition + + # count number of 2s at project start + + data_filtered_2s <- data[data[,t0_index]==2,] + + # identify where there have been changes + + luc_tend <- data_filtered_2s[[paste0('luc_', tend)]] + + response <- case_when( + luc_tend==1 ~ NA, + luc_tend==2 ~ NA, + luc_tend==3 ~ 'deg_to_def', + luc_tend==4 ~ NA, + luc_tend>4 ~ NA) + + data_filtered_2s$response <- as.factor(response) + data_filtered_2s <- data_filtered_2s %>% filter(!is.na(response)) + + combined_dat <- bind_rows(data_filtered, data_filtered_2s) + combined_dat$response <- factor(combined_dat$response, levels=c('deg','deg_to_def','def','ref')) + + # plotting + + plot <- combined_dat %>% + filter(response != 0) %>% + ggplot(aes(x=lng,y=lat,colour=response))+ + geom_sf(data=shapefile,inherit.aes=F,fill='grey80',colour=NA)+ + geom_point(alpha=0.5,size=0.5)+ + scale_colour_manual(values=c('yellow','orange','red','green'),name='Transition',labels=c('Undisturbed to degraded','Degraded to deforested','Undisturbed to deforested','Undisturbed to reforested'))+ + annotation_scale(text_cex = 1.3)+ + theme_void() + + return(plot) + +} diff --git a/evaluations/scripts/std_mean_diff.R b/evaluations/scripts/std_mean_diff.R new file mode 100644 index 0000000..63d81ba --- /dev/null +++ b/evaluations/scripts/std_mean_diff.R @@ -0,0 +1,57 @@ + +std_mean_diff <- function(path_to_pairs) { + + # clean data + + files_full_raw <- list.files(path_to_pairs, + pattern='*.parquet',full.names=T,recursive=F) + files_full <- files_full_raw[!grepl('matchless',files_full_raw)] + files_short_raw <- list.files(path=path_to_pairs, + pattern='*.parquet',full.names=F,recursive=F) + files_short <- files_short_raw[!grepl('matchless',files_short_raw)] + + # initialise dfs + + vars <- c(colnames(read_parquet(files_full[1])),'pair') + df <- data.frame(matrix(ncol=length(vars),nrow=0)) + colnames(df) <- vars + + for(j in 1:length(files_full)){ + + # read in all parquet files for a given project + + f <- data.frame(read_parquet(files_full[j])) + + # append data to bottom of df + + df <- bind_rows(df,f) + + } + + # calculate smd + + smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE) + + variables <- c('cpc10_d','cpc5_d','cpc0_d', + 'cpc10_u','cpc5_u','cpc0_u', + 'access','slope','elevation') + + for (var in variables) { + k_var <- df[[paste0("k_", var)]] + s_var <- df[[paste0("s_", var)]] + + k_mean <- mean(k_var, na.rm = TRUE) + s_mean <- mean(s_var, na.rm = TRUE) + k_sd <- sd(k_var, na.rm = TRUE) + s_sd <- sd(s_var, na.rm = TRUE) + + pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2) + smd <- (k_mean - s_mean) / pooled_sd + + smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE)) + } + + return(smd_results) +} + + \ No newline at end of file diff --git a/methods/inputs/generate_slope.py b/methods/inputs/generate_slope.py index 891e26f..7d58988 100644 --- a/methods/inputs/generate_slope.py +++ b/methods/inputs/generate_slope.py @@ -111,138 +111,123 @@ def generate_slope(input_elevation_directory: str, output_slope_directory: str): continue with tempfile.TemporaryDirectory() as tmpdir: - elevation = RasterLayer.layer_from_file(elev_path) - - logging.info("Area of elevation tile %a", elevation.area) - _easting, _northing, lower_code, lower_letter = utm.from_latlon( - elevation.area.bottom, elevation.area.left - ) - _easting, _northing, upper_code, upper_letter = utm.from_latlon( - elevation.area.top, elevation.area.right - ) - - # FAST PATH -- with only one UTM zone the reprojection back has no issues - if lower_code == upper_code and lower_letter == upper_letter: - actual_utm_code = lower_code - warp( - actual_utm_code, - elev_path, - elevation.pixel_scale.xstep, - elevation.pixel_scale.ystep, - out_path, + with RasterLayer.layer_from_file(elev_path) as elevation: + + logging.info("Area of elevation tile %a", elevation.area) + _easting, _northing, lower_code, lower_letter = utm.from_latlon( + elevation.area.bottom, elevation.area.left ) - else: - # SLOW PATH -- in the slow path, we have to break the elevation raster into - # UTM sections and do the above to each before reprojecting back and recombining - - # To capture the results here for later inspection just override the tmpdir variable - for actual_utm_code in range(lower_code, upper_code + 1): - for utm_letter in crange(lower_letter, upper_letter): - logging.debug("UTM(%s,%s)", actual_utm_code, utm_letter) - - # Note: we go a little bit around the UTM tiles and will crop them down to size later - # this is to remove some aliasing effects. - bbox = bounding_box_of_utm(actual_utm_code, utm_letter, UTM_EXPANSION_DEGREES) - - # Crop the elevation tile to a UTM zone - utm_layer = RasterLayer.empty_raster_layer_like( - elevation, area=bbox - ) - utm_id = f"{actual_utm_code}-{utm_letter}-{elevation_path}" - utm_clip_path = os.path.join(tmpdir, utm_id) - intersection = RasterLayer.find_intersection( - [elevation, utm_layer] - ) - result = RasterLayer.empty_raster_layer( - intersection, - elevation.pixel_scale, - elevation.datatype, - utm_clip_path, - elevation.projection, - ) - result.set_window_for_intersection(intersection) - elevation.set_window_for_intersection(intersection) - elevation.save(result) - - # Flush elevation utm clip to disk - del result - - # Now warp into UTM, calculate slopes, and warp back - slope_out_path = os.path.join(tmpdir, "out-slope-" + utm_id) - warp( - actual_utm_code, - utm_clip_path, - elevation.pixel_scale.xstep, - elevation.pixel_scale.ystep, - slope_out_path, - ) - - # We now recrop the out-slope back to the bounding box we assumed at the start - bbox_no_expand = bounding_box_of_utm( - actual_utm_code, utm_letter, 0.0 - ) - slope_tif = RasterLayer.layer_from_file(slope_out_path) - grid = RasterLayer.empty_raster_layer_like( - slope_tif, area=bbox_no_expand - ) - output_final = f"final-slope-{actual_utm_code}-{utm_letter}-{elevation_path}" - final_path = os.path.join(tmpdir, output_final) - logging.debug("Slope underlying %s", slope_tif._underlying_area) # pylint: disable=W0212 - logging.debug("Grid underling %s", grid._underlying_area) # pylint: disable=W0212 - try: - intersection = RasterLayer.find_intersection([slope_tif, grid]) - except ValueError: - logging.debug( - "UTM (%s, %s) didn't intersect actual area %s", - actual_utm_code, - utm_letter, - grid._underlying_area # pylint: disable=W0212 - ) - continue - slope_tif.set_window_for_intersection(intersection) - final = RasterLayer.empty_raster_layer( - intersection, - slope_tif.pixel_scale, - slope_tif.datatype, - final_path, - slope_tif.projection, - ) - logging.debug("Final underlying %s", final._underlying_area) # pylint: disable=W0212 - final.set_window_for_intersection(intersection) - slope_tif.save(final) - - # Flush - del final - - # Now to recombine the UTM gridded slopes into the slope tile - slopes = glob("final-slope-*", root_dir=tmpdir) - assert len(slopes) > 0 - - # This sets the order a little better for the union of the layers - slopes.sort() - slopes.reverse() - - logging.info("Render order %s", slopes) - - combined = GroupLayer( - [ - RasterLayer.layer_from_file(os.path.join(tmpdir, filename)) - for filename in slopes - ] + _easting, _northing, upper_code, upper_letter = utm.from_latlon( + elevation.area.top, elevation.area.right ) - elevation = RasterLayer.layer_from_file(elev_path) - intersection = RasterLayer.find_intersection([elevation, combined]) - combined.set_window_for_intersection(intersection) - elevation.set_window_for_intersection(intersection) - - assembled_path = os.path.join(tmpdir, "patched.tif") - result = RasterLayer.empty_raster_layer_like( - elevation, filename=assembled_path - ) - combined.save(result) + # FAST PATH -- with only one UTM zone the reprojection back has no issues + if lower_code == upper_code and lower_letter == upper_letter: + actual_utm_code = lower_code + warp( + actual_utm_code, + elev_path, + elevation.pixel_scale.xstep, + elevation.pixel_scale.ystep, + out_path, + ) + else: + # SLOW PATH -- in the slow path, we have to break the elevation raster into + # UTM sections and do the above to each before reprojecting back and recombining + + # To capture the results here for later inspection just override the tmpdir variable + for actual_utm_code in range(lower_code, upper_code + 1): + for utm_letter in crange(lower_letter, upper_letter): + logging.debug("UTM(%s,%s)", actual_utm_code, utm_letter) + + # Note: we go a little bit around the UTM tiles and will crop them down to size later + # this is to remove some aliasing effects. + bbox = bounding_box_of_utm(actual_utm_code, utm_letter, UTM_EXPANSION_DEGREES) + + # Crop the elevation tile to a UTM zone + with RasterLayer.empty_raster_layer_like(elevation, area=bbox) as utm_layer: + utm_id = f"{actual_utm_code}-{utm_letter}-{elevation_path}" + utm_clip_path = os.path.join(tmpdir, utm_id) + intersection = RasterLayer.find_intersection( + [elevation, utm_layer] + ) + with RasterLayer.empty_raster_layer( + intersection, + elevation.pixel_scale, + elevation.datatype, + utm_clip_path, + elevation.projection, + ) as result: + result.set_window_for_intersection(intersection) + elevation.set_window_for_intersection(intersection) + elevation.save(result) + + # Now warp into UTM, calculate slopes, and warp back + slope_out_path = os.path.join(tmpdir, "out-slope-" + utm_id) + warp( + actual_utm_code, + utm_clip_path, + elevation.pixel_scale.xstep, + elevation.pixel_scale.ystep, + slope_out_path, + ) - shutil.move(assembled_path, out_path) + # We now recrop the out-slope back to the bounding box we assumed at the start + bbox_no_expand = bounding_box_of_utm( + actual_utm_code, utm_letter, 0.0 + ) + with RasterLayer.layer_from_file(slope_out_path) as slope_tif: + with RasterLayer.empty_raster_layer_like(slope_tif, area=bbox_no_expand) as grid: + output_final = f"final-slope-{actual_utm_code}-{utm_letter}-{elevation_path}" + final_path = os.path.join(tmpdir, output_final) + logging.debug("Slope underlying %s", slope_tif._underlying_area) # pylint: disable=W0212 + logging.debug("Grid underling %s", grid._underlying_area) # pylint: disable=W0212 + try: + intersection = RasterLayer.find_intersection([slope_tif, grid]) + except ValueError: + logging.debug( + "UTM (%s, %s) didn't intersect actual area %s", + actual_utm_code, + utm_letter, + grid._underlying_area # pylint: disable=W0212 + ) + continue + slope_tif.set_window_for_intersection(intersection) + with RasterLayer.empty_raster_layer( + intersection, + slope_tif.pixel_scale, + slope_tif.datatype, + final_path, + slope_tif.projection, + ) as final: + logging.debug("Final underlying %s", final._underlying_area) # pylint: disable=W0212 + final.set_window_for_intersection(intersection) + slope_tif.save(final) + + # Now to recombine the UTM gridded slopes into the slope tile + slopes = glob("final-slope-*", root_dir=tmpdir) + assert len(slopes) > 0 + + # This sets the order a little better for the union of the layers + slopes.sort() + slopes.reverse() + + logging.info("Render order %s", slopes) + + files = [os.path.join(tmpdir, filename) for filename in slopes] + with GroupLayer.layer_from_files(files) as combined: + with RasterLayer.layer_from_file(elev_path) as elevation: + intersection = RasterLayer.find_intersection([elevation, combined]) + combined.set_window_for_intersection(intersection) + elevation.set_window_for_intersection(intersection) + + assembled_path = os.path.join(tmpdir, "patched.tif") + with RasterLayer.empty_raster_layer_like( + elevation, filename=assembled_path + ) as result: + combined.save(result) + + shutil.move(assembled_path, out_path) def main() -> None: diff --git a/requirements.txt b/requirements.txt index bad4673..8118a36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ scipy numba matplotlib geojson -git+https://github.com/quantifyearth/yirgacheffe@bd2e91c773a414f66340ebb8c13044a1b1a6045f +git+https://github.com/quantifyearth/yirgacheffe@cc89b4d8a0e97c1a11b730cd688a58b680023336 git+https://github.com/carboncredits/biomass-recovery@9e54f80832a7eca915ebd13b03df9db2a08aee9d # developement diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index 0306fdc..12b5574 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -1,17 +1,17 @@ #!/bin/bash -#run with command: scripts/tmfpython.sh -i 'maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out' -p 1113 -t 2010 ... +#run with command: scripts/tmfpython.sh -i '/maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out' -p 1113 -t 2010 ... #i: input dir - directory containing project shapefiles #o: output dir - directory containing pipeline outputs #p: project name/ID - must match name of shapefile #t: year of project start (t0) #e: evaluation year (default: 2022) -#v: verbose - whether to run an ex-post evaluation and knit the results in an R notebook (true/false, default: false). +#r: report - whether to run an ex-post evaluation and knit the results in an R notebook (true/false, default: false). #NB running evaluations requires the evaluations code # Check which branch is currently checked out -#current_branch=$(git rev-parse --abbrev-ref HEAD) +branch=$(git rev-parse --abbrev-ref HEAD) set -e @@ -20,7 +20,7 @@ set -e input_dir="" output_dir="" eval_year=2022 -verbose=false +report=false ##################################### @@ -33,14 +33,14 @@ function display_help() { echo " -p Project name" echo " -t Start year" echo " -e Evaluation year" - echo " -v Knit ex post evaluation as .Rmd? (true/false)" + echo " -r Knit ex post evaluation as .Rmd? (true/false)" echo " -h Display this help message" echo "Example:" echo " $0 -i '/maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out -p 1201 -t 2012" } # Parse arguments -while getopts "i:o:p:t:e:v:h" flag +while getopts "i:o:p:t:e:r:h" flag do case "${flag}" in i) input_dir=${OPTARG};; @@ -48,7 +48,7 @@ do p) proj=${OPTARG};; t) t0=${OPTARG};; e) eval_year=${OPTARG};; - r) verbose=${OPTARG};; + r) report=${OPTARG};; a) ex_ante=${OPTARG};; h) display_help; exit 0;; *) echo "Invalid option: -${OPTARG}" >&2; display_help; exit 1;; @@ -60,7 +60,7 @@ echo "Output directory: $output_dir" echo "Project: $proj" echo "t0: $t0" echo "Evaluation year: $eval_year" -echo "Ex-post evaluation: $verbose" +echo "Create report: $report" if [ $# -eq 0 ]; then display_help @@ -203,9 +203,8 @@ tmfpython3 -m methods.outputs.calculate_additionality \ --output "${output_dir}/${proj}/additionality.csv" echo "--Additionality calculated.--" -# Run ex post evaluation -if [ "$verbose" == "true" ]; then - evaluations_dir="~/evaluations" - ep_output_file="${evaluations_dir}/${proj}_ex_post_evaluation.html" - Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_post_evaluation_template.Rmd',output_file='${ep_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}'))" +# Knit report file +if [ "$report" == "true" ]; then + report_output_file="${output_dir}/${proj}_report.html" + Rscript -e "rmarkdown::render(input='./evaluations/pipeline_results.Rmd',output_file='${report_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))" fi