From c5078ff21427d8fcdfd98bbb340e01702f543bd7 Mon Sep 17 00:00:00 2001
From: imranlala99 <65369472+imranlala99@users.noreply.github.com>
Date: Tue, 14 Oct 2025 10:32:36 +0300
Subject: [PATCH] converted notebooks into scripts for data loading pipeline

---
 .DS_Store                            | Bin 8196 -> 10244 bytes
 notebooks/README.md                  |  51 ++
 notebooks/analyzer_playground.ipynb  | 752 ++++++++++++++++++++++++---
 notebooks/merge_lwb_tile_stats.ipynb |   2 +-
 scripts/README.md                    |  11 +
 scripts/download_lwb_rasters.py      | 112 ++++
 scripts/download_s2_monthly.py       | 149 ++++++
 scripts/export_tile_stats.py         | 254 +++++++++
 scripts/generate_tile_grid.py        |  62 +++
 scripts/merge_lwb_tile_stats.py      | 115 ++++
 utils/lamwo_pipeline.py              | 138 +++++
 11 files changed, 1572 insertions(+), 74 deletions(-)
 create mode 100644 notebooks/README.md
 create mode 100644 scripts/README.md
 create mode 100644 scripts/download_lwb_rasters.py
 create mode 100644 scripts/download_s2_monthly.py
 create mode 100644 scripts/export_tile_stats.py
 create mode 100644 scripts/generate_tile_grid.py
 create mode 100644 scripts/merge_lwb_tile_stats.py
 create mode 100644 utils/lamwo_pipeline.py

diff --git a/.DS_Store b/.DS_Store
index 4b65d6f13133a06765a060ea0d5e69c9df910cf2..1d1da61f57734bdb1098e3e6a68649575ae0f0f7 100644
GIT binary patch
literal 10244

diff --git a/notebooks/README.md b/notebooks/README.md
new file mode 100644
index 0000000..8ae3fc5
--- /dev/null
+++ b/notebooks/README.md
@@ -0,0 +1,51 @@
+# Lamwo Composite Pipeline
+
+This folder contains the Jupyter notebooks used to build the Lamwo district monthly Sentinel-2 composites and related biomass statistics. The notebooks are meant to be run in Google Colab with access to the Sunbird shared Google Drive and Google Earth Engine (EE).
+
+## Prerequisites
+- Google account with access to the `Sunbird AI/Projects` shared drive.
+- Google Colab runtime with GPU/CPU high-RAM recommended.
+- Earth Engine account authorized for `ee-isekalala` (adjust if needed).
+- Python packages installed within the Colab session, including `pystac-client`, `rio-tiler`, `boto3`, `python-dateutil`, `geopandas`, `shapely`, `rasterio`, `earthengine-api`, `rasterstats`, and plotting libraries.
+- Expected directory structure in Drive:
+  - `/content/drive/Shareddrives/Sunbird AI/Projects/suntrace/suntrace-multimodal/data` for outputs.
+  - Administrative boundaries in `/content/drive/Shareddrives/Sunbird AI/Projects/GIZ Mini-grid Identification/Phase II/Data/administrative areas/`.
+
+## Recommended Run Order
+1. **`Pull S2 imagery for Lamwo_monthly composites.ipynb`**
+   Creates a 1 km tile grid, pulls monthly Sentinel-2 L2A composites from the AWS STAC catalogue, and saves per-tile `.npy` cubes plus the grid GeoJSON to Drive.
+
+2. **`Precomputed Aggregates.ipynb`**
+   Authenticates with EE, computes per-tile statistics over several datasets (NDVI, elevation, climate, buildings), and exports the per-tile statistics as CSV/GeoJSON.
+
+3. **`get_lwb.ipynb`**
+   Downloads Aboveground Live Woody Biomass rasters per tile from ArcGIS, clips them to the Lamwo AOI, and stores GeoTIFFs locally in the Colab session.
+
+4. **`merge_lwb_tile_stats.ipynb`**
+   Loads the biomass rasters and EE tile statistics, computes total biomass per tile, and merges results into GeoJSON/CSV artifacts ready for downstream analysis.
+
+## Inputs and Outputs (by step)
+- `Pull S2 imagery …`
+  - Inputs: Lamwo AOI GeoPackage, Sentinel-2 STAC (`earth-search.aws.element84.com`).
+  - Outputs: `grid.geojson`, monthly `.npy` files per tile/year in Drive.
+- `Precomputed Aggregates`
+  - Inputs: Tile grid (`grid.geojson`), EE datasets (S2_SR, SRTM, PAR, CHIRPS, VIIRS, building layers).
+  - Outputs: EE Drive export (CSV) or downloaded GeoJSON with tile-level statistics.
+- `get_lwb`
+  - Inputs: Lamwo AOI, ArcGIS feature service `Aboveground_Live_Woody_Biomass_Density`.
+  - Outputs: Downloaded `.tif` rasters and clipped versions for intersecting tiles.
+- `merge_lwb_tile_stats`
+  - Inputs: Biomass raster (`aglwb.tif` or clipped stack), tile grid GeoJSON, tile stats CSV.
+  - Outputs: Merged GeoJSON/CSV with biomass totals and calculated metrics per tile.
+
+## Operational Tips
+- Verify Drive paths at the top of each notebook; update them to your Drive mount if it differs.
+- When running EE exports, monitor the EE Tasks tab for completion before proceeding.
+- `merge_lwb_tile_stats.ipynb` requires selecting the correct biomass raster unit (`Mg_per_pixel` vs `Mg_per_ha`). Check the dataset metadata before merging.
+- Large raster downloads may time out; re-run the `get_lwb` download cell for failed tiles.
+- Keep secrets (API keys, service accounts) out of notebooks; rely on the EE auth prompts in Colab.
+
+## Troubleshooting Checklist
+- Missing imports: re-run the initial install cells or add `!pip install` for any new packages.
+- CRS mismatches: the merge notebook reprojects to UTM 36N (EPSG:32636); ensure custom rasters share this CRS or reproject them first (see the example below).
+- Drive quota: exported artifacts land in `ee_exports` or the `data` folder; clean up old runs to stay within limits.
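+
+For CRS mismatches, a custom raster can be reprojected to UTM 36N before the merge step. The snippet below is a minimal sketch rather than part of the notebooks: the file names are placeholders, and it assumes `rioxarray` (with its `rasterio` dependency) is installed in the Colab session, matching the approach used in `scripts/merge_lwb_tile_stats.py`.
+
+```python
+import rioxarray as rxr
+from rasterio.warp import Resampling
+
+# Open the custom raster and inspect its CRS before merging.
+raster = rxr.open_rasterio("my_custom_biomass.tif")  # placeholder path
+print(raster.rio.crs)
+
+# Reproject to UTM 36N (EPSG:32636) so it lines up with the tile grid used by the merge notebook.
+raster_utm = raster.rio.reproject("EPSG:32636", resampling=Resampling.nearest)
+raster_utm.rio.to_raster("my_custom_biomass_utm36n.tif")  # placeholder path
+```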
diff --git a/notebooks/analyzer_playground.ipynb b/notebooks/analyzer_playground.ipynb index b15056a..98829f0 100644 --- a/notebooks/analyzer_playground.ipynb +++ b/notebooks/analyzer_playground.ipynb @@ -15,7 +15,7 @@ "parent_dir = os.path.abspath(os.path.join(notebook_dir, '..'))\n", "if parent_dir not in sys.path:\n", " sys.path.append(parent_dir)\n", - "from src.utils.factory import create_geospatial_analyzer" + "from utils.factory import create_geospatial_analyzer" ] }, { @@ -33,17 +33,82 @@ { "cell_type": "code", "execution_count": 3, + "id": "0fdf8c74", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.factory import create_geospatial_analyzer" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9bfcb05e", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.GeospatialAnalyzer2 import GeospatialAnalyzer2\n", + "from shapely.geometry import Polygon\n", + "\n", + "ga = create_geospatial_analyzer(use_postgis=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "fc0235e6", "metadata": {}, "outputs": [], "source": [ "sample_region = gpd.read_file(\"../data/sample_region_mudu/mudu_village.gpkg\")\n", - "sample_region_geometry = sample_region.geometry.iloc[0] #What you pass into the GeospatialAnalyzer" + "sample_region_geometry = sample_region.geometry.iloc[0]" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, + "id": "b83cb80d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "268" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ga.count_features_within_region(region=sample_region_geometry, layer_or_table=\"public.lamwo_buildings\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "6ae07ea5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "66627.03865642505" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ga.region_total_biomass(region=sample_region_geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "id": "a7bdc3f0", "metadata": {}, "outputs": [ @@ -77,7 +142,7 @@ " <meta name="viewport" content="width=device-width,\n", " initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />\n", " <style>\n", - " #map_d4f7c5a5d29ed27b2ed204fe05a7a681 {\n", + " #map_a2d05813a5304e45c8e03240731294c4 {\n", " position: relative;\n", " width: 100.0%;\n", " height: 100.0%;\n", @@ -91,16 +156,16 @@ "<body>\n", " \n", " \n", - " <div class="folium-map" id="map_d4f7c5a5d29ed27b2ed204fe05a7a681" ></div>\n", + " <div class="folium-map" id="map_a2d05813a5304e45c8e03240731294c4" ></div>\n", " \n", "</body>\n", "<script>\n", " \n", " \n", - " var map_d4f7c5a5d29ed27b2ed204fe05a7a681 = L.map(\n", - " "map_d4f7c5a5d29ed27b2ed204fe05a7a681",\n", + " var map_a2d05813a5304e45c8e03240731294c4 = L.map(\n", + " "map_a2d05813a5304e45c8e03240731294c4",\n", " {\n", - " center: [3.4813802487434833, 32.50141743753237],\n", + " center: [3.8082958523765407, 33.02349033544537],\n", " crs: L.CRS.EPSG3857,\n", " zoom: 13,\n", " zoomControl: true,\n", @@ -112,38 +177,38 @@ "\n", " \n", " \n", - " var tile_layer_e4bb8a7dee361dcd28a6d985395cc415 = L.tileLayer(\n", + " var tile_layer_ebfa4d49afc21819af2ad96468a2f72e = L.tileLayer(\n", " "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png",\n", " {"attribution": "Data by \\u0026copy; \\u003ca target=\\"_blank\\" href=\\"http://openstreetmap.org\\"\\u003eOpenStreetMap\\u003c/a\\u003e, under 
\\u003ca target=\\"_blank\\" href=\\"http://www.openstreetmap.org/copyright\\"\\u003eODbL\\u003c/a\\u003e.", "detectRetina": false, "maxNativeZoom": 18, "maxZoom": 18, "minZoom": 0, "noWrap": false, "opacity": 1, "subdomains": "abc", "tms": false}\n", - " ).addTo(map_d4f7c5a5d29ed27b2ed204fe05a7a681);\n", + " ).addTo(map_a2d05813a5304e45c8e03240731294c4);\n", " \n", " \n", "\n", - " function geo_json_278a1aec0cbb54be38be3c1c04c4af24_onEachFeature(feature, layer) {\n", + " function geo_json_7f55d6402316b5eb395d0f4fbf36fa04_onEachFeature(feature, layer) {\n", " layer.on({\n", " });\n", " };\n", - " var geo_json_278a1aec0cbb54be38be3c1c04c4af24 = L.geoJson(null, {\n", - " onEachFeature: geo_json_278a1aec0cbb54be38be3c1c04c4af24_onEachFeature,\n", + " var geo_json_7f55d6402316b5eb395d0f4fbf36fa04 = L.geoJson(null, {\n", + " onEachFeature: geo_json_7f55d6402316b5eb395d0f4fbf36fa04_onEachFeature,\n", " \n", " });\n", "\n", - " function geo_json_278a1aec0cbb54be38be3c1c04c4af24_add (data) {\n", - " geo_json_278a1aec0cbb54be38be3c1c04c4af24\n", + " function geo_json_7f55d6402316b5eb395d0f4fbf36fa04_add (data) {\n", + " geo_json_7f55d6402316b5eb395d0f4fbf36fa04\n", " .addData(data)\n", - " .addTo(map_d4f7c5a5d29ed27b2ed204fe05a7a681);\n", + " .addTo(map_a2d05813a5304e45c8e03240731294c4);\n", " }\n", - " geo_json_278a1aec0cbb54be38be3c1c04c4af24_add({"coordinates": [[[[32.48048488872554, 3.468747811173695], [32.48158730835399, 3.4787680053782806], [32.4821002301256, 3.486264423171222], [32.4823904075939, 3.4873498296199466], [32.4831705626519, 3.4874525204521136], [32.48442633978543, 3.487193445489093], [32.48542360337253, 3.4869342267525596], [32.486051541517895, 3.4867119137800335], [32.486346722877435, 3.4871945006599407], [32.48656808411788, 3.487602828066962], [32.487490923097916, 3.488382634450169], [32.48859833631059, 3.4893109807433262], [32.49055515615484, 3.4902397879367872], [32.49302906825129, 3.491057543417667], [32.4943215416863, 3.4912437913963004], [32.49491215421474, 3.49176364554485], [32.49491177710217, 3.4924687289197966], [32.49454165484743, 3.4939900236331995], [32.49361754978893, 3.495548128326089], [32.49272957352964, 3.49859063585541], [32.49243298680232, 3.50070572335012], [32.49243274685191, 3.501151039009583], [32.4926905244632, 3.5025242352215544], [32.49272701552271, 3.5033406667554354], [32.49324357178191, 3.5042315784129423], [32.493501710416645, 3.5049368019738756], [32.49335374545577, 3.5053820364492996], [32.49327946350071, 3.506161298260502], [32.492909750416416, 3.50690328973143], [32.49309388683678, 3.507868239613714], [32.49298281309148, 3.5083877138914406], [32.493093386533374, 3.5087959803749507], [32.493351145607, 3.5102062861218397], [32.49327654234424, 3.511579301361916], [32.49353444200618, 3.5127298398306928], [32.49397697918891, 3.5139175892289516], [32.49441943821145, 3.5152537765032448], [32.4948616383726, 3.51707238798006], [32.49534117012395, 3.518148827879821], [32.495820703735575, 3.519225267667847], [32.495884928522784, 3.51943864511256], [32.496324586544, 3.5192456538495946], [32.49643205024559, 3.518743058364269], [32.496735957751454, 3.5184021296157466], [32.497307796086716, 3.5181511125081797], [32.49741523047775, 3.517720320350918], [32.497594057281404, 3.5173972857089026], [32.497665794859216, 3.516894661559752], [32.49773746562843, 3.516499756274787], [32.49816640428136, 3.5162127539306494], [32.49813089982505, 3.5157818843615565], [32.49784539925985, 3.5151354590487127], [32.497774151883675, 3.514704570168767], [32.49848889618323, 
3.514525432746202], [32.49866763497869, 3.5143819120701156], [32.49877528917611, 3.513520278286795], [32.498989886860656, 3.513125440771164], [32.499633141812986, 3.5129821790988514], [32.499954874533096, 3.512695118803111], [32.50080126511249, 3.512229001225523], [32.500868246470276, 3.5122599596828974], [32.501654475704825, 3.512009054213361], [32.502119049931956, 3.5119015930835276], [32.50215444121952, 3.512547883531995], [32.50247584966995, 3.512871196297074], [32.50297615829259, 3.512763753003688], [32.50336933994494, 3.512512626849901], [32.503655384510814, 3.5121537414821278], [32.50408430060276, 3.511902643077808], [32.50444174677033, 3.51165150636533], [32.504406240175776, 3.511220635704248], [32.50465658786977, 3.5107899181916222], [32.50496035733709, 3.5107003140039743], [32.5053889879679, 3.5109877756160457], [32.50567478566425, 3.5110956474941433], [32.50606792823154, 3.510916332682496], [32.50638972500335, 3.510521558868281], [32.5064613913311, 3.510126652265659], [32.50649725725789, 3.509875334007539], [32.50689034264163, 3.5098037387100787], [32.5073192380905, 3.5095885448816335], [32.507462390488, 3.509157769601357], [32.507641315111535, 3.508655199559857], [32.50817742122202, 3.50840415555829], [32.50896366565806, 3.5081173367926866], [32.51007166553937, 3.5075075545552603], [32.510643597750196, 3.507077004013392], [32.510894053489096, 3.5064308532075223], [32.51075155685401, 3.505604982169003], [32.510751948942094, 3.504850999833844], [32.510930796115126, 3.504492046121968], [32.51125264353956, 3.503989558978976], [32.51150319092115, 3.5031638939790635], [32.511735527385476, 3.503002445056139], [32.51245002828068, 3.503254146535466], [32.51262881869761, 3.503002912421294], [32.513272001066404, 3.50296734199973], [32.5135220269105, 3.5031469963548383], [32.51373644898821, 3.503075304450758], [32.514022469037656, 3.502752312214746], [32.514737303702944, 3.5023577393301264], [32.51488035971721, 3.502106486076759], [32.514880748112944, 3.501352493271126], [32.51516689656994, 3.5007781729118532], [32.51595329811655, 3.500168214673021], [32.516346379181826, 3.500096605654974], [32.516453451838196, 3.500347988896005], [32.51673928504994, 3.5003840433988813], [32.51681103874017, 3.4998096112797286], [32.51681135170066, 3.499199244779926], [32.517204947252196, 3.4981223235461325], [32.51724103006672, 3.4974401526784944], [32.51716968240447, 3.497224695330492], [32.517405725187245, 3.4968937553693418], [32.51749167383759, 3.496520401872246], [32.5177775977778, 3.496376931944854], [32.51806360069095, 3.496061118324467], [32.51840670241452, 3.495917677665076], [32.51869252912327, 3.4959465519165223], [32.51886418981298, 3.4956594060377446], [32.51926453818917, 3.4953436499302026], [32.519722005773815, 3.4951428226762338], [32.519922070146364, 3.4952003703587122], [32.520236543708336, 3.4951143600214603], [32.52057967403343, 3.494913473597972], [32.52089408912822, 3.4949423618094126], [32.52138001923698, 3.494971337776413], [32.52178022102875, 3.494942815686901], [32.521980410957575, 3.4947705734664405], [32.52226631029005, 3.4946558298580754], [32.522380714438725, 3.4945409976250477], [32.52243808596937, 3.494138892730802], [32.52246679073634, 3.4939091268840996], [32.52258122934686, 3.4937081137019987], [32.52258147629985, 3.4932198158654404], [32.52269594292081, 3.492961366807837], [32.523181988249696, 3.492760552177892], [32.523524972223065, 3.492846899801961], [32.52378230583631, 3.4927034131788752], [32.52398248592445, 3.492531170401611], [32.52435415068111, 
3.4924164687695343], [32.524554258304036, 3.4923878435402895], [32.524697254919154, 3.492244298493339], [32.52484023793653, 3.492129481301166], [32.525126202974896, 3.4918998368842797], [32.52484052696113, 3.4915550113998797], [32.524640506255615, 3.491411292040528], [32.52466921577482, 3.4911527990461244], [32.52489794044563, 3.4910667433186973], [32.525212413254984, 3.4909807301443285], [32.52509827879957, 3.49057854686812], [32.52506825868284, 3.4902905932061024], [32.52504138530922, 3.4900327757995173], [32.52515590852732, 3.489659426498161], [32.524986738055325, 3.489582049914081], [32.523552458201436, 3.488926027446037], [32.52337167207994, 3.487923998488283], [32.523009411559954, 3.487286217821829], [32.522647289391415, 3.486375181637022], [32.52074428944311, 3.485281188280533], [32.519113184244475, 3.4842784162920304], [32.51793535421918, 3.4831847890578103], [32.517451232510254, 3.482407471888167], [32.51730892276702, 3.481690558154806], [32.51666745094285, 3.4805791258874788], [32.51631113032173, 3.479862101861694], [32.51641841042343, 3.479324526810858], [32.51645439011667, 3.478715231501424], [32.51627663189519, 3.4775681957564633], [32.51602748152974, 3.476528648941331], [32.51560042880375, 3.474628802102087], [32.51513790995334, 3.472334675449666], [32.514960299863056, 3.470900903066757], [32.514997211181935, 3.4684636658768326], [32.515105221605516, 3.4664924101862926], [32.514922435342456, 3.466221430120931], [32.51508217927654, 3.466166485664234], [32.51498857177943, 3.464842432612321], [32.51472411318697, 3.4636703634107326], [32.51475220540859, 3.462672526290083], [32.51420522168023, 3.4620489446422598], [32.513356693272385, 3.4603785678148977], [32.51250847107714, 3.4581144346489663], [32.513100592479766, 3.4556654952348413], [32.51166201747008, 3.4523990936780566], [32.51110849668014, 3.451582392976641], [32.510554862749025, 3.450988349841166], [32.51026298058944, 3.4502703173542293], [32.510222917796874, 3.450171763393459], [32.50933642802496, 3.4505424024158935], [32.50863441205729, 3.451247122948698], [32.50708323321726, 3.4515431940654513], [32.50593812175646, 3.4521363533337297], [32.50490393094858, 3.4524698000241503], [32.5040541354137, 3.453285768790063], [32.503130170237114, 3.454695452906811], [32.502059279113624, 3.454583560680507], [32.50054517959573, 3.4545827619728304], [32.49991745926117, 3.454433990923552], [32.49766422850404, 3.455471867750929], [32.496187037261215, 3.4555081910266896], [32.49513417002746, 3.4562312680827407], [32.494302946226995, 3.4568245780871543], [32.49361976422856, 3.4568056573893484], [32.49299198596691, 3.456768211167876], [32.49234563371769, 3.4569348569761025], [32.49190261105905, 3.456693405974649], [32.49136688892641, 3.4571569885943028], [32.490794444192716, 3.457230899247438], [32.48994472291689, 3.4578798600849394], [32.489409396599875, 3.4576012485815357], [32.48907704288736, 3.457582514109147], [32.48802417886843, 3.458287026422337], [32.48745163464967, 3.458546483266468], [32.48702695228974, 3.4585360923293043], [32.48667612834754, 3.458527507267592], [32.48628822074641, 3.4588056184343112], [32.48621401141944, 3.459454996912093], [32.48604749965497, 3.4600672147083147], [32.48567798486088, 3.460475218724468], [32.48508731501778, 3.4601038008777887], [32.48490245808927, 3.4604933505143416], [32.48479142858523, 3.4609386055302354], [32.4846251655596, 3.461086953577119], [32.483886406882185, 3.4614019812692316], [32.48355378067396, 3.461884224264051], [32.48368280200405, 3.4623110547742972], [32.48416253357158, 
3.4629607358416554], [32.483903695206955, 3.4635729024156974], [32.48355248258919, 3.4642777919495313], [32.482961478177494, 3.4645186804693284], [32.481834713020184, 3.4652788085138364], [32.4813360827773, 3.465426972985357], [32.48102237379282, 3.465074258615441], [32.48072695765594, 3.4650369864486787], [32.480338792435084, 3.465778964326159], [32.48005451141783, 3.466198565903837], [32.48048488872554, 3.468747811173695]]]], "type": "MultiPolygon"});\n", + " geo_json_7f55d6402316b5eb395d0f4fbf36fa04_add({"coordinates": [[[33.021795932856996, 3.809748963641805], [33.025151252073, 3.809956278325074], [33.02623122133802, 3.808427581666168], [33.02485427551908, 3.807549887495514], [33.02301920233525, 3.806256413373593], [33.0214095313367, 3.806644512528437], [33.021795932856996, 3.809748963641805]]], "type": "Polygon"});\n", "\n", " \n", "</script>\n", "</html>\" style=\"position:absolute;width:100%;height:100%;left:0;top:0;border:none !important;\" allowfullscreen webkitallowfullscreen mozallowfullscreen>" ], "text/plain": [ - "" + "" ] }, - "execution_count": 4, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -163,23 +228,20 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "id": "894ba6c4", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Tile stats loaded from CSV. Geometry will be added from plain tiles during merge.\n", - "Warning: Plain tiles GeoDataFrame is missing the 'id' column. Creating from index.\n", - "Merging tile stats and plain tiles on 'id'...\n", - "Merge complete.\n", - "Geospatial data loading and initial processing complete.\n", - "Buildings CRS: EPSG:4326\n", - "Minigrids CRS: EPSG:4326\n", - "Plain Tiles CRS: EPSG:4326\n", - "Joined Tiles CRS: EPSG:4326\n" + "ename": "AttributeError", + "evalue": "'int' object has no attribute 'lower'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m analyzer \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_geospatial_analyzer\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Desktop/SUNBIRD LOCAL/PROJECTS/suntrace/utils/factory.py:55\u001b[0m, in \u001b[0;36mcreate_geospatial_analyzer\u001b[0;34m(bpath, tpath, ppath, cmpath, empath, egpath, gepath, rpath, vpath, papath, spath, use_postgis, database_uri, layer_table_map)\u001b[0m\n\u001b[1;32m 53\u001b[0m env_flag \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m env_flag \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 55\u001b[0m use_postgis \u001b[38;5;241m=\u001b[39m \u001b[43menv_flag\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlower\u001b[49m() \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrue\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myes\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 57\u001b[0m use_postgis \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mbool\u001b[39m(os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSUNTRACE_DATABASE_URI\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n", + "\u001b[0;31mAttributeError\u001b[0m: 'int' object has no attribute 'lower'" ] } ], @@ -189,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "id": "b6c751cf", "metadata": {}, "outputs": [ @@ -199,18 +261,18 @@ "268" ] }, - "execution_count": 6, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "analyzer.count_buildings_within_region(sample_region_geometry)" + "analyzer.count_features_within_region(sample_region_geometry, 'buildings')" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "id": "c416f006", "metadata": {}, "outputs": [ @@ -220,7 +282,7 @@ "35" ] }, - "execution_count": 7, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -231,8 +293,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "4aebfd1d", + "execution_count": 13, + "id": "48441a3d", "metadata": {}, "outputs": [ { @@ -245,7 +307,40 @@ { "data": { "text/plain": [ - "-0.5121903091796086" + "66627.03732742427" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "analyzer.region_total_biomass(sample_region_geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "064ccaae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ndvi_mean': -0.5122,\n", + " 'ndvi_med': -0.5806,\n", + " 'ndvi_std': 0.2273,\n", + " 'evi_med': 1.4574,\n", + " 'elev_mean': 849.5706,\n", + " 'slope_mean': 2.5659,\n", + " 'par_mean': 179.2317,\n", + " 'rain_total_mm': 34.5617,\n", + " 'rain_mean_mm_day': 3.2298,\n", + " 'cloud_free_days': 29.0,\n", + " 'vegetation_density': 'Very limited vegetation',\n", + " 'total_biomass_Mg': 66627.0387,\n", + " 'region_area_km2': 20.6963}" ] }, "execution_count": 8, @@ -254,13 +349,13 @@ } ], "source": [ - "analyzer.avg_ndvi(sample_region_geometry)" + "ga._analyze_environmental_metrics(sample_region_geometry)" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "aca2b7d3", + "execution_count": 10, + "id": "6fb29e12", "metadata": {}, "outputs": [ { @@ -284,59 +379,216 @@ " \n", " \n", " \n", - " ID\n", - " Location\n", - " Population\n", - " capacity\n", - " total_con\n", - " x_cord\n", - " y_cord\n", - " layer\n", - " path\n", - " geometry\n", + " ogc_fid\n", + " geom\n", + " id\n", + " addr_vname\n", + " rank\n", + " category\n", " \n", " \n", " \n", " \n", - " 5\n", - " 7439003\n", - " MUDU EAST\n", - " 568.0\n", - " NaN\n", + " 0\n", + " 88\n", + " POLYGON ((32.45556 3.52822, 32.45652 3.52907, ...\n", + " 5502886\n", + " Otaa\n", " NaN\n", + " Solar home system\n", + " \n", + " \n", + " 1\n", + " 89\n", + " POLYGON ((32.42204 3.50881, 32.42188 3.50937, ...\n", + " 5502887\n", + " Mudu Central\n", " NaN\n", + " Existing minigrid\n", + " \n", + " \n", + " 2\n", + " 90\n", + " POLYGON ((32.48048 3.46875, 32.48159 3.47877, ...\n", + " 5502888\n", + " Mudu East\n", + " 21.0\n", + " Candidate minigrid\n", + " \n", + " \n", + " 3\n", + " 100\n", + " POLYGON ((32.46802 3.43551, 32.46857 3.43699, ...\n", + " 5502898\n", + " Padwat Central\n", + " 12.0\n", + " Candidate minigrid\n", + " \n", + " \n", + " 4\n", + " 102\n", + " POLYGON ((32.48745 3.45855, 32.48802 3.45829, ...\n", + " 5502900\n", + " Padwat East\n", + " 32.0\n", + " Candidate minigrid\n", + " \n", + " \n", + " 5\n", + " 108\n", + " 
POLYGON ((32.49077 3.42482, 32.49875 3.43083, ...\n", + " 5502906\n", + " Lugwar Central\n", + " 4.0\n", + " Candidate minigrid\n", + " \n", + " \n", + " 6\n", + " 111\n", + " POLYGON ((32.51499 3.46484, 32.51508 3.46617, ...\n", + " 5502909\n", + " Lanywang West\n", + " 15.0\n", + " Candidate minigrid\n", + " \n", + " \n", + " 7\n", + " 113\n", + " POLYGON ((32.47974 3.52567, 32.48022 3.52603, ...\n", + " 5502911\n", + " Lanywang Central\n", " NaN\n", - " Candidate-MGs — candidate_minigrids_nes_reproj\n", - " /Users/ernest/Documents/Sunbird/Projects/GIZ-P...\n", - " POINT (32.50252 3.47558)\n", + " Solar home system\n", " \n", " \n", "\n", "" ], "text/plain": [ - " ID Location Population capacity total_con x_cord y_cord \\\n", - "5 7439003 MUDU EAST 568.0 NaN NaN NaN NaN \n", - "\n", - " layer \\\n", - "5 Candidate-MGs — candidate_minigrids_nes_reproj \n", + " ogc_fid geom id \\\n", + "0 88 POLYGON ((32.45556 3.52822, 32.45652 3.52907, ... 5502886 \n", + "1 89 POLYGON ((32.42204 3.50881, 32.42188 3.50937, ... 5502887 \n", + "2 90 POLYGON ((32.48048 3.46875, 32.48159 3.47877, ... 5502888 \n", + "3 100 POLYGON ((32.46802 3.43551, 32.46857 3.43699, ... 5502898 \n", + "4 102 POLYGON ((32.48745 3.45855, 32.48802 3.45829, ... 5502900 \n", + "5 108 POLYGON ((32.49077 3.42482, 32.49875 3.43083, ... 5502906 \n", + "6 111 POLYGON ((32.51499 3.46484, 32.51508 3.46617, ... 5502909 \n", + "7 113 POLYGON ((32.47974 3.52567, 32.48022 3.52603, ... 5502911 \n", "\n", - " path geometry \n", - "5 /Users/ernest/Documents/Sunbird/Projects/GIZ-P... POINT (32.50252 3.47558) " + " addr_vname rank category \n", + "0 Otaa NaN Solar home system \n", + "1 Mudu Central NaN Existing minigrid \n", + "2 Mudu East 21.0 Candidate minigrid \n", + "3 Padwat Central 12.0 Candidate minigrid \n", + "4 Padwat East 32.0 Candidate minigrid \n", + "5 Lugwar Central 4.0 Candidate minigrid \n", + "6 Lanywang West 15.0 Candidate minigrid \n", + "7 Lanywang Central NaN Solar home system " ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "analyzer.get_minigrids_info_within_region(sample_region.geometry.iloc[0])\n" + "ga.get_villages_info_within_region(sample_region_geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "4dd32cc5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reprojecting GeoDataFrame from EPSG:4326 to EPSG:32636 for calculation.\n", + "Reprojecting GeoDataFrame from EPSG:4326 to EPSG:32636 for calculation.\n" + ] + }, + { + "data": { + "text/plain": [ + "{'ndvi_mean': -0.5122,\n", + " 'ndvi_med': -0.5806,\n", + " 'ndvi_std': 0.2273,\n", + " 'evi_med': 1.4574,\n", + " 'elev_mean': 849.5706,\n", + " 'slope_mean': 2.5659,\n", + " 'par_mean': 179.2317,\n", + " 'rain_total_mm': 34.5617,\n", + " 'rain_mean_mm_day': 3.2298,\n", + " 'cloud_free_days': 29.0,\n", + " 'region_area_km2': 20.6963,\n", + " 'vegetation_density': 'Very limited vegetation',\n", + " 'total_biomass_Mg': 66627.03732742427}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "analyzer._analyze_environmental_metrics(sample_region_geometry)" ] }, { "cell_type": "code", "execution_count": 10, + "id": "4aebfd1d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reprojecting GeoDataFrame from EPSG:4326 to EPSG:32636 for calculation.\n" + ] + }, + { + "data": { + "text/plain": [ + "-0.5121903091796086" + ] 
+ }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "analyzer.avg_ndvi(sample_region_geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "aca2b7d3", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'GeospatialAnalyzer' object has no attribute 'get_minigrids_info_within_region'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43manalyzer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_minigrids_info_within_region\u001b[49m(sample_region\u001b[38;5;241m.\u001b[39mgeometry\u001b[38;5;241m.\u001b[39miloc[\u001b[38;5;241m0\u001b[39m])\n", + "\u001b[0;31mAttributeError\u001b[0m: 'GeospatialAnalyzer' object has no attribute 'get_minigrids_info_within_region'" + ] + } + ], + "source": [ + "analyzer.get_minigrids_info_within_region(sample_region.geometry.iloc[0])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, "id": "74596ef2", "metadata": {}, "outputs": [ @@ -346,7 +598,7 @@ "-0.514233812830664" ] }, - "execution_count": 10, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -357,10 +609,364 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "4c0098df", "metadata": {}, "outputs": [], + "source": [ + "existing_minigrids = gpd.read_file(\"../data/viz_geojsons/existing_minigrids.geojson\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "8c9f19f7", + "metadata": {}, + "outputs": [], + "source": [ + "existing_minigrids.sort_values('Capacity', ascending=False, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f2291393", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idLocationNameCapacitygeometry
016Agoro_Incl Tumanum BAgoro0.08POINT (33.01636 3.79883)
1735Paloga CentralPaloga Central0.08POINT (32.94280 3.58834)
2456PalabekBase Camp TC0.08POINT (32.45319 3.35247)
2355PalabekPalabek0.08POINT (32.41668 3.36141)
1432Ogili TCOgili TC0.08POINT (32.49622 3.40938)
522KapetaKapeta0.04POINT (32.59161 3.50416)
1533OpokiOpoki0.04POINT (32.71950 3.42934)
2658LabayangoLabayango0.04POINT (32.86622 3.52132)
218Apyeta West/Apyeta Primary SchoolApyeta West0.04POINT (32.36870 3.30583)
2240YwayaYwaya0.04POINT (33.08095 3.69453)
2139PotikaPotika0.04POINT (32.88081 3.71998)
1937Pany Buk East-WestPany Buk East-West0.04POINT (32.94961 3.67544)
1836Pangira -Licwar CentralPangira-Licwar Central0.04POINT (32.65234 3.61458)
421Ayuu AlaliAyuu Alali0.04POINT (32.55980 3.53905)
624LapidiyenyiLapidiyenyi0.04POINT (32.92919 3.58105)
1634OtaaOtaa0.04POINT (32.47962 3.48625)
117Apwoyo TCApwoyo TC0.04POINT (32.99998 3.75932)
1331Oboko TCOboko TC0.04POINT (32.98424 3.67091)
1230NgomoromoNgomoromo0.04POINT (32.58623 3.68851)
1129Muddu CentralMuddu Central0.04POINT (32.45229 3.49704)
927Loromibeng A-BLoromibeng A-B0.04POINT (33.04055 3.75589)
826LogwakLogwak0.04POINT (32.74118 3.66006)
2759Apyetta TCApyetta TC0.04POINT (32.41602 3.37312)
2038Pawena TCPawena TC0.02POINT (32.66910 3.28127)
1028Moroto EastMoroto East0.02POINT (32.92925 3.69297)
320Aweno OlwiAweno Olwi0.02POINT (32.74772 3.71898)
2557LabayangoLabayango Primary School0.02POINT (32.86379 3.52098)
725Lelapwot WestLelapwot West0.02POINT (32.70874 3.62690)
\n", + "
" + ], + "text/plain": [ + " id Location Name Capacity \\\n", + "0 16 Agoro_Incl Tumanum B Agoro 0.08 \n", + "17 35 Paloga Central Paloga Central 0.08 \n", + "24 56 Palabek Base Camp TC 0.08 \n", + "23 55 Palabek Palabek 0.08 \n", + "14 32 Ogili TC Ogili TC 0.08 \n", + "5 22 Kapeta Kapeta 0.04 \n", + "15 33 Opoki Opoki 0.04 \n", + "26 58 Labayango Labayango 0.04 \n", + "2 18 Apyeta West/Apyeta Primary School Apyeta West 0.04 \n", + "22 40 Ywaya Ywaya 0.04 \n", + "21 39 Potika Potika 0.04 \n", + "19 37 Pany Buk East-West Pany Buk East-West 0.04 \n", + "18 36 Pangira -Licwar Central Pangira-Licwar Central 0.04 \n", + "4 21 Ayuu Alali Ayuu Alali 0.04 \n", + "6 24 Lapidiyenyi Lapidiyenyi 0.04 \n", + "16 34 Otaa Otaa 0.04 \n", + "1 17 Apwoyo TC Apwoyo TC 0.04 \n", + "13 31 Oboko TC Oboko TC 0.04 \n", + "12 30 Ngomoromo Ngomoromo 0.04 \n", + "11 29 Muddu Central Muddu Central 0.04 \n", + "9 27 Loromibeng A-B Loromibeng A-B 0.04 \n", + "8 26 Logwak Logwak 0.04 \n", + "27 59 Apyetta TC Apyetta TC 0.04 \n", + "20 38 Pawena TC Pawena TC 0.02 \n", + "10 28 Moroto East Moroto East 0.02 \n", + "3 20 Aweno Olwi Aweno Olwi 0.02 \n", + "25 57 Labayango Labayango Primary School 0.02 \n", + "7 25 Lelapwot West Lelapwot West 0.02 \n", + "\n", + " geometry \n", + "0 POINT (33.01636 3.79883) \n", + "17 POINT (32.94280 3.58834) \n", + "24 POINT (32.45319 3.35247) \n", + "23 POINT (32.41668 3.36141) \n", + "14 POINT (32.49622 3.40938) \n", + "5 POINT (32.59161 3.50416) \n", + "15 POINT (32.71950 3.42934) \n", + "26 POINT (32.86622 3.52132) \n", + "2 POINT (32.36870 3.30583) \n", + "22 POINT (33.08095 3.69453) \n", + "21 POINT (32.88081 3.71998) \n", + "19 POINT (32.94961 3.67544) \n", + "18 POINT (32.65234 3.61458) \n", + "4 POINT (32.55980 3.53905) \n", + "6 POINT (32.92919 3.58105) \n", + "16 POINT (32.47962 3.48625) \n", + "1 POINT (32.99998 3.75932) \n", + "13 POINT (32.98424 3.67091) \n", + "12 POINT (32.58623 3.68851) \n", + "11 POINT (32.45229 3.49704) \n", + "9 POINT (33.04055 3.75589) \n", + "8 POINT (32.74118 3.66006) \n", + "27 POINT (32.41602 3.37312) \n", + "20 POINT (32.66910 3.28127) \n", + "10 POINT (32.92925 3.69297) \n", + "3 POINT (32.74772 3.71898) \n", + "25 POINT (32.86379 3.52098) \n", + "7 POINT (32.70874 3.62690) " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "existing_minigrids" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d34e33f3", + "metadata": {}, + "outputs": [], "source": [] } ], @@ -380,7 +986,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/notebooks/merge_lwb_tile_stats.ipynb b/notebooks/merge_lwb_tile_stats.ipynb index d2521ce..d469f78 100644 --- a/notebooks/merge_lwb_tile_stats.ipynb +++ b/notebooks/merge_lwb_tile_stats.ipynb @@ -2333,7 +2333,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..6329911 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,11 @@ +# Lamwo Data Pipeline Scripts + +These CLI utilities mirror the Lamwo notebook workflow so teammates can refresh inputs without opening Colab. Update the placeholder paths/project IDs before running. + +1. `generate_tile_grid.py` — Build a 1 km grid from an AOI GeoPackage. Outputs a GeoJSON grid (`lamwo_grid.geojson`). +2. 
`download_s2_monthly.py` — Download monthly Sentinel-2 composites per tile as `.npy` cubes using Earth Engine. +3. `export_tile_stats.py` — Start an Earth Engine export (Drive or direct download) with vegetation, terrain, climate, and building stats per tile. +4. `download_lwb_rasters.py` — Fetch Aboveground Live Woody Biomass rasters per tile from the ArcGIS FeatureServer and clip them to the AOI. +5. `merge_lwb_tile_stats.py` — Combine biomass totals with Earth Engine tile statistics and save merged GeoJSON/CSV outputs. + +Typical run order follows the notebook sequence: generate the grid → (optional) fetch Sentinel-2 composites → export the EE aggregates → download biomass rasters → merge outputs. diff --git a/scripts/download_lwb_rasters.py b/scripts/download_lwb_rasters.py new file mode 100644 index 0000000..4e8dde6 --- /dev/null +++ b/scripts/download_lwb_rasters.py @@ -0,0 +1,112 @@ +"""Download and optionally clip Aboveground Live Woody Biomass rasters per tile.""" +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path +from typing import List + +import geopandas as gpd +import requests +import rasterio +from rasterio.mask import mask + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +LOGGER = logging.getLogger(__name__) + +DEFAULT_AOI_PATH = Path("PATH/TO/lamwo_aoi.gpkg") +DEFAULT_SERVICE_URL = ( + "https://services2.arcgis.com/g8WusZB13b9OegfU/arcgis/rest/services/" + "Aboveground_Live_Woody_Biomass_Density/FeatureServer/0/query?where=1%3D1&" + "outFields=tile_id,Mg_px_1_download,Shape__Area,Shape__Length&outSR=4326&f=json" +) +DEFAULT_DOWNLOAD_DIR = Path("PATH/TO/output/aglwb_tiles") +DEFAULT_CLIP_DIR = Path("PATH/TO/output/aglwb_clipped") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--aoi-path", type=Path, default=DEFAULT_AOI_PATH, help="AOI footprint (GeoPackage/GeoJSON)") + parser.add_argument("--service-url", default=DEFAULT_SERVICE_URL, help="ArcGIS FeatureServer query URL") + parser.add_argument("--download-dir", type=Path, default=DEFAULT_DOWNLOAD_DIR, help="Directory for downloaded rasters") + parser.add_argument("--clip-dir", type=Path, default=DEFAULT_CLIP_DIR, help="Directory for AOI-clipped rasters") + parser.add_argument("--tile-field", default="tile_id", help="Field name containing tile identifier") + parser.add_argument( + "--url-field", + default="Mg_px_1_download", + help="Field containing download URLs for each tile raster", + ) + parser.add_argument("--overwrite", action="store_true", help="Overwrite existing files") + parser.add_argument("--http-timeout", type=int, default=600, help="HTTP timeout (seconds)") + return parser.parse_args() + + +def download_raster(url: str, output_path: Path, timeout: int, overwrite: bool) -> bool: + if output_path.exists() and not overwrite: + LOGGER.info("Skipping existing %s", output_path) + return False + + response = requests.get(url, stream=True, timeout=timeout) + response.raise_for_status() + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "wb") as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + LOGGER.info("Downloaded %s", output_path) + return True + + +def clip_raster_to_aoi(raster_path: Path, clip_path: Path, geometry) -> Path: + clip_path.parent.mkdir(parents=True, exist_ok=True) + with rasterio.open(raster_path) as src: + out_image, out_transform = 
mask(src, [geometry], crop=True) + out_meta = src.meta.copy() + out_meta.update( + { + "height": out_image.shape[1], + "width": out_image.shape[2], + "transform": out_transform, + } + ) + with rasterio.open(clip_path, "w", **out_meta) as dest: + dest.write(out_image) + LOGGER.info("Clipped %s", clip_path) + return clip_path + + +def main() -> None: + logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") + args = parse_args() + + aoi = gpd.read_file(args.aoi_path) + if aoi.empty: + raise ValueError(f"AOI at {args.aoi_path} is empty") + aoi_geom = aoi.to_crs(epsg=4326).geometry.iloc[0] + + tiles = gpd.read_file(args.service_url) + tiles = tiles.to_crs(epsg=4326) + intersects = tiles[tiles.intersects(aoi_geom)] + LOGGER.info("Found %d biomass tiles intersecting AOI", len(intersects)) + + for _, row in intersects.iterrows(): + tile_id = row.get(args.tile_field) + url = row.get(args.url_field) + if not url: + LOGGER.warning("Row %s missing URL field %s", tile_id, args.url_field) + continue + + download_path = args.download_dir / f"{tile_id}.tif" + downloaded = download_raster(url, download_path, args.http_timeout, args.overwrite) + + if downloaded or args.overwrite: + clip_path = args.clip_dir / f"{tile_id}.tif" + clip_raster_to_aoi(download_path, clip_path, aoi_geom) + + +if __name__ == "__main__": + main() diff --git a/scripts/download_s2_monthly.py b/scripts/download_s2_monthly.py new file mode 100644 index 0000000..5ff9460 --- /dev/null +++ b/scripts/download_s2_monthly.py @@ -0,0 +1,149 @@ +"""Download monthly Sentinel-2 composites per tile using Earth Engine.""" +from __future__ import annotations + +import argparse +import io +import logging +import sys +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from typing import Iterable, Tuple + +import geopandas as gpd +import numpy as np +import requests + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +import ee # type: ignore + +from utils.lamwo_pipeline import ( + ee_bytes_to_img, + geom_to_ee, + init_ee, + reshape_monthly_stack, +) + +LOGGER = logging.getLogger(__name__) + +DEFAULT_PROJECT_ID = "your-ee-project-id" +DEFAULT_GRID_PATH = Path("PATH/TO/input/lamwo_grid.geojson") +DEFAULT_OUTPUT_DIR = Path("PATH/TO/output/s2_tiles") +S2_DATASET_ID = "COPERNICUS/S2_SR_HARMONIZED" +DEFAULT_BANDS = ["B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B11", "B12"] + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--project-id", default=DEFAULT_PROJECT_ID, help="Earth Engine project ID (update placeholder)") + parser.add_argument("--grid-path", type=Path, default=DEFAULT_GRID_PATH, help="Path to the tile grid GeoJSON") + parser.add_argument("--output-dir", type=Path, default=DEFAULT_OUTPUT_DIR, help="Directory for tile numpy stacks") + parser.add_argument("--year", type=int, default=2024, help="Calendar year to composite") + parser.add_argument("--dataset-id", default=S2_DATASET_ID, help="Earth Engine dataset to sample (default: Sentinel-2 SR)") + parser.add_argument("--bands", nargs="+", default=DEFAULT_BANDS, help="Band list to include per month") + parser.add_argument("--max-cloud", type=float, default=20.0, help="Cloud percentage threshold") + parser.add_argument("--scale", type=float, default=10.0, help="Pixel scale in metres for downloads") + parser.add_argument("--format", default="NPY", choices=["NPY"], help="Download format (NPY only)") + 
parser.add_argument("--max-workers", type=int, default=1, help="Concurrent download workers") + parser.add_argument("--http-timeout", type=int, default=600, help="HTTP timeout per tile download (seconds)") + return parser.parse_args() + + +def monthly_stack(region: ee.Geometry, *, year: int, dataset_id: str, bands: Iterable[str], max_cloud: float) -> ee.Image: + collection = ee.ImageCollection(dataset_id) + composites = [] + for month in range(1, 13): + start = ee.Date.fromYMD(year, month, 1) + end = start.advance(1, "month") + filtered = ( + collection + .filterBounds(region) + .filterDate(start, end) + .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", max_cloud)) + .select(list(bands)) + ) + image = filtered.median() + month_str = ee.Number(month).format("%02d") + renamed = image.rename(image.bandNames().map(lambda b: ee.String(b).cat("_m").cat(month_str))) + composites.append(renamed) + return ee.Image.cat(composites) + + +def resolve_tile_label(row, fallback_index: int) -> str: + for candidate in ("tile_id", "tileID", "id"): + value = row.get(candidate) + if value not in (None, ""): + return str(value) + return f"{fallback_index:04d}" + + +def process_tile( + idx: int, + row, + *, + args: argparse.Namespace, + band_names: Iterable[str], +) -> Tuple[str, Path]: + tile_label = resolve_tile_label(row, idx) + geom = row.geometry + region = geom_to_ee(geom) + image = monthly_stack( + region, + year=args.year, + dataset_id=args.dataset_id, + bands=args.bands, + max_cloud=args.max_cloud, + ) + url = image.getDownloadURL( + { + "bands": list(band_names), + "region": region, + "scale": args.scale, + "format": args.format, + } + ) + response = requests.get(url, timeout=args.http_timeout) + response.raise_for_status() + payload = io.BytesIO(response.content) + pixel_data = np.load(payload, allow_pickle=True) + array = ee_bytes_to_img(pixel_data) + cube = reshape_monthly_stack(array, len(args.bands)) + + output_dir = args.output_dir + output_dir.mkdir(parents=True, exist_ok=True) + output_path = output_dir / f"tile_{tile_label}_{args.year}.npy" + np.save(output_path, cube) + LOGGER.info("Saved %s", output_path) + return tile_label, output_path + + +def main() -> None: + logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") + args = parse_args() + + init_ee(args.project_id) + + tiles = gpd.read_file(args.grid_path).to_crs(epsg=4326).reset_index(drop=True) + band_names = [f"{band}_m{month:02d}" for month in range(1, 13) for band in args.bands] + + if args.max_workers > 1: + with ThreadPoolExecutor(max_workers=args.max_workers) as executor: + futures = [ + executor.submit(process_tile, idx, row, args=args, band_names=band_names) + for idx, row in tiles.iterrows() + ] + for future in as_completed(futures): + try: + future.result() + except Exception as exc: # pragma: no cover - surfacing errors + LOGGER.error("Tile download failed: %s", exc) + raise + else: + for idx, row in tiles.iterrows(): + process_tile(idx, row, args=args, band_names=band_names) + + +if __name__ == "__main__": + main() diff --git a/scripts/export_tile_stats.py b/scripts/export_tile_stats.py new file mode 100644 index 0000000..2cfc320 --- /dev/null +++ b/scripts/export_tile_stats.py @@ -0,0 +1,254 @@ +"""Export per-tile aggregate statistics from Earth Engine.""" +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path +from typing import List + +import geopandas as gpd +import requests + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in 
sys.path: + sys.path.insert(0, str(ROOT)) + +import ee # type: ignore + +from utils.lamwo_pipeline import geom_to_ee, geodf_to_feature_collection, init_ee + +LOGGER = logging.getLogger(__name__) + +DEFAULT_PROJECT_ID = "your-ee-project-id" +DEFAULT_AOI_PATH = Path("PATH/TO/lamwo_aoi.gpkg") +DEFAULT_TILES_PATH = Path("PATH/TO/input/lamwo_grid.geojson") +DEFAULT_DRIVE_FOLDER = "ee_exports" +DEFAULT_FILE_PREFIX = "Lamwo_Tile_Stats_EE" +DEFAULT_SELECTORS = [ + "ndvi_mean", + "ndvi_med", + "ndvi_std", + "evi_med", + "elev_mean", + "slope_mean", + "par_mean", + "rain_total_mm", + "rain_mean_mm_day", + "cloud_free_days", + "bldg_count", + "bldg_area", + "bldg_h_max", + "system:index", +] + +DATASETS = { + "S2_SR": "COPERNICUS/S2_SR_HARMONIZED", + "SRTM": "USGS/SRTMGL1_003", + "VIIRS": "NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG", + "MODIS_PAR": "MODIS/062/MCD18C2", + "OPEN_BUILDINGS": "GOOGLE/Research/open-buildings-temporal/v1", + "CHIRPS": "UCSB-CHG/CHIRPS/DAILY", +} + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--project-id", default=DEFAULT_PROJECT_ID, help="Earth Engine project ID (update placeholder)") + parser.add_argument("--aoi-path", type=Path, default=DEFAULT_AOI_PATH, help="AOI footprint (GeoPackage/GeoJSON)") + parser.add_argument("--tiles-path", type=Path, default=DEFAULT_TILES_PATH, help="Tile grid GeoJSON") + parser.add_argument("--year", type=int, default=2023, help="Year to aggregate over") + parser.add_argument("--drive-folder", default=DEFAULT_DRIVE_FOLDER, help="Drive folder for EE export task") + parser.add_argument("--file-prefix", default=DEFAULT_FILE_PREFIX, help="Prefix for exported file") + parser.add_argument( + "--download-path", + type=Path, + help="Optional local path to download GeoJSON instead of creating a Drive task", + ) + parser.add_argument( + "--selectors", + nargs="+", + default=DEFAULT_SELECTORS, + help="Feature properties to include in the export", + ) + parser.add_argument("--cloud-threshold", type=float, default=20.0, help="Cloud threshold for Sentinel-2 filtering") + return parser.parse_args() + + +def build_collections(aoi_ee, year: int, cloud_threshold: float): + s2 = ( + ee.ImageCollection(DATASETS["S2_SR"]) + .filterBounds(aoi_ee) + .filterDate(f"{year}-01-01", f"{year}-12-31") + .filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", cloud_threshold)) + ) + + ndvi_collection = s2.map(lambda img: img.normalizedDifference(["B8", "B4"]).rename("NDVI")) + evi_collection = s2.map( + lambda img: img.expression( + "2.5*((NIR - RED)/(NIR + 6*RED - 7.5*BLUE + 1))", + {"NIR": img.select("B8"), "RED": img.select("B4"), "BLUE": img.select("B2")}, + ).rename("EVI") + ) + + dem = ee.Image(DATASETS["SRTM"]).select("elevation") + slope = ee.Terrain.slope(dem) + + par = ( + ee.ImageCollection(DATASETS["MODIS_PAR"]) + .filterDate(f"{year}-01-01", f"{year}-12-31") + .select("GMT_1200_PAR") + .mean() + ) + + chirps = ( + ee.ImageCollection(DATASETS["CHIRPS"]) + .filterDate(f"{year}-01-01", f"{year}-12-31") + .select("precipitation") + ) + + buildings = ( + ee.ImageCollection(DATASETS["OPEN_BUILDINGS"]) + .filterBounds(aoi_ee) + .filterDate(f"{year}-01-01", f"{year}-12-31") + ).mosaic() + + return { + "ndvi_collection": ndvi_collection, + "evi_collection": evi_collection, + "dem": dem, + "slope": slope, + "par": par, + "chirps": chirps, + "buildings": buildings, + "s2": s2, + } + + +def compute_stats_fn(collections, year: int, cloud_threshold: float): + ndvi_collection = collections["ndvi_collection"] + 
evi_collection = collections["evi_collection"] + dem = collections["dem"] + slope = collections["slope"] + par = collections["par"] + chirps = collections["chirps"] + buildings = collections["buildings"] + s2 = collections["s2"] + + def _compute(tile): + geom = tile.geometry() + props = {} + + ndvi_mean = ndvi_collection.mean().reduceRegion( + ee.Reducer.mean(), geometry=geom, scale=30, bestEffort=True + ) + props["ndvi_mean"] = ndvi_mean.get("NDVI", ee.Number(-9999)) + + ndvi_med = ndvi_collection.median().reduceRegion( + ee.Reducer.median(), geometry=geom, scale=30, bestEffort=True + ) + props["ndvi_med"] = ndvi_med.get("NDVI", ee.Number(-9999)) + + ndvi_std = ndvi_collection.reduce(ee.Reducer.stdDev()).reduceRegion( + ee.Reducer.mean(), geometry=geom, scale=30, bestEffort=True + ) + props["ndvi_std"] = ndvi_std.get("NDVI_stdDev", ee.Number(-9999)) + + evi_med = evi_collection.median().reduceRegion( + ee.Reducer.median(), geometry=geom, scale=30, bestEffort=True + ) + props["evi_med"] = evi_med.get("EVI", ee.Number(-9999)) + + elev_mean = dem.reduceRegion( + ee.Reducer.mean(), geometry=geom, scale=30, bestEffort=True + ) + props["elev_mean"] = elev_mean.get("elevation", ee.Number(-9999)) + + slope_mean = slope.reduceRegion( + ee.Reducer.mean(), geometry=geom, scale=30, bestEffort=True + ) + props["slope_mean"] = slope_mean.get("slope", ee.Number(-9999)) + + par_mean = par.reduceRegion( + ee.Reducer.mean(), geometry=geom, scale=1000, bestEffort=True + ) + props["par_mean"] = par_mean.get("GMT_1200_PAR", ee.Number(-9999)) + + rain_sum = chirps.sum().reduceRegion( + ee.Reducer.sum(), geometry=geom, scale=5566, bestEffort=True + ) + props["rain_total_mm"] = rain_sum.get("precipitation", ee.Number(0)) + + rain_mean = chirps.mean().reduceRegion( + ee.Reducer.mean(), geometry=geom, scale=5566, bestEffort=True + ) + props["rain_mean_mm_day"] = rain_mean.get("precipitation", ee.Number(0)) + + cf_days = s2.filterBounds(geom).filterDate( + f"{year}-01-01", f"{year}-12-31" + ).filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE", cloud_threshold)).size() + props["cloud_free_days"] = cf_days + + bldg_count = buildings.reduceRegion( + ee.Reducer.sum(), geometry=geom, scale=1, bestEffort=True + ) + props["bldg_count"] = bldg_count.get("building_fractional_count", ee.Number(0)) + + bldg_presence = buildings.select("building_presence").gt(0.34).reduceRegion( + ee.Reducer.sum(), geometry=geom, scale=1, bestEffort=True + ) + props["bldg_area"] = bldg_presence.get("building_presence", ee.Number(0)) + + bldg_height = buildings.select("building_height").reduceRegion( + ee.Reducer.max(), geometry=geom, scale=1, bestEffort=True + ) + props["bldg_h_max"] = bldg_height.get("building_height", ee.Number(0)) + + return tile.set(props) + + return _compute + + +def start_export(stats_table, selectors: List[str], args: argparse.Namespace) -> None: + if args.download_path: + params = {"filetype": "GeoJSON", "selectors": selectors} + url = stats_table.getDownloadURL(**params) + response = requests.get(url, timeout=600) + response.raise_for_status() + args.download_path.parent.mkdir(parents=True, exist_ok=True) + args.download_path.write_bytes(response.content) + LOGGER.info("Downloaded stats to %s", args.download_path) + return + + task = ee.batch.Export.table.toDrive( + collection=stats_table, + description=args.file_prefix, + folder=args.drive_folder, + fileNamePrefix=args.file_prefix, + fileFormat="CSV", + selectors=selectors, + ) + task.start() + LOGGER.info("Started EE Drive export task %s", task.id) + + +def main() -> 
None: + logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") + args = parse_args() + + init_ee(args.project_id) + + aoi = gpd.read_file(args.aoi_path) + aoi_ee = geom_to_ee(aoi.to_crs(epsg=4326).unary_union) + + tiles = gpd.read_file(args.tiles_path).to_crs(epsg=4326) + tiles_fc = geodf_to_feature_collection(tiles) + + collections = build_collections(aoi_ee, args.year, args.cloud_threshold) + stats_table = tiles_fc.map(compute_stats_fn(collections, args.year, args.cloud_threshold)) + + start_export(stats_table, args.selectors, args) + + +if __name__ == "__main__": + main() diff --git a/scripts/generate_tile_grid.py b/scripts/generate_tile_grid.py new file mode 100644 index 0000000..fdfe517 --- /dev/null +++ b/scripts/generate_tile_grid.py @@ -0,0 +1,62 @@ +"""Generate a square tile grid for the Lamwo AOI.""" +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from utils.lamwo_pipeline import create_tile_grid + +DEFAULT_AOI_PATH = Path("PATH/TO/lamwo_aoi.gpkg") +DEFAULT_OUTPUT_PATH = Path("PATH/TO/output/lamwo_grid.geojson") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--aoi-path", + type=Path, + default=DEFAULT_AOI_PATH, + help="Path to the AOI GeoPackage (update placeholder path).", + ) + parser.add_argument( + "--output-path", + type=Path, + default=DEFAULT_OUTPUT_PATH, + help="Where to write the generated grid GeoJSON (update placeholder path).", + ) + parser.add_argument( + "--cell-size-m", + type=float, + default=1000, + help="Tile edge length in metres (default: 1000).", + ) + parser.add_argument( + "--driver", + default="GeoJSON", + help="OGR driver to use when writing the grid (default: GeoJSON).", + ) + return parser.parse_args() + + +def main() -> None: + logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") + args = parse_args() + + grid = create_tile_grid( + args.aoi_path, + cell_size_m=args.cell_size_m, + output_path=args.output_path, + output_driver=args.driver, + ) + + logging.info("Generated %d tiles covering %s", len(grid), args.aoi_path) + + +if __name__ == "__main__": + main() diff --git a/scripts/merge_lwb_tile_stats.py b/scripts/merge_lwb_tile_stats.py new file mode 100644 index 0000000..e6ccd0e --- /dev/null +++ b/scripts/merge_lwb_tile_stats.py @@ -0,0 +1,115 @@ +"""Merge biomass rasters with Earth Engine tile statistics.""" +from __future__ import annotations + +import argparse +import logging +import sys +import tempfile +from pathlib import Path + +import geopandas as gpd +import numpy as np +import pandas as pd +import rasterio +import rioxarray as rxr +from rasterio.warp import Resampling +from rasterstats import zonal_stats + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from utils.lamwo_pipeline import compute_utm_crs + +LOGGER = logging.getLogger(__name__) + +DEFAULT_RASTER_PATH = Path("PATH/TO/input/aglwb.tif") +DEFAULT_GRID_PATH = Path("PATH/TO/input/lamwo_grid.geojson") +DEFAULT_TILE_STATS_PATH = Path("PATH/TO/input/Lamwo_Tile_Stats_EE.csv") +DEFAULT_OUTPUT_GEOJSON = Path("PATH/TO/output/Lamwo_Tile_Stats_EE_biomass.geojson") +DEFAULT_OUTPUT_CSV = Path("PATH/TO/output/Lamwo_Tile_Stats_EE_biomass.csv") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + 
parser.add_argument("--raster-path", type=Path, default=DEFAULT_RASTER_PATH, help="Biomass raster path") + parser.add_argument("--grid-path", type=Path, default=DEFAULT_GRID_PATH, help="Tile grid GeoJSON") + parser.add_argument("--tile-stats-path", type=Path, default=DEFAULT_TILE_STATS_PATH, help="Earth Engine per-tile CSV") + parser.add_argument("--output-geojson", type=Path, default=DEFAULT_OUTPUT_GEOJSON, help="GeoJSON output path") + parser.add_argument("--output-csv", type=Path, default=DEFAULT_OUTPUT_CSV, help="CSV output path") + parser.add_argument( + "--raster-unit", + choices=["Mg_per_pixel", "Mg_per_ha"], + default="Mg_per_pixel", + help="Interpretation of raster values for biomass", + ) + parser.add_argument("--utm-crs", help="Optional CRS override for projected calculations (e.g. EPSG:32636)") + return parser.parse_args() + +def compute_zonal_stats(tiles_metric: gpd.GeoDataFrame, raster_path: Path) -> gpd.GeoDataFrame: + with rasterio.open(raster_path) as src: + nodata = src.nodata + stats = zonal_stats( + tiles_metric.geometry, + raster_path, + stats=["sum", "mean", "count"], + nodata=nodata, + all_touched=False, + geojson_out=False, + ) + tiles_metric = tiles_metric.copy() + tiles_metric["r_sum"] = [entry.get("sum", np.nan) for entry in stats] + tiles_metric["r_mean"] = [entry.get("mean", np.nan) for entry in stats] + tiles_metric["r_count"] = [entry.get("count", 0) for entry in stats] + return tiles_metric + + +def aggregate_biomass(tiles_metric: gpd.GeoDataFrame, raster_unit: str) -> gpd.GeoDataFrame: + if raster_unit == "Mg_per_pixel": + tiles_metric["tile_total_Mg"] = tiles_metric["r_sum"] + else: # Mg_per_ha + tiles_metric["tile_total_Mg"] = tiles_metric["r_mean"] * tiles_metric["area_ha"] + return tiles_metric + + +def main() -> None: + logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") + args = parse_args() + + tiles = gpd.read_file(args.grid_path).to_crs(epsg=4326) + utm_crs = args.utm_crs or compute_utm_crs(tiles.unary_union) + LOGGER.info("Using projected CRS %s for zonal statistics", utm_crs) + + tiles_metric = tiles.to_crs(utm_crs) + tiles_metric["area_m2"] = tiles_metric.geometry.area + tiles_metric["area_ha"] = tiles_metric["area_m2"] / 10000.0 + + with tempfile.TemporaryDirectory() as tmpdir: + projected_raster_path = Path(tmpdir) / "aglwb_utm.tif" + arr = rxr.open_rasterio(args.raster_path) + reproj = arr.rio.reproject(utm_crs, resampling=Resampling.nearest) + reproj.rio.to_raster(projected_raster_path) + + tiles_metric = compute_zonal_stats(tiles_metric, projected_raster_path) + + tiles_metric = aggregate_biomass(tiles_metric, args.raster_unit) + + tiles_wgs84 = tiles_metric.to_crs(epsg=4326) + + tile_stats = pd.read_csv(args.tile_stats_path) + tile_stats = tile_stats.reset_index(drop=True) + + combined = tile_stats.join( + tiles_wgs84[["geometry", "area_m2", "area_ha", "tile_total_Mg", "r_sum", "r_mean", "r_count"]] + ) + + result_gdf = gpd.GeoDataFrame(combined, geometry="geometry", crs=tiles_wgs84.crs) + + args.output_geojson.parent.mkdir(parents=True, exist_ok=True) + result_gdf.to_file(args.output_geojson, driver="GeoJSON") + result_gdf.drop(columns="geometry").to_csv(args.output_csv, index=False) + LOGGER.info("Saved %s and %s", args.output_geojson, args.output_csv) + + +if __name__ == "__main__": + main() diff --git a/utils/lamwo_pipeline.py b/utils/lamwo_pipeline.py new file mode 100644 index 0000000..844403b --- /dev/null +++ b/utils/lamwo_pipeline.py @@ -0,0 +1,138 @@ +"""Utility helpers for the Lamwo data-gathering 
pipeline.""" +from __future__ import annotations + +import json +import logging +from pathlib import Path +from typing import Optional + +import geopandas as gpd +import numpy as np +from shapely.geometry import box, mapping +from shapely.geometry.base import BaseGeometry + +LOGGER = logging.getLogger(__name__) + + +def _import_ee(): + """Import Google Earth Engine lazily so non-EE scripts still load.""" + try: + import ee # type: ignore + from ee.ee_exception import EEException # type: ignore + except ImportError as exc: # pragma: no cover - guard for optional dep + raise ImportError( + "The earthengine-api package is required for this operation." + ) from exc + return ee, EEException + + +def init_ee(project_id: Optional[str] = None, authenticate_if_needed: bool = True) -> None: + """Initialise the Earth Engine client, authenticating if credentials are absent.""" + ee, EEException = _import_ee() + try: + if project_id: + ee.Initialize(project=project_id) + else: + ee.Initialize() + except EEException as exc: + if not authenticate_if_needed: + raise + LOGGER.info("EE initialization failed, attempting authentication: %s", exc) + ee.Authenticate() + if project_id: + ee.Initialize(project=project_id) + else: + ee.Initialize() + + +def geom_to_ee(geometry: BaseGeometry): + """Convert a Shapely geometry into an Earth Engine geometry.""" + ee, _ = _import_ee() + geojson = mapping(geometry) + geom_type = geojson.get("type") + coordinates = geojson.get("coordinates") + if geom_type == "MultiPolygon": + return ee.Geometry.MultiPolygon(coordinates) + if geom_type == "Polygon": + return ee.Geometry.MultiPolygon([coordinates]) + raise ValueError(f"Unsupported geometry type for EE conversion: {geom_type}") + + +def compute_utm_crs(geometry: BaseGeometry) -> str: + """Derive an appropriate UTM CRS for a lon/lat geometry.""" + centroid = geometry.centroid + zone = int((centroid.x + 180) // 6) + 1 + epsg = 32600 + zone if centroid.y >= 0 else 32700 + zone + return f"EPSG:{epsg}" + + +def create_tile_grid( + aoi_path: Path, + *, + cell_size_m: float = 1000, + output_path: Optional[Path] = None, + output_driver: str = "GeoJSON", +) -> gpd.GeoDataFrame: + """Create a square grid covering the AOI and optionally persist it.""" + if cell_size_m <= 0: + raise ValueError("cell_size_m must be positive") + + aoi = gpd.read_file(aoi_path) + if aoi.empty: + raise ValueError(f"AOI at {aoi_path} is empty") + if aoi.crs is None: + raise ValueError("AOI must have a defined CRS") + + aoi_wgs84 = aoi.to_crs(epsg=4326) + union_geom = aoi_wgs84.unary_union + + utm_crs = compute_utm_crs(union_geom) + aoi_metric = aoi_wgs84.to_crs(utm_crs) + union_metric = aoi_metric.unary_union + minx, miny, maxx, maxy = union_metric.bounds + + cells = [] + x = minx + while x < maxx: + y = miny + while y < maxy: + candidate = box(x, y, x + cell_size_m, y + cell_size_m) + intersection = candidate.intersection(union_metric) + if not intersection.is_empty: + cells.append(intersection) + y += cell_size_m + x += cell_size_m + + grid = gpd.GeoDataFrame({"geometry": cells}, crs=utm_crs).to_crs(epsg=4326) + + if output_path: + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + grid.to_file(output_path, driver=output_driver) + LOGGER.info("Wrote grid to %s", output_path) + + return grid + + +def geodf_to_feature_collection(gdf: gpd.GeoDataFrame): + """Convert a GeoDataFrame into an EE FeatureCollection.""" + ee, _ = _import_ee() + return ee.FeatureCollection(json.loads(gdf.to_json())) + + +def 
ee_bytes_to_img(pixel_data) -> np.ndarray:
+    """Convert a structured array returned by EE into a standard NumPy array."""
+    # EE returns one named field per band; stack the fields into a (height, width, bands) array.
+    if isinstance(pixel_data, np.ndarray) and pixel_data.dtype.names:
+        return np.stack([pixel_data[name] for name in pixel_data.dtype.names], axis=-1)
+    raise ValueError("Expected a structured NumPy array with named bands")
+
+
+def reshape_monthly_stack(array: np.ndarray, band_count: int) -> np.ndarray:
+    """Reshape stacked bands into (months, height, width, bands)."""
+    if array.ndim != 3:
+        raise ValueError("Array must be 3-D (height, width, bands)")
+    height, width, stacked_bands = array.shape
+    if stacked_bands % band_count != 0:
+        raise ValueError("Stacked band count is not divisible by band_count")
+    # Assumes the stacked axis is month-major: all bands for month 1, then all bands for month 2, etc.
+    months = stacked_bands // band_count
+    return array.reshape(height, width, months, band_count).transpose(2, 0, 1, 3)
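
For orientation, a minimal illustrative sketch (not part of the patch) of how the helpers in utils/lamwo_pipeline.py are expected to chain together; the AOI/grid paths and the EE project ID below are placeholders, mirroring the defaults used by the scripts above:

    from pathlib import Path

    import geopandas as gpd

    from utils.lamwo_pipeline import (
        create_tile_grid,
        geodf_to_feature_collection,
        geom_to_ee,
        init_ee,
    )

    aoi_path = Path("PATH/TO/lamwo_aoi.gpkg")               # placeholder path
    grid_path = Path("PATH/TO/output/lamwo_grid.geojson")   # placeholder path

    # 1) Build and persist the 1 km tile grid (generate_tile_grid.py does this via its CLI).
    grid = create_tile_grid(aoi_path, cell_size_m=1000, output_path=grid_path)

    # 2) Initialise Earth Engine, authenticating interactively if credentials are missing.
    init_ee("your-ee-project-id")

    # 3) Convert the AOI and the tile grid into EE objects, as export_tile_stats.py does
    #    before mapping compute_stats_fn over the tiles.
    aoi_wgs84 = gpd.read_file(aoi_path).to_crs(epsg=4326)
    aoi_ee = geom_to_ee(aoi_wgs84.unary_union)
    tiles_fc = geodf_to_feature_collection(grid)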