From 639c19926705a7fe2004ced77acbf13add8440c7 Mon Sep 17 00:00:00 2001 From: Robert Jackson Date: Fri, 23 Jan 2026 17:02:33 -0600 Subject: [PATCH 1/3] ADD: Fixes to inference --- lars/__init__.py | 5 +++-- lars/nepho/__init__.py | 6 +++--- lars/nepho/inference.py | 18 ++++++++++++++---- lars/preprocessing/__init__.py | 4 ++-- pyproject.toml | 2 +- 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/lars/__init__.py b/lars/__init__.py index a53fbf7..9b79fa3 100644 --- a/lars/__init__.py +++ b/lars/__init__.py @@ -1,2 +1,3 @@ -from . import preprocessing -from . import nepho \ No newline at end of file +from . import preprocessing # noqa: F401 +from . import util # noqa: F401 +from . import nepho # noqa: F401 \ No newline at end of file diff --git a/lars/nepho/__init__.py b/lars/nepho/__init__.py index f1ec871..b5147e1 100644 --- a/lars/nepho/__init__.py +++ b/lars/nepho/__init__.py @@ -1,3 +1,3 @@ -from . import models -from .config import config, Config -from .inference import label_radar_data, DEFAULT_CATEGORIES +from . import models # noqa: F401 +from .config import config, Config # noqa: F401 +from .inference import label_radar_data, DEFAULT_CATEGORIES # noqa: F401 diff --git a/lars/nepho/inference.py b/lars/nepho/inference.py index feb918e..c40e905 100644 --- a/lars/nepho/inference.py +++ b/lars/nepho/inference.py @@ -1,4 +1,6 @@ import asyncio +import os + DEFAULT_CATEGORIES = {"No precipitation": "No echoes greater than 10 dBZ present. 
A circle of echoes near radar site may be present due to ground clutter.", "Stratiform rain": "Widespread echoes between 0 and 35 dBZ, not present as a circular pattern around the radar site.", "Scattered Convection": "Present as isolated to scattered cells with reflectivities between 35-65 dBZ", @@ -7,7 +9,7 @@ "Unknown": "If you cannot confidently classify the radar image into one of the above categories"} async def label_radar_data(radar_df, model, categories=None, site="Bankhead National Forest", - verbose=True, vmin=-20, vmax=60): + verbose=True, vmin=-20, vmax=60, model_output_dir=None): """ Label radar data using a given model. @@ -16,6 +18,7 @@ async def label_radar_data(radar_df, model, categories=None, site="Bankhead Nati radar_df (pd.DataFrame): DataFrame containing radar data to be labeled. model: Model used for labeling the radar data. site: str: Radar site identifier. + model_output_dir: str: Directory to save model outputs. Returns ------- @@ -37,18 +40,25 @@ async def label_radar_data(radar_df, model, categories=None, site="Bankhead Nati for fi in radar_df["file_path"].values: time = radar_df.loc[radar_df["file_path"] == fi, "time"].values[0] prompt_with_time = prompt + f"Please provide just the category label for the radar image taken at time {time}." - + prompt_with_time = prompt_with_time + "Do not provide your reasoning for your selection, just the category." 
output_model = await model.chat(prompt_with_time, images=[fi]) # Find the category label in the output - output = output_model.strip() + output_model = output_model.strip() + output = "Unknown" for category in categories.keys(): - if category.lower() in output.lower(): + output_lower = output_model.lower() + last_line = output_lower.split("\n")[-1].strip().lower() + if category.lower() in last_line: output = category break if verbose: print("Category assigned:", output) print("Model output:", output_model) + if model_output_dir is not None: + output_file = f"{model_output_dir}/{os.path.basename(fi).replace('.png', '_llm_output.txt')}" + with open(output_file, "w") as f: + f.write(output_model) if output[-1] == ".": output = output[:-1] radar_df.loc[radar_df["file_path"] == fi, "llm_label"] = output.strip() diff --git a/lars/preprocessing/__init__.py b/lars/preprocessing/__init__.py index 5c2664c..5f86c8d 100644 --- a/lars/preprocessing/__init__.py +++ b/lars/preprocessing/__init__.py @@ -1,2 +1,2 @@ -from .radar_preprocessing import preprocess_radar_data -from .labels import load_labels, save_labels, change_file_path \ No newline at end of file +from .radar_preprocessing import preprocess_radar_data # noqa: F401 +from .labels import load_labels, save_labels, change_file_path # noqa: F401 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 0f8dddf..666b66f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ dependencies = [] [project.optional-dependencies] -dev = ["pytest>=6.0", "black", "flake8", "openai", "ollama", "xradar", "python-dotenv", "cmweather", "torchvision", "torch", "aiohttp"] +dev = ["pytest>=6.0", "black", "flake8", "openai", "ollama", "xradar", "python-dotenv", "scikit-learn", "cmweather", "torchvision", "torch", "aiohttp"] [project.urls] Homepage = "https://github.com/rcjackson/lars" \ No newline at end of file From 46aae8aed9234b082e137b42ac858ce822b887c5 Mon Sep 17 00:00:00 2001 From: 
Robert Jackson Date: Fri, 23 Jan 2026 17:02:53 -0600 Subject: [PATCH 2/3] ADD: Confusion matrix plots. --- lars/util/__init__.py | 1 + lars/util/confusion_matrix.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 lars/util/__init__.py create mode 100644 lars/util/confusion_matrix.py diff --git a/lars/util/__init__.py b/lars/util/__init__.py new file mode 100644 index 0000000..d007ce0 --- /dev/null +++ b/lars/util/__init__.py @@ -0,0 +1 @@ +from .confusion_matrix import plot_confusion_matrix # noqa: F401 diff --git a/lars/util/confusion_matrix.py b/lars/util/confusion_matrix.py new file mode 100644 index 0000000..c1fd867 --- /dev/null +++ b/lars/util/confusion_matrix.py @@ -0,0 +1,32 @@ +import matplotlib.pyplot as plt +from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay +from sklearn.preprocessing import LabelEncoder + +def plot_confusion_matrix(df, label_col='label', pred_col='llm_label', normalize=None, ax=None): + """ + Plot a confusion matrix using true and predicted labels from a DataFrame. + + Parameters + ---------- + df (pd.DataFrame): DataFrame containing true and predicted labels. + label_col (str): Column name for true labels. + pred_col (str): Column name for predicted labels. + normalize (str or None): Normalization mode for confusion matrix. + ax (matplotlib axis handle): The axis handle to plot on. Set to None to use the current axis. 
+ + Returns + ------- + None + """ + le = LabelEncoder() + true_labels = le.fit_transform(df[label_col].str.lower()) + pred_labels = le.transform(df[pred_col].str.lower()) + if ax is None: + ax = plt.gca() + + cm = confusion_matrix(true_labels, pred_labels, normalize=normalize) + disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_) + + + disp.plot(ax=ax, cmap=plt.cm.Blues) + ax.set_title('Confusion Matrix') From 1b31a336116cc9842863ca46d45ae6161c265bf7 Mon Sep 17 00:00:00 2001 From: Robert Jackson Date: Thu, 19 Feb 2026 14:57:23 -0600 Subject: [PATCH 3/3] ADD: A codebook for classification. --- CODEBOOK.md | 105 ++++++++++++++++++++++ lars/preprocessing/radar_preprocessing.py | 2 +- 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 CODEBOOK.md diff --git a/CODEBOOK.md b/CODEBOOK.md new file mode 100644 index 0000000..3029307 --- /dev/null +++ b/CODEBOOK.md @@ -0,0 +1,105 @@ +# Radar Image Labelling Codebook + +A reference guide for annotators labelling radar imagery with LARS. Use this codebook to ensure consistent, reproducible labels across all annotators and sessions. + +--- + +## 1. Overview + +The purpose of this section is to label radar imagery for warm-season precipitation + +- **Radar type:** ARM CSAPR2 +- **Data source:** csapr2cmac.c1 datastream +- **Geographic scope:** Bankhead National Forest +- **Labelling task:** scene classification + +--- + +## 2. Data Description + +### 2.1 Input Fields + +| Field | Units | Description | +|-------|-------|-------------| +| Reflectivity (Z) | dBZ | Intensity of returned radar signal | + + +### 2.2 Image Format + +- **Spatial resolution:** 1 km by 1 km +- **Temporal resolution:** 10-minute intervals +- **Projection:** Polar coordinates projected onto +- **Color scale:** (describe or attach scale reference) + +--- + +## 3. Label Classes + +Each image or region-of-interest must be assigned exactly one primary class. 
+ +### 3.1 Primary Classes + +| Label | Description | +|------------------------|-----------------------------------------------------------------------------| +| No Precipitation | No significant return; background noise only. It looks like a blue quasi-circle in the center of the image with some yellows mixed in. | +| Stratiform Precipitation | Widespread precipitation, mostly below 40 dBZ (green colors) | +| Isolated Convection | Intense, localized cores; high reflectivity (≥ 45 dBZ, localized dark orange to red colors) | +| Linear Convection | A region of red colors (reflectivity > 45 dBZ) organized into a quasi-linear structure | +| Ambiguous / Uncertain | Cannot be classified with confidence | + + +--- + +## 5. Labelling Procedure + +1. Use `lars.preprocessing.preprocess_radar_data` to generate images and a .csv file. +2. The .csv file initially labels every image as UNKNOWN. This is just a placeholder for hand labelling. +3. According to the criteria above, label all images in the 'file_path' column of the .csv file. + +--- + +## 6. Annotator Guidelines + +- When in doubt, default to the **more conservative** class (e.g. Stratiform over Convective). +- Use the provided example gallery (Section 8) to calibrate your judgement. +- Inter-annotator agreement should be checked periodically; raise disagreements with the team lead. + + +--- + +## 7. Quality Control + +| Check | Method | +|-------|--------| +| Completeness | All images have a primary label | +| Consistency | Random sample reviewed by second annotator | +| Agreement metric | Cohen's κ computed per annotator pair | +| Outlier review | Labels deviating from model predictions flagged for review | + +--- + +## 8. 
Example Gallery + +*(Attach or link representative images for each primary class here.)* + +| Class | Example Image | Notes | +|--------------------------|-----------------------------------|--------------------------| +| Stratiform Rain | `examples/stratiform_01.png` | Clear bright band at 2 km | +| Convective Rain | `examples/convective_01.png` | 55 dBZ core, anvil visible | +| No Precipitation | `examples/clutter_01.png` | Stationary radial spokes | + +--- + +## 9. Changelog + +| Version | Date | Author | Changes | +|---------|------|--------|---------| +| 1.0 | 2026-02-19 | Robert Jackson | Initial release | + +--- + +## 10. References + +- Rinehart, R. E. (2004). *Radar for Meteorologists* (4th ed.). +- American Meteorological Society Glossary: https://glossary.ametsoc.org + diff --git a/lars/preprocessing/radar_preprocessing.py b/lars/preprocessing/radar_preprocessing.py index 72e082a..2c07449 100644 --- a/lars/preprocessing/radar_preprocessing.py +++ b/lars/preprocessing/radar_preprocessing.py @@ -55,7 +55,7 @@ def preprocess_radar_data(file_path, output_path, if 'sweep_0' in radar: sweep = radar['sweep_0'] if sweep["sweep_mode"] == 'ppi' or sweep["sweep_mode"] == 'sector': - fig = plt.figure(figsize=(4, 4)) + fig = plt.figure(figsize=(256/150, 256/150)) ax = plt.axes() sweep["corrected_reflectivity"].where( sweep["corrected_reflectivity"] > min_ref).plot(x="x", y="y",