Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,10 @@ fred-update-questions:
dbnomics: dbnomics-fetch dbnomics-update-questions

dbnomics-fetch:
$(MAKE) -C src/questions/dbnomics/fetch || echo "* $@" >> $(MAKE_FAILURE_LOG)
$(MAKE) -C src/orchestration/func_dbnomics_fetch || echo "* $@" >> $(MAKE_FAILURE_LOG)

dbnomics-update-questions:
$(MAKE) -C src/questions/dbnomics/update_questions || echo "* $@" >> $(MAKE_FAILURE_LOG)
$(MAKE) -C src/orchestration/func_dbnomics_update || echo "* $@" >> $(MAKE_FAILURE_LOG)

tag-questions:
$(MAKE) -C src/metadata/tag_questions || echo "* $@" >> $(MAKE_FAILURE_LOG)
Expand Down
17 changes: 17 additions & 0 deletions src/_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,23 @@ class Config:
coerce = True


class DbnomicsFetchFrame(pa.DataFrameModel):
    """Output of DbnomicsSource.fetch(). Per-observation rows from the DBnomics API.

    Each row is a single observation. The three ``*_name`` columns carry the
    provider, dataset and series names alongside the observation itself.
    """

    # NOTE(review): presumably the DBnomics series identifier (provider/dataset/series
    # code) -- confirm against DbnomicsSource.fetch().
    id: Series[str]
    # Observation period, kept as a string; the exact format is not fixed here.
    period: Series[str]
    value: Series[object]  # float observation or the string "NA" for missing values
    provider_name: Series[str]
    dataset_name: Series[str]
    series_name: Series[str]

    class Config:
        """Schema configuration."""

        # Columns beyond the ones declared above are allowed through validation.
        strict = False
        # Ask pandera to cast columns to the declared dtypes during validation.
        coerce = True


class AcledResolutionFrame(pa.DataFrameModel):
"""ACLED-specific: aggregated events by country and date.

Expand Down
106 changes: 8 additions & 98 deletions src/helpers/dbnomics.py
Original file line number Diff line number Diff line change
@@ -1,102 +1,12 @@
"""DBnomics-specific variables."""
"""DBnomics constants — thin re-export shim over the lightweight sources._metadata layer."""

from sources._metadata import SOURCE_METADATA
import os
import sys

SOURCE_INTRO = SOURCE_METADATA["dbnomics"]["source_intro"]
RESOLUTION_CRITERIA = SOURCE_METADATA["dbnomics"]["resolution_criteria"]
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))

FETCH_COLUMN_DTYPE = {
"id": str,
"period": str,
"value": str,
"question_text": str,
"value_at_freeze_datetime_explanation": str,
}
FETCH_COLUMNS = list(FETCH_COLUMN_DTYPE.keys())
from sources._metadata import SOURCE_METADATA # noqa: E402

METEOFRANCE_STATIONS = [
{"id": "07005", "station": "Abbeville"},
{"id": "07015", "station": "Lille Airport"},
{"id": "07020", "station": "Pointe De La Hague"},
{"id": "07027", "station": "Caen – Carpiquet Airport"},
{"id": "07037", "station": "Rouen Airport"},
{"id": "07072", "station": "Reims – Prunay Aerodrome"},
{"id": "07110", "station": "Brest Bretagne Airport"},
{"id": "07117", "station": "Ploumanac'h"},
{"id": "07130", "station": "Rennes–Saint-Jacques Airport"},
{"id": "07139", "station": "Alençon"},
{"id": "07149", "station": "Orly"},
{"id": "07168", "station": "Troyes-Barberey Airport"},
{"id": "07181", "station": "Nancy – Ochey Air Base"},
{"id": "07190", "station": "Strasbourg Airport"},
{"id": "07222", "station": "Nantes Atlantique Airport"},
{"id": "07240", "station": "Tours"},
{"id": "07255", "station": "Bourges"},
{"id": "07280", "station": "Dijon-Bourgogne Airport"},
{"id": "07299", "station": "EuroAirport Basel Mulhouse Freiburg"},
{"id": "07335", "station": "Poitiers–Biard Airport"},
{"id": "07434", "station": "Limoges – Bellegarde Airport"},
{"id": "07460", "station": "Clermont-Ferrand Auvergne Airport"},
{"id": "07471", "station": "Le Puy – Loudes Airport"},
{"id": "07481", "station": "Lyon–Saint Exupéry Airport"},
{"id": "07510", "station": "Bordeaux–Mérignac Airport"},
{"id": "07535", "station": "Gourdon"},
{"id": "07558", "station": "Millau"},
{"id": "07577", "station": "Montélimar"},
{"id": "07591", "station": "Embrun"},
{"id": "07607", "station": "Mont-de-Marsan"},
{"id": "07621", "station": "Tarbes–Lourdes–Pyrénées Airport"},
{"id": "07627", "station": "Saint-Girons"},
{"id": "07630", "station": "Toulouse–Blagnac Airport"},
{"id": "07650", "station": "Marignane"},
{"id": "07690", "station": "Nice"},
{"id": "07747", "station": "Perpignan"},
{"id": "07761", "station": "Ajaccio"},
{"id": "61968", "station": "Glorioso Islands"},
{"id": "61970", "station": "Juan de Nova Island"},
{"id": "61972", "station": "Europa Island"},
{"id": "61976", "station": "Tromelin Island"},
{"id": "61980", "station": "Roland Garros Airport"},
{"id": "61996", "station": "Amsterdam Island"},
{"id": "61997", "station": "Île de la Possession"},
{"id": "61998", "station": "Grande Terre"},
{"id": "67005", "station": "Pamandzi"},
{"id": "71805", "station": "Saint-Pierre"},
{"id": "78890", "station": "La Désirade"},
{"id": "78894", "station": "Saint Barthélemy"},
{"id": "78897", "station": "Pointe-à-Pitre International Airport"},
{"id": "78925", "station": "Martinique Aimé Césaire International Airport"},
{"id": "81401", "station": "Saint-Laurent"},
{"id": "81405", "station": "Cayenne – Félix Éboué Airport"},
]

QUESTION_TEMPLATES = {
"meteofrance": (
"What is the probability that the daily average temperature at the French weather station "
"at {station} will be higher on {resolution_date} than on {forecast_due_date}?"
)
}

VALUE_EXPLANATIONS = {
"meteofrance": "The daily average temperature at the French weather station at {station}."
}


def create_meteofrance_constants(STATIONS):
    """Build the per-station constants used by the fetch and update_questions steps.

    Args:
        STATIONS: iterable of mappings, each providing an ``"id"`` and a
            ``"station"`` entry.

    Returns:
        A list with one dict per input record, in input order, holding the
        series ``id``, the ``question_text`` and the
        ``freeze_datetime_value_explanation`` for that station.
    """
    # The question template also contains {resolution_date} and
    # {forecast_due_date}, which must survive untouched, so only the station
    # placeholder is substituted via str.replace rather than str.format.
    # NOTE(review): the explanation key here differs from the
    # "value_at_freeze_datetime_explanation" column name in FETCH_COLUMN_DTYPE --
    # confirm which spelling consumers expect.
    return [
        {
            "id": f"meteofrance/TEMPERATURE/celsius.{station_id}.D",
            "question_text": QUESTION_TEMPLATES["meteofrance"].replace(
                "{station}", station_name
            ),
            "freeze_datetime_value_explanation": VALUE_EXPLANATIONS["meteofrance"].format(
                station=station_name
            ),
        }
        for station_id, station_name in (
            (record["id"], record["station"]) for record in STATIONS
        )
    ]


CONSTANTS = create_meteofrance_constants(METEOFRANCE_STATIONS)
_META = SOURCE_METADATA["dbnomics"]
SOURCE_INTRO = _META["source_intro"]
RESOLUTION_CRITERIA = _META["resolution_criteria"]
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,23 @@ UPLOAD_DIR = upload
.gcloudignore:
cp -r $(ROOT_DIR)src/helpers/.gcloudignore .

Procfile:
cp -r $(ROOT_DIR)src/helpers/Procfile .
Dockerfile: $(ROOT_DIR)src/helpers/Dockerfile.template
sed \
-e 's/REGION/$(CLOUD_DEPLOY_REGION)/g' \
-e 's/STACK/google-22-full/g' \
-e 's/PYTHON_VERSION/python312/g' \
$< > Dockerfile

deploy : main.py .gcloudignore requirements.txt Procfile
deploy : main.py .gcloudignore requirements.txt Dockerfile
mkdir -p $(UPLOAD_DIR)
cp -r $(ROOT_DIR)utils $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/helpers
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/sources
mkdir -p $(UPLOAD_DIR)/orchestration
cp $(ROOT_DIR)src/orchestration/__init__.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_source_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp $^ $(UPLOAD_DIR)/
gcloud run jobs deploy \
func-data-dbnomics-fetch \
Expand All @@ -37,4 +41,4 @@ deploy : main.py .gcloudignore requirements.txt Procfile
--source $(UPLOAD_DIR)

clean :
rm -rf $(UPLOAD_DIR) .gcloudignore Procfile
rm -rf $(UPLOAD_DIR) .gcloudignore Dockerfile
30 changes: 30 additions & 0 deletions src/orchestration/func_dbnomics_fetch/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""DBnomics fetch entry point."""

from __future__ import annotations

import logging
from typing import Any

from helpers import decorator
from orchestration import _source_io
from sources.dbnomics import DbnomicsSource

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

SOURCE = "dbnomics"


@decorator.log_runtime
def driver(_: Any) -> None:
    """Run the DBnomics fetch step and hand its output to the fetch writer.

    Args:
        _: Unused; present only so the entry point accepts one positional
            argument (the script entry passes ``None``).
    """
    # Build the source, pull the observations, and pass them straight to the
    # shared writer keyed by the source name.
    fetched = DbnomicsSource().fetch()
    _source_io.write_fetch_output(SOURCE, fetched)

    logger.info("Done.")


if __name__ == "__main__":
driver(None)
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
google-cloud-storage
google-cloud-secret-manager
backoff
certifi
pandas>=2.2.2,<3.0
pandera
termcolor
requests
backoff
numpy
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,23 @@ UPLOAD_DIR = upload
.gcloudignore:
cp -r $(ROOT_DIR)src/helpers/.gcloudignore .

Procfile:
cp -r $(ROOT_DIR)src/helpers/Procfile .
Dockerfile: $(ROOT_DIR)src/helpers/Dockerfile.template
sed \
-e 's/REGION/$(CLOUD_DEPLOY_REGION)/g' \
-e 's/STACK/google-22-full/g' \
-e 's/PYTHON_VERSION/python312/g' \
$< > Dockerfile

deploy : main.py .gcloudignore requirements.txt Procfile
deploy : main.py .gcloudignore requirements.txt Dockerfile
mkdir -p $(UPLOAD_DIR)
cp -r $(ROOT_DIR)utils $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/helpers
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/sources
mkdir -p $(UPLOAD_DIR)/orchestration
cp $(ROOT_DIR)src/orchestration/__init__.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_source_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp $^ $(UPLOAD_DIR)/
gcloud run jobs deploy \
func-data-dbnomics-update-questions \
Expand All @@ -37,4 +41,4 @@ deploy : main.py .gcloudignore requirements.txt Procfile
--source $(UPLOAD_DIR)

clean :
rm -rf $(UPLOAD_DIR) .gcloudignore Procfile
rm -rf $(UPLOAD_DIR) .gcloudignore Dockerfile
37 changes: 37 additions & 0 deletions src/orchestration/func_dbnomics_update/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""DBnomics update entry point."""

from __future__ import annotations

import logging
from typing import Any

from helpers import data_utils, decorator
from orchestration import _source_io
from sources.dbnomics import DbnomicsSource

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

SOURCE = "dbnomics"


@decorator.log_runtime
def driver(_: Any) -> None:
    """Refresh the DBnomics questions and upload the results.

    Loads the stored question and fetch data for the source, lets
    ``DbnomicsSource.update`` compute the new state, then uploads the updated
    questions and, when any were produced, the resolution files.

    Args:
        _: Unused; present only so the entry point accepts one positional
            argument (the script entry passes ``None``).
    """
    dbnomics = DbnomicsSource()

    questions, fetched = data_utils.get_data_from_cloud_storage(
        SOURCE,
        return_question_data=True,
        return_fetch_data=True,
    )
    outcome = dbnomics.update(questions, fetched)

    logger.info("Uploading to GCP...")
    data_utils.upload_questions(outcome.dfq, SOURCE)
    # Resolution files are optional: skip the upload when the update produced none.
    if outcome.resolution_files:
        _source_io.upload_resolution_files(SOURCE, outcome.resolution_files)

    logger.info("Done.")


if __name__ == "__main__":
driver(None)
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
google-cloud-storage
google-cloud-secret-manager
pandas>=2.2.2,<3.0
tqdm
pandera
termcolor
requests
backoff
numpy
Loading
Loading