Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,10 @@ wikipedia-update-questions:
fred: fred-fetch fred-update-questions

fred-fetch:
$(MAKE) -C src/questions/fred/fetch || echo "* $@" >> $(MAKE_FAILURE_LOG)
$(MAKE) -C src/orchestration/func_fred_fetch || echo "* $@" >> $(MAKE_FAILURE_LOG)

fred-update-questions:
$(MAKE) -C src/questions/fred/update_questions || echo "* $@" >> $(MAKE_FAILURE_LOG)
$(MAKE) -C src/orchestration/func_fred_update || echo "* $@" >> $(MAKE_FAILURE_LOG)

dbnomics: dbnomics-fetch dbnomics-update-questions

Expand Down
8 changes: 8 additions & 0 deletions src/_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,14 @@ class Config:
coerce = True


class FredFetchFrame(QuestionFrame):
    """Output of FredSource.fetch(). QuestionFrame plus transient fields for update()."""

    # String-typed column; presumably the time of the fetch (format not visible here).
    fetch_datetime: Series[str]

    # Object-typed column, explicitly declared nullable.
    probability: Series[object] = pa.Field(nullable=True)

    # Object-typed column; each row carries a list[dict]: [{id, date, value}, ...]
    resolutions: Series[object]


class AcledResolutionFrame(pa.DataFrameModel):
"""ACLED-specific: aggregated events by country and date.

Expand Down
1,768 changes: 8 additions & 1,760 deletions src/helpers/fred.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,25 @@ UPLOAD_DIR = upload
# Copy the shared .gcloudignore into the current directory.  The helper copy is
# declared as a prerequisite so the local file is refreshed whenever the shared
# one changes, instead of being created once and then going stale.
.gcloudignore: $(ROOT_DIR)src/helpers/.gcloudignore
	cp $< $@

Procfile:
cp -r $(ROOT_DIR)src/helpers/Procfile .
# Render the Dockerfile from the shared template by substituting the REGION,
# STACK and PYTHON_VERSION placeholders.  The output is written to a temporary
# file and renamed into place only after sed succeeds, so a failed run can never
# leave a truncated Dockerfile that Make would treat as up to date.
Dockerfile: $(ROOT_DIR)src/helpers/Dockerfile.template
	sed \
		-e 's/REGION/$(CLOUD_DEPLOY_REGION)/g' \
		-e 's/STACK/google-22-full/g' \
		-e 's/PYTHON_VERSION/python312/g' \
		$< > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

NUM_CPUS=2

deploy : main.py .gcloudignore requirements.txt Procfile
deploy : main.py .gcloudignore requirements.txt Dockerfile
mkdir -p $(UPLOAD_DIR)
cp -r $(ROOT_DIR)utils $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/helpers
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/sources
mkdir -p $(UPLOAD_DIR)/orchestration
cp $(ROOT_DIR)src/orchestration/__init__.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_source_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp $^ $(UPLOAD_DIR)/
gcloud run jobs deploy \
func-data-fred-fetch \
Expand All @@ -40,4 +44,4 @@ deploy : main.py .gcloudignore requirements.txt Procfile
--source $(UPLOAD_DIR)

clean :
rm -rf $(UPLOAD_DIR) .gcloudignore Procfile
rm -rf $(UPLOAD_DIR) .gcloudignore Dockerfile
33 changes: 33 additions & 0 deletions src/orchestration/func_fred_fetch/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""FRED fetch entry point."""

from __future__ import annotations

import logging
from typing import Any

from helpers import data_utils, decorator, keys
from orchestration import _source_io
from sources.fred import FredSource

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

SOURCE = "fred"


@decorator.log_runtime
def driver(_: Any) -> None:
    """Run the FRED fetch step.

    Reads the current FRED question data via
    ``data_utils.get_data_from_cloud_storage``, passes it to
    ``FredSource.fetch`` and hands the returned frame to
    ``_source_io.write_fetch_output``.

    Args:
        _: Unused positional argument.
    """
    fred = FredSource()
    # The API key is attached after construction rather than passed to __init__.
    fred.api_key = keys.API_KEY_FRED

    existing_questions = data_utils.get_data_from_cloud_storage(
        SOURCE,
        return_question_data=True,
    )
    fetched = fred.fetch(dfq=existing_questions)
    _source_io.write_fetch_output(SOURCE, fetched)

    logger.info("Done.")


if __name__ == "__main__":
driver(None)
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
google-cloud-storage
google-cloud-secret-manager
pandas>=2.2.2,<3.0
tqdm
pandera
requests
bs4
backoff
pandera
termcolor
numpy
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,25 @@ UPLOAD_DIR = upload
# Copy the shared .gcloudignore into the current directory.  The helper copy is
# declared as a prerequisite so the local file is refreshed whenever the shared
# one changes, instead of being created once and then going stale.
.gcloudignore: $(ROOT_DIR)src/helpers/.gcloudignore
	cp $< $@

Procfile:
cp -r $(ROOT_DIR)src/helpers/Procfile .
# Render the Dockerfile from the shared template by substituting the REGION,
# STACK and PYTHON_VERSION placeholders.  The output is written to a temporary
# file and renamed into place only after sed succeeds, so a failed run can never
# leave a truncated Dockerfile that Make would treat as up to date.
Dockerfile: $(ROOT_DIR)src/helpers/Dockerfile.template
	sed \
		-e 's/REGION/$(CLOUD_DEPLOY_REGION)/g' \
		-e 's/STACK/google-22-full/g' \
		-e 's/PYTHON_VERSION/python312/g' \
		$< > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

NUM_CPUS=2

deploy : main.py .gcloudignore requirements.txt Procfile
deploy : main.py .gcloudignore requirements.txt Dockerfile
mkdir -p $(UPLOAD_DIR)
cp -r $(ROOT_DIR)utils $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp -r $(ROOT_DIR)src/helpers $(UPLOAD_DIR)/helpers
cp -r $(ROOT_DIR)src/sources $(UPLOAD_DIR)/sources
mkdir -p $(UPLOAD_DIR)/orchestration
cp $(ROOT_DIR)src/orchestration/__init__.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/orchestration/_source_io.py $(UPLOAD_DIR)/orchestration/
cp $(ROOT_DIR)src/_fb_types.py $(UPLOAD_DIR)/
cp $(ROOT_DIR)src/_schemas.py $(UPLOAD_DIR)/
cp $^ $(UPLOAD_DIR)/
gcloud run jobs deploy \
func-data-fred-update-questions \
Expand All @@ -40,4 +44,4 @@ deploy : main.py .gcloudignore requirements.txt Procfile
--source $(UPLOAD_DIR)

clean :
rm -rf $(UPLOAD_DIR) .gcloudignore Procfile
rm -rf $(UPLOAD_DIR) .gcloudignore Dockerfile
37 changes: 37 additions & 0 deletions src/orchestration/func_fred_update/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""FRED update entry point."""

from __future__ import annotations

import logging
from typing import Any

from helpers import data_utils, decorator
from orchestration import _source_io
from sources.fred import FredSource

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

SOURCE = "fred"


@decorator.log_runtime
def driver(_: Any) -> None:
    """Run the FRED update step.

    Reads the question data and fetch data for FRED via
    ``data_utils.get_data_from_cloud_storage``, passes both to
    ``FredSource.update`` and uploads the result: the questions always, the
    resolution files only when the result carries any.

    Args:
        _: Unused positional argument.
    """
    fred = FredSource()

    questions, fetched = data_utils.get_data_from_cloud_storage(
        SOURCE,
        return_question_data=True,
        return_fetch_data=True,
    )
    outcome = fred.update(questions, fetched)

    logger.info("Uploading to GCP...")
    data_utils.upload_questions(outcome.dfq, SOURCE)
    # Skip the resolution upload entirely when update() produced no files.
    if outcome.resolution_files:
        _source_io.upload_resolution_files(SOURCE, outcome.resolution_files)

    logger.info("Done.")


if __name__ == "__main__":
driver(None)
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
google-cloud-storage
google-cloud-secret-manager
pandas>=2.2.2,<3.0
pandera
termcolor
requests
backoff
numpy
Loading
Loading