Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions fgi_stac/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,10 @@ def build_metadata_block(
)

organisation_path = (
default_publisher if "/" in clean(default_publisher) else ""
) or clean(dataspot_meta["publisher_path"]) or clean(producer_organization)
(default_publisher if "/" in clean(default_publisher) else "")
or clean(dataspot_meta["publisher_path"])
or clean(producer_organization)
)

return {
"default": {
Expand Down
11 changes: 5 additions & 6 deletions fgi_stac/publish_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
write_metadata_snapshot_file,
)
from common import change_tracking
from dataspot_api import dataspot_metadata
from dataspot_auth import DataspotAuth
from http_client import HTTP_LIMITS, HTTP_TIMEOUT, with_http_retry
from huwise_utils_py import (
Expand All @@ -45,12 +46,10 @@
)
from huwise_utils_py.config import HuwiseConfig
from huwise_utils_py.http import HttpClient
from dataspot_api import dataspot_metadata
from dataspot_auth import DataspotAuth
from metadata import (
DEFAULT_ATTRIBUTIONS,
DEFAULT_CONTACT_EMAIL,
DEFAULT_CONTACT_NAME,
DEFAULT_ATTRIBUTIONS,
DEFAULT_GEOGRAPHIC_REFERENCE,
DEFAULT_LICENSE,
DEFAULT_RIGHTS,
Expand Down Expand Up @@ -190,6 +189,8 @@ def _processor_field_targets_stale(
if expected_label and clean_text(existing.get("field_label")) != expected_label:
return True
return False


THEME_MAP_DATA_BS_CH = {
"arbeit, erwerb": "20bb143",
"bau- und wohnungswesen": "c813f26",
Expand Down Expand Up @@ -1031,9 +1032,7 @@ def _set_template_field(template: str, field: str, value: Any) -> None:
matches_last_push = last_push is not None and normalized_existing == _normalize_metadata_compare_value(
last_push
)
publisher_existing = _normalize_metadata_compare_value(
template_payloads.get("default", {}).get("publisher")
)
publisher_existing = _normalize_metadata_compare_value(template_payloads.get("default", {}).get("publisher"))
prefilled_as_publisher = (
(resolved_template, field) == ("custom", "publizierende_organisation")
and normalized_existing
Expand Down
19 changes: 5 additions & 14 deletions kapo_smileys/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,13 @@ def parse_messdaten(df_einsatz_days, df_einsaetze):
messdaten_columns = df_all_pro_standort.columns

total_all_rows += len(df_all_pro_standort)
df_all_pro_standort.to_csv(
export_file_all_unfiltered, mode="a", header=first_all_csv, index=False
)
df_all_pro_standort.to_csv(export_file_all_unfiltered, mode="a", header=first_all_csv, index=False)
first_all_csv = False

df_filtered = df_all_pro_standort[df_all_pro_standort["Zyklus"].isin(zyklus_filter)]
if not df_filtered.empty:
total_filtered_rows += len(df_filtered)
df_filtered.to_csv(
export_file_filtered, mode="a", header=first_filtered_csv, index=False
)
df_filtered.to_csv(export_file_filtered, mode="a", header=first_filtered_csv, index=False)
first_filtered_csv = False
del df_filtered

Expand All @@ -102,14 +98,11 @@ def parse_messdaten(df_einsatz_days, df_einsaetze):

if first_filtered_csv:
logging.warning(
f"No datapoints for cycles {previous_zyklus} and {current_zyklus}; "
f"writing empty {export_file_filtered}"
f"No datapoints for cycles {previous_zyklus} and {current_zyklus}; writing empty {export_file_filtered}"
)
pd.DataFrame(columns=messdaten_columns).to_csv(export_file_filtered, index=False)

logging.info(
f"Saved unfiltered data with {total_all_rows} datapoints to {export_file_all_unfiltered}"
)
logging.info(f"Saved unfiltered data with {total_all_rows} datapoints to {export_file_all_unfiltered}")
logging.info(f"Extracting data for cycles {previous_zyklus} and {current_zyklus}")
logging.info(
f"Filtered data contains {total_filtered_rows} datapoints out of {total_all_rows} total "
Expand Down Expand Up @@ -456,9 +449,7 @@ def init_sqlite(conn):
def append_to_sqlite(df, conn):
df_einsatzplan, df_einzelmessungen = _prepare_sqlite_frames(df)
df_einsatzplan.to_sql("Einsatzplan", conn, if_exists="append", index=False)
df_einzelmessungen.to_sql(
"Einzelmessungen", conn, if_exists="append", index=False, chunksize=SQLITE_CHUNK_SIZE
)
df_einzelmessungen.to_sql("Einzelmessungen", conn, if_exists="append", index=False, chunksize=SQLITE_CHUNK_SIZE)


def finalize_sqlite(conn):
Expand Down
14 changes: 5 additions & 9 deletions mobilitaet_mikromobilitaet_stats/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
import common
import geopandas as gpd
import pandas as pd
from shapely import wkt
from shapely.geometry import mapping
from common import FTP_PASS, FTP_SERVER, FTP_USER
from dateutil.relativedelta import relativedelta
from shapely import wkt
from shapely.geometry import mapping

CONFIGS = {
"bezirke": {
Expand Down Expand Up @@ -126,6 +126,7 @@ def _ods_export_cols(output_cols):
"""Huwise/ODS export: no geometry (full geometries stay in SQLite for Datasette)."""
return [c for c in output_cols if c != "geometry"]


DEDUPE_COLS_416 = [
"date",
"bez_id",
Expand Down Expand Up @@ -344,10 +345,7 @@ def append_stats_to_sqlite(df_stats, db_path, table_name, dedupe_cols, output_co
df_all = _cast_bez_id_column(df_all)
df_all.to_sql(table_name, conn, if_exists="replace", index=False)
conn.commit()
logging.info(
f"SQLite {table_name}: {len(df_all)} fact rows, "
f"{len(df_bezirke)} bezirke update(s) in {work_path}"
)
logging.info(f"SQLite {table_name}: {len(df_all)} fact rows, {len(df_bezirke)} bezirke update(s) in {work_path}")


def _filter_by_rolling_window(df, is_monthly):
Expand Down Expand Up @@ -433,9 +431,7 @@ def _zip_csv_for_ods(csv_path):
logging.info(f"File {csv_path} size is {csv_mb:.2f} MB")
logging.info(f"Created compressed file: {zip_path}")
if csv_mb > 0:
logging.info(
f"Compressed file size: {zip_mb:.2f} MB (compression ratio: {zip_mb / csv_mb * 100:.1f}%)"
)
logging.info(f"Compressed file size: {zip_mb:.2f} MB (compression ratio: {zip_mb / csv_mb * 100:.1f}%)")
else:
logging.info(f"Compressed file size: {zip_mb:.2f} MB")
if zip_mb > ODS_SIZE_LIMIT_MB:
Expand Down
4 changes: 1 addition & 3 deletions staka_vernehmlassungen/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,9 +1099,7 @@ def process_textrueckmeldungen():
for filename in os.listdir(textrueckmeldungen_path):
if filename.endswith((".xlsx", ".xls")):
if "zustimmungsmessung" in _normalize_column_name(Path(filename).stem):
logging.warning(
f"Skipping Zustimmungsmessung file (aggregate format not supported): {filename}"
)
logging.warning(f"Skipping Zustimmungsmessung file (aggregate format not supported): {filename}")
continue
file_path = os.path.join(textrueckmeldungen_path, filename)
try:
Expand Down