From 3c1ad451ca477f3ed9622617e7d8fed0250a6614 Mon Sep 17 00:00:00 2001 From: RaidedCluster <95372096+RaidedCluster@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:59:45 +0530 Subject: [PATCH] feat: add external submissions onboarding CLI --- .gitignore | 4 + Makefile | 5 + src/external-submissions/onboard/Makefile | 11 + .../onboard/edge_cases.example.sh | 77 +++ .../onboard/init_counters.py | 53 ++ src/external-submissions/onboard/main.py | 494 ++++++++++++++++++ .../onboard/onboard.example.sh | 24 + .../onboard/requirements.txt | 3 + src/helpers/email.py | 80 +++ src/helpers/env.py | 7 + src/helpers/question_curation.py | 16 +- src/tests/test_email.py | 70 +++ src/tests/test_onboard.py | 453 ++++++++++++++++ src/tests/test_question_curation.py | 21 + variables.example.mk | 5 + 15 files changed, 1322 insertions(+), 1 deletion(-) create mode 100644 src/external-submissions/onboard/Makefile create mode 100644 src/external-submissions/onboard/edge_cases.example.sh create mode 100644 src/external-submissions/onboard/init_counters.py create mode 100644 src/external-submissions/onboard/main.py create mode 100644 src/external-submissions/onboard/onboard.example.sh create mode 100644 src/external-submissions/onboard/requirements.txt create mode 100644 src/helpers/email.py create mode 100644 src/tests/test_email.py create mode 100644 src/tests/test_onboard.py create mode 100644 src/tests/test_question_curation.py diff --git a/.gitignore b/.gitignore index 6bb290d4..e1214321 100644 --- a/.gitignore +++ b/.gitignore @@ -75,6 +75,10 @@ Thumbs.db # Worktrees .worktrees/ +# Local copies of the onboarding wrapper scripts (may contain team details) +onboard.sh +edge_cases.sh + # Compressed files *.7z *.dmg diff --git a/Makefile b/Makefile index b65e2f67..5cf02cd9 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,11 @@ export LLM_BASELINE_STAGING_BUCKET export LLM_BASELINE_NEWS_BUCKET export BUILD_ENV export WORKSPACE_BUCKET +export SUBMISSIONS_BUCKET +export 
SUBMISSIONS_INTERSTITIAL_BUCKET +export SUBMISSIONS_HISTORY_BUCKET +export SUBMISSIONS_SERVICE_ACCOUNT +export SMTP_USER export CLOUD_DEPLOY_REGION := us-central1 diff --git a/src/external-submissions/onboard/Makefile b/src/external-submissions/onboard/Makefile new file mode 100644 index 00000000..636ef992 --- /dev/null +++ b/src/external-submissions/onboard/Makefile @@ -0,0 +1,11 @@ +# The onboard CLI is run locally by an admin; it is not deployed to GCP. +# This Makefile exists so that `make setup-python-env` installs requirements.txt +# and the root `make clean` can recurse into this directory. + +.PHONY: all clean + +all: + @: + +clean: + rm -rf __pycache__ diff --git a/src/external-submissions/onboard/edge_cases.example.sh b/src/external-submissions/onboard/edge_cases.example.sh new file mode 100644 index 00000000..717f3701 --- /dev/null +++ b/src/external-submissions/onboard/edge_cases.example.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Edge-case driver: copy to edge_cases.sh (gitignored) and run against a dev project only. +# Loops through labeled argument sets in TEST mode (no email is ever attempted), so a +# reviewer can see exactly which cases were exercised. Every case here is also covered in +# the unit test suite (src/tests/test_onboard.py). +# +# Labels starting with "ok" are expected to succeed; "fail" cases must exit non-zero. +# +# NB: GCS IAM only accepts EXISTING Google identities, so the ok-cases use $EMAIL (a real +# account — defaults to SMTP_USER) and $SERVICE_ACCOUNT (the real submissions SA). +# Fictional addresses appear only in cases expected to fail. The non-Google-email warning +# path can't be demoed live for the same reason; it is covered by the unit suite. +# +# NB: the loop uses `eval` so case strings can contain quoted arguments. Keep the CASES +# table admin-authored; never build it from external input. + +set -a +. 
<(grep -v '^#' ../../../variables.mk | tr -d '\r') +set +a + +EMAIL="${SMTP_USER}" +SERVICE_ACCOUNT="${SUBMISSIONS_SERVICE_ACCOUNT:-${SUBMISSIONS_SA_EMAIL:-}}" + +if [ -z "$EMAIL" ]; then + echo "FAIL: SMTP_USER not set in variables.mk (needed as the real test identity)." + exit 1 +fi +if [ -z "$SERVICE_ACCOUNT" ]; then + echo "FAIL: no submissions service account in variables.mk (needed for the SA cases)." + exit 1 +fi + +# The anonymous case needs the counter; harmless if it already exists. +python init_counters.py --anon-count 0 || true + +CASES=( + "ok-minimal |--organization MinimalOrg --emails $EMAIL" + "ok-anonymous |--organization 'Secret Labs' --anonymous --emails $EMAIL" + "ok-service-account-only|--organization 'Bot Org' --service-accounts $SERVICE_ACCOUNT" + "ok-emails-plus-sa |--organization 'Mixed Org' --emails $EMAIL --service-accounts $SERVICE_ACCOUNT" + "ok-team-name |--organization 'Acme' --team-name acme-alpha --emails $EMAIL" + "ok-same-org-twice |--organization 'Acme' --emails $EMAIL" + "ok-unicode-org |--organization 'Gréta Łabs' --emails $EMAIL" + "ok-punctuation-org |--organization 'cmcc.vc' --emails $EMAIL" + "ok-long-org |--organization 'An Extremely Long Organization Name That Exceeds The Slug Limit For Sure' --emails $EMAIL" + "ok-send-test-email |--organization 'Mail Org' --emails $EMAIL --send-email-in-test" + "fail-duplicate-teamname|--organization 'Acme' --team-name acme-alpha --emails $EMAIL" + "fail-empty-org |--organization '' --emails $EMAIL" + "fail-no-emails-no-sa |--organization 'Nobody Org'" + "fail-invalid-email |--organization 'Typo Org' --emails not-an-email" + "fail-nonexistent-acct |--organization 'Ghost Org' --emails a@dummy-domain-x92ah8.com" +) + +PASS=0 +FAIL=0 +for case in "${CASES[@]}"; do + label="$(echo "${case%%|*}" | xargs)" + args="${case#*|}" + echo "=== ${label} ===" + if eval python main.py register $args --mode TEST; then + outcome="succeeded" + else + outcome="failed" + fi + if { [[ $label == ok-* ]] && [ 
"$outcome" = "succeeded" ]; } || + { [[ $label == fail-* ]] && [ "$outcome" = "failed" ]; }; then + echo "--- ${label}: ${outcome} (as expected)" + PASS=$((PASS + 1)) + else + echo "--- ${label}: ${outcome} (UNEXPECTED)" + FAIL=$((FAIL + 1)) + fi + echo +done + +echo "${PASS} as expected, ${FAIL} unexpected." +[ "$FAIL" -eq 0 ] diff --git a/src/external-submissions/onboard/init_counters.py b/src/external-submissions/onboard/init_counters.py new file mode 100644 index 00000000..a0d52737 --- /dev/null +++ b/src/external-submissions/onboard/init_counters.py @@ -0,0 +1,53 @@ +"""One-time per-environment setup: create the anonymous-number counter. + +Run exactly once when setting up a new environment (dev or prod), before the first +registration. Refuses to overwrite an existing counter, so re-running later is harmless +but will not change anything. + +Usage (from this directory, with `variables.mk` loaded): + + python init_counters.py --anon-count 8 + +`--anon-count` is the highest anonymous team number already issued (0 for a fresh +environment). Previously issued numbers must never be reissued, including any gaps. +""" + +import argparse +import sys + +from main import COUNTERS_COLLECTION, COUNTERS_DOCUMENT, get_clients + + +def init_counters(anon_count: int, db=None) -> dict: + """Create the counter document; refuses to overwrite an existing one. + + Args: + anon_count (int): Highest anonymous number already issued. + db: Firestore client (injected in tests). + """ + if db is None: + db, _ = get_clients() + counter_ref = db.collection(COUNTERS_COLLECTION).document(COUNTERS_DOCUMENT) + if counter_ref.get().exists: + raise ValueError( + f"{COUNTERS_COLLECTION}/{COUNTERS_DOCUMENT} already exists; not overwriting." 
def main() -> None:
    """Parse arguments and create the counter document.

    Exits with an argparse usage error (status 2) when `--anon-count` is negative.
    When `init_counters` raises `ValueError` (e.g. the counter document already
    exists), exits via `sys.exit` with the error message.
    """
    parser = argparse.ArgumentParser(description="One-time anonymous-counter setup.")
    parser.add_argument("--anon-count", type=int, required=True)
    args = parser.parse_args()
    # The counter holds the highest anonymous number already issued, so it can never be
    # negative; allocation adds 1, meaning a seed of -1 would hand out "Anonymous 0".
    if args.anon_count < 0:
        parser.error("--anon-count must be 0 or greater.")
    try:
        counters = init_counters(anon_count=args.anon_count)
    except ValueError as exception:
        sys.exit(str(exception))
    print(f"Created {COUNTERS_COLLECTION}/{COUNTERS_DOCUMENT}: {counters}")
def normalize_name(name: str) -> str:
    """Normalize a team/organization name for case-insensitive matching.

    Applies Unicode NFKC normalization and case folding, trims, and collapses runs of
    whitespace to a single space. The legacy CSV form "Anonymous #8" normalizes to the
    canonical wiki form "anonymous 8".

    NFKC plus `casefold()` (rather than a plain `lower()`) makes names that render the
    same but are encoded differently -- e.g. a precomposed "é" versus "e" followed by a
    combining accent, or "ß" versus "SS" -- normalize to the same key. For plain ASCII
    names the result is identical to lowercasing.

    Args:
        name (str): The name to normalize.

    Returns:
        str: The normalized name.
    """
    folded = unicodedata.normalize("NFKC", name).casefold()
    # Case folding can itself produce a non-normalized sequence, so normalize once more.
    folded = unicodedata.normalize("NFKC", folded)
    normalized = re.sub(r"\s+", " ", folded.strip())
    return re.sub(r"^anonymous #(\d+)$", r"anonymous \1", normalized)
def is_google_account(email_address: str) -> bool:
    """Return True if the email is a Gmail or Google Workspace account.

    Workspace domains are detected by their MX records pointing at Google. Errors (including
    dnspython not being installed) count as "not a Google account": this only produces a
    warning, never a registration failure.

    The domain is compared case-insensitively. An MX host matches only when it equals
    one of `GOOGLE_MX_SUFFIXES` or is a subdomain of one, so a look-alike host such as
    "mx.notgoogle.com" is not mistaken for Google. An address with no "@" or no domain
    part returns False without a DNS lookup.

    Args:
        email_address (str): The email address to check.

    Returns:
        bool: True for Gmail addresses and for domains whose MX records point at Google.
    """
    _, separator, domain = email_address.strip().rpartition("@")
    domain = domain.lower()
    if not separator or not domain:
        return False
    if domain in ("gmail.com", "googlemail.com"):
        return True
    try:
        # Imported lazily so the module works without dnspython installed.
        import dns.resolver

        records = dns.resolver.resolve(domain, "MX", lifetime=3)
        for record in records:
            host = str(record.exchange).rstrip(".").lower()
            # Match on a DNS label boundary, not a raw string suffix.
            if any(
                host == suffix or host.endswith("." + suffix) for suffix in GOOGLE_MX_SUFFIXES
            ):
                return True
        return False
    except Exception:
        # Deliberate best-effort: any lookup problem means "could not confirm".
        return False
def _bindings_without_folder(policy: Any, prefix: str) -> List[Dict[str, Any]]:
    """Return the policy's bindings minus those scoped to the given folder prefix.

    A binding is dropped when its condition expression contains `prefix`. Bindings with
    no condition, a null condition, or a null/absent expression are unconditional and
    are always kept.

    Args:
        policy (Any): Bucket IAM policy (version 3).
        prefix (str): Folder prefix from `folder_prefix`.

    Returns:
        List[Dict[str, Any]]: The remaining bindings, in their original order.
    """
    kept = []
    for binding in policy.bindings:
        # `or {}` / `or ""` also cover keys that are present but set to None.
        condition = binding.get("condition") or {}
        expression = condition.get("expression") or ""
        if prefix not in expression:
            kept.append(binding)
    return kept
def build_welcome_email(
    team_id: str, organization: str, anonymous: bool, next_due_date: str
) -> Tuple[str, str]:
    """Build the welcome email sent to a newly registered team.

    The body only links to the wiki for submission instructions, so those stay
    maintained in a single place.

    Args:
        team_id (str): Team ID, used as the folder name.
        organization (str): Public organization name (the anonymous one if applicable).
        anonymous (bool): Whether the team registered anonymously.
        next_due_date (str): Next forecast due date in ISO format.

    Returns:
        Tuple[str, str]: The (subject, body) pair.
    """
    if anonymous:
        anonymous_note = (
            f"\nYou are registered anonymously. Your public name is '{organization}': use it as"
            " 'organization' in your forecast files. You may choose whether to also use it for"
            " 'model_organization'.\n"
        )
    else:
        anonymous_note = ""

    # One entry per body line; the empty final entry yields the trailing newline.
    body_lines = [
        "Hi,",
        "",
        "Your team has been registered on ForecastBench.",
        "",
        f"Team: {organization}",
        f"Upload folder: gs://{env.SUBMISSIONS_BUCKET}/{team_id}/",
        f"Next forecast due date: {next_due_date} (rounds repeat every two weeks)",
        anonymous_note,
        "Please upload a small test file to your folder now to confirm your access works.",
        "",
        f"Submission instructions: {SUBMISSION_WIKI_URL}",
        "",
        "If you have any questions, just reply to this email.",
        "",
        "The ForecastBench team",
        "",
    ]
    return "ForecastBench — your team has been registered", "\n".join(body_lines)
+ """ + emails = [e.strip().lower() for e in (emails or []) if e.strip()] + service_accounts = [s.strip().lower() for s in (service_accounts or []) if s.strip()] + organization = organization.strip() + team_name = team_name.strip() + + errors = [] + if not organization: + errors.append("--organization is required.") + if not emails and not service_accounts: + errors.append("Provide at least one of --emails or --service-accounts.") + bad_emails = [e for e in emails + service_accounts if not EMAIL_REGEX.match(e)] + if bad_emails: + errors.append(f"Invalid email address(es): {bad_emails}") + if not env.SUBMISSIONS_BUCKET: + errors.append("SUBMISSIONS_BUCKET is not set; load variables.mk.") + if errors: + raise ValueError(" ".join(errors)) + + if db is None or gcs is None: + db, gcs = get_clients() + + if team_name: + reservation_ref = db.collection(TEAM_NAMES_COLLECTION).document(normalize_name(team_name)) + if reservation_ref.get().exists: + raise ValueError( + f"Team name '{team_name}' is permanently reserved (names are never reissued," + " even after a team is deactivated)." + ) + + display_org = f"Anonymous {allocate_anon_number(db)}" if anonymous else organization + team_id = generate_team_id(db, display_org) + + bucket = gcs.bucket(env.SUBMISSIONS_BUCKET) + bucket.blob(f"{team_id}/.keep").upload_from_string("", content_type="application/x-empty") + + principals = {make_principal(account) for account in emails + service_accounts} + try: + set_folder_permissions(gcs, env.SUBMISSIONS_BUCKET, team_id, principals) + except Exception as exception: + if "does not exist" in str(exception): + raise ValueError( + f"GCS rejected an account ({exception}). IAM only accepts existing Google" + " identities — every email must belong to a real Gmail/Google Workspace" + " account and every service account must exist." 
def deactivate(team_id: str, db: Any = None, gcs: Any = None) -> Dict[str, Any]:
    """Deactivate a team: revoke GCS access, mark inactive; folder and names stay reserved.

    Args:
        team_id (str): Team ID, e.g. "acme-corp_a1b2c3".
        db (Any): Firestore client (injected in tests).
        gcs (Any): Storage client (injected in tests).

    Returns:
        Dict[str, Any]: Summary with `team_id`, `active` (always False) and a `note`.

    Raises:
        ValueError: If `SUBMISSIONS_BUCKET` is not set, the team does not exist, or the
            team is already inactive.
    """
    # Same check and message as `register`, made before any client is created, so a
    # missing variables.mk is reported clearly instead of surfacing from the storage call.
    if not env.SUBMISSIONS_BUCKET:
        raise ValueError("SUBMISSIONS_BUCKET is not set; load variables.mk.")

    if db is None or gcs is None:
        db, gcs = get_clients()

    team_ref = db.collection(TEAMS_COLLECTION).document(team_id)
    snapshot = team_ref.get()
    if not snapshot.exists:
        raise ValueError(f"Team '{team_id}' not found.")
    if not snapshot.to_dict().get("active", False):
        raise ValueError(f"Team '{team_id}' is already inactive.")

    # Revoke access first: if the Firestore update below fails, the team is still
    # marked active and the command can simply be re-run.
    remove_folder_permissions(gcs, env.SUBMISSIONS_BUCKET, team_id)

    # Imported lazily so the module can be unit tested without the Google client libraries.
    from google.cloud import firestore

    team_ref.update({"active": False, "deactivated_at": firestore.SERVER_TIMESTAMP})
    return {
        "team_id": team_id,
        "active": False,
        "note": "GCS access revoked. Folder, team ID, and name reservations are kept forever.",
    }
PROD: email sent.", + ) + register_parser.add_argument( + "--send-email-in-test", + action="store_true", + help="In TEST mode, send the welcome email rerouted to SMTP_USER with a [TEST] prefix.", + ) + + deactivate_parser = subparsers.add_parser("deactivate", help="Deactivate a team.") + deactivate_parser.add_argument("--team-id", required=True) + + args = parser.parse_args() + try: + if args.command == "register": + result = register( + organization=args.organization, + emails=args.emails, + service_accounts=args.service_accounts, + team_name=args.team_name, + anonymous=args.anonymous, + run_mode=args.mode, + send_email_in_test=args.send_email_in_test, + ) + else: + result = deactivate(team_id=args.team_id) + except (ValueError, RuntimeError) as exception: + logger.error(str(exception)) + sys.exit(1) + print(json.dumps(result, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/src/external-submissions/onboard/onboard.example.sh b/src/external-submissions/onboard/onboard.example.sh new file mode 100644 index 00000000..bbe95dc1 --- /dev/null +++ b/src/external-submissions/onboard/onboard.example.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copy to onboard.sh (gitignored) and edit the values below, so commands with team details +# don't end up in shell history. Run from this directory in bash. + +ORGANIZATION="Example Org" +TEAM_NAME="" # optional internal label, never public +EMAILS="alice@example.com bob@example.com" +SERVICE_ACCOUNTS="" # e.g. "uploader@project.iam.gserviceaccount.com" +ANONYMOUS=0 # 1 to register under an "Anonymous N" public name +MODE="TEST" # TEST (default; no email sent) | PROD +SEND_EMAIL_IN_TEST=0 # 1 to send a rerouted [TEST] email while in TEST mode + +set -a +. 
<(grep -v '^#' ../../../variables.mk | tr -d '\r') +set +a + +ARGS=(--organization "$ORGANIZATION" --mode "$MODE") +[ -n "$TEAM_NAME" ] && ARGS+=(--team-name "$TEAM_NAME") +[ -n "$EMAILS" ] && ARGS+=(--emails $EMAILS) +[ -n "$SERVICE_ACCOUNTS" ] && ARGS+=(--service-accounts $SERVICE_ACCOUNTS) +[ "$ANONYMOUS" = "1" ] && ARGS+=(--anonymous) +[ "$SEND_EMAIL_IN_TEST" = "1" ] && ARGS+=(--send-email-in-test) + +python main.py register "${ARGS[@]}" diff --git a/src/external-submissions/onboard/requirements.txt b/src/external-submissions/onboard/requirements.txt new file mode 100644 index 00000000..a2129c08 --- /dev/null +++ b/src/external-submissions/onboard/requirements.txt @@ -0,0 +1,3 @@ +dnspython +google-cloud-firestore +google-cloud-storage diff --git a/src/helpers/email.py b/src/helpers/email.py new file mode 100644 index 00000000..ba1a793b --- /dev/null +++ b/src/helpers/email.py @@ -0,0 +1,80 @@ +"""Send email notifications via SMTP. + +`SMTP_PASSWORD` is read from the environment at send time. In deployed jobs it is injected +from Secret Manager via `--set-secrets`; for local runs, export it in the shell. It must not +be added to `variables.mk` or fetched through `helpers.keys`. + +Behavior is controlled by `constants.RunMode` (defaults to TEST — safe by default): + * TEST: no SMTP connection is attempted; returns False. + * TEST with `send_email_in_test=True`: the email is rerouted to `SMTP_USER` with a "[TEST]" + subject prefix and the intended recipients listed at the top of the body. + * PROD: the email is sent normally; `send_email_in_test` has no effect. +""" + +import logging +import os +import smtplib +import ssl +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from typing import List + +from . 
def send_email(
    to_emails: List[str],
    subject: str,
    body: str,
    run_mode: constants.RunMode = constants.RunMode.TEST,
    send_email_in_test: bool = False,
) -> bool:
    """Deliver a plain-text message over SMTP and tell the caller whether it went out.

    Nothing is raised when delivery fails: the failure is logged and False is returned, so
    a caller such as team onboarding is not interrupted by a notification problem.

    Args:
        to_emails (List[str]): Addresses the message is meant for.
        subject (str): Subject line.
        body (str): Plain-text message body.
        run_mode (constants.RunMode): PROD sends to `to_emails`. Any other mode sends
            nothing unless `send_email_in_test` is set.
        send_email_in_test (bool): Outside PROD, send anyway, but only to `SMTP_USER`, with
            "[TEST]" prefixed to the subject and the intended recipients listed at the top
            of the body. Ignored in PROD.

    Returns:
        bool: True when the SMTP exchange finished without an exception. False when sending
        was skipped (TEST mode without the flag, missing `SMTP_USER`/`SMTP_PASSWORD`, or
        no recipients) or when the exchange raised.
    """
    is_prod = run_mode == constants.RunMode.PROD
    if not (is_prod or send_email_in_test):
        logger.info("RunMode is TEST and send_email_in_test is off; not sending %r.", subject)
        return False

    # The password is looked up on every call instead of being cached at import time.
    smtp_password = os.environ.get("SMTP_PASSWORD", "")
    if not (env.SMTP_USER and smtp_password):
        logger.warning("SMTP_USER or SMTP_PASSWORD not set; not sending email %r.", subject)
        return False

    if not to_emails:
        logger.warning("No recipients; not sending email %r.", subject)
        return False

    if not is_prod:
        # Reroute: keep a record of who would have received it, then mail only the sender.
        intended = ", ".join(to_emails)
        body = f"[TEST] Intended recipients: {intended}\n\n{body}"
        subject = f"[TEST] {subject}"
        to_emails = [env.SMTP_USER]

    message = MIMEMultipart()
    message["From"] = env.SMTP_USER
    message["To"] = ", ".join(to_emails)
    message["Subject"] = subject
    message.attach(MIMEText(body, "plain"))

    try:
        with smtplib.SMTP(env.SMTP_HOST, env.SMTP_PORT, timeout=15) as server:
            # An explicit default context is passed so the server certificate is verified.
            tls_context = ssl.create_default_context()
            server.starttls(context=tls_context)
            server.login(env.SMTP_USER, smtp_password)
            server.sendmail(env.SMTP_USER, to_emails, message.as_string())
    except Exception:
        # Best effort by design: log with traceback and report failure to the caller.
        logger.exception("Failed to send email %r to %s.", subject, to_emails)
        return False
    else:
        return True
class TestSendEmail:
    """Exercise send_email in every run mode and on each early-exit path."""

    def test_default_test_mode_attempts_no_smtp(self, smtp_config):
        # No run_mode given: the default must not reach the SMTP server.
        was_sent = email.send_email([RECIPIENT], "subject", "body")
        assert was_sent is False
        smtp_config.sendmail.assert_not_called()

    def test_test_mode_with_flag_reroutes_to_sender(self, smtp_config):
        was_sent = email.send_email(
            [RECIPIENT],
            "subject",
            "body",
            run_mode=RunMode.TEST,
            send_email_in_test=True,
        )
        assert was_sent is True
        _sender, envelope_to, raw_message = smtp_config.sendmail.call_args[0]
        # Delivered to the sender only, but the original recipient is still named inside.
        assert envelope_to == [SENDER]
        assert "[TEST]" in raw_message
        assert RECIPIENT in raw_message

    def test_prod_sends_to_recipients(self, smtp_config):
        addressees = [RECIPIENT, RECIPIENT_2]
        was_sent = email.send_email(addressees, "subject", "body", run_mode=RunMode.PROD)
        assert was_sent is True
        _sender, envelope_to, raw_message = smtp_config.sendmail.call_args[0]
        assert envelope_to == [RECIPIENT, RECIPIENT_2]
        assert "[TEST]" not in raw_message

    def test_prod_ignores_send_email_in_test(self, smtp_config):
        was_sent = email.send_email(
            [RECIPIENT],
            "subject",
            "body",
            run_mode=RunMode.PROD,
            send_email_in_test=True,
        )
        assert was_sent is True
        _sender, envelope_to, raw_message = smtp_config.sendmail.call_args[0]
        assert envelope_to == [RECIPIENT]
        assert "[TEST]" not in raw_message

    def test_returns_false_without_password(self, monkeypatch):
        # Sender configured, password absent from the environment.
        monkeypatch.setattr(env, "SMTP_USER", SENDER)
        monkeypatch.delenv("SMTP_PASSWORD", raising=False)
        was_sent = email.send_email([RECIPIENT], "subject", "body", run_mode=RunMode.PROD)
        assert was_sent is False

    def test_returns_false_without_recipients(self, smtp_config):
        was_sent = email.send_email([], "subject", "body", run_mode=RunMode.PROD)
        assert was_sent is False

    def test_smtp_failure_returns_false(self, smtp_config):
        # An error raised while sending must surface as False, not as an exception.
        smtp_config.sendmail.side_effect = OSError("boom")
        was_sent = email.send_email([RECIPIENT], "subject", "body", run_mode=RunMode.PROD)
        assert was_sent is False
def fake_db():
    """Return a Firestore client stub that keeps collections and documents apart by name.

    A plain MagicMock hands back the same child whatever argument it is called with, so a
    test would still pass if the code queried the wrong collection or document ID. Here
    every collection name maps to its own mock, every document ID within a collection maps
    to its own mock, and repeated lookups return the same object. A document's snapshot
    (`document.get.return_value`) starts with `exists` set to False.
    """

    def make_collection():
        # Each collection owns a private registry of its documents.
        known_documents = {}

        def lookup_document(doc_id):
            try:
                return known_documents[doc_id]
            except KeyError:
                fresh = MagicMock()
                fresh.get.return_value.exists = False
                known_documents[doc_id] = fresh
                return fresh

        stub = MagicMock()
        stub.document.side_effect = lookup_document
        return stub

    known_collections = {}

    def lookup_collection(name):
        try:
            return known_collections[name]
        except KeyError:
            known_collections[name] = make_collection()
            return known_collections[name]

    client = MagicMock()
    client.collection.side_effect = lookup_collection
    return client
class TestMakePrincipal:
    """Check the IAM principal string produced for each kind of address."""

    def test_user_email(self):
        principal = onboard.make_principal(EMAIL)
        assert principal == "user:" + EMAIL

    def test_service_account(self):
        address = "uploader@proj.iam.gserviceaccount.com"
        principal = onboard.make_principal(address)
        assert principal == "serviceAccount:" + address
class TestFolderPermissions:
    """Check how IAM bindings scoped to one team folder are granted and revoked."""

    BUCKET = "test-bucket"
    TEAM_ID = "acme_ab12cd"

    @staticmethod
    def _gcs_with_policy(bindings):
        """Return (gcs, policy): a storage stub whose bucket serves `policy` as IAM policy."""
        iam_policy = MagicMock()
        iam_policy.bindings = bindings
        storage = MagicMock()
        storage.bucket.return_value.get_iam_policy.return_value = iam_policy
        return storage, iam_policy

    def test_set_is_idempotent_and_keeps_unrelated_bindings(self):
        member = f"user:{EMAIL}"
        team_prefix = onboard.folder_prefix(self.BUCKET, self.TEAM_ID)
        # A leftover grant on this team's folder for somebody else.
        stale_binding = {
            "role": "roles/storage.objectUser",
            "members": {"user:old@dummy-domain-x92ah8.com"},
            "condition": {"expression": f'resource.name.startsWith("{team_prefix}")'},
        }
        # A grant on a different team's folder in the same bucket.
        foreign_binding = {
            "role": "roles/storage.objectUser",
            "members": {"user:other@dummy-domain-x92ah8.com"},
            "condition": {
                "expression": (
                    'resource.name.startsWith("projects/_/buckets/test-bucket/objects/zeta_9f8e7d/")'
                )
            },
        }
        gcs, policy = self._gcs_with_policy([stale_binding, foreign_binding])

        onboard.set_folder_permissions(gcs, self.BUCKET, self.TEAM_ID, {member})

        assert foreign_binding in policy.bindings
        assert stale_binding not in policy.bindings
        roles_on_team_folder = set()
        for binding in policy.bindings:
            if team_prefix not in binding["condition"]["expression"]:
                continue
            roles_on_team_folder.add(binding["role"])
            assert binding["members"] == {member}
        assert roles_on_team_folder == {
            "roles/storage.objectViewer",
            "roles/storage.objectUser",
        }

    def test_remove_drops_only_team_bindings(self):
        team_prefix = onboard.folder_prefix(self.BUCKET, self.TEAM_ID)
        team_binding = {
            "role": "roles/storage.objectUser",
            "members": {f"user:{EMAIL}"},
            "condition": {"expression": f'resource.name.startsWith("{team_prefix}")'},
        }
        # A bucket-wide binding with no condition key at all; it must be left alone.
        bucket_wide_binding = {
            "role": "roles/storage.admin",
            "members": {"user:admin@dummy-domain-x92ah8.com"},
        }
        gcs, policy = self._gcs_with_policy([team_binding, bucket_wide_binding])

        onboard.remove_folder_permissions(gcs, self.BUCKET, self.TEAM_ID)

        assert policy.bindings == [bucket_wide_binding]
submissions_bucket): + with pytest.raises(ValueError, match="Invalid email"): + onboard.register( + organization="Acme", emails=["not-an-email"], db=MagicMock(), gcs=MagicMock() + ) + + def test_requires_submissions_bucket(self, monkeypatch): + monkeypatch.setattr(onboard.env, "SUBMISSIONS_BUCKET", None) + with pytest.raises(ValueError, match="SUBMISSIONS_BUCKET"): + onboard.register(organization="Acme", emails=[EMAIL], db=MagicMock(), gcs=MagicMock()) + + def test_rejects_reserved_team_name(self, submissions_bucket): + db = fake_db() + # The reservation must be looked up in team_names under the normalized name. + db.collection("team_names").document("gdm a").get.return_value.exists = True + with pytest.raises(ValueError, match="reserved"): + onboard.register( + organization="Acme", + emails=[EMAIL], + team_name="GDM A", + db=db, + gcs=MagicMock(), + ) + + def test_happy_path_anonymous(self, wiring): + db, gcs = fake_db(), MagicMock() + + result = onboard.register( + organization="Acme Corp", + emails=["MiXeD@Dummy-Domain-X92ah8.COM"], + service_accounts=["uploader@proj.iam.gserviceaccount.com"], + team_name="acme-a", + anonymous=True, + db=db, + gcs=gcs, + ) + + # The folder slug comes from the public (anonymous) name, never the real org. 
+ assert wiring["id_source"] == "Anonymous 9" + assert result["team_id"] == "anonymous-9_ab12cd" + assert result["organization"] == "Anonymous 9" + assert result["team_name"] == "acme-a" + assert wiring["principals"] == { + "user:mixed@dummy-domain-x92ah8.com", + "serviceAccount:uploader@proj.iam.gserviceaccount.com", + } + gcs.bucket.return_value.blob.assert_called_with("anonymous-9_ab12cd/.keep") + saved = db.collection("teams").document("anonymous-9_ab12cd").set.call_args[0][0] + assert saved["organization"] == "Anonymous 9" + assert saved["deanonymized_organization"] == "Acme Corp" + assert saved["active"] is True + reservation = db.collection("team_names").document("acme-a").set.call_args[0][0] + assert reservation["team_id"] == "anonymous-9_ab12cd" + + def test_default_test_mode_skips_email_with_warning(self, wiring): + result = onboard.register( + organization="Acme", emails=[EMAIL], db=fake_db(), gcs=MagicMock() + ) + assert wiring["sent"] is None + assert result["welcome_email_sent"] is False + assert result["run_mode"] == "TEST" + assert any( + "skipped in TEST mode" in w and "--send-email-in-test" in w for w in result["warnings"] + ) + assert any(EMAIL in w for w in result["warnings"]) + + def test_test_mode_with_flag_sends_rerouted(self, wiring): + result = onboard.register( + organization="Acme", + emails=[EMAIL], + send_email_in_test=True, + db=fake_db(), + gcs=MagicMock(), + ) + assert wiring["sent"] == {"to": [EMAIL], "mode": RunMode.TEST, "in_test": True} + assert result["welcome_email_sent"] is True + + def test_prod_mode_sends_normally(self, wiring): + result = onboard.register( + organization="Acme", + emails=[EMAIL], + run_mode=RunMode.PROD, + db=fake_db(), + gcs=MagicMock(), + ) + assert wiring["sent"] == {"to": [EMAIL], "mode": RunMode.PROD, "in_test": False} + assert result["welcome_email_sent"] is True + assert result["run_mode"] == "PROD" + + def test_service_accounts_only_never_emails(self, wiring): + result = onboard.register( + 
class TestDeactivate:
    """Check deactivate() for missing, already-inactive and active teams."""

    TEAM_ID = "acme_ab12cd"

    @staticmethod
    def _db_with_team(team_id, active):
        """Return a fake_db() whose `teams/<team_id>` snapshot exists with the given flag."""
        db = fake_db()
        snapshot = db.collection("teams").document(team_id).get.return_value
        snapshot.exists = True
        snapshot.to_dict.return_value = {"active": active}
        return db

    def test_not_found(self, submissions_bucket):
        # An untouched fake_db() reports every document as non-existent.
        empty_db = fake_db()
        with pytest.raises(ValueError, match="not found"):
            onboard.deactivate(self.TEAM_ID, db=empty_db, gcs=MagicMock())

    def test_already_inactive(self, submissions_bucket):
        db = self._db_with_team(self.TEAM_ID, False)
        with pytest.raises(ValueError, match="already inactive"):
            onboard.deactivate(self.TEAM_ID, db=db, gcs=MagicMock())

    def test_deactivates_and_revokes(self, monkeypatch, submissions_bucket, fake_google_cloud):
        revoked = {}

        def _record_revocation(gcs, bucket, team_id):
            revoked["team_id"] = team_id

        monkeypatch.setattr(onboard, "remove_folder_permissions", _record_revocation)
        db = self._db_with_team(self.TEAM_ID, True)

        result = onboard.deactivate(self.TEAM_ID, db=db, gcs=MagicMock())

        assert revoked["team_id"] == "acme_ab12cd"
        assert result["active"] is False
        # First positional argument of the update() call made on the team document.
        team_document = db.collection("teams").document(self.TEAM_ID)
        written_fields = team_document.update.call_args[0][0]
        assert written_fields["active"] is False
class TestGetNextForecastDueDate:
    """Test the 14-day round cycle that get_next_forecast_due_date walks.

    Each case passes an explicit reference date, so no result depends on the day the suite
    runs. The expected values assume 2024-07-21 lies on the cycle, which
    `test_tournament_start_is_a_round_date` pins.
    """

    def test_round_date_returns_itself(self):
        assert question_curation.get_next_forecast_due_date(date(2025, 3, 2)) == "2025-03-02"

    def test_day_after_round_returns_next_round(self):
        assert question_curation.get_next_forecast_due_date(date(2025, 3, 3)) == "2025-03-16"

    def test_tournament_start_is_a_round_date(self):
        assert question_curation.get_next_forecast_due_date(date(2024, 7, 21)) == "2024-07-21"

    def test_day_before_round_returns_next_day(self):
        assert question_curation.get_next_forecast_due_date(date(2025, 3, 1)) == "2025-03-02"

    def test_mid_cycle_date_rounds_up_to_next_round(self):
        # Seven days past a round date is still seven days short of the next one.
        assert question_curation.get_next_forecast_due_date(date(2025, 3, 9)) == "2025-03-16"

    def test_dates_before_2024_07_21_round_up_onto_the_cycle(self):
        # Earlier reference dates make the day difference negative; the result must still
        # move forward in time, never backwards.
        assert question_curation.get_next_forecast_due_date(date(2024, 7, 20)) == "2024-07-21"
        assert question_curation.get_next_forecast_due_date(date(2024, 7, 8)) == "2024-07-21"

    def test_result_is_never_before_the_reference_date(self):
        # Walk one full cycle: every answer is 0-13 days ahead and lands on a round date.
        for day_of_march in range(2, 16):
            reference = date(2025, 3, day_of_march)
            due = date.fromisoformat(question_curation.get_next_forecast_due_date(reference))
            assert 0 <= (due - reference).days < 14
            assert (due - date(2025, 3, 2)).days % 14 == 0