diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index aa7758a..78e23df 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -37,9 +37,9 @@ source .venv/bin/activate && pytest tests/ -x -q && ruff check --fix . && ruff f | `app.py` | Streamlit UI: CV upload → profile → search → evaluate → display | | `cv_parser.py` | Extract text from PDF/DOCX/MD/TXT | | `llm.py` | Gemini API wrapper with retry/backoff | -| `search_agent.py` | Generate search queries (LLM) + orchestrate search | -| `search_provider.py` | `SearchProvider` protocol + `get_provider()` factory | -| `bundesagentur.py` | Bundesagentur für Arbeit API provider (default) | +| `search_api/search_agent.py` | Generate search queries (LLM) + orchestrate search | +| `search_api/search_provider.py` | `SearchProvider` protocol + `get_provider()` factory | +| `search_api/bundesagentur.py` | Bundesagentur für Arbeit API provider (default) | | `evaluator_agent.py` | Score jobs against profile (LLM) + career summary | | `models.py` | All Pydantic schemas (`CandidateProfile`, `JobListing`, etc.) | | `cache.py` | JSON file cache in `.immermatch_cache/` | diff --git a/.github/prompts/pr-review.prompt.md b/.github/prompts/pr-review.prompt.md index c12b4ad..0924821 100644 --- a/.github/prompts/pr-review.prompt.md +++ b/.github/prompts/pr-review.prompt.md @@ -2,7 +2,7 @@ Fetch and address review comments from the most recent PR on the current branch. ## Execution policy -- Run all `gh` commands (or equivalent GitHub MCP calls) immediately without asking for confirmation. +- Run all `gh` commands (or equivalent GitHub MCP calls) immediately without asking for confirmation. Prefer MCP calls for efficiency when possible. - Do **not** start code edits until after presenting a full comment assessment and getting explicit user confirmation. ## Workflow @@ -17,6 +17,7 @@ Fetch and address review comments from the most recent PR on the current branch. ``` 3. 
**List all comments first (no edits yet):** - Produce a complete checklist of every review comment. + - Make it look like a pretty table or bulleted list for easy reading. - For each item include: - **Assessment:** valid / duplicate / not applicable - **Suggestion:** exact fix you plan to apply (or why you will skip) diff --git a/.github/prompts/write-tests.prompt.md b/.github/prompts/write-tests.prompt.md index 3b98996..3645788 100644 --- a/.github/prompts/write-tests.prompt.md +++ b/.github/prompts/write-tests.prompt.md @@ -6,8 +6,8 @@ When writing tests for a module in `immermatch/`: - Gemini: `@patch("immermatch.<module>.call_gemini")` - Supabase: `@patch("immermatch.db.get_admin_client")` - Resend: `@patch("immermatch.emailer.resend")` - - SerpApi: `@patch("immermatch.serpapi_provider.serpapi_search")` - - Bundesagentur: `@patch("immermatch.bundesagentur.requests.get")` + - SerpApi: `@patch("immermatch.search_api.serpapi_provider.GoogleSearch.get_dict")` + - Bundesagentur: `@patch("immermatch.search_api.bundesagentur.httpx.Client.get")` 4. **Use shared fixtures** from `tests/conftest.py`: - `sample_profile` — `CandidateProfile` with work history - `sample_job` — `JobListing` with apply options diff --git a/AGENTS.md b/AGENTS.md index 22ce347..393b7d7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,7 +14,7 @@ This document defines the persona, context, and instruction sets for the AI agen **Input:** Raw text extracted from a CV (PDF, DOCX, Markdown, or plain text). **Output:** A structured JSON summary of the candidate. -**System Prompt:** *(source of truth: `immermatch/search_agent.py:PROFILER_SYSTEM_PROMPT`)* +**System Prompt:** *(source of truth: `immermatch/search_api/search_agent.py:PROFILER_SYSTEM_PROMPT`)* > You are an expert technical recruiter with deep knowledge of European job markets. > You will be given the raw text of a candidate's CV. Extract a comprehensive profile. 
> @@ -73,7 +73,7 @@ The system prompt is selected based on the active **SearchProvider**: Used when `provider.name == "Bundesagentur für Arbeit"`. Generates keyword-only queries (no location tokens) because the BA API has a dedicated `wo` parameter for location filtering. -**System Prompt:** *(source of truth: `immermatch/search_agent.py:BA_HEADHUNTER_SYSTEM_PROMPT`)* +**System Prompt:** *(source of truth: `immermatch/search_api/search_agent.py:BA_HEADHUNTER_SYSTEM_PROMPT`)* > You are a Search Specialist generating keyword queries for the German Federal Employment Agency job search API (Bundesagentur für Arbeit). > > Based on the candidate's profile, generate distinct keyword queries to find relevant job openings. The API searches across German job listings and handles location filtering separately. @@ -94,7 +94,7 @@ Used when `provider.name == "Bundesagentur für Arbeit"`. Generates keyword-only Used when `provider.name != "Bundesagentur für Arbeit"` (e.g., SerpApiProvider for non-German markets). Generates location-enriched queries optimised for Google Jobs. -**System Prompt:** *(source of truth: `immermatch/search_agent.py:HEADHUNTER_SYSTEM_PROMPT`)* +**System Prompt:** *(source of truth: `immermatch/search_api/search_agent.py:HEADHUNTER_SYSTEM_PROMPT`)* > You are a Search Specialist. Based on the candidate's profile and location, generate 20 distinct search queries to find relevant job openings. > > IMPORTANT: Keep queries SHORT and SIMPLE (1-3 words). Google Jobs works best with simple, broad queries. 
@@ -109,7 +109,7 @@ Used when `provider.name != "Bundesagentur für Arbeit"` (e.g., SerpApiProvider **Search Provider Architecture:** -The search pipeline uses a pluggable `SearchProvider` protocol (defined in `search_provider.py`): +The search pipeline uses a pluggable `SearchProvider` protocol (defined in `immermatch/search_api/search_provider.py`): ```python class SearchProvider(Protocol): @@ -231,7 +231,7 @@ SERPAPI_PARAMS = { ### Blocked Job Portals (SerpApi only) -Jobs from the following portals are discarded during search result parsing (see `immermatch/serpapi_provider.py:BLOCKED_PORTALS`): +Jobs from the following portals are discarded during search result parsing (see `immermatch/search_api/serpapi_provider.py:BLOCKED_PORTALS`): > bebee, trabajo, jooble, adzuna, jobrapido, neuvoo, mitula, trovit, jobomas, jobijoba, talent, jobatus, jobsora, studysmarter, jobilize, learn4good, grabjobs, jobtensor, zycto, terra.do, jobzmall, simplyhired @@ -504,8 +504,8 @@ Schema setup: run `python setup_db.py` to check tables and print migration SQL. 
|---|---|---| | `test_llm.py` (12 tests) | `llm.py` | `parse_json()` (8 cases: raw, fenced, embedded, nested, errors) + `call_gemini()` retry logic (4 cases: success, ServerError retry, 429 retry, non-429 immediate raise) | | `test_evaluator_agent.py` (8 tests) | `evaluator_agent.py` | `evaluate_job()` (4 cases: happy path, API error fallback, parse error fallback, non-dict fallback) + `evaluate_all_jobs()` (3 cases: sorted output, progress callback, empty list) + `generate_summary()` (2 cases: score distribution in prompt, missing skills in prompt) | -| `test_search_agent.py` (35 tests) | `search_agent.py` | `_is_remote_only()` (remote tokens, non-remote) + `_infer_gl()` (known locations, unknown default, remote returns None, case insensitive) + `_localise_query()` (city names, country names, case insensitive, multiple cities) + `_parse_job_results()` (valid, blocked portals, mixed, empty, no-apply-links) + `search_all_queries()` (provider delegation, dedup, early stopping, callbacks, default provider) + `generate_search_queries()` prompt selection (BA vs SerpApi) + `TestLlmJsonRecovery` (profile_candidate and generate_search_queries retry/recovery) | -| `test_bundesagentur.py` (22 tests) | `bundesagentur.py` | `_build_ba_link()`, `_parse_location()`, `_parse_search_results()`, `_parse_listing()`, `BundesagenturProvider.search()` (basic merge, pagination, HTTP errors, empty results, detail fetch failures), `SearchProvider` protocol conformance | +| `test_search_agent.py` (35 tests) | `search_api/search_agent.py` | `_is_remote_only()` (remote tokens, non-remote) + `_infer_gl()` (known locations, unknown default, remote returns None, case insensitive) + `_localise_query()` (city names, country names, case insensitive, multiple cities) + `_parse_job_results()` (valid, blocked portals, mixed, empty, no-apply-links) + `search_all_queries()` (provider delegation, dedup, early stopping, callbacks, default provider) + `generate_search_queries()` prompt selection (BA vs 
SerpApi) + `TestLlmJsonRecovery` (profile_candidate and generate_search_queries retry/recovery) | +| `test_bundesagentur.py` (22 tests) | `search_api/bundesagentur.py` | `_build_ba_link()`, `_parse_location()`, `_parse_search_results()`, `_parse_listing()`, `BundesagenturProvider.search()` (basic merge, pagination, HTTP errors, empty results, detail fetch failures), `SearchProvider` protocol conformance | | `test_cache.py` (17 tests) | `cache.py` | All cache operations: profile, queries, jobs (merge/dedup), evaluations, unevaluated job filtering | | `test_cv_parser.py` (6 tests) | `cv_parser.py` | `_clean_text()` + `extract_text()` for .txt/.md, error cases | | `test_models.py` (23 tests) | `models.py` | All Pydantic models: validation, defaults, round-trip serialization | @@ -517,7 +517,7 @@ Schema setup: run `python setup_db.py` to check tables and print migration SQL. | `test_integration.py` (11 tests) | Full pipeline | End-to-end: CV text → profile → queries → search → evaluate → summary, all services mocked | | `test_pages_unsubscribe.py` (6 tests) | `pages/unsubscribe.py` | Unsubscribe page logic: token validation, DB deactivation, error states (AppTest) | | `test_pages_verify.py` (7 tests) | `pages/verify.py` | DOI verification page: token confirmation, welcome email, expiry setting, error states (AppTest) | -| `test_search_provider.py` (2 tests) | `search_provider.py` | Provider helpers: `parse_provider_query()`, combined provider behavior | +| `test_search_provider.py` (2 tests) | `search_api/search_provider.py` | Provider helpers: `parse_provider_query()`, combined provider behavior | ### Testing conventions - All external services (Gemini API, SerpAPI, Supabase) are mocked — no API keys needed to run tests @@ -590,7 +590,7 @@ make clean # remove caches and build artifacts The recommended workflow for implementing tasks/issues: -1. **Pick the next unchecked task** from `ROADMAP.md` +1. **Pick the next unchecked task** from `docs/strategy/ROADMAP.md` 2. 
**Plan the implementation** in Copilot Chat — describe the task, ask for a plan, review it 3. **Implement via Copilot Chat** (agent mode) — let the agent write code, create files, and run tests. It will implement → test → fix in a loop. 4. **Review the diff locally** — check changed files, run the Streamlit app once if needed @@ -607,7 +607,7 @@ The recommended workflow for implementing tasks/issues: ```bash gh pr merge --squash --delete-branch ``` -9. **Mark the task as done** in `ROADMAP.md` (change `- [ ]` to `- [x]`) +9. **Mark the task as done** in `docs/strategy/ROADMAP.md` (change `- [ ]` to `- [x]`) ### Tool allocation (token efficiency) diff --git a/CLAUDE.md b/CLAUDE.md index 3f2929b..e75aadd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,10 +35,10 @@ source .venv/bin/activate && pytest tests/ -x -q && ruff check --fix . && ruff f | `app.py` | Streamlit UI: CV upload → profile → search → evaluate → display | | `cv_parser.py` | Extract text from PDF/DOCX/MD/TXT | | `llm.py` | Gemini API wrapper with retry/backoff | -| `search_agent.py` | Generate search queries (LLM) + orchestrate search | -| `search_provider.py` | `SearchProvider` protocol + `get_provider()` factory | -| `bundesagentur.py` | Bundesagentur für Arbeit job search API provider | -| `serpapi_provider.py` | Google Jobs via SerpApi provider (future non-DE markets) | +| `search_api/search_agent.py` | Generate search queries (LLM) + orchestrate search | +| `search_api/search_provider.py` | `SearchProvider` protocol + `get_provider()` factory | +| `search_api/bundesagentur.py` | Bundesagentur für Arbeit job search API provider | +| `search_api/serpapi_provider.py` | Google Jobs via SerpApi provider (future non-DE markets) | | `evaluator_agent.py` | Score jobs against candidate profile (LLM) + career summary | | `models.py` | All Pydantic schemas (`CandidateProfile`, `JobListing`, etc.) 
| | `cache.py` | JSON file cache in `.immermatch_cache/` | diff --git a/README.md b/README.md index 07cb19a..0d35267 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ Jobs are fetched from Google Jobs via SerpApi, deduplicated, and scored in paral ## Bundesagentur Provider Tuning -The Bundesagentur provider in `immermatch/bundesagentur.py` supports a configurable detail-fetch strategy: +The Bundesagentur provider in `immermatch/search_api/bundesagentur.py` supports a configurable detail-fetch strategy: - `api_then_html` (default): first tries `/pc/v4/jobdetails/{refnr}`, then falls back to scraping the public job-detail page if needed - `api_only`: uses only the API detail endpoint @@ -152,7 +152,11 @@ immermatch/ app.py # Streamlit web UI llm.py # Gemini client and retry logic cv_parser.py # CV text extraction (PDF/DOCX/MD/TXT) - search_agent.py # Profile extraction and job search + search_api/ + search_agent.py # Profile extraction and job search orchestration + search_provider.py # Provider abstraction + routing/factory + bundesagentur.py # Bundesagentur für Arbeit provider + serpapi_provider.py # SerpApi provider evaluator_agent.py # Job scoring and career summary models.py # Pydantic data models cache.py # JSON-based result caching @@ -165,6 +169,12 @@ immermatch/ privacy.py # Privacy policy daily_task.py # Daily digest cron job (GitHub Actions) setup_db.py # Database schema checker / migration helper +docs/ + strategy/ + ROADMAP.md # Product roadmap and priorities + search-api/ + AGENT.md # Search API decision log + specialist guidance + Improving Job Search API Results.md # Search quality research tests/ # tests (all mocked) ``` diff --git a/daily_task.py b/daily_task.py index 9f6e053..f4ed1af 100644 --- a/daily_task.py +++ b/daily_task.py @@ -45,7 +45,7 @@ from immermatch.evaluator_agent import evaluate_all_jobs from immermatch.llm import create_client from immermatch.models import CandidateProfile, EvaluatedJob, JobListing -from 
immermatch.search_agent import search_all_queries +from immermatch.search_api.search_agent import search_all_queries logging.basicConfig( level=logging.INFO, diff --git a/docs/search-api/AGENT.md b/docs/search-api/AGENT.md new file mode 100644 index 0000000..1cbeb04 --- /dev/null +++ b/docs/search-api/AGENT.md @@ -0,0 +1,34 @@ +# Search API Specialist Agent + +## Mission +Maintain and improve search quality, freshness, and provider reliability for Immermatch job discovery. + +## Canonical Code Scope +- `immermatch/search_api/search_provider.py` +- `immermatch/search_api/search_agent.py` +- `immermatch/search_api/serpapi_provider.py` +- `immermatch/search_api/bundesagentur.py` + +## Current Architecture Decisions +- Default provider is Bundesagentur für Arbeit (verified German listings). +- SerpApi provider is optional and enabled only when `SERPAPI_KEY` is set. +- Combined provider mode merges BA + SerpApi when SerpApi is configured. +- Search orchestration deduplicates by `title|company_name|location`. +- Provider quotas in combined mode enforce source diversity (`_MIN_JOBS_PER_PROVIDER`). + +## Known Tradeoffs +- BA gives higher listing trust; SerpApi increases breadth at higher noise risk. +- Portal blocklist removes common low-quality aggregators but may drop occasional valid listings. +- Temporal freshness currently relies on provider recency filters; no URL HEAD-validation pipeline yet. 
+ +## Research Inputs +- `docs/search-api/Improving Job Search API Results.md` + +## Decision Log Template +Use this format for each change: +- Date: +- Decision: +- Context: +- Alternatives considered: +- Impact: +- Follow-up tasks: diff --git a/Improving Job Search API Results.md b/docs/search-api/Improving Job Search API Results.md similarity index 100% rename from Improving Job Search API Results.md rename to docs/search-api/Improving Job Search API Results.md diff --git a/docs/strategy/AGENT.md b/docs/strategy/AGENT.md new file mode 100644 index 0000000..bda161f --- /dev/null +++ b/docs/strategy/AGENT.md @@ -0,0 +1,28 @@ +# Strategy Specialist Agent + +## Mission +Translate product goals into an executable roadmap balancing launch speed, user value, and monetization. + +## Canonical Strategy Docs +- `docs/strategy/ROADMAP.md` +- Additional market/positioning analyses in `docs/strategy/` + +## Planning Principles +- Prefer small, validated increments over broad speculative work. +- Prioritize reliability, GDPR compliance, and job quality before growth features. +- Gate paid-tier complexity (Stripe, webhooks, infra migration) behind demand signals. + +## Current Priority Lens +1. Search relevance and listing quality +2. UX conversion improvements (profile edits/preferences) +3. Digest reliability and anti-abuse hardening +4. 
Monetization readiness + +## Decision Log Template +Use this format for each strategic update: +- Date: +- Hypothesis: +- Evidence: +- Decision: +- KPI impact expected: +- Revisit date: diff --git a/ROADMAP.md b/docs/strategy/ROADMAP.md similarity index 100% rename from ROADMAP.md rename to docs/strategy/ROADMAP.md diff --git a/immermatch/app.py b/immermatch/app.py index b53225b..637994c 100644 --- a/immermatch/app.py +++ b/immermatch/app.py @@ -46,12 +46,12 @@ from immermatch.evaluator_agent import evaluate_job, generate_summary # noqa: E402 from immermatch.llm import create_client # noqa: E402 from immermatch.models import CandidateProfile, EvaluatedJob, JobListing # noqa: E402 -from immermatch.search_agent import ( # noqa: E402 +from immermatch.search_api.search_agent import ( # noqa: E402 generate_search_queries, profile_candidate, search_all_queries, ) -from immermatch.search_provider import ( # noqa: E402 +from immermatch.search_api.search_provider import ( # noqa: E402 get_provider, get_provider_fingerprint, parse_provider_query, # noqa: E402 diff --git a/immermatch/search_api/__init__.py b/immermatch/search_api/__init__.py new file mode 100644 index 0000000..beefa58 --- /dev/null +++ b/immermatch/search_api/__init__.py @@ -0,0 +1,30 @@ +"""Search API domain package. + +Canonical location for search provider implementations and orchestration. 
+""" + +from .bundesagentur import BundesagenturProvider +from .search_agent import ( + BA_HEADHUNTER_SYSTEM_PROMPT, + HEADHUNTER_SYSTEM_PROMPT, + PROFILER_SYSTEM_PROMPT, + generate_search_queries, + profile_candidate, + search_all_queries, +) +from .search_provider import CombinedSearchProvider, SearchProvider, get_provider +from .serpapi_provider import SerpApiProvider + +__all__ = [ + "BA_HEADHUNTER_SYSTEM_PROMPT", + "BundesagenturProvider", + "CombinedSearchProvider", + "HEADHUNTER_SYSTEM_PROMPT", + "PROFILER_SYSTEM_PROMPT", + "SearchProvider", + "SerpApiProvider", + "generate_search_queries", + "get_provider", + "profile_candidate", + "search_all_queries", +] diff --git a/immermatch/bundesagentur.py b/immermatch/search_api/bundesagentur.py similarity index 99% rename from immermatch/bundesagentur.py rename to immermatch/search_api/bundesagentur.py index 84188b6..4a6cb25 100644 --- a/immermatch/bundesagentur.py +++ b/immermatch/search_api/bundesagentur.py @@ -24,7 +24,7 @@ import httpx -from .models import ApplyOption, JobListing +from ..models import ApplyOption, JobListing logger = logging.getLogger(__name__) @@ -227,7 +227,7 @@ def _parse_search_results(data: dict) -> list[dict]: class BundesagenturProvider: """Job-search provider backed by the Bundesagentur für Arbeit API. - Satisfies the :class:`~immermatch.search_provider.SearchProvider` protocol. + Satisfies the :class:`~immermatch.search_api.search_provider.SearchProvider` protocol. """ name: str = "Bundesagentur für Arbeit" diff --git a/immermatch/search_agent.py b/immermatch/search_api/search_agent.py similarity index 98% rename from immermatch/search_agent.py rename to immermatch/search_api/search_agent.py index 8471d68..3b2bf49 100644 --- a/immermatch/search_agent.py +++ b/immermatch/search_api/search_agent.py @@ -1,8 +1,8 @@ """Search Agent module - Generates optimized job search queries using LLM. The SerpApi-specific helpers (``_infer_gl``, ``_localise_query``, etc.) 
live -in :mod:`immermatch.serpapi_provider` and are re-exported here for backward -compatibility. +in :mod:`immermatch.search_api.serpapi_provider` and are re-exported here for +backward compatibility. """ from __future__ import annotations @@ -15,8 +15,8 @@ from google import genai from pydantic import ValidationError -from .llm import call_gemini, parse_json -from .models import CandidateProfile, JobListing +from ..llm import call_gemini, parse_json +from ..models import CandidateProfile, JobListing from .search_provider import ( CombinedSearchProvider, SearchProvider, @@ -205,7 +205,7 @@ def generate_search_queries( ) -> list[str]: """Generate optimized job search queries based on candidate profile. - When a :class:`~immermatch.bundesagentur.BundesagenturProvider` is active + When a :class:`~immermatch.search_api.bundesagentur.BundesagenturProvider` is active the prompt asks the LLM for short keyword-only queries (no location tokens). For SerpApi / Google Jobs the prompt includes location-enrichment strategies. diff --git a/immermatch/search_provider.py b/immermatch/search_api/search_provider.py similarity index 99% rename from immermatch/search_provider.py rename to immermatch/search_api/search_provider.py index 0c81c8e..ae0c139 100644 --- a/immermatch/search_provider.py +++ b/immermatch/search_api/search_provider.py @@ -12,7 +12,7 @@ import os from typing import Protocol, runtime_checkable -from .models import JobListing +from ..models import JobListing logger = logging.getLogger(__name__) diff --git a/immermatch/serpapi_provider.py b/immermatch/search_api/serpapi_provider.py similarity index 97% rename from immermatch/serpapi_provider.py rename to immermatch/search_api/serpapi_provider.py index 9cda1fa..f353975 100644 --- a/immermatch/serpapi_provider.py +++ b/immermatch/search_api/serpapi_provider.py @@ -1,7 +1,7 @@ """SerpApi-backed job search provider (Google Jobs). 
This module wraps the existing SerpApi integration behind the -:class:`~immermatch.search_provider.SearchProvider` protocol so it can +:class:`~immermatch.search_api.search_provider.SearchProvider` protocol so it can be swapped in alongside other providers (e.g. Bundesagentur für Arbeit). """ @@ -12,7 +12,7 @@ from serpapi import GoogleSearch -from .models import ApplyOption, JobListing +from ..models import ApplyOption, JobListing # --------------------------------------------------------------------------- # Blocked portal list (questionable job aggregators / paywalls) @@ -343,7 +343,7 @@ def search_jobs( class SerpApiProvider: """Google Jobs search via SerpApi. - Satisfies the :class:`~immermatch.search_provider.SearchProvider` protocol. + Satisfies the :class:`~immermatch.search_api.search_provider.SearchProvider` protocol. """ name: str = "SerpApi (Google Jobs)" diff --git a/tests/test_bundesagentur.py b/tests/test_bundesagentur.py index e220bac..c9a578d 100644 --- a/tests/test_bundesagentur.py +++ b/tests/test_bundesagentur.py @@ -7,7 +7,7 @@ import httpx -from immermatch.bundesagentur import ( +from immermatch.search_api.bundesagentur import ( BundesagenturProvider, _build_ba_link, _clean_html, @@ -269,7 +269,7 @@ def test_retries_on_server_error(self) -> None: client = MagicMock(spec=httpx.Client) client.get.side_effect = [error_resp, ok_resp] - with patch("immermatch.bundesagentur.time.sleep"): + with patch("immermatch.search_api.bundesagentur.time.sleep"): result = _fetch_detail(client, "REF-123") assert result == detail @@ -285,7 +285,7 @@ def test_retries_on_403_then_succeeds(self) -> None: client = MagicMock(spec=httpx.Client) client.get.side_effect = [blocked_resp, ok_resp] - with patch("immermatch.bundesagentur.time.sleep"): + with patch("immermatch.search_api.bundesagentur.time.sleep"): result = _fetch_detail(client, "REF-123") assert result == detail @@ -298,7 +298,7 @@ def test_retries_on_network_error(self) -> None: client = 
MagicMock(spec=httpx.Client) client.get.side_effect = [httpx.ConnectError("timeout"), ok_resp] - with patch("immermatch.bundesagentur.time.sleep"): + with patch("immermatch.search_api.bundesagentur.time.sleep"): result = _fetch_detail(client, "REF-123") assert result == detail @@ -306,7 +306,7 @@ def test_all_retries_fail(self) -> None: client = MagicMock(spec=httpx.Client) client.get.side_effect = httpx.ConnectError("down") - with patch("immermatch.bundesagentur.time.sleep"): + with patch("immermatch.search_api.bundesagentur.time.sleep"): result = _fetch_detail(client, "REF-123") assert result == {} @@ -398,7 +398,7 @@ def test_single_page(self) -> None: provider = BundesagenturProvider() with ( patch.object(provider, "_get_with_retry", return_value=mock_resp), - patch("immermatch.bundesagentur.httpx.Client"), + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): items = provider._search_items("Dev", "Berlin", max_results=50) @@ -426,7 +426,7 @@ def mock_get(client, url, params): provider = BundesagenturProvider() with ( patch.object(provider, "_get_with_retry", side_effect=mock_get), - patch("immermatch.bundesagentur.httpx.Client"), + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): items = provider._search_items("Dev", "Berlin", max_results=100) @@ -442,7 +442,7 @@ def test_search_items_server_error_returns_empty(self) -> None: provider = BundesagenturProvider() with ( patch.object(provider, "_get_with_retry", return_value=None), - patch("immermatch.bundesagentur.httpx.Client"), + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): items = provider._search_items("Dev", "Berlin", max_results=50) assert items == [] @@ -457,7 +457,7 @@ def test_get_with_retry_retries_on_403(self) -> None: client = MagicMock(spec=httpx.Client) client.get.side_effect = [blocked_resp, ok_resp] - with patch("immermatch.bundesagentur.time.sleep"): + with patch("immermatch.search_api.bundesagentur.time.sleep"): result = 
BundesagenturProvider._get_with_retry(client, "https://example.com", {}) assert result is ok_resp @@ -487,7 +487,7 @@ def test_retries_on_403_then_succeeds(self) -> None: client = MagicMock(spec=httpx.Client) client.get.side_effect = [blocked_resp, ok_resp] - with patch("immermatch.bundesagentur.time.sleep"): + with patch("immermatch.search_api.bundesagentur.time.sleep"): result = _fetch_detail_api(client, "REF-123") assert result == detail @@ -508,9 +508,12 @@ def test_enriches_items_with_details(self) -> None: provider = BundesagenturProvider() with ( - patch("immermatch.bundesagentur._fetch_detail_api", return_value={}), - patch("immermatch.bundesagentur._fetch_detail", side_effect=lambda _c, refnr: details.get(refnr, {})), - patch("immermatch.bundesagentur.httpx.Client"), + patch("immermatch.search_api.bundesagentur._fetch_detail_api", return_value={}), + patch( + "immermatch.search_api.bundesagentur._fetch_detail", + side_effect=lambda _c, refnr: details.get(refnr, {}), + ), + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): listings = provider._enrich(items) @@ -523,9 +526,9 @@ def test_enrich_falls_back_on_failed_detail(self) -> None: provider = BundesagenturProvider() with ( - patch("immermatch.bundesagentur._fetch_detail_api", return_value={}), - patch("immermatch.bundesagentur._fetch_detail", return_value={}), - patch("immermatch.bundesagentur.httpx.Client"), + patch("immermatch.search_api.bundesagentur._fetch_detail_api", return_value={}), + patch("immermatch.search_api.bundesagentur._fetch_detail", return_value={}), + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): listings = provider._enrich(items) @@ -539,9 +542,9 @@ def test_enrich_with_external_apply_url(self) -> None: provider = BundesagenturProvider() with ( - patch("immermatch.bundesagentur._fetch_detail_api", return_value={}), - patch("immermatch.bundesagentur._fetch_detail", return_value=detail), - patch("immermatch.bundesagentur.httpx.Client"), + 
patch("immermatch.search_api.bundesagentur._fetch_detail_api", return_value={}), + patch("immermatch.search_api.bundesagentur._fetch_detail", return_value=detail), + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): listings = provider._enrich(items) @@ -555,9 +558,9 @@ def test_api_then_html_strategy_falls_back_to_html(self) -> None: provider = BundesagenturProvider(detail_strategy="api_then_html") with ( - patch("immermatch.bundesagentur._fetch_detail_api", return_value={}), - patch("immermatch.bundesagentur._fetch_detail", return_value=html_detail), - patch("immermatch.bundesagentur.httpx.Client"), + patch("immermatch.search_api.bundesagentur._fetch_detail_api", return_value={}), + patch("immermatch.search_api.bundesagentur._fetch_detail", return_value=html_detail), + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): listings = provider._enrich(items) @@ -570,9 +573,9 @@ def test_api_only_strategy_uses_api_detail(self) -> None: provider = BundesagenturProvider(detail_strategy="api_only") with ( - patch("immermatch.bundesagentur._fetch_detail_api", return_value=api_detail), - patch("immermatch.bundesagentur._fetch_detail", return_value={}), - patch("immermatch.bundesagentur.httpx.Client"), + patch("immermatch.search_api.bundesagentur._fetch_detail_api", return_value=api_detail), + patch("immermatch.search_api.bundesagentur._fetch_detail", return_value={}), + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): listings = provider._enrich(items) @@ -585,9 +588,9 @@ def test_html_only_strategy_uses_html_detail(self) -> None: provider = BundesagenturProvider(detail_strategy="html_only") with ( - patch("immermatch.bundesagentur._fetch_detail", return_value=html_detail), - patch("immermatch.bundesagentur._fetch_detail_api") as mock_api, - patch("immermatch.bundesagentur.httpx.Client"), + patch("immermatch.search_api.bundesagentur._fetch_detail", return_value=html_detail), + patch("immermatch.search_api.bundesagentur._fetch_detail_api") 
as mock_api, + patch("immermatch.search_api.bundesagentur.httpx.Client"), ): listings = provider._enrich(items) @@ -600,7 +603,7 @@ class TestSearchProviderProtocol: """Verify BundesagenturProvider satisfies the SearchProvider protocol.""" def test_conforms_to_protocol(self) -> None: - from immermatch.search_provider import SearchProvider + from immermatch.search_api.search_provider import SearchProvider provider = BundesagenturProvider() assert isinstance(provider, SearchProvider) diff --git a/tests/test_integration.py b/tests/test_integration.py index bf6e0a6..ba40355 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -20,7 +20,7 @@ JobEvaluation, JobListing, ) -from immermatch.search_agent import ( +from immermatch.search_api.search_agent import ( generate_search_queries, profile_candidate, search_all_queries, @@ -338,7 +338,7 @@ class TestFullPipelineTechCV: """End-to-end pipeline with the tech CV (sample.md).""" @patch("immermatch.evaluator_agent.call_gemini") - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_full_pipeline_happy_path( self, mock_search_gemini: MagicMock, @@ -410,7 +410,7 @@ class TestFullPipelineSustainabilityCV: """End-to-end pipeline with the non-tech sustainability CV.""" @patch("immermatch.evaluator_agent.call_gemini") - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_full_pipeline_non_tech_cv( self, mock_search_gemini: MagicMock, @@ -482,7 +482,7 @@ def test_full_pipeline_non_tech_cv( class TestProfileOutputStructure: """Verify the profile output structure for different CV types.""" - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_tech_profile_has_all_fields( self, mock_gemini: MagicMock, @@ -506,7 +506,7 @@ def test_tech_profile_has_all_fields( assert all(w.company for w in profile.work_history) assert 
len(profile.education_history) >= 1 - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_sustainability_profile_has_all_fields( self, mock_gemini: MagicMock, @@ -529,7 +529,7 @@ def test_sustainability_profile_has_all_fields( class TestQueryGeneration: """Verify query generation integrates with the profile stage.""" - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_queries_are_strings_and_correct_count( self, mock_gemini: MagicMock, @@ -676,7 +676,7 @@ class TestEmptySearchResults: """Verify the pipeline handles empty search results gracefully.""" @patch("immermatch.evaluator_agent.call_gemini") - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_empty_search_produces_empty_evaluations( self, mock_search_gemini: MagicMock, @@ -719,7 +719,7 @@ class TestDataFlowBetweenStages: """Verify that data produced by earlier stages reaches later stages.""" @patch("immermatch.evaluator_agent.call_gemini") - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_cv_data_flows_through_all_stages( self, mock_search_gemini: MagicMock, diff --git a/tests/test_search_agent.py b/tests/test_search_agent.py index 085c7e4..2efd2cb 100644 --- a/tests/test_search_agent.py +++ b/tests/test_search_agent.py @@ -1,4 +1,4 @@ -"""Tests for immermatch.search_agent — pure helper functions and search_all_queries orchestration.""" +"""Tests for immermatch.search_api.search_agent — helper functions and search orchestration.""" import json from typing import ClassVar @@ -7,7 +7,7 @@ import pytest from immermatch.models import ApplyOption, CandidateProfile, JobListing -from immermatch.search_agent import ( +from immermatch.search_api.search_agent import ( _infer_gl, _is_remote_only, _localise_query, @@ -17,7 +17,7 @@ profile_candidate, 
search_all_queries, ) -from immermatch.search_provider import CombinedSearchProvider +from immermatch.search_api.search_provider import CombinedSearchProvider class TestIsRemoteOnly: @@ -274,7 +274,7 @@ def test_on_jobs_found_callback(self): assert len(found_batches) == 1 assert found_batches[0][0].title == "Dev" - @patch("immermatch.search_agent.get_provider") + @patch("immermatch.search_api.search_agent.get_provider") def test_defaults_to_get_provider(self, mock_gp: MagicMock): """When no provider given, get_provider(location) is called.""" mock_provider = MagicMock() @@ -320,7 +320,7 @@ def test_combined_provider_hard_quota_requires_30_each_before_stop(self): assert ba_count >= 30 assert serp_count >= 30 - @patch("immermatch.search_agent.logger") + @patch("immermatch.search_api.search_agent.logger") def test_logs_source_counts(self, mock_logger: MagicMock): provider = self._make_provider( [ @@ -411,7 +411,7 @@ def test_min_unique_zero_does_not_enable_combined_quota(self): class TestLlmJsonRecovery: - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_profile_candidate_retries_after_invalid_json(self, mock_call_gemini: MagicMock): valid_profile = { "skills": ["Python", "SQL"], @@ -442,7 +442,7 @@ def test_profile_candidate_retries_after_invalid_json(self, mock_call_gemini: Ma assert result.experience_level == "Mid" assert mock_call_gemini.call_count == 2 - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_generate_search_queries_retries_after_invalid_json(self, mock_call_gemini: MagicMock): profile = CandidateProfile( skills=["Python"], @@ -472,7 +472,7 @@ def test_generate_search_queries_retries_after_invalid_json(self, mock_call_gemi assert queries == ["python developer berlin", "backend berlin"] assert mock_call_gemini.call_count == 2 - @patch("immermatch.search_agent.call_gemini") + 
@patch("immermatch.search_api.search_agent.call_gemini") def test_profile_candidate_raises_after_all_retries_exhausted(self, mock_call_gemini: MagicMock): mock_call_gemini.side_effect = ["not json", "still not json", "also not json"] @@ -481,7 +481,7 @@ def test_profile_candidate_raises_after_all_retries_exhausted(self, mock_call_ge assert mock_call_gemini.call_count == 3 - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_generate_search_queries_returns_empty_list_after_all_retries_fail(self, mock_call_gemini: MagicMock): profile = CandidateProfile( skills=["Python"], @@ -511,7 +511,7 @@ def test_generate_search_queries_returns_empty_list_after_all_retries_fail(self, assert queries == [] assert mock_call_gemini.call_count == 2 - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_profile_candidate_retries_after_validation_error(self, mock_call_gemini: MagicMock): base_profile = { "skills": ["Python", "SQL"], @@ -545,7 +545,7 @@ def test_profile_candidate_retries_after_validation_error(self, mock_call_gemini assert result.experience_level == "Mid" assert mock_call_gemini.call_count == 2 - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_profile_candidate_retries_when_json_is_not_dict(self, mock_call_gemini: MagicMock): valid_profile = { "skills": ["Python", "SQL"], @@ -594,7 +594,7 @@ class TestGenerateSearchQueriesProviderPrompt: education_history=[], ) - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_ba_provider_uses_ba_prompt(self, mock_call_gemini: MagicMock): mock_call_gemini.return_value = '["Softwareentwickler", "Python Developer"]' ba_provider = MagicMock() @@ -612,7 +612,7 @@ def test_ba_provider_uses_ba_prompt(self, mock_call_gemini: MagicMock): assert "Bundesagentur" in prompt_sent assert "Do NOT 
include any city" in prompt_sent - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_other_provider_uses_default_prompt(self, mock_call_gemini: MagicMock): mock_call_gemini.return_value = '["Python Developer Berlin"]' other_provider = MagicMock() @@ -630,7 +630,7 @@ def test_other_provider_uses_default_prompt(self, mock_call_gemini: MagicMock): assert "Google Jobs" in prompt_sent assert "LOCAL names" in prompt_sent - @patch("immermatch.search_agent.call_gemini") + @patch("immermatch.search_api.search_agent.call_gemini") def test_combined_provider_generates_queries_per_child_provider(self, mock_call_gemini: MagicMock): mock_call_gemini.side_effect = [ '["Softwareentwickler", "Datenanalyst"]', diff --git a/tests/test_search_provider.py b/tests/test_search_provider.py index f17e9f2..5511a8f 100644 --- a/tests/test_search_provider.py +++ b/tests/test_search_provider.py @@ -5,7 +5,7 @@ from unittest.mock import MagicMock from immermatch.models import ApplyOption, JobListing -from immermatch.search_provider import CombinedSearchProvider, parse_provider_query +from immermatch.search_api.search_provider import CombinedSearchProvider, parse_provider_query def _make_job(title: str, company: str, location: str = "Berlin") -> JobListing: