bayrem · bayrem · Jun 2, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/.gitignore b/.gitignore
@@ -24,9 +24,11 @@ __pycache__/
 # OS
 .DS_Store
 
-# query/ is an internal work folder — ignore everything except the scoring prompt
+# query/ is an internal work folder — ignore runtime outputs, track prompt files
 query/
 !query/JOB_SCORING_PROMPT.md
+!query/SEARCH_DIRECTIVE_PROMPT.md
+!query/SEARCH_COMPANY_PROMPT.md
 
 # OAuth tokens (auto-generated)
 .oauth_client.json
@@ -37,3 +39,6 @@ scoring_profiles/
 
 # IT Team automation session data
 .it-sessions/
+
+# MCP servers — locally installed third-party servers; not committed
+mcp_servers/
diff --git a/agent/graph.py b/agent/graph.py
@@ -184,10 +184,6 @@ def _needs_convert_cvs(state: AgentState) -> str:
     return "convert_cvs" if state["pdf_paths"] else "generate_queries"
 
 
-def _needs_generate_queries(state: AgentState) -> str:
-    """Skip query generation when ``raw_queries`` already came from disk."""
-    return "generate_queries" if not state["raw_queries"] else "search_jobs"
-
 
 def _needs_notifications(state: AgentState) -> str:
     """Skip the notifications node when no channels are configured."""
@@ -230,11 +226,7 @@ def build_graph() -> CompiledStateGraph:
     })
     graph.add_edge("convert_cvs", "generate_queries")
 
-    # Conditional: skip LLM query generation when queries already exist
-    graph.add_conditional_edges("generate_queries", _needs_generate_queries, {
-        "generate_queries": "generate_queries",
-        "search_jobs": "search_jobs",
-    })
+    graph.add_edge("generate_queries", "search_jobs")
 
     # Linear core pipeline
     graph.add_edge("search_jobs", "search_companies")

diff --git a/agent/nodes/generate_queries.py b/agent/nodes/generate_queries.py
@@ -74,7 +74,8 @@ def run(state: AgentState) -> AgentState:
     cached = _cached_hash(_QUERIES_FILE)
 
     if cached == current_hash and cached:
-        queries = state.get("raw_queries", [])
+        lines = _QUERIES_FILE.read_text(encoding="utf-8").splitlines()
+        queries = [ln for ln in lines[2:] if ln.strip()]  # skip hash line + blank line
         run_log.append(
             f"generate_queries: cache hit (hash {current_hash[:8]}…) — "
             f"using {len(queries)} queries from {_QUERIES_FILE}"

diff --git a/agent/nodes/search_companies.py b/agent/nodes/search_companies.py
@@ -189,7 +189,7 @@ def run(state: AgentState) -> AgentState:
 
     try:
         from providers.llm.factory import build_llm
-        llm = build_llm(cfg["llm"])
+        llm = build_llm(cfg["llm"], task="search")
     except Exception as e:
         errors.append(f"Company search initialisation failed: {e}")
         logger.error("Company search init failed: %s", e)

diff --git a/agent/nodes/search_jobs.py b/agent/nodes/search_jobs.py
@@ -41,6 +41,7 @@
     "france_travail": 3,    # Documented 3 req/s ceiling
     "adzuna": 5,            # No documented limit; conservative default
     "anthropic_web": 1,     # LLM-backed — parallelism yields nothing
+    "linkedin": 1,          # Session-based auth — single in-flight reduces ban risk
 }
 _FALLBACK_MAX_CONCURRENT = 3
 
@@ -372,8 +373,8 @@ def _make_job_id(job: dict) -> str:
 
 # ── Directive search (anthropic_web) ─────────────────────────────────────────
 
-_DIRECTIVE_TARGET = 30   # jobs we want after Tavily filtering
-_DIRECTIVE_LLM_MAX = 50  # URLs we ask the LLM for (buffer for Tavily drops)
+_DIRECTIVE_TARGET = 50   # jobs we want after Tavily filtering
+_DIRECTIVE_LLM_MAX = 80  # URLs we ask the LLM for (buffer for Tavily drops + aggregator filter)
 
 
 def _get_positions(state: AgentState) -> list[str]:

diff --git a/config/config.yaml b/config/config.yaml
@@ -35,9 +35,10 @@ search:
       enabled: false               # No auth required once working endpoint confirmed
       max_results_per_query: 10
 
-    - name: linkedin               # LinkedIn Jobs — stub; requires OAuth app approval
-      enabled: false               # Requires: LINKEDIN_CLIENT_ID, LINKEDIN_CLIENT_SECRET
+    - name: linkedin               # LinkedIn Jobs — unofficial API + MCP browser fallback
+      enabled: true                # Requires: LINKEDIN_EMAIL, LINKEDIN_PASSWORD (Infisical)
       max_results_per_query: 10
+      max_concurrent: 1            # Single in-flight — session auth, ban risk reduction
 
     - name: apec                   # APEC (French exec board) — stub; requires auth
       enabled: false               # Requires: session cookie or undocumented API reverse-engineering

diff --git a/config/search_config.yaml b/config/search_config.yaml
@@ -10,7 +10,7 @@ search:
 cvs:
   cv1:
     - "Product Manager Data AI"
-    - "Head of Product Data AI"
+    - ""
   cv2:
     - ""
     - ""
@@ -22,11 +22,8 @@ cvs:
 #   url entry     → skips LLM, fetches jobs from that URL directly
 # User-provided hint/url always overrides anything in hints_cache.json.
 companies:
-  - "Mistral AI"
   - name: "Hugging Face"
     hint: "greenhouse:huggingface"
-  - name: "Criteo"
-    url: "https://jobs.lever.co/criteo"
 
 # ── Target locations ──────────────────────────────────────────────────────────
 locations:

diff --git a/monitoring/web_monitoring/report.py b/monitoring/web_monitoring/report.py
@@ -37,16 +37,25 @@ def _token_block_html(token_usage: dict) -> str:
     g_total = g_in + g_out + g_cache_read + g_cache_create
 
     cache_detail = ""
+    effective_str = ""
     if g_cache_read or g_cache_create:
         cache_detail = (
             f" · cache: {g_cache_read:,} read / {g_cache_create:,} created"
         )
+        # Effective compute ≈ tokens counted against your limit: new input + output
+        # + 10% of cache reads (~90% cheaper); cache-creation tokens are excluded.
+        effective = g_in + g_out + round(g_cache_read * 0.1)
+        effective_str = (
+            f' · <span style="color:#28a745;font-weight:bold">'
+            f"≈{fmt_tokens(effective)} effective compute</span>"
+        )
 
     grand_line = (
         f'<p style="font-size:14px;margin:8px 0 16px;">'
         f"<strong>Grand total:</strong> {fmt_cost(g_cost)} · "
-        f"{fmt_tokens(g_total)} total ({g_in:,} new in / {g_out:,} out"
-        f"{cache_detail}) · {g_calls} calls"
+        f"{fmt_tokens(g_total)} raw ({g_in:,} new in / {g_out:,} out"
+        f"{cache_detail})"
+        f"{effective_str} · {g_calls} calls"
         "</p>"
     )
 
@@ -146,10 +155,19 @@ def _node_row_html(name: str, node_timings: dict, by_node: dict) -> str:
     node_data = by_node.get(name) or {}
     in_tok = safe_int(node_data.get("input_tokens"))
     out_tok = safe_int(node_data.get("output_tokens"))
-    total_tokens = in_tok + out_tok
+    cache_read = safe_int(node_data.get("cache_read_input_tokens"))
+    cache_create = safe_int(node_data.get("cache_creation_input_tokens"))
     cost = safe_float(node_data.get("cost_usd"))
-    tok_str = fmt_tokens(total_tokens) if total_tokens else "—"
     cost_str = fmt_cost(cost) if cost else "—"
+    if in_tok or out_tok or cache_read or cache_create:
+        tok_parts = [f"{fmt_tokens(in_tok)} in", f"{fmt_tokens(out_tok)} out"]
+        if cache_read:
+            tok_parts.append(
+                f'<span style="color:#28a745">{fmt_tokens(cache_read)} cached</span>'
+            )
+        tok_str = " / ".join(tok_parts)
+    else:
+        tok_str = "—"
     return (
         f"<tr><td>{name}</td><td>{status}</td><td>{time_str}</td>"
         f"<td>{tok_str}</td><td>{cost_str}</td></tr>"
@@ -212,9 +230,17 @@ def _node_row_html(name: str, node_timings: dict, by_node: dict) -> str:
                 : st === 'running' ? '⟳' : '○';
       var timeStr = (typeof t === 'number') ? t.toFixed(1) + 's' : '—';
       var nd = bn[name] || {};
-      var toks = (nd.input_tokens||0) + (nd.output_tokens||0) + (nd.cache_read_input_tokens||0) + (nd.cache_creation_input_tokens||0);
+      var inTok = nd.input_tokens||0;
+      var outTok = nd.output_tokens||0;
+      var cacheRead = nd.cache_read_input_tokens||0;
+      var hasTokens = inTok||outTok||cacheRead||(nd.cache_creation_input_tokens||0);
+      var tokStr;
+      if(hasTokens){
+        tokStr = fmtTokens(inTok)+' in / '+fmtTokens(outTok)+' out';
+        if(cacheRead) tokStr += ' / <span style="color:#28a745">'+fmtTokens(cacheRead)+' cached</span>';
+      } else { tokStr = '—'; }
       rows += '<tr><td>' + escapeHtml(name) + '</td><td>' + glyph
-           +  '</td><td>' + timeStr + '</td><td>' + fmtTokens(toks)
+           +  '</td><td>' + timeStr + '</td><td>' + tokStr
            +  '</td><td>' + fmtCost(nd.cost_usd||0) + '</td></tr>';
     }
     return rows;
@@ -358,7 +384,7 @@ def generate_run_report(state: dict, duration_s: float, node_timings: dict) -> P
 <thead><tr>
   <th>Run ID</th><th>Datetime</th><th>Status</th><th>Runtime</th>
   <th>Jobs found</th><th>Jobs scored</th><th>Jobs approved</th>
-  <th>Tokens consumed</th><th>Cost $</th><th></th>
+  <th>Tokens consumed</th><th>Cost $</th>
 </tr></thead>
 <tbody>
 __ROWS_HTML__
@@ -453,7 +479,7 @@ def update_index(run_id: str, timestamp: str, duration_s: float, stats: dict) ->
 
         rows.append(
             f"<tr>"
-            f"<td>{_html.escape(str(rid))}</td>"
+            f'<td><a href="{href}">{_html.escape(str(rid))}</a></td>'
             f"<td>{_html.escape(str(run.get('timestamp', '')))}</td>"
             f'<td class="{status_cls}">{status_label}</td>'
             f"<td>{fmt_duration(safe_float(run.get('duration_s', 0)))}</td>"
@@ -462,7 +488,6 @@ def update_index(run_id: str, timestamp: str, duration_s: float, stats: dict) ->
             f"<td>{safe_int(run.get('new_saved', 0))}</td>"
             f"<td>{tok_str}</td>"
             f"<td>{cost_str}</td>"
-            f'<td><a href="{href}">→</a></td>'
             f"</tr>"
         )
 

diff --git a/providers/llm/factory.py b/providers/llm/factory.py
@@ -43,7 +43,11 @@ def build_llm(cfg: dict, task: str = "default"):
 
     # Build a new dict so we don't mutate the caller's config — tests rely
     # on this invariant.
-    resolved_cfg = {**cfg, "model": resolved_model}
+    # Search tasks force allow_tools on (--dangerously-skip-permissions) so the
+    # Claude CLI can invoke its web-search tool; every other task keeps the
+    # configured "allow_tools" value, which defaults to off for speed and safety.
+    allow_tools_override = True if task == "search" else cfg.get("allow_tools", False)
+    resolved_cfg = {**cfg, "model": resolved_model, "allow_tools": allow_tools_override}
 
     provider = resolved_cfg.get("provider", "anthropic").lower()