Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ __pycache__/
# OS
.DS_Store

# query/ is an internal work folder — ignore everything except the scoring prompt
# query/ is an internal work folder — ignore runtime outputs, track prompt files
query/
!query/JOB_SCORING_PROMPT.md
!query/SEARCH_DIRECTIVE_PROMPT.md
!query/SEARCH_COMPANY_PROMPT.md

# OAuth tokens (auto-generated)
.oauth_client.json
Expand All @@ -37,3 +39,6 @@ scoring_profiles/

# IT Team automation session data
.it-sessions/

# MCP servers — locally installed third-party servers; not committed
mcp_servers/
10 changes: 1 addition & 9 deletions agent/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,6 @@ def _needs_convert_cvs(state: AgentState) -> str:
return "convert_cvs" if state["pdf_paths"] else "generate_queries"


def _needs_generate_queries(state: AgentState) -> str:
    """Pick the next node depending on whether queries are already available.

    Returns ``"search_jobs"`` when ``state["raw_queries"]`` is non-empty
    (the queries already came from disk, so generation is skipped), and
    ``"generate_queries"`` otherwise.
    """
    if state["raw_queries"]:
        # Queries are already present — go straight to the job search.
        return "search_jobs"
    return "generate_queries"


def _needs_notifications(state: AgentState) -> str:
"""Skip the notifications node when no channels are configured."""
Expand Down Expand Up @@ -230,11 +226,7 @@ def build_graph() -> CompiledStateGraph:
})
graph.add_edge("convert_cvs", "generate_queries")

# Conditional: skip LLM query generation when queries already exist
graph.add_conditional_edges("generate_queries", _needs_generate_queries, {
"generate_queries": "generate_queries",
"search_jobs": "search_jobs",
})
graph.add_edge("generate_queries", "search_jobs")

# Linear core pipeline
graph.add_edge("search_jobs", "search_companies")
Expand Down
3 changes: 2 additions & 1 deletion agent/nodes/generate_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ def run(state: AgentState) -> AgentState:
cached = _cached_hash(_QUERIES_FILE)

if cached == current_hash and cached:
queries = state.get("raw_queries", [])
lines = _QUERIES_FILE.read_text(encoding="utf-8").splitlines()
queries = [ln for ln in lines[2:] if ln.strip()] # skip hash line + blank line
run_log.append(
f"generate_queries: cache hit (hash {current_hash[:8]}…) — "
f"using {len(queries)} queries from {_QUERIES_FILE}"
Expand Down
2 changes: 1 addition & 1 deletion agent/nodes/search_companies.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def run(state: AgentState) -> AgentState:

try:
from providers.llm.factory import build_llm
llm = build_llm(cfg["llm"])
llm = build_llm(cfg["llm"], task="search")
except Exception as e:
errors.append(f"Company search initialisation failed: {e}")
logger.error("Company search init failed: %s", e)
Expand Down
5 changes: 3 additions & 2 deletions agent/nodes/search_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"france_travail": 3, # Documented 3 req/s ceiling
"adzuna": 5, # No documented limit; conservative default
"anthropic_web": 1, # LLM-backed — parallelism yields nothing
"linkedin": 1, # Session-based auth — single in-flight reduces ban risk
}
_FALLBACK_MAX_CONCURRENT = 3

Expand Down Expand Up @@ -372,8 +373,8 @@ def _make_job_id(job: dict) -> str:

# ── Directive search (anthropic_web) ─────────────────────────────────────────

_DIRECTIVE_TARGET = 30 # jobs we want after Tavily filtering
_DIRECTIVE_LLM_MAX = 50 # URLs we ask the LLM for (buffer for Tavily drops)
_DIRECTIVE_TARGET = 50 # jobs we want after Tavily filtering
_DIRECTIVE_LLM_MAX = 80 # URLs we ask the LLM for (buffer for Tavily drops + aggregator filter)


def _get_positions(state: AgentState) -> list[str]:
Expand Down
5 changes: 3 additions & 2 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ search:
enabled: false # No auth required once working endpoint confirmed
max_results_per_query: 10

- name: linkedin # LinkedIn Jobs — stub; requires OAuth app approval
enabled: false # Requires: LINKEDIN_CLIENT_ID, LINKEDIN_CLIENT_SECRET
- name: linkedin # LinkedIn Jobs — unofficial API + MCP browser fallback
enabled: true # Requires: LINKEDIN_EMAIL, LINKEDIN_PASSWORD (Infisical)
max_results_per_query: 10
max_concurrent: 1 # Single in-flight — session auth, ban risk reduction

- name: apec # APEC (French exec board) — stub; requires auth
enabled: false # Requires: session cookie or undocumented API reverse-engineering
Expand Down
5 changes: 1 addition & 4 deletions config/search_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ search:
cvs:
cv1:
- "Product Manager Data AI"
- "Head of Product Data AI"
- ""
cv2:
- ""
- ""
Expand All @@ -22,11 +22,8 @@ cvs:
# url entry → skips LLM, fetches jobs from that URL directly
# User-provided hint/url always overrides anything in hints_cache.json.
companies:
- "Mistral AI"
- name: "Hugging Face"
hint: "greenhouse:huggingface"
- name: "Criteo"
url: "https://jobs.lever.co/criteo"

# ── Target locations ──────────────────────────────────────────────────────────
locations:
Expand Down
43 changes: 34 additions & 9 deletions monitoring/web_monitoring/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,25 @@ def _token_block_html(token_usage: dict) -> str:
g_total = g_in + g_out + g_cache_read + g_cache_create

cache_detail = ""
effective_str = ""
if g_cache_read or g_cache_create:
cache_detail = (
f" · cache: {g_cache_read:,} read / {g_cache_create:,} created"
)
# Effective compute = tokens that actually count against your limit:
# new input + output + 10% of cache-reads (cache-reads are ~90% cheaper).
effective = g_in + g_out + round(g_cache_read * 0.1)
effective_str = (
f' · <span style="color:#28a745;font-weight:bold">'
f"≈{fmt_tokens(effective)} effective compute</span>"
)

grand_line = (
f'<p style="font-size:14px;margin:8px 0 16px;">'
f"<strong>Grand total:</strong> {fmt_cost(g_cost)} · "
f"{fmt_tokens(g_total)} total ({g_in:,} new in / {g_out:,} out"
f"{cache_detail}) · {g_calls} calls"
f"{fmt_tokens(g_total)} raw ({g_in:,} new in / {g_out:,} out"
f"{cache_detail})"
f"{effective_str} · {g_calls} calls"
"</p>"
)

Expand Down Expand Up @@ -146,10 +155,19 @@ def _node_row_html(name: str, node_timings: dict, by_node: dict) -> str:
node_data = by_node.get(name) or {}
in_tok = safe_int(node_data.get("input_tokens"))
out_tok = safe_int(node_data.get("output_tokens"))
total_tokens = in_tok + out_tok
cache_read = safe_int(node_data.get("cache_read_input_tokens"))
cache_create = safe_int(node_data.get("cache_creation_input_tokens"))
cost = safe_float(node_data.get("cost_usd"))
tok_str = fmt_tokens(total_tokens) if total_tokens else "—"
cost_str = fmt_cost(cost) if cost else "—"
if in_tok or out_tok or cache_read or cache_create:
tok_parts = [f"{fmt_tokens(in_tok)} in", f"{fmt_tokens(out_tok)} out"]
if cache_read:
tok_parts.append(
f'<span style="color:#28a745">{fmt_tokens(cache_read)} cached</span>'
)
tok_str = " / ".join(tok_parts)
else:
tok_str = "—"
return (
f"<tr><td>{name}</td><td>{status}</td><td>{time_str}</td>"
f"<td>{tok_str}</td><td>{cost_str}</td></tr>"
Expand Down Expand Up @@ -212,9 +230,17 @@ def _node_row_html(name: str, node_timings: dict, by_node: dict) -> str:
: st === 'running' ? '⟳' : '○';
var timeStr = (typeof t === 'number') ? t.toFixed(1) + 's' : '—';
var nd = bn[name] || {};
var toks = (nd.input_tokens||0) + (nd.output_tokens||0) + (nd.cache_read_input_tokens||0) + (nd.cache_creation_input_tokens||0);
var inTok = nd.input_tokens||0;
var outTok = nd.output_tokens||0;
var cacheRead = nd.cache_read_input_tokens||0;
var hasTokens = inTok||outTok||cacheRead||(nd.cache_creation_input_tokens||0);
var tokStr;
if(hasTokens){
tokStr = fmtTokens(inTok)+' in / '+fmtTokens(outTok)+' out';
if(cacheRead) tokStr += ' / <span style="color:#28a745">'+fmtTokens(cacheRead)+' cached</span>';
} else { tokStr = '—'; }
rows += '<tr><td>' + escapeHtml(name) + '</td><td>' + glyph
+ '</td><td>' + timeStr + '</td><td>' + fmtTokens(toks)
+ '</td><td>' + timeStr + '</td><td>' + tokStr
+ '</td><td>' + fmtCost(nd.cost_usd||0) + '</td></tr>';
}
return rows;
Expand Down Expand Up @@ -358,7 +384,7 @@ def generate_run_report(state: dict, duration_s: float, node_timings: dict) -> P
<thead><tr>
<th>Run ID</th><th>Datetime</th><th>Status</th><th>Runtime</th>
<th>Jobs found</th><th>Jobs scored</th><th>Jobs approved</th>
<th>Tokens consumed</th><th>Cost $</th><th></th>
<th>Tokens consumed</th><th>Cost $</th>
</tr></thead>
<tbody>
__ROWS_HTML__
Expand Down Expand Up @@ -453,7 +479,7 @@ def update_index(run_id: str, timestamp: str, duration_s: float, stats: dict) ->

rows.append(
f"<tr>"
f"<td>{_html.escape(str(rid))}</td>"
f'<td><a href="{href}">{_html.escape(str(rid))}</a></td>'
f"<td>{_html.escape(str(run.get('timestamp', '')))}</td>"
f'<td class="{status_cls}">{status_label}</td>'
f"<td>{fmt_duration(safe_float(run.get('duration_s', 0)))}</td>"
Expand All @@ -462,7 +488,6 @@ def update_index(run_id: str, timestamp: str, duration_s: float, stats: dict) ->
f"<td>{safe_int(run.get('new_saved', 0))}</td>"
f"<td>{tok_str}</td>"
f"<td>{cost_str}</td>"
f'<td><a href="{href}">→</a></td>'
f"</tr>"
)

Expand Down
6 changes: 5 additions & 1 deletion providers/llm/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,11 @@ def build_llm(cfg: dict, task: str = "default"):

# Build a new dict so we don't mutate the caller's config — tests rely
# on this invariant.
resolved_cfg = {**cfg, "model": resolved_model}
# Search tasks need --dangerously-skip-permissions so the Claude CLI can
# invoke its web-search tool; all other tasks (scoring, compression) run
# without tool access for speed and safety.
allow_tools_override = True if task == "search" else cfg.get("allow_tools", False)
resolved_cfg = {**cfg, "model": resolved_model, "allow_tools": allow_tools_override}

provider = resolved_cfg.get("provider", "anthropic").lower()

Expand Down
Loading
Loading