From a49cc4fb4ad52092309c1bf084a463c60937b0d9 Mon Sep 17 00:00:00 2001 From: James Broadhead Date: Tue, 26 May 2026 23:18:25 +0000 Subject: [PATCH] skills(experimental): propagate ace-review correctness/security fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the four fixes shipping in ai-dev-kit#534 to the same files under experimental/. Each of these files was carried over verbatim from a-d-k and still had the bugs the original PR fixes: - fm-parallel-calls.py: guard div-by-zero on speedup calc when total_time is below clock resolution; convert trailing module-level docstring (was unreachable as docs) into a real comment block. - llm_config.py: stop echoing the raw token-endpoint response and HTTP body in error messages — they can contain credentials. Surface the shape (payload keys / status code) instead. - 5-serving-and-vector-search.py: replace the non-runnable example `query_vector=[0.1, 0.2, 0.3, ...]` (valid syntax, but the trailing `...` is a literal Ellipsis object inside the list, not a float) with a real list[float] stand-in plus a comment about matching the index's embedding dimension. - fm-minimal-chat.py: fix `streamlit run 2-minimal-chat-app.py` / `command: ["streamlit", "run", "2-minimal-chat-app.py"]` in the docstring — the actual filename is `fm-minimal-chat.py`. (Skill-name reference left as `databricks-model-serving` to match DAS naming.) The compute.py fix from #534 doesn't apply here — that file was removed in PR #90.
Co-authored-by: Isaac --- .../examples/fm-minimal-chat.py | 4 +- .../examples/fm-parallel-calls.py | 68 ++++++++++--------- .../examples/llm_config.py | 7 +- .../examples/5-serving-and-vector-search.py | 7 +- 4 files changed, 47 insertions(+), 39 deletions(-) diff --git a/experimental/databricks-apps-python/examples/fm-minimal-chat.py b/experimental/databricks-apps-python/examples/fm-minimal-chat.py index db9a35d..920a405 100644 --- a/experimental/databricks-apps-python/examples/fm-minimal-chat.py +++ b/experimental/databricks-apps-python/examples/fm-minimal-chat.py @@ -16,11 +16,11 @@ export DATABRICKS_TOKEN="dapi..." export DATABRICKS_SERVING_BASE_URL="https:///serving-endpoints" export DATABRICKS_MODEL="" # See databricks-model-serving - streamlit run 2-minimal-chat-app.py + streamlit run fm-minimal-chat.py Databricks Apps Deployment: 1. Create app.yaml: - command: ["streamlit", "run", "2-minimal-chat-app.py"] + command: ["streamlit", "run", "fm-minimal-chat.py"] env: - name: DATABRICKS_SERVING_BASE_URL value: "https:///serving-endpoints" diff --git a/experimental/databricks-apps-python/examples/fm-parallel-calls.py b/experimental/databricks-apps-python/examples/fm-parallel-calls.py index 53cc6a2..71cd81b 100644 --- a/experimental/databricks-apps-python/examples/fm-parallel-calls.py +++ b/experimental/databricks-apps-python/examples/fm-parallel-calls.py @@ -224,42 +224,44 @@ def check_audience_fit(client: OpenAI, text: str) -> Dict[str, Any]: time_saved = (total_latency / 1000) - total_time print(f"\n{'='*60}") print(f"Time saved vs serial execution: {time_saved:.2f}s") - print(f"Speedup: {(total_latency/1000) / total_time:.1f}×") + if total_time > 0: + print(f"Speedup: {(total_latency/1000) / total_time:.1f}×") + else: + print("Speedup: N/A (total_time below resolution)") print(f"{'='*60}") # ============================================================================= # Production Best Practices # 
============================================================================= -""" -Best practices from databricksters-check-and-pub: - -1. Configurable concurrency - - Use LLM_MAX_CONCURRENCY env var (default: 5 in the production app) - - Balance throughput vs rate limits - - Too high = rate limit errors - - Too low = underutilized resources - -2. Error handling - - Capture exceptions per job - - Return None for failed jobs - - Collect error messages for debugging - - Continue execution even if some jobs fail - -3. Bounded execution - - Only parallelize independent checks - - Cap concurrency with an env var rather than firing unlimited requests - - Keep the job contract simple: name -> (callable, args, kwargs) - -4. When to use parallel calls - - Multiple independent evaluations of same content - - Batch processing multiple documents - - A/B testing different prompts - - Multi-aspect analysis - -5. When NOT to use parallel calls - - Dependent/sequential operations - - Single evaluation needed - - Rate limits are very strict - - Debugging (use serial for easier troubleshooting) -""" +# +# Best practices from databricksters-check-and-pub: +# +# 1. Configurable concurrency +# - Use LLM_MAX_CONCURRENCY env var (default: 5 in the production app) +# - Balance throughput vs rate limits +# - Too high = rate limit errors +# - Too low = underutilized resources +# +# 2. Error handling +# - Capture exceptions per job +# - Return None for failed jobs +# - Collect error messages for debugging +# - Continue execution even if some jobs fail +# +# 3. Bounded execution +# - Only parallelize independent checks +# - Cap concurrency with an env var rather than firing unlimited requests +# - Keep the job contract simple: name -> (callable, args, kwargs) +# +# 4. When to use parallel calls +# - Multiple independent evaluations of same content +# - Batch processing multiple documents +# - A/B testing different prompts +# - Multi-aspect analysis +# +# 5. 
When NOT to use parallel calls +# - Dependent/sequential operations +# - Single evaluation needed +# - Rate limits are very strict +# - Debugging (use serial for easier troubleshooting) diff --git a/experimental/databricks-apps-python/examples/llm_config.py b/experimental/databricks-apps-python/examples/llm_config.py index 6c4d550..200aaef 100644 --- a/experimental/databricks-apps-python/examples/llm_config.py +++ b/experimental/databricks-apps-python/examples/llm_config.py @@ -218,8 +218,10 @@ def get_databricks_bearer_token( access_token = payload.get("access_token") expires_in = int(payload.get("expires_in", 300)) if not access_token: + payload_keys = sorted(payload.keys()) if isinstance(payload, dict) else [] raise DatabricksLLMConfigError( - f"Token endpoint response is missing access_token: {payload}" + "Token endpoint response is missing access_token " + f"(keys present: {payload_keys})" ) expires_at = int(time.time()) + expires_in @@ -278,8 +280,7 @@ def validate_databricks_llm_config( if response.status_code >= 400: raise DatabricksLLMConfigError( f"Failed to validate DATABRICKS_MODEL={config.model!r} in workspace " - f"{config.workspace_host} (HTTP {response.status_code}). " - f"Response: {response.text[:300]}" + f"{config.workspace_host} (HTTP {response.status_code})." ) _validation_cache[cache_key] = int(time.time()) + VALIDATION_TTL_SECONDS diff --git a/experimental/databricks-python-sdk/examples/5-serving-and-vector-search.py b/experimental/databricks-python-sdk/examples/5-serving-and-vector-search.py index 2a47c2b..597aede 100644 --- a/experimental/databricks-python-sdk/examples/5-serving-and-vector-search.py +++ b/experimental/databricks-python-sdk/examples/5-serving-and-vector-search.py @@ -168,10 +168,15 @@ # Query with embedding vector directly +# query_vector must be a list[float] whose length matches your index's +# embedding dimension (e.g. 768 for bge-base, 1024 for bge-large, 1536 for +# text-embedding-3-small / ada-002).
The [0.0] * N below is a stand-in; +# replace with the actual vector returned by your embedding model. +query_vector = [0.0] * 768 results = w.vector_search_indexes.query_index( index_name="main.default.my_index", columns=["id", "text"], - query_vector=[0.1, 0.2, 0.3, ...], # Your embedding vector + query_vector=query_vector, num_results=10 )