From a49cc4fb4ad52092309c1bf084a463c60937b0d9 Mon Sep 17 00:00:00 2001
From: James Broadhead <james.broadhead@databricks.com>
Date: Tue, 26 May 2026 23:18:25 +0000
Subject: [PATCH] skills(experimental): propagate ace-review
 correctness/security fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirrors the four fixes shipping in ai-dev-kit#534 to the same files
under experimental/. Each of these files was carried over verbatim from
ai-dev-kit and still had the bugs the original PR fixes:

- fm-parallel-calls.py: guard div-by-zero on speedup calc when total_time
  is below clock resolution; convert trailing module-level docstring (was
  unreachable as docs) into a real comment block.
- llm_config.py: stop echoing the raw token-endpoint response and HTTP
  body in error messages — they can contain credentials. Surface the
  shape (payload keys / status code) instead.
- 5-serving-and-vector-search.py: replace the placeholder example
  `query_vector=[0.1, 0.2, 0.3, ...]` (valid syntax, but the trailing
  `...` is a literal Ellipsis object inside the list, not a float) with
  a real list[float] stand-in plus a comment about matching the index's
  embedding dimension.
- fm-minimal-chat.py: fix `streamlit run 2-minimal-chat-app.py` /
  `command: ["streamlit", "run", "2-minimal-chat-app.py"]` in the
  docstring — the actual filename is `fm-minimal-chat.py`. (Skill-name
  reference left as `databricks-model-serving` to match DAS naming.)

The compute.py fix from #534 doesn't apply here — that file was removed
in PR #90.

Co-authored-by: Isaac
---
 .../examples/fm-minimal-chat.py               |  4 +-
 .../examples/fm-parallel-calls.py             | 68 ++++++++++---------
 .../examples/llm_config.py                    |  7 +-
 .../examples/5-serving-and-vector-search.py   |  7 +-
 4 files changed, 47 insertions(+), 39 deletions(-)
diff --git a/experimental/databricks-apps-python/examples/fm-minimal-chat.py b/experimental/databricks-apps-python/examples/fm-minimal-chat.py
index db9a35d..920a405 100644
--- a/experimental/databricks-apps-python/examples/fm-minimal-chat.py
+++ b/experimental/databricks-apps-python/examples/fm-minimal-chat.py
@@ -16,11 +16,11 @@
     export DATABRICKS_TOKEN="dapi..."
     export DATABRICKS_SERVING_BASE_URL="https://<workspace>/serving-endpoints"
     export DATABRICKS_MODEL="<endpoint-name>"  # See databricks-model-serving
-    streamlit run 2-minimal-chat-app.py
+    streamlit run fm-minimal-chat.py
 
 Databricks Apps Deployment:
     1. Create app.yaml:
-       command: ["streamlit", "run", "2-minimal-chat-app.py"]
+       command: ["streamlit", "run", "fm-minimal-chat.py"]
        env:
          - name: DATABRICKS_SERVING_BASE_URL
            value: "https://<workspace>/serving-endpoints"
diff --git a/experimental/databricks-apps-python/examples/fm-parallel-calls.py b/experimental/databricks-apps-python/examples/fm-parallel-calls.py
index 53cc6a2..71cd81b 100644
--- a/experimental/databricks-apps-python/examples/fm-parallel-calls.py
+++ b/experimental/databricks-apps-python/examples/fm-parallel-calls.py
@@ -224,42 +224,44 @@ def check_audience_fit(client: OpenAI, text: str) -> Dict[str, Any]:
     time_saved = (total_latency / 1000) - total_time
     print(f"\n{'='*60}")
     print(f"Time saved vs serial execution: {time_saved:.2f}s")
-    print(f"Speedup: {(total_latency/1000) / total_time:.1f}×")
+    if total_time > 0:
+        print(f"Speedup: {(total_latency/1000) / total_time:.1f}×")
+    else:
+        print("Speedup: N/A (total_time below resolution)")
     print(f"{'='*60}")
 
 
 # =============================================================================
 # Production Best Practices
 # =============================================================================
-"""
-Best practices from databricksters-check-and-pub:
-
-1. Configurable concurrency
-   - Use LLM_MAX_CONCURRENCY env var (default: 5 in the production app)
-   - Balance throughput vs rate limits
-   - Too high = rate limit errors
-   - Too low = underutilized resources
-
-2. Error handling
-   - Capture exceptions per job
-   - Return None for failed jobs
-   - Collect error messages for debugging
-   - Continue execution even if some jobs fail
-
-3. Bounded execution
-   - Only parallelize independent checks
-   - Cap concurrency with an env var rather than firing unlimited requests
-   - Keep the job contract simple: name -> (callable, args, kwargs)
-
-4. When to use parallel calls
-   - Multiple independent evaluations of same content
-   - Batch processing multiple documents
-   - A/B testing different prompts
-   - Multi-aspect analysis
-
-5. When NOT to use parallel calls
-   - Dependent/sequential operations
-   - Single evaluation needed
-   - Rate limits are very strict
-   - Debugging (use serial for easier troubleshooting)
-"""
+#
+# Best practices from databricksters-check-and-pub:
+#
+# 1. Configurable concurrency
+#    - Use LLM_MAX_CONCURRENCY env var (default: 5 in the production app)
+#    - Balance throughput vs rate limits
+#    - Too high = rate limit errors
+#    - Too low = underutilized resources
+#
+# 2. Error handling
+#    - Capture exceptions per job
+#    - Return None for failed jobs
+#    - Collect error messages for debugging
+#    - Continue execution even if some jobs fail
+#
+# 3. Bounded execution
+#    - Only parallelize independent checks
+#    - Cap concurrency with an env var rather than firing unlimited requests
+#    - Keep the job contract simple: name -> (callable, args, kwargs)
+#
+# 4. When to use parallel calls
+#    - Multiple independent evaluations of same content
+#    - Batch processing multiple documents
+#    - A/B testing different prompts
+#    - Multi-aspect analysis
+#
+# 5. When NOT to use parallel calls
+#    - Dependent/sequential operations
+#    - Single evaluation needed
+#    - Rate limits are very strict
+#    - Debugging (use serial for easier troubleshooting)
diff --git a/experimental/databricks-apps-python/examples/llm_config.py b/experimental/databricks-apps-python/examples/llm_config.py
index 6c4d550..200aaef 100644
--- a/experimental/databricks-apps-python/examples/llm_config.py
+++ b/experimental/databricks-apps-python/examples/llm_config.py
@@ -218,8 +218,10 @@ def get_databricks_bearer_token(
         access_token = payload.get("access_token")
         expires_in = int(payload.get("expires_in", 300))
         if not access_token:
+            payload_keys = sorted(payload.keys()) if isinstance(payload, dict) else []
             raise DatabricksLLMConfigError(
-                f"Token endpoint response is missing access_token: {payload}"
+                "Token endpoint response is missing access_token "
+                f"(keys present: {payload_keys})"
             )
 
         expires_at = int(time.time()) + expires_in
@@ -278,8 +280,7 @@ def validate_databricks_llm_config(
     if response.status_code >= 400:
         raise DatabricksLLMConfigError(
             f"Failed to validate DATABRICKS_MODEL={config.model!r} in workspace "
-            f"{config.workspace_host} (HTTP {response.status_code}). "
-            f"Response: {response.text[:300]}"
+            f"{config.workspace_host} (HTTP {response.status_code})."
         )
 
     _validation_cache[cache_key] = int(time.time()) + VALIDATION_TTL_SECONDS
diff --git a/experimental/databricks-python-sdk/examples/5-serving-and-vector-search.py b/experimental/databricks-python-sdk/examples/5-serving-and-vector-search.py
index 2a47c2b..597aede 100644
--- a/experimental/databricks-python-sdk/examples/5-serving-and-vector-search.py
+++ b/experimental/databricks-python-sdk/examples/5-serving-and-vector-search.py
@@ -168,10 +168,15 @@
 
 
 # Query with embedding vector directly
+# query_vector must be a list[float] whose length matches your index's
+# embedding dimension (e.g. 768 for bge-base, 1024 for bge-large, 1536 for
+# text-embedding-3-small / ada-002). The [0.0] * N below is a stand-in;
+# replace with the actual vector returned by your embedding model.
+query_vector = [0.0] * 768
 results = w.vector_search_indexes.query_index(
     index_name="main.default.my_index",
     columns=["id", "text"],
-    query_vector=[0.1, 0.2, 0.3, ...],  # Your embedding vector
+    query_vector=query_vector,
     num_results=10
 )