Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions ai4rag/core/experiment/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,12 @@ def run_single_evaluation(self, rag_params: RAGParamsType) -> float:
"system_message_text": system_message_text,
},
}
if self.client:
rag_params["vector_io_provider_type"] = self.client.providers.retrieve(
self.ogx_vector_io_provider_id
).provider_type
else:
rag_params["vector_io_provider_type"] = "chroma::local"

logger.info("Using retrieval and generation params: %s", rag_params)

Expand Down Expand Up @@ -603,11 +609,6 @@ def _stream_finished_pattern(
AI4RAGParamNames.RANKER_ALPHA
)

vector_store_payload = {
"datasource_type": self.ogx_vector_io_provider_id or "local_chroma",
"collection_name": evaluation_result.collection,
}

indexing_payload = {
"chunking": {
"method": evaluation_result.indexing_params["chunking"][AI4RAGParamNames.CHUNKING_METHOD],
Expand All @@ -621,6 +622,21 @@ def _stream_finished_pattern(

n_known = len(self.known_observations) if self.known_observations else 0

responses_template_payload = {
"model": evaluation_result.rag_params["generation"]["model_id"],
"stream": False, # Not supported yet
"store": True, # Responses API default
"input": evaluation_result.rag_params["generation"]["user_message_text"],
"instructions": evaluation_result.rag_params["generation"]["system_message_text"],
"tools": [
{
"type": "file_search",
"vector_store_ids": [evaluation_result.collection],
}
],
"include": ["file_search_call.results"],
}

payload = {
"pattern_name": evaluation_result.pattern_name,
"scores": {
Expand All @@ -632,14 +648,22 @@ def _stream_finished_pattern(
"schema_version": "1.0",
"producer": "ai4rag",
"settings": {
"vector_store": vector_store_payload,
"vector_store_binding": {
"provider_id": self.ogx_vector_io_provider_id,
"provider_type": evaluation_result.rag_params["vector_io_provider_type"],
"vector_store_id": evaluation_result.collection,
"vector_store_name": "TBD",
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ADR specifies that this field should hold the name as assigned in the ConfigMap, which is proving really problematic to obtain. How can we simplify this, @LukaszCmielowski?

},
**indexing_payload,
"retrieval": retrieval_payload,
"generation": generation_payload,
},
"iteration": len(self.results) + n_known,
}

if self.vector_store_type != "chroma":
payload["responses_template"] = responses_template_payload

self.event_handler.on_pattern_creation(
payload=payload,
evaluation_results=evaluation_results_json,
Expand Down
16 changes: 16 additions & 0 deletions dev_utils/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,19 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]:

def embed_query(self, query: str) -> list[float]:
return [random() for _ in range(self.params["embedding_dimension"])]


class MockedOGXClient:
    """Mock OGX client for testing without real OGX server.

    Only the ``providers.retrieve(provider_id).provider_type`` access path is
    emulated.

    Args:
        provider_type: Value reported as ``provider_type`` by every provider
            returned from ``providers.retrieve``. Defaults to
            ``"mock_provider"``.
    """

    class MockProvider:
        """Minimal provider record returned by ``MockedProviders.retrieve``."""

        def __init__(self, provider_id: str, provider_type: str = "mock_provider"):
            # Keep the requested id so tests can verify what was looked up.
            self.provider_id = provider_id
            self.provider_type = provider_type

    class MockedProviders:
        """Mock providers interface."""

        def __init__(self, provider_type: str = "mock_provider"):
            self.provider_type = provider_type

        def retrieve(self, provider_id: str):
            """Return a mock provider for ``provider_id``.

            No lookup is performed; every id resolves to a provider carrying
            the configured ``provider_type``.
            """
            return MockedOGXClient.MockProvider(provider_id, self.provider_type)

    def __init__(self, provider_type: str = "mock_provider"):
        self.providers = self.MockedProviders(provider_type)
29 changes: 29 additions & 0 deletions tests/functional/test_experiment_mocked_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,32 @@ def test_best_pattern_can_generate_answer(self, documents, benchmark_data, found
assert isinstance(result, dict), f"Expected dict from generate(), got {type(result)}"
answer = result.get("answer")
assert isinstance(answer, str) and len(answer) > 0, f"Expected a non-empty answer string, got {answer!r}"

def test_pattern_params_include_generation_config(
    self, documents, benchmark_data, foundation_models, embedding_models
):
    """
    Check that the best evaluation's RAG params carry the generation settings.

    Runs a random-optimizer search, takes the top evaluation and verifies
    that its ``rag_params`` contain the ``generation`` and ``retrieval``
    sections, that the generation fields are strings, and that the chosen
    model is one of ``foundation_models``.
    The experiment from ``_make_experiment`` is expected to take the chroma
    path (no client), so ``vector_io_provider_type`` must be 'chroma::local'.
    """
    experiment = _make_experiment(documents, benchmark_data, foundation_models, embedding_models)
    experiment.search(optimizer=RandomOptimizer, skip_mps=True)

    top_evaluations = experiment.results.get_best_evaluations(k=1)
    assert len(top_evaluations) > 0, f"No evaluations generated. Total results: {len(experiment.results)}"

    params = top_evaluations[0].rag_params
    for section in ("generation", "retrieval"):
        assert section in params

    generation_cfg = params["generation"]
    chosen_model_id = generation_cfg["model_id"]
    assert isinstance(chosen_model_id, str) and chosen_model_id
    for text_field in ("context_template_text", "user_message_text", "system_message_text"):
        assert isinstance(generation_cfg[text_field], str)

    known_model_ids = [fm.model_id for fm in foundation_models]
    assert chosen_model_id in known_model_ids

    provider_type = params.get("vector_io_provider_type")
    assert provider_type == "chroma::local", (
        "Chroma path (no client) should set vector_io_provider_type to 'chroma::local'"
    )
251 changes: 251 additions & 0 deletions tests/unit/ai4rag/core/experiment/test_stream_pattern.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
# -----------------------------------------------------------------------------
# Copyright IBM Corp. 2026
# SPDX-License-Identifier: Apache-2.0
# -----------------------------------------------------------------------------
"""Unit tests for AI4RAGExperiment._stream_finished_pattern payload construction."""

import pandas as pd
import pytest
from langchain_core.documents import Document

from ai4rag.core.experiment.experiment import AI4RAGExperiment
from ai4rag.core.experiment.results import EvaluationResult, ExperimentResults
from ai4rag.core.hpo.random_opt import RandomOptSettings
from ai4rag.search_space.src.parameter import Parameter
from ai4rag.search_space.src.search_space import AI4RAGSearchSpace
from ai4rag.utils.constants import AI4RAGParamNames
from ai4rag.utils.event_handler import LocalEventHandler
from dev_utils.mocks import MockedEmbeddingModel, MockedFoundationModel, MockedOGXClient

_EMBEDDING_DIMENSION = 64


def _make_search_space(fm, em):
    """Build a chroma search space offering exactly one model of each kind.

    Args:
        fm: Foundation model used as the only ``foundation_model`` value.
        em: Embedding model used as the only ``embedding_model`` value.

    Returns:
        An ``AI4RAGSearchSpace`` with ``vector_store_type="chroma"`` and the
        two single-valued parameters.
    """
    foundation_param = Parameter(name="foundation_model", param_type="C", values=[fm])
    embedding_param = Parameter(name="embedding_model", param_type="C", values=[em])
    return AI4RAGSearchSpace(
        vector_store_type="chroma",
        params=[foundation_param, embedding_param],
    )


def _make_evaluation_result(
    vector_io_provider_type="chroma::local",
    search_mode="vector",
    window_size=None,
    ranker_strategy=None,
    ranker_k=None,
    ranker_alpha=None,
):
    """Create a fixed ``EvaluationResult`` with a few tunable retrieval fields.

    Args:
        vector_io_provider_type: Stored under
            ``rag_params["vector_io_provider_type"]``.
        search_mode: Value for the ``SEARCH_MODE`` retrieval parameter.
        window_size: Value for the ``WINDOW_SIZE`` retrieval parameter.
        ranker_strategy: Value for the ``RANKER_STRATEGY`` retrieval parameter.
        ranker_k: Value for the ``RANKER_K`` retrieval parameter.
        ranker_alpha: Value for the ``RANKER_ALPHA`` retrieval parameter.

    Returns:
        An ``EvaluationResult`` named ``Pattern1`` for the collection
        ``test-collection-abc``; everything not listed above is constant.
    """
    chunking_params = {
        AI4RAGParamNames.CHUNKING_METHOD: "recursive",
        AI4RAGParamNames.CHUNK_SIZE: 512,
        AI4RAGParamNames.CHUNK_OVERLAP: 64,
    }
    embedding_params = {
        "model_id": "mock-em-0",
        "distance_metric": "cosine",
    }
    retrieval_params = {
        AI4RAGParamNames.RETRIEVAL_METHOD: "simple",
        AI4RAGParamNames.NUMBER_OF_CHUNKS: 3,
        AI4RAGParamNames.SEARCH_MODE: search_mode,
        AI4RAGParamNames.WINDOW_SIZE: window_size,
        AI4RAGParamNames.RANKER_STRATEGY: ranker_strategy,
        AI4RAGParamNames.RANKER_K: ranker_k,
        AI4RAGParamNames.RANKER_ALPHA: ranker_alpha,
    }
    generation_params = {
        "model_id": "mock-fm-0",
        "context_template_text": "Context: {context}",
        "user_message_text": "Answer: {question}",
        "system_message_text": "You are a helpful assistant.",
    }
    score_summary = {
        "scores": {"answer_correctness": {"mean": 0.5}},
        "question_scores": {"answer_correctness": {"q0": 0.5}},
    }

    return EvaluationResult(
        pattern_name="Pattern1",
        collection="test-collection-abc",
        indexing_params={"chunking": chunking_params, "embedding": embedding_params},
        rag_params={
            "retrieval": retrieval_params,
            "generation": generation_params,
            "vector_io_provider_type": vector_io_provider_type,
        },
        scores=score_summary,
        execution_time=10.0,
        final_score=0.5,
    )


@pytest.fixture
def foundation_model():
    """Provide a mocked foundation model with id ``mock-fm-0`` and no params."""
    mocked_model = MockedFoundationModel(model_id="mock-fm-0", params=None)
    return mocked_model


@pytest.fixture
def embedding_model():
    """Provide a mocked embedding model with id ``mock-em-0``.

    Its ``embedding_dimension`` param is set to ``_EMBEDDING_DIMENSION``.
    """
    model_params = {"embedding_dimension": _EMBEDDING_DIMENSION}
    return MockedEmbeddingModel(model_id="mock-em-0", params=model_params)


@pytest.fixture
def minimal_documents():
    """Provide a one-element document list (document id ``doc_0``)."""
    only_document = Document(
        page_content="Test content.",
        metadata={"document_id": "doc_0"},
    )
    return [only_document]


@pytest.fixture
def minimal_benchmark():
    """Provide a single-row benchmark frame that points at document ``doc_0``."""
    columns = {
        "question": ["What is test?"],
        "correct_answers": [["Test content."]],
        "correct_answer_document_ids": [["doc_0"]],
    }
    return pd.DataFrame(columns)


def _make_chroma_experiment(foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker):
    """Create a chroma-backed ``AI4RAGExperiment`` with a mocked event handler.

    The event handler is a ``MagicMock`` specced on ``LocalEventHandler`` so
    tests can inspect the calls made on it. ``results`` is replaced with an
    empty ``ExperimentResults`` after construction.
    """
    handler_mock = mocker.MagicMock(spec=LocalEventHandler)
    search_space = _make_search_space(foundation_model, embedding_model)
    optimizer_settings = RandomOptSettings(max_evals=1)

    chroma_experiment = AI4RAGExperiment(
        documents=minimal_documents,
        benchmark_data=minimal_benchmark,
        search_space=search_space,
        vector_store_type="chroma",
        optimizer_settings=optimizer_settings,
        event_handler=handler_mock,
    )
    chroma_experiment.results = ExperimentResults()
    return chroma_experiment


def _make_ogx_experiment(foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker):
    """Create an OGX-backed ``AI4RAGExperiment`` with a mocked client and handler.

    Uses ``MockedOGXClient`` as the client, ``"test-provider"`` as the vector
    IO provider id, and a ``MagicMock`` specced on ``LocalEventHandler`` as
    the event handler. ``results`` is replaced with an empty
    ``ExperimentResults`` after construction.
    """
    handler_mock = mocker.MagicMock(spec=LocalEventHandler)
    search_space = _make_search_space(foundation_model, embedding_model)
    optimizer_settings = RandomOptSettings(max_evals=1)
    ogx_client = MockedOGXClient()

    ogx_experiment = AI4RAGExperiment(
        documents=minimal_documents,
        benchmark_data=minimal_benchmark,
        search_space=search_space,
        vector_store_type="ogx",
        optimizer_settings=optimizer_settings,
        event_handler=handler_mock,
        client=ogx_client,
        ogx_vector_io_provider_id="test-provider",
    )
    ogx_experiment.results = ExperimentResults()
    return ogx_experiment


class TestStreamFinishedPatternChroma:
    """Payload checks for ``_stream_finished_pattern`` on a chroma experiment."""

    def test_chroma_payload_excludes_responses_template(
        self, foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
    ):
        """The payload has a full vector-store binding and no ``responses_template``."""
        chroma_experiment = _make_chroma_experiment(
            foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
        )
        evaluation = _make_evaluation_result(vector_io_provider_type="chroma::local")

        chroma_experiment._stream_finished_pattern(evaluation, [])

        on_creation = chroma_experiment.event_handler.on_pattern_creation
        on_creation.assert_called_once()
        sent_payload = on_creation.call_args.kwargs["payload"]

        assert "responses_template" not in sent_payload
        binding = sent_payload["settings"]["vector_store_binding"]
        for field in ("provider_id", "provider_type", "vector_store_id", "vector_store_name"):
            assert field in binding
        assert binding["provider_type"] == "chroma::local"

    def test_chroma_payload_has_required_top_level_keys(
        self, foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
    ):
        """Every expected top-level key is present in the emitted payload."""
        chroma_experiment = _make_chroma_experiment(
            foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
        )
        evaluation = _make_evaluation_result()

        chroma_experiment._stream_finished_pattern(evaluation, [])

        sent_payload = chroma_experiment.event_handler.on_pattern_creation.call_args.kwargs["payload"]
        required_keys = {
            "pattern_name",
            "scores",
            "execution_time",
            "final_score",
            "schema_version",
            "producer",
            "settings",
            "iteration",
        }
        assert required_keys.issubset(sent_payload.keys())


class TestStreamFinishedPatternOGX:
    """Payload checks for ``_stream_finished_pattern`` on an OGX experiment."""

    def test_ogx_payload_includes_responses_template(
        self, foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
    ):
        """The payload carries a ``responses_template`` built from the evaluation."""
        ogx_experiment = _make_ogx_experiment(
            foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
        )
        evaluation = _make_evaluation_result(vector_io_provider_type="mock_provider")

        ogx_experiment._stream_finished_pattern(evaluation, [])

        sent_payload = ogx_experiment.event_handler.on_pattern_creation.call_args.kwargs["payload"]
        assert "responses_template" in sent_payload

        template = sent_payload["responses_template"]
        assert template["model"] == "mock-fm-0"

        first_tool = template["tools"][0]
        assert first_tool["type"] == "file_search"
        assert first_tool["vector_store_ids"] == ["test-collection-abc"]

        assert template["include"] == ["file_search_call.results"]
        assert template["stream"] is False
        assert template["store"] is True


class TestStreamFinishedPatternRetrieval:
    """Checks on the ``settings.retrieval`` section of the emitted payload."""

    def test_hybrid_retrieval_includes_ranker_fields(
        self, foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
    ):
        """Hybrid search settings and ranker fields are copied into the payload."""
        chroma_experiment = _make_chroma_experiment(
            foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
        )
        evaluation = _make_evaluation_result(
            search_mode="hybrid",
            ranker_strategy="rrf",
            ranker_k=60,
            ranker_alpha=0.5,
        )

        chroma_experiment._stream_finished_pattern(evaluation, [])

        sent_payload = chroma_experiment.event_handler.on_pattern_creation.call_args.kwargs["payload"]
        retrieval_section = sent_payload["settings"]["retrieval"]
        expected_fields = {
            "search_mode": "hybrid",
            "ranker_strategy": "rrf",
            "ranker_k": 60,
            "ranker_alpha": 0.5,
        }
        for field, expected_value in expected_fields.items():
            assert retrieval_section[field] == expected_value

    def test_window_size_included_when_set(
        self, foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
    ):
        """A non-None window size is reported under ``retrieval.window_size``."""
        chroma_experiment = _make_chroma_experiment(
            foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
        )
        evaluation = _make_evaluation_result(window_size=2)

        chroma_experiment._stream_finished_pattern(evaluation, [])

        sent_payload = chroma_experiment.event_handler.on_pattern_creation.call_args.kwargs["payload"]
        retrieval_section = sent_payload["settings"]["retrieval"]
        assert retrieval_section["window_size"] == 2

    def test_window_size_excluded_when_none(
        self, foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
    ):
        """A ``None`` window size leaves the ``window_size`` key out entirely."""
        chroma_experiment = _make_chroma_experiment(
            foundation_model, embedding_model, minimal_documents, minimal_benchmark, mocker
        )
        evaluation = _make_evaluation_result(window_size=None)

        chroma_experiment._stream_finished_pattern(evaluation, [])

        sent_payload = chroma_experiment.event_handler.on_pattern_creation.call_args.kwargs["payload"]
        retrieval_section = sent_payload["settings"]["retrieval"]
        assert "window_size" not in retrieval_section
Loading