diff --git a/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/AgentExecutor.kt b/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/AgentExecutor.kt index 6b540d223..1db7a87a3 100644 --- a/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/AgentExecutor.kt +++ b/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/AgentExecutor.kt @@ -33,7 +33,9 @@ import community.flock.aigentic.core.message.ToolCall import community.flock.aigentic.core.message.asJson import community.flock.aigentic.core.message.mapToTextMessages import community.flock.aigentic.core.model.ModelResponse +import community.flock.aigentic.core.platform.EvaluationSubmitResult import community.flock.aigentic.core.platform.RunSentResult +import community.flock.aigentic.core.platform.addToEvaluationSet import community.flock.aigentic.core.platform.getRuns import community.flock.aigentic.core.platform.sendRun import community.flock.aigentic.core.tool.Parameter @@ -51,6 +53,7 @@ suspend inline fun Agent.start(vararg a suspend inline fun Agent.start( input: I? = null, vararg attachments: Attachment, + expected: Expected? = null, ): AgentRun = coroutineScope { val agent = this@start @@ -59,13 +62,13 @@ suspend inline fun Agent.start( val logging = async { state.getStatus().map { it.text }.collect(::println) } try { val run = executeAction(Initialize(state, agent, input, attachments.toList())).toRun() - publishRun(agent, run, state) - run + val platformRunId = publishRun(agent, run, state, expected) + run.copy(platformRunId = platformRunId) } catch (e: AigenticException) { state.events.emit(AgentStatus.Fatal(e.message)) val run = (state to Outcome.Fatal(e.message)).toRun() - publishRun(agent, run, state) - run + val platformRunId = publishRun(agent, run, state, expected) + run.copy(platformRunId = platformRunId) } finally { delay(10) // Allow some time for the logging to finish logging.cancelAndJoin() @@ -77,22 +80,45 @@ internal suspend inline fun publishRun( agent: Agent, run: AgentRun, state: State, -) { - if (agent.platform != null) { - runCatching { - agent.platform.sendRun(run, agent) - }.onSuccess { result -> + expected: Expected?, +): RunId? { + if (agent.platform == null) return null + return runCatching { + agent.platform.sendRun(run, agent, expected) + }.fold( + onSuccess = { result -> when (result) { - RunSentResult.Success -> state.events.emit(AgentStatus.PublishedRunSuccess) - RunSentResult.Unauthorized -> state.events.emit(AgentStatus.PublishedRunUnauthorized) - is RunSentResult.Error -> state.events.emit(AgentStatus.PublishedRunError(result.message)) + is RunSentResult.Success -> { + state.events.emit(AgentStatus.PublishedRunSuccess) + result.runId + } + + RunSentResult.Unauthorized -> { + state.events.emit(AgentStatus.PublishedRunUnauthorized) + null + } + + is RunSentResult.Error -> { + state.events.emit(AgentStatus.PublishedRunError(result.message)) + null + } } - }.onFailure { exception -> + }, + onFailure = { exception -> state.events.emit(AgentStatus.PublishedRunError(exception.message ?: "Unknown error")) - } - } + null + }, + ) } +suspend inline fun Agent.addToEvaluationSet( + runId: String, + evaluationSet: String, + expected: O, +): EvaluationSubmitResult = + platform?.addToEvaluationSet(RunId(runId), evaluationSet, expected) + ?: aigenticException("Platform must be configured to add a run to an evaluation set") + suspend inline fun executeAction(action: Action): Pair> { var currentAction = action while (true) { diff --git a/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/Expected.kt b/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/Expected.kt new file mode 100644 index 000000000..98869c1b4 --- /dev/null +++ b/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/Expected.kt @@ -0,0 +1,6 @@ +package community.flock.aigentic.core.agent + +data class Expected( + val evaluationSet: String, + val output: O, +) diff --git a/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/Run.kt b/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/Run.kt index 16788cc73..daa356156 100644 --- a/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/Run.kt +++ b/src/core/src/commonMain/kotlin/community/flock/aigentic/core/agent/Run.kt @@ -25,6 +25,7 @@ data class AgentRun( override val modelRequests: List, val systemPromptMessage: Message.SystemPrompt, val exampleRunIds: List = emptyList(), + val platformRunId: RunId? = null, ) : Run() data class WorkflowRun( @@ -120,5 +121,6 @@ internal inline fun AgentRun.decode(): AgentRun { modelRequests = modelRequests, exampleRunIds = exampleRunIds, systemPromptMessage = systemPromptMessage, + platformRunId = platformRunId, ) } diff --git a/src/core/src/commonMain/kotlin/community/flock/aigentic/core/platform/Platform.kt b/src/core/src/commonMain/kotlin/community/flock/aigentic/core/platform/Platform.kt index a7fb0f183..8d78ec9ad 100644 --- a/src/core/src/commonMain/kotlin/community/flock/aigentic/core/platform/Platform.kt +++ b/src/core/src/commonMain/kotlin/community/flock/aigentic/core/platform/Platform.kt @@ -2,6 +2,7 @@ package community.flock.aigentic.core.platform import community.flock.aigentic.core.agent.Agent import community.flock.aigentic.core.agent.AgentRun +import community.flock.aigentic.core.agent.Expected import community.flock.aigentic.core.agent.RunId import community.flock.aigentic.core.agent.RunTag import community.flock.aigentic.core.agent.decode @@ -30,8 +31,16 @@ interface PlatformClient { run: AgentRun, agent: Agent, outputSerializer: KSerializer, + expected: Expected?, ): RunSentResult + suspend fun addToEvaluationSet( + runId: RunId, + evaluationSet: String, + expected: O, + outputSerializer: KSerializer, + ): EvaluationSubmitResult + suspend fun getRuns(tags: List): List>> } @@ -44,7 +53,14 @@ interface Platform { suspend inline fun Platform.sendRun( run: AgentRun, agent: Agent, -): RunSentResult = client.sendRun(run, agent, serializer()) + expected: Expected? = null, +): RunSentResult = client.sendRun(run, agent, serializer(), expected) + +suspend inline fun Platform.addToEvaluationSet( + runId: RunId, + evaluationSet: String, + expected: O, +): EvaluationSubmitResult = client.addToEvaluationSet(runId, evaluationSet, expected, serializer()) suspend inline fun Platform.getRuns(tags: List): List>> = client @@ -54,7 +70,9 @@ suspend inline fun Platform.getRuns(tags: List): List< } sealed interface RunSentResult { - data object Success : RunSentResult + data class Success( + val runId: RunId, + ) : RunSentResult data object Unauthorized : RunSentResult @@ -62,3 +80,15 @@ sealed interface RunSentResult { val message: String, ) : RunSentResult } + +sealed interface EvaluationSubmitResult { + data object Success : EvaluationSubmitResult + + data object Unauthorized : EvaluationSubmitResult + + data object NotFound : EvaluationSubmitResult + + data class Error( + val message: String, + ) : EvaluationSubmitResult +} diff --git a/src/platform/src/commonMain/kotlin/community/flock/aigentic/platform/client/PlatformClient.kt b/src/platform/src/commonMain/kotlin/community/flock/aigentic/platform/client/PlatformClient.kt index 08b503865..d4cdd2e4c 100644 --- a/src/platform/src/commonMain/kotlin/community/flock/aigentic/platform/client/PlatformClient.kt +++ b/src/platform/src/commonMain/kotlin/community/flock/aigentic/platform/client/PlatformClient.kt @@ -2,15 +2,20 @@ package community.flock.aigentic.platform.client import community.flock.aigentic.core.agent.Agent import community.flock.aigentic.core.agent.AgentRun +import community.flock.aigentic.core.agent.Expected import community.flock.aigentic.core.agent.RunId import community.flock.aigentic.core.agent.RunTag import community.flock.aigentic.core.exception.aigenticException import community.flock.aigentic.core.platform.Authentication +import community.flock.aigentic.core.platform.EvaluationSubmitResult import community.flock.aigentic.core.platform.PlatformApiUrl import community.flock.aigentic.core.platform.PlatformClient import community.flock.aigentic.core.platform.RunSentResult +import community.flock.aigentic.gateway.wirespec.endpoint.AddRunAnnotations import community.flock.aigentic.gateway.wirespec.endpoint.Gateway import community.flock.aigentic.gateway.wirespec.endpoint.GetRuns +import community.flock.aigentic.gateway.wirespec.model.RunCreatedDto +import community.flock.aigentic.gateway.wirespec.model.RunEvaluationDto import community.flock.aigentic.platform.mapper.toDto import community.flock.aigentic.platform.mapper.toRun import community.flock.wirespec.kotlin.Wirespec @@ -45,7 +50,8 @@ import kotlin.reflect.KType interface PlatformEndpoints : Gateway.Handler, - GetRuns.Handler + GetRuns.Handler, + AddRunAnnotations.Handler const val defaultPlatformApiUrl = "https://aigentic-backend-kib53ypjwq-ez.a.run.app/" @@ -58,12 +64,16 @@ class AigenticPlatformClient( run: AgentRun, agent: Agent, outputSerializer: KSerializer, + expected: Expected?, ): RunSentResult { - val runDto = run.toDto(agent, outputSerializer) + val runDto = run.toDto(agent, outputSerializer, expected) val request = Gateway.Request(body = runDto) return when (val response = endpoints.gateway(request)) { is Gateway.Response201 -> { - RunSentResult.Success + response.body.runId + .takeIf { it.isNotBlank() } + ?.let { RunSentResult.Success(RunId(it)) } + ?: RunSentResult.Error("Gateway accepted the run but returned no run id") } is Gateway.Response401 -> { @@ -82,6 +92,44 @@ class AigenticPlatformClient( } } + override suspend fun addToEvaluationSet( + runId: RunId, + evaluationSet: String, + expected: O, + outputSerializer: KSerializer, + ): EvaluationSubmitResult { + val request = + AddRunAnnotations.Request( + runId = runId.value, + body = + RunEvaluationDto( + evaluationSet = evaluationSet, + expectedResponse = Json.encodeToString(outputSerializer, expected), + ), + ) + return when (val response = endpoints.addRunAnnotations(request)) { + is AddRunAnnotations.Response200 -> { + EvaluationSubmitResult.Success + } + + is AddRunAnnotations.Response401 -> { + EvaluationSubmitResult.Unauthorized + } + + is AddRunAnnotations.Response404 -> { + EvaluationSubmitResult.NotFound + } + + is AddRunAnnotations.Response400 -> { + EvaluationSubmitResult.Error(response.body.message) + } + + is AddRunAnnotations.Response500 -> { + EvaluationSubmitResult.Error("${response.body.name} - ${response.body.description}") + } + } + } + override suspend fun getRuns(tags: List): List>> = when (val response = endpoints.getRuns(GetRuns.Request(tags = tags.joinToString(",") { it.value }))) { is GetRuns.Response200 -> response.body @@ -166,6 +214,11 @@ class AigenticPlatformEndpoints( val edge = Gateway.Handler.client(serialization) val rawRequest = edge.to(request) val rawResponse = executeRequest(rawRequest) + // Backward compatibility: older gateways answer 201 with an empty body (the previous `201 -> Unit`). + // Surface it as a 201 with a blank runId so it doesn't throw; sendRun maps the blank id to an Error. + if (rawResponse.statusCode == 201 && rawResponse.body?.isEmpty() != false) { + return Gateway.Response201(RunCreatedDto(runId = "")) + } return edge.from(rawResponse) } @@ -176,6 +229,13 @@ class AigenticPlatformEndpoints( return edge.from(rawResponse) } + override suspend fun addRunAnnotations(request: AddRunAnnotations.Request): AddRunAnnotations.Response<*> { + val edge = AddRunAnnotations.Handler.client(serialization) + val rawRequest = edge.to(request) + val rawResponse = executeRequest(rawRequest) + return edge.from(rawResponse) + } + private suspend fun executeRequest(rawRequest: Wirespec.RawRequest): Wirespec.RawResponse { val response = httpClient.request { diff --git a/src/platform/src/commonMain/kotlin/community/flock/aigentic/platform/mapper/RequestMapper.kt b/src/platform/src/commonMain/kotlin/community/flock/aigentic/platform/mapper/RequestMapper.kt index 2063b38f6..a2d20fb96 100644 --- a/src/platform/src/commonMain/kotlin/community/flock/aigentic/platform/mapper/RequestMapper.kt +++ b/src/platform/src/commonMain/kotlin/community/flock/aigentic/platform/mapper/RequestMapper.kt @@ -2,6 +2,7 @@ package community.flock.aigentic.platform.mapper import community.flock.aigentic.core.agent.Agent import community.flock.aigentic.core.agent.AgentRun +import community.flock.aigentic.core.agent.Expected import community.flock.aigentic.core.agent.state.ModelRequestInfo import community.flock.aigentic.core.agent.tool.Outcome import community.flock.aigentic.core.message.Message @@ -34,6 +35,7 @@ import community.flock.aigentic.gateway.wirespec.model.PrimitiveValueNumberDto import community.flock.aigentic.gateway.wirespec.model.PrimitiveValueStringDto import community.flock.aigentic.gateway.wirespec.model.PrimitiveValueTypeDto import community.flock.aigentic.gateway.wirespec.model.RunDto +import community.flock.aigentic.gateway.wirespec.model.RunEvaluationDto import community.flock.aigentic.gateway.wirespec.model.SenderDto import community.flock.aigentic.gateway.wirespec.model.StructuredOutputMessageDto import community.flock.aigentic.gateway.wirespec.model.StuckResultDto @@ -65,6 +67,7 @@ private fun Parameter.toJsonSchemaString(): String = fun AgentRun.toDto( agent: Agent, outputSerializer: KSerializer, + expected: Expected? = null, ): RunDto = RunDto( startedAt = startedAt.toString(), @@ -99,6 +102,13 @@ fun AgentRun.toDto( messages = messages.mapNotNull { it.toDto() }, modelRequests = modelRequests.map { it.toDto() }, result = outcome.toDto(outputSerializer), + evaluation = + expected?.let { + RunEvaluationDto( + evaluationSet = it.evaluationSet, + expectedResponse = Json.encodeToString(outputSerializer, it.output), + ) + }, ) private fun Parameter.toDto(): ParameterDto = diff --git a/src/platform/src/jvmTest/kotlin/community/flock/aigentic/platform/client/AigenticPlatformClientTest.kt b/src/platform/src/jvmTest/kotlin/community/flock/aigentic/platform/client/AigenticPlatformClientTest.kt index f2bb0f1de..b07520f60 100644 --- a/src/platform/src/jvmTest/kotlin/community/flock/aigentic/platform/client/AigenticPlatformClientTest.kt +++ b/src/platform/src/jvmTest/kotlin/community/flock/aigentic/platform/client/AigenticPlatformClientTest.kt @@ -1,10 +1,12 @@ package community.flock.aigentic.platform.client +import community.flock.aigentic.core.agent.RunId import community.flock.aigentic.core.platform.Authentication import community.flock.aigentic.core.platform.PlatformApiUrl import community.flock.aigentic.core.platform.RunSentResult import community.flock.aigentic.gateway.wirespec.endpoint.Gateway import community.flock.aigentic.gateway.wirespec.model.GatewayClientErrorDto +import community.flock.aigentic.gateway.wirespec.model.RunCreatedDto import community.flock.aigentic.gateway.wirespec.model.ServerErrorDto import community.flock.aigentic.platform.util.createAgent import community.flock.aigentic.platform.util.createAgentRun @@ -20,7 +22,11 @@ class AigenticPlatformClientTest : withData( nameFn = { "Should map ${it.wirespecResponse} to ${it.runSentResult}" }, - TestCase(Gateway.Response201(body = Unit), RunSentResult.Success), + TestCase(Gateway.Response201(body = RunCreatedDto("run-123")), RunSentResult.Success(RunId("run-123"))), + TestCase( + Gateway.Response201(body = RunCreatedDto("")), + RunSentResult.Error("Gateway accepted the run but returned no run id"), + ), TestCase(Gateway.Response401(body = Unit), RunSentResult.Unauthorized), TestCase( Gateway.Response400(body = GatewayClientErrorDto("invalid request")), @@ -50,7 +56,7 @@ class AigenticPlatformClientTest : platformEndpoints, ) - val result = client.sendRun(run, agent, serializer()) + val result = client.sendRun(run, agent, serializer(), null) result shouldBe it.runSentResult } diff --git a/src/platform/src/jvmTest/kotlin/community/flock/aigentic/platform/client/EvaluationSubmissionTest.kt b/src/platform/src/jvmTest/kotlin/community/flock/aigentic/platform/client/EvaluationSubmissionTest.kt new file mode 100644 index 000000000..54d1f55a0 --- /dev/null +++ b/src/platform/src/jvmTest/kotlin/community/flock/aigentic/platform/client/EvaluationSubmissionTest.kt @@ -0,0 +1,263 @@ +package community.flock.aigentic.platform.client + +import community.flock.aigentic.core.agent.Expected +import community.flock.aigentic.core.agent.RunId +import community.flock.aigentic.core.agent.addToEvaluationSet +import community.flock.aigentic.core.agent.start +import community.flock.aigentic.core.agent.tool.FINISHED_TASK_TOOL_NAME +import community.flock.aigentic.core.annotations.AigenticParameter +import community.flock.aigentic.core.dsl.agent +import community.flock.aigentic.core.message.Message +import community.flock.aigentic.core.message.ToolCall +import community.flock.aigentic.core.message.ToolCallId +import community.flock.aigentic.core.model.GenerationSettings +import community.flock.aigentic.core.model.Model +import community.flock.aigentic.core.model.ModelIdentifier +import community.flock.aigentic.core.model.ModelResponse +import community.flock.aigentic.core.model.Usage +import community.flock.aigentic.core.platform.Authentication +import community.flock.aigentic.core.platform.EvaluationSubmitResult +import community.flock.aigentic.core.platform.Platform +import community.flock.aigentic.core.platform.PlatformApiUrl +import community.flock.aigentic.core.tool.Parameter +import community.flock.aigentic.core.tool.ToolDescription +import community.flock.aigentic.gateway.wirespec.model.RunDto +import community.flock.aigentic.gateway.wirespec.model.RunEvaluationDto +import community.flock.aigentic.platform.AigenticPlatform +import community.flock.aigentic.platform.mapper.toDto +import community.flock.aigentic.platform.util.createAgent +import community.flock.aigentic.platform.util.createAgentRun +import io.kotest.core.spec.style.DescribeSpec +import io.kotest.matchers.nulls.shouldNotBeNull +import io.kotest.matchers.shouldBe +import io.ktor.client.engine.mock.MockEngine +import io.ktor.client.engine.mock.respond +import io.ktor.client.engine.mock.toByteArray +import io.ktor.client.request.HttpRequestData +import io.ktor.http.HttpHeaders +import io.ktor.http.HttpMethod +import io.ktor.http.HttpStatusCode +import io.ktor.http.headersOf +import io.ktor.utils.io.ByteReadChannel +import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.JsonPrimitive +import kotlinx.serialization.json.buildJsonObject +import kotlinx.serialization.serializer + +@AigenticParameter +private data class InvoiceFields( + val invoiceNumber: String, + val total: String, +) + +private val lenientJson = Json { ignoreUnknownKeys = true } + +private val finishedInvoice = InvoiceFields("INV-001", "1250.00") + +private fun finishedTaskToolCall(): ToolCall = + ToolCall( + ToolCallId("1"), + FINISHED_TASK_TOOL_NAME, + Json.encodeToString( + JsonObject.serializer(), + buildJsonObject { + put("description", JsonPrimitive("Finished the task")) + put( + "InvoiceFields", + buildJsonObject { + put("invoiceNumber", JsonPrimitive(finishedInvoice.invoiceNumber)) + put("total", JsonPrimitive(finishedInvoice.total)) + }, + ) + }, + ), + ) + +private val finishedInvoiceModel: Model = + object : Model { + override val modelIdentifier: ModelIdentifier = + object : ModelIdentifier { + override val stringValue = "test-model" + } + override val generationSettings = GenerationSettings.DEFAULT + + override suspend fun sendRequest( + messages: List, + tools: List, + structuredOutputParameter: Parameter?, + ): ModelResponse = + ModelResponse( + message = Message.ToolCalls(listOf(finishedTaskToolCall())), + usage = Usage(inputTokenCount = 1, outputTokenCount = 1, thinkingOutputTokenCount = 0), + ) + } + +private fun structuredAgent(platform: Platform) = + agent { + platform(platform) + model(finishedInvoiceModel) + task("Extract the invoice fields") {} + } + +private fun mockPlatform(engine: MockEngine): Platform { + val auth = Authentication.BasicAuth("user", "pass") + val url = PlatformApiUrl("") + return AigenticPlatform( + authentication = auth, + apiUrl = url, + client = + AigenticPlatformClient( + basicAuth = auth, + apiUrl = url, + endpoints = AigenticPlatformEndpoints(auth, url, engine), + ), + ) +} + +private suspend fun HttpRequestData.bodyText(): String = body.toByteArray().decodeToString() + +class EvaluationSubmissionTest : + DescribeSpec({ + + describe("RequestMapper evaluation field") { + + it("builds RunEvaluationDto with evaluationSet and serialized output when expected is present") { + val agent = createAgent() + val run = createAgentRun() + + val dto = + run.toDto( + agent, + serializer(), + Expected(evaluationSet = "golden-set", output = "the-expected-output"), + ) + + val evaluation = dto.evaluation.shouldNotBeNull() + evaluation.evaluationSet shouldBe "golden-set" + evaluation.expectedResponse shouldBe Json.encodeToString(serializer(), "the-expected-output") + } + + it("leaves evaluation null when expected is absent") { + val agent = createAgent() + val run = createAgentRun() + + val dto = run.toDto(agent, serializer(), null) + + dto.evaluation shouldBe null + } + } + + describe("start with expected") { + + it("puts the serialized expected output and evaluationSet in the POST body") { + var capturedBody: String? = null + val engine = + MockEngine { request -> + when (request.method to request.url.encodedPath) { + (HttpMethod.Post to "/gateway/runs") -> { + capturedBody = request.bodyText() + respond( + content = ByteReadChannel("""{"runId":"run-1"}"""), + status = HttpStatusCode.Created, + headers = headersOf(HttpHeaders.ContentType, "application/json"), + ) + } + + else -> { + error("Unexpected endpoint called! ${request.url.encodedPath}") + } + } + } + + val agent = structuredAgent(mockPlatform(engine)) + + agent.start( + expected = Expected(evaluationSet = "invoice-golden-set", output = finishedInvoice), + ) + + val body = capturedBody.shouldNotBeNull() + val runDto = lenientJson.decodeFromString(RunDto.serializer(), body) + val evaluation = runDto.evaluation.shouldNotBeNull() + evaluation.evaluationSet shouldBe "invoice-golden-set" + evaluation.expectedResponse shouldBe Json.encodeToString(serializer(), finishedInvoice) + } + + it("populates run.platformRunId from the 201 RunCreatedDto body") { + val engine = + MockEngine { request -> + when (request.method to request.url.encodedPath) { + (HttpMethod.Post to "/gateway/runs") -> { + respond( + content = ByteReadChannel("""{"runId":"run-42"}"""), + status = HttpStatusCode.Created, + headers = headersOf(HttpHeaders.ContentType, "application/json"), + ) + } + + else -> { + error("Unexpected endpoint called! ${request.url.encodedPath}") + } + } + } + + val agent = structuredAgent(mockPlatform(engine)) + + val run = agent.start() + + run.platformRunId shouldBe RunId("run-42") + } + + it("leaves run.platformRunId null when the 201 body is empty (old gateways)") { + val engine = + MockEngine { request -> + when (request.method to request.url.encodedPath) { + (HttpMethod.Post to "/gateway/runs") -> { + respond(content = ByteReadChannel.Empty, status = HttpStatusCode.Created) + } + + else -> { + error("Unexpected endpoint called! ${request.url.encodedPath}") + } + } + } + + val agent = structuredAgent(mockPlatform(engine)) + + val run = agent.start() + + run.platformRunId shouldBe null + } + } + + describe("addToEvaluationSet") { + + it("POSTs the serialized expected output to /gateway/runs/{runId}/annotations") { + var capturedPath: String? = null + var capturedBody: String? = null + val engine = + MockEngine { request -> + capturedPath = request.url.encodedPath + capturedBody = request.bodyText() + respond(content = ByteReadChannel.Empty, status = HttpStatusCode.OK) + } + + val agent = structuredAgent(mockPlatform(engine)) + + val result = + agent.addToEvaluationSet( + runId = "run-99", + evaluationSet = "golden", + expected = InvoiceFields("INV-009", "9.99"), + ) + + result shouldBe EvaluationSubmitResult.Success + capturedPath shouldBe "/gateway/runs/run-99/annotations" + val body = capturedBody.shouldNotBeNull() + val evaluationDto = lenientJson.decodeFromString(RunEvaluationDto.serializer(), body) + evaluationDto.evaluationSet shouldBe "golden" + evaluationDto.expectedResponse shouldBe + Json.encodeToString(serializer(), InvoiceFields("INV-009", "9.99")) + } + } + }) diff --git a/src/platform/wirespec/gateway.ws b/src/platform/wirespec/gateway.ws index aa7518752..ced279b7f 100644 --- a/src/platform/wirespec/gateway.ws +++ b/src/platform/wirespec/gateway.ws @@ -4,7 +4,8 @@ type RunDto { config: ConfigDto, result: ResultDto, messages: MessageDto[], - modelRequests: ModelRequestInfoDto[] + modelRequests: ModelRequestInfoDto[], + evaluation: RunEvaluationDto? } type ModelRequestInfoDto { @@ -213,8 +214,17 @@ type ServerErrorDto { description: String } +type RunEvaluationDto { + evaluationSet: String, + expectedResponse: String +} + +type RunCreatedDto { + runId: String +} + endpoint Gateway POST RunDto /gateway/runs -> { - 201 -> Unit + 201 -> RunCreatedDto 401 -> Unit 400 -> GatewayClientErrorDto 500 -> ServerErrorDto @@ -236,3 +246,11 @@ endpoint GetRuns GET /gateway/runs ? { tags: String? } -> { 401 -> Unit 500 -> ServerErrorDto } + +endpoint AddRunAnnotations POST RunEvaluationDto /gateway/runs/{runId: String}/annotations -> { + 200 -> Unit + 400 -> GatewayClientErrorDto + 401 -> Unit + 404 -> Unit + 500 -> ServerErrorDto +}