From 0f6c5710d31984ad3b41925dc18e96b91173e32a Mon Sep 17 00:00:00 2001 From: Alex Z Date: Mon, 16 Mar 2026 16:52:17 -0700 Subject: [PATCH 1/2] tool processing for import --- bindings/typescript/src/converters.ts | 28 ++ bindings/typescript/src/wasm.ts | 7 +- crates/lingua/src/processing/import.rs | 80 +++++- crates/lingua/src/processing/mod.rs | 5 +- crates/lingua/src/providers/openai/convert.rs | 24 -- crates/lingua/src/providers/openai/params.rs | 245 +++++++++++++++++- crates/lingua/src/wasm.rs | 14 + 7 files changed, 370 insertions(+), 33 deletions(-) diff --git a/bindings/typescript/src/converters.ts b/bindings/typescript/src/converters.ts index 8e9d1f4b..9de24218 100644 --- a/bindings/typescript/src/converters.ts +++ b/bindings/typescript/src/converters.ts @@ -22,6 +22,11 @@ type ImportSpan = { [key: string]: unknown; }; +export type ImportedSpanData = { + messages: Message[]; + metadata?: unknown; +}; + type GoogleWasmExports = { google_contents_to_lingua: (value: unknown) => unknown; lingua_to_google_contents: (value: unknown) => unknown; @@ -382,6 +387,29 @@ export function importMessagesFromSpans( } } +/** + * Import messages and normalized metadata from logging spans by parsing input/output fields. + * + * The returned `metadata` is provider-normalized request metadata when the importer can infer it. + * Today this is used for OpenAI Responses spans so the UI can consume chat-completions-style params + * without running a separate TypeScript-side metadata transform. + */ +export function importSpanDataFromSpans( + spans: ImportSpan[] +): ImportedSpanData { + try { + const result = getWasm().import_span_data_from_spans(spans); + return convertMapsToObjects(result) as ImportedSpanData; + } catch (error: unknown) { + throw new ConversionError( + "Failed to import span data from spans", + undefined, + undefined, + error + ); + } +} + /** * Import and deduplicate messages from spans in a single operation * diff --git a/bindings/typescript/src/wasm.ts b/bindings/typescript/src/wasm.ts index 1043dfd7..940ac2b4 100644 --- a/bindings/typescript/src/wasm.ts +++ b/bindings/typescript/src/wasm.ts @@ -21,6 +21,7 @@ export { // Processing functions deduplicateMessages, importMessagesFromSpans, + importSpanDataFromSpans, importAndDeduplicateMessages, // Chat Completions validation @@ -45,4 +46,8 @@ export { } from "./converters"; // Re-export types -export type { ValidationResult, TransformStreamChunkResult } from "./converters"; +export type { + ImportedSpanData, + ValidationResult, + TransformStreamChunkResult, +} from "./converters"; diff --git a/crates/lingua/src/processing/import.rs b/crates/lingua/src/processing/import.rs index 6fa42cff..8729c696 100644 --- a/crates/lingua/src/processing/import.rs +++ b/crates/lingua/src/processing/import.rs @@ -11,8 +11,12 @@ use crate::providers::bedrock::convert::try_parse_bedrock_for_import; use crate::providers::google::convert::try_parse_google_for_import; #[cfg(feature = "openai")] use crate::providers::openai::convert::{ - try_parse_openai_for_import, try_system_message_from_openai_metadata, - ChatCompletionRequestMessageExt, + try_parse_openai_for_import, ChatCompletionRequestMessageExt, +}; +#[cfg(feature = "openai")] +use crate::providers::openai::params::{ + normalize_openai_responses_metadata_for_chat_completions, + try_system_message_from_openai_metadata, }; use crate::serde_json; use crate::serde_json::Value; @@ -35,6 +39,13 @@ pub struct Span { pub other: serde_json::Map, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ImportedSpanData { + pub messages: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, +} + /// Try to convert a value to lingua messages by attempting multiple format conversions fn try_converting_to_messages(data: &Value) -> Vec { if is_role_message_array(data) { @@ -587,8 +598,9 @@ fn try_choices_array_parsing(data: &Value) -> Option> { /// /// This function processes spans and extracts messages from their input/output fields, /// attempting to convert them from various provider formats to the lingua format. -pub fn import_messages_from_spans(spans: Vec) -> Vec { +pub fn import_span_data_from_spans(spans: Vec) -> ImportedSpanData { let mut messages = Vec::new(); + let mut normalized_metadata = None; for span in spans { let mut span_messages = Vec::new(); @@ -604,8 +616,8 @@ pub fn import_messages_from_spans(spans: Vec) -> Vec { } #[cfg(feature = "openai")] - if let Some(metadata) = span.other.get("metadata") { - if let Some(system_message) = try_system_message_from_openai_metadata(metadata) { + if let Some(span_metadata) = span.other.get("metadata") { + if let Some(system_message) = try_system_message_from_openai_metadata(span_metadata) { let has_system_message = span_messages .iter() .any(|message| matches!(message, Message::System { .. })); @@ -613,6 +625,11 @@ pub fn import_messages_from_spans(spans: Vec) -> Vec { span_messages.insert(0, system_message); } } + + if normalized_metadata.is_none() { + normalized_metadata = + normalize_openai_responses_metadata_for_chat_completions(span_metadata); + } } messages.extend(span_messages); @@ -631,7 +648,14 @@ pub fn import_messages_from_spans(spans: Vec) -> Vec { } } - messages + ImportedSpanData { + messages, + metadata: normalized_metadata, + } +} + +pub fn import_messages_from_spans(spans: Vec) -> Vec { + import_span_data_from_spans(spans).messages } /// Import and deduplicate messages from spans in a single operation @@ -639,3 +663,47 @@ pub fn import_and_deduplicate_messages(spans: Vec) -> Vec { let messages = import_messages_from_spans(spans); super::dedup::deduplicate_messages(messages) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::serde_json::json; + + #[test] + fn test_import_span_data_from_spans_normalizes_openai_responses_metadata() { + let imported = import_span_data_from_spans(vec![Span { + input: Some(json!([{ "role": "user", "content": "hello" }])), + output: None, + other: serde_json::Map::from_iter([( + "metadata".into(), + json!({ + "object": "response", + "id": "resp_123", + "tools": [{ + "type": "function", + "name": "lookup_weather", + "description": "Find weather", + "parameters": { "type": "object" } + }] + }), + )]), + }]); + + assert_eq!(imported.messages.len(), 1); + assert_eq!( + imported.metadata, + Some(json!({ + "object": "response", + "id": "resp_123", + "tools": [{ + "type": "function", + "function": { + "name": "lookup_weather", + "description": "Find weather", + "parameters": { "type": "object" } + } + }] + })) + ); + } +} diff --git a/crates/lingua/src/processing/mod.rs b/crates/lingua/src/processing/mod.rs index fd8d9696..bdbced04 100644 --- a/crates/lingua/src/processing/mod.rs +++ b/crates/lingua/src/processing/mod.rs @@ -8,7 +8,10 @@ pub use adapters::{ insert_opt_string, insert_opt_value, ProviderAdapter, }; pub use dedup::deduplicate_messages; -pub use import::{import_and_deduplicate_messages, import_messages_from_spans, Span}; +pub use import::{ + import_and_deduplicate_messages, import_messages_from_spans, import_span_data_from_spans, + ImportedSpanData, Span, +}; pub use transform::{ extract_model, parse_stream_event, response_to_universal, sanitize_payload, transform_request, transform_response, transform_stream_chunk, ParsedStreamEvent, TransformError, TransformResult, diff --git a/crates/lingua/src/providers/openai/convert.rs b/crates/lingua/src/providers/openai/convert.rs index 20768e83..82e1a8fb 100644 --- a/crates/lingua/src/providers/openai/convert.rs +++ b/crates/lingua/src/providers/openai/convert.rs @@ -3,7 +3,6 @@ use crate::import_parse::{ non_empty_messages, try_convert_non_empty, try_parse, try_parse_vec_or_single, }; use crate::providers::openai::generated as openai; -use crate::providers::openai::params::OpenAIResponsesExtrasView; use crate::serde_json; use crate::universal::convert::TryFromLLM; use crate::universal::defaults::{EMPTY_OBJECT_STR, REFUSAL_TEXT}; @@ -302,29 +301,6 @@ fn try_messages_from_openai_instructions(input: openai::Instructions) -> Option< } } -fn extract_instructions_from_openai_metadata_value(metadata: &serde_json::Value) -> Option { - let typed = match metadata { - serde_json::Value::String(metadata_json) => { - let parsed = serde_json::from_str::(metadata_json).ok()?; - serde_json::from_value::(parsed).ok()? - } - _ => serde_json::from_value::(metadata.clone()).ok()?, - }; - typed.instructions -} - -pub(crate) fn try_system_message_from_openai_metadata( - metadata: &serde_json::Value, -) -> Option { - let instructions = extract_instructions_from_openai_metadata_value(metadata)?; - if instructions.is_empty() { - return None; - } - Some(Message::System { - content: UserContent::String(instructions), - }) -} - pub(crate) fn try_parse_openai_for_import(data: &serde_json::Value) -> Option> { // Prefer chat-completions request messages before Responses InputItem parsing. // Chat-completions arrays can deserialize as InputItems, but that path drops diff --git a/crates/lingua/src/providers/openai/params.rs b/crates/lingua/src/providers/openai/params.rs index 54cd6b7b..f79bb56b 100644 --- a/crates/lingua/src/providers/openai/params.rs +++ b/crates/lingua/src/providers/openai/params.rs @@ -5,12 +5,18 @@ These structs use `#[serde(flatten)]` to automatically capture unknown fields, eliminating the need for explicit KNOWN_KEYS arrays. */ +use crate::capabilities::ProviderFormat; use crate::providers::openai::generated::{ ChatCompletionRequestMessage, Instructions, Reasoning, ReasoningEffort, }; -use crate::serde_json::Value; +use crate::providers::openai::tool_parsing::parse_openai_responses_tools_array; +use crate::serde_json::{self, Map, Value}; +use crate::universal::message::{Message, UserContent}; +use crate::universal::request::{ResponseFormatConfig, ToolChoiceConfig}; +use crate::universal::tools::tools_to_openai_chat_value; use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; +use std::convert::TryInto; /// OpenAI Chat Completions API request parameters. /// @@ -171,6 +177,154 @@ pub struct OpenAIResponsesExtrasView { pub service_tier: Option, } +#[derive(Debug, Clone, Default, Deserialize)] +struct OpenAIResponsesMetadataFingerprint { + pub object: Option, + pub id: Option, + pub tool_choice: Option, + pub parallel_tool_calls: Option, + pub service_tier: Option, + pub top_logprobs: Option, +} + +fn parse_metadata_object(metadata: &Value) -> Option> { + match metadata { + Value::String(metadata_json) => { + serde_json::from_str::>(metadata_json).ok() + } + Value::Object(map) => Some(map.clone()), + _ => None, + } +} + +fn is_openai_responses_metadata( + metadata: &Map, + extras: &OpenAIResponsesExtrasView, +) -> bool { + let fingerprint: OpenAIResponsesMetadataFingerprint = + serde_json::from_value(Value::Object(metadata.clone())).unwrap_or_default(); + + if fingerprint.object.as_deref() == Some("response") { + return true; + } + + if fingerprint + .id + .as_deref() + .is_some_and(|id| id.starts_with("resp_")) + { + return true; + } + + if extras + .tools + .as_ref() + .is_some_and(|tools| !parse_openai_responses_tools_array(tools).is_empty()) + { + return true; + } + + extras.instructions.is_some() + || fingerprint.tool_choice.is_some() + || fingerprint.parallel_tool_calls.is_some() + || fingerprint.service_tier.is_some() + || fingerprint.top_logprobs.is_some() +} + +pub(crate) fn extract_openai_responses_metadata_view( + metadata: &Value, +) -> Option<(Map, OpenAIResponsesExtrasView)> { + let metadata_object = parse_metadata_object(metadata)?; + let extras = + serde_json::from_value::(Value::Object(metadata_object.clone())) + .ok()?; + + if !is_openai_responses_metadata(&metadata_object, &extras) { + return None; + } + + Some((metadata_object, extras)) +} + +pub(crate) fn try_system_message_from_openai_metadata(metadata: &Value) -> Option { + let (_, extras) = extract_openai_responses_metadata_view(metadata)?; + let instructions = extras.instructions?; + if instructions.is_empty() { + return None; + } + + Some(Message::System { + content: UserContent::String(instructions), + }) +} + +pub(crate) fn normalize_openai_responses_metadata_for_chat_completions( + metadata: &Value, +) -> Option { + let (mut normalized, extras) = extract_openai_responses_metadata_view(metadata)?; + + if let Some(tools) = extras.tools.as_ref() { + let parsed_tools = parse_openai_responses_tools_array(tools); + if let Ok(Some(chat_tools)) = tools_to_openai_chat_value(&parsed_tools) { + normalized.insert("tools".into(), chat_tools); + } + } + + if let Some(tool_choice) = extras.tool_choice.as_ref() { + if let Ok(config) = <(ProviderFormat, &Value) as TryInto>::try_into(( + ProviderFormat::Responses, + tool_choice, + )) { + if let Ok(Some(chat_tool_choice)) = + config.to_provider(ProviderFormat::ChatCompletions, None) + { + normalized.insert("tool_choice".into(), chat_tool_choice); + } + } + } + + let max_output_tokens = extras + .max_output_tokens + .as_ref() + .and_then(|value| serde_json::from_value::(value.clone()).ok()); + + if let Some(reasoning_value) = extras.reasoning.as_ref() { + if let Ok(reasoning) = serde_json::from_value::(reasoning_value.clone()) { + let config = + crate::universal::request::ReasoningConfig::from((&reasoning, max_output_tokens)); + if let Ok(Some(Value::String(reasoning_effort))) = + config.to_provider(ProviderFormat::ChatCompletions, max_output_tokens) + { + normalized.insert("reasoning_effort".into(), Value::String(reasoning_effort)); + } + } + } + + if let Some(text_value) = extras.text.as_ref() { + if let Some(verbosity) = text_value.get("verbosity") { + normalized.insert("verbosity".into(), verbosity.clone()); + } + + if let Some(format) = text_value.get("format") { + if let Ok(config) = + <(ProviderFormat, &Value) as TryInto>::try_into(( + ProviderFormat::Responses, + format, + )) + { + if let Ok(Some(response_format)) = + config.to_provider(ProviderFormat::ChatCompletions) + { + normalized.insert("response_format".into(), response_format); + normalized.remove("text"); + } + } + } + } + + Some(Value::Object(normalized)) +} + #[cfg(test)] mod tests { use super::*; @@ -246,4 +400,93 @@ mod tests { // Custom field should be preserved assert_eq!(back.get("custom_field"), json.get("custom_field")); } + + #[test] + fn test_normalize_openai_responses_metadata_for_chat_completions() { + let metadata = json!({ + "object": "response", + "id": "resp_123", + "instructions": "Be helpful", + "tools": [{ + "type": "function", + "name": "lookup_weather", + "description": "Find weather", + "parameters": { "type": "object" }, + "strict": true + }], + "tool_choice": { + "type": "function", + "name": "lookup_weather" + }, + "reasoning": { + "effort": "high" + }, + "text": { + "verbosity": "low", + "format": { + "type": "json_schema", + "name": "forecast", + "schema": { "type": "object" }, + "strict": true + } + } + }); + + let normalized = + normalize_openai_responses_metadata_for_chat_completions(&metadata).unwrap(); + + assert_eq!(normalized.get("instructions"), Some(&json!("Be helpful"))); + assert_eq!(normalized.get("reasoning_effort"), Some(&json!("high"))); + assert_eq!(normalized.get("verbosity"), Some(&json!("low"))); + assert_eq!( + normalized.get("response_format"), + Some(&json!({ + "type": "json_schema", + "json_schema": { + "name": "forecast", + "schema": { "type": "object" }, + "strict": true + } + })) + ); + assert_eq!( + normalized.get("tool_choice"), + Some(&json!({ + "type": "function", + "function": { + "name": "lookup_weather" + } + })) + ); + assert_eq!( + normalized.get("tools"), + Some(&json!([{ + "type": "function", + "function": { + "name": "lookup_weather", + "description": "Find weather", + "parameters": { "type": "object" }, + "strict": true + } + }])) + ); + assert_eq!(normalized.get("text"), None); + } + + #[test] + fn test_normalize_openai_responses_metadata_requires_responses_fingerprint() { + let metadata = json!({ + "braintrust": { "integration_name": "langchain-py" }, + "reasoning": { "effort": "medium" }, + "text": { "verbosity": "high" }, + "tools": [{ + "type": "function", + "function": { + "name": "already_normalized" + } + }] + }); + + assert!(normalize_openai_responses_metadata_for_chat_completions(&metadata).is_none()); + } } diff --git a/crates/lingua/src/wasm.rs b/crates/lingua/src/wasm.rs index 7bdaabfa..2b9ca36f 100644 --- a/crates/lingua/src/wasm.rs +++ b/crates/lingua/src/wasm.rs @@ -139,6 +139,20 @@ pub fn import_messages_from_spans(value: JsValue) -> Result { .map_err(|e| JsValue::from_str(&format!("Failed to serialize result: {}", e))) } +/// Import messages and normalized metadata from spans +#[wasm_bindgen] +pub fn import_span_data_from_spans(value: JsValue) -> Result { + use crate::processing::import::{import_span_data_from_spans as import, Span}; + + let spans: Vec = serde_wasm_bindgen::from_value(value) + .map_err(|e| JsValue::from_str(&format!("Failed to parse spans: {}", e)))?; + + let imported = import(spans); + + serde_wasm_bindgen::to_value(&imported) + .map_err(|e| JsValue::from_str(&format!("Failed to serialize result: {}", e))) +} + /// Import and deduplicate messages from spans in a single operation #[wasm_bindgen] pub fn import_and_deduplicate_messages(value: JsValue) -> Result { From 1b03e28625d336338a40898ee776b7142033309f Mon Sep 17 00:00:00 2001 From: Alex Z Date: Mon, 16 Mar 2026 17:19:02 -0700 Subject: [PATCH 2/2] fixup --- crates/lingua/src/providers/openai/params.rs | 72 +++++++++++++------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/crates/lingua/src/providers/openai/params.rs b/crates/lingua/src/providers/openai/params.rs index f79bb56b..d75ec352 100644 --- a/crates/lingua/src/providers/openai/params.rs +++ b/crates/lingua/src/providers/openai/params.rs @@ -187,6 +187,12 @@ struct OpenAIResponsesMetadataFingerprint { pub top_logprobs: Option, } +#[derive(Debug, Clone, Default, Deserialize)] +struct OpenAIResponsesTextView { + pub verbosity: Option, + pub format: Option, +} + fn parse_metadata_object(metadata: &Value) -> Option> { match metadata { Value::String(metadata_json) => { @@ -301,24 +307,28 @@ pub(crate) fn normalize_openai_responses_metadata_for_chat_completions( } if let Some(text_value) = extras.text.as_ref() { - if let Some(verbosity) = text_value.get("verbosity") { - normalized.insert("verbosity".into(), verbosity.clone()); - } + if let Ok(text) = serde_json::from_value::(text_value.clone()) { + if let Some(verbosity) = text.verbosity { + normalized.insert("verbosity".into(), verbosity); + } - if let Some(format) = text_value.get("format") { - if let Ok(config) = - <(ProviderFormat, &Value) as TryInto>::try_into(( - ProviderFormat::Responses, - format, - )) - { - if let Ok(Some(response_format)) = - config.to_provider(ProviderFormat::ChatCompletions) + if let Some(format) = text.format { + if let Ok(config) = + <(ProviderFormat, &Value) as TryInto>::try_into(( + ProviderFormat::Responses, + &format, + )) { - normalized.insert("response_format".into(), response_format); - normalized.remove("text"); + if let Ok(Some(response_format)) = + config.to_provider(ProviderFormat::ChatCompletions) + { + normalized.insert("response_format".into(), response_format); + normalized.remove("text"); + } } } + } else { + return Some(Value::Object(normalized)); } } @@ -328,8 +338,21 @@ pub(crate) fn normalize_openai_responses_metadata_for_chat_completions( #[cfg(test)] mod tests { use super::*; + use crate::providers::openai::generated::ReasoningEffort; use crate::serde_json; use crate::serde_json::json; + use serde::Deserialize; + + #[derive(Debug, Deserialize)] + struct NormalizedChatMetadataView { + instructions: Option, + tools: Option, + tool_choice: Option, + response_format: Option, + reasoning_effort: Option, + verbosity: Option, + text: Option, + } #[test] fn test_chat_params_known_fields() { @@ -434,13 +457,14 @@ mod tests { let normalized = normalize_openai_responses_metadata_for_chat_completions(&metadata).unwrap(); + let normalized: NormalizedChatMetadataView = serde_json::from_value(normalized).unwrap(); - assert_eq!(normalized.get("instructions"), Some(&json!("Be helpful"))); - assert_eq!(normalized.get("reasoning_effort"), Some(&json!("high"))); - assert_eq!(normalized.get("verbosity"), Some(&json!("low"))); + assert_eq!(normalized.instructions.as_deref(), Some("Be helpful")); + assert_eq!(normalized.reasoning_effort, Some(ReasoningEffort::High)); + assert_eq!(normalized.verbosity.as_deref(), Some("low")); assert_eq!( - normalized.get("response_format"), - Some(&json!({ + normalized.response_format, + Some(json!({ "type": "json_schema", "json_schema": { "name": "forecast", @@ -450,8 +474,8 @@ mod tests { })) ); assert_eq!( - normalized.get("tool_choice"), - Some(&json!({ + normalized.tool_choice, + Some(json!({ "type": "function", "function": { "name": "lookup_weather" @@ -459,8 +483,8 @@ mod tests { })) ); assert_eq!( - normalized.get("tools"), - Some(&json!([{ + normalized.tools, + Some(json!([{ "type": "function", "function": { "name": "lookup_weather", @@ -470,7 +494,7 @@ mod tests { } }])) ); - assert_eq!(normalized.get("text"), None); + assert_eq!(normalized.text, None); } #[test]