From 2a73c6f890dd7719339218fee885138496460be4 Mon Sep 17 00:00:00 2001 From: Wendong Li Date: Mon, 12 Jan 2026 21:48:19 +0000 Subject: [PATCH 1/3] null value passthrough --- .../ml/services/openai/OpenAIPrompt.scala | 8 ++- .../services/openai/OpenAIPromptSuite.scala | 60 +++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala index d451a499b7..2a9f018886 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala @@ -462,8 +462,14 @@ class OpenAIPrompt(override val uid: String) extends Transformer attachmentMap: Map[String, String], attachmentOrder: Seq[String] ): Seq[OpenAICompositeMessage] = { + // Filter to get only non-null, non-empty path values val orderedAttachments = attachmentOrder.flatMap { columnName => - attachmentMap.get(columnName).map(_.trim).filter(_.nonEmpty) + attachmentMap.get(columnName).flatMap(v => Option(v).map(_.trim).filter(_.nonEmpty)) + } + + // If there are path columns but all are null/empty, pass through null + if (attachmentOrder.nonEmpty && orderedAttachments.isEmpty) { + return null //scalastyle:ignore null } val contentParts = buildContentParts(userMessage, orderedAttachments) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala index 469d608fa5..04edb17a1a 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala @@ -86,6 +86,27 @@ class OpenAIPromptSuite extends TransformerFuzzing[OpenAIPrompt] with OpenAIAPIK } } + test("createMessagesForRow returns null when all path columns are null") { + val prompt = new OpenAIPrompt() + val attachments = Map("filePath" -> null.asInstanceOf[String]) + val messages = prompt.createMessagesForRow("Summarize the file", attachments, Seq("filePath")) + assert(messages == null) + } + + test("createMessagesForRow returns messages when at least one path column has value") { + val prompt = new OpenAIPrompt() + val tempFile = Files.createTempFile("synapseml-openai", ".txt") + try { + Files.write(tempFile, "example content".getBytes(StandardCharsets.UTF_8)) + val attachments = Map("filePath" -> null.asInstanceOf[String], "anotherPath" -> tempFile.toString) + val messages = prompt.createMessagesForRow("Summarize", attachments, Seq("filePath", "anotherPath")) + assert(messages != null) + assert(messages.nonEmpty) + } finally { + Files.deleteIfExists(tempFile) + } + } + test("RAI Usage") { val result = prompt .setDeploymentName(deploymentName) @@ -321,6 +342,45 @@ class OpenAIPromptSuite extends TransformerFuzzing[OpenAIPrompt] with OpenAIAPIK } } + test("null path columns return null output") { + val promptResponses = new OpenAIPrompt() + .setSubscriptionKey(openAIAPIKey) + .setDeploymentName(deploymentName) + .setCustomServiceName(openAIServiceName) + .setApiVersion("2025-04-01-preview") + .setApiType("responses") + .setColumnType("images", "path") + .setOutputCol("outParsed") + .setPromptTemplate("{questions}: {images}") + + val urlDF = Seq( + ( + "What's in this document?", + "https://mmlspark.blob.core.windows.net/datasets/OCR/paper.pdf" + ), + ( + "What's in this image?", + null.asInstanceOf[String] + ), + ( + "What's in this image?", + "https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png" + ) + ).toDF("questions", "images") + + val results = promptResponses + .transform(urlDF) + .select("outParsed") + .collect() + + // First row: valid path, should have output + assert(results(0).getString(0) != null) + // Second row: null path, should have null output + assert(results(1).get(0) == null) + // Third row: valid path, should have output + assert(results(2).getString(0) != null) + } + ignore("Custom EndPoint") { lazy val accessToken: String = sys.env.getOrElse("CUSTOM_ACCESS_TOKEN", "") lazy val customRootUrlValue: String = sys.env.getOrElse("CUSTOM_ROOT_URL", "") From 8d14f1a130c1e25cd27717e0f14001305e6f7892 Mon Sep 17 00:00:00 2001 From: Wendong Li Date: Mon, 12 Jan 2026 21:52:22 +0000 Subject: [PATCH 2/3] real null --- .../azure/synapse/ml/services/openai/OpenAIPromptSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala index 04edb17a1a..4c34bf2114 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala @@ -360,7 +360,7 @@ class OpenAIPromptSuite extends TransformerFuzzing[OpenAIPrompt] with OpenAIAPIK ), ( "What's in this image?", - null.asInstanceOf[String] + null // scalasty:ignore ), ( "What's in this image?", From c5ce0d8fa0077ee738228f3832b1a1260a38dc05 Mon Sep 17 00:00:00 2001 From: Wendong Li Date: Tue, 13 Jan 2026 17:43:16 +0000 Subject: [PATCH 3/3] fix style --- .../synapse/ml/services/openai/OpenAIPrompt.scala | 10 +++++----- .../synapse/ml/services/openai/OpenAIPromptSuite.scala | 8 +++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala index 2a9f018886..e4f8645972 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPrompt.scala @@ -469,12 +469,12 @@ class OpenAIPrompt(override val uid: String) extends Transformer // If there are path columns but all are null/empty, pass through null if (attachmentOrder.nonEmpty && orderedAttachments.isEmpty) { - return null //scalastyle:ignore null + null //scalastyle:ignore null + } else { + val contentParts = buildContentParts(userMessage, orderedAttachments) + val messages = getPromptsForMessage(Left(contentParts)) + messages } - - val contentParts = buildContentParts(userMessage, orderedAttachments) - val messages = getPromptsForMessage(Left(contentParts)) - messages } private def buildContentParts(promptText: String, attachmentPaths: Seq[String]): Seq[Map[String, String]] = { diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala index 4c34bf2114..b66aa6a203 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/openai/OpenAIPromptSuite.scala @@ -86,9 +86,10 @@ class OpenAIPromptSuite extends TransformerFuzzing[OpenAIPrompt] with OpenAIAPIK } } + // scalastyle:off null test("createMessagesForRow returns null when all path columns are null") { val prompt = new OpenAIPrompt() - val attachments = Map("filePath" -> null.asInstanceOf[String]) + val attachments = Map("filePath" -> null) val messages = prompt.createMessagesForRow("Summarize the file", attachments, Seq("filePath")) assert(messages == null) } @@ -98,7 +99,7 @@ class OpenAIPromptSuite extends TransformerFuzzing[OpenAIPrompt] with OpenAIAPIK val tempFile = Files.createTempFile("synapseml-openai", ".txt") try { Files.write(tempFile, "example content".getBytes(StandardCharsets.UTF_8)) - val attachments = Map("filePath" -> null.asInstanceOf[String], "anotherPath" -> tempFile.toString) + val attachments = Map("filePath" -> null, "anotherPath" -> tempFile.toString) val messages = prompt.createMessagesForRow("Summarize", attachments, Seq("filePath", "anotherPath")) assert(messages != null) assert(messages.nonEmpty) @@ -106,6 +107,7 @@ class OpenAIPromptSuite extends TransformerFuzzing[OpenAIPrompt] with OpenAIAPIK Files.deleteIfExists(tempFile) } } + // scalastyle:on null test("RAI Usage") { val result = prompt @@ -360,7 +362,7 @@ class OpenAIPromptSuite extends TransformerFuzzing[OpenAIPrompt] with OpenAIAPIK ), ( "What's in this image?", - null // scalasty:ignore + null // scalastyle:ignore null ), ( "What's in this image?",