From 9abaca8c7d5797f57997a6bb31ae314906913ae5 Mon Sep 17 00:00:00 2001
From: Aleksandr Misonizhnik
Date: Wed, 20 May 2026 01:45:26 +0200
Subject: [PATCH 1/2] test(querylang): add nested-array element taint reproducer (issue 98)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a minimal IFDS taint reproducer: when an element of a tainted array is
itself an array that is later indexed (Object[] holding a String[]), the taint
is dropped. The source's element path is transferred to a reference fact on the
destination (types.$) instead of a nested element fact (types[*]), so the inner
element read is not considered tainted.

issue98 carries two positive samples:
- PositiveScalarElementControl — scalar element of tainted array -> sink (passes today)
- PositiveNestedArrayElement — (String[]) args[1] then types[0] -> sink (missed)

This is the shape of Apache Dubbo GenericFilter's provider path
(String[] parameterTypes = (String[]) invocation.getArguments()[1] ->
ReflectUtils.name2class(parameterTypes[i])).

The test is @Disabled pending an engine fix, matching the convention used for
the other known-miss issues.
---
 .../samples/src/main/java/issues/issue98.java | 60 +++++++++++++++++++
 .../src/main/resources/issues/issue98.yaml    | 15 +++++
 .../org/opentaint/semgrep/IssuesTest.kt       |  5 ++
 3 files changed, 80 insertions(+)
 create mode 100644 core/opentaint-java-querylang/samples/src/main/java/issues/issue98.java
 create mode 100644 core/opentaint-java-querylang/samples/src/main/resources/issues/issue98.yaml

diff --git a/core/opentaint-java-querylang/samples/src/main/java/issues/issue98.java b/core/opentaint-java-querylang/samples/src/main/java/issues/issue98.java
new file mode 100644
index 000000000..a1e82657a
--- /dev/null
+++ b/core/opentaint-java-querylang/samples/src/main/java/issues/issue98.java
@@ -0,0 +1,60 @@
+package issues;
+
+import base.RuleSample;
+import base.RuleSet;
+
+/**
+ * Repro: taint is lost when an element of a tainted array is itself an array
+ * that is later indexed (array-of-arrays / Object[] holding a String[]).
+ *
+ * The source marks the returned {@code Object[]} with element-level taint
+ * ({@code args[*]}). Reading a scalar element ({@code args[0]}) keeps the taint
+ * (see {@link PositiveScalarElementControl}), but reading an element that is
+ * itself an array and then indexing that inner array
+ * ({@code ((String[]) args[1])[0]}) drops it: the engine transfers the source's
+ * element path to a reference fact on the destination ({@code types.$}) instead
+ * of a nested element fact ({@code types[*]}), so the inner element read is not
+ * considered tainted.
+ *
+ * This is the shape of Apache Dubbo's GenericFilter provider path:
+ *   String[] parameterTypes = (String[]) invocation.getArguments()[1];
+ *   ... ReflectUtils.name2class(parameterTypes[i]) ...
+ */
+@RuleSet("issues/issue98.yaml")
+public abstract class issue98 implements RuleSample {
+
+    Object[] src() {
+        return new Object[] {"", new String[] {""}};
+    }
+
+    void sink(String data) {}
+
+    /**
+     * Control: scalar element of a tainted array reaches the sink. This already
+     * works today and anchors that the source/sink/element-read all function;
+     * the only difference from the failing case is that the element here is a
+     * scalar, not a nested array.
+     */
+    static class PositiveScalarElementControl extends issue98 {
+        @Override
+        public void entrypoint() {
+            Object[] args = src();
+            String name = (String) args[0];
+            sink(name);
+        }
+    }
+
+    /**
+     * False negative: the element {@code args[1]} is itself a {@code String[]};
+     * indexing it ({@code types[0]}) loses the taint. Expected: a finding at the
+     * {@code sink(types[0])} call. Observed: none.
+     */
+    static class PositiveNestedArrayElement extends issue98 {
+        @Override
+        public void entrypoint() {
+            Object[] args = src();
+            String[] types = (String[]) args[1];
+            sink(types[0]);
+        }
+    }
+}
diff --git a/core/opentaint-java-querylang/samples/src/main/resources/issues/issue98.yaml b/core/opentaint-java-querylang/samples/src/main/resources/issues/issue98.yaml
new file mode 100644
index 000000000..2eb26a4d4
--- /dev/null
+++ b/core/opentaint-java-querylang/samples/src/main/resources/issues/issue98.yaml
@@ -0,0 +1,15 @@
+rules:
+  - id: i98
+    languages:
+      - java
+    severity: ERROR
+    message: tainted array element reaches sink
+    mode: taint
+    pattern-sources:
+      - patterns:
+          - focus-metavariable: $X
+          - pattern: $X = src();
+    pattern-sinks:
+      - patterns:
+          - pattern: sink($Y);
+          - focus-metavariable: $Y
diff --git a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt
index 185fe851c..7bf4e1114 100644
--- a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt
+++ b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt
@@ -16,6 +16,7 @@ import issues.issue94
 import issues.issue95
 import issues.issue96
 import issues.issue97
+import issues.issue98
 import org.junit.jupiter.api.AfterAll
 import org.junit.jupiter.api.Disabled
 import org.junit.jupiter.api.TestInstance
@@ -105,6 +106,10 @@ class IssuesTest : SampleBasedTest() {
     @Test
     fun `issue 97`() = runTest()
 
+    @Test
+    @Disabled // todo: nested array element taint — element of a tainted array that is itself an array loses taint when indexed
+    fun `issue 98`() = runTest()
+
     @AfterAll
     fun close() {
         closeRunner()

From 43c716d040af18c3dcf737a83fad7410ab2abee4 Mon Sep 17 00:00:00 2001
From: Aleksandr Misonizhnik
Date: Wed, 20 May 2026 01:46:33 +0200
Subject: [PATCH 2/2] test(querylang): enable issue 98 so the nested-array taint miss fails loudly

Remove @Disabled from the issue 98 reproducer. The test now fails (the
PositiveNestedArrayElement sample is missed) instead of being skipped, so the
engine regression is visible until the nested-array element taint propagation
is fixed.
---
 .../src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt
index 7bf4e1114..7e0aa4e5e 100644
--- a/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt
+++ b/core/opentaint-java-querylang/src/test/kotlin/org/opentaint/semgrep/IssuesTest.kt
@@ -106,8 +106,7 @@ class IssuesTest : SampleBasedTest() {
     @Test
     fun `issue 97`() = runTest()
 
-    @Test
-    @Disabled // todo: nested array element taint — element of a tainted array that is itself an array loses taint when indexed
+    @Test // todo: nested array element taint — element of a tainted array that is itself an array loses taint when indexed
     fun `issue 98`() = runTest()
 
     @AfterAll