diff --git a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java index 7c7770172..121f56ddd 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java +++ b/isthmus/src/test/java/io/substrait/isthmus/PlanTestBase.java @@ -20,8 +20,10 @@ import io.substrait.type.Type; import io.substrait.type.TypeCreator; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.type.RelDataType; @@ -45,11 +47,28 @@ public static String asString(String resource) throws IOException { return Resources.toString(Resources.getResource(resource), Charsets.UTF_8); } - public static List tpchSchemaCreateStatements() throws IOException { - String[] values = asString("tpch/schema.sql").split(";"); - return Arrays.stream(values) - .filter(t -> !t.trim().isBlank()) - .collect(java.util.stream.Collectors.toList()); + /** Holder class to load TPC-H create statements only once on first access. 
*/ + private static final class TpchCreateStatementsHolder { + static final List createStatements; + + static { + final String[] values; + try { + values = asString("tpch/schema.sql").split(";"); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + + createStatements = + Arrays.stream(values) + .map(String::trim) + .filter(s -> !s.isBlank()) + .collect(Collectors.toList()); + } + } + + public static List tpchSchemaCreateStatements() { + return TpchCreateStatementsHolder.createStatements; } protected Plan assertProtoPlanRoundrip(String query) throws IOException, SqlParseException { diff --git a/isthmus/src/test/java/io/substrait/isthmus/TpcdsQueryNoValidation.java b/isthmus/src/test/java/io/substrait/isthmus/TpcdsQueryNoValidation.java deleted file mode 100644 index 60559ad48..000000000 --- a/isthmus/src/test/java/io/substrait/isthmus/TpcdsQueryNoValidation.java +++ /dev/null @@ -1,51 +0,0 @@ -package io.substrait.isthmus; - -import com.google.protobuf.util.JsonFormat; -import java.util.Set; -import java.util.stream.IntStream; -import org.apache.calcite.adapter.tpcds.TpcdsSchema; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; - -/** - * - * - *

Setup of Schema and Queries

- * - *
  • Schema using `org.apache.calcite.adapter.tpcds.TpcdsSchema` from - * `org.apache.calcite:calcite-plus:1.28.0` - *
  • For queries started with `net.hydromatic.tpcds.query.Query` and then fixed generation issues - * replacing with specific queries from Spark SQL tpcds benchmark. - * - *

    Generator and query parsing issues and fixes

    - * - *
  • `substr` instead of `substring` - *
  • keywords used `returns`, `at`,.... Change to `rets`, `at`, ... - *
  • doesn't handle many kinds of generator expressions like: `Define - * SDATE=date([YEAR]+"-01-01",[YEAR]+"-07-01",sales);`, `Define - * CATEGORY=ulist(dist(categories,1,1),3);` and `define STATE= ulist(dist(fips_county, 3, 1), - * 9)`. So replaced with constants from spark sql tpcds query. - *
  • Interval specified as `30 days`; changed to `interval '30' day` - */ -public class TpcdsQueryNoValidation extends PlanTestBase { - - static final Set EXCLUDED = Set.of(9, 27, 36, 70, 86, 98); - - static IntStream testCases() { - return IntStream.rangeClosed(1, 99).filter(n -> !EXCLUDED.contains(n)); - } - - /** - * This test only validates that generating substrait plans for TPC-DS queries does not fail. As - * of now this test does not validate correctness of the generated plan - */ - @ParameterizedTest - @MethodSource("testCases") - void testQuery(int i) throws Exception { - SqlToSubstrait s = new SqlToSubstrait(); - TpcdsSchema schema = new TpcdsSchema(1.0); - String sql = asString(String.format("tpcds/queries/%02d.sql", i)); - var plan = s.execute(sql, "tpcds", schema); - System.out.println(JsonFormat.printer().print(plan)); - } -} diff --git a/isthmus/src/test/java/io/substrait/isthmus/TpcdsQueryTest.java b/isthmus/src/test/java/io/substrait/isthmus/TpcdsQueryTest.java new file mode 100644 index 000000000..68bb87677 --- /dev/null +++ b/isthmus/src/test/java/io/substrait/isthmus/TpcdsQueryTest.java @@ -0,0 +1,61 @@ +package io.substrait.isthmus; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.substrait.plan.Plan.Root; +import io.substrait.plan.ProtoPlanConverter; +import io.substrait.proto.Plan; +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.stream.IntStream; +import org.apache.calcite.adapter.tpcds.TpcdsSchema; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.sql.parser.SqlParseException; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +/** TPC-DS test to convert SQL to Substrait and then convert those plans back to SQL. 
*/ +public class TpcdsQueryTest extends PlanTestBase { + private static final Set toSubstraitExclusions = Set.of(9, 27, 36, 70, 86, 98); + private static final Set fromSubstraitExclusions = Set.of(6, 8, 67); + + private final TpcdsSchema schema = new TpcdsSchema(1.0); + private final ProtoPlanConverter planConverter = new ProtoPlanConverter(); + + static IntStream testCases() { + return IntStream.rangeClosed(1, 99).filter(n -> !toSubstraitExclusions.contains(n)); + } + + /** + * Note that this test does not currently validate the correctness of the Substrait plan; just + * that the SQL can be converted to Substrait and back to SQL without error. + */ + @ParameterizedTest + @MethodSource("testCases") + public void testQuery(int query) throws IOException { + String inputSql = asString(String.format("tpcds/queries/%02d.sql", query)); + + Plan plan = assertDoesNotThrow(() -> toSubstraitPlan(inputSql), "SQL to Substrait"); + + if (!fromSubstraitExclusions.contains(query)) { + assertDoesNotThrow(() -> toSql(plan), "Substrait to SQL"); + } + } + + private Plan toSubstraitPlan(String sql) throws SqlParseException, IOException { + return new SqlToSubstrait().execute(sql, "tpcds", schema); + } + + private String toSql(Plan plan) { + List roots = planConverter.from(plan).getRoots(); + assertEquals(1, roots.size(), "number of roots"); + + Root root = roots.get(0); + RelRoot relRoot = new SubstraitToCalcite(extensions, typeFactory).convert(root); + RelNode project = relRoot.project(true); + return SubstraitToSql.toSql(project); + } +} diff --git a/isthmus/src/test/java/io/substrait/isthmus/TpchQueryNoValidation.java b/isthmus/src/test/java/io/substrait/isthmus/TpchQueryNoValidation.java deleted file mode 100644 index a53fda3d0..000000000 --- a/isthmus/src/test/java/io/substrait/isthmus/TpchQueryNoValidation.java +++ /dev/null @@ -1,24 +0,0 @@ -package io.substrait.isthmus; - -import com.google.protobuf.util.JsonFormat; -import java.util.Arrays; -import 
org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; - -public class TpchQueryNoValidation extends PlanTestBase { - - @ParameterizedTest - // @ValueSource(ints = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22}) - @ValueSource( - ints = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22}) - public void tpch(int query) throws Exception { - SqlToSubstrait s = new SqlToSubstrait(); - String[] values = asString("tpch/schema.sql").split(";"); - var creates = - Arrays.stream(values) - .filter(t -> !t.trim().isBlank()) - .collect(java.util.stream.Collectors.toList()); - var plan = s.execute(asString(String.format("tpch/queries/%02d.sql", query)), creates); - System.out.println(JsonFormat.printer().print(plan)); - } -} diff --git a/isthmus/src/test/java/io/substrait/isthmus/TpchQueryTest.java b/isthmus/src/test/java/io/substrait/isthmus/TpchQueryTest.java new file mode 100644 index 000000000..91f3bbf00 --- /dev/null +++ b/isthmus/src/test/java/io/substrait/isthmus/TpchQueryTest.java @@ -0,0 +1,59 @@ +package io.substrait.isthmus; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.substrait.plan.Plan.Root; +import io.substrait.plan.ProtoPlanConverter; +import io.substrait.proto.Plan; +import java.io.IOException; +import java.util.List; +import java.util.Set; +import java.util.stream.IntStream; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.sql.parser.SqlParseException; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +/** TPC-H test to convert SQL to Substrait and then convert those plans back to SQL. 
*/ +public class TpchQueryTest extends PlanTestBase { + private static final Set fromSubstraitExclusions = Set.of(17); + + private final ProtoPlanConverter planConverter = new ProtoPlanConverter(); + + static IntStream testCases() { + return IntStream.rangeClosed(1, 22); + } + + /** + * Note that this test does not currently validate the correctness of the Substrait plan; just + * that the SQL can be converted to Substrait and back to SQL without error. + */ + @ParameterizedTest + @MethodSource("testCases") + public void testQuery(int query) throws IOException { + String inputSql = asString(String.format("tpch/queries/%02d.sql", query)); + + Plan plan = assertDoesNotThrow(() -> toSubstraitPlan(inputSql), "SQL to Substrait"); + + if (!fromSubstraitExclusions.contains(query)) { + assertDoesNotThrow(() -> toSql(plan), "Substrait to SQL"); + } + } + + private Plan toSubstraitPlan(String sql) throws SqlParseException { + List createStatements = tpchSchemaCreateStatements(); + return new SqlToSubstrait().execute(sql, createStatements); + } + + private String toSql(Plan plan) { + List roots = planConverter.from(plan).getRoots(); + assertEquals(1, roots.size(), "number of roots"); + + Root root = roots.get(0); + RelRoot relRoot = new SubstraitToCalcite(extensions, typeFactory).convert(root); + RelNode project = relRoot.project(true); + return SubstraitToSql.toSql(project); + } +}