Skip to content

Commit 0706ae1

Browse files
authored
Merge pull request #152 from DNAstack/CU-86b7ugbme
[CU-86b7ugbme] Fixed SQL identifier quoting
2 parents df9b63c + 672a726 commit 0706ae1

2 files changed

Lines changed: 79 additions & 34 deletions

File tree

src/main/java/com/dnastack/ga4gh/dataconnect/adapter/trino/TrinoDataConnectAdapter.java

Lines changed: 71 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import lombok.Getter;
2020
import lombok.SneakyThrows;
2121
import lombok.extern.slf4j.Slf4j;
22-
import org.apache.commons.lang3.StringUtils;
2322
import org.jdbi.v3.core.Jdbi;
2423
import org.jetbrains.annotations.NotNull;
2524
import org.jetbrains.annotations.Nullable;
@@ -62,6 +61,18 @@ public String toString() {
6261
private static final Pattern qualifiedNameMatcher =
6362
Pattern.compile("^\"?[^\"]+\"?\\.\"?[^\"]+\"?\\.\"?[^\"]+\"?$");
6463

64+
/**
65+
* Pattern to match fully-qualified table names (catalog.schema.table) in FROM and JOIN clauses.
66+
* Matches unquoted identifiers that contain dots, capturing the three parts separately.
67+
* Does not match already-quoted identifiers.
68+
*/
69+
private static final Pattern FROM_TABLE_PATTERN = Pattern.compile(
70+
"(?i)(FROM|JOIN)\\s+(?!\")" + // FROM or JOIN keyword, not followed by quote
71+
"([a-zA-Z_][a-zA-Z0-9_-]*)\\." + // catalog (unquoted identifier, may contain hyphens)
72+
"([a-zA-Z_][a-zA-Z0-9_-]*)\\." + // schema (unquoted identifier, may contain hyphens)
73+
"([a-zA-Z0-9_][a-zA-Z0-9_-]*)" // table (can start with digit, may contain hyphens)
74+
);
75+
6576
private final Map<String, Set<String>> trinoSchemaCache;
6677
private final Map<String, Set<String>> trinoCatalogCache;
6778

@@ -128,9 +139,16 @@ public SQLFunction(MatchResult matchResult) {
128139

129140
}
130141

131-
//rewrites the query by replacing all instances of functionName(a_0, a_1)
132-
//with a_argIndex
133-
private String rewriteQuery(String query, String functionName, int argIndex) {
142+
143+
/**
144+
rewrites the query by replacing all instances of functionName(a_0, a_1) with a_argIndex
145+
*
146+
* @param query
147+
* @param functionName
148+
* @param argIndex
149+
* @return
150+
*/
151+
private String rewriteFunctionNameIndex(String query, String functionName, int argIndex) {
134152
return biFunctionPattern.matcher(query)
135153
.replaceAll(matchResult -> {
136154
SQLFunction sf = new SQLFunction(matchResult);
@@ -242,8 +260,7 @@ public TableData search(
242260
Map<String, String> extraCredentials,
243261
DataModel dataModel
244262
) {
245-
246-
String rewrittenQuery = rewriteQuery(query, "ga4gh_type", 0);
263+
String rewrittenQuery = applyQueryRewrites(query);
247264
TrinoDataPage response = client.query(rewrittenQuery, extraCredentials);
248265
QueryJob queryJob = createQueryJob(response.id(), query, dataModel, response.nextUri());
249266
return toTableData(response, queryJob, request);
@@ -505,10 +522,7 @@ private TablesList getTables(CatalogSchema current, CatalogSchema next, HttpServ
505522
public TableData getTableData(String tableName, HttpServletRequest request, Map<String, String> extraCredentials) {
506523
// Get table JSON schema from tables registry if one exists for this table (for tables from trino-public)
507524
DataModel dataModel = getDataModelFromSupplier(tableName);
508-
//Add quotes to tableName in the query. Table name can be of the format <catalog_name>.<datasource_name>.tableName
509-
//So if the tableName has two dots in it, then everything after the third dot, should come within quotes.
510-
String validTableName = getTableNameInCorrectFormat(tableName);
511-
TableData tableData = search("SELECT * FROM " + validTableName, request, extraCredentials, dataModel);
525+
TableData tableData = search("SELECT * FROM " + tableName, request, extraCredentials, dataModel);
512526

513527
// Populate the dataModel only if there is tableData
514528
if (!tableData.getData().isEmpty()) {
@@ -542,10 +556,7 @@ public TableInfo getTableInfo(
542556
log.info("Data model supplier returned null for table: '{}'. Falling back to trino query", tableName);
543557
// since the data model was not found in the supplier, perform a more expensive query to fallback to trino and fetch a single
544558
// row of data.
545-
//Add quotes to tableName in the query. Table name can be of the format <catalog_name>.<datasource_name>.tableName
546-
//So if the tableName has two dots in it, then everything after the third dot, should come within quotes.
547-
String validTableName = getTableNameInCorrectFormat(tableName);
548-
TableData tableData = searchAll("SELECT * FROM " + validTableName + " LIMIT 1", request, extraCredentials, dataModel);
559+
TableData tableData = searchAll("SELECT * FROM " + tableName + " LIMIT 1", request, extraCredentials, dataModel);
549560
log.info("Data model is empty in tables registry for table {}.", tableName);
550561
dataModel = tableData.getDataModel();
551562
dataModel.setId(getDataModelId(tableName, request));
@@ -554,26 +565,12 @@ public TableInfo getTableInfo(
554565
return new TableInfo(tableName, dataModel.getDescription(), dataModel, null);
555566
}
556567

557-
private String getTableNameInCorrectFormat(String tableName) {
558-
String validTableName = tableName;
559-
if (StringUtils.countMatches(tableName, ".") >= 2) {
560-
561-
// If there are two or more dots, then quote the entire part after the second dot(assuming that this will be the table name).
562-
int secondIndex = StringUtils.ordinalIndexOf(tableName, ".", 2);
563-
564-
//Everything before second catalog name will be catalog(+schema)
565-
String catalogAndSchema = tableName.substring(0, secondIndex + 1);
566-
String table = tableName.substring(secondIndex + 1);
567-
568-
//If the table name doesn't starts with or ends with quotes then add quotes
569-
if (!table.startsWith("\"") || !table.endsWith("\"")) {
570-
table = "\"" + table + "\"";
571-
}
572-
validTableName = catalogAndSchema + table;
573-
} else {
574-
log.warn("Table name {} has less than 2 dots in it.", tableName);
575-
}
576-
return validTableName;
568+
/**
569+
* Quotes the given string so it can be used in a SQL query as an identifier
570+
* (for example, a catalog, schema, table, or column name).
571+
*/
572+
private static String quoteIdentifier(String identifier) {
573+
return "\"" + identifier.replace("\"", "\"\"") + "\"";
577574
}
578575

579576
private boolean isValidTrinoName(String tableName) {
@@ -1059,4 +1056,44 @@ private QueryJob getQueryJob(String id) {
10591056
.orElseThrow(() -> new InvalidQueryJobException(id));
10601057
}
10611058

1059+
/**
1060+
* Applies all query rewrites in sequence.
1061+
* Add new rewrite steps here to keep the transformation pipeline in one place
1062+
* @param query input query
1063+
*/
1064+
private String applyQueryRewrites(String query) {
1065+
String result = query;
1066+
result = rewriteFunctionNameIndex(result, "ga4gh_type", 0);
1067+
result = quoteTableNamesInQuery(result);
1068+
return result;
1069+
}
1070+
1071+
/**
1072+
* Quotes fully-qualified table names in FROM and JOIN clauses.
1073+
* Converts "FROM catalog.schema.table" to "FROM \"catalog\".\"schema\".\"table\"".
1074+
* This prevents SQL parsing errors when table names start with numbers (e.g., "03_chris").
1075+
* @param query input query
1076+
*/
1077+
private String quoteTableNamesInQuery(String query) {
1078+
Matcher matcher = FROM_TABLE_PATTERN.matcher(query);
1079+
StringBuilder result = new StringBuilder();
1080+
while (matcher.find()) {
1081+
String keyword = matcher.group(1);
1082+
String catalog = matcher.group(2);
1083+
String schema = matcher.group(3);
1084+
String table = matcher.group(4);
1085+
String replacement = keyword + " " +
1086+
quoteIdentifier(catalog) + "." +
1087+
quoteIdentifier(schema) + "." +
1088+
quoteIdentifier(table);
1089+
matcher.appendReplacement(result, Matcher.quoteReplacement(replacement));
1090+
}
1091+
matcher.appendTail(result);
1092+
String transformedQuery = result.toString();
1093+
if (!transformedQuery.equals(query)) {
1094+
log.debug("Quoted table names in query: {} -> {}", query, transformedQuery);
1095+
}
1096+
return transformedQuery;
1097+
}
1098+
10621099
}

src/test/java/com/dnastack/ga4gh/dataconnect/adapter/trino/TrinoDataConnectAdapterTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,4 +1103,12 @@ public void getTablesByCatalogAndSchema_singleCatalog_lastSchema() {
11031103
assertNull(tablesList.getErrors());
11041104
}
11051105

1106+
@Test
1107+
public void quoteTableNamesInQuery_should_quoteTableNameStartingWithDigit() {
1108+
// Table names starting with a digit (e.g., "03_test") cause Trino parsing errors if unquoted
1109+
String query = "SELECT * FROM bigquery_catalog.some_dataset.03_test_table WHERE id = 1";
1110+
String result = ReflectionTestUtils.invokeMethod(dataConnectAdapter, "quoteTableNamesInQuery", query);
1111+
assertThat(result, equalTo("SELECT * FROM \"bigquery_catalog\".\"some_dataset\".\"03_test_table\" WHERE id = 1"));
1112+
}
1113+
11061114
}

0 commit comments

Comments
 (0)