Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,7 @@ fn invoke_base_query_options<IT: InnerTypes>(b: &NativeBaseQueryOptions<IT>) ->
r.record("join_graph", b.join_graph());
r.record("security_context", b.security_context());
r.record("join_hints", b.join_hints());
r.record("subquery_joins", b.subquery_joins());
r
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ const BRIDGES: BridgeSpec[] = [
'row_limit',
'security_context',
'segments',
'subquery_joins',
'time_dimensions',
'timezone',
'total_query',
Expand Down
2 changes: 2 additions & 0 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,7 @@ export class BaseQuery {
convertTzForRawTimeDimension: !!this.options.convertTzForRawTimeDimension,
maskedMembers: this.options.maskedMembers,
memberToAlias: this.options.memberToAlias,
subqueryJoins: this.options.subqueryJoins,
};

try {
Expand Down Expand Up @@ -1011,6 +1012,7 @@ export class BaseQuery {
securityContext: this.contextSymbols.securityContext,
cubestoreSupportMultistage: this.options.cubestoreSupportMultistage ?? getEnv('cubeStoreRollingWindowJoin'),
disableExternalPreAggregations: !!this.options.disableExternalPreAggregations,
subqueryJoins: this.options.subqueryJoins,
};

const buildResult = nativeBuildSqlAndParams(queryParams);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use super::join_graph::{JoinGraph, NativeJoinGraph};
use super::join_hints::JoinHintItem;
use super::options_member::OptionsMember;
use super::security_context::{NativeSecurityContext, SecurityContext};
use super::subquery_join::{NativeSubqueryJoin, SubqueryJoin};
use crate::cube_bridge::base_tools::{BaseTools, NativeBaseTools};
use crate::cube_bridge::evaluator::{CubeEvaluator, NativeCubeEvaluator};
use cubenativeutils::wrappers::serializer::{
Expand Down Expand Up @@ -239,6 +240,8 @@ pub trait BaseQueryOptions {
fn security_context(&self) -> Result<Rc<dyn SecurityContext>, CubeError>;
#[nbridge(field, optional, vec)]
fn join_hints(&self) -> Result<Option<Vec<JoinHintItem>>, CubeError>;
#[nbridge(field, optional, vec)]
fn subquery_joins(&self) -> Result<Option<Vec<Rc<dyn SubqueryJoin>>>, CubeError>;
}

#[cfg(test)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,6 @@ pub mod sql_templates_render;
pub mod sql_utils;
pub mod string_or_sql;
pub mod struct_with_sql_member;
pub mod subquery_join;
pub mod timeshift_definition;
pub mod view_filter_definition;
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use super::member_expression::{MemberExpressionDefinition, NativeMemberExpressionDefinition};
use cubenativeutils::wrappers::serializer::{
NativeDeserialize, NativeDeserializer, NativeSerialize,
};
use cubenativeutils::wrappers::{NativeContextHolder, NativeObjectHandle};
use cubenativeutils::CubeError;
use serde::{Deserialize, Serialize};
use std::any::Any;
use std::rc::Rc;

/// A query-level join against an opaque sub-query, originating from the
/// SQL API (cubesql) `subqueryJoins`. `sql` is a complete, pre-rendered
/// SELECT; `on` is the join condition expressed as a member expression
/// (same shape as a parsed member expression: `cubeName` + `expression`).
#[derive(Serialize, Deserialize, Debug, Clone, nativebridge::NativeBridgeStatic)]
pub struct SubqueryJoinStatic {
pub sql: String,
#[serde(rename = "joinType")]
pub join_type: Option<String>,
pub alias: String,
}
Comment on lines +17 to +21

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alias contract is trust-based. alias is consumed verbatim (no quoting) and the inner sql is emitted verbatim into the outer query. The PR description states the SQL API sends a pre-quoted alias and a controlled inner SELECT, which is fine for the cubesql producer, but the trait itself doesn't enforce or document the invariant. If any other caller of buildSqlAndParamsRust ever populates subqueryJoins (the JS BaseQuery now forwards this.options.subqueryJoins from any caller), an unquoted/attacker-controlled alias would land in the FROM clause untouched.

Consider tightening the doc-comment on this struct to state explicitly that:

  1. alias MUST be a fully-quoted identifier produced by the caller, and
  2. sql is emitted verbatim and MUST come from a trusted, planner-side path.

Optionally validate at compile time that alias starts/ends with the quote character (" or the dialect-specific one). Worth at least a // SAFETY: …-style note here so the next reader doesn't reintroduce quoting at a different layer.


#[nativebridge::native_bridge(SubqueryJoinStatic, with_static_meta)]
pub trait SubqueryJoin {
#[nbridge(field)]
fn on(&self) -> Result<Rc<dyn MemberExpressionDefinition>, CubeError>;
}
51 changes: 51 additions & 0 deletions rust/cube/cubesqlplanner/cubesqlplanner/src/logical_plan/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,29 @@ impl PrettyPrint for LogicalJoinItem {
}
}

/// A query-level join against an opaque pre-rendered sub-query, sourced
/// from the SQL API `subqueryJoins`. `sql` is a complete SELECT emitted
/// verbatim; `on_sql` is the compiled join condition (it references the
/// owning cube and the sub-query `alias` as a literal). Unlike
/// `LogicalJoinItem` there is no joined `Cube` node, so these are carried
/// as opaque data and never participate in input packing.
#[derive(Clone)]
pub struct LogicalSubqueryJoinItem {
pub sql: String,
pub alias: String,
pub join_type: String,
pub on_sql: Rc<SqlCall>,
}

impl PrettyPrint for LogicalSubqueryJoinItem {
fn pretty_print(&self, result: &mut PrettyPrintResult, state: &PrettyPrintState) {
result.println(
&format!("SubqueryJoinItem: {} AS {}", self.join_type, self.alias),
state,
);
}
}

/// Join of cubes that backs a query source: a `root` cube plus
/// non-root cubes (`joins`), optionally extended by sub-query
/// dimensions that contribute their own joined-in CTEs.
Expand All @@ -42,6 +65,8 @@ pub struct LogicalJoin {
joins: Vec<LogicalJoinItem>,
#[builder(default)]
dimension_subqueries: Vec<Rc<DimensionSubQuery>>,
#[builder(default)]
subquery_joins: Vec<LogicalSubqueryJoinItem>,
}

impl LogicalJoin {
Expand All @@ -56,6 +81,22 @@ impl LogicalJoin {
pub fn dimension_subqueries(&self) -> &Vec<Rc<DimensionSubQuery>> {
&self.dimension_subqueries
}

pub fn subquery_joins(&self) -> &Vec<LogicalSubqueryJoinItem> {
&self.subquery_joins
}

/// Returns a copy of this join with the given query-level sub-query
/// joins attached. Used to fold in SQL-API `subqueryJoins` after the
/// cube join tree has been built.
pub fn with_subquery_joins(&self, subquery_joins: Vec<LogicalSubqueryJoinItem>) -> Rc<Self> {
Rc::new(Self {
root: self.root.clone(),
joins: self.joins.clone(),
dimension_subqueries: self.dimension_subqueries.clone(),
subquery_joins,
})
}
}

impl LogicalNode for LogicalJoin {
Expand Down Expand Up @@ -101,6 +142,9 @@ impl LogicalNode for LogicalJoin {
.map(|itm| itm.clone().into_logical_node())
.collect::<Result<Vec<_>, _>>()?,
)
// Sub-query joins are opaque data (no child plan nodes), so they are
// not packed/unpacked as inputs; clone them through unchanged.
.subquery_joins(self.subquery_joins.clone())
.build();

Ok(Rc::new(result))
Expand Down Expand Up @@ -191,6 +235,13 @@ impl PrettyPrint for LogicalJoin {
subquery.pretty_print(result, &details_state);
}
}
if !self.subquery_joins().is_empty() {
result.println("subquery_joins:", &state);
let details_state = state.new_level();
for subquery_join in self.subquery_joins().iter() {
subquery_join.pretty_print(result, &details_state);
}
}
} else {
result.println(&format!("Empty source"), state);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ pub enum SingleSource {
Subquery(Rc<QueryPlan>),
Cube(Rc<BaseCube>),
TableReference(String, Rc<Schema>),
/// An opaque, pre-rendered SQL sub-query (a complete SELECT) emitted
/// verbatim wrapped in parentheses. Used for SQL-API `subqueryJoins`,
/// whose body is built outside the planner.
RawSubquerySql(String),
}

impl SingleSource {
Expand All @@ -25,6 +29,7 @@ impl SingleSource {
}
SingleSource::Subquery(s) => format!("({})", s.to_sql(templates)?),
SingleSource::TableReference(r, _) => format!(" {} ", r),
SingleSource::RawSubquerySql(sql) => format!("({})", sql),
};
Ok(sql)
}
Expand All @@ -34,6 +39,7 @@ impl SingleSource {
SingleSource::Subquery(subquery) => subquery.schema(),
SingleSource::Cube(_) => Rc::new(Schema::empty()),
SingleSource::TableReference(_, schema) => schema.clone(),
SingleSource::RawSubquerySql(_) => Rc::new(Schema::empty()),
}
}
}
Expand Down Expand Up @@ -91,7 +97,13 @@ impl SingleAliasedSource {
}
} */

templates.query_aliased(&sql, &self.alias)
match &self.source {
// The alias of a SQL-API sub-query join is already a final, quoted
// identifier and is referenced verbatim in the join ON condition.
// Re-quoting it would double the quotes and break the reference.
SingleSource::RawSubquerySql(_) => templates.query_aliased_prequoted(&sql, &self.alias),
_ => templates.query_aliased(&sql, &self.alias),
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ impl ReferencesBuilder {
}
SingleSource::Cube(_) => None,
SingleSource::TableReference(_, schema) => schema.resolve_member_reference(member),
// Opaque pre-rendered SQL: members are referenced via the alias as
// literals in the ON/projection SQL, not resolved against a schema.
SingleSource::RawSubquerySql(_) => None,
};
column_name.map(|col| QualifiedColumnName::new(Some(source.alias.clone()), col))
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::super::{LogicalNodeProcessor, ProcessableNode, PushDownBuilderContext};
use crate::logical_plan::LogicalJoin;
use crate::physical_plan::{From, JoinBuilder, JoinCondition};
use crate::physical_plan::{From, JoinBuilder, JoinCondition, SingleSource};
use crate::physical_plan_builder::PhysicalPlanBuilder;
use crate::planner::SqlJoinCondition;
use cubenativeutils::CubeError;
Expand Down Expand Up @@ -38,6 +38,7 @@ impl<'a> LogicalNodeProcessor<'a, LogicalJoin> for LogicalJoinProcessor<'a> {
let root = logical_join.root().clone().unwrap().cube().clone();
if logical_join.joins().is_empty()
&& logical_join.dimension_subqueries().is_empty()
&& logical_join.subquery_joins().is_empty()
&& multi_stage_dimension.is_none()
{
Ok(From::new_from_cube(
Expand All @@ -61,6 +62,7 @@ impl<'a> LogicalNodeProcessor<'a, LogicalJoin> for LogicalJoinProcessor<'a> {
context,
)?;
}

for join in logical_join.joins().iter() {
join_builder.left_join_cube(
join.cube().cube().clone(),
Expand All @@ -83,13 +85,33 @@ impl<'a> LogicalNodeProcessor<'a, LogicalJoin> for LogicalJoinProcessor<'a> {
)?;
}
}

if let Some(multi_stage_dimension) = &multi_stage_dimension {
self.builder.add_multistage_dimension_join(
multi_stage_dimension,
&mut join_builder,
&context,
)?;
}

for subquery_join in logical_join.subquery_joins().iter() {
let source = SingleSource::RawSubquerySql(subquery_join.sql.clone());
let on = JoinCondition::new_base_join(SqlJoinCondition::try_new(
subquery_join.on_sql.clone(),
)?);

if subquery_join.join_type.eq_ignore_ascii_case("INNER") {
join_builder.inner_join_source(source, subquery_join.alias.clone(), on);
} else if subquery_join.join_type.eq_ignore_ascii_case("LEFT") {
join_builder.left_join_source(source, subquery_join.alias.clone(), on);
} else {
return Err(CubeError::user(format!(
"Unsupported join type '{}' for sub-query join, expected INNER or LEFT",
subquery_join.join_type
)));
}
Comment thread
claude[bot] marked this conversation as resolved.
}

Ok(From::new_from_join(join_builder.build()))
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::{DimensionSubqueryPlanner, JoinPlanner};
use crate::logical_plan::*;
use crate::planner::collectors::collect_sub_query_dimensions_from_symbols;
use crate::planner::collectors::{collect_join_hints, collect_sub_query_dimensions_from_symbols};
use crate::planner::join_hints::JoinHints;
use crate::planner::planners::multi_stage::PlanningScope;
use crate::planner::state::State;
use crate::planner::QueryProperties;
Expand Down Expand Up @@ -77,12 +78,44 @@ impl SimpleQueryPlanner {
)?;
let subquery_dimension_queries =
dimension_subquery_planner.plan_queries(&subquery_dimensions, scope)?;
// Query-level SQL-API sub-query joins (from `subqueryJoins`) extend the
// FROM clause with opaque joined sub-queries.
let subquery_joins = self.query_properties.subquery_joins();
let source = if let Some(join) = &join {
self.join_planner
.make_join_logical_plan(join, subquery_dimension_queries)
} else if !subquery_joins.is_empty() {
// No selected member resolves the base cube, but the sub-query joins'
// ON conditions reference it (e.g. `SELECT t.col FROM Cube JOIN (...) t
// ON Cube.x = t.x`). Derive the join root from those ON references so
// the base cube — and the sub-query joins below — are emitted.
let mut hints = JoinHints::new();

for subquery_join in subquery_joins {
for dep in subquery_join.on_sql.get_dependencies() {
hints.extend(&collect_join_hints(&dep)?);
}
}

if hints.is_empty() {
return Err(CubeError::user(
"Sub-query join requires its ON condition to reference at least one \
cube member so the base table can be resolved \
(e.g. `{cube.field} = alias.field`)"
.to_string(),
));
}

self.join_planner
.make_join_logical_plan_with_join_hints(hints, subquery_dimension_queries)?
} else {
self.join_planner.make_empty_join_logical_plan()
};
let source = if subquery_joins.is_empty() {
source
} else {
source.with_subquery_joins(subquery_joins.clone())
};
Ok(source)
Comment thread
claude[bot] marked this conversation as resolved.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use super::state::State;
use super::MemberSymbol;
use crate::cube_bridge::base_query_options::FilterValue;
use crate::logical_plan::LogicalSubqueryJoinItem;
use crate::planner::collectors::{collect_multiplied_measures, has_multi_stage_members};
use crate::planner::filter::tree_ops;
use crate::planner::filter::{Filter, FilterGroup, FilterItem, FilterOperator};
Expand Down Expand Up @@ -176,6 +177,10 @@ pub struct QueryProperties {
disable_external_pre_aggregations: bool,
#[builder(default)]
pre_aggregation_id: Option<String>,
/// Query-level joins against opaque sub-queries, from the SQL API
/// `subqueryJoins`. Folded into the query's `LogicalJoin` source.
#[builder(default)]
subquery_joins: Vec<LogicalSubqueryJoinItem>,
#[builder(setter(skip), default)]
multi_fact_join_groups: OnceCell<MultiFactJoinGroups>,
}
Expand Down Expand Up @@ -207,6 +212,10 @@ impl QueryProperties {
self.allow_multi_stage
}

pub fn subquery_joins(&self) -> &Vec<LogicalSubqueryJoinItem> {
&self.subquery_joins
}

// Push every entry of `dimensions_filters` into matching `case`
// expressions on each member, filter and order item. Run once at
// construction; mutators do not re-apply it.
Expand Down Expand Up @@ -1140,6 +1149,7 @@ impl PartialEq for QueryProperties {
disable_external_pre_aggregations,
pre_aggregation_id,
query_join_hints,
subquery_joins,
// Not part of semantic equality:
query_tools: _,
multi_fact_join_groups: _,
Expand All @@ -1164,5 +1174,19 @@ impl PartialEq for QueryProperties {
&& *disable_external_pre_aggregations == other.disable_external_pre_aggregations
&& *pre_aggregation_id == other.pre_aggregation_id
&& *query_join_hints == other.query_join_hints
// Sub-query joins compared semantically: the request triple
// (`sql`, `alias`, `join_type`) plus the compiled ON condition
// (`on_sql` via `SqlCall::struct_eq`), so two joins differing
// only in their ON are not equal.
&& subquery_joins.len() == other.subquery_joins.len()
&& subquery_joins
.iter()
.zip(other.subquery_joins.iter())
.all(|(a, b)| {
a.sql == b.sql
&& a.alias == b.alias
&& a.join_type == b.join_type
&& a.on_sql.struct_eq(&b.on_sql)
})
}
}
Loading
Loading