From be78570f69fe8b8cb5ccd2010246ad9e9128180f Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Wed, 5 Jun 2024 15:31:11 -0700 Subject: [PATCH 01/17] add failing test --- .../src/query/make_selector.rs | 39 +++++++++- .../mongodb-agent-common/src/test_helpers.rs | 73 +++++++++++++++++++ crates/ndc-test-helpers/src/lib.rs | 38 +--------- crates/ndc-test-helpers/src/path_element.rs | 39 ++++++++++ 4 files changed, 152 insertions(+), 37 deletions(-) create mode 100644 crates/ndc-test-helpers/src/path_element.rs diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index 0050617b..9b616b65 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -164,14 +164,21 @@ fn safe_selector<'a>(path: impl IntoIterator) -> Result anyhow::Result<()> { + let request = query_request() + .collection("Artist") + .query( + query().fields([relation_field!("Albums" => "Albums", query().predicate( + binop( + "_gt", + target!("Milliseconds", relations: [ + path_element("Tracks").predicate( + binop("_eq", target!("Name"), column_value!(root("Title"))) + ), + ]), + value!(30_000), + ) + ))]), + ) + .relationships(chinook_relationships()) + .into(); + + let config = chinook_config(); + let plan = plan_for_query_request(&config, request)?; + let pipeline = pipeline_for_query_request(&config, &plan)?; + + let expected_pipeline = bson!([]); + + assert_eq!(bson::to_bson(&pipeline).unwrap(), expected_pipeline); + Ok(()) + } } diff --git a/crates/mongodb-agent-common/src/test_helpers.rs b/crates/mongodb-agent-common/src/test_helpers.rs index 85f61788..406ecc4d 100644 --- a/crates/mongodb-agent-common/src/test_helpers.rs +++ b/crates/mongodb-agent-common/src/test_helpers.rs @@ -86,6 +86,79 @@ pub fn make_nested_schema() -> MongoConfiguration { }) } +/// Configuration for a MongoDB database with Chinook test data +pub fn chinook_config() -> MongoConfiguration { + MongoConfiguration(Configuration { + collections: [ + collection("Album"), + collection("Artist"), + collection("Genre"), + collection("Track"), + ] + .into(), + object_types: [ + ( + "Album".into(), + object_type([ + ("AlbumId", named_type("Int")), + ("ArtistId", named_type("Int")), + ("Title", named_type("String")), + ]), + ), + ( + "Artist".into(), + object_type([ + ("ArtistId", named_type("Int")), + ("Name", named_type("String")), + ]), + ), + ( + "Genre".into(), + object_type([ + ("GenreId", named_type("Int")), + ("Name", named_type("String")), + ]), + ), + ( + "Track".into(), + object_type([ + ("AlbumId", named_type("Int")), + ("GenreId", named_type("Int")), + ("TrackId", named_type("Int")), + ("Name", named_type("String")), + ("Milliseconds", named_type("Int")), + ]), + ), + ] + .into(), + functions: Default::default(), + procedures: Default::default(), + native_mutations: Default::default(), + native_queries: Default::default(), + options: Default::default(), + }) +} + +pub fn chinook_relationships() -> BTreeMap { + [ + ( + "Albums", + ndc_test_helpers::relationship("Album", [("AlbumId", "AlbumId")]), + ), + ( + "Tracks", + ndc_test_helpers::relationship("Track", [("TrackId", "TrackId")]), + ), + ( + "Genre", + ndc_test_helpers::relationship("Genre", [("GenreId", "GenreId")]).object_type(), + ), + ] + .into_iter() + .map(|(name, relationship_builder)| (name.to_string(), relationship_builder.into())) + .collect() +} + /// Configuration for a MongoDB database that resembles MongoDB's sample_mflix test data set. pub fn mflix_config() -> MongoConfiguration { MongoConfiguration(Configuration { diff --git a/crates/ndc-test-helpers/src/lib.rs b/crates/ndc-test-helpers/src/lib.rs index 759f11dd..1859cf6c 100644 --- a/crates/ndc-test-helpers/src/lib.rs +++ b/crates/ndc-test-helpers/src/lib.rs @@ -9,6 +9,7 @@ mod exists_in_collection; mod expressions; mod field; mod object_type; +mod path_element; mod query_response; mod relationships; mod type_helpers; @@ -31,6 +32,7 @@ pub use exists_in_collection::*; pub use expressions::*; pub use field::*; pub use object_type::*; +pub use path_element::*; pub use query_response::*; pub use relationships::*; pub use type_helpers::*; @@ -209,39 +211,3 @@ pub fn empty_expression() -> Expression { expressions: vec![], } } - -#[derive(Clone, Debug)] -pub struct PathElementBuilder { - relationship: String, - arguments: Option>, - predicate: Option>, -} - -pub fn path_element(relationship: &str) -> PathElementBuilder { - PathElementBuilder::new(relationship) -} - -impl PathElementBuilder { - pub fn new(relationship: &str) -> Self { - PathElementBuilder { - relationship: relationship.to_owned(), - arguments: None, - predicate: None, - } - } - - pub fn predicate(mut self, expression: Expression) -> Self { - self.predicate = Some(Box::new(expression)); - self - } -} - -impl From for PathElement { - fn from(value: PathElementBuilder) -> Self { - PathElement { - relationship: value.relationship, - arguments: value.arguments.unwrap_or_default(), - predicate: value.predicate, - } - } -} diff --git a/crates/ndc-test-helpers/src/path_element.rs b/crates/ndc-test-helpers/src/path_element.rs new file mode 100644 index 00000000..d0ee34e6 --- /dev/null +++ b/crates/ndc-test-helpers/src/path_element.rs @@ -0,0 +1,39 @@ +use std::collections::BTreeMap; + +use ndc_models::{Expression, PathElement, RelationshipArgument}; + +#[derive(Clone, Debug)] +pub struct PathElementBuilder { + relationship: String, + arguments: Option>, + predicate: Option>, +} + +pub fn path_element(relationship: &str) -> PathElementBuilder { + PathElementBuilder::new(relationship) +} + +impl PathElementBuilder { + pub fn new(relationship: &str) -> Self { + PathElementBuilder { + relationship: relationship.to_owned(), + arguments: None, + predicate: None, + } + } + + pub fn predicate(mut self, expression: Expression) -> Self { + self.predicate = Some(Box::new(expression)); + self + } +} + +impl From for PathElement { + fn from(value: PathElementBuilder) -> Self { + PathElement { + relationship: value.relationship, + arguments: value.arguments.unwrap_or_default(), + predicate: value.predicate, + } + } +} From f7977e1ddf1567ce5659174a6c3b6c4741aaca07 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Wed, 5 Jun 2024 15:37:41 -0700 Subject: [PATCH 02/17] update root_collection_object_type when following relation field --- crates/ndc-query-plan/src/plan_for_query_request/mod.rs | 1 + .../src/plan_for_query_request/type_annotated_field.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/ndc-query-plan/src/plan_for_query_request/mod.rs b/crates/ndc-query-plan/src/plan_for_query_request/mod.rs index 883fa0ba..fd5c5d46 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/mod.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/mod.rs @@ -52,6 +52,7 @@ pub fn plan_for_query_request( }) } +/// root_collection_object_type references the collection type of the nearest enclosing [ndc::Query] pub fn plan_for_query( plan_state: &mut QueryPlanState<'_, T>, root_collection_object_type: &plan::ObjectType, diff --git a/crates/ndc-query-plan/src/plan_for_query_request/type_annotated_field.rs b/crates/ndc-query-plan/src/plan_for_query_request/type_annotated_field.rs index cd8b6a02..53193cbc 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/type_annotated_field.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/type_annotated_field.rs @@ -71,7 +71,7 @@ fn type_annotated_field_helper( let query_plan = plan_for_query( &mut plan_state.state_for_subquery(), - root_collection_object_type, + &related_collection_type, &related_collection_type, *query, )?; From 25d3008649bbc16e6ee62c450a0bd7f5582730f4 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Thu, 6 Jun 2024 13:14:26 -0700 Subject: [PATCH 03/17] compare two columns in current collection --- .../src/comparison_function.rs | 22 +++- .../src/query/make_selector.rs | 113 ++++++++++++++---- 2 files changed, 109 insertions(+), 26 deletions(-) diff --git a/crates/mongodb-agent-common/src/comparison_function.rs b/crates/mongodb-agent-common/src/comparison_function.rs index 3e7b2dc1..881c0d61 100644 --- a/crates/mongodb-agent-common/src/comparison_function.rs +++ b/crates/mongodb-agent-common/src/comparison_function.rs @@ -57,8 +57,8 @@ impl ComparisonFunction { .ok_or(QueryPlanError::UnknownComparisonOperator(s.to_owned())) } - /// Produce a MongoDB expression that applies this function to the given operands. - pub fn mongodb_expression( + /// Produce a MongoDB expression for use in a match query that applies this function to the given operands. + pub fn mongodb_match_query( self, column_ref: impl Into, comparison_value: Bson, @@ -70,4 +70,22 @@ impl ComparisonFunction { _ => doc! { column_ref: { self.mongodb_name(): comparison_value } }, } } + + /// Produce a MongoDB expression for use in an aggregation expression that applies this + /// function to the given operands. + pub fn mongodb_aggregation_expression( + self, + column_ref: impl Into, + comparison_value: impl Into, + ) -> Document { + match self { + C::Regex => { + doc! { "$regexMatch": { "input": column_ref, "regex": comparison_value } } + } + C::IRegex => { + doc! { "$regexMatch": { "input": column_ref, "regex": comparison_value, "options": "i" } } + } + _ => doc! { self.mongodb_name(): [column_ref, comparison_value] }, + } + } } diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index 9b616b65..6259c588 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -6,6 +6,7 @@ use mongodb::bson::{self, doc, Document}; use ndc_models::UnaryComparisonOperator; use crate::{ + comparison_function::ComparisonFunction, interface_types::MongoAgentError, mongo_query_plan::{ComparisonTarget, ComparisonValue, ExistsInCollection, Expression, Type}, mongodb::sanitize::safe_name, @@ -55,6 +56,7 @@ pub fn make_selector( }, None => doc! { format!("{relationship}.0"): { "$exists": true } }, }, + // TODO: This doesn't work - we can't use a variable as the key in a match query ExistsInCollection::Unrelated { unrelated_collection, } => doc! { format!("$$ROOT.{unrelated_collection}.0"): { "$exists": true } }, @@ -63,27 +65,7 @@ pub fn make_selector( column, operator, value, - } => { - let comparison_value = match value { - // TODO: MDB-152 To compare to another column we need to wrap the entire expression in - // an `$expr` aggregation operator (assuming the expression is not already in - // an aggregation expression context) - ComparisonValue::Column { .. } => Err(MongoAgentError::NotImplemented( - "comparisons between columns", - )), - ComparisonValue::Scalar { value, value_type } => { - bson_from_scalar_value(value, value_type) - } - ComparisonValue::Variable { - name, - variable_type, - } => variable_to_mongo_expression(variables, name, variable_type).map(Into::into), - }?; - Ok(traverse_relationship_path( - column.relationship_path(), - operator.mongodb_expression(column_ref(column)?, comparison_value), - )) - } + } => make_binary_comparison_selector(variables, column, operator, value), Expression::UnaryComparisonOperator { column, operator } => match operator { UnaryComparisonOperator::IsNull => Ok(traverse_relationship_path( column.relationship_path(), @@ -93,6 +75,45 @@ pub fn make_selector( } } +fn make_binary_comparison_selector( + variables: Option<&BTreeMap>, + target_column: &ComparisonTarget, + operator: &ComparisonFunction, + value: &ComparisonValue, +) -> Result { + let selector = match value { + ComparisonValue::Column { + column: value_column, + } => { + doc! { + "$expr": operator.mongodb_aggregation_expression( + column_expression(target_column)?, + column_expression(value_column)? + ) + } + } + ComparisonValue::Scalar { value, value_type } => { + let comparison_value = bson_from_scalar_value(value, value_type)?; + traverse_relationship_path( + target_column.relationship_path(), + operator.mongodb_match_query(column_ref(target_column)?, comparison_value), + ) + } + ComparisonValue::Variable { + name, + variable_type, + } => { + let comparison_value = + variable_to_mongo_expression(variables, name, variable_type).map(Into::into)?; + traverse_relationship_path( + target_column.relationship_path(), + operator.mongodb_match_query(column_ref(target_column)?, comparison_value), + ) + } + }; + Ok(selector) +} + /// For simple cases the target of an expression is a field reference. But if the target is /// a column of a related collection then we're implicitly making an array comparison (because /// related documents always come as an array, even for object relationships), so we have to wrap @@ -121,9 +142,12 @@ fn variable_to_mongo_expression( bson_from_scalar_value(value, value_type) } -/// Given a column target returns a MongoDB expression that resolves to the value of the -/// corresponding field, either in the target collection of a query request, or in the related -/// collection. Resolves nested fields, but does not traverse relationships. +/// Given a column target returns a string that can be used in a MongoDB match query that +/// references the corresponding field, either in the target collection of a query request, or in +/// the related collection. Resolves nested fields, but does not traverse relationships. +/// +/// The string produced by this function cannot be used as an aggregation expression, only as +/// a match query key (a key in the document used in a `$match` stage). fn column_ref(column: &ComparisonTarget) -> Result> { let path = match column { ComparisonTarget::Column { @@ -139,6 +163,7 @@ fn column_ref(column: &ComparisonTarget) -> Result> { ComparisonTarget::RootCollectionColumn { name, field_path, .. } => Either::Right( + // TODO: This doesn't work - we can't use a variable as the key in a match query once("$$ROOT") .chain(once(name.as_ref())) .chain(field_path.iter().flatten().map(AsRef::as_ref)), @@ -147,6 +172,13 @@ fn column_ref(column: &ComparisonTarget) -> Result> { safe_selector(path) } +/// Produces an aggregation expression that evaluates to the value of a given document field. +/// Unlike `column_ref` this expression cannot be used as a match query key - it can only be used +/// as an expression. +fn column_expression(column: &ComparisonTarget) -> Result { + Ok(format!("${}", column_ref(column)?)) +} + /// Given an iterable of fields to access, ensures that each field name does not include characters /// that could be interpereted as a MongoDB expression. fn safe_selector<'a>(path: impl IntoIterator) -> Result> { @@ -251,6 +283,39 @@ mod tests { Ok(()) } + #[test] + fn compares_two_columns() -> anyhow::Result<()> { + let selector = make_selector( + None, + &Expression::BinaryComparisonOperator { + column: ComparisonTarget::Column { + name: "Name".to_owned(), + field_path: None, + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + path: Default::default(), + }, + operator: ComparisonFunction::Equal, + value: ComparisonValue::Column { + column: ComparisonTarget::Column { + name: "Title".to_owned(), + field_path: None, + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + path: Default::default(), + }, + }, + }, + )?; + + let expected = doc! { + "$expr": { + "$eq": ["$Name", "$Title"] + } + }; + + assert_eq!(selector, expected); + Ok(()) + } + #[test] fn root_column_reference_refereces_column_of_nearest_query() -> anyhow::Result<()> { let request = query_request() From b1bc8f0f843a6a4efee578d4efe33c7281fe65a9 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Thu, 6 Jun 2024 13:22:04 -0700 Subject: [PATCH 04/17] handle comparison target in related collection --- .../src/query/make_selector.rs | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index 6259c588..d6a6045a 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -85,12 +85,13 @@ fn make_binary_comparison_selector( ComparisonValue::Column { column: value_column, } => { - doc! { + let comparison_expr = doc! { "$expr": operator.mongodb_aggregation_expression( column_expression(target_column)?, column_expression(value_column)? ) - } + }; + traverse_relationship_path(target_column.relationship_path(), comparison_expr) } ComparisonValue::Scalar { value, value_type } => { let comparison_value = bson_from_scalar_value(value, value_type)?; @@ -316,6 +317,43 @@ mod tests { Ok(()) } + #[test] + fn compares_column_of_relationship_to_local_column() -> anyhow::Result<()> { + let selector = make_selector( + None, + &Expression::BinaryComparisonOperator { + column: ComparisonTarget::Column { + name: "Name".to_owned(), + field_path: None, + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + path: vec!["Artist".into()], + }, + operator: ComparisonFunction::Equal, + value: ComparisonValue::Column { + column: ComparisonTarget::Column { + name: "Title".to_owned(), + field_path: None, + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + path: Default::default(), + }, + }, + }, + )?; + + let expected = doc! { + "Artist": { + "$elemMatch": { + "$expr": { + "$eq": ["$Name", "$Title"] + } + } + } + }; + + assert_eq!(selector, expected); + Ok(()) + } + #[test] fn root_column_reference_refereces_column_of_nearest_query() -> anyhow::Result<()> { let request = query_request() From 601c6f135f692b5a5236e638ce01fdc61d4f3056 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Thu, 6 Jun 2024 15:25:30 -0700 Subject: [PATCH 05/17] skip comparing columns in related collections for now --- .../src/query/make_selector.rs | 49 ++++--------------- 1 file changed, 9 insertions(+), 40 deletions(-) diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index d6a6045a..9b0929b2 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -85,13 +85,19 @@ fn make_binary_comparison_selector( ComparisonValue::Column { column: value_column, } => { - let comparison_expr = doc! { + if !target_column.relationship_path().is_empty() + || !value_column.relationship_path().is_empty() + { + return Err(MongoAgentError::NotImplemented( + "binary comparisons between two fields where either field is in a related collection", + )); + } + doc! { "$expr": operator.mongodb_aggregation_expression( column_expression(target_column)?, column_expression(value_column)? ) - }; - traverse_relationship_path(target_column.relationship_path(), comparison_expr) + } } ComparisonValue::Scalar { value, value_type } => { let comparison_value = bson_from_scalar_value(value, value_type)?; @@ -317,43 +323,6 @@ mod tests { Ok(()) } - #[test] - fn compares_column_of_relationship_to_local_column() -> anyhow::Result<()> { - let selector = make_selector( - None, - &Expression::BinaryComparisonOperator { - column: ComparisonTarget::Column { - name: "Name".to_owned(), - field_path: None, - column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), - path: vec!["Artist".into()], - }, - operator: ComparisonFunction::Equal, - value: ComparisonValue::Column { - column: ComparisonTarget::Column { - name: "Title".to_owned(), - field_path: None, - column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), - path: Default::default(), - }, - }, - }, - )?; - - let expected = doc! { - "Artist": { - "$elemMatch": { - "$expr": { - "$eq": ["$Name", "$Title"] - } - } - } - }; - - assert_eq!(selector, expected); - Ok(()) - } - #[test] fn root_column_reference_refereces_column_of_nearest_query() -> anyhow::Result<()> { let request = query_request() From f43a3e0a11269c85ee381ce048c18679a2897f0b Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Thu, 6 Jun 2024 15:57:44 -0700 Subject: [PATCH 06/17] wip: switch to expression context as necessary --- .../src/query/column_ref.rs | 100 ++++++++++++++++++ .../src/query/make_selector.rs | 51 --------- crates/mongodb-agent-common/src/query/mod.rs | 1 + 3 files changed, 101 insertions(+), 51 deletions(-) create mode 100644 crates/mongodb-agent-common/src/query/column_ref.rs diff --git a/crates/mongodb-agent-common/src/query/column_ref.rs b/crates/mongodb-agent-common/src/query/column_ref.rs new file mode 100644 index 00000000..a79611d9 --- /dev/null +++ b/crates/mongodb-agent-common/src/query/column_ref.rs @@ -0,0 +1,100 @@ +use std::{borrow::Cow, iter::once}; + +use itertools::Either; +use mongodb::bson::Bson; + +use crate::{ + interface_types::MongoAgentError, mongo_query_plan::ComparisonTarget, + mongodb::sanitize::safe_name, +}; + +pub type Result = std::result::Result; + +/// Reference to a column / document field based on a [ComparisonTarget]. There are two contexts +/// where we reference columns: +/// +/// - match queries, where the reference is a key in the document used in a `$match` aggregation stage +/// - aggregation expressions which appear in a number of contexts +/// +/// Those two contexts are not compatible. For example in aggregation expressions column names are +/// prefixed with a dollar sign ($), but in match queries names are not prefixed. Expressions may +/// reference variables, while match queries may not. Some [ComparisonTarget] values **cannot** be +/// expressed in match queries. Those include root collection column references (which require +/// a variable reference), and columns with names that include characters that MongoDB evaluates +/// specially, such as dollar signs or dots. +/// +/// This type provides a helper that attempts to produce a match query reference for +/// a [ComparisonTarget], but falls back to an aggregation expression if necessary. It is up to the +/// caller to switch contexts in the second case. +#[derive(Clone, Debug, PartialEq)] +pub enum ColumnRef<'a> { + MatchKey(Cow<'a, str>), + Expression(Bson), +} + +impl<'a> ColumnRef<'a> { + /// Given a column target returns a string that can be used in a MongoDB match query that + /// references the corresponding field, either in the target collection of a query request, or in + /// the related collection. Resolves nested fields, but does not traverse relationships. + /// + /// If the given target cannot be represented as a match query key, falls back to providing an + /// aggregation expression referencing the column. + pub fn from_comparison_target(column: &ComparisonTarget) -> ColumnRef<'_> { + if let Ok(match_key) = column_match_key(column) { + ColumnRef::MatchKey(match_key) + } else { + ColumnRef::Expression(column_expression(column)) + } + } +} + +/// Given a column target returns a string that can be used in a MongoDB match query that +/// references the corresponding field, either in the target collection of a query request, or in +/// the related collection. Resolves nested fields, but does not traverse relationships. +/// +/// The string produced by this function cannot be used as an aggregation expression, only as +/// a match query key (a key in the document used in a `$match` stage). +fn column_match_key(column: &ComparisonTarget) -> Result> { + let path = match column { + ComparisonTarget::Column { + name, + field_path, + // path, + .. + } => Either::Left( + once(name) + .chain(field_path.iter().flatten()) + .map(AsRef::as_ref), + ), + ComparisonTarget::RootCollectionColumn { + name, field_path, .. + } => Either::Right( + // TODO: This doesn't work - we can't use a variable as the key in a match query + once("$$ROOT") + .chain(once(name.as_ref())) + .chain(field_path.iter().flatten().map(AsRef::as_ref)), + ), + }; + safe_selector(path) +} + +/// Given an iterable of fields to access, ensures that each field name does not include characters +/// that could be interpereted as a MongoDB expression. +fn safe_selector<'a>(path: impl IntoIterator) -> Result> { + let mut safe_elements = path + .into_iter() + .map(safe_name) + .collect::>>>()?; + if safe_elements.len() == 1 { + Ok(safe_elements.pop().unwrap()) + } else { + Ok(Cow::Owned(safe_elements.join("."))) + } +} + +/// Produces an aggregation expression that evaluates to the value of a given document field. +/// Unlike `column_ref` this expression cannot be used as a match query key - it can only be used +/// as an expression. +fn column_expression(column: &ComparisonTarget) -> Bson { + Ok(format!("${}", column_ref(column)?)) +} diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index 9b0929b2..ba3e9916 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -149,57 +149,6 @@ fn variable_to_mongo_expression( bson_from_scalar_value(value, value_type) } -/// Given a column target returns a string that can be used in a MongoDB match query that -/// references the corresponding field, either in the target collection of a query request, or in -/// the related collection. Resolves nested fields, but does not traverse relationships. -/// -/// The string produced by this function cannot be used as an aggregation expression, only as -/// a match query key (a key in the document used in a `$match` stage). -fn column_ref(column: &ComparisonTarget) -> Result> { - let path = match column { - ComparisonTarget::Column { - name, - field_path, - // path, - .. - } => Either::Left( - once(name) - .chain(field_path.iter().flatten()) - .map(AsRef::as_ref), - ), - ComparisonTarget::RootCollectionColumn { - name, field_path, .. - } => Either::Right( - // TODO: This doesn't work - we can't use a variable as the key in a match query - once("$$ROOT") - .chain(once(name.as_ref())) - .chain(field_path.iter().flatten().map(AsRef::as_ref)), - ), - }; - safe_selector(path) -} - -/// Produces an aggregation expression that evaluates to the value of a given document field. -/// Unlike `column_ref` this expression cannot be used as a match query key - it can only be used -/// as an expression. -fn column_expression(column: &ComparisonTarget) -> Result { - Ok(format!("${}", column_ref(column)?)) -} - -/// Given an iterable of fields to access, ensures that each field name does not include characters -/// that could be interpereted as a MongoDB expression. -fn safe_selector<'a>(path: impl IntoIterator) -> Result> { - let mut safe_elements = path - .into_iter() - .map(safe_name) - .collect::>>>()?; - if safe_elements.len() == 1 { - Ok(safe_elements.pop().unwrap()) - } else { - Ok(Cow::Owned(safe_elements.join("."))) - } -} - #[cfg(test)] mod tests { use configuration::MongoScalarType; diff --git a/crates/mongodb-agent-common/src/query/mod.rs b/crates/mongodb-agent-common/src/query/mod.rs index 60c9cad9..2f574656 100644 --- a/crates/mongodb-agent-common/src/query/mod.rs +++ b/crates/mongodb-agent-common/src/query/mod.rs @@ -1,4 +1,5 @@ pub mod arguments; +mod column_ref; mod constants; mod execute_query_request; mod foreach; From d09846a27de8edda7573631e626f439a68c61daf Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Mon, 10 Jun 2024 10:29:03 -0700 Subject: [PATCH 07/17] safe column/field refs that work with variables --- .../src/mongodb/sanitize.rs | 10 +- .../src/query/column_ref.rs | 371 +++++++++++++++--- .../src/query/make_selector.rs | 51 ++- .../src/query/make_sort.rs | 3 +- .../src/query/relations.rs | 1 + 5 files changed, 363 insertions(+), 73 deletions(-) diff --git a/crates/mongodb-agent-common/src/mongodb/sanitize.rs b/crates/mongodb-agent-common/src/mongodb/sanitize.rs index 0ef537a2..5ac11794 100644 --- a/crates/mongodb-agent-common/src/mongodb/sanitize.rs +++ b/crates/mongodb-agent-common/src/mongodb/sanitize.rs @@ -9,6 +9,8 @@ use crate::interface_types::MongoAgentError; /// Produces a MongoDB expression that references a field by name in a way that is safe from code /// injection. +/// +/// TODO: equivalent to ColumnRef::Expression pub fn get_field(name: &str) -> Document { doc! { "$getField": { "$literal": name } } } @@ -33,10 +35,16 @@ pub fn variable(name: &str) -> Result { } } +/// Returns false if the name contains characters that MongoDB will interpret specially, such as an +/// initial dollar sign, or dots. +pub fn is_name_safe(name: &str) -> bool { + !(name.starts_with('$') || name.contains('.')) +} + /// Given a collection or field name, returns Ok if the name is safe, or Err if it contains /// characters that MongoDB will interpret specially. /// -/// TODO: Can we handle names with dots or dollar signs safely instead of throwing an error? +/// TODO: MDB-159, MBD-160 remove this function in favor of ColumnRef which is infallible pub fn safe_name(name: &str) -> Result, MongoAgentError> { if name.starts_with('$') || name.contains('.') { Err(MongoAgentError::BadQuery(anyhow!("cannot execute query that includes the name, \"{name}\", because it includes characters that MongoDB interperets specially"))) diff --git a/crates/mongodb-agent-common/src/query/column_ref.rs b/crates/mongodb-agent-common/src/query/column_ref.rs index a79611d9..fd33829e 100644 --- a/crates/mongodb-agent-common/src/query/column_ref.rs +++ b/crates/mongodb-agent-common/src/query/column_ref.rs @@ -1,16 +1,10 @@ use std::{borrow::Cow, iter::once}; -use itertools::Either; -use mongodb::bson::Bson; +use mongodb::bson::{doc, Bson}; -use crate::{ - interface_types::MongoAgentError, mongo_query_plan::ComparisonTarget, - mongodb::sanitize::safe_name, -}; +use crate::{mongo_query_plan::ComparisonTarget, mongodb::sanitize::is_name_safe}; -pub type Result = std::result::Result; - -/// Reference to a column / document field based on a [ComparisonTarget]. There are two contexts +/// Reference to a document field, or a nested property of a document field. There are two contexts /// where we reference columns: /// /// - match queries, where the reference is a key in the document used in a `$match` aggregation stage @@ -34,67 +28,336 @@ pub enum ColumnRef<'a> { impl<'a> ColumnRef<'a> { /// Given a column target returns a string that can be used in a MongoDB match query that - /// references the corresponding field, either in the target collection of a query request, or in - /// the related collection. Resolves nested fields, but does not traverse relationships. + /// references the corresponding field, either in the target collection of a query request, or + /// in the related collection. Resolves nested fields and root collection references, but does + /// not traverse relationships. /// /// If the given target cannot be represented as a match query key, falls back to providing an /// aggregation expression referencing the column. pub fn from_comparison_target(column: &ComparisonTarget) -> ColumnRef<'_> { - if let Ok(match_key) = column_match_key(column) { - ColumnRef::MatchKey(match_key) - } else { - ColumnRef::Expression(column_expression(column)) - } + from_target(column) } } -/// Given a column target returns a string that can be used in a MongoDB match query that -/// references the corresponding field, either in the target collection of a query request, or in -/// the related collection. Resolves nested fields, but does not traverse relationships. -/// -/// The string produced by this function cannot be used as an aggregation expression, only as -/// a match query key (a key in the document used in a `$match` stage). -fn column_match_key(column: &ComparisonTarget) -> Result> { - let path = match column { +fn from_target(column: &ComparisonTarget) -> ColumnRef<'_> { + match column { ComparisonTarget::Column { - name, - field_path, - // path, - .. - } => Either::Left( - once(name) - .chain(field_path.iter().flatten()) - .map(AsRef::as_ref), - ), + name, field_path, .. + } => { + let name_and_path = once(name).chain(field_path.iter().flatten()); + // The None case won't come up if the input to [from_target_helper] has at least + // one element, and we know it does because we start the iterable with `name` + from_path(None, name_and_path).unwrap() + } ComparisonTarget::RootCollectionColumn { name, field_path, .. - } => Either::Right( - // TODO: This doesn't work - we can't use a variable as the key in a match query - once("$$ROOT") - .chain(once(name.as_ref())) - .chain(field_path.iter().flatten().map(AsRef::as_ref)), - ), - }; - safe_selector(path) + } => { + // "$$ROOT" is not actually a valid match key, but cheating here makes the + // implementation much simpler. This match branch produces a ColumnRef::Expression + // in all cases. + let init = ColumnRef::MatchKey("$ROOT".into()); + // The None case won't come up if the input to [from_target_helper] has at least + // one element, and we know it does because we start the iterable with `name` + let col_ref = + from_path(Some(init), once(name).chain(field_path.iter().flatten())).unwrap(); + match col_ref { + // move from MatchKey to Expression because "$$ROOT" is not valid in a match key + ColumnRef::MatchKey(key) => ColumnRef::Expression(format!("${key}").into()), + e @ ColumnRef::Expression(_) => e, + } + } + } +} + +fn from_path<'a>( + init: Option>, + path: impl IntoIterator, +) -> Option> { + path.into_iter().fold(init, |accum, element| { + Some(fold_path_element(accum, element)) + }) } -/// Given an iterable of fields to access, ensures that each field name does not include characters -/// that could be interpereted as a MongoDB expression. -fn safe_selector<'a>(path: impl IntoIterator) -> Result> { - let mut safe_elements = path - .into_iter() - .map(safe_name) - .collect::>>>()?; - if safe_elements.len() == 1 { - Ok(safe_elements.pop().unwrap()) - } else { - Ok(Cow::Owned(safe_elements.join("."))) +fn fold_path_element<'a>( + ref_so_far: Option>, + path_element: &'a str, +) -> ColumnRef<'a> { + match (ref_so_far, is_name_safe(path_element)) { + (Some(ColumnRef::MatchKey(parent)), true) => { + ColumnRef::MatchKey(format!("{parent}.{path_element}").into()) + } + (Some(ColumnRef::MatchKey(parent)), false) => ColumnRef::Expression( + doc! { + "$getField": { + "input": format!("${parent}"), + "field": { "$literal": path_element }, + } + } + .into(), + ), + (Some(ColumnRef::Expression(parent)), true) => ColumnRef::Expression( + doc! { + "$getField": { + "input": parent, + "field": path_element, + } + } + .into(), + ), + (Some(ColumnRef::Expression(parent)), false) => ColumnRef::Expression( + doc! { + "$getField": { + "input": parent, + "field": { "$literal": path_element }, + } + } + .into(), + ), + (None, true) => ColumnRef::MatchKey(path_element.into()), + (None, false) => ColumnRef::Expression( + doc! { + "$getField": { + "$literal": path_element + } + } + .into(), + ), } } /// Produces an aggregation expression that evaluates to the value of a given document field. /// Unlike `column_ref` this expression cannot be used as a match query key - it can only be used /// as an expression. -fn column_expression(column: &ComparisonTarget) -> Bson { - Ok(format!("${}", column_ref(column)?)) +pub fn column_expression(column: &ComparisonTarget) -> Bson { + match ColumnRef::from_comparison_target(column) { + ColumnRef::MatchKey(key) => format!("${key}").into(), + ColumnRef::Expression(expr) => expr, + } +} + +#[cfg(test)] +mod tests { + use configuration::MongoScalarType; + use mongodb::bson::doc; + use mongodb_support::BsonScalarType; + use pretty_assertions::assert_eq; + + use crate::mongo_query_plan::{ComparisonTarget, Type}; + + use super::ColumnRef; + + #[test] + fn produces_match_query_key() -> anyhow::Result<()> { + let target = ComparisonTarget::Column { + name: "imdb".into(), + field_path: Some(vec!["rating".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::Double)), + path: Default::default(), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::MatchKey("imdb.rating".into()); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn escapes_nested_field_name_with_dots() -> anyhow::Result<()> { + let target = ComparisonTarget::Column { + name: "subtitles".into(), + field_path: Some(vec!["english.us".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + path: Default::default(), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression( + doc! { + "$getField": { + "input": "$subtitles", + "field": { "$literal": "english.us" } , + } + } + .into(), + ); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn escapes_top_level_field_name_with_dots() -> anyhow::Result<()> { + let target = ComparisonTarget::Column { + name: "meta.subtitles".into(), + field_path: Some(vec!["english_us".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + path: Default::default(), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression( + doc! { + "$getField": { + "input": { "$getField": { "$literal": "meta.subtitles" } }, + "field": "english_us", + } + } + .into(), + ); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn escapes_multiple_unsafe_nested_field_names() -> anyhow::Result<()> { + let target = ComparisonTarget::Column { + name: "meta".into(), + field_path: Some(vec!["$unsafe".into(), "$also_unsafe".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + path: Default::default(), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression( + doc! { + "$getField": { + "input": { + "$getField": { + "input": "$meta", + "field": { "$literal": "$unsafe" }, + } + }, + "field": { "$literal": "$also_unsafe" }, + } + } + .into(), + ); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn traverses_multiple_field_names_before_escaping() -> anyhow::Result<()> { + let target = ComparisonTarget::Column { + name: "valid_key".into(), + field_path: Some(vec!["also_valid".into(), "$not_valid".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + path: Default::default(), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression( + doc! { + "$getField": { + "input": "$valid_key.also_valid", + "field": { "$literal": "$not_valid" }, + } + } + .into(), + ); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn produces_dot_separated_root_column_reference() -> anyhow::Result<()> { + let target = ComparisonTarget::RootCollectionColumn { + name: "field".into(), + field_path: Some(vec!["prop1".into(), "prop2".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression("$$ROOT.field.prop1.prop2".into()); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn escapes_unsafe_field_name_in_root_column_reference() -> anyhow::Result<()> { + let target = ComparisonTarget::RootCollectionColumn { + name: "$field".into(), + field_path: Default::default(), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression( + doc! { + "$getField": { + "input": "$$ROOT", + "field": { "$literal": "$field" }, + } + } + .into(), + ); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn escapes_unsafe_nested_property_name_in_root_column_reference() -> anyhow::Result<()> { + let target = ComparisonTarget::RootCollectionColumn { + name: "field".into(), + field_path: Some(vec!["$unsafe_name".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression( + doc! { + "$getField": { + "input": "$$ROOT.field", + "field": { "$literal": "$unsafe_name" }, + } + } + .into(), + ); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn escapes_multiple_layers_of_nested_property_names_in_root_column_reference( + ) -> anyhow::Result<()> { + let target = ComparisonTarget::RootCollectionColumn { + name: "$field".into(), + field_path: Some(vec!["$unsafe_name1".into(), "$unsafe_name2".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression( + doc! { + "$getField": { + "input": { + "$getField": { + "input": { + "$getField": { + "input": "$$ROOT", + "field": { "$literal": "$field" }, + } + }, + "field": { "$literal": "$unsafe_name1" }, + } + }, + "field": { "$literal": "$unsafe_name2" }, + } + } + .into(), + ); + assert_eq!(actual, expected); + Ok(()) + } + + #[test] + fn escapes_unsafe_deeply_nested_property_name_in_root_column_reference() -> anyhow::Result<()> { + let target = ComparisonTarget::RootCollectionColumn { + name: "field".into(), + field_path: Some(vec!["prop1".into(), "$unsafe_name".into()]), + column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + }; + let actual = ColumnRef::from_comparison_target(&target); + let expected = ColumnRef::Expression( + doc! { + "$getField": { + "input": "$$ROOT.field.prop1", + "field": { "$literal": "$unsafe_name" }, + } + } + .into(), + ); + assert_eq!(actual, expected); + Ok(()) + } } diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index ba3e9916..add12620 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -1,7 +1,6 @@ -use std::{borrow::Cow, collections::BTreeMap, iter::once}; +use std::collections::BTreeMap; use anyhow::anyhow; -use itertools::Either; use mongodb::bson::{self, doc, Document}; use ndc_models::UnaryComparisonOperator; @@ -9,7 +8,7 @@ use crate::{ comparison_function::ComparisonFunction, interface_types::MongoAgentError, mongo_query_plan::{ComparisonTarget, ComparisonValue, ExistsInCollection, Expression, Type}, - mongodb::sanitize::safe_name, + query::column_ref::{column_expression, ColumnRef}, }; use super::serialization::json_to_bson; @@ -67,10 +66,22 @@ pub fn make_selector( value, } => make_binary_comparison_selector(variables, column, operator, value), Expression::UnaryComparisonOperator { column, operator } => match operator { - UnaryComparisonOperator::IsNull => Ok(traverse_relationship_path( - column.relationship_path(), - doc! { column_ref(column)?: { "$eq": null } }, - )), + UnaryComparisonOperator::IsNull => { + let match_doc = match ColumnRef::from_comparison_target(column) { + ColumnRef::MatchKey(key) => doc! { + key: { "$eq": null } + }, + ColumnRef::Expression(expr) => doc! { + "$expr": { + "$eq": [expr, null] + } + }, + }; + Ok(traverse_relationship_path( + column.relationship_path(), + match_doc, + )) + } }, } } @@ -94,17 +105,20 @@ fn make_binary_comparison_selector( } doc! { "$expr": operator.mongodb_aggregation_expression( - column_expression(target_column)?, - column_expression(value_column)? + column_expression(target_column), + column_expression(value_column) ) } } ComparisonValue::Scalar { value, value_type } => { let comparison_value = bson_from_scalar_value(value, value_type)?; - traverse_relationship_path( - target_column.relationship_path(), - operator.mongodb_match_query(column_ref(target_column)?, comparison_value), - ) + let match_doc = match ColumnRef::from_comparison_target(target_column) { + ColumnRef::MatchKey(key) => operator.mongodb_match_query(key, comparison_value), + ColumnRef::Expression(expr) => { + operator.mongodb_aggregation_expression(expr, comparison_value) + } + }; + traverse_relationship_path(target_column.relationship_path(), match_doc) } ComparisonValue::Variable { name, @@ -112,10 +126,13 @@ fn make_binary_comparison_selector( } => { let comparison_value = variable_to_mongo_expression(variables, name, variable_type).map(Into::into)?; - traverse_relationship_path( - target_column.relationship_path(), - operator.mongodb_match_query(column_ref(target_column)?, comparison_value), - ) + let match_doc = match ColumnRef::from_comparison_target(target_column) { + ColumnRef::MatchKey(key) => operator.mongodb_match_query(key, comparison_value), + ColumnRef::Expression(expr) => { + operator.mongodb_aggregation_expression(expr, comparison_value) + } + }; + traverse_relationship_path(target_column.relationship_path(), match_doc) } }; Ok(selector) diff --git a/crates/mongodb-agent-common/src/query/make_sort.rs b/crates/mongodb-agent-common/src/query/make_sort.rs index 473dc017..f32e7704 100644 --- a/crates/mongodb-agent-common/src/query/make_sort.rs +++ b/crates/mongodb-agent-common/src/query/make_sort.rs @@ -1,4 +1,4 @@ -use itertools::Itertools; +use itertools::Itertools as _; use mongodb::bson::{bson, Document}; use ndc_models::OrderDirection; @@ -49,6 +49,7 @@ pub fn make_sort(order_by: &OrderBy) -> Result { .collect() } +// TODO: MDB-159 Replace use of [safe_name] with [ColumnRef]. fn column_ref_with_path( name: &String, field_path: Option<&[String]>, diff --git a/crates/mongodb-agent-common/src/query/relations.rs b/crates/mongodb-agent-common/src/query/relations.rs index dfbad643..4b321a3c 100644 --- a/crates/mongodb-agent-common/src/query/relations.rs +++ b/crates/mongodb-agent-common/src/query/relations.rs @@ -79,6 +79,7 @@ fn make_lookup_stage( } } +// TODO: MDB-160 Replace uses of [safe_name] with [ColumnRef]. fn single_column_mapping_lookup( from: String, source_selector: &str, From 351afd057a3f08e537b85a9260d50dd26268578e Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Mon, 10 Jun 2024 13:10:21 -0700 Subject: [PATCH 08/17] fixture updates --- fixtures/connector/sample_mflix/schema/movies.json | 12 +++++++++--- .../ddn/sample_mflix/dataconnectors/sample_mflix.hml | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/fixtures/connector/sample_mflix/schema/movies.json b/fixtures/connector/sample_mflix/schema/movies.json index 31237cc7..bb96aee5 100644 --- a/fixtures/connector/sample_mflix/schema/movies.json +++ b/fixtures/connector/sample_mflix/schema/movies.json @@ -167,7 +167,9 @@ } }, "rating": { - "type": "extendedJSON" + "type": { + "scalar": "double" + } }, "votes": { "type": { @@ -282,9 +284,13 @@ } }, "rating": { - "type": "extendedJSON" + "type": { + "nullable": { + "scalar": "double" + } + } } } } } -} \ No newline at end of file +} diff --git a/fixtures/ddn/sample_mflix/dataconnectors/sample_mflix.hml b/fixtures/ddn/sample_mflix/dataconnectors/sample_mflix.hml index 091a6358..762746bb 100644 --- a/fixtures/ddn/sample_mflix/dataconnectors/sample_mflix.hml +++ b/fixtures/ddn/sample_mflix/dataconnectors/sample_mflix.hml @@ -663,7 +663,7 @@ definition: type: nullable underlying_type: type: named - name: ExtendedJSON + name: Double votes: type: type: named @@ -741,7 +741,7 @@ definition: type: nullable underlying_type: type: named - name: ExtendedJSON + name: Double movies_tomatoes_viewer: fields: meter: @@ -757,7 +757,7 @@ definition: type: nullable underlying_type: type: named - name: ExtendedJSON + name: Double sessions: fields: _id: From 7cfdfffe602e2425b5f597f656f78d5049961c54 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Mon, 10 Jun 2024 13:10:30 -0700 Subject: [PATCH 09/17] exists unrelated selector must use $expr --- crates/mongodb-agent-common/src/query/make_selector.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index add12620..7eeed09f 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -55,10 +55,13 @@ pub fn make_selector( }, None => doc! { format!("{relationship}.0"): { "$exists": true } }, }, - // TODO: This doesn't work - we can't use a variable as the key in a match query ExistsInCollection::Unrelated { unrelated_collection, - } => doc! { format!("$$ROOT.{unrelated_collection}.0"): { "$exists": true } }, + } => doc! { + "$expr": { + "$ne": [format!("$$ROOT.{unrelated_collection}.0"), null] + } + }, }), Expression::BinaryComparisonOperator { column, From bf0925294de3ddd24dcb4425219b1f1867c0a9ac Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Mon, 10 Jun 2024 17:06:59 -0700 Subject: [PATCH 10/17] integration test for filtering based on permissions --- crates/integration-tests/src/graphql.rs | 27 +++++++++--- crates/integration-tests/src/tests/mod.rs | 1 + .../src/tests/permissions.rs | 36 ++++++++++++++++ ...s_according_to_configured_permissions.snap | 42 +++++++++++++++++++ fixtures/ddn/sample_mflix/models/Comments.hml | 21 +++++++++- fixtures/ddn/sample_mflix/models/Users.hml | 15 ++++++- 6 files changed, 134 insertions(+), 8 deletions(-) create mode 100644 crates/integration-tests/src/tests/permissions.rs create mode 100644 crates/integration-tests/src/tests/snapshots/integration_tests__tests__permissions__filters_results_according_to_configured_permissions.snap diff --git a/crates/integration-tests/src/graphql.rs b/crates/integration-tests/src/graphql.rs index d027b056..9e2ba1e8 100644 --- a/crates/integration-tests/src/graphql.rs +++ b/crates/integration-tests/src/graphql.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use reqwest::Client; use serde::{Deserialize, Serialize}; use serde_json::{to_value, Value}; @@ -12,6 +14,8 @@ pub struct GraphQLRequest { operation_name: Option, #[serde(skip_serializing_if = "Option::is_none")] variables: Option, + #[serde(skip_serializing)] + headers: BTreeMap, } impl GraphQLRequest { @@ -20,6 +24,7 @@ impl GraphQLRequest { query, operation_name: Default::default(), variables: Default::default(), + headers: [("x-hasura-role".into(), "admin".into())].into(), } } @@ -33,15 +38,25 @@ impl GraphQLRequest { self } + pub fn headers( + mut self, + headers: impl IntoIterator, + ) -> Self { + self.headers = headers + .into_iter() + .map(|(key, value)| (key.to_string(), value.to_string())) + .collect(); + self + } + pub async fn run(&self) -> anyhow::Result { let graphql_url = get_graphql_url()?; let client = Client::new(); - let response = client - .post(graphql_url) - .header("x-hasura-role", "admin") - .json(self) - .send() - .await?; + let mut request_builder = client.post(graphql_url).json(self); + for (key, value) in self.headers.iter() { + request_builder = request_builder.header(key, value); + } + let response = request_builder.send().await?; let graphql_response = response.json().await?; Ok(graphql_response) } diff --git a/crates/integration-tests/src/tests/mod.rs b/crates/integration-tests/src/tests/mod.rs index 74271150..1d008adf 100644 --- a/crates/integration-tests/src/tests/mod.rs +++ b/crates/integration-tests/src/tests/mod.rs @@ -11,4 +11,5 @@ mod basic; mod local_relationship; mod native_mutation; mod native_query; +mod permissions; mod remote_relationship; diff --git a/crates/integration-tests/src/tests/permissions.rs b/crates/integration-tests/src/tests/permissions.rs new file mode 100644 index 00000000..a807e390 --- /dev/null +++ b/crates/integration-tests/src/tests/permissions.rs @@ -0,0 +1,36 @@ +use crate::graphql_query; +use insta::assert_yaml_snapshot; + +#[tokio::test] +async fn filters_results_according_to_configured_permissions() -> anyhow::Result<()> { + assert_yaml_snapshot!( + graphql_query( + r#" + query { + users(order_by: {id: Asc}) { + id + name + email + comments(limit: 5, order_by: {id: Asc}) { + date + email + text + } + } + comments(limit: 5, order_by: {id: Asc}) { + date + email + text + } + } + "# + ) + .headers([ + ("x-hasura-role", "user"), + ("x-hasura-user-id", "59b99db4cfa9a34dcd7885b6"), + ]) + .run() + .await? + ); + Ok(()) +} diff --git a/crates/integration-tests/src/tests/snapshots/integration_tests__tests__permissions__filters_results_according_to_configured_permissions.snap b/crates/integration-tests/src/tests/snapshots/integration_tests__tests__permissions__filters_results_according_to_configured_permissions.snap new file mode 100644 index 00000000..d990e06c --- /dev/null +++ b/crates/integration-tests/src/tests/snapshots/integration_tests__tests__permissions__filters_results_according_to_configured_permissions.snap @@ -0,0 +1,42 @@ +--- +source: crates/integration-tests/src/tests/permissions.rs +expression: "graphql_query(r#\"\n query {\n users(limit: 5) {\n id\n name\n email\n comments(limit: 5) {\n date\n email\n text\n }\n }\n comments(limit: 5) {\n date\n email\n text\n }\n }\n \"#).headers([(\"x-hasura-role\",\n \"user\"),\n (\"x-hasura-user-id\",\n \"59b99db4cfa9a34dcd7885b6\")]).run().await?" +--- +data: + users: + - id: 59b99db4cfa9a34dcd7885b6 + name: Ned Stark + email: sean_bean@gameofthron.es + comments: + - date: "2000-01-21T03:17:04.000000000Z" + email: sean_bean@gameofthron.es + text: Illo nostrum enim sequi doloremque dolore saepe beatae. Iusto alias odit quaerat id dolores. Dolore quaerat accusantium esse voluptatibus. Aspernatur fuga exercitationem explicabo. + - date: "2005-09-24T16:22:38.000000000Z" + email: sean_bean@gameofthron.es + text: Architecto eos eum iste facilis. Sunt aperiam fugit nihil quas. + - date: "1978-10-22T23:49:33.000000000Z" + email: sean_bean@gameofthron.es + text: Aspernatur ullam blanditiis qui dolorum. Magnam minima suscipit esse. Laudantium voluptates incidunt quia saepe. + - date: "2013-08-15T07:24:54.000000000Z" + email: sean_bean@gameofthron.es + text: Ullam error officiis incidunt praesentium debitis. Rerum repudiandae illum reprehenderit aut non. Iusto eum autem veniam eveniet temporibus sed. Accusamus sint sed veritatis eaque. + - date: "2004-12-22T12:53:43.000000000Z" + email: sean_bean@gameofthron.es + text: Ducimus sunt neque sint nesciunt quis vero. Debitis ex non asperiores voluptatem iusto possimus. Doloremque blanditiis consequuntur explicabo placeat commodi repudiandae. + comments: + - date: "2000-01-21T03:17:04.000000000Z" + email: sean_bean@gameofthron.es + text: Illo nostrum enim sequi doloremque dolore saepe beatae. Iusto alias odit quaerat id dolores. Dolore quaerat accusantium esse voluptatibus. Aspernatur fuga exercitationem explicabo. + - date: "2005-09-24T16:22:38.000000000Z" + email: sean_bean@gameofthron.es + text: Architecto eos eum iste facilis. Sunt aperiam fugit nihil quas. + - date: "1978-10-22T23:49:33.000000000Z" + email: sean_bean@gameofthron.es + text: Aspernatur ullam blanditiis qui dolorum. Magnam minima suscipit esse. Laudantium voluptates incidunt quia saepe. + - date: "2013-08-15T07:24:54.000000000Z" + email: sean_bean@gameofthron.es + text: Ullam error officiis incidunt praesentium debitis. Rerum repudiandae illum reprehenderit aut non. Iusto eum autem veniam eveniet temporibus sed. Accusamus sint sed veritatis eaque. + - date: "2004-12-22T12:53:43.000000000Z" + email: sean_bean@gameofthron.es + text: Ducimus sunt neque sint nesciunt quis vero. Debitis ex non asperiores voluptatem iusto possimus. Doloremque blanditiis consequuntur explicabo placeat commodi repudiandae. +errors: ~ diff --git a/fixtures/ddn/sample_mflix/models/Comments.hml b/fixtures/ddn/sample_mflix/models/Comments.hml index a525e184..5e0cba4f 100644 --- a/fixtures/ddn/sample_mflix/models/Comments.hml +++ b/fixtures/ddn/sample_mflix/models/Comments.hml @@ -57,6 +57,15 @@ definition: - movieId - name - text + - role: user + output: + allowedFields: + - id + - date + - email + - movieId + - name + - text --- kind: ObjectBooleanExpressionType @@ -135,4 +144,14 @@ definition: - role: admin select: filter: null - + - role: user + select: + filter: + relationship: + name: user + predicate: + fieldComparison: + field: id + operator: _eq + value: + sessionVariable: x-hasura-user-id diff --git a/fixtures/ddn/sample_mflix/models/Users.hml b/fixtures/ddn/sample_mflix/models/Users.hml index 48f2c1f4..48ba8510 100644 --- a/fixtures/ddn/sample_mflix/models/Users.hml +++ b/fixtures/ddn/sample_mflix/models/Users.hml @@ -45,6 +45,12 @@ definition: - email - name - password + - role: user + output: + allowedFields: + - id + - email + - name --- kind: ObjectBooleanExpressionType @@ -111,4 +117,11 @@ definition: - role: admin select: filter: null - + - role: user + select: + filter: + fieldComparison: + field: id + operator: _eq + value: + sessionVariable: x-hasura-user-id From b95383eae35a7b1703d0478bdc2f352bcc3f0bb4 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Tue, 11 Jun 2024 13:53:12 -0700 Subject: [PATCH 11/17] update snapshot for config change --- ...ion_tests__tests__basic__runs_a_query.snap | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/crates/integration-tests/src/tests/snapshots/integration_tests__tests__basic__runs_a_query.snap b/crates/integration-tests/src/tests/snapshots/integration_tests__tests__basic__runs_a_query.snap index b90d3938..65c13270 100644 --- a/crates/integration-tests/src/tests/snapshots/integration_tests__tests__basic__runs_a_query.snap +++ b/crates/integration-tests/src/tests/snapshots/integration_tests__tests__basic__runs_a_query.snap @@ -6,52 +6,42 @@ data: movies: - title: Blacksmith Scene imdb: - rating: - $numberDouble: "6.2" + rating: 6.2 votes: 1189 - title: The Great Train Robbery imdb: - rating: - $numberDouble: "7.4" + rating: 7.4 votes: 9847 - title: The Land Beyond the Sunset imdb: - rating: - $numberDouble: "7.1" + rating: 7.1 votes: 448 - title: A Corner in Wheat imdb: - rating: - $numberDouble: "6.6" + rating: 6.6 votes: 1375 - title: "Winsor McCay, the Famous Cartoonist of the N.Y. Herald and His Moving Comics" imdb: - rating: - $numberDouble: "7.3" + rating: 7.3 votes: 1034 - title: Traffic in Souls imdb: - rating: - $numberInt: "6" + rating: 6 votes: 371 - title: Gertie the Dinosaur imdb: - rating: - $numberDouble: "7.3" + rating: 7.3 votes: 1837 - title: In the Land of the Head Hunters imdb: - rating: - $numberDouble: "5.8" + rating: 5.8 votes: 223 - title: The Perils of Pauline imdb: - rating: - $numberDouble: "7.6" + rating: 7.6 votes: 744 - title: The Birth of a Nation imdb: - rating: - $numberDouble: "6.8" + rating: 6.8 votes: 15715 errors: ~ From caebfc1a048e468abc4012cb020bdb95488a962d Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Tue, 11 Jun 2024 15:19:16 -0700 Subject: [PATCH 12/17] test root column reference that shouldn't reference $$ROOT --- .../src/query/make_selector.rs | 57 ++++++++++++++++++- .../mongodb-agent-common/src/test_helpers.rs | 4 +- 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index 7eeed09f..c7dda6d1 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -316,7 +316,62 @@ mod tests { let plan = plan_for_query_request(&config, request)?; let pipeline = pipeline_for_query_request(&config, &plan)?; - let expected_pipeline = bson!([]); + let expected_pipeline = bson!([ + { + "$lookup": { + "from": "Album", + "localField": "AlbumId", + "foreignField": "AlbumId", + "as": "Albums", + "pipeline": [ + { + "$lookup": { + "from": "Track", + "localField": "AlbumId", + "foreignField": "AlbumId", + "as": "Tracks", + "let": { + "scope_0": "$$ROOT", + }, + "pipeline": [ + { + "$match": { + "$expr": { "$eq": ["$Name", "$$scope_0.Title"] }, + }, + }, + { + "$replaceWith": { + "Milliseconds": { "$ifNull": ["$Milliseconds", null] } + } + }, + ] + } + }, + { + "$match": { + "Tracks": { + "$elemMatch": { + "Milliseconds": { "$gt": 30_000 } + } + } + } + }, + { + "$replaceWith": { + "Tracks": { "$getField": { "$literal": "Tracks" } } + } + }, + ], + }, + }, + { + "$replaceWith": { + "Albums": { + "rows": [] + } + } + }, + ]); assert_eq!(bson::to_bson(&pipeline).unwrap(), expected_pipeline); Ok(()) diff --git a/crates/mongodb-agent-common/src/test_helpers.rs b/crates/mongodb-agent-common/src/test_helpers.rs index 406ecc4d..d1058709 100644 --- a/crates/mongodb-agent-common/src/test_helpers.rs +++ b/crates/mongodb-agent-common/src/test_helpers.rs @@ -143,11 +143,11 @@ pub fn chinook_relationships() -> BTreeMap { [ ( "Albums", - ndc_test_helpers::relationship("Album", [("AlbumId", "AlbumId")]), + ndc_test_helpers::relationship("Album", [("ArtistId", "ArtistId")]), ), ( "Tracks", - ndc_test_helpers::relationship("Track", [("TrackId", "TrackId")]), + ndc_test_helpers::relationship("Track", [("AlbumId", "AlbumId")]), ), ( "Genre", From 8509b23c388be4776b9f314c9efa00da570b0913 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Tue, 11 Jun 2024 18:47:21 -0700 Subject: [PATCH 13/17] track scope in query plan, replace root column references with scope references --- .../src/mongodb/selection.rs | 6 ++- .../src/query/column_ref.rs | 43 ++++++++++++----- .../src/query/make_selector.rs | 7 ++- .../src/query/relations.rs | 47 +++++++++++++++++-- crates/ndc-query-plan/src/lib.rs | 2 +- .../src/plan_for_query_request/mod.rs | 9 ++-- .../plan_test_helpers/query.rs | 9 ++++ .../query_plan_state.rs | 46 ++++++++++++------ .../src/plan_for_query_request/tests.rs | 6 ++- .../type_annotated_field.rs | 8 +++- .../unify_relationship_references.rs | 2 + crates/ndc-query-plan/src/query_plan.rs | 25 ++++++++-- 12 files changed, 162 insertions(+), 48 deletions(-) diff --git a/crates/mongodb-agent-common/src/mongodb/selection.rs b/crates/mongodb-agent-common/src/mongodb/selection.rs index 56edff9a..b2ad3b04 100644 --- a/crates/mongodb-agent-common/src/mongodb/selection.rs +++ b/crates/mongodb-agent-common/src/mongodb/selection.rs @@ -323,6 +323,10 @@ mod tests { let query_plan = plan_for_query_request(&students_config(), query_request)?; + // TODO: This selection illustrates that we end up looking up the relationship twice (once + // with the key `class_students`, and then with the key `class_students_0`). This is + // because the queries on the two relationships have different scope names. The query would + // work with just one lookup. Can we do that optimization? let selection = Selection::from_query_request(&query_plan)?; assert_eq!( Into::::into(selection), @@ -340,7 +344,7 @@ mod tests { "students": { "rows": { "$map": { - "input": { "$getField": { "$literal": "class_students" } }, + "input": { "$getField": { "$literal": "class_students_0" } }, "in": { "student_name": "$$this.student_name" }, diff --git a/crates/mongodb-agent-common/src/query/column_ref.rs b/crates/mongodb-agent-common/src/query/column_ref.rs index fd33829e..2a584724 100644 --- a/crates/mongodb-agent-common/src/query/column_ref.rs +++ b/crates/mongodb-agent-common/src/query/column_ref.rs @@ -1,6 +1,7 @@ use std::{borrow::Cow, iter::once}; use mongodb::bson::{doc, Bson}; +use ndc_query_plan::Scope; use crate::{mongo_query_plan::ComparisonTarget, mongodb::sanitize::is_name_safe}; @@ -49,13 +50,16 @@ fn from_target(column: &ComparisonTarget) -> ColumnRef<'_> { // one element, and we know it does because we start the iterable with `name` from_path(None, name_and_path).unwrap() } - ComparisonTarget::RootCollectionColumn { - name, field_path, .. + ComparisonTarget::ColumnInScope { + name, + field_path, + scope, + .. } => { // "$$ROOT" is not actually a valid match key, but cheating here makes the // implementation much simpler. This match branch produces a ColumnRef::Expression // in all cases. - let init = ColumnRef::MatchKey("$ROOT".into()); + let init = ColumnRef::MatchKey(format!("${}", name_from_scope(scope)).into()); // The None case won't come up if the input to [from_target_helper] has at least // one element, and we know it does because we start the iterable with `name` let col_ref = @@ -69,6 +73,13 @@ fn from_target(column: &ComparisonTarget) -> ColumnRef<'_> { } } +pub fn name_from_scope(scope: &Scope) -> Cow<'_, str> { + match scope { + Scope::Root => "scope_root".into(), + Scope::Named(name) => name.into(), + } +} + fn from_path<'a>( init: Option>, path: impl IntoIterator, @@ -140,6 +151,7 @@ mod tests { use configuration::MongoScalarType; use mongodb::bson::doc; use mongodb_support::BsonScalarType; + use ndc_query_plan::Scope; use pretty_assertions::assert_eq; use crate::mongo_query_plan::{ComparisonTarget, Type}; @@ -255,29 +267,31 @@ mod tests { #[test] fn produces_dot_separated_root_column_reference() -> anyhow::Result<()> { - let target = ComparisonTarget::RootCollectionColumn { + let target = ComparisonTarget::ColumnInScope { name: "field".into(), field_path: Some(vec!["prop1".into(), "prop2".into()]), column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + scope: Scope::Root, }; let actual = ColumnRef::from_comparison_target(&target); - let expected = ColumnRef::Expression("$$ROOT.field.prop1.prop2".into()); + let expected = ColumnRef::Expression("$$scope_root.field.prop1.prop2".into()); assert_eq!(actual, expected); Ok(()) } #[test] fn escapes_unsafe_field_name_in_root_column_reference() -> anyhow::Result<()> { - let target = ComparisonTarget::RootCollectionColumn { + let target = ComparisonTarget::ColumnInScope { name: "$field".into(), field_path: Default::default(), column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + scope: Scope::Named("scope_0".into()), }; let actual = ColumnRef::from_comparison_target(&target); let expected = ColumnRef::Expression( doc! { "$getField": { - "input": "$$ROOT", + "input": "$$scope_0", "field": { "$literal": "$field" }, } } @@ -289,16 +303,17 @@ mod tests { #[test] fn escapes_unsafe_nested_property_name_in_root_column_reference() -> anyhow::Result<()> { - let target = ComparisonTarget::RootCollectionColumn { + let target = ComparisonTarget::ColumnInScope { name: "field".into(), field_path: Some(vec!["$unsafe_name".into()]), column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + scope: Scope::Root, }; let actual = ColumnRef::from_comparison_target(&target); let expected = ColumnRef::Expression( doc! { "$getField": { - "input": "$$ROOT.field", + "input": "$$scope_root.field", "field": { "$literal": "$unsafe_name" }, } } @@ -311,10 +326,11 @@ mod tests { #[test] fn escapes_multiple_layers_of_nested_property_names_in_root_column_reference( ) -> anyhow::Result<()> { - let target = ComparisonTarget::RootCollectionColumn { + let target = ComparisonTarget::ColumnInScope { name: "$field".into(), field_path: Some(vec!["$unsafe_name1".into(), "$unsafe_name2".into()]), column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + scope: Scope::Root, }; let actual = ColumnRef::from_comparison_target(&target); let expected = ColumnRef::Expression( @@ -324,7 +340,7 @@ mod tests { "$getField": { "input": { "$getField": { - "input": "$$ROOT", + "input": "$$scope_root", "field": { "$literal": "$field" }, } }, @@ -342,16 +358,17 @@ mod tests { #[test] fn escapes_unsafe_deeply_nested_property_name_in_root_column_reference() -> anyhow::Result<()> { - let target = ComparisonTarget::RootCollectionColumn { + let target = ComparisonTarget::ColumnInScope { name: "field".into(), field_path: Some(vec!["prop1".into(), "$unsafe_name".into()]), column_type: Type::Scalar(MongoScalarType::Bson(BsonScalarType::String)), + scope: Scope::Root, }; let actual = ColumnRef::from_comparison_target(&target); let expected = ColumnRef::Expression( doc! { "$getField": { - "input": "$$ROOT.field.prop1", + "input": "$$scope_root.field.prop1", "field": { "$literal": "$unsafe_name" }, } } diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index c7dda6d1..416d4d31 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -320,9 +320,12 @@ mod tests { { "$lookup": { "from": "Album", - "localField": "AlbumId", - "foreignField": "AlbumId", + "localField": "ArtistId", + "foreignField": "ArtistId", "as": "Albums", + "let": { + "scope_root": "$$ROOT", + }, "pipeline": [ { "$lookup": { diff --git a/crates/mongodb-agent-common/src/query/relations.rs b/crates/mongodb-agent-common/src/query/relations.rs index 4b321a3c..c700a653 100644 --- a/crates/mongodb-agent-common/src/query/relations.rs +++ b/crates/mongodb-agent-common/src/query/relations.rs @@ -2,11 +2,12 @@ use std::collections::BTreeMap; use itertools::Itertools as _; use mongodb::bson::{doc, Bson, Document}; -use ndc_query_plan::VariableSet; +use ndc_query_plan::{Scope, VariableSet}; use crate::mongo_query_plan::{MongoConfiguration, Query, QueryPlan}; use crate::mongodb::sanitize::safe_name; use crate::mongodb::Pipeline; +use crate::query::column_ref::name_from_scope; use crate::{ interface_types::MongoAgentError, mongodb::{sanitize::variable, Stage}, @@ -25,7 +26,11 @@ pub fn pipeline_for_relations( query_plan: &QueryPlan, ) -> Result { let QueryPlan { query, .. } = query_plan; - let Query { relationships, .. } = query; + let Query { + relationships, + scope, + .. + } = query; // Lookup stages perform the join for each relationship, and assign the list of rows or mapping // of aggregate results to a field in the parent document. @@ -49,6 +54,7 @@ pub fn pipeline_for_relations( &relationship.column_mapping, name.to_owned(), lookup_pipeline, + scope.as_ref(), ) }) .try_collect()?; @@ -61,6 +67,7 @@ fn make_lookup_stage( column_mapping: &BTreeMap, r#as: String, lookup_pipeline: Pipeline, + scope: Option<&Scope>, ) -> Result { // If we are mapping a single field in the source collection to a single field in the target // collection then we can use the correlated subquery syntax. @@ -73,9 +80,10 @@ fn make_lookup_stage( target_selector, r#as, lookup_pipeline, + scope, ) } else { - multiple_column_mapping_lookup(from, column_mapping, r#as, lookup_pipeline) + multiple_column_mapping_lookup(from, column_mapping, r#as, lookup_pipeline, scope) } } @@ -86,12 +94,17 @@ fn single_column_mapping_lookup( target_selector: &str, r#as: String, lookup_pipeline: Pipeline, + scope: Option<&Scope>, ) -> Result { Ok(Stage::Lookup { from: Some(from), local_field: Some(safe_name(source_selector)?.into_owned()), foreign_field: Some(safe_name(target_selector)?.into_owned()), - r#let: None, + r#let: scope.map(|scope| { + doc! { + name_from_scope(scope): "$$ROOT" + } + }), pipeline: if lookup_pipeline.is_empty() { None } else { @@ -106,8 +119,9 @@ fn multiple_column_mapping_lookup( column_mapping: &BTreeMap, r#as: String, lookup_pipeline: Pipeline, + scope: Option<&Scope>, ) -> Result { - let let_bindings: Document = column_mapping + let mut let_bindings: Document = column_mapping .keys() .map(|local_field| { Ok(( @@ -117,6 +131,10 @@ fn multiple_column_mapping_lookup( }) .collect::>()?; + if let Some(scope) = scope { + let_bindings.insert(name_from_scope(scope), "$$ROOT"); + } + // Creating an intermediate Vec and sorting it is done just to help with testing. // A stable order for matchers makes it easier to assert equality between actual // and expected pipelines. @@ -208,6 +226,9 @@ mod tests { "from": "students", "localField": "_id", "foreignField": "classId", + "let": { + "scope_root": "$$ROOT", + }, "pipeline": [ { "$replaceWith": { @@ -294,6 +315,9 @@ mod tests { "from": "classes", "localField": "classId", "foreignField": "_id", + "let": { + "scope_root": "$$ROOT", + }, "pipeline": [ { "$replaceWith": { @@ -378,6 +402,7 @@ mod tests { "let": { "v_year": "$year", "v_title": "$title", + "scope_root": "$$ROOT", }, "pipeline": [ { @@ -484,12 +509,18 @@ mod tests { "from": "students", "localField": "_id", "foreignField": "class_id", + "let": { + "scope_root": "$$ROOT", + }, "pipeline": [ { "$lookup": { "from": "assignments", "localField": "_id", "foreignField": "student_id", + "let": { + "scope_0": "$$ROOT", + }, "pipeline": [ { "$replaceWith": { @@ -592,6 +623,9 @@ mod tests { "from": "students", "localField": "_id", "foreignField": "classId", + "let": { + "scope_root": "$$ROOT", + }, "pipeline": [ { "$facet": { @@ -703,6 +737,9 @@ mod tests { "from": "movies", "localField": "movie_id", "foreignField": "_id", + "let": { + "scope_root": "$$ROOT", + }, "pipeline": [ { "$replaceWith": { diff --git a/crates/ndc-query-plan/src/lib.rs b/crates/ndc-query-plan/src/lib.rs index 032382cb..7ce74bd1 100644 --- a/crates/ndc-query-plan/src/lib.rs +++ b/crates/ndc-query-plan/src/lib.rs @@ -12,6 +12,6 @@ pub use query_plan::{ Aggregate, AggregateFunctionDefinition, ComparisonOperatorDefinition, ComparisonTarget, ComparisonValue, ConnectorTypes, ExistsInCollection, Expression, Field, NestedArray, NestedField, NestedObject, OrderBy, OrderByElement, OrderByTarget, Query, QueryPlan, - Relationship, Relationships, VariableSet, + Relationship, Relationships, Scope, VariableSet, }; pub use type_system::{inline_object_types, ObjectType, Type}; diff --git a/crates/ndc-query-plan/src/plan_for_query_request/mod.rs b/crates/ndc-query-plan/src/plan_for_query_request/mod.rs index fd5c5d46..65a68aca 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/mod.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/mod.rs @@ -12,7 +12,7 @@ mod tests; use std::collections::VecDeque; -use crate::{self as plan, type_annotated_field, ObjectType, QueryPlan}; +use crate::{self as plan, type_annotated_field, ObjectType, QueryPlan, Scope}; use indexmap::IndexMap; use itertools::Itertools; use ndc::{ExistsInCollection, QueryRequest}; @@ -34,12 +34,13 @@ pub fn plan_for_query_request( let mut plan_state = QueryPlanState::new(context, &request.collection_relationships); let collection_object_type = context.find_collection_object_type(&request.collection)?; - let query = plan_for_query( + let mut query = plan_for_query( &mut plan_state, &collection_object_type, &collection_object_type, request.query, )?; + query.scope = Some(Scope::Root); let unrelated_collections = plan_state.into_unrelated_collections(); @@ -106,6 +107,7 @@ pub fn plan_for_query( offset, predicate, relationships: plan_state.into_relationships(), + scope: None, }) } @@ -512,10 +514,11 @@ fn plan_for_comparison_target( } ndc::ComparisonTarget::RootCollectionColumn { name } => { let column_type = find_object_field(root_collection_object_type, &name)?.clone(); - Ok(plan::ComparisonTarget::RootCollectionColumn { + Ok(plan::ComparisonTarget::ColumnInScope { name, field_path: Default::default(), // TODO: propagate this after ndc-spec update column_type, + scope: plan_state.scope.clone(), }) } } diff --git a/crates/ndc-query-plan/src/plan_for_query_request/plan_test_helpers/query.rs b/crates/ndc-query-plan/src/plan_for_query_request/plan_test_helpers/query.rs index 0f75a3b1..4bad3cac 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/plan_test_helpers/query.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/plan_test_helpers/query.rs @@ -2,6 +2,7 @@ use indexmap::IndexMap; use crate::{ Aggregate, ConnectorTypes, Expression, Field, OrderBy, OrderByElement, Query, Relationships, + Scope, }; #[derive(Clone, Debug, Default)] @@ -14,6 +15,7 @@ pub struct QueryBuilder { order_by: Option>, predicate: Option>, relationships: Relationships, + scope: Option, } #[allow(dead_code)] @@ -32,6 +34,7 @@ impl QueryBuilder { order_by: None, predicate: None, relationships: Default::default(), + scope: None, } } @@ -72,6 +75,11 @@ impl QueryBuilder { self.predicate = Some(expression); self } + + pub fn scope(mut self, scope: Scope) -> Self { + self.scope = Some(scope); + self + } } impl From> for Query { @@ -85,6 +93,7 @@ impl From> for Query { order_by: value.order_by, predicate: value.predicate, relationships: value.relationships, + scope: value.scope, } } } diff --git a/crates/ndc-query-plan/src/plan_for_query_request/query_plan_state.rs b/crates/ndc-query-plan/src/plan_for_query_request/query_plan_state.rs index 2d90ee6f..5ea76bb0 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/query_plan_state.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/query_plan_state.rs @@ -8,8 +8,9 @@ use ndc::RelationshipArgument; use ndc_models as ndc; use crate::{ - plan_for_query_request::helpers::lookup_relationship, query_plan::UnrelatedJoin, Query, - QueryContext, QueryPlanError, Relationship, + plan_for_query_request::helpers::lookup_relationship, + query_plan::{Scope, UnrelatedJoin}, + Query, QueryContext, QueryPlanError, Relationship, }; use super::unify_relationship_references::unify_relationship_references; @@ -26,15 +27,13 @@ type Result = std::result::Result; pub struct QueryPlanState<'a, T: QueryContext> { pub context: &'a T, pub collection_relationships: &'a BTreeMap, + pub scope: Scope, relationships: BTreeMap>, unrelated_joins: Rc>>>, - counter: Rc>, + relationship_name_counter: Rc>, + scope_name_counter: Rc>, } -// TODO: We may be able to unify relationships that are not identical, but that are compatible. -// For example two relationships that differ only in field selection could be merged into one -// with the union of both field selections. - impl QueryPlanState<'_, T> { pub fn new<'a>( query_context: &'a T, @@ -43,9 +42,11 @@ impl QueryPlanState<'_, T> { QueryPlanState { context: query_context, collection_relationships, + scope: Scope::Root, relationships: Default::default(), unrelated_joins: Rc::new(RefCell::new(Default::default())), - counter: Rc::new(Cell::new(0)), + relationship_name_counter: Rc::new(Cell::new(0)), + scope_name_counter: Rc::new(Cell::new(0)), } } @@ -56,12 +57,19 @@ impl QueryPlanState<'_, T> { QueryPlanState { context: self.context, collection_relationships: self.collection_relationships, + scope: self.scope.clone(), relationships: Default::default(), unrelated_joins: self.unrelated_joins.clone(), - counter: self.counter.clone(), + relationship_name_counter: self.relationship_name_counter.clone(), + scope_name_counter: self.scope_name_counter.clone(), } } + pub fn new_scope(&mut self) { + let name = self.unique_scope_name(); + self.scope = Scope::Named(name) + } + /// Record a relationship reference so that it is added to the list of joins for the query /// plan, and get back an identifier than can be used to access the joined collection. pub fn register_relationship( @@ -94,7 +102,7 @@ impl QueryPlanState<'_, T> { // relationship that we just removed. self.relationships .insert(existing_key, already_registered_relationship); - let key = self.unique_name(ndc_relationship_name); + let key = self.unique_relationship_name(ndc_relationship_name); (key, relationship) } } @@ -121,7 +129,7 @@ impl QueryPlanState<'_, T> { query, }; - let key = self.unique_name(format!("__join_{}", join.target_collection)); + let key = self.unique_relationship_name(format!("__join_{}", join.target_collection)); self.unrelated_joins.borrow_mut().insert(key.clone(), join); // Unlike [Self::register_relationship] this method does not return a reference to the @@ -138,14 +146,24 @@ impl QueryPlanState<'_, T> { self.relationships } + pub fn into_scope(self) -> Scope { + self.scope + } + /// Use this with the top-level plan to get unrelated joins. pub fn into_unrelated_collections(self) -> BTreeMap> { self.unrelated_joins.take() } - fn unique_name(&mut self, name: String) -> String { - let count = self.counter.get(); - self.counter.set(count + 1); + fn unique_relationship_name(&mut self, name: impl std::fmt::Display) -> String { + let count = self.relationship_name_counter.get(); + self.relationship_name_counter.set(count + 1); format!("{name}_{count}") } + + fn unique_scope_name(&mut self) -> String { + let count = self.scope_name_counter.get(); + self.scope_name_counter.set(count + 1); + format!("scope_{count}") + } } diff --git a/crates/ndc-query-plan/src/plan_for_query_request/tests.rs b/crates/ndc-query-plan/src/plan_for_query_request/tests.rs index 69a46b51..4a878703 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/tests.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/tests.rs @@ -394,12 +394,13 @@ fn translates_root_column_references() -> Result<(), anyhow::Error> { }, operator: plan_test_helpers::ComparisonOperator::Equal, value: plan::ComparisonValue::Column { - column: plan::ComparisonTarget::RootCollectionColumn { + column: plan::ComparisonTarget::ColumnInScope { name: "id".into(), field_path: Default::default(), column_type: plan::Type::Scalar( plan_test_helpers::ScalarType::Int, ), + scope: plan::Scope::Root, }, }, }, @@ -455,8 +456,9 @@ fn translates_root_column_references() -> Result<(), anyhow::Error> { }, operator: plan_test_helpers::ComparisonOperator::Equal, value: plan::ComparisonValue::Column { - column: plan::ComparisonTarget::RootCollectionColumn { + column: plan::ComparisonTarget::ColumnInScope { name: "id".into(), + scope: plan::Scope::Root, column_type: plan::Type::Scalar( plan_test_helpers::ScalarType::Int, ), diff --git a/crates/ndc-query-plan/src/plan_for_query_request/type_annotated_field.rs b/crates/ndc-query-plan/src/plan_for_query_request/type_annotated_field.rs index 53193cbc..fe1b720b 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/type_annotated_field.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/type_annotated_field.rs @@ -69,12 +69,16 @@ fn type_annotated_field_helper( .context .find_collection_object_type(&relationship_def.target_collection)?; - let query_plan = plan_for_query( - &mut plan_state.state_for_subquery(), + let mut subquery_state = plan_state.state_for_subquery(); + subquery_state.new_scope(); + + let mut query_plan = plan_for_query( + &mut subquery_state, &related_collection_type, &related_collection_type, *query, )?; + query_plan.scope = Some(subquery_state.into_scope()); // It's important to get fields and aggregates from the constructed relationship query // before it is registered because at that point fields and aggregates will be merged diff --git a/crates/ndc-query-plan/src/plan_for_query_request/unify_relationship_references.rs b/crates/ndc-query-plan/src/plan_for_query_request/unify_relationship_references.rs index def0552b..57eb45d3 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/unify_relationship_references.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/unify_relationship_references.rs @@ -88,6 +88,7 @@ where (a.offset != b.offset, "offset"), (a.order_by != b.order_by, "order_by"), (predicate_a != predicate_b, "predicate"), + (a.scope != b.scope, "scope"), ] .into_iter() .filter_map(|(is_mismatch, field_name)| if is_mismatch { Some(field_name) } else { None }) @@ -106,6 +107,7 @@ where order_by: a.order_by, predicate: predicate_a, relationships: unify_nested_relationships(a.relationships, b.relationships)?, + scope: a.scope, }; Ok(query) } diff --git a/crates/ndc-query-plan/src/query_plan.rs b/crates/ndc-query-plan/src/query_plan.rs index e4e10192..323b7f8e 100644 --- a/crates/ndc-query-plan/src/query_plan.rs +++ b/crates/ndc-query-plan/src/query_plan.rs @@ -55,6 +55,11 @@ pub struct Query { /// Relationships referenced by fields and expressions in this query or sub-query. Does not /// include relationships in sub-queries nested under this one. pub relationships: Relationships, + + /// Some relationship references may introduce a named "scope" so that other parts of the query + /// request can reference fields of documents in the related collection. The connector must + /// introduce a variable, or something similar, for such references. + pub scope: Option, } impl Query { @@ -93,6 +98,12 @@ pub struct UnrelatedJoin { pub query: Query, } +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Scope { + Root, + Named(String), +} + #[derive(Derivative)] #[derivative(Clone(bound = ""), Debug(bound = ""), PartialEq(bound = ""))] pub enum Aggregate { @@ -227,7 +238,7 @@ impl Expression { Either::Right(iter::empty()) } } - t @ ComparisonTarget::RootCollectionColumn { .. } => Either::Left(iter::once(t)), + t @ ComparisonTarget::ColumnInScope { .. } => Either::Left(iter::once(t)), } } } @@ -299,10 +310,14 @@ pub enum ComparisonTarget { /// fields for the [QueryPlan]. path: Vec, }, - RootCollectionColumn { + ColumnInScope { /// The name of the column name: String, + /// The named scope that identifies the collection to reference. This corresponds to the + /// `scope` field of the [Query] type. + scope: Scope, + /// Path to a nested field within an object column field_path: Option>, @@ -314,14 +329,14 @@ impl ComparisonTarget { pub fn column_name(&self) -> &str { match self { ComparisonTarget::Column { name, .. } => name, - ComparisonTarget::RootCollectionColumn { name, .. } => name, + ComparisonTarget::ColumnInScope { name, .. } => name, } } pub fn relationship_path(&self) -> &[String] { match self { ComparisonTarget::Column { path, .. } => path, - ComparisonTarget::RootCollectionColumn { .. } => &[], + ComparisonTarget::ColumnInScope { .. } => &[], } } } @@ -330,7 +345,7 @@ impl ComparisonTarget { pub fn get_column_type(&self) -> &Type { match self { ComparisonTarget::Column { column_type, .. } => column_type, - ComparisonTarget::RootCollectionColumn { column_type, .. } => column_type, + ComparisonTarget::ColumnInScope { column_type, .. } => column_type, } } } From 2fe01411a627f483a222c20cf23ca0ab2cc279c0 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Tue, 11 Jun 2024 19:22:00 -0700 Subject: [PATCH 14/17] unify scopes --- .../unify_relationship_references.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/ndc-query-plan/src/plan_for_query_request/unify_relationship_references.rs b/crates/ndc-query-plan/src/plan_for_query_request/unify_relationship_references.rs index 57eb45d3..b011b2ba 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/unify_relationship_references.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/unify_relationship_references.rs @@ -88,7 +88,6 @@ where (a.offset != b.offset, "offset"), (a.order_by != b.order_by, "order_by"), (predicate_a != predicate_b, "predicate"), - (a.scope != b.scope, "scope"), ] .into_iter() .filter_map(|(is_mismatch, field_name)| if is_mismatch { Some(field_name) } else { None }) @@ -98,6 +97,14 @@ where return Err(RelationshipUnificationError::Mismatch(mismatching_fields)); } + let scope = unify_options(a.scope, b.scope, |a, b| { + if a == b { + Ok(a) + } else { + Err(RelationshipUnificationError::Mismatch(vec!["scope"])) + } + })?; + let query = Query { aggregates: unify_aggregates(a.aggregates, b.aggregates)?, fields: unify_fields(a.fields, b.fields)?, @@ -107,7 +114,7 @@ where order_by: a.order_by, predicate: predicate_a, relationships: unify_nested_relationships(a.relationships, b.relationships)?, - scope: a.scope, + scope, }; Ok(query) } From 706ae6ecf7aa9e04d5ac167770615911646afe48 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Tue, 11 Jun 2024 19:34:05 -0700 Subject: [PATCH 15/17] update plan tests --- .../src/plan_for_query_request/tests.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/crates/ndc-query-plan/src/plan_for_query_request/tests.rs b/crates/ndc-query-plan/src/plan_for_query_request/tests.rs index 4a878703..e834b186 100644 --- a/crates/ndc-query-plan/src/plan_for_query_request/tests.rs +++ b/crates/ndc-query-plan/src/plan_for_query_request/tests.rs @@ -246,10 +246,13 @@ fn translates_query_request_relationships() -> Result<(), anyhow::Error> { column_mapping: [("_id".into(), "class_id".into())].into(), relationship_type: RelationshipType::Array, arguments: Default::default(), - query: Default::default(), + query: Query { + scope: Some(plan::Scope::Named("scope_1".into())), + ..Default::default() + }, }, - )] - .into(), + )].into(), + scope: Some(plan::Scope::Named("scope_0".into())), ..Default::default() }, }, @@ -290,6 +293,7 @@ fn translates_query_request_relationships() -> Result<(), anyhow::Error> { )] .into(), ), + scope: Some(plan::Scope::Root), ..Default::default() }, }; @@ -435,6 +439,7 @@ fn translates_root_column_references() -> Result<(), anyhow::Error> { )] .into(), ), + scope: Some(plan::Scope::Root), ..Default::default() }, unrelated_collections: [( @@ -535,6 +540,7 @@ fn translates_aggregate_selections() -> Result<(), anyhow::Error> { ] .into(), ), + scope: Some(plan::Scope::Root), ..Default::default() }, arguments: Default::default(), @@ -711,11 +717,13 @@ fn translates_relationships_in_fields_predicates_and_orderings() -> Result<(), a ] .into(), ), + scope: Some(plan::Scope::Named("scope_0".into())), ..Default::default() }, }, )] .into(), + scope: Some(plan::Scope::Root), ..Default::default() }, arguments: Default::default(), @@ -824,6 +832,7 @@ fn translates_nested_fields() -> Result<(), anyhow::Error> { ] .into(), ), + scope: Some(plan::Scope::Root), ..Default::default() }, arguments: Default::default(), @@ -911,11 +920,13 @@ fn translates_predicate_referencing_field_of_related_collection() -> anyhow::Res )] .into(), ), + scope: Some(plan::Scope::Named("scope_0".into())), ..Default::default() }, }, )] .into(), + scope: Some(plan::Scope::Root), ..Default::default() }, arguments: Default::default(), From 0a0e03a50bb0a8f4473fe6929276314168e116e4 Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Tue, 11 Jun 2024 19:55:50 -0700 Subject: [PATCH 16/17] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b8dd8c6..b6efc455 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ This changelog documents the changes between release versions. ## [Unreleased] - Support filtering and sorting by fields of related collections ([#72](https://github.com/hasura/ndc-mongodb/pull/72)) +- Support for root collection column references ([#75](https://github.com/hasura/ndc-mongodb/pull/75)) +- Fix for databases with field names that begin with a dollar sign, or that contain dots ([#74](https://github.com/hasura/ndc-mongodb/pull/74)) +- Implement column-to-column comparisons within the same collection ([#74](https://github.com/hasura/ndc-mongodb/pull/74)) ## [0.0.6] - 2024-05-01 - Enables logging events from the MongoDB driver by setting the `RUST_LOG` variable ([#67](https://github.com/hasura/ndc-mongodb/pull/67)) From 3bf52d24effe4d19f9b131afd17754aea6a7026d Mon Sep 17 00:00:00 2001 From: Jesse Hallett Date: Wed, 12 Jun 2024 13:33:07 -0700 Subject: [PATCH 17/17] add ticket number to todo comment --- crates/mongodb-agent-common/src/mongodb/selection.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/mongodb-agent-common/src/mongodb/selection.rs b/crates/mongodb-agent-common/src/mongodb/selection.rs index b2ad3b04..cc7d7721 100644 --- a/crates/mongodb-agent-common/src/mongodb/selection.rs +++ b/crates/mongodb-agent-common/src/mongodb/selection.rs @@ -323,10 +323,10 @@ mod tests { let query_plan = plan_for_query_request(&students_config(), query_request)?; - // TODO: This selection illustrates that we end up looking up the relationship twice (once - // with the key `class_students`, and then with the key `class_students_0`). This is - // because the queries on the two relationships have different scope names. The query would - // work with just one lookup. Can we do that optimization? + // TODO: MDB-164 This selection illustrates that we end up looking up the relationship + // twice (once with the key `class_students`, and then with the key `class_students_0`). + // This is because the queries on the two relationships have different scope names. The + // query would work with just one lookup. Can we do that optimization? let selection = Selection::from_query_request(&query_plan)?; assert_eq!( Into::::into(selection),