这是indexloc提供的服务,不要输入任何密码
Skip to content

support more query operators in native query pipeline type inference #121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 173 additions & 40 deletions crates/cli/src/native_query/pipeline/match_stage.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use mongodb::bson::{Bson, Document};
use mongodb_support::BsonScalarType;
use nonempty::nonempty;
use nonempty::NonEmpty;

use crate::native_query::{
aggregation_expression::infer_type_from_aggregation_expression,
Expand Down Expand Up @@ -41,61 +41,185 @@ fn check_match_doc_for_parameters_helper(
input_document_type: &TypeConstraint,
match_doc: Document,
) -> Result<()> {
if match_doc.keys().any(|key| key.starts_with("$")) {
analyze_document_with_match_operators(
context,
desired_object_type_name,
input_document_type,
match_doc,
)
} else {
analyze_document_with_field_name_keys(
context,
desired_object_type_name,
input_document_type,
match_doc,
)
for (key, value) in match_doc {
if key.starts_with("$") {
analyze_match_operator(
context,
desired_object_type_name,
input_document_type,
key,
value,
)?;
} else {
analyze_input_doc_field(
context,
desired_object_type_name,
input_document_type,
key,
value,
)?;
}
}
Ok(())
}

fn analyze_document_with_field_name_keys(
fn analyze_input_doc_field(
context: &mut PipelineTypeContext<'_>,
desired_object_type_name: &str,
input_document_type: &TypeConstraint,
match_doc: Document,
field_name: String,
match_expression: Bson,
) -> Result<()> {
for (field_name, match_expression) in match_doc {
let field_type = TypeConstraint::FieldOf {
target_type: Box::new(input_document_type.clone()),
path: nonempty![field_name.into()],
};
analyze_match_expression(
context,
desired_object_type_name,
&field_type,
match_expression,
)?;
}
Ok(())
let field_type = TypeConstraint::FieldOf {
target_type: Box::new(input_document_type.clone()),
path: NonEmpty::from_vec(field_name.split(".").map(Into::into).collect())
.ok_or_else(|| Error::Other("object field reference is an empty string".to_string()))?,
};
analyze_match_expression(
context,
desired_object_type_name,
&field_type,
match_expression,
)
}

fn analyze_document_with_match_operators(
fn analyze_match_operator(
context: &mut PipelineTypeContext<'_>,
desired_object_type_name: &str,
field_type: &TypeConstraint,
match_doc: Document,
operator: String,
match_expression: Bson,
) -> Result<()> {
for (operator, match_expression) in match_doc {
match operator.as_ref() {
"$eq" => analyze_match_expression(
match operator.as_ref() {
"$and" | "$or" | "$nor" => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So once again we hardcode every function we want to support? This does at least result in very concrete checking code.

if let Bson::Array(array) = match_expression {
for expression in array {
check_match_doc_for_parameters_helper(
context,
desired_object_type_name,
field_type,
expression
.as_document()
.ok_or_else(|| {
Error::Other(format!(
"expected argument to {operator} to be an array of objects"
))
})?
.clone(),
)?;
}
} else {
Err(Error::Other(format!(
"expected argument to {operator} to be an array of objects"
)))?;
}
}
"$not" => {
match match_expression {
Bson::Document(match_doc) => check_match_doc_for_parameters_helper(
context,
desired_object_type_name,
field_type,
match_doc,
)?,
_ => Err(Error::Other(format!(
"{operator} operator requires a document",
)))?,
};
}
"$elemMatch" => {
let element_type = field_type.clone().map_nullable(|ft| match ft {
TypeConstraint::ArrayOf(t) => *t,
other => TypeConstraint::ElementOf(Box::new(other)),
});
match match_expression {
Bson::Document(match_doc) => check_match_doc_for_parameters_helper(
context,
desired_object_type_name,
&element_type,
match_doc,
)?,
_ => Err(Error::Other(format!(
"{operator} operator requires a document",
)))?,
};
}
"$eq" | "$ne" | "$gt" | "$lt" | "$gte" | "$lte" => analyze_match_expression(
context,
desired_object_type_name,
field_type,
match_expression,
)?,
"$in" | "$nin" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::ArrayOf(Box::new(field_type.clone())),
match_expression,
)?,
"$exists" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::Scalar(BsonScalarType::Bool),
match_expression,
)?,
// In MongoDB $type accepts either a number, a string, an array of numbers, or an array of
// strings - for simplicity we're only accepting an array of strings since this form can
// express all comparisons that can be expressed with the other forms.
"$type" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::ArrayOf(Box::new(TypeConstraint::Scalar(BsonScalarType::String))),
match_expression,
)?,
"$mod" => match match_expression {
Bson::Array(xs) => {
if xs.len() != 2 {
Err(Error::Other(format!(
"{operator} operator requires exactly two arguments",
operator = operator
)))?;
}
for divisor_or_remainder in xs {
analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::Scalar(BsonScalarType::Int),
divisor_or_remainder,
)?;
}
}
_ => Err(Error::Other(format!(
"{operator} operator requires an array of two elements",
)))?,
},
"$regex" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::Scalar(BsonScalarType::Regex),
match_expression,
)?,
"$all" => {
let element_type = field_type.clone().map_nullable(|ft| match ft {
TypeConstraint::ArrayOf(t) => *t,
other => TypeConstraint::ElementOf(Box::new(other)),
});
// It's like passing field_type through directly, except that we move out of
// a possible nullable type, and we enforce an array type.
let argument_type = TypeConstraint::ArrayOf(Box::new(element_type));
analyze_match_expression(
context,
desired_object_type_name,
field_type,
&argument_type,
match_expression,
)?,
// TODO: more operators! ENG-1248
_ => Err(Error::UnknownMatchDocumentOperator(operator))?,
)?;
}
"$size" => analyze_match_expression(
context,
desired_object_type_name,
&TypeConstraint::Scalar(BsonScalarType::Int),
match_expression,
)?,
_ => Err(Error::UnknownMatchDocumentOperator(operator))?,
}
Ok(())
}
Expand All @@ -114,7 +238,16 @@ fn analyze_match_expression(
field_type,
match_doc,
),
Bson::Array(_) => todo!(),
Bson::Array(xs) => {
let element_type = field_type.clone().map_nullable(|ft| match ft {
TypeConstraint::ArrayOf(t) => *t,
other => TypeConstraint::ElementOf(Box::new(other)),
});
for x in xs {
analyze_match_expression(context, desired_object_type_name, &element_type, x)?;
}
Ok(())
}
_ => Ok(()),
}
}
Expand Down
73 changes: 73 additions & 0 deletions crates/cli/src/native_query/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,79 @@ fn infers_parameter_type_from_binary_comparison() -> googletest::Result<()> {
Ok(())
}

#[googletest::test]
fn supports_various_query_predicate_operators() -> googletest::Result<()> {
let config = mflix_config();

let pipeline = Pipeline::new(vec![Stage::Match(doc! {
"title": { "$eq": "{{ title }}" },
"rated": { "$ne": "{{ rating }}" },
"year": "{{ year_1 }}",
"imdb.votes": { "$gt": "{{ votes }}" },
"num_mflix_comments": { "$in": "{{ num_comments_options }}" },
"$not": { "runtime": { "$lt": "{{ runtime }}" } },
"tomatoes.critic": { "$exists": "{{ critic_exists }}" },
"released": { "$type": ["date", "{{ other_type }}"] },
"$or": [
{ "$and": [
{ "writers": { "$eq": "{{ writers }}" } },
{ "year": "{{ year_2 }}", }
] },
{
"year": { "$mod": ["{{ divisor }}", "{{ expected_remainder }}"] },
"title": { "$regex": "{{ title_regex }}" },
},
],
"$and": [
{ "genres": { "$all": "{{ genres }}" } },
{ "genres": { "$all": ["{{ genre_1 }}"] } },
{ "genres": { "$elemMatch": {
"$gt": "{{ genre_start }}",
"$lt": "{{ genre_end }}",
}} },
{ "genres": { "$size": "{{ genre_size }}" } },
],
})]);

let native_query =
native_query_from_pipeline(&config, "operators_test", Some("movies".into()), pipeline)?;

expect_eq!(
native_query.arguments,
object_fields([
("title", Type::Scalar(BsonScalarType::String)),
("rating", Type::Scalar(BsonScalarType::String)),
("year_1", Type::Scalar(BsonScalarType::Int)),
("year_2", Type::Scalar(BsonScalarType::Int)),
("votes", Type::Scalar(BsonScalarType::Int)),
(
"num_comments_options",
Type::ArrayOf(Box::new(Type::Scalar(BsonScalarType::Int)))
),
("runtime", Type::Scalar(BsonScalarType::Int)),
("critic_exists", Type::Scalar(BsonScalarType::Bool)),
("other_type", Type::Scalar(BsonScalarType::String)),
(
"writers",
Type::ArrayOf(Box::new(Type::Scalar(BsonScalarType::String)))
),
("divisor", Type::Scalar(BsonScalarType::Int)),
("expected_remainder", Type::Scalar(BsonScalarType::Int)),
("title_regex", Type::Scalar(BsonScalarType::Regex)),
(
"genres",
Type::ArrayOf(Box::new(Type::Scalar(BsonScalarType::String)))
),
("genre_1", Type::Scalar(BsonScalarType::String)),
("genre_start", Type::Scalar(BsonScalarType::String)),
("genre_end", Type::Scalar(BsonScalarType::String)),
("genre_size", Type::Scalar(BsonScalarType::Int)),
])
);

Ok(())
}

#[googletest::test]
fn supports_various_aggregation_operators() -> googletest::Result<()> {
let config = mflix_config();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ fn element_of(array_type: Type) -> Result<Type> {
let element_type = match array_type {
Type::ArrayOf(elem_type) => Ok(*elem_type),
Type::Nullable(t) => element_of(*t).map(|t| Type::Nullable(Box::new(t))),
Type::ExtendedJSON => Ok(Type::ExtendedJSON),
_ => Err(Error::ExpectedArray {
actual_type: array_type,
}),
Expand Down
6 changes: 6 additions & 0 deletions crates/test-helpers/src/configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ pub fn mflix_config() -> Configuration {
("credits", named_type("credits")),
("genres", array_of(named_type("String"))),
("imdb", named_type("Imdb")),
("lastUpdated", named_type("String")),
("num_mflix_comments", named_type("Int")),
("rated", named_type("String")),
("released", named_type("Date")),
("runtime", named_type("Int")),
("title", named_type("String")),
("writers", array_of(named_type("String"))),
("year", named_type("Int")),
("tomatoes", named_type("Tomatoes")),
]),
Expand Down
Loading