From f4c2c29d21aed59ed3421876b39ac0ebc3945bde Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 21:26:17 +1100 Subject: [PATCH 1/7] Introduce Any type --- crates/cli/src/introspection/sampling.rs | 112 ++++--- .../cli/src/introspection/type_unification.rs | 296 ++++++------------ crates/configuration/src/schema/database.rs | 31 ++ crates/mongodb-connector/src/schema.rs | 14 +- crates/mongodb-support/src/align.rs | 16 +- 5 files changed, 219 insertions(+), 250 deletions(-) diff --git a/crates/cli/src/introspection/sampling.rs b/crates/cli/src/introspection/sampling.rs index 6ffbeb81..c4018ef8 100644 --- a/crates/cli/src/introspection/sampling.rs +++ b/crates/cli/src/introspection/sampling.rs @@ -1,8 +1,6 @@ use std::collections::{BTreeMap, HashSet}; -use super::type_unification::{ - unify_object_types, unify_type, TypeUnificationContext, TypeUnificationResult, -}; +use super::type_unification::{unify_object_types, unify_type}; use configuration::{ schema::{self, Type}, Schema, WithName, @@ -52,11 +50,11 @@ async fn sample_schema_from_collection( .await?; let mut collected_object_types = vec![]; while let Some(document) = cursor.try_next().await? { - let object_types = make_object_type(collection_name, &document)?; + let object_types = make_object_type(collection_name, &document); collected_object_types = if collected_object_types.is_empty() { object_types } else { - unify_object_types(collected_object_types, object_types)? + unify_object_types(collected_object_types, object_types) }; } let collection_info = WithName::named( @@ -73,10 +71,7 @@ async fn sample_schema_from_collection( }) } -fn make_object_type( - object_type_name: &str, - document: &Document, -) -> TypeUnificationResult> { +fn make_object_type(object_type_name: &str, document: &Document) -> Vec { let (mut object_type_defs, object_fields) = { let type_prefix = format!("{object_type_name}_"); let (object_type_defs, object_fields): (Vec>, Vec) = document @@ -84,8 +79,6 @@ fn make_object_type( .map(|(field_name, field_value)| { make_object_field(&type_prefix, field_name, field_value) }) - .collect::, ObjectField)>>>()? - .into_iter() .unzip(); (object_type_defs.concat(), object_fields) }; @@ -99,16 +92,16 @@ fn make_object_type( ); object_type_defs.push(object_type); - Ok(object_type_defs) + object_type_defs } fn make_object_field( type_prefix: &str, field_name: &str, field_value: &Bson, -) -> TypeUnificationResult<(Vec, ObjectField)> { +) -> (Vec, ObjectField) { let object_type_name = format!("{type_prefix}{field_name}"); - let (collected_otds, field_type) = make_field_type(&object_type_name, field_name, field_value)?; + let (collected_otds, field_type) = make_field_type(&object_type_name, field_name, field_value); let object_field = WithName::named( field_name.to_owned(), @@ -118,16 +111,16 @@ fn make_object_field( }, ); - Ok((collected_otds, object_field)) + (collected_otds, object_field) } fn make_field_type( object_type_name: &str, field_name: &str, field_value: &Bson, -) -> TypeUnificationResult<(Vec, Type)> { - fn scalar(t: BsonScalarType) -> TypeUnificationResult<(Vec, Type)> { - Ok((vec![], Type::Scalar(t))) +) -> (Vec, Type) { + fn scalar(t: BsonScalarType) -> (Vec, Type) { + (vec![], Type::Scalar(t)) } match field_value { Bson::Double(_) => scalar(Double), @@ -138,20 +131,19 @@ fn make_field_type( let mut result_type = Type::Scalar(Undefined); for elem in arr { let (elem_collected_otds, elem_type) = - make_field_type(object_type_name, field_name, elem)?; + make_field_type(object_type_name, field_name, elem); collected_otds = if collected_otds.is_empty() { elem_collected_otds } else { - unify_object_types(collected_otds, elem_collected_otds)? + unify_object_types(collected_otds, elem_collected_otds) }; - let context = TypeUnificationContext::new(object_type_name, field_name); - result_type = unify_type(context, result_type, elem_type)?; + result_type = unify_type(result_type, elem_type); } - Ok((collected_otds, Type::ArrayOf(Box::new(result_type)))) + (collected_otds, Type::ArrayOf(Box::new(result_type))) } Bson::Document(document) => { - let collected_otds = make_object_type(object_type_name, document)?; - Ok((collected_otds, Type::Object(object_type_name.to_owned()))) + let collected_otds = make_object_type(object_type_name, document); + (collected_otds, Type::Object(object_type_name.to_owned())) } Bson::Boolean(_) => scalar(Bool), Bson::Null => scalar(Null), @@ -184,17 +176,15 @@ mod tests { use mongodb::bson::doc; use mongodb_support::BsonScalarType; - use crate::introspection::type_unification::{TypeUnificationContext, TypeUnificationError}; - use super::make_object_type; #[test] fn simple_doc() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_int": 1, "my_string": "two"}; - let result = make_object_type(object_name, &doc).map(WithName::into_map::>); + let result = WithName::into_map::>(make_object_type(object_name, &doc)); - let expected = Ok(BTreeMap::from([( + let expected = BTreeMap::from([( object_name.to_owned(), ObjectType { fields: BTreeMap::from([ @@ -215,7 +205,7 @@ mod tests { ]), description: None, }, - )])); + )]); assert_eq!(expected, result); @@ -226,9 +216,9 @@ mod tests { fn array_of_objects() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": "wut", "baz": 3.77}]}; - let result = make_object_type(object_name, &doc).map(WithName::into_map::>); + let result = WithName::into_map::>(make_object_type(object_name, &doc)); - let expected = Ok(BTreeMap::from([ + let expected = BTreeMap::from([ ( "foo_my_array".to_owned(), ObjectType { @@ -275,7 +265,7 @@ mod tests { description: None, }, ), - ])); + ]); assert_eq!(expected, result); @@ -286,13 +276,57 @@ mod tests { fn non_unifiable_array_of_objects() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": 17, "baz": 3.77}]}; - let result = make_object_type(object_name, &doc); + let result = WithName::into_map::>(make_object_type(object_name, &doc)); + + let expected = BTreeMap::from([ + ( + "foo_my_array".to_owned(), + ObjectType { + fields: BTreeMap::from([ + ( + "foo".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::Int))), + description: None, + }, + ), + ( + "bar".to_owned(), + ObjectField { + r#type: Type::Any, + description: None, + }, + ), + ( + "baz".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar( + BsonScalarType::Double, + ))), + description: None, + }, + ), + ]), + description: None, + }, + ), + ( + object_name.to_owned(), + ObjectType { + fields: BTreeMap::from([( + "my_array".to_owned(), + ObjectField { + r#type: Type::ArrayOf(Box::new(Type::Object( + "foo_my_array".to_owned(), + ))), + description: None, + }, + )]), + description: None, + }, + ), + ]); - let expected = Err(TypeUnificationError::ScalarType( - TypeUnificationContext::new("foo_my_array", "bar"), - BsonScalarType::String, - BsonScalarType::Int, - )); assert_eq!(expected, result); Ok(()) diff --git a/crates/cli/src/introspection/type_unification.rs b/crates/cli/src/introspection/type_unification.rs index 97443039..655c6811 100644 --- a/crates/cli/src/introspection/type_unification.rs +++ b/crates/cli/src/introspection/type_unification.rs @@ -9,169 +9,86 @@ use configuration::{ use indexmap::IndexMap; use itertools::Itertools as _; use mongodb_support::{ - align::align_with_result, - BsonScalarType::{self, *}, + align::align, + BsonScalarType::*, }; -use std::{ - fmt::{self, Display}, - string::String, -}; -use thiserror::Error; +use std::string::String; type ObjectField = WithName; type ObjectType = WithName; -#[derive(Debug, PartialEq, Eq, Clone)] -pub struct TypeUnificationContext { - object_type_name: String, - field_name: String, -} - -impl TypeUnificationContext { - pub fn new(object_type_name: &str, field_name: &str) -> Self { - TypeUnificationContext { - object_type_name: object_type_name.to_owned(), - field_name: field_name.to_owned(), - } - } -} - -impl Display for TypeUnificationContext { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "object type: {}, field: {}", - self.object_type_name, self.field_name - ) - } -} - -#[derive(Debug, Error, PartialEq, Eq)] -pub enum TypeUnificationError { - ScalarType(TypeUnificationContext, BsonScalarType, BsonScalarType), - ObjectType(String, String), - TypeKind(Type, Type), -} - -impl Display for TypeUnificationError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::ScalarType(context, scalar_a, scalar_b) => write!( - f, - "Scalar type mismatch {} {} at {}", - scalar_a.bson_name(), - scalar_b.bson_name(), - context - ), - Self::ObjectType(object_a, object_b) => { - write!(f, "Object type mismatch {} {}", object_a, object_b) - } - Self::TypeKind(type_a, type_b) => { - write!(f, "Type mismatch {:?} {:?}", type_a, type_b) - } - } - } -} - -pub type TypeUnificationResult = Result; - /// Unify two types. -/// Return an error if the types are not unifiable. -pub fn unify_type( - context: TypeUnificationContext, - type_a: Type, - type_b: Type, -) -> TypeUnificationResult { - match (type_a, type_b) { +pub fn unify_type(type_a: Type, type_b: Type) -> Type { + let result_type = match (type_a, type_b) { + // Union of any type with Any is Any + (Type::Any, _) => Type::Any, + (_, Type::Any) => Type::Any, + // If one type is undefined, the union is the other type. // This is used as the base case when inferring array types from documents. - (Type::Scalar(Undefined), type_b) => Ok(type_b), - (type_a, Type::Scalar(Undefined)) => Ok(type_a), + (Type::Scalar(Undefined), type_b) => type_b, + (type_a, Type::Scalar(Undefined)) => type_a, // A Nullable type will unify with another type iff the underlying type is unifiable. // The resulting type will be Nullable. (Type::Nullable(nullable_type_a), type_b) => { - let result_type = unify_type(context, *nullable_type_a, type_b)?; - Ok(make_nullable(result_type)) + let result_type = unify_type(*nullable_type_a, type_b); + result_type.make_nullable() } (type_a, Type::Nullable(nullable_type_b)) => { - let result_type = unify_type(context, type_a, *nullable_type_b)?; - Ok(make_nullable(result_type)) + let result_type = unify_type(type_a, *nullable_type_b); + result_type.make_nullable() } // Union of any type with Null is the Nullable version of that type - (Type::Scalar(Null), type_b) => Ok(make_nullable(type_b)), - (type_a, Type::Scalar(Null)) => Ok(make_nullable(type_a)), + (Type::Scalar(Null), type_b) => type_b.make_nullable(), + (type_a, Type::Scalar(Null)) => type_a.make_nullable(), - // Scalar types only unify if they are the same type. + // Scalar types unify if they are the same type. + // If they are diffferent then the union is Any. (Type::Scalar(scalar_a), Type::Scalar(scalar_b)) => { if scalar_a == scalar_b { - Ok(Type::Scalar(scalar_a)) + Type::Scalar(scalar_a) } else { - Err(TypeUnificationError::ScalarType( - context, scalar_a, scalar_b, - )) + Type::Any } } - // Object types only unify if they have the same name. + // Object types unify if they have the same name. + // If they are diffferent then the union is Any. (Type::Object(object_a), Type::Object(object_b)) => { if object_a == object_b { - Ok(Type::Object(object_a)) + Type::Object(object_a) } else { - Err(TypeUnificationError::ObjectType(object_a, object_b)) + Type::Any } } // Array types unify iff their element types unify. (Type::ArrayOf(elem_type_a), Type::ArrayOf(elem_type_b)) => { - let elem_type = unify_type(context, *elem_type_a, *elem_type_b)?; - Ok(Type::ArrayOf(Box::new(elem_type))) + let elem_type = unify_type(*elem_type_a, *elem_type_b); + Type::ArrayOf(Box::new(elem_type)) } - // Anything else is a unification error. - (type_a, type_b) => Err(TypeUnificationError::TypeKind(type_a, type_b)), - } - .map(normalize_type) -} - -fn normalize_type(t: Type) -> Type { - match t { - Type::Scalar(s) => Type::Scalar(s), - Type::Object(o) => Type::Object(o), - Type::ArrayOf(a) => Type::ArrayOf(Box::new(normalize_type(*a))), - Type::Nullable(n) => match *n { - Type::Scalar(BsonScalarType::Null) => Type::Scalar(BsonScalarType::Null), - Type::Nullable(t) => normalize_type(Type::Nullable(t)), - t => Type::Nullable(Box::new(normalize_type(t))), - }, - } -} - -fn make_nullable(t: Type) -> Type { - match t { - Type::Nullable(t) => Type::Nullable(t), - Type::Scalar(BsonScalarType::Null) => Type::Scalar(BsonScalarType::Null), - t => Type::Nullable(Box::new(t)), - } + // Anything else gives Any + (_, _) => Type::Any, + }; + result_type.normalize_type() } -fn make_nullable_field(field: ObjectField) -> Result { - Ok(WithName::named( +fn make_nullable_field(field: ObjectField) -> ObjectField { + WithName::named( field.name, schema::ObjectField { - r#type: make_nullable(field.value.r#type), + r#type: field.value.r#type.make_nullable(), description: field.value.description, }, - )) + ) } /// Unify two `ObjectType`s. /// Any field that appears in only one of the `ObjectType`s will be made nullable. -fn unify_object_type( - object_type_a: ObjectType, - object_type_b: ObjectType, -) -> TypeUnificationResult { +fn unify_object_type(object_type_a: ObjectType, object_type_b: ObjectType) -> ObjectType { let field_map_a: IndexMap = object_type_a .value .fields @@ -187,15 +104,15 @@ fn unify_object_type( .map(|o| (o.name.to_owned(), o)) .collect(); - let merged_field_map = align_with_result( + let merged_field_map = align( field_map_a, field_map_b, make_nullable_field, make_nullable_field, - |field_a, field_b| unify_object_field(&object_type_a.name, field_a, field_b), - )?; + |field_a, field_b| unify_object_field(field_a, field_b), + ); - Ok(WithName::named( + WithName::named( object_type_a.name, schema::ObjectType { fields: merged_field_map @@ -207,31 +124,25 @@ fn unify_object_type( .description .or(object_type_b.value.description), }, - )) + ) } /// Unify the types of two `ObjectField`s. /// If the types are not unifiable then return an error. fn unify_object_field( - object_type_name: &str, object_field_a: ObjectField, object_field_b: ObjectField, -) -> TypeUnificationResult { - let context = TypeUnificationContext::new(object_type_name, &object_field_a.name); - Ok(WithName::named( +) -> ObjectField { + WithName::named( object_field_a.name, schema::ObjectField { - r#type: unify_type( - context, - object_field_a.value.r#type, - object_field_b.value.r#type, - )?, + r#type: unify_type(object_field_a.value.r#type, object_field_b.value.r#type), description: object_field_a .value .description .or(object_field_b.value.description), }, - )) + ) } /// Unify two sets of `ObjectType`s. @@ -240,7 +151,7 @@ fn unify_object_field( pub fn unify_object_types( object_types_a: Vec, object_types_b: Vec, -) -> TypeUnificationResult> { +) -> Vec { let type_map_a: IndexMap = object_types_a .into_iter() .map(|t| (t.name.to_owned(), t)) @@ -250,18 +161,22 @@ pub fn unify_object_types( .map(|t| (t.name.to_owned(), t)) .collect(); - let merged_type_map = align_with_result(type_map_a, type_map_b, Ok, Ok, unify_object_type)?; + let merged_type_map = align( + type_map_a, + type_map_b, + std::convert::identity, + std::convert::identity, + unify_object_type, + ); - Ok(merged_type_map.into_values().collect()) + merged_type_map.into_values().collect() } #[cfg(test)] mod tests { use std::collections::{HashMap, HashSet}; - use super::{ - normalize_type, unify_object_type, unify_type, TypeUnificationContext, TypeUnificationError, - }; + use super::{unify_object_type, unify_type}; use configuration::{ schema::{self, Type}, WithName, @@ -271,10 +186,8 @@ mod tests { #[test] fn test_unify_scalar() -> Result<(), anyhow::Error> { - let context = TypeUnificationContext::new("foo", "bar"); - let expected = Ok(Type::Scalar(BsonScalarType::Int)); + let expected = Type::Scalar(BsonScalarType::Int); let actual = unify_type( - context, Type::Scalar(BsonScalarType::Int), Type::Scalar(BsonScalarType::Int), ); @@ -284,14 +197,8 @@ mod tests { #[test] fn test_unify_scalar_error() -> Result<(), anyhow::Error> { - let context = TypeUnificationContext::new("foo", "bar"); - let expected = Err(TypeUnificationError::ScalarType( - context.clone(), - BsonScalarType::Int, - BsonScalarType::String, - )); + let expected = Type::Any; let actual = unify_type( - context, Type::Scalar(BsonScalarType::Int), Type::Scalar(BsonScalarType::String), ); @@ -299,12 +206,6 @@ mod tests { Ok(()) } - prop_compose! { - fn arb_type_unification_context()(object_type_name in any::(), field_name in any::()) -> TypeUnificationContext { - TypeUnificationContext { object_type_name, field_name } - } - } - fn arb_bson_scalar_type() -> impl Strategy { prop_oneof![ Just(BsonScalarType::Double), @@ -342,66 +243,65 @@ mod tests { }) } - fn swap_error(err: TypeUnificationError) -> TypeUnificationError { - match err { - TypeUnificationError::ScalarType(c, a, b) => TypeUnificationError::ScalarType(c, b, a), - TypeUnificationError::ObjectType(a, b) => TypeUnificationError::ObjectType(b, a), - TypeUnificationError::TypeKind(a, b) => TypeUnificationError::TypeKind(b, a), - } - } - fn is_nullable(t: &Type) -> bool { - matches!(t, Type::Scalar(BsonScalarType::Null) | Type::Nullable(_)) + matches!(t, Type::Scalar(BsonScalarType::Null) | Type::Nullable(_) | Type::Any) } proptest! { #[test] fn test_type_unifies_with_itself_and_normalizes(t in arb_type()) { - let c = TypeUnificationContext::new("", ""); - let u = unify_type(c, t.clone(), t.clone()); - prop_assert_eq!(Ok(normalize_type(t)), u) + let u = unify_type(t.clone(), t.clone()); + prop_assert_eq!(t.normalize_type(), u) } } proptest! { #[test] fn test_unify_type_is_commutative(ta in arb_type(), tb in arb_type()) { - let c = TypeUnificationContext::new("", ""); - let result_a_b = unify_type(c.clone(), ta.clone(), tb.clone()); - let result_b_a = unify_type(c, tb, ta); - prop_assert_eq!(result_a_b, result_b_a.map_err(swap_error)) + let result_a_b = unify_type(ta.clone(), tb.clone()); + let result_b_a = unify_type(tb, ta); + prop_assert_eq!(result_a_b, result_b_a) } } proptest! { #[test] fn test_unify_type_is_associative(ta in arb_type(), tb in arb_type(), tc in arb_type()) { - let c = TypeUnificationContext::new("", ""); - let result_lr = unify_type(c.clone(), ta.clone(), tb.clone()).and_then(|tab| unify_type(c.clone(), tab, tc.clone())); - let result_rl = unify_type(c.clone(), tb, tc).and_then(|tbc| unify_type(c, ta, tbc)); - if let Ok(tlr) = result_lr { - prop_assert_eq!(Ok(tlr), result_rl) - } else if result_rl.is_ok() { - panic!("Err, Ok") - } + let result_lr = unify_type(unify_type(ta.clone(), tb.clone()), tc.clone()); + let result_rl = unify_type(ta, unify_type(tb, tc)); + prop_assert_eq!(result_lr, result_rl) } } proptest! { #[test] fn test_undefined_is_left_identity(t in arb_type()) { - let c = TypeUnificationContext::new("", ""); - let u = unify_type(c, Type::Scalar(BsonScalarType::Undefined), t.clone()); - prop_assert_eq!(Ok(normalize_type(t)), u) + let u = unify_type(Type::Scalar(BsonScalarType::Undefined), t.clone()); + prop_assert_eq!(t.normalize_type(), u) } } proptest! { #[test] fn test_undefined_is_right_identity(t in arb_type()) { - let c = TypeUnificationContext::new("", ""); - let u = unify_type(c, t.clone(), Type::Scalar(BsonScalarType::Undefined)); - prop_assert_eq!(Ok(normalize_type(t)), u) + let u = unify_type(t.clone(), Type::Scalar(BsonScalarType::Undefined)); + prop_assert_eq!(t.normalize_type(), u) + } + } + + proptest! { + #[test] + fn test_any_left(t in arb_type()) { + let u = unify_type(Type::Any, t); + prop_assert_eq!(Type::Any, u) + } + } + + proptest! { + #[test] + fn test_any_right(t in arb_type()) { + let u = unify_type(t, Type::Any); + prop_assert_eq!(Type::Any, u) } } @@ -429,22 +329,18 @@ mod tests { description: None }); let result = unify_object_type(left_object, right_object); - match result { - Err(err) => panic!("Got error result {err}"), - Ok(ot) => { - for field in ot.value.named_fields() { - // Any fields not shared between the two input types should be nullable. - if !shared.contains_key(field.name) { - assert!(is_nullable(&field.value.r#type), "Found a non-shared field that is not nullable") - } - } - - // All input fields must appear in the result. - let fields: HashSet = ot.value.fields.into_keys().collect(); - assert!(left.into_keys().chain(right.into_keys()).chain(shared.into_keys()).all(|k| fields.contains(&k)), - "Missing field in result type") + + for field in result.value.named_fields() { + // Any fields not shared between the two input types should be nullable. + if !shared.contains_key(field.name) { + assert!(is_nullable(&field.value.r#type), "Found a non-shared field that is not nullable") } } + + // All input fields must appear in the result. + let fields: HashSet = result.value.fields.into_keys().collect(); + assert!(left.into_keys().chain(right.into_keys()).chain(shared.into_keys()).all(|k| fields.contains(&k)), + "Missing field in result type") } } } diff --git a/crates/configuration/src/schema/database.rs b/crates/configuration/src/schema/database.rs index 53a99521..41538ac1 100644 --- a/crates/configuration/src/schema/database.rs +++ b/crates/configuration/src/schema/database.rs @@ -21,6 +21,11 @@ pub struct Collection { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] pub enum Type { + /// Any BSON value. To be used when we don't have any more information + /// about the types of values that a column, field or argument can take. + /// Also used when we unifying two incompatible types in schemas derived + /// from sample documents. + Any, /// One of the predefined BSON scalar types Scalar(BsonScalarType), /// The name of an object type declared in `objectTypes` @@ -30,6 +35,32 @@ pub enum Type { Nullable(Box), } +impl Type { + pub fn normalize_type(self) -> Type { + match self { + Type::Any => Type::Any, + Type::Scalar(s) => Type::Scalar(s), + Type::Object(o) => Type::Object(o), + Type::ArrayOf(a) => Type::ArrayOf(Box::new((*a).normalize_type())), + Type::Nullable(n) => match *n { + Type::Any => Type::Any, + Type::Scalar(BsonScalarType::Null) => Type::Scalar(BsonScalarType::Null), + Type::Nullable(t) => Type::Nullable(t).normalize_type(), + t => Type::Nullable(Box::new(t.normalize_type())), + }, + } + } + + pub fn make_nullable(self) -> Type { + match self { + Type::Any => Type::Any, + Type::Nullable(t) => Type::Nullable(t), + Type::Scalar(BsonScalarType::Null) => Type::Scalar(BsonScalarType::Null), + t => Type::Nullable(Box::new(t)), + } + } +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct ObjectType { diff --git a/crates/mongodb-connector/src/schema.rs b/crates/mongodb-connector/src/schema.rs index fe00ed7e..47fa852e 100644 --- a/crates/mongodb-connector/src/schema.rs +++ b/crates/mongodb-connector/src/schema.rs @@ -67,17 +67,25 @@ fn map_field_infos( .collect() } +const ANY_TYPE_NAME: &str = "any"; + fn map_type(t: &schema::Type) -> models::Type { - match t { + match t.to_owned().normalize_type() { + // Any can respresent any BSON value, including null, so it is always nullable + schema::Type::Any => models::Type::Nullable { + underlying_type: Box::new(models::Type::Named { + name: ANY_TYPE_NAME.to_owned(), + }), + }, schema::Type::Scalar(t) => models::Type::Named { name: t.graphql_name(), }, schema::Type::Object(t) => models::Type::Named { name: t.clone() }, schema::Type::ArrayOf(t) => models::Type::Array { - element_type: Box::new(map_type(t)), + element_type: Box::new(map_type(&t)), }, schema::Type::Nullable(t) => models::Type::Nullable { - underlying_type: Box::new(map_type(t)), + underlying_type: Box::new(map_type(&t)), }, } } diff --git a/crates/mongodb-support/src/align.rs b/crates/mongodb-support/src/align.rs index 25553f0f..02de15cb 100644 --- a/crates/mongodb-support/src/align.rs +++ b/crates/mongodb-support/src/align.rs @@ -1,24 +1,24 @@ use indexmap::IndexMap; use std::hash::Hash; -pub fn align_with_result(ts: IndexMap, mut us: IndexMap, ft: FT, fu: FU, ftu: FTU) -> Result, E> +pub fn align(ts: IndexMap, mut us: IndexMap, ft: FT, fu: FU, ftu: FTU) -> IndexMap where K: Hash + Eq, - FT: Fn(T) -> Result, - FU: Fn(U) -> Result, - FTU: Fn(T, U) -> Result, + FT: Fn(T) -> V, + FU: Fn(U) -> V, + FTU: Fn(T, U) -> V, { let mut result: IndexMap = IndexMap::new(); for (k, t) in ts { match us.swap_remove(&k) { - None => result.insert(k, ft(t)?), - Some(u) => result.insert(k, ftu(t, u)?), + None => result.insert(k, ft(t)), + Some(u) => result.insert(k, ftu(t, u)), }; } for (k, u) in us { - result.insert(k, fu(u)?); + result.insert(k, fu(u)); } - Ok(result) + result } From 0972b5dc09fdad49b5d051298f614dd30e45c88a Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 21:38:10 +1100 Subject: [PATCH 2/7] Avoid renormalizing recursive types --- crates/mongodb-connector/src/schema.rs | 37 ++++++++++++++------------ 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/crates/mongodb-connector/src/schema.rs b/crates/mongodb-connector/src/schema.rs index 47fa852e..cbfad763 100644 --- a/crates/mongodb-connector/src/schema.rs +++ b/crates/mongodb-connector/src/schema.rs @@ -70,24 +70,27 @@ fn map_field_infos( const ANY_TYPE_NAME: &str = "any"; fn map_type(t: &schema::Type) -> models::Type { - match t.to_owned().normalize_type() { - // Any can respresent any BSON value, including null, so it is always nullable - schema::Type::Any => models::Type::Nullable { - underlying_type: Box::new(models::Type::Named { - name: ANY_TYPE_NAME.to_owned(), - }), - }, - schema::Type::Scalar(t) => models::Type::Named { - name: t.graphql_name(), - }, - schema::Type::Object(t) => models::Type::Named { name: t.clone() }, - schema::Type::ArrayOf(t) => models::Type::Array { - element_type: Box::new(map_type(&t)), - }, - schema::Type::Nullable(t) => models::Type::Nullable { - underlying_type: Box::new(map_type(&t)), - }, + fn map_normalized_type(t: &schema::Type) -> models::Type { + match t { + // Any can respresent any BSON value, including null, so it is always nullable + schema::Type::Any => models::Type::Nullable { + underlying_type: Box::new(models::Type::Named { + name: ANY_TYPE_NAME.to_owned(), + }), + }, + schema::Type::Scalar(t) => models::Type::Named { + name: t.graphql_name(), + }, + schema::Type::Object(t) => models::Type::Named { name: t.clone() }, + schema::Type::ArrayOf(t) => models::Type::Array { + element_type: Box::new(map_normalized_type(&t)), + }, + schema::Type::Nullable(t) => models::Type::Nullable { + underlying_type: Box::new(map_normalized_type(&t)), + }, + } } + map_normalized_type(&t.clone().normalize_type()) } fn map_collection((name, collection): (&String, &schema::Collection)) -> models::CollectionInfo { From 72576acd824d31377dbec5a088746aa5f2a233d9 Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 21:47:05 +1100 Subject: [PATCH 3/7] Add ScalarTypeCapabilities for the "any" type --- .../mongodb-agent-common/src/scalar_types_capabilities.rs | 6 ++++-- crates/mongodb-connector/src/schema.rs | 4 +--- crates/mongodb-support/src/lib.rs | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/mongodb-agent-common/src/scalar_types_capabilities.rs b/crates/mongodb-agent-common/src/scalar_types_capabilities.rs index d57af1ba..fce1c322 100644 --- a/crates/mongodb-agent-common/src/scalar_types_capabilities.rs +++ b/crates/mongodb-agent-common/src/scalar_types_capabilities.rs @@ -11,9 +11,11 @@ use crate::comparison_function::{ComparisonFunction, ComparisonFunction as C}; use BsonScalarType as S; pub fn scalar_types_capabilities() -> HashMap { - all::() + let mut map = all::() .map(|t| (t.graphql_name(), capabilities(t))) - .collect::>() + .collect::>(); + map.insert(mongodb_support::ANY_TYPE_NAME.to_owned(), ScalarTypeCapabilities::new()); + map } pub fn aggregate_functions( diff --git a/crates/mongodb-connector/src/schema.rs b/crates/mongodb-connector/src/schema.rs index cbfad763..38c953f3 100644 --- a/crates/mongodb-connector/src/schema.rs +++ b/crates/mongodb-connector/src/schema.rs @@ -67,15 +67,13 @@ fn map_field_infos( .collect() } -const ANY_TYPE_NAME: &str = "any"; - fn map_type(t: &schema::Type) -> models::Type { fn map_normalized_type(t: &schema::Type) -> models::Type { match t { // Any can respresent any BSON value, including null, so it is always nullable schema::Type::Any => models::Type::Nullable { underlying_type: Box::new(models::Type::Named { - name: ANY_TYPE_NAME.to_owned(), + name: mongodb_support::ANY_TYPE_NAME.to_owned(), }), }, schema::Type::Scalar(t) => models::Type::Named { diff --git a/crates/mongodb-support/src/lib.rs b/crates/mongodb-support/src/lib.rs index a2c6fc08..4531cdf7 100644 --- a/crates/mongodb-support/src/lib.rs +++ b/crates/mongodb-support/src/lib.rs @@ -3,3 +3,5 @@ pub mod error; pub mod align; pub use self::bson_type::{BsonScalarType, BsonType}; + +pub const ANY_TYPE_NAME: &str = "any"; From 61111b021ea62eb993bb3833549ed8bb30f1e325 Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 22:32:15 +1100 Subject: [PATCH 4/7] Add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a1543cf..074f8cae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This changelog documents the changes between release versions. - don't sample from collections that already have a schema - if no --sample-size given on command line, default sample size is 10 - new option --no-validator-schema to disable attempting to use validator schema +- Add `any` type and use it to represent mismatched types in sample documents ([PR #18](https://github.com/hasura/ndc-mongodb/pull/18)) ## [0.0.2] - 2024-03-26 - Rename CLI plugin to ndc-mongodb ([PR #13](https://github.com/hasura/ndc-mongodb/pull/13)) From fd6ad958daa0ffa9326293a83b57e0188c5f0eae Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 22:54:19 +1100 Subject: [PATCH 5/7] Add some documentation on unify_type --- crates/cli/src/introspection/type_unification.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/crates/cli/src/introspection/type_unification.rs b/crates/cli/src/introspection/type_unification.rs index 655c6811..2eb2cf69 100644 --- a/crates/cli/src/introspection/type_unification.rs +++ b/crates/cli/src/introspection/type_unification.rs @@ -18,6 +18,16 @@ type ObjectField = WithName; type ObjectType = WithName; /// Unify two types. +/// This is computing the join (or least upper bound) of the two types in a lattice +/// where `Any` is the Top element, Scalar(Undefined) is the Bottom element, and Nullable(T) >= T for all T. +/// +/// Any +/// / \ +/// ...Nullable types Nullable(T)... +/// | | +/// ... Non-nullabe types T... +/// \ / +/// Scalar(Undefined) pub fn unify_type(type_a: Type, type_b: Type) -> Type { let result_type = match (type_a, type_b) { // Union of any type with Any is Any From 0e0770cf1c9d23611b3bae393d74fb27384b7ac7 Mon Sep 17 00:00:00 2001 From: David Overton Date: Wed, 27 Mar 2024 23:17:46 +1100 Subject: [PATCH 6/7] A few minor fixes --- crates/cli/src/introspection/sampling.rs | 5 ++--- crates/cli/src/introspection/type_unification.rs | 10 +--------- crates/mongodb-connector/src/schema.rs | 4 ++-- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/crates/cli/src/introspection/sampling.rs b/crates/cli/src/introspection/sampling.rs index c4018ef8..133d31e2 100644 --- a/crates/cli/src/introspection/sampling.rs +++ b/crates/cli/src/introspection/sampling.rs @@ -101,7 +101,7 @@ fn make_object_field( field_value: &Bson, ) -> (Vec, ObjectField) { let object_type_name = format!("{type_prefix}{field_name}"); - let (collected_otds, field_type) = make_field_type(&object_type_name, field_name, field_value); + let (collected_otds, field_type) = make_field_type(&object_type_name, field_value); let object_field = WithName::named( field_name.to_owned(), @@ -116,7 +116,6 @@ fn make_object_field( fn make_field_type( object_type_name: &str, - field_name: &str, field_value: &Bson, ) -> (Vec, Type) { fn scalar(t: BsonScalarType) -> (Vec, Type) { @@ -131,7 +130,7 @@ fn make_field_type( let mut result_type = Type::Scalar(Undefined); for elem in arr { let (elem_collected_otds, elem_type) = - make_field_type(object_type_name, field_name, elem); + make_field_type(object_type_name, elem); collected_otds = if collected_otds.is_empty() { elem_collected_otds } else { diff --git a/crates/cli/src/introspection/type_unification.rs b/crates/cli/src/introspection/type_unification.rs index 2eb2cf69..6d2d7d55 100644 --- a/crates/cli/src/introspection/type_unification.rs +++ b/crates/cli/src/introspection/type_unification.rs @@ -20,14 +20,6 @@ type ObjectType = WithName; /// Unify two types. /// This is computing the join (or least upper bound) of the two types in a lattice /// where `Any` is the Top element, Scalar(Undefined) is the Bottom element, and Nullable(T) >= T for all T. -/// -/// Any -/// / \ -/// ...Nullable types Nullable(T)... -/// | | -/// ... Non-nullabe types T... -/// \ / -/// Scalar(Undefined) pub fn unify_type(type_a: Type, type_b: Type) -> Type { let result_type = match (type_a, type_b) { // Union of any type with Any is Any @@ -119,7 +111,7 @@ fn unify_object_type(object_type_a: ObjectType, object_type_b: ObjectType) -> Ob field_map_b, make_nullable_field, make_nullable_field, - |field_a, field_b| unify_object_field(field_a, field_b), + unify_object_field, ); WithName::named( diff --git a/crates/mongodb-connector/src/schema.rs b/crates/mongodb-connector/src/schema.rs index 38c953f3..f0512fe2 100644 --- a/crates/mongodb-connector/src/schema.rs +++ b/crates/mongodb-connector/src/schema.rs @@ -81,10 +81,10 @@ fn map_type(t: &schema::Type) -> models::Type { }, schema::Type::Object(t) => models::Type::Named { name: t.clone() }, schema::Type::ArrayOf(t) => models::Type::Array { - element_type: Box::new(map_normalized_type(&t)), + element_type: Box::new(map_normalized_type(t)), }, schema::Type::Nullable(t) => models::Type::Nullable { - underlying_type: Box::new(map_normalized_type(&t)), + underlying_type: Box::new(map_normalized_type(t)), }, } } From 06e2a3a7ae5d4c9bdd75245d96f409764a6752cd Mon Sep 17 00:00:00 2001 From: David Overton Date: Thu, 28 Mar 2024 09:21:21 +1100 Subject: [PATCH 7/7] Handle Type::Any when converting from JSON to BSON --- crates/mongodb-agent-common/src/query/arguments/json_to_bson.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/mongodb-agent-common/src/query/arguments/json_to_bson.rs b/crates/mongodb-agent-common/src/query/arguments/json_to_bson.rs index 6ffa3bf8..dae77a72 100644 --- a/crates/mongodb-agent-common/src/query/arguments/json_to_bson.rs +++ b/crates/mongodb-agent-common/src/query/arguments/json_to_bson.rs @@ -53,6 +53,7 @@ pub fn json_to_bson( value: Value, ) -> Result { match expected_type { + Type::Any => serde_json::from_value::(value.clone()).map_err(JsonToBsonError::SerdeError), Type::Scalar(t) => json_to_bson_scalar(*t, value), Type::Object(object_type_name) => { let object_type = object_types