diff --git a/Cargo.lock b/Cargo.lock index 8bb92509..857e971c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -716,18 +716,18 @@ dependencies = [ [[package]] name = "enum-iterator" -version = "1.4.1" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7add3873b5dd076766ee79c8e406ad1a472c385476b9e38849f8eec24f1be689" +checksum = "600536cfe9e2da0820aa498e570f6b2b9223eec3ce2f835c8ae4861304fa4794" dependencies = [ "enum-iterator-derive", ] [[package]] name = "enum-iterator-derive" -version = "1.2.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eecf8589574ce9b895052fa12d69af7a233f99e6107f5cb8dd1044f2a17bfdcb" +checksum = "03cdc46ec28bd728e67540c528013c6a10eb69a02eb31078a1bda695438cbfb8" dependencies = [ "proc-macro2", "quote", @@ -1493,14 +1493,17 @@ dependencies = [ "itertools 0.10.5", "mockall", "mongodb", + "mongodb-cli-plugin", "mongodb-support", "once_cell", "pretty_assertions", + "proptest", "regex", "schemars", "serde", "serde_json", "serde_with 3.7.0", + "test-helpers", "thiserror", "time", "tokio", @@ -1523,7 +1526,7 @@ dependencies = [ "proptest", "serde", "serde_json", - "these", + "test-helpers", "thiserror", "tokio", ] @@ -2891,10 +2894,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] -name = "these" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7162adbff4f8c44e938e0e51f6d3d829818c2ffefd793702a3a6f6ef0551de43" +name = "test-helpers" +version = "0.0.3" +dependencies = [ + "configuration", + "enum-iterator", + "mongodb", + "mongodb-support", + "proptest", +] [[package]] name = "thiserror" diff --git a/Cargo.toml b/Cargo.toml index f858132e..6fc76a51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,15 +3,16 @@ version = "0.0.3" [workspace] members = [ + "crates/cli", "crates/configuration", - 
"crates/mongodb-connector", - "crates/mongodb-agent-common", - "crates/mongodb-support", "crates/dc-api", - "crates/dc-api-types", "crates/dc-api-test-helpers", + "crates/dc-api-types", + "crates/mongodb-agent-common", + "crates/mongodb-connector", + "crates/mongodb-support", "crates/ndc-test-helpers", - "crates/cli" + "crates/test-helpers", ] resolver = "2" diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index e3225fd1..fb1da2ad 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -18,7 +18,8 @@ serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0.113", features = ["raw_value"] } thiserror = "1.0.57" tokio = { version = "1.36.0", features = ["full"] } -these = "2.0.0" [dev-dependencies] +test-helpers = { path = "../test-helpers" } + proptest = "1" diff --git a/crates/cli/src/introspection/mod.rs b/crates/cli/src/introspection/mod.rs index 057303c2..e1fb76d6 100644 --- a/crates/cli/src/introspection/mod.rs +++ b/crates/cli/src/introspection/mod.rs @@ -1,6 +1,7 @@ pub mod sampling; -pub mod validation_schema; pub mod type_unification; +pub mod validation_schema; +pub use sampling::{sample_schema_from_db, type_from_bson}; pub use validation_schema::get_metadata_from_validation_schema; -pub use sampling::sample_schema_from_db; \ No newline at end of file + diff --git a/crates/cli/src/introspection/sampling.rs b/crates/cli/src/introspection/sampling.rs index 133d31e2..a049b5fa 100644 --- a/crates/cli/src/introspection/sampling.rs +++ b/crates/cli/src/introspection/sampling.rs @@ -114,6 +114,15 @@ fn make_object_field( (collected_otds, object_field) } +// Exported for use in tests +pub fn type_from_bson( + object_type_name: &str, + value: &Bson, +) -> (BTreeMap, Type) { + let (object_types, t) = make_field_type(object_type_name, value); + (WithName::into_map(object_types), t) +} + fn make_field_type( object_type_name: &str, field_value: &Bson, diff --git a/crates/cli/src/introspection/type_unification.rs 
b/crates/cli/src/introspection/type_unification.rs index 6d2d7d55..bcdf5198 100644 --- a/crates/cli/src/introspection/type_unification.rs +++ b/crates/cli/src/introspection/type_unification.rs @@ -8,10 +8,7 @@ use configuration::{ }; use indexmap::IndexMap; use itertools::Itertools as _; -use mongodb_support::{ - align::align, - BsonScalarType::*, -}; +use mongodb_support::{align::align, BsonScalarType::*}; use std::string::String; type ObjectField = WithName; @@ -131,10 +128,7 @@ fn unify_object_type(object_type_a: ObjectType, object_type_b: ObjectType) -> Ob /// Unify the types of two `ObjectField`s. /// If the types are not unifiable then return an error. -fn unify_object_field( - object_field_a: ObjectField, - object_field_b: ObjectField, -) -> ObjectField { +fn unify_object_field(object_field_a: ObjectField, object_field_b: ObjectField) -> ObjectField { WithName::named( object_field_a.name, schema::ObjectField { @@ -185,6 +179,7 @@ mod tests { }; use mongodb_support::BsonScalarType; use proptest::{collection::hash_map, prelude::*}; + use test_helpers::arb_type; #[test] fn test_unify_scalar() -> Result<(), anyhow::Error> { @@ -208,45 +203,11 @@ mod tests { Ok(()) } - fn arb_bson_scalar_type() -> impl Strategy { - prop_oneof![ - Just(BsonScalarType::Double), - Just(BsonScalarType::Decimal), - Just(BsonScalarType::Int), - Just(BsonScalarType::Long), - Just(BsonScalarType::String), - Just(BsonScalarType::Date), - Just(BsonScalarType::Timestamp), - Just(BsonScalarType::BinData), - Just(BsonScalarType::ObjectId), - Just(BsonScalarType::Bool), - Just(BsonScalarType::Null), - Just(BsonScalarType::Regex), - Just(BsonScalarType::Javascript), - Just(BsonScalarType::JavascriptWithScope), - Just(BsonScalarType::MinKey), - Just(BsonScalarType::MaxKey), - Just(BsonScalarType::Undefined), - Just(BsonScalarType::DbPointer), - Just(BsonScalarType::Symbol), - ] - } - - fn arb_type() -> impl Strategy { - let leaf = prop_oneof![ - arb_bson_scalar_type().prop_map(Type::Scalar), - 
any::().prop_map(Type::Object) - ]; - leaf.prop_recursive(3, 10, 10, |inner| { - prop_oneof![ - inner.clone().prop_map(|t| Type::ArrayOf(Box::new(t))), - inner.prop_map(|t| Type::Nullable(Box::new(t))) - ] - }) - } - fn is_nullable(t: &Type) -> bool { - matches!(t, Type::Scalar(BsonScalarType::Null) | Type::Nullable(_) | Type::Any) + matches!( + t, + Type::Scalar(BsonScalarType::Null) | Type::Nullable(_) | Type::Any + ) } proptest! { diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 1d371af2..b0f30cac 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -8,6 +8,9 @@ use clap::{Parser, Subcommand}; use mongodb_agent_common::interface_types::MongoConfig; +// Exported for use in tests +pub use introspection::type_from_bson; + #[derive(Debug, Clone, Parser)] pub struct UpdateArgs { #[arg(long = "sample-size", value_name = "N", default_value_t = 10)] diff --git a/crates/configuration/src/schema/database.rs b/crates/configuration/src/schema/database.rs index 0abca261..a66dc909 100644 --- a/crates/configuration/src/schema/database.rs +++ b/crates/configuration/src/schema/database.rs @@ -36,6 +36,10 @@ pub enum Type { } impl Type { + pub fn is_nullable(&self) -> bool { + matches!(self, Type::Any | Type::Nullable(_) | Type::Scalar(BsonScalarType::Null)) + } + pub fn normalize_type(self) -> Type { match self { Type::Any => Type::Any, diff --git a/crates/configuration/src/with_name.rs b/crates/configuration/src/with_name.rs index deeb5eb0..13332908 100644 --- a/crates/configuration/src/with_name.rs +++ b/crates/configuration/src/with_name.rs @@ -52,7 +52,7 @@ impl From<(String, T)> for WithName { } } -#[derive(Debug, Hash, Eq, PartialEq, Ord, PartialOrd)] +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)] pub struct WithNameRef<'a, T> { pub name: &'a str, pub value: &'a T, diff --git a/crates/mongodb-agent-common/Cargo.toml b/crates/mongodb-agent-common/Cargo.toml index 8a3db86b..aaab9fcd 100644 --- 
a/crates/mongodb-agent-common/Cargo.toml +++ b/crates/mongodb-agent-common/Cargo.toml @@ -5,14 +5,16 @@ version = "0.1.0" edition = "2021" [dependencies] +configuration = { path = "../configuration" } +dc-api = { path = "../dc-api" } +dc-api-types = { path = "../dc-api-types" } +mongodb-support = { path = "../mongodb-support" } + anyhow = "1.0.71" async-trait = "^0.1" axum = { version = "0.6", features = ["headers"] } bytes = "^1" -configuration = { path = "../configuration" } -dc-api = { path = "../dc-api" } -dc-api-types = { path = "../dc-api-types" } -enum-iterator = "1.4.1" +enum-iterator = "^2.0.0" futures = "0.3.28" futures-util = "0.3.28" http = "^0.2" @@ -20,7 +22,6 @@ indexmap = { version = "1", features = ["serde"] } # must match the version that indent = "^0.1" itertools = "^0.10" mongodb = "2.8" -mongodb-support = { path = "../mongodb-support" } once_cell = "1" regex = "1" schemars = { version = "^0.8.12", features = ["smol_str"] } @@ -32,6 +33,10 @@ time = { version = "0.3.29", features = ["formatting", "parsing", "serde"] } tracing = "0.1" [dev-dependencies] +mongodb-cli-plugin = { path = "../cli" } +test-helpers = { path = "../test-helpers" } + mockall = "0.11.4" pretty_assertions = "1" +proptest = "1" tokio = { version = "1", features = ["full"] } diff --git a/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt b/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt new file mode 100644 index 00000000..8a816d59 --- /dev/null +++ b/crates/mongodb-agent-common/proptest-regressions/query/serialization/tests.txt @@ -0,0 +1,10 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc 2efdea7f185f2f38ae643782b3523014ab7b8236e36a79cc6b7a7cac74b06f79 # shrinks to bytes = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 238, 161, 0] +cc 26e2543468ab6d4ffa34f9f8a2c920801ef38a35337557a8f4e74c92cf57e344 # shrinks to bson = Document({" ": Document({"ยก": DateTime(1970-01-01 0:00:00.001 +00:00:00)})}) +cc 7d760e540b56fedac7dd58e5bdb5bb9613b9b0bc6a88acfab3fc9c2de8bf026d # shrinks to bson = Document({"A": Array([Null, Undefined])}) +cc 21360610045c5a616b371fb8d5492eb0c22065d62e54d9c8a8761872e2e192f3 # shrinks to bson = Array([Document({}), Document({" ": Null})]) diff --git a/crates/mongodb-agent-common/src/procedure/mod.rs b/crates/mongodb-agent-common/src/procedure/mod.rs index cf193236..8c994418 100644 --- a/crates/mongodb-agent-common/src/procedure/mod.rs +++ b/crates/mongodb-agent-common/src/procedure/mod.rs @@ -5,7 +5,7 @@ use std::borrow::Cow; use std::collections::BTreeMap; use configuration::native_queries::NativeQuery; -use configuration::schema::{ObjectField, ObjectType}; +use configuration::schema::{ObjectField, ObjectType, Type}; use mongodb::options::SelectionCriteria; use mongodb::{bson, Database}; @@ -19,43 +19,47 @@ pub use self::interpolated_command::interpolated_command; pub struct Procedure<'a> { arguments: BTreeMap, command: Cow<'a, bson::Document>, - object_types: Cow<'a, BTreeMap>, parameters: Cow<'a, BTreeMap>, + result_type: Type, selection_criteria: Option>, } impl<'a> Procedure<'a> { - /// Note: the `object_types` argument here is not the object types from the native query - it - /// should be the set of *all* object types collected from schema and native query definitions. 
pub fn from_native_query( native_query: &'a NativeQuery, - object_types: &'a BTreeMap, arguments: BTreeMap, ) -> Self { Procedure { arguments, command: Cow::Borrowed(&native_query.command), - object_types: Cow::Borrowed(object_types), parameters: Cow::Borrowed(&native_query.arguments), + result_type: native_query.result_type.clone(), selection_criteria: native_query.selection_criteria.as_ref().map(Cow::Borrowed), } } - pub async fn execute(self, database: Database) -> Result { + pub async fn execute( + self, + object_types: &BTreeMap, + database: Database, + ) -> Result<(bson::Document, Type), ProcedureError> { let selection_criteria = self.selection_criteria.map(Cow::into_owned); let command = interpolate( - &self.object_types, + object_types, &self.parameters, self.arguments, &self.command, )?; let result = database.run_command(command, selection_criteria).await?; - Ok(result) + Ok((result, self.result_type)) } - pub fn interpolated_command(self) -> Result { + pub fn interpolated_command( + self, + object_types: &BTreeMap, + ) -> Result { interpolate( - &self.object_types, + object_types, &self.parameters, self.arguments, &self.command, diff --git a/crates/mongodb-agent-common/src/query/arguments/mod.rs b/crates/mongodb-agent-common/src/query/arguments.rs similarity index 95% rename from crates/mongodb-agent-common/src/query/arguments/mod.rs rename to crates/mongodb-agent-common/src/query/arguments.rs index ab84a740..5e5078c0 100644 --- a/crates/mongodb-agent-common/src/query/arguments/mod.rs +++ b/crates/mongodb-agent-common/src/query/arguments.rs @@ -1,5 +1,3 @@ -mod json_to_bson; - use std::collections::BTreeMap; use configuration::schema::{ObjectField, ObjectType, Type}; @@ -9,9 +7,7 @@ use mongodb::bson::Bson; use serde_json::Value; use thiserror::Error; -use self::json_to_bson::json_to_bson; - -pub use self::json_to_bson::{json_to_bson_scalar, JsonToBsonError}; +use super::serialization::{json_to_bson, JsonToBsonError}; #[derive(Debug, Error)] pub enum 
ArgumentError { diff --git a/crates/mongodb-agent-common/src/query/make_selector.rs b/crates/mongodb-agent-common/src/query/make_selector.rs index d969eebc..e84a8fc4 100644 --- a/crates/mongodb-agent-common/src/query/make_selector.rs +++ b/crates/mongodb-agent-common/src/query/make_selector.rs @@ -10,7 +10,7 @@ use mongodb_support::BsonScalarType; use crate::{ comparison_function::ComparisonFunction, interface_types::MongoAgentError, - query::arguments::json_to_bson_scalar, query::column_ref::column_ref, + query::serialization::json_to_bson_scalar, query::column_ref::column_ref, }; use BinaryArrayComparisonOperator as ArrOp; diff --git a/crates/mongodb-agent-common/src/query/mod.rs b/crates/mongodb-agent-common/src/query/mod.rs index b079d9d8..3f5c5df5 100644 --- a/crates/mongodb-agent-common/src/query/mod.rs +++ b/crates/mongodb-agent-common/src/query/mod.rs @@ -8,6 +8,7 @@ mod make_selector; mod make_sort; mod pipeline; mod relations; +pub mod serialization; use dc_api::JsonResponse; use dc_api_types::{QueryRequest, QueryResponse, Target}; diff --git a/crates/mongodb-agent-common/src/query/serialization/bson_to_json.rs b/crates/mongodb-agent-common/src/query/serialization/bson_to_json.rs new file mode 100644 index 00000000..d9201c35 --- /dev/null +++ b/crates/mongodb-agent-common/src/query/serialization/bson_to_json.rs @@ -0,0 +1,257 @@ +use std::collections::BTreeMap; + +use configuration::{ + schema::{ObjectField, ObjectType, Type}, + WithNameRef, +}; +use itertools::Itertools as _; +use mongodb::bson::{self, Bson}; +use mongodb_support::BsonScalarType; +use serde_json::{to_value, Number, Value}; +use thiserror::Error; +use time::{format_description::well_known::Iso8601, OffsetDateTime}; + +use super::json_formats; + +#[derive(Debug, Error)] +pub enum BsonToJsonError { + #[error("error reading date-time value from BSON: {0}")] + DateConversion(String), + + #[error("error converting 64-bit floating point number from BSON to JSON: {0}")] + DoubleConversion(f64), 
+ + #[error("input object of type \"{0:?}\" is missing a field, \"{1}\"")] + MissingObjectField(Type, String), + + #[error("error converting value to JSON: {0}")] + Serde(#[from] serde_json::Error), + + // TODO: It would be great if we could capture a path into the larger BSON value here + #[error("expected a value of type {0:?}, but got {1}")] + TypeMismatch(Type, Bson), + + #[error("unknown object type, \"{0}\"")] + UnknownObjectType(String), +} + +type Result = std::result::Result; + +/// Converts BSON values to JSON. +/// +/// The BSON library already has a `Serialize` impl that can convert to JSON. But that +/// implementation emits Extended JSON which includes inline type tags in JSON output to +/// disambiguate types on the BSON side. We don't want those tags because we communicate type +/// information out of band. That is except for the `Type::Any` type where we do want to emit +/// Extended JSON because we don't have out-of-band information in that case. +pub fn bson_to_json( + expected_type: &Type, + object_types: &BTreeMap, + value: Bson, +) -> Result { + match expected_type { + Type::Any => Ok(value.into_canonical_extjson()), + Type::Scalar(scalar_type) => bson_scalar_to_json(*scalar_type, value), + Type::Object(object_type_name) => { + let object_type = object_types + .get(object_type_name) + .ok_or_else(|| BsonToJsonError::UnknownObjectType(object_type_name.to_owned()))?; + convert_object(object_type_name, object_type, object_types, value) + } + Type::ArrayOf(element_type) => convert_array(element_type, object_types, value), + Type::Nullable(t) => convert_nullable(t, object_types, value), + } +} + +// Converts values while checking against the expected type. But there are a couple of cases where +// we do implicit conversion where the BSON types have indistinguishable JSON representations, and +// values can be converted back to BSON without loss of meaning. 
+fn bson_scalar_to_json(expected_type: BsonScalarType, value: Bson) -> Result { + match (expected_type, value) { + (BsonScalarType::Null | BsonScalarType::Undefined, Bson::Null | Bson::Undefined) => { + Ok(Value::Null) + } + (BsonScalarType::MinKey, Bson::MinKey) => Ok(Value::Object(Default::default())), + (BsonScalarType::MaxKey, Bson::MaxKey) => Ok(Value::Object(Default::default())), + (BsonScalarType::Bool, Bson::Boolean(b)) => Ok(Value::Bool(b)), + (BsonScalarType::Double, v) => convert_small_number(expected_type, v), + (BsonScalarType::Int, v) => convert_small_number(expected_type, v), + (BsonScalarType::Long, Bson::Int64(n)) => Ok(Value::String(n.to_string())), + (BsonScalarType::Decimal, Bson::Decimal128(n)) => Ok(Value::String(n.to_string())), + (BsonScalarType::String, Bson::String(s)) => Ok(Value::String(s)), + (BsonScalarType::Symbol, Bson::Symbol(s)) => Ok(Value::String(s)), + (BsonScalarType::Date, Bson::DateTime(date)) => convert_date(date), + (BsonScalarType::Javascript, Bson::JavaScriptCode(s)) => Ok(Value::String(s)), + (BsonScalarType::JavascriptWithScope, Bson::JavaScriptCodeWithScope(v)) => convert_code(v), + (BsonScalarType::Regex, Bson::RegularExpression(regex)) => { + Ok(to_value::(regex.into())?) + } + (BsonScalarType::Timestamp, Bson::Timestamp(v)) => { + Ok(to_value::(v.into())?) + } + (BsonScalarType::BinData, Bson::Binary(b)) => { + Ok(to_value::(b.into())?) 
+ } + (BsonScalarType::ObjectId, Bson::ObjectId(oid)) => Ok(to_value(oid)?), + (BsonScalarType::DbPointer, v) => Ok(v.into_canonical_extjson()), + (_, v) => Err(BsonToJsonError::TypeMismatch( + Type::Scalar(expected_type), + v, + )), + } +} + +fn convert_array( + element_type: &Type, + object_types: &BTreeMap, + value: Bson, +) -> Result { + let values = match value { + Bson::Array(values) => Ok(values), + _ => Err(BsonToJsonError::TypeMismatch( + Type::ArrayOf(Box::new(element_type.clone())), + value, + )), + }?; + let json_array = values + .into_iter() + .map(|value| bson_to_json(element_type, object_types, value)) + .try_collect()?; + Ok(Value::Array(json_array)) +} + +fn convert_object( + object_type_name: &str, + object_type: &ObjectType, + object_types: &BTreeMap, + value: Bson, +) -> Result { + let input_doc = match value { + Bson::Document(fields) => Ok(fields), + _ => Err(BsonToJsonError::TypeMismatch( + Type::Object(object_type_name.to_owned()), + value, + )), + }?; + let json_obj: serde_json::Map = object_type + .named_fields() + .filter_map(|field| { + let field_value_result = + get_object_field_value(object_type_name, field.clone(), &input_doc).transpose()?; + Some((field, field_value_result)) + }) + .map(|(field, field_value_result)| { + Ok(( + field.name.to_owned(), + bson_to_json(&field.value.r#type, object_types, field_value_result?)?, + )) + }) + .try_collect::<_, _, BsonToJsonError>()?; + Ok(Value::Object(json_obj)) +} + +// Gets value for the appropriate key from the input object. Returns `Ok(None)` if the value is +// missing, and the field is nullable. Returns `Err` if the value is missing and the field is *not* +// nullable. 
+fn get_object_field_value( + object_type_name: &str, + field: WithNameRef<'_, ObjectField>, + doc: &bson::Document, +) -> Result> { + let value = doc.get(field.name); + if value.is_none() && field.value.r#type.is_nullable() { + return Ok(None); + } + Ok(Some(value.cloned().ok_or_else(|| { + BsonToJsonError::MissingObjectField( + Type::Object(object_type_name.to_owned()), + field.name.to_owned(), + ) + })?)) +} + +fn convert_nullable( + underlying_type: &Type, + object_types: &BTreeMap, + value: Bson, +) -> Result { + match value { + Bson::Null => Ok(Value::Null), + non_null_value => bson_to_json(underlying_type, object_types, non_null_value), + } +} + +// Use custom conversion instead of the type in json_formats to get canonical extjson output +fn convert_code(v: bson::JavaScriptCodeWithScope) -> Result { + Ok(Value::Object( + [ + ("$code".to_owned(), Value::String(v.code)), + ( + "$scope".to_owned(), + Into::::into(v.scope).into_canonical_extjson(), + ), + ] + .into_iter() + .collect(), + )) +} + +// We could convert directly from bson::DateTime to OffsetDateTime if the bson feature `time-0_3` +// were set. Unfortunately it is difficult for us to set that feature since we get bson via +// mongodb. +fn convert_date(date: bson::DateTime) -> Result { + let system_time = date.to_system_time(); + let offset_date: OffsetDateTime = system_time.into(); + let string = offset_date + .format(&Iso8601::DEFAULT) + .map_err(|err| BsonToJsonError::DateConversion(err.to_string()))?; + Ok(Value::String(string)) +} + +// We can mix up doubles and 32-bit ints because they both map to JSON numbers, we don't lose +// precision, and they carry approximately the same meaning when converted back to BSON with the +// reversed type. 
+fn convert_small_number(expected_type: BsonScalarType, value: Bson) -> Result { + match value { + Bson::Double(n) => Ok(Value::Number( + Number::from_f64(n).ok_or(BsonToJsonError::DoubleConversion(n))?, + )), + Bson::Int32(n) => Ok(Value::Number(n.into())), + _ => Err(BsonToJsonError::TypeMismatch( + Type::Scalar(expected_type), + value, + )), + } +} + +#[cfg(test)] +mod tests { + use pretty_assertions::assert_eq; + use serde_json::json; + + use super::*; + + #[test] + fn serializes_document_with_missing_nullable_field() -> anyhow::Result<()> { + let expected_type = Type::Object("test_object".to_owned()); + let object_types = [( + "test_object".to_owned(), + ObjectType { + fields: [( + "field".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::String))), + description: None, + }, + )] + .into(), + description: None, + }, + )] + .into(); + let value = bson::doc! {}; + let actual = bson_to_json(&expected_type, &object_types, value.into())?; + assert_eq!(actual, json!({})); + Ok(()) + } +} diff --git a/crates/mongodb-agent-common/src/query/serialization/json_formats.rs b/crates/mongodb-agent-common/src/query/serialization/json_formats.rs new file mode 100644 index 00000000..9ab6c8d0 --- /dev/null +++ b/crates/mongodb-agent-common/src/query/serialization/json_formats.rs @@ -0,0 +1,109 @@ +//! Types defined just to get serialization logic for BSON "scalar" types that are represented in +//! JSON as composite structures. The types here are designed to match the representations of BSON +//! types in extjson. 
+ +use mongodb::bson::{self, Bson}; +use serde::{Deserialize, Serialize}; +use serde_with::{base64::Base64, hex::Hex, serde_as}; + +#[serde_as] +#[derive(Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct BinData { + #[serde_as(as = "Base64")] + base64: Vec, + #[serde_as(as = "Hex")] + sub_type: [u8; 1], +} + +impl From for Bson { + fn from(value: BinData) -> Self { + Bson::Binary(bson::Binary { + bytes: value.base64, + subtype: value.sub_type[0].into(), + }) + } +} + +impl From for BinData { + fn from(value: bson::Binary) -> Self { + BinData { + base64: value.bytes, + sub_type: [value.subtype.into()], + } + } +} + +#[derive(Deserialize)] +pub struct JavaScriptCodeWithScope { + #[serde(rename = "$code")] + code: String, + #[serde(rename = "$scope")] + scope: bson::Document, // TODO: serialize as extjson! +} + +impl From for Bson { + fn from(value: JavaScriptCodeWithScope) -> Self { + Bson::JavaScriptCodeWithScope(bson::JavaScriptCodeWithScope { + code: value.code, + scope: value.scope, + }) + } +} + +impl From for JavaScriptCodeWithScope { + fn from(value: bson::JavaScriptCodeWithScope) -> Self { + JavaScriptCodeWithScope { + code: value.code, + scope: value.scope, + } + } +} + +#[derive(Deserialize, Serialize)] +pub struct Regex { + pattern: String, + options: String, +} + +impl From for Bson { + fn from(value: Regex) -> Self { + Bson::RegularExpression(bson::Regex { + pattern: value.pattern, + options: value.options, + }) + } +} + +impl From for Regex { + fn from(value: bson::Regex) -> Self { + Regex { + pattern: value.pattern, + options: value.options, + } + } +} + +#[derive(Deserialize, Serialize)] +pub struct Timestamp { + t: u32, + i: u32, +} + +impl From for Bson { + fn from(value: Timestamp) -> Self { + Bson::Timestamp(bson::Timestamp { + time: value.t, + increment: value.i, + }) + } +} + +impl From for Timestamp { + fn from(value: bson::Timestamp) -> Self { + Timestamp { + t: value.time, + i: value.increment, + } + } +} diff --git 
a/crates/mongodb-agent-common/src/query/arguments/json_to_bson.rs b/crates/mongodb-agent-common/src/query/serialization/json_to_bson.rs similarity index 81% rename from crates/mongodb-agent-common/src/query/arguments/json_to_bson.rs rename to crates/mongodb-agent-common/src/query/serialization/json_to_bson.rs index dae77a72..6dc32ef9 100644 --- a/crates/mongodb-agent-common/src/query/arguments/json_to_bson.rs +++ b/crates/mongodb-agent-common/src/query/serialization/json_to_bson.rs @@ -1,6 +1,9 @@ -use std::{collections::BTreeMap, str::FromStr}; +use std::{collections::BTreeMap, num::ParseIntError, str::FromStr}; -use configuration::schema::{ObjectType, Type}; +use configuration::{ + schema::{ObjectField, ObjectType, Type}, + WithNameRef, +}; use itertools::Itertools as _; use mongodb::bson::{self, Bson, Decimal128}; use mongodb_support::BsonScalarType; @@ -9,6 +12,8 @@ use serde_json::Value; use thiserror::Error; use time::{format_description::well_known::Iso8601, OffsetDateTime}; +use super::json_formats; + #[derive(Debug, Error)] pub enum JsonToBsonError { #[error("error converting \"{1}\" to type, \"{0:?}\"")] @@ -33,6 +38,9 @@ pub enum JsonToBsonError { #[error("inputs of type {0} are not implemented")] NotImplemented(BsonScalarType), + #[error("could not parse 64-bit integer input, {0}: {1}")] + ParseInt(String, #[source] ParseIntError), + #[error("error deserializing input: {0}")] SerdeError(#[from] serde_json::Error), @@ -53,7 +61,7 @@ pub fn json_to_bson( value: Value, ) -> Result { match expected_type { - Type::Any => serde_json::from_value::(value.clone()).map_err(JsonToBsonError::SerdeError), + Type::Any => serde_json::from_value::(value).map_err(JsonToBsonError::SerdeError), Type::Scalar(t) => json_to_bson_scalar(*t, value), Type::Object(object_type_name) => { let object_type = object_types @@ -71,7 +79,7 @@ pub fn json_to_bson_scalar(expected_type: BsonScalarType, value: Value) -> Resul let result = match expected_type { BsonScalarType::Double => 
Bson::Double(deserialize(expected_type, value)?), BsonScalarType::Int => Bson::Int32(deserialize(expected_type, value)?), - BsonScalarType::Long => Bson::Int64(deserialize(expected_type, value)?), + BsonScalarType::Long => convert_long(&from_string(expected_type, value)?)?, BsonScalarType::Decimal => Bson::Decimal128( Decimal128::from_str(&from_string(expected_type, value.clone())?).map_err(|err| { JsonToBsonError::ConversionErrorWithContext( @@ -83,8 +91,12 @@ pub fn json_to_bson_scalar(expected_type: BsonScalarType, value: Value) -> Resul ), BsonScalarType::String => Bson::String(deserialize(expected_type, value)?), BsonScalarType::Date => convert_date(&from_string(expected_type, value)?)?, - BsonScalarType::Timestamp => deserialize::(expected_type, value)?.into(), - BsonScalarType::BinData => deserialize::(expected_type, value)?.into(), + BsonScalarType::Timestamp => { + deserialize::(expected_type, value)?.into() + } + BsonScalarType::BinData => { + deserialize::(expected_type, value)?.into() + } BsonScalarType::ObjectId => Bson::ObjectId(deserialize(expected_type, value)?), BsonScalarType::Bool => match value { Value::Bool(b) => Bson::Boolean(b), @@ -98,10 +110,10 @@ pub fn json_to_bson_scalar(expected_type: BsonScalarType, value: Value) -> Resul Value::Null => Bson::Undefined, _ => incompatible_scalar_type(BsonScalarType::Undefined, value)?, }, - BsonScalarType::Regex => deserialize::(expected_type, value)?.into(), + BsonScalarType::Regex => deserialize::(expected_type, value)?.into(), BsonScalarType::Javascript => Bson::JavaScriptCode(deserialize(expected_type, value)?), BsonScalarType::JavascriptWithScope => { - deserialize::(expected_type, value)?.into() + deserialize::(expected_type, value)?.into() } BsonScalarType::MinKey => Bson::MinKey, BsonScalarType::MaxKey => Bson::MaxKey, @@ -112,81 +124,6 @@ pub fn json_to_bson_scalar(expected_type: BsonScalarType, value: Value) -> Resul Ok(result) } -/// Types defined just to get deserialization logic for BSON 
"scalar" types that are represented in -/// JSON as composite structures. The types here are designed to match the representations of BSON -/// types in extjson. -mod de { - use mongodb::bson::{self, Bson}; - use serde::Deserialize; - use serde_with::{base64::Base64, hex::Hex, serde_as}; - - #[serde_as] - #[derive(Deserialize)] - #[serde(rename_all = "camelCase")] - pub struct BinData { - #[serde_as(as = "Base64")] - base64: Vec, - #[serde_as(as = "Hex")] - sub_type: [u8; 1], - } - - impl From for Bson { - fn from(value: BinData) -> Self { - Bson::Binary(bson::Binary { - bytes: value.base64, - subtype: value.sub_type[0].into(), - }) - } - } - - #[derive(Deserialize)] - pub struct JavaScripCodetWithScope { - #[serde(rename = "$code")] - code: String, - #[serde(rename = "$scope")] - scope: bson::Document, - } - - impl From for Bson { - fn from(value: JavaScripCodetWithScope) -> Self { - Bson::JavaScriptCodeWithScope(bson::JavaScriptCodeWithScope { - code: value.code, - scope: value.scope, - }) - } - } - - #[derive(Deserialize)] - pub struct Regex { - pattern: String, - options: String, - } - - impl From for Bson { - fn from(value: Regex) -> Self { - Bson::RegularExpression(bson::Regex { - pattern: value.pattern, - options: value.options, - }) - } - } - - #[derive(Deserialize)] - pub struct Timestamp { - t: u32, - i: u32, - } - - impl From for Bson { - fn from(value: Timestamp) -> Self { - Bson::Timestamp(bson::Timestamp { - time: value.t, - increment: value.i, - }) - } - } -} - fn convert_array( element_type: &Type, object_types: &BTreeMap, @@ -209,22 +146,42 @@ fn convert_object( let input_fields: BTreeMap = serde_json::from_value(value)?; let bson_doc: bson::Document = object_type .named_fields() - .map(|field| { - let input_field_value = input_fields.get(field.name).ok_or_else(|| { - JsonToBsonError::MissingObjectField( - Type::Object(object_type_name.to_owned()), - field.name.to_owned(), - ) - })?; + .filter_map(|field| { + let field_value_result = + 
get_object_field_value(object_type_name, field.clone(), &input_fields) + .transpose()?; + Some((field, field_value_result)) + }) + .map(|(field, field_value_result)| { Ok(( field.name.to_owned(), - json_to_bson(&field.value.r#type, object_types, input_field_value.clone())?, + json_to_bson(&field.value.r#type, object_types, field_value_result?)?, )) }) .try_collect::<_, _, JsonToBsonError>()?; Ok(bson_doc.into()) } +// Gets value for the appropriate key from the input object. Returns `Ok(None)` if the value is +// missing, and the field is nullable. Returns `Err` if the value is missing and the field is *not* +// nullable. +fn get_object_field_value( + object_type_name: &str, + field: WithNameRef<'_, ObjectField>, + object: &BTreeMap, +) -> Result> { + let value = object.get(field.name); + if value.is_none() && field.value.r#type.is_nullable() { + return Ok(None); + } + Ok(Some(value.cloned().ok_or_else(|| { + JsonToBsonError::MissingObjectField( + Type::Object(object_type_name.to_owned()), + field.name.to_owned(), + ) + })?)) +} + fn convert_nullable( underlying_type: &Type, object_types: &BTreeMap, @@ -249,6 +206,13 @@ fn convert_date(value: &str) -> Result { ))) } +fn convert_long(value: &str) -> Result { + let n: i64 = value + .parse() + .map_err(|err| JsonToBsonError::ParseInt(value.to_owned(), err))?; + Ok(Bson::Int64(n)) +} + fn deserialize(expected_type: BsonScalarType, value: Value) -> Result where T: DeserializeOwned, @@ -281,7 +245,7 @@ mod tests { use std::{collections::BTreeMap, str::FromStr}; use configuration::schema::{ObjectField, ObjectType, Type}; - use mongodb::bson::{self, datetime::DateTimeBuilder, Bson}; + use mongodb::bson::{self, bson, datetime::DateTimeBuilder, Bson}; use mongodb_support::BsonScalarType; use pretty_assertions::assert_eq; use serde_json::json; @@ -322,7 +286,7 @@ mod tests { let input = json!({ "double": 3.14159, "int": 3, - "long": 3, + "long": "3", "decimal": "3.14159", "string": "hello", "date": "2024-03-22T00:59:01Z", @@ 
-423,4 +387,28 @@ mod tests { assert_eq!(actual, expected); Ok(()) } + + #[test] + fn deserializes_object_with_missing_nullable_field() -> anyhow::Result<()> { + let expected_type = Type::Object("test_object".to_owned()); + let object_types = [( + "test_object".to_owned(), + ObjectType { + fields: [( + "field".to_owned(), + ObjectField { + r#type: Type::Nullable(Box::new(Type::Scalar(BsonScalarType::String))), + description: None, + }, + )] + .into(), + description: None, + }, + )] + .into(); + let value = json!({}); + let actual = json_to_bson(&expected_type, &object_types, value)?; + assert_eq!(actual, bson!({})); + Ok(()) + } } diff --git a/crates/mongodb-agent-common/src/query/serialization/mod.rs b/crates/mongodb-agent-common/src/query/serialization/mod.rs new file mode 100644 index 00000000..31e63af4 --- /dev/null +++ b/crates/mongodb-agent-common/src/query/serialization/mod.rs @@ -0,0 +1,9 @@ +mod bson_to_json; +mod json_formats; +mod json_to_bson; + +#[cfg(test)] +mod tests; + +pub use self::bson_to_json::bson_to_json; +pub use self::json_to_bson::{json_to_bson, json_to_bson_scalar, JsonToBsonError}; diff --git a/crates/mongodb-agent-common/src/query/serialization/tests.rs b/crates/mongodb-agent-common/src/query/serialization/tests.rs new file mode 100644 index 00000000..e6eb52eb --- /dev/null +++ b/crates/mongodb-agent-common/src/query/serialization/tests.rs @@ -0,0 +1,35 @@ +use configuration::schema::Type; +use mongodb::bson::Bson; +use mongodb_cli_plugin::type_from_bson; +use mongodb_support::BsonScalarType; +use proptest::prelude::*; +use test_helpers::arb_bson::{arb_bson, arb_datetime}; + +use super::{bson_to_json, json_to_bson}; + +proptest! 
{ + #[test] + fn converts_bson_to_json_and_back(bson in arb_bson()) { + let (object_types, inferred_type) = type_from_bson("test_object", &bson); + let error_context = |msg: &str, source: String| TestCaseError::fail(format!("{msg}: {source}\ninferred type: {inferred_type:?}\nobject types: {object_types:?}")); + let json = bson_to_json(&inferred_type, &object_types, bson.clone()).map_err(|e| error_context("error converting bson to json", e.to_string()))?; + let actual = json_to_bson(&inferred_type, &object_types, json.clone()).map_err(|e| error_context("error converting json to bson", e.to_string()))?; + prop_assert_eq!(actual, bson, + "\ninferred type: {:?}\nobject types: {:?}\njson_representation: {}", + inferred_type, + object_types, + serde_json::to_string_pretty(&json).unwrap() + ) + } +} + +proptest! { + #[test] + fn converts_datetime_from_bson_to_json_and_back(d in arb_datetime()) { + let t = Type::Scalar(BsonScalarType::Date); + let bson = Bson::DateTime(d); + let json = bson_to_json(&t, &Default::default(), bson.clone())?; + let actual = json_to_bson(&t, &Default::default(), json.clone())?; + prop_assert_eq!(actual, bson, "json representation: {}", json) + } +} diff --git a/crates/mongodb-connector/Cargo.toml b/crates/mongodb-connector/Cargo.toml index 0b380583..e89e8392 100644 --- a/crates/mongodb-connector/Cargo.toml +++ b/crates/mongodb-connector/Cargo.toml @@ -9,7 +9,7 @@ async-trait = "^0.1" configuration = { path = "../configuration" } dc-api = { path = "../dc-api" } dc-api-types = { path = "../dc-api-types" } -enum-iterator = "1.4.1" +enum-iterator = "^2.0.0" futures = "^0.3" http = "^0.2" indexmap = { version = "2.1.0", features = ["serde"] } diff --git a/crates/mongodb-connector/src/mutation.rs b/crates/mongodb-connector/src/mutation.rs index c751073c..76953ddc 100644 --- a/crates/mongodb-connector/src/mutation.rs +++ b/crates/mongodb-connector/src/mutation.rs @@ -1,7 +1,12 @@ +use std::collections::BTreeMap; + +use 
configuration::schema::ObjectType; use futures::future::try_join_all; use itertools::Itertools; use mongodb::Database; -use mongodb_agent_common::{interface_types::MongoConfig, procedure::Procedure}; +use mongodb_agent_common::{ + interface_types::MongoConfig, procedure::Procedure, query::serialization::bson_to_json, +}; use ndc_sdk::{ connector::MutationError, json_response::JsonResponse, @@ -17,7 +22,7 @@ pub async fn handle_mutation_request( let jobs = look_up_procedures(config, mutation_request)?; let operation_results = try_join_all( jobs.into_iter() - .map(|procedure| execute_procedure(database.clone(), procedure)), + .map(|procedure| execute_procedure(&config.object_types, database.clone(), procedure)), ) .await?; Ok(JsonResponse::Value(MutationResponse { operation_results })) @@ -37,9 +42,9 @@ fn look_up_procedures( name, arguments, .. } => { let native_query = config.native_queries.get(&name); - native_query.ok_or(name).map(|native_query| { - Procedure::from_native_query(native_query, &config.object_types, arguments) - }) + native_query + .ok_or(name) + .map(|native_query| Procedure::from_native_query(native_query, arguments)) } }) .partition_result(); @@ -55,18 +60,16 @@ fn look_up_procedures( } async fn execute_procedure( + object_types: &BTreeMap, database: Database, procedure: Procedure<'_>, ) -> Result { - let result = procedure - .execute(database.clone()) + let (result, result_type) = procedure + .execute(object_types, database.clone()) .await .map_err(|err| MutationError::InvalidRequest(err.to_string()))?; - - // TODO: instead of outputting extended JSON, map to JSON using a reverse of `json_to_bson` - // according to the native query result type - let json_result = - serde_json::to_value(result).map_err(|err| MutationError::Other(Box::new(err)))?; + let json_result = bson_to_json(&result_type, object_types, result.into()) + .map_err(|err| MutationError::Other(Box::new(err)))?; Ok(MutationOperationResults::Procedure { result: json_result, }) diff 
--git a/crates/mongodb-support/Cargo.toml b/crates/mongodb-support/Cargo.toml index 1a1d43a6..aecfc7f8 100644 --- a/crates/mongodb-support/Cargo.toml +++ b/crates/mongodb-support/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [dependencies] dc-api-types = { path = "../dc-api-types" } -enum-iterator = "1.4.1" +enum-iterator = "^2.0.0" indexmap = { version = "1", features = ["serde"] } # must match the version that ndc-client uses mongodb = "2.8" schemars = "^0.8.12" diff --git a/crates/test-helpers/Cargo.toml b/crates/test-helpers/Cargo.toml new file mode 100644 index 00000000..fc113da3 --- /dev/null +++ b/crates/test-helpers/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "test-helpers" +edition = "2021" +version.workspace = true + +[dependencies] +configuration = { path = "../configuration" } +mongodb-support = { path = "../mongodb-support" } + +enum-iterator = "^2.0.0" +mongodb = "2.8" +proptest = "1" + diff --git a/crates/test-helpers/src/arb_bson.rs b/crates/test-helpers/src/arb_bson.rs new file mode 100644 index 00000000..295e91c6 --- /dev/null +++ b/crates/test-helpers/src/arb_bson.rs @@ -0,0 +1,143 @@ +use std::time::SystemTime; + +use mongodb::bson::{self, oid::ObjectId, Bson}; +use proptest::{collection, prelude::*, sample::SizeRange}; + +pub fn arb_bson() -> impl Strategy { + arb_bson_with_options(Default::default()) +} + +#[derive(Clone, Debug)] +pub struct ArbBsonOptions { + /// max AST depth of generated values + pub depth: u32, + + /// number of AST nodes to target + pub desired_size: u32, + + /// minimum and maximum number of elements per array, or fields per document + pub branch_range: SizeRange, + + /// If set to false arrays are generated such that all elements have a uniform type according + /// to `type_unification` in the introspection crate. Note that we consider "nullable" a valid + /// type, so array elements will sometimes be null even if this is set to true. 
+ pub heterogeneous_arrays: bool, +} + +impl Default for ArbBsonOptions { + fn default() -> Self { + Self { + depth: 8, + desired_size: 256, + branch_range: (0, 10).into(), + heterogeneous_arrays: true, + } + } +} + +pub fn arb_bson_with_options(options: ArbBsonOptions) -> impl Strategy { + let leaf = prop_oneof![ + Just(Bson::Null), + // TODO: Our type unification treats undefined as the bottom type so it unifies with other + // types, and therefore does not pass round-trip tests. + // Just(Bson::Undefined), + Just(Bson::MaxKey), + Just(Bson::MinKey), + any::().prop_map(Bson::Boolean), + any::().prop_map(Bson::Int32), + any::().prop_map(Bson::Int64), + any::().prop_map(Bson::Double), + arb_datetime().prop_map(Bson::DateTime), + arb_object_id().prop_map(Bson::ObjectId), + any::().prop_map(Bson::String), + any::().prop_map(Bson::Symbol), + arb_decimal().prop_map(Bson::Decimal128), + any::().prop_map(Bson::JavaScriptCode), + (any::(), any::()) + .prop_map(|(time, increment)| Bson::Timestamp(bson::Timestamp { time, increment })), + arb_binary().prop_map(Bson::Binary), + (".*", "i?l?m?s?u?x?").prop_map(|(pattern, options)| Bson::RegularExpression( + bson::Regex { pattern, options } + )), + // skipped DbPointer because it is deprecated, and does not have a public constructor + ]; + leaf.prop_recursive( + options.depth, + options.desired_size, + options.branch_range.end_incl().try_into().unwrap(), + move |inner| { + prop_oneof![ + arb_bson_array_recursive(inner.clone(), options.clone()).prop_map(Bson::Array), + arb_bson_document_recursive(inner.clone(), options.branch_range.clone()) + .prop_map(Bson::Document), + ( + any::(), + arb_bson_document_recursive(inner, options.branch_range.clone()) + ) + .prop_map(|(code, scope)| { + Bson::JavaScriptCodeWithScope(bson::JavaScriptCodeWithScope { code, scope }) + }), + ] + }, + ) +} + +fn arb_bson_array_recursive( + value: impl Strategy + 'static, + options: ArbBsonOptions, +) -> impl Strategy> { + if options.heterogeneous_arrays 
{ + collection::vec(value, options.branch_range).boxed() + } else { + // To make sure the array is homogeneously-typed generate one arbitrary BSON value and + // replicate it. But we still want a chance to include null values because we can unify + // those into a non-Any type. So each array element has a 10% chance to be null instead of + // the generated value. + ( + value, + collection::vec(proptest::bool::weighted(0.9), options.branch_range), + ) + .prop_map(|(value, non_nulls)| { + non_nulls + .into_iter() + .map(|non_null| if non_null { value.clone() } else { Bson::Null }) + .collect() + }) + .boxed() + } +} + +pub fn arb_bson_document(size: impl Into) -> impl Strategy { + arb_bson_document_recursive(arb_bson(), size) +} + +fn arb_bson_document_recursive( + value: impl Strategy, + size: impl Into, +) -> impl Strategy { + collection::btree_map(".+", value, size).prop_map(|fields| fields.into_iter().collect()) +} + +fn arb_binary() -> impl Strategy { + let binary_subtype = any::().prop_map(Into::into); + let bytes = collection::vec(any::(), 1..256); + (binary_subtype, bytes).prop_map(|(subtype, bytes)| bson::Binary { subtype, bytes }) +} + +pub fn arb_datetime() -> impl Strategy { + any::().prop_map(bson::DateTime::from_system_time) +} + +// Generate bytes for a 128-bit decimal, and convert to a string and back to normalize. This does +// not produce a uniform probability distribution over decimal values so it would not make a good +// random number generator. But it is useful for testing serialization. 
+fn arb_decimal() -> impl Strategy { + any::<[u8; 128 / 8]>().prop_map(|bytes| { + let raw_decimal = bson::Decimal128::from_bytes(bytes); + raw_decimal.to_string().parse().unwrap() + }) +} + +fn arb_object_id() -> impl Strategy { + any::<[u8; 12]>().prop_map(Into::into) +} diff --git a/crates/test-helpers/src/arb_type.rs b/crates/test-helpers/src/arb_type.rs new file mode 100644 index 00000000..00c2f6e8 --- /dev/null +++ b/crates/test-helpers/src/arb_type.rs @@ -0,0 +1,22 @@ +use configuration::schema::Type; +use enum_iterator::Sequence as _; +use mongodb_support::BsonScalarType; +use proptest::prelude::*; + +pub fn arb_bson_scalar_type() -> impl Strategy { + (0..BsonScalarType::CARDINALITY) + .prop_map(|n| enum_iterator::all::().nth(n).unwrap()) +} + +pub fn arb_type() -> impl Strategy { + let leaf = prop_oneof![ + arb_bson_scalar_type().prop_map(Type::Scalar), + any::().prop_map(Type::Object) + ]; + leaf.prop_recursive(3, 10, 10, |inner| { + prop_oneof![ + inner.clone().prop_map(|t| Type::ArrayOf(Box::new(t))), + inner.prop_map(|t| Type::Nullable(Box::new(t))) + ] + }) +} diff --git a/crates/test-helpers/src/lib.rs b/crates/test-helpers/src/lib.rs new file mode 100644 index 00000000..751ce2d2 --- /dev/null +++ b/crates/test-helpers/src/lib.rs @@ -0,0 +1,5 @@ +pub mod arb_bson; +pub mod arb_type; + +pub use arb_bson::{arb_bson, arb_bson_with_options, ArbBsonOptions}; +pub use arb_type::arb_type;