diff --git a/CHANGELOG.md b/CHANGELOG.md index f4805ac7..4a83a187 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ This changelog documents the changes between release versions. - Upgrade dependencies to get fix for RUSTSEC-2024-0421, a vulnerability in domain name comparisons ([#138](https://github.com/hasura/ndc-mongodb/pull/138)) - Aggregations on empty document sets now produce `null` instead of failing with an error ([#136](https://github.com/hasura/ndc-mongodb/pull/136)) +- Handle collection validators with object fields that do not list properties ([#140](https://github.com/hasura/ndc-mongodb/pull/140)) #### Fix for RUSTSEC-2024-0421 / CVE-2024-12224 @@ -31,6 +32,25 @@ it uses the affected library exclusively to connect to MongoDB databases, and database URLs are supplied by trusted administrators. But better to be safe than sorry. +#### Validators with object fields that do not list properties + +If a collection validator species an property of type `object`, but does not specify a list of nested properties for that object then we will infer the `ExtendedJSON` type for that property. For a collection created with this set of options would have the type `ExtendedJSON` for its `reactions` field: + +```json +{ + "validator": { + "$jsonSchema": { + "bsonType": "object", + "properties": { + "reactions": { "bsonType": "object" }, + } + } + } +} +``` + +If the validator specifies a map of nested properties, but that map is empty, then we interpret that as an empty object type. + ## [1.5.0] - 2024-12-05 ### Added diff --git a/Cargo.lock b/Cargo.lock index 10b14f99..0790dd2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "addr2line" @@ -1680,14 +1680,13 @@ dependencies = [ [[package]] name = "mockall" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43766c2b5203b10de348ffe19f7e54564b64f3d6018ff7648d1e2d6d3a0f0a48" +checksum = "39a6bfcc6c8c7eed5ee98b9c3e33adc726054389233e201c95dab2d41a3839d2" dependencies = [ "cfg-if", "downcast", "fragile", - "lazy_static", "mockall_derive", "predicates", "predicates-tree", @@ -1695,9 +1694,9 @@ dependencies = [ [[package]] name = "mockall_derive" -version = "0.12.1" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cbce79ec385a1d4f54baa90a76401eb15d9cab93685f62e7e9f942aa00ae2" +checksum = "25ca3004c2efe9011bd4e461bd8256445052b9615405b4f7ea43fc8ca5c20898" dependencies = [ "cfg-if", "proc-macro2", @@ -1799,6 +1798,7 @@ name = "mongodb-cli-plugin" version = "1.5.0" dependencies = [ "anyhow", + "async-tempfile", "clap", "configuration", "enum-iterator", diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 944b2027..1ecc27c3 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -32,6 +32,9 @@ thiserror = "1.0.57" tokio = { version = "1.36.0", features = ["full"] } [dev-dependencies] +mongodb-agent-common = { path = "../mongodb-agent-common", features = ["test-helpers"] } + +async-tempfile = "^0.6.0" googletest = "^0.12.0" pretty_assertions = "1" proptest = "1" diff --git a/crates/cli/src/introspection/sampling.rs b/crates/cli/src/introspection/sampling.rs index d557fac1..fcfc5e9d 100644 --- a/crates/cli/src/introspection/sampling.rs +++ b/crates/cli/src/introspection/sampling.rs @@ -9,8 +9,11 @@ use configuration::{ }; use futures_util::TryStreamExt; use mongodb::bson::{doc, Bson, Document}; -use mongodb_agent_common::state::ConnectorState; -use mongodb_support::BsonScalarType::{self, *}; +use mongodb_agent_common::mongodb::{CollectionTrait as _, DatabaseTrait}; +use mongodb_support::{ + aggregate::{Pipeline, Stage}, + BsonScalarType::{self, *}, +}; type ObjectField = WithName; type ObjectType = WithName; @@ -23,11 +26,10 @@ pub async fn sample_schema_from_db( sample_size: u32, all_schema_nullable: bool, config_file_changed: bool, - state: &ConnectorState, + db: &impl DatabaseTrait, existing_schemas: &HashSet, ) -> anyhow::Result> { let mut schemas = BTreeMap::new(); - let db = state.database(); let mut collections_cursor = db.list_collections().await?; while let Some(collection_spec) = collections_cursor.try_next().await? { @@ -37,7 +39,7 @@ pub async fn sample_schema_from_db( &collection_name, sample_size, all_schema_nullable, - state, + db, ) .await?; if let Some(collection_schema) = collection_schema { @@ -54,14 +56,17 @@ async fn sample_schema_from_collection( collection_name: &str, sample_size: u32, all_schema_nullable: bool, - state: &ConnectorState, + db: &impl DatabaseTrait, ) -> anyhow::Result> { - let db = state.database(); let options = None; let mut cursor = db - .collection::(collection_name) - .aggregate(vec![doc! {"$sample": { "size": sample_size }}]) - .with_options(options) + .collection(collection_name) + .aggregate( + Pipeline::new(vec![Stage::Other(doc! { + "$sample": { "size": sample_size } + })]), + options, + ) .await?; let mut collected_object_types = vec![]; let is_collection_type = true; diff --git a/crates/cli/src/introspection/validation_schema.rs b/crates/cli/src/introspection/validation_schema.rs index 507355e3..f90b0122 100644 --- a/crates/cli/src/introspection/validation_schema.rs +++ b/crates/cli/src/introspection/validation_schema.rs @@ -7,8 +7,8 @@ use configuration::{ use futures_util::TryStreamExt; use mongodb::bson::from_bson; use mongodb_agent_common::{ + mongodb::DatabaseTrait, schema::{get_property_description, Property, ValidatorSchema}, - state::ConnectorState, }; use mongodb_support::BsonScalarType; @@ -19,9 +19,8 @@ type ObjectType = WithName; type ObjectField = WithName; pub async fn get_metadata_from_validation_schema( - state: &ConnectorState, + db: &impl DatabaseTrait, ) -> Result, MongoAgentError> { - let db = state.database(); let mut collections_cursor = db.list_collections().await?; let mut schemas: Vec> = vec![]; @@ -152,10 +151,12 @@ fn make_field_type(object_type_name: &str, prop_schema: &Property) -> (Vec (vec![], Type::ExtendedJSON), + Property::Object { description: _, required, - properties, + properties: Some(properties), } => { let type_prefix = format!("{object_type_name}_"); let (otds, otd_fields): (Vec>, Vec) = properties @@ -177,7 +178,6 @@ fn make_field_type(object_type_name: &str, prop_schema: &Property) -> (Vec { diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 3fb92b9d..57bae3d1 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -3,6 +3,8 @@ mod exit_codes; mod introspection; mod logging; +#[cfg(test)] +mod tests; #[cfg(feature = "native-query-subcommand")] mod native_query; @@ -13,7 +15,7 @@ use clap::{Parser, Subcommand}; // Exported for use in tests pub use introspection::type_from_bson; -use mongodb_agent_common::state::try_init_state_from_uri; +use mongodb_agent_common::{mongodb::DatabaseTrait, state::try_init_state_from_uri}; #[cfg(feature = "native-query-subcommand")] pub use native_query::native_query_from_pipeline; @@ -49,7 +51,10 @@ pub struct Context { /// Run a command in a given directory. pub async fn run(command: Command, context: &Context) -> anyhow::Result<()> { match command { - Command::Update(args) => update(context, &args).await?, + Command::Update(args) => { + let connector_state = try_init_state_from_uri(context.connection_uri.as_ref()).await?; + update(context, &args, &connector_state.database()).await? + } #[cfg(feature = "native-query-subcommand")] Command::NativeQuery(command) => native_query::run(context, command).await?, @@ -58,12 +63,14 @@ pub async fn run(command: Command, context: &Context) -> anyhow::Result<()> { } /// Update the configuration in the current directory by introspecting the database. -async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { - let connector_state = try_init_state_from_uri(context.connection_uri.as_ref()).await?; - +async fn update( + context: &Context, + args: &UpdateArgs, + database: &impl DatabaseTrait, +) -> anyhow::Result<()> { let configuration_options = configuration::parse_configuration_options_file(&context.path).await?; - // Prefer arguments passed to cli, and fallback to the configuration file + // Prefer arguments passed to cli, and fall back to the configuration file let sample_size = match args.sample_size { Some(size) => size, None => configuration_options.introspection_options.sample_size, @@ -88,7 +95,7 @@ async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { if !no_validator_schema { let schemas_from_json_validation = - introspection::get_metadata_from_validation_schema(&connector_state).await?; + introspection::get_metadata_from_validation_schema(database).await?; configuration::write_schema_directory(&context.path, schemas_from_json_validation).await?; } @@ -97,7 +104,7 @@ async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { sample_size, all_schema_nullable, config_file_changed, - &connector_state, + database, &existing_schemas, ) .await?; diff --git a/crates/cli/src/tests.rs b/crates/cli/src/tests.rs new file mode 100644 index 00000000..b41ef57e --- /dev/null +++ b/crates/cli/src/tests.rs @@ -0,0 +1,129 @@ +use async_tempfile::TempDir; +use configuration::read_directory; +use mongodb::bson::{self, doc, from_document}; +use mongodb_agent_common::mongodb::{test_helpers::mock_stream, MockDatabaseTrait}; +use ndc_models::{CollectionName, FieldName, ObjectField, ObjectType, Type}; +use pretty_assertions::assert_eq; + +use crate::{update, Context, UpdateArgs}; + +#[tokio::test] +async fn required_field_from_validator_is_non_nullable() -> anyhow::Result<()> { + let collection_object_type = collection_schema_from_validator(doc! { + "bsonType": "object", + "required": ["title"], + "properties": { + "title": { "bsonType": "string", "maxLength": 100 }, + "author": { "bsonType": "string", "maxLength": 100 }, + } + }) + .await?; + + assert_eq!( + collection_object_type + .fields + .get(&FieldName::new("title".into())), + Some(&ObjectField { + r#type: Type::Named { + name: "String".into() + }, + arguments: Default::default(), + description: Default::default(), + }) + ); + + assert_eq!( + collection_object_type + .fields + .get(&FieldName::new("author".into())), + Some(&ObjectField { + r#type: Type::Nullable { + underlying_type: Box::new(Type::Named { + name: "String".into() + }) + }, + arguments: Default::default(), + description: Default::default(), + }) + ); + + Ok(()) +} + +#[tokio::test] +async fn validator_object_with_no_properties_becomes_extended_json_object() -> anyhow::Result<()> { + let collection_object_type = collection_schema_from_validator(doc! { + "bsonType": "object", + "title": "posts validator", + "additionalProperties": false, + "properties": { + "reactions": { "bsonType": "object" }, + } + }) + .await?; + + assert_eq!( + collection_object_type + .fields + .get(&FieldName::new("reactions".into())), + Some(&ObjectField { + r#type: Type::Nullable { + underlying_type: Box::new(Type::Named { + name: "ExtendedJSON".into() + }) + }, + arguments: Default::default(), + description: Default::default(), + }) + ); + + Ok(()) +} + +async fn collection_schema_from_validator(validator: bson::Document) -> anyhow::Result { + let mut db = MockDatabaseTrait::new(); + let config_dir = TempDir::new().await?; + + let context = Context { + path: config_dir.to_path_buf(), + connection_uri: None, + display_color: false, + }; + + let args = UpdateArgs { + sample_size: Some(100), + no_validator_schema: None, + all_schema_nullable: Some(false), + }; + + db.expect_list_collections().returning(move || { + let collection_spec = doc! { + "name": "posts", + "type": "collection", + "options": { + "validator": { + "$jsonSchema": &validator + } + }, + "info": { "readOnly": false }, + }; + Ok(mock_stream(vec![Ok( + from_document(collection_spec).unwrap() + )])) + }); + + update(&context, &args, &db).await?; + + let configuration = read_directory(config_dir).await?; + + let collection = configuration + .collections + .get(&CollectionName::new("posts".into())) + .expect("posts collection"); + let collection_object_type = configuration + .object_types + .get(&collection.collection_type) + .expect("posts object type"); + + Ok(collection_object_type.clone()) +} diff --git a/crates/mongodb-agent-common/Cargo.toml b/crates/mongodb-agent-common/Cargo.toml index 6ad0ca18..52511d7e 100644 --- a/crates/mongodb-agent-common/Cargo.toml +++ b/crates/mongodb-agent-common/Cargo.toml @@ -4,6 +4,10 @@ description = "logic that is common to v2 and v3 agent versions" edition = "2021" version.workspace = true +[features] +default = [] +test-helpers = ["dep:mockall", "dep:pretty_assertions"] # exports mock database impl + [dependencies] configuration = { path = "../configuration" } mongodb-support = { path = "../mongodb-support" } @@ -21,9 +25,11 @@ indexmap = { workspace = true } indent = "^0.1" itertools = { workspace = true } lazy_static = "^1.4.0" +mockall = { version = "^0.13.1", optional = true } mongodb = { workspace = true } ndc-models = { workspace = true } once_cell = "1" +pretty_assertions = { version = "1", optional = true } regex = "1" schemars = { version = "^0.8.12", features = ["smol_str"] } serde = { workspace = true } @@ -38,7 +44,7 @@ mongodb-cli-plugin = { path = "../cli" } ndc-test-helpers = { path = "../ndc-test-helpers" } test-helpers = { path = "../test-helpers" } -mockall = "^0.12.1" +mockall = "^0.13.1" pretty_assertions = "1" proptest = "1" tokio = { version = "1", features = ["full"] } diff --git a/crates/mongodb-agent-common/src/mongodb/collection.rs b/crates/mongodb-agent-common/src/mongodb/collection.rs index ea087442..4e2fca01 100644 --- a/crates/mongodb-agent-common/src/mongodb/collection.rs +++ b/crates/mongodb-agent-common/src/mongodb/collection.rs @@ -9,17 +9,17 @@ use mongodb::{ use mongodb_support::aggregate::Pipeline; use serde::de::DeserializeOwned; -#[cfg(test)] +#[cfg(any(test, feature = "test-helpers"))] use mockall::automock; -#[cfg(test)] +#[cfg(any(test, feature = "test-helpers"))] use super::test_helpers::MockCursor; /// Abstract MongoDB collection methods. This lets us mock a database connection in tests. The /// automock attribute generates a struct called MockCollectionTrait that implements this trait. /// The mock provides a variety of methods for mocking and spying on database behavior in tests. /// See https://docs.rs/mockall/latest/mockall/ -#[cfg_attr(test, automock( +#[cfg_attr(any(test, feature = "test-helpers"), automock( type DocumentCursor=MockCursor; type RowCursor=MockCursor; ))] @@ -28,8 +28,8 @@ pub trait CollectionTrait where T: DeserializeOwned + Unpin + Send + Sync + 'static, { - type DocumentCursor: Stream> + 'static; - type RowCursor: Stream> + 'static; + type DocumentCursor: Stream> + 'static + Unpin; + type RowCursor: Stream> + 'static + Unpin; async fn aggregate( &self, diff --git a/crates/mongodb-agent-common/src/mongodb/database.rs b/crates/mongodb-agent-common/src/mongodb/database.rs index 75181b0e..b17a7293 100644 --- a/crates/mongodb-agent-common/src/mongodb/database.rs +++ b/crates/mongodb-agent-common/src/mongodb/database.rs @@ -1,17 +1,18 @@ use async_trait::async_trait; use futures_util::Stream; +use mongodb::results::CollectionSpecification; use mongodb::{bson::Document, error::Error, options::AggregateOptions, Database}; use mongodb_support::aggregate::Pipeline; -#[cfg(test)] +#[cfg(any(test, feature = "test-helpers"))] use mockall::automock; use super::CollectionTrait; -#[cfg(test)] +#[cfg(any(test, feature = "test-helpers"))] use super::MockCollectionTrait; -#[cfg(test)] +#[cfg(any(test, feature = "test-helpers"))] use super::test_helpers::MockCursor; /// Abstract MongoDB database methods. This lets us mock a database connection in tests. The @@ -22,14 +23,16 @@ use super::test_helpers::MockCursor; /// I haven't figured out how to make generic associated types work with automock, so the type /// argument for `Collection` values produced via `DatabaseTrait::collection` is fixed to to /// `Document`. That's the way we're using collections in this app anyway. -#[cfg_attr(test, automock( +#[cfg_attr(any(test, feature = "test-helpers"), automock( type Collection = MockCollectionTrait; + type CollectionCursor = MockCursor; type DocumentCursor = MockCursor; ))] #[async_trait] pub trait DatabaseTrait { type Collection: CollectionTrait; - type DocumentCursor: Stream>; + type CollectionCursor: Stream> + Unpin; + type DocumentCursor: Stream> + Unpin; async fn aggregate( &self, @@ -40,11 +43,14 @@ pub trait DatabaseTrait { Options: Into> + Send + 'static; fn collection(&self, name: &str) -> Self::Collection; + + async fn list_collections(&self) -> Result; } #[async_trait] impl DatabaseTrait for Database { type Collection = mongodb::Collection; + type CollectionCursor = mongodb::Cursor; type DocumentCursor = mongodb::Cursor; async fn aggregate( @@ -63,4 +69,8 @@ impl DatabaseTrait for Database { fn collection(&self, name: &str) -> Self::Collection { Database::collection::(self, name) } + + async fn list_collections(&self) -> Result { + Database::list_collections(self).await + } } diff --git a/crates/mongodb-agent-common/src/mongodb/mod.rs b/crates/mongodb-agent-common/src/mongodb/mod.rs index 361dbf89..48f16304 100644 --- a/crates/mongodb-agent-common/src/mongodb/mod.rs +++ b/crates/mongodb-agent-common/src/mongodb/mod.rs @@ -3,7 +3,7 @@ mod database; pub mod sanitize; mod selection; -#[cfg(test)] +#[cfg(any(test, feature = "test-helpers"))] pub mod test_helpers; pub use self::{ @@ -11,9 +11,9 @@ pub use self::{ }; // MockCollectionTrait is generated by automock when the test flag is active. -#[cfg(test)] +#[cfg(any(test, feature = "test-helpers"))] pub use self::collection::MockCollectionTrait; // MockDatabase is generated by automock when the test flag is active. -#[cfg(test)] +#[cfg(any(test, feature = "test-helpers"))] pub use self::database::MockDatabaseTrait; diff --git a/crates/mongodb-agent-common/src/mongodb/test_helpers.rs b/crates/mongodb-agent-common/src/mongodb/test_helpers.rs index 473db605..c89b3b70 100644 --- a/crates/mongodb-agent-common/src/mongodb/test_helpers.rs +++ b/crates/mongodb-agent-common/src/mongodb/test_helpers.rs @@ -14,7 +14,6 @@ use super::{MockCollectionTrait, MockDatabaseTrait}; // is produced when calling `into_iter` on a `Vec`. - Jesse H. // // To produce a mock stream use the `mock_stream` function in this module. -#[cfg(test)] pub type MockCursor = futures::stream::Iter<> as IntoIterator>::IntoIter>; /// Create a stream that can be returned from mock implementations for diff --git a/crates/mongodb-agent-common/src/schema.rs b/crates/mongodb-agent-common/src/schema.rs index 26fd6845..63daf74e 100644 --- a/crates/mongodb-agent-common/src/schema.rs +++ b/crates/mongodb-agent-common/src/schema.rs @@ -18,26 +18,22 @@ pub struct ValidatorSchema { #[derive(Clone, Debug, Deserialize)] #[cfg_attr(test, derive(PartialEq))] -#[serde(untagged)] +#[serde(tag = "bsonType", rename_all = "camelCase")] pub enum Property { Object { - #[serde(rename = "bsonType", default = "default_bson_type")] - #[allow(dead_code)] - bson_type: BsonType, #[serde(skip_serializing_if = "Option::is_none")] description: Option, #[serde(skip_serializing_if = "Vec::is_empty", default)] required: Vec, - properties: IndexMap, + #[serde(skip_serializing_if = "Option::is_none")] + properties: Option>, }, Array { - #[serde(rename = "bsonType", default = "default_bson_type")] - #[allow(dead_code)] - bson_type: BsonType, #[serde(skip_serializing_if = "Option::is_none")] description: Option, items: Box, }, + #[serde(untagged)] Scalar { #[serde(rename = "bsonType", default = "default_bson_scalar_type")] bson_type: BsonScalarType, @@ -49,13 +45,11 @@ pub enum Property { pub fn get_property_description(p: &Property) -> Option { match p { Property::Object { - bson_type: _, description, required: _, properties: _, } => description.clone(), Property::Array { - bson_type: _, description, items: _, } => description.clone(), @@ -78,8 +72,8 @@ fn default_bson_type() -> BsonType { mod test { use indexmap::IndexMap; use mongodb::bson::{bson, from_bson}; - use mongodb_support::{BsonScalarType, BsonType}; + use pretty_assertions::assert_eq; use super::{Property, ValidatorSchema}; @@ -122,10 +116,9 @@ mod test { assert_eq!( from_bson::(input)?, Property::Object { - bson_type: BsonType::Object, description: Some("Name of places".to_owned()), required: vec!["name".to_owned(), "description".to_owned()], - properties: IndexMap::from([ + properties: Some(IndexMap::from([ ( "name".to_owned(), Property::Scalar { @@ -142,7 +135,7 @@ mod test { ) } ) - ]) + ])) } ); @@ -165,13 +158,11 @@ mod test { assert_eq!( from_bson::(input)?, Property::Array { - bson_type: BsonType::Array, description: Some("Location must be an array of objects".to_owned()), items: Box::new(Property::Object { - bson_type: BsonType::Object, description: None, required: vec!["name".to_owned(), "size".to_owned()], - properties: IndexMap::from([ + properties: Some(IndexMap::from([ ( "name".to_owned(), Property::Scalar { @@ -186,7 +177,7 @@ mod test { description: None } ) - ]) + ])) }), } ); @@ -250,10 +241,9 @@ mod test { properties: IndexMap::from([( "counts".to_owned(), Property::Object { - bson_type: BsonType::Object, description: None, required: vec!["xs".to_owned()], - properties: IndexMap::from([ + properties: Some(IndexMap::from([ ( "xs".to_owned(), Property::Scalar { @@ -268,7 +258,7 @@ mod test { description: None } ), - ]) + ])) } )]) } @@ -300,7 +290,7 @@ mod test { "description": "\"gpa\" must be a double if the field exists" }, "address": { - "bsonType": ["object"], + "bsonType": "object", "properties": { "city": { "bsonType": "string" }, "street": { "bsonType": "string" } @@ -350,10 +340,9 @@ mod test { ( "address".to_owned(), Property::Object { - bson_type: BsonType::Object, description: None, required: vec![], - properties: IndexMap::from([ + properties: Some(IndexMap::from([ ( "city".to_owned(), Property::Scalar { @@ -368,7 +357,7 @@ mod test { description: None, } ) - ]) + ])) } ) ]),