diff --git a/CHANGELOG.md b/CHANGELOG.md index 9af173d0..13b653e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ This changelog documents the changes between release versions. - Enables logging events from the MongoDB driver by setting the `RUST_LOG` variable ([#67](https://github.com/hasura/ndc-mongodb/pull/67)) - To log all events set `RUST_LOG=mongodb::command=debug,mongodb::connection=debug,mongodb::server_selection=debug,mongodb::topology=debug` - Relations with a single column mapping now use concise correlated subquery syntax in `$lookup` stage ([#65](https://github.com/hasura/ndc-mongodb/pull/65)) +- Add root `configuration.json` or `configuration.yaml` file to allow editing cli options. ([#68](https://github.com/hasura/ndc-mongodb/pull/68)) +- Update default sample size to 100. ([#68](https://github.com/hasura/ndc-mongodb/pull/68)) +- Add `all_schema_nullable` option defaulted to true. ([#68](https://github.com/hasura/ndc-mongodb/pull/68)) ## [0.0.5] - 2024-04-26 - Fix incorrect order of results for query requests with more than 10 variable sets (#37) diff --git a/crates/cli/src/introspection/sampling.rs b/crates/cli/src/introspection/sampling.rs index 86bce3c4..51dc41f9 100644 --- a/crates/cli/src/introspection/sampling.rs +++ b/crates/cli/src/introspection/sampling.rs @@ -1,6 +1,6 @@ use std::collections::{BTreeMap, HashSet}; -use super::type_unification::{unify_object_types, unify_type}; +use super::type_unification::{make_nullable_field, unify_object_types, unify_type}; use configuration::{ schema::{self, Type}, Schema, WithName, @@ -19,6 +19,8 @@ type ObjectType = WithName; /// are not unifiable. pub async fn sample_schema_from_db( sample_size: u32, + all_schema_nullalble: bool, + config_file_changed: bool, state: &ConnectorState, existing_schemas: &HashSet, ) -> anyhow::Result> { @@ -28,9 +30,9 @@ pub async fn sample_schema_from_db( while let Some(collection_spec) = collections_cursor.try_next().await? 
{ let collection_name = collection_spec.name; - if !existing_schemas.contains(&collection_name) { + if !existing_schemas.contains(&collection_name) || config_file_changed { let collection_schema = - sample_schema_from_collection(&collection_name, sample_size, state).await?; + sample_schema_from_collection(&collection_name, sample_size, all_schema_nullalble, state).await?; schemas.insert(collection_name, collection_schema); } } @@ -40,6 +42,7 @@ pub async fn sample_schema_from_db( async fn sample_schema_from_collection( collection_name: &str, sample_size: u32, + all_schema_nullalble: bool, state: &ConnectorState, ) -> anyhow::Result { let db = state.database(); @@ -50,7 +53,7 @@ async fn sample_schema_from_collection( .await?; let mut collected_object_types = vec![]; while let Some(document) = cursor.try_next().await? { - let object_types = make_object_type(collection_name, &document); + let object_types = make_object_type(collection_name, &document, all_schema_nullalble); collected_object_types = if collected_object_types.is_empty() { object_types } else { @@ -71,13 +74,13 @@ async fn sample_schema_from_collection( }) } -fn make_object_type(object_type_name: &str, document: &Document) -> Vec { +fn make_object_type(object_type_name: &str, document: &Document, all_schema_nullalble: bool) -> Vec { let (mut object_type_defs, object_fields) = { let type_prefix = format!("{object_type_name}_"); let (object_type_defs, object_fields): (Vec>, Vec) = document .iter() .map(|(field_name, field_value)| { - make_object_field(&type_prefix, field_name, field_value) + make_object_field(&type_prefix, field_name, field_value, all_schema_nullalble) }) .unzip(); (object_type_defs.concat(), object_fields) @@ -99,17 +102,22 @@ fn make_object_field( type_prefix: &str, field_name: &str, field_value: &Bson, + all_schema_nullalble: bool, ) -> (Vec, ObjectField) { let object_type_name = format!("{type_prefix}{field_name}"); - let (collected_otds, field_type) = 
make_field_type(&object_type_name, field_value); - - let object_field = WithName::named( + let (collected_otds, field_type) = make_field_type(&object_type_name, field_value, all_schema_nullalble); + let object_field_value = WithName::named( field_name.to_owned(), schema::ObjectField { description: None, r#type: field_type, }, ); + let object_field = if all_schema_nullalble { + make_nullable_field(object_field_value) + } else { + object_field_value + }; (collected_otds, object_field) } @@ -118,12 +126,13 @@ fn make_object_field( pub fn type_from_bson( object_type_name: &str, value: &Bson, + all_schema_nullalble: bool, ) -> (BTreeMap, Type) { - let (object_types, t) = make_field_type(object_type_name, value); + let (object_types, t) = make_field_type(object_type_name, value, all_schema_nullalble); (WithName::into_map(object_types), t) } -fn make_field_type(object_type_name: &str, field_value: &Bson) -> (Vec, Type) { +fn make_field_type(object_type_name: &str, field_value: &Bson, all_schema_nullalble: bool) -> (Vec, Type) { fn scalar(t: BsonScalarType) -> (Vec, Type) { (vec![], Type::Scalar(t)) } @@ -135,7 +144,7 @@ fn make_field_type(object_type_name: &str, field_value: &Bson) -> (Vec (Vec { - let collected_otds = make_object_type(object_type_name, document); + let collected_otds = make_object_type(object_type_name, document, all_schema_nullalble); (collected_otds, Type::Object(object_type_name.to_owned())) } Bson::Boolean(_) => scalar(Bool), @@ -186,7 +195,7 @@ mod tests { fn simple_doc() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_int": 1, "my_string": "two"}; - let result = WithName::into_map::>(make_object_type(object_name, &doc)); + let result = WithName::into_map::>(make_object_type(object_name, &doc, false)); let expected = BTreeMap::from([( object_name.to_owned(), @@ -220,7 +229,7 @@ mod tests { fn array_of_objects() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! 
{"my_array": [{"foo": 42, "bar": ""}, {"bar": "wut", "baz": 3.77}]}; - let result = WithName::into_map::>(make_object_type(object_name, &doc)); + let result = WithName::into_map::>(make_object_type(object_name, &doc, false)); let expected = BTreeMap::from([ ( @@ -280,7 +289,7 @@ mod tests { fn non_unifiable_array_of_objects() -> Result<(), anyhow::Error> { let object_name = "foo"; let doc = doc! {"my_array": [{"foo": 42, "bar": ""}, {"bar": 17, "baz": 3.77}]}; - let result = WithName::into_map::>(make_object_type(object_name, &doc)); + let result = WithName::into_map::>(make_object_type(object_name, &doc, false)); let expected = BTreeMap::from([ ( diff --git a/crates/cli/src/introspection/type_unification.rs b/crates/cli/src/introspection/type_unification.rs index dae7f3fa..61a8a377 100644 --- a/crates/cli/src/introspection/type_unification.rs +++ b/crates/cli/src/introspection/type_unification.rs @@ -72,10 +72,11 @@ pub fn unify_type(type_a: Type, type_b: Type) -> Type { // Anything else gives ExtendedJSON (_, _) => Type::ExtendedJSON, }; + result_type.normalize_type() } -fn make_nullable_field(field: ObjectField) -> ObjectField { +pub fn make_nullable_field(field: ObjectField) -> ObjectField { WithName::named( field.name, schema::ObjectField { diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 139db0e9..f171e515 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -12,11 +12,14 @@ use mongodb_agent_common::state::ConnectorState; #[derive(Debug, Clone, Parser)] pub struct UpdateArgs { - #[arg(long = "sample-size", value_name = "N", default_value_t = 10)] - sample_size: u32, + #[arg(long = "sample-size", value_name = "N", required = false)] + sample_size: Option, - #[arg(long = "no-validator-schema", default_value_t = false)] - no_validator_schema: bool, + #[arg(long = "no-validator-schema", required = false)] + no_validator_schema: Option, + + #[arg(long = "all-schema-nullable", required = false)] + all_schema_nullable: Option, } /// The 
command invoked by the user. @@ -41,7 +44,23 @@ pub async fn run(command: Command, context: &Context) -> anyhow::Result<()> { /// Update the configuration in the current directory by introspecting the database. async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { - if !args.no_validator_schema { + let configuration_options = configuration::parse_configuration_options_file(&context.path).await; + // Prefer arguments passed to cli, and fallback to the configuration file + let sample_size = match args.sample_size { + Some(size) => size, + None => configuration_options.introspection_options.sample_size + }; + let no_validator_schema = match args.no_validator_schema { + Some(validator) => validator, + None => configuration_options.introspection_options.no_validator_schema + }; + let all_schema_nullable = match args.all_schema_nullable { + Some(validator) => validator, + None => configuration_options.introspection_options.all_schema_nullable + }; + let config_file_changed = configuration::get_config_file_changed(&context.path).await?; + + if !no_validator_schema { let schemas_from_json_validation = introspection::get_metadata_from_validation_schema(&context.connector_state).await?; configuration::write_schema_directory(&context.path, schemas_from_json_validation).await?; @@ -49,7 +68,9 @@ async fn update(context: &Context, args: &UpdateArgs) -> anyhow::Result<()> { let existing_schemas = configuration::list_existing_schemas(&context.path).await?; let schemas_from_sampling = introspection::sample_schema_from_db( - args.sample_size, + sample_size, + all_schema_nullable, + config_file_changed, &context.connector_state, &existing_schemas, ) diff --git a/crates/configuration/src/configuration.rs b/crates/configuration/src/configuration.rs index 808eff82..b5a78bc3 100644 --- a/crates/configuration/src/configuration.rs +++ b/crates/configuration/src/configuration.rs @@ -4,6 +4,7 @@ use anyhow::{anyhow, ensure}; use itertools::Itertools; use 
mongodb_support::BsonScalarType; use ndc_models as ndc; +use serde::{Deserialize, Serialize}; use crate::{ native_procedure::NativeProcedure, @@ -45,6 +46,8 @@ pub struct Configuration { /// `native_queries/`, and `native_procedures/` subdirectories in the connector configuration /// directory. pub object_types: BTreeMap, + + pub options: ConfigurationOptions, } impl Configuration { @@ -52,6 +55,7 @@ impl Configuration { schema: serialized::Schema, native_procedures: BTreeMap, native_queries: BTreeMap, + options: ConfigurationOptions ) -> anyhow::Result { let object_types_iter = || merge_object_types(&schema, &native_procedures, &native_queries); let object_type_errors = { @@ -153,11 +157,12 @@ impl Configuration { native_procedures: internal_native_procedures, native_queries: internal_native_queries, object_types, + options }) } pub fn from_schema(schema: serialized::Schema) -> anyhow::Result { - Self::validate(schema, Default::default(), Default::default()) + Self::validate(schema, Default::default(), Default::default(), Default::default()) } pub async fn parse_configuration( @@ -167,6 +172,36 @@ impl Configuration { } } +#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ConfigurationOptions { + // Options for introspection + pub introspection_options: ConfigurationIntrospectionOptions, +} + +#[derive(Copy, Clone, Debug, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ConfigurationIntrospectionOptions { + // For introspection how many documents should be sampled per collection. + pub sample_size: u32, + + // Whether to try validator schema first if one exists. + pub no_validator_schema: bool, + + // Default to setting all schema fields as nullable. 
+ pub all_schema_nullable: bool, +} + +impl Default for ConfigurationIntrospectionOptions { + fn default() -> Self { + ConfigurationIntrospectionOptions { + sample_size: 100, + no_validator_schema: false, + all_schema_nullable: true, + } + } +} + fn merge_object_types<'a>( schema: &'a serialized::Schema, native_procedures: &'a BTreeMap, @@ -350,7 +385,7 @@ mod tests { )] .into_iter() .collect(); - let result = Configuration::validate(schema, native_procedures, Default::default()); + let result = Configuration::validate(schema, native_procedures, Default::default(), Default::default()); let error_msg = result.unwrap_err().to_string(); assert!(error_msg.contains("multiple definitions")); assert!(error_msg.contains("Album")); diff --git a/crates/configuration/src/directory.rs b/crates/configuration/src/directory.rs index 1e659561..b66eee8d 100644 --- a/crates/configuration/src/directory.rs +++ b/crates/configuration/src/directory.rs @@ -3,17 +3,18 @@ use futures::stream::TryStreamExt as _; use itertools::Itertools as _; use serde::{Deserialize, Serialize}; use std::{ - collections::{BTreeMap, HashSet}, - path::{Path, PathBuf}, + collections::{BTreeMap, HashSet}, fs::Metadata, path::{Path, PathBuf} }; -use tokio::fs; +use tokio::{fs, io::AsyncWriteExt}; use tokio_stream::wrappers::ReadDirStream; -use crate::{serialized::Schema, with_name::WithName, Configuration}; +use crate::{configuration::ConfigurationOptions, serialized::Schema, with_name::WithName, Configuration}; pub const SCHEMA_DIRNAME: &str = "schema"; pub const NATIVE_PROCEDURES_DIRNAME: &str = "native_procedures"; pub const NATIVE_QUERIES_DIRNAME: &str = "native_queries"; +pub const CONFIGURATION_OPTIONS_BASENAME: &str = "configuration"; +pub const CONFIGURATION_OPTIONS_METADATA: &str = ".configuration_metadata"; pub const CONFIGURATION_EXTENSIONS: [(&str, FileFormat); 3] = [("json", JSON), ("yaml", YAML), ("yml", YAML)]; @@ -47,7 +48,10 @@ pub async fn read_directory( .await? 
.unwrap_or_default(); - Configuration::validate(schema, native_procedures, native_queries) + let options = parse_configuration_options_file(dir) + .await; + + Configuration::validate(schema, native_procedures, native_queries, options) } /// Parse all files in a directory with one of the allowed configuration extensions according to @@ -108,6 +112,26 @@ where } } +pub async fn parse_configuration_options_file(dir: &Path) -> ConfigurationOptions { + let json_filename = CONFIGURATION_OPTIONS_BASENAME.to_owned() + ".json"; + let json_config_file = parse_config_file(&dir.join(json_filename), JSON).await; + if let Ok(config_options) = json_config_file { + return config_options + } + + let yaml_filename = CONFIGURATION_OPTIONS_BASENAME.to_owned() + ".yaml"; + let yaml_config_file = parse_config_file(&dir.join(yaml_filename), YAML).await; + if let Ok(config_options) = yaml_config_file { + return config_options + } + + // If a configuration file does not exist use defaults and write the file + let defaults: ConfigurationOptions = Default::default(); + let _ = write_file(dir, CONFIGURATION_OPTIONS_BASENAME, &defaults).await; + let _ = write_config_metadata_file(dir).await; + defaults +} + async fn parse_config_file(path: impl AsRef, format: FileFormat) -> anyhow::Result where for<'a> T: Deserialize<'a>, @@ -188,3 +212,52 @@ pub async fn list_existing_schemas( Ok(schemas.into_keys().collect()) } + +// Metadata file is just a dot file used for the purposes of knowing if the user has updated their config to force refresh
+async fn write_config_metadata_file( + configuration_dir: impl AsRef +) { + let dir = configuration_dir.as_ref(); + let file_result = fs::OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(dir.join(CONFIGURATION_OPTIONS_METADATA)) + .await; + + if let Ok(mut file) = file_result { + let _ = file.write_all(b"").await; + }; +} + +pub async fn get_config_file_changed( + dir: impl AsRef +) -> anyhow::Result { + let path = dir.as_ref(); + let dot_metadata: Result = fs::metadata( + &path.join(CONFIGURATION_OPTIONS_METADATA) + ).await; + let json_metadata = fs::metadata( + &path.join(CONFIGURATION_OPTIONS_BASENAME.to_owned() + ".json") + ).await; + let yaml_metadata = fs::metadata( + &path.join(CONFIGURATION_OPTIONS_BASENAME.to_owned() + ".yaml") + ).await; + + let compare = |dot_date, config_date| async move { + if dot_date < config_date { + let _ = write_config_metadata_file(path).await; + Ok(true) + } + else { + Ok(false) + } + }; + + match (dot_metadata, json_metadata, yaml_metadata) { + (Ok(dot), Ok(json), _) => compare(dot.modified()?, json.modified()?).await, + (Ok(dot), _, Ok(yaml)) => compare(dot.modified()?, yaml.modified()?).await, + _ => Ok(true) + } +} diff --git a/crates/configuration/src/lib.rs b/crates/configuration/src/lib.rs index c7c13e4f..9a99aa3d 100644 --- a/crates/configuration/src/lib.rs +++ b/crates/configuration/src/lib.rs @@ -10,5 +10,7 @@ pub use crate::configuration::Configuration; pub use crate::directory::list_existing_schemas; pub use crate::directory::read_directory; pub use crate::directory::write_schema_directory; +pub use crate::directory::parse_configuration_options_file; +pub use crate::directory::get_config_file_changed; pub use crate::serialized::Schema; pub use crate::with_name::{WithName, WithNameRef}; diff --git a/crates/mongodb-agent-common/src/query/native_query.rs b/crates/mongodb-agent-common/src/query/native_query.rs index 9657ce64..abdc51bd 100644 --- 
a/crates/mongodb-agent-common/src/query/native_query.rs +++ b/crates/mongodb-agent-common/src/query/native_query.rs @@ -193,6 +193,7 @@ mod tests { functions: Default::default(), procedures: Default::default(), native_procedures: Default::default(), + options: Default::default(), }; let request = query_request() diff --git a/crates/mongodb-agent-common/src/query/serialization/tests.rs b/crates/mongodb-agent-common/src/query/serialization/tests.rs index e6eb52eb..79ace254 100644 --- a/crates/mongodb-agent-common/src/query/serialization/tests.rs +++ b/crates/mongodb-agent-common/src/query/serialization/tests.rs @@ -10,7 +10,7 @@ use super::{bson_to_json, json_to_bson}; proptest! { #[test] fn converts_bson_to_json_and_back(bson in arb_bson()) { - let (object_types, inferred_type) = type_from_bson("test_object", &bson); + let (object_types, inferred_type) = type_from_bson("test_object", &bson, false); let error_context = |msg: &str, source: String| TestCaseError::fail(format!("{msg}: {source}\ninferred type: {inferred_type:?}\nobject types: {object_types:?}")); let json = bson_to_json(&inferred_type, &object_types, bson.clone()).map_err(|e| error_context("error converting bson to json", e.to_string()))?; let actual = json_to_bson(&inferred_type, &object_types, json.clone()).map_err(|e| error_context("error converting json to bson", e.to_string()))?;