From 0c281d1fe09ff5e4e325ff12d1a92d643f0c315c Mon Sep 17 00:00:00 2001 From: David Overton Date: Tue, 19 Mar 2024 17:30:35 +1100 Subject: [PATCH] Move type unification code to its own module --- crates/cli/src/introspection/document.rs | 210 +----------------- crates/cli/src/introspection/mod.rs | 1 + .../cli/src/introspection/type_unification.rs | 209 +++++++++++++++++ 3 files changed, 215 insertions(+), 205 deletions(-) create mode 100644 crates/cli/src/introspection/type_unification.rs diff --git a/crates/cli/src/introspection/document.rs b/crates/cli/src/introspection/document.rs index f7a767af..e9bd3811 100644 --- a/crates/cli/src/introspection/document.rs +++ b/crates/cli/src/introspection/document.rs @@ -3,18 +3,13 @@ use configuration::{ Schema, }; use futures_util::TryStreamExt; -use indexmap::IndexMap; use mongodb::bson::{doc, Bson, Document}; use mongodb_agent_common::interface_types::MongoConfig; -use mongodb_support::{ - align::align_with_result, - BsonScalarType::{self, *}, -}; -use std::{ - fmt::{self, Display}, - string::String, -}; -use thiserror::Error; +use mongodb_support::BsonScalarType::{self, *}; + +use crate::introspection::type_unification::{unify_type, TypeUnificationContext}; + +use super::type_unification::{unify_object_types, unify_schema, TypeUnificationResult}; // Sample from all collections in the database pub async fn sample_schema_from_db( @@ -156,198 +151,3 @@ fn make_field_type( Bson::DbPointer(_) => scalar(DbPointer), } } - -#[derive(Debug)] -pub struct TypeUnificationContext { - object_type_name: String, - field_name: String, -} - -impl TypeUnificationContext { - fn new(object_type_name: &str, field_name: &str) -> Self { - TypeUnificationContext { - object_type_name: object_type_name.to_owned(), - field_name: field_name.to_owned(), - } - } -} - -impl Display for TypeUnificationContext { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "object type: {}, field: {}", - self.object_type_name, 
self.field_name - ) - } -} - -#[derive(Debug, Error)] -pub enum TypeUnificationError { - ScalarTypeMismatch(TypeUnificationContext, BsonScalarType, BsonScalarType), - ObjectTypeMismatch(String, String), - TypeKindMismatch(Type, Type), -} - -impl Display for TypeUnificationError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::ScalarTypeMismatch(context, scalar_a, scalar_b) => write!( - f, - "Scalar type mismatch {} {} at {}", - scalar_a.bson_name(), - scalar_b.bson_name(), - context - ), - Self::ObjectTypeMismatch(object_a, object_b) => { - write!(f, "Object type mismatch {} {}", object_a, object_b) - } - Self::TypeKindMismatch(type_a, type_b) => { - write!(f, "Object type mismatch {:?} {:?}", type_a, type_b) - } - } - } -} - -type TypeUnificationResult = Result; - -fn unify_type( - context: TypeUnificationContext, - type_a: Type, - type_b: Type, -) -> TypeUnificationResult { - match (type_a, type_b) { - // If one type is undefined, the union is the other type. - // This is used as the base case when inferring array types from documents. 
- (Type::Scalar(Undefined), type_b) => Ok(type_b), - (type_a, Type::Scalar(Undefined)) => Ok(type_a), - - // Union of any type with Null is the Nullable version of that type - (Type::Scalar(Null), type_b) => Ok(make_nullable(type_b)), - (type_a, Type::Scalar(Null)) => Ok(make_nullable(type_a)), - - (Type::Scalar(scalar_a), Type::Scalar(scalar_b)) => { - if scalar_a == scalar_b { - Ok(Type::Scalar(scalar_a)) - } else { - Err(TypeUnificationError::ScalarTypeMismatch( - context, scalar_a, scalar_b, - )) - } - } - (Type::Object(object_a), Type::Object(object_b)) => { - if object_a == object_b { - Ok(Type::Object(object_a)) - } else { - Err(TypeUnificationError::ObjectTypeMismatch(object_a, object_b)) - } - } - (Type::ArrayOf(elem_type_a), Type::ArrayOf(elem_type_b)) => { - let elem_type = unify_type(context, *elem_type_a, *elem_type_b)?; - Ok(Type::ArrayOf(Box::new(elem_type))) - } - (Type::Nullable(nullable_type_a), type_b) => { - let result_type = unify_type(context, *nullable_type_a, type_b)?; - Ok(make_nullable(result_type)) - } - (type_a, Type::Nullable(nullable_type_b)) => { - let result_type = unify_type(context, type_a, *nullable_type_b)?; - Ok(make_nullable(result_type)) - } - (type_a, type_b) => Err(TypeUnificationError::TypeKindMismatch(type_a, type_b)), - } -} - -fn make_nullable(t: Type) -> Type { - match t { - Type::Nullable(t) => Type::Nullable(t), - t => Type::Nullable(Box::new(t)), - } -} - -fn make_nullable_field(field: ObjectField) -> Result { - Ok(ObjectField { - name: field.name, - r#type: make_nullable(field.r#type), - description: field.description, - }) -} - -fn unify_object_type( - object_type_a: ObjectType, - object_type_b: ObjectType, -) -> TypeUnificationResult { - let field_map_a: IndexMap = object_type_a - .fields - .into_iter() - .map(|o| (o.name.to_owned(), o)) - .collect(); - let field_map_b: IndexMap = object_type_b - .fields - .into_iter() - .map(|o| (o.name.to_owned(), o)) - .collect(); - - let merged_field_map = align_with_result( - 
field_map_a, - field_map_b, - make_nullable_field, - make_nullable_field, - |field_a, field_b| unify_object_field(&object_type_a.name, field_a, field_b), - )?; - - Ok(ObjectType { - name: object_type_a.name, - fields: merged_field_map.into_values().collect(), - description: object_type_a.description.or(object_type_b.description), - }) -} - -fn unify_object_field( - object_type_name: &str, - object_field_a: ObjectField, - object_field_b: ObjectField, -) -> TypeUnificationResult { - let context = TypeUnificationContext::new(object_type_name, &object_field_a.name); - Ok(ObjectField { - name: object_field_a.name, - r#type: unify_type(context, object_field_a.r#type, object_field_b.r#type)?, - description: object_field_a.description.or(object_field_b.description), - }) -} - -fn unify_object_types( - object_types_a: Vec, - object_types_b: Vec, -) -> TypeUnificationResult> { - let type_map_a: IndexMap = object_types_a - .into_iter() - .map(|t| (t.name.to_owned(), t)) - .collect(); - let type_map_b: IndexMap = object_types_b - .into_iter() - .map(|t| (t.name.to_owned(), t)) - .collect(); - - let merged_type_map = align_with_result(type_map_a, type_map_b, Ok, Ok, unify_object_type)?; - - Ok(merged_type_map.into_values().collect()) -} - -// Unify two schemas. Assumes that the schemas describe mutually exclusive sets of collections. 
-fn unify_schema(schema_a: Schema, schema_b: Schema) -> TypeUnificationResult { - let collections = schema_a - .collections - .into_iter() - .chain(schema_b.collections.into_iter()) - .collect(); - let object_types = schema_a - .object_types - .into_iter() - .chain(schema_b.object_types.into_iter()) - .collect(); - Ok(Schema { - collections, - object_types, - }) -} diff --git a/crates/cli/src/introspection/mod.rs b/crates/cli/src/introspection/mod.rs index 0871e640..e2af4ee5 100644 --- a/crates/cli/src/introspection/mod.rs +++ b/crates/cli/src/introspection/mod.rs @@ -1,5 +1,6 @@ pub mod document; pub mod validation_schema; +pub mod type_unification; pub use validation_schema::get_metadata_from_validation_schema; pub use document::sample_schema_from_db; \ No newline at end of file diff --git a/crates/cli/src/introspection/type_unification.rs b/crates/cli/src/introspection/type_unification.rs new file mode 100644 index 00000000..ea8c815a --- /dev/null +++ b/crates/cli/src/introspection/type_unification.rs @@ -0,0 +1,209 @@ +use configuration::{ + schema::{ObjectField, ObjectType, Type}, + Schema, +}; +use indexmap::IndexMap; +use mongodb_support::{ + align::align_with_result, + BsonScalarType::{self, *}, +}; +use std::{ + fmt::{self, Display}, + string::String, +}; +use thiserror::Error; + +#[derive(Debug)] +pub struct TypeUnificationContext { + object_type_name: String, + field_name: String, +} + +impl TypeUnificationContext { + pub fn new(object_type_name: &str, field_name: &str) -> Self { + TypeUnificationContext { + object_type_name: object_type_name.to_owned(), + field_name: field_name.to_owned(), + } + } +} + +impl Display for TypeUnificationContext { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "object type: {}, field: {}", + self.object_type_name, self.field_name + ) + } +} + +#[derive(Debug, Error)] +pub enum TypeUnificationError { + ScalarTypeMismatch(TypeUnificationContext, BsonScalarType, BsonScalarType), + 
ObjectTypeMismatch(String, String), + TypeKindMismatch(Type, Type), +} + +impl Display for TypeUnificationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::ScalarTypeMismatch(context, scalar_a, scalar_b) => write!( + f, + "Scalar type mismatch {} {} at {}", + scalar_a.bson_name(), + scalar_b.bson_name(), + context + ), + Self::ObjectTypeMismatch(object_a, object_b) => { + write!(f, "Object type mismatch {} {}", object_a, object_b) + } + Self::TypeKindMismatch(type_a, type_b) => { + write!(f, "Object type mismatch {:?} {:?}", type_a, type_b) + } + } + } +} + +pub type TypeUnificationResult = Result; + +pub fn unify_type( + context: TypeUnificationContext, + type_a: Type, + type_b: Type, +) -> TypeUnificationResult { + match (type_a, type_b) { + // If one type is undefined, the union is the other type. + // This is used as the base case when inferring array types from documents. + (Type::Scalar(Undefined), type_b) => Ok(type_b), + (type_a, Type::Scalar(Undefined)) => Ok(type_a), + + // Union of any type with Null is the Nullable version of that type + (Type::Scalar(Null), type_b) => Ok(make_nullable(type_b)), + (type_a, Type::Scalar(Null)) => Ok(make_nullable(type_a)), + + (Type::Scalar(scalar_a), Type::Scalar(scalar_b)) => { + if scalar_a == scalar_b { + Ok(Type::Scalar(scalar_a)) + } else { + Err(TypeUnificationError::ScalarTypeMismatch( + context, scalar_a, scalar_b, + )) + } + } + (Type::Object(object_a), Type::Object(object_b)) => { + if object_a == object_b { + Ok(Type::Object(object_a)) + } else { + Err(TypeUnificationError::ObjectTypeMismatch(object_a, object_b)) + } + } + (Type::ArrayOf(elem_type_a), Type::ArrayOf(elem_type_b)) => { + let elem_type = unify_type(context, *elem_type_a, *elem_type_b)?; + Ok(Type::ArrayOf(Box::new(elem_type))) + } + (Type::Nullable(nullable_type_a), type_b) => { + let result_type = unify_type(context, *nullable_type_a, type_b)?; + Ok(make_nullable(result_type)) + } + (type_a, 
Type::Nullable(nullable_type_b)) => { + let result_type = unify_type(context, type_a, *nullable_type_b)?; + Ok(make_nullable(result_type)) + } + (type_a, type_b) => Err(TypeUnificationError::TypeKindMismatch(type_a, type_b)), + } +} + +fn make_nullable(t: Type) -> Type { + match t { + Type::Nullable(t) => Type::Nullable(t), + t => Type::Nullable(Box::new(t)), + } +} + +fn make_nullable_field(field: ObjectField) -> Result { + Ok(ObjectField { + name: field.name, + r#type: make_nullable(field.r#type), + description: field.description, + }) +} + +fn unify_object_type( + object_type_a: ObjectType, + object_type_b: ObjectType, +) -> TypeUnificationResult { + let field_map_a: IndexMap = object_type_a + .fields + .into_iter() + .map(|o| (o.name.to_owned(), o)) + .collect(); + let field_map_b: IndexMap = object_type_b + .fields + .into_iter() + .map(|o| (o.name.to_owned(), o)) + .collect(); + + let merged_field_map = align_with_result( + field_map_a, + field_map_b, + make_nullable_field, + make_nullable_field, + |field_a, field_b| unify_object_field(&object_type_a.name, field_a, field_b), + )?; + + Ok(ObjectType { + name: object_type_a.name, + fields: merged_field_map.into_values().collect(), + description: object_type_a.description.or(object_type_b.description), + }) +} + +fn unify_object_field( + object_type_name: &str, + object_field_a: ObjectField, + object_field_b: ObjectField, +) -> TypeUnificationResult { + let context = TypeUnificationContext::new(object_type_name, &object_field_a.name); + Ok(ObjectField { + name: object_field_a.name, + r#type: unify_type(context, object_field_a.r#type, object_field_b.r#type)?, + description: object_field_a.description.or(object_field_b.description), + }) +} + +pub fn unify_object_types( + object_types_a: Vec, + object_types_b: Vec, +) -> TypeUnificationResult> { + let type_map_a: IndexMap = object_types_a + .into_iter() + .map(|t| (t.name.to_owned(), t)) + .collect(); + let type_map_b: IndexMap = object_types_b + .into_iter() + 
.map(|t| (t.name.to_owned(), t)) + .collect(); + + let merged_type_map = align_with_result(type_map_a, type_map_b, Ok, Ok, unify_object_type)?; + + Ok(merged_type_map.into_values().collect()) +} + +// Unify two schemas. Assumes that the schemas describe mutually exclusive sets of collections. +pub fn unify_schema(schema_a: Schema, schema_b: Schema) -> TypeUnificationResult { + let collections = schema_a + .collections + .into_iter() + .chain(schema_b.collections.into_iter()) + .collect(); + let object_types = schema_a + .object_types + .into_iter() + .chain(schema_b.object_types.into_iter()) + .collect(); + Ok(Schema { + collections, + object_types, + }) +}