Skip to content

Commit

Permalink
rework queries with variable sets so they use indexes (#83)
Browse files Browse the repository at this point in the history
* create indexes in mongodb fixtures

* capture expected types of variables

* map request variables to $documents stage, replace $facet with $lookup

* test variable name escaping function

* tests for query_variable_name

* use escaping in `variable` function to make it infallible

* replace variable map lookups with mongodb variable references

* some test updates, delegate to variable function

* fix make_selector

* run `db.aggregate` if query request has variable sets

* update response serialization for change in foreach response shape

* update one of the foreach unit tests

* update some stale comments

* handle responses with aggregates, update tests

* handle aggregate responses without rows

* add test for binary comparison bug that I incidentally fixed

* skip remote relationship integration tests in mongodb 5

* update changelog

* note breaking change in changelog

* change aggregate target in explain to match target in query
  • Loading branch information
hallettj committed Jun 24, 2024
1 parent 436e12f commit 6a5e208
Show file tree
Hide file tree
Showing 36 changed files with 933 additions and 395 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ This changelog documents the changes between release versions.
## [Unreleased]

- Fix bug with operator lookup when filtering on nested fields ([#82](https://github.com/hasura/ndc-mongodb/pull/82))
- Rework query plans for requests with variable sets to allow use of indexes ([#83](https://github.com/hasura/ndc-mongodb/pull/83))
- Fix: error when requesting query plan if MongoDB is target of a remote join ([#83](https://github.com/hasura/ndc-mongodb/pull/83))
- Breaking change: remote joins no longer work in MongoDB v5 ([#83](https://github.com/hasura/ndc-mongodb/pull/83))

## [0.1.0] - 2024-06-13

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions crates/integration-tests/src/tests/remote_relationship.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,17 @@ use serde_json::json;

#[tokio::test]
async fn provides_source_and_target_for_remote_relationship() -> anyhow::Result<()> {
// Skip this test in MongoDB 5 because the example fails there. We're getting an error:
//
// > Kind: Command failed: Error code 5491300 (Location5491300): $documents' is not allowed in user requests, labels: {}
//
// This means that remote joins are not working in MongoDB 5
if let Ok(image) = std::env::var("MONGODB_IMAGE") {
if image == "mongo:5" {
return Ok(());
}
}

assert_yaml_snapshot!(
graphql_query(
r#"
Expand All @@ -29,6 +40,17 @@ async fn provides_source_and_target_for_remote_relationship() -> anyhow::Result<

#[tokio::test]
async fn handles_request_with_single_variable_set() -> anyhow::Result<()> {
// Skip this test in MongoDB 5 because the example fails there. We're getting an error:
//
// > Kind: Command failed: Error code 5491300 (Location5491300): $documents' is not allowed in user requests, labels: {}
//
// This means that remote joins are not working in MongoDB 5
if let Ok(image) = std::env::var("MONGODB_IMAGE") {
if image == "mongo:5" {
return Ok(());
}
}

assert_yaml_snapshot!(
run_connector_query(
query_request()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 2357e8c9d6e3a68dfeff6f95a955a86d866c87c8d2a33afb9846fe8e1006402a # shrinks to input = "·"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc fdd2dffdde1f114a438c67d891387aaca81b3df2676213ff17171208feb290ba # shrinks to variable_name = "", (type_a, type_b) = (Scalar(Bson(Double)), Scalar(Bson(Decimal)))
7 changes: 4 additions & 3 deletions crates/mongodb-agent-common/src/explain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ pub async fn explain_query(
let pipeline = query::pipeline_for_query_request(config, &query_plan)?;
let pipeline_bson = to_bson(&pipeline)?;

let aggregate_target = match QueryTarget::for_request(config, &query_plan).input_collection() {
Some(collection_name) => Bson::String(collection_name.to_owned()),
None => Bson::Int32(1),
let target = QueryTarget::for_request(config, &query_plan);
let aggregate_target = match (target.input_collection(), query_plan.has_variables()) {
(Some(collection_name), false) => Bson::String(collection_name.to_owned()),
_ => Bson::Int32(1),
};

let query_command = doc! {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ pub enum MongoAgentError {
Serialization(serde_json::Error),
UnknownAggregationFunction(String),
UnspecifiedRelation(String),
VariableNotDefined(String),
AdHoc(#[from] anyhow::Error),
}

Expand Down Expand Up @@ -88,10 +87,6 @@ impl MongoAgentError {
StatusCode::BAD_REQUEST,
ErrorResponse::new(&format!("Query referenced a relationship, \"{relation}\", but did not include relation metadata in `table_relationships`"))
),
VariableNotDefined(variable_name) => (
StatusCode::BAD_REQUEST,
ErrorResponse::new(&format!("Query referenced a variable, \"{variable_name}\", but it is not defined by the query request"))
),
AdHoc(err) => (StatusCode::INTERNAL_SERVER_ERROR, ErrorResponse::new(&err)),
}
}
Expand Down
1 change: 1 addition & 0 deletions crates/mongodb-agent-common/src/mongo_query_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,4 @@ pub type QueryPlan = ndc_query_plan::QueryPlan<MongoConfiguration>;
pub type Relationship = ndc_query_plan::Relationship<MongoConfiguration>;
pub type Relationships = ndc_query_plan::Relationships<MongoConfiguration>;
pub type Type = ndc_query_plan::Type<MongoScalarType>;
pub type VariableTypes = ndc_query_plan::VariableTypes<MongoScalarType>;
117 changes: 97 additions & 20 deletions crates/mongodb-agent-common/src/mongodb/sanitize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ use std::borrow::Cow;

use anyhow::anyhow;
use mongodb::bson::{doc, Document};
use once_cell::sync::Lazy;
use regex::Regex;

use crate::interface_types::MongoAgentError;

Expand All @@ -15,28 +13,21 @@ pub fn get_field(name: &str) -> Document {
doc! { "$getField": { "$literal": name } }
}

/// Returns its input prefixed with "v_" if it is a valid MongoDB variable name. Valid names may
/// include the ASCII characters [_a-zA-Z0-9] or any non-ASCII characters. The exclusion of special
/// characters like `$` and `.` avoids potential code injection.
///
/// We add the "v_" prefix because variable names may not begin with an underscore, but in some
/// cases, like when using relation-mapped column names as variable names, we want to be able to
/// use names like "_id".
///
/// TODO: Instead of producing an error we could use an escaping scheme to unambiguously map
/// invalid characters to safe ones.
pub fn variable(name: &str) -> Result<String, MongoAgentError> {
static VALID_EXPRESSION: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^[_a-zA-Z0-9\P{ascii}]+$").unwrap());
if VALID_EXPRESSION.is_match(name) {
Ok(format!("v_{name}"))
/// Given a name returns a valid variable name for use in MongoDB aggregation expressions. Outputs
/// are guaranteed to be distinct for distinct inputs. Consistently returns the same output for the
/// same input string.
pub fn variable(name: &str) -> String {
let name_with_valid_initial = if name.chars().next().unwrap_or('!').is_ascii_lowercase() {
Cow::Borrowed(name)
} else {
Err(MongoAgentError::InvalidVariableName(name.to_owned()))
}
Cow::Owned(format!("v_{name}"))
};
escape_invalid_variable_chars(&name_with_valid_initial)
}

/// Returns false if the name contains characters that MongoDB will interpret specially, such as an
/// initial dollar sign, or dots.
/// initial dollar sign, or dots. This indicates whether a name is safe for field references
/// - variable names are more strict.
pub fn is_name_safe(name: &str) -> bool {
!(name.starts_with('$') || name.contains('.'))
}
Expand All @@ -52,3 +43,89 @@ pub fn safe_name(name: &str) -> Result<Cow<str>, MongoAgentError> {
Ok(Cow::Borrowed(name))
}
}

// The escape character must be a valid character in MongoDB variable names, but must not appear in
// lower-case hex strings. A non-ASCII character works if we specifically map it to a two-character
// hex escape sequence (see [ESCAPE_CHAR_ESCAPE_SEQUENCE]). Another option would be to use an
// allowed ASCII character such as 'x'.
const ESCAPE_CHAR: char = '·';

/// We want all escape sequences to be two-character hex strings so this must be a value that does
/// not represent an ASCII character, and that is <= 0xff.
const ESCAPE_CHAR_ESCAPE_SEQUENCE: u32 = 0xff;

/// MongoDB variable names allow a limited set of ASCII characters, or any non-ASCII character.
/// See https://www.mongodb.com/docs/manual/reference/aggregation-variables/
fn escape_invalid_variable_chars(input: &str) -> String {
let mut encoded = String::new();
for char in input.chars() {
match char {
ESCAPE_CHAR => push_encoded_char(&mut encoded, ESCAPE_CHAR_ESCAPE_SEQUENCE),
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => encoded.push(char),
char if char as u32 <= 127 => push_encoded_char(&mut encoded, char as u32),
char => encoded.push(char),
}
}
encoded
}

/// Escape invalid characters using the escape character followed by a two-character hex sequence
/// that gives the character's ASCII codepoint
fn push_encoded_char(encoded: &mut String, char: u32) {
encoded.push(ESCAPE_CHAR);
let zero_pad = if char < 0x10 { "0" } else { "" };
encoded.push_str(&format!("{zero_pad}{char:x}"));
}

#[cfg(test)]
mod tests {
use proptest::prelude::*;

use super::{escape_invalid_variable_chars, ESCAPE_CHAR, ESCAPE_CHAR_ESCAPE_SEQUENCE};

proptest! {
// Escaped strings must be consistent and distinct. A round-trip test demonstrates this.
#[test]
fn escaping_variable_chars_roundtrips(input: String) {
let encoded = escape_invalid_variable_chars(&input);
let decoded = unescape_invalid_variable_chars(&encoded);
prop_assert_eq!(decoded, input, "encoded string: {}", encoded)
}
}

proptest! {
#[test]
fn escaped_variable_names_are_valid(input: String) {
let encoded = escape_invalid_variable_chars(&input);
prop_assert!(
encoded.chars().all(|char|
char as u32 > 127 ||
char.is_ascii_alphanumeric() ||
char == '_'
),
"encoded string contains only valid characters\nencoded string: {}",
encoded
)
}
}

fn unescape_invalid_variable_chars(input: &str) -> String {
let mut decoded = String::new();
let mut chars = input.chars();
while let Some(char) = chars.next() {
if char == ESCAPE_CHAR {
let escape_sequence = [chars.next().unwrap(), chars.next().unwrap()];
let code_point =
u32::from_str_radix(&escape_sequence.iter().collect::<String>(), 16).unwrap();
if code_point == ESCAPE_CHAR_ESCAPE_SEQUENCE {
decoded.push(ESCAPE_CHAR)
} else {
decoded.push(char::from_u32(code_point).unwrap())
}
} else {
decoded.push(char)
}
}
decoded
}
}
6 changes: 6 additions & 0 deletions crates/mongodb-agent-common/src/mongodb/stage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ use super::{accumulator::Accumulator, pipeline::Pipeline, Selection};
/// https://www.mongodb.com/docs/manual/reference/operator/aggregation-pipeline/#std-label-aggregation-pipeline-operator-reference
#[derive(Clone, Debug, PartialEq, Serialize)]
pub enum Stage {
/// Returns literal documents from input expressions.
///
/// See https://www.mongodb.com/docs/manual/reference/operator/aggregation/documents/#mongodb-pipeline-pipe.-documents
#[serde(rename = "$documents")]
Documents(Vec<bson::Document>),

/// Filters the document stream to allow only matching documents to pass unmodified into the
/// next pipeline stage. [`$match`] uses standard MongoDB queries. For each input document,
/// outputs either one document (a match) or zero documents (no match).
Expand Down
34 changes: 26 additions & 8 deletions crates/mongodb-agent-common/src/procedure/interpolated_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ mod tests {
use configuration::{native_mutation::NativeMutation, MongoScalarType};
use mongodb::bson::doc;
use mongodb_support::BsonScalarType as S;
use ndc_models::Argument;
use pretty_assertions::assert_eq;
use serde_json::json;

Expand Down Expand Up @@ -175,8 +176,13 @@ mod tests {
};

let input_arguments = [
("id".to_owned(), json!(1001)),
("name".to_owned(), json!("Regina Spektor")),
("id".to_owned(), Argument::Literal { value: json!(1001) }),
(
"name".to_owned(),
Argument::Literal {
value: json!("Regina Spektor"),
},
),
]
.into_iter()
.collect();
Expand Down Expand Up @@ -232,10 +238,12 @@ mod tests {

let input_arguments = [(
"documents".to_owned(),
json!([
{ "ArtistId": 1001, "Name": "Regina Spektor" } ,
{ "ArtistId": 1002, "Name": "Ok Go" } ,
]),
Argument::Literal {
value: json!([
{ "ArtistId": 1001, "Name": "Regina Spektor" } ,
{ "ArtistId": 1002, "Name": "Ok Go" } ,
]),
},
)]
.into_iter()
.collect();
Expand Down Expand Up @@ -289,8 +297,18 @@ mod tests {
};

let input_arguments = [
("prefix".to_owned(), json!("current")),
("basename".to_owned(), json!("some-coll")),
(
"prefix".to_owned(),
Argument::Literal {
value: json!("current"),
},
),
(
"basename".to_owned(),
Argument::Literal {
value: json!("some-coll"),
},
),
]
.into_iter()
.collect();
Expand Down
5 changes: 5 additions & 0 deletions crates/mongodb-agent-common/src/procedure/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::collections::BTreeMap;
use configuration::native_mutation::NativeMutation;
use mongodb::options::SelectionCriteria;
use mongodb::{bson, Database};
use ndc_models::Argument;

use crate::mongo_query_plan::Type;
use crate::query::arguments::resolve_arguments;
Expand Down Expand Up @@ -61,6 +62,10 @@ fn interpolate(
arguments: BTreeMap<String, serde_json::Value>,
command: &bson::Document,
) -> Result<bson::Document, ProcedureError> {
let arguments = arguments
.into_iter()
.map(|(name, value)| (name, Argument::Literal { value }))
.collect();
let bson_arguments = resolve_arguments(parameters, arguments)?;
interpolated_command(command, &bson_arguments)
}
Loading

0 comments on commit 6a5e208

Please sign in to comment.