Skip to content

Commit 004eb1e

Browse files
committed
graph, node: move normalize_sql_ident to shared location and normalize context idents at config load time
1 parent 9170629 commit 004eb1e

File tree

3 files changed

+120
-24
lines changed

3 files changed

+120
-24
lines changed

graph/src/amp/manifest/data_source/raw.rs

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use anyhow::anyhow;
88
use arrow::{array::RecordBatch, datatypes::Schema};
99
use futures03::future::try_join_all;
1010
use itertools::Itertools;
11-
use lazy_regex::regex_is_match;
1211
use semver::Version;
1312
use serde::Deserialize;
1413
use slog::{debug, Logger};
@@ -22,7 +21,9 @@ use crate::{
2221
auto_block_hash_decoder, auto_block_number_decoder, auto_block_timestamp_decoder,
2322
},
2423
error::IsDeterministic,
25-
sql::{BlockRangeQueryBuilder, ContextQuery, ValidQuery},
24+
sql::{
25+
normalize_sql_ident, validate_ident, BlockRangeQueryBuilder, ContextQuery, ValidQuery,
26+
},
2627
},
2728
components::link_resolver::{LinkResolver, LinkResolverContext},
2829
data::subgraph::DeploymentHash,
@@ -80,7 +81,8 @@ impl RawDataSource {
8081
let logger = logger.new(slog::o!("data_source" => name.clone()));
8182
debug!(logger, "Resolving data source");
8283

83-
validate_ident(&name).map_err(|e| e.source_context("invalid `name`"))?;
84+
validate_ident(&name)
85+
.map_err(|e| Error::InvalidValue(e).source_context("invalid `name`"))?;
8486
Self::validate_kind(kind)?;
8587

8688
let source = source
@@ -380,7 +382,8 @@ impl RawAbi {
380382
) -> Result<Abi, Error> {
381383
let Self { name, file } = self;
382384

383-
validate_ident(&name).map_err(|e| e.source_context("invalid `name`"))?;
385+
validate_ident(&name)
386+
.map_err(|e| Error::InvalidValue(e).source_context("invalid `name`"))?;
384387
let contract = Self::resolve_contract(logger, link_resolver, file).await?;
385388

386389
Ok(Abi { name, contract })
@@ -448,7 +451,8 @@ impl RawTable {
448451
) -> Result<Table, Error> {
449452
let Self { name, query, file } = self;
450453

451-
validate_ident(&name).map_err(|e| e.source_context("invalid `name`"))?;
454+
validate_ident(&name)
455+
.map_err(|e| Error::InvalidValue(e).source_context("invalid `name`"))?;
452456
let query = match Self::resolve_query(query, source, abis)? {
453457
Some(query) => query,
454458
None => Self::resolve_file(logger, link_resolver, file, source, abis).await?,
@@ -459,7 +463,7 @@ impl RawTable {
459463

460464
for field in schema.fields() {
461465
validate_ident(field.name()).map_err(|e| {
462-
e.source_context(format!(
466+
Error::InvalidValue(e).source_context(format!(
463467
"invalid query output schema: invalid column '{}'",
464468
field.name()
465469
))
@@ -685,22 +689,6 @@ impl IsDeterministic for Error {
685689
}
686690
}
687691

688-
fn validate_ident(s: &str) -> Result<(), Error> {
689-
if !regex_is_match!("^[a-zA-Z_][a-zA-Z0-9_-]{0,100}$", s) {
690-
return Err(Error::InvalidValue(
691-
anyhow!("invalid identifier '{s}': must start with a letter or an underscore, and contain only letters, numbers, hyphens, and underscores")
692-
));
693-
}
694-
Ok(())
695-
}
696-
697-
fn normalize_sql_ident(s: &str) -> String {
698-
match validate_ident(s) {
699-
Ok(()) => s.to_lowercase(),
700-
Err(_e) => sqlparser_latest::ast::Ident::with_quote('"', s).to_string(),
701-
}
702-
}
703-
704692
#[cfg(test)]
705693
mod tests {
706694
use super::*;

graph/src/amp/sql/mod.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,30 @@
11
pub mod query_builder;
22

33
pub use self::query_builder::{BlockRangeQueryBuilder, ContextQuery, ValidQuery};
4+
5+
use anyhow::anyhow;
6+
use lazy_regex::regex_is_match;
7+
8+
/// Validates that `s` is a simple SQL identifier: starts with a letter or
9+
/// underscore and contains only `[a-zA-Z0-9_-]`, up to 101 characters.
10+
pub fn validate_ident(s: &str) -> Result<(), anyhow::Error> {
11+
if !regex_is_match!("^[a-zA-Z_][a-zA-Z0-9_-]{0,100}$", s) {
12+
return Err(anyhow!(
13+
"invalid identifier '{s}': must start with a letter or an underscore, \
14+
and contain only letters, numbers, hyphens, and underscores"
15+
));
16+
}
17+
Ok(())
18+
}
19+
20+
/// Normalizes a SQL identifier for safe interpolation into SQL `FROM` clauses.
21+
///
22+
/// Simple identifiers (matching `validate_ident`) are lowercased and returned
23+
/// unquoted. Identifiers with special characters are double-quoted using
24+
/// `sqlparser_latest::ast::Ident::with_quote`.
25+
pub fn normalize_sql_ident(s: &str) -> String {
26+
match validate_ident(s) {
27+
Ok(()) => s.to_lowercase(),
28+
Err(_) => sqlparser_latest::ast::Ident::with_quote('"', s).to_string(),
29+
}
30+
}

node/src/config.rs

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use graph::{
2+
amp::sql::normalize_sql_ident,
23
anyhow::Error,
34
blockchain::BlockchainKind,
45
components::network_provider::{AmpChainConfig, AmpChainNames, ChainName},
@@ -155,8 +156,8 @@ impl Config {
155156
AmpChainConfig {
156157
address: uri,
157158
token: amp.token.clone(),
158-
context_dataset: amp.context_dataset.clone(),
159-
context_table: amp.context_table.clone(),
159+
context_dataset: normalize_sql_ident(&amp.context_dataset),
160+
context_table: normalize_sql_ident(&amp.context_table),
160161
network: amp.network.clone(),
161162
},
162163
);
@@ -2528,4 +2529,84 @@ fdw_pool_size = [
25282529
assert_eq!(amp.context_table, "blocks");
25292530
assert_eq!(amp.network, Some("ethereum-mainnet".to_string()));
25302531
}
2532+
2533+
#[test]
2534+
fn amp_chain_config_normalizes_context_idents() {
2535+
let section = toml::from_str::<ChainSection>(
2536+
r#"
2537+
ingestor = "block_ingestor_node"
2538+
[mainnet]
2539+
shard = "primary"
2540+
provider = []
2541+
[mainnet.amp]
2542+
address = "http://localhost:50051"
2543+
context_dataset = "ns/data@v1"
2544+
context_table = "my/table@2"
2545+
"#,
2546+
)
2547+
.unwrap();
2548+
2549+
let config = Config {
2550+
node: NodeId::new("test").unwrap(),
2551+
general: None,
2552+
stores: {
2553+
let mut s = std::collections::BTreeMap::new();
2554+
s.insert(
2555+
"primary".to_string(),
2556+
toml::from_str::<Shard>(r#"connection = "postgresql://u:p@h/db""#).unwrap(),
2557+
);
2558+
s
2559+
},
2560+
chains: section,
2561+
deployment: toml::from_str("[[rule]]\nshards = [\"primary\"]\nindexers = [\"test\"]")
2562+
.unwrap(),
2563+
};
2564+
2565+
let map = config.amp_chain_configs().unwrap();
2566+
let mainnet = map.get("mainnet").expect("mainnet should be in map");
2567+
2568+
// Identifiers with special characters should be double-quoted
2569+
assert_eq!(mainnet.context_dataset, "\"ns/data@v1\"");
2570+
assert_eq!(mainnet.context_table, "\"my/table@2\"");
2571+
}
2572+
2573+
#[test]
2574+
fn amp_chain_config_lowercases_simple_context_idents() {
2575+
let section = toml::from_str::<ChainSection>(
2576+
r#"
2577+
ingestor = "block_ingestor_node"
2578+
[mainnet]
2579+
shard = "primary"
2580+
provider = []
2581+
[mainnet.amp]
2582+
address = "http://localhost:50051"
2583+
context_dataset = "Eth"
2584+
context_table = "Blocks"
2585+
"#,
2586+
)
2587+
.unwrap();
2588+
2589+
let config = Config {
2590+
node: NodeId::new("test").unwrap(),
2591+
general: None,
2592+
stores: {
2593+
let mut s = std::collections::BTreeMap::new();
2594+
s.insert(
2595+
"primary".to_string(),
2596+
toml::from_str::<Shard>(r#"connection = "postgresql://u:p@h/db""#).unwrap(),
2597+
);
2598+
s
2599+
},
2600+
chains: section,
2601+
deployment: toml::from_str("[[rule]]\nshards = [\"primary\"]\nindexers = [\"test\"]")
2602+
.unwrap(),
2603+
};
2604+
2605+
let map = config.amp_chain_configs().unwrap();
2606+
let mainnet = map.get("mainnet").expect("mainnet should be in map");
2607+
2608+
// Simple identifiers should be lowercased and unquoted
2609+
assert_eq!(mainnet.context_dataset, "eth");
2610+
assert_eq!(mainnet.context_table, "blocks");
2611+
}
25312612
}

0 commit comments

Comments
 (0)