Skip to content

Commit f16ec40

Browse files
committed
improve doc of sophia_c14n
also slight improvement of the code
1 parent 9fb2bda commit f16ec40

File tree

1 file changed

+30
-15
lines changed

1 file changed

+30
-15
lines changed

c14n/src/rdfc10.rs

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use crate::_cnq::nq;
1818
use crate::_permutations::for_each_permutation_of;
1919
use crate::hash::{HashFunction, Sha256, Sha384};
2020

21-
/// Return a canonical N-quads representation of `d`, where
21+
/// Write into `w` a canonical N-quads representation of `d`, where
2222
/// + blank nodes are canonically [relabelled](`relabel`) with
2323
/// - the [SHA-256](Sha256) hash function,
2424
/// - the [`DEFAULT_DEPTH_FACTOR`],
@@ -30,8 +30,8 @@ pub fn normalize<D: Dataset, W: io::Write>(d: &D, w: W) -> Result<(), C14nError<
3030
normalize_with::<Sha256, D, W>(d, w, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT)
3131
}
3232

33-
/// Return a canonical N-quads representation of `d`, where
34-
/// + blank nodes are canonically [relabelled](`relabel`) with
33+
/// Write into `w` a canonical N-quads representation of `d`, where
34+
/// + blank nodes are canonically [relabelled](`relabel_sha384`) with
3535
/// - the [SHA-384](Sha384) hash function,
3636
/// - the [`DEFAULT_DEPTH_FACTOR`],
3737
/// - the [`DEFAULT_PERMUTATION_LIMIT`];
@@ -42,8 +42,11 @@ pub fn normalize_sha384<D: Dataset, W: io::Write>(d: &D, w: W) -> Result<(), C14
4242
normalize_with::<Sha384, D, W>(d, w, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT)
4343
}
4444

45-
/// Return a canonical N-quads representation of `d`, where
46-
/// - blank nodes are canonically [relabelled](`relabel_with`) with the given `depth_factor`,
45+
/// Write into `w` a canonical N-quads representation of `d`, where
46+
/// + blank nodes are canonically [relabelled](`relabel_with`) with
47+
/// - the [hash function](HashFunction) `H`,
48+
/// - the given `depth_factor`,
49+
/// - the given `permutation_limit`;
4750
/// - quads are sorted in codepoint order.
4851
///
4952
/// See also [`normalize`].
@@ -111,12 +114,24 @@ pub fn relabel_sha384<D: Dataset>(d: &D) -> Result<(C14nQuads<D>, C14nIdMap), C1
111114
relabel_with::<Sha384, D>(d, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT)
112115
}
113116

114-
/// Return a [`Dataset`] isomorphic to `d`, with canonical blank node labels,
115-
/// restricting the number of recursion of RDFC-1.0 to `depth_factor` per blank node,
116-
/// and restricting the size of permutations to `permutation_limit`!.
117+
/// Return a [`Dataset`] isomorphic to `d`, with canonical blank node labels.
118+
///
119+
/// The generic parameter `H` determines which [hash function](HashFunction)
120+
/// the algorithm should use internally
121+
/// (RDFC-1.0 uses [SHA-256](Sha256) by default).
122+
///
123+
/// The parameters `depth_factor` and `permutation_limit`
124+
/// are used to stop the algorithm if the computation becomes too complex,
125+
/// in order to secure it against [dataset poisoning](https://www.w3.org/TR/rdf-canon/#dataset-poisoning).
126+
/// The default values ([`DEFAULT_DEPTH_FACTOR`] and [`DEFAULT_PERMUTATION_LIMIT`])
127+
/// are expected to work with any "realistic" dataset.
117128
///
118-
/// These restrictions prevents the algorithm from blocking on pathological graphs with little practical utility
119-
/// (e.g. big cycles or cliques of undistinguishable blank nodes).
129+
/// More precisely:
130+
/// * the algorithm will not recurse more deeply than `depth_factor`*N,
131+
/// where N is the total number of blank nodes in the dataset;
132+
/// * the algorithm will not try to disambiguate more than
133+
/// `permutation_limit` undistinguishable blank nodes
134+
/// (blank nodes with the same immediate neighbourhood).
120135
///
121136
/// Implements <https://www.w3.org/TR/rdf-canon/#canon-algorithm>
122137
///
@@ -133,6 +148,11 @@ pub fn relabel_with<'a, H: HashFunction, D: Dataset>(
133148
let mut state = C14nState::<H, _>::new(depth_factor, permutation_limit);
134149
// Step 2
135150
for quad in &quads {
151+
if quad.p().is_blank_node() {
152+
return Err(C14nError::Unsupported(
153+
"RDFC-1.0 does not support blank node as predicate".to_string(),
154+
));
155+
}
136156
for component in iter_spog(quad.spog()) {
137157
if component.is_triple() || component.is_variable() {
138158
return Err(C14nError::Unsupported(
@@ -148,11 +168,6 @@ pub fn relabel_with<'a, H: HashFunction, D: Dataset>(
148168
}
149169
}
150170
}
151-
if quad.p().is_blank_node() {
152-
return Err(C14nError::Unsupported(
153-
"RDFC-1.0 does not support blank node as predicate".to_string(),
154-
));
155-
}
156171
}
157172
// Step 3
158173
for (bnid, quads) in state.b2q.iter() {

0 commit comments

Comments
 (0)