
Commit 4511da9

fix issues in c14n
- some new tests from the test suite didn't pass (related to duplicate triples)
- there was a bug when sorting quads, when only one of them had a graph name
1 parent 506570a
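The sorting bug comes from comparing quads component by component in lockstep: a quad in the default graph exposes only three components, while a quad with a graph name exposes four, so zipping the two iterators stops before the graph names are ever compared. A minimal, self-contained sketch of that failure mode (plain string slices stand in for sophia terms; cmp_zip is a made-up name, not the crate's code):

    use std::cmp::Ordering;

    // Hypothetical illustration: compare two "quads" component by component,
    // stopping at the end of the *shorter* one, exactly like zip does.
    fn cmp_zip(q1: &[&str], q2: &[&str]) -> Ordering {
        for (t1, t2) in q1.iter().zip(q2.iter()) {
            let o = t1.cmp(t2);
            if o != Ordering::Equal {
                return o;
            }
        }
        Ordering::Equal // reached without ever looking at the extra graph name
    }

    fn main() {
        let default_graph = ["_:b0", "<http://example.com/#p>", "\"o\""];      // 3 components
        let named_graph = ["_:b0", "<http://example.com/#p>", "\"o\"", "<g>"]; // 4 components
        // Both quads compare as Equal, so the sort cannot order them deterministically.
        assert_eq!(cmp_zip(&default_graph, &named_graph), Ordering::Equal);
    }

The changes below pad both sides to four Option-wrapped components so that the graph position is always compared.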

2 files changed (+31, -14 lines)


c14n/src/_c14n_term.rs

Lines changed: 8 additions & 4 deletions
@@ -82,12 +82,16 @@ impl<T: Term> Term for C14nTerm<T> {
 }
 
 pub fn cmp_c14n_terms<'a, 'b, T: Term>(
-    t1: &'a C14nTerm<T>,
-    t2: &'a C14nTerm<T>,
+    t1: Option<&'a C14nTerm<T>>,
+    t2: Option<&'a C14nTerm<T>>,
     buf1: &'b mut String,
     buf2: &'b mut String,
 ) -> Ordering {
-    nq(t1, buf1);
-    nq(t2, buf2);
+    if let Some(t1) = t1 {
+        nq(t1, buf1);
+    }
+    if let Some(t2) = t2 {
+        nq(t2, buf2);
+    }
     buf1.cmp(&buf2)
 }
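With this signature change, a missing graph name (None) simply leaves its buffer empty, so a default-graph quad sorts before any quad with a graph name while the graph position still participates in the comparison. A condensed sketch of that behaviour, using &str in place of C14nTerm and its N-Quads serialization (illustrative only, not the crate's API):

    use std::cmp::Ordering;

    // Stand-in for cmp_c14n_terms: serialize each optional component into its
    // buffer (left empty when the component is absent) and compare the buffers.
    fn cmp_opt(t1: Option<&str>, t2: Option<&str>, buf1: &mut String, buf2: &mut String) -> Ordering {
        if let Some(t1) = t1 {
            buf1.push_str(t1);
        }
        if let Some(t2) = t2 {
            buf2.push_str(t2);
        }
        buf1.as_str().cmp(buf2.as_str())
    }

    fn main() {
        let (mut buf1, mut buf2) = (String::new(), String::new());
        // The default graph (None) now compares strictly less than any named graph.
        assert_eq!(
            cmp_opt(None, Some("<http://example.com/g>"), &mut buf1, &mut buf2),
            Ordering::Less
        );
    }

Leaving the buffer empty for None keeps the comparison equivalent to comparing full N-Quads lines, where the graph column is simply absent for the default graph.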

c14n/src/rdfc10.rs

Lines changed: 23 additions & 10 deletions
@@ -7,7 +7,7 @@ use std::fmt::Write;
 use std::io;
 use std::rc::Rc;
 
-use sophia_api::dataset::{DTerm, Dataset};
+use sophia_api::dataset::{DTerm, SetDataset};
 use sophia_api::quad::{iter_spog, Quad, Spog};
 use sophia_api::term::{BnodeId, Term};
 
@@ -25,7 +25,7 @@ use crate::hash::{HashFunction, Sha256, Sha384};
 /// - quads are sorted in codepoint order.
 ///
 /// See also [`normalize_with`].
-pub fn normalize<D: Dataset, W: io::Write>(d: &D, w: W) -> Result<(), C14nError<D::Error>> {
+pub fn normalize<D: SetDataset, W: io::Write>(d: &D, w: W) -> Result<(), C14nError<D::Error>> {
     normalize_with::<Sha256, D, W>(d, w, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT)
 }
 
@@ -37,7 +37,10 @@ pub fn normalize<D: Dataset, W: io::Write>(d: &D, w: W) -> Result<(), C14nError<
 /// - quads are sorted in codepoint order.
 ///
 /// See also [`normalize_with`].
-pub fn normalize_sha384<D: Dataset, W: io::Write>(d: &D, w: W) -> Result<(), C14nError<D::Error>> {
+pub fn normalize_sha384<D: SetDataset, W: io::Write>(
+    d: &D,
+    w: W,
+) -> Result<(), C14nError<D::Error>> {
     normalize_with::<Sha384, D, W>(d, w, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT)
 }
 
@@ -49,7 +52,7 @@ pub fn normalize_sha384<D: Dataset, W: io::Write>(d: &D, w: W) -> Result<(), C14
 /// - quads are sorted in codepoint order.
 ///
 /// See also [`normalize`].
-pub fn normalize_with<H: HashFunction, D: Dataset, W: io::Write>(
+pub fn normalize_with<H: HashFunction, D: SetDataset, W: io::Write>(
     d: &D,
     mut w: W,
     depth_factor: f32,
@@ -61,7 +64,7 @@ pub fn normalize_with<H: HashFunction, D: Dataset, W: io::Write>(
     // we sort the quads, but comparing the terms based on ther NQ serialization,
     // which amounts to sorting the N-Quads lines without materializing them
     quads.sort_unstable_by(|q1, q2| {
-        for (t1, t2) in iter_spog(q1.spog()).zip(iter_spog(q2.spog())) {
+        for (t1, t2) in iter_spog_opt(q1.spog()).zip(iter_spog_opt(q2.spog())) {
             buf1.clear();
             buf2.clear();
             let o = cmp_c14n_terms(t1, t2, &mut buf1, &mut buf2);
@@ -95,7 +98,7 @@ pub fn normalize_with<H: HashFunction, D: Dataset, W: io::Write>(
 /// Implements <https://www.w3.org/TR/rdf-canon/#canon-algorithm>
 ///
 /// See also [`normalize`].
-pub fn relabel<D: Dataset>(d: &D) -> Result<(C14nQuads<D>, C14nIdMap), C14nError<D::Error>> {
+pub fn relabel<D: SetDataset>(d: &D) -> Result<(C14nQuads<D>, C14nIdMap), C14nError<D::Error>> {
     relabel_with::<Sha256, D>(d, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT)
 }
 
@@ -109,7 +112,9 @@ pub fn relabel<D: Dataset>(d: &D) -> Result<(C14nQuads<D>, C14nIdMap), C14nError
 /// Implements <https://www.w3.org/TR/rdf-canon/#canon-algorithm>
 ///
 /// See also [`normalize`].
-pub fn relabel_sha384<D: Dataset>(d: &D) -> Result<(C14nQuads<D>, C14nIdMap), C14nError<D::Error>> {
+pub fn relabel_sha384<D: SetDataset>(
+    d: &D,
+) -> Result<(C14nQuads<D>, C14nIdMap), C14nError<D::Error>> {
     relabel_with::<Sha384, D>(d, DEFAULT_DEPTH_FACTOR, DEFAULT_PERMUTATION_LIMIT)
 }
 
@@ -135,7 +140,7 @@ pub fn relabel_sha384<D: Dataset>(d: &D) -> Result<(C14nQuads<D>, C14nIdMap), C1
 /// Implements <https://www.w3.org/TR/rdf-canon/#canon-algorithm>
 ///
 /// See also [`relabel`], [`normalize_with`].
-pub fn relabel_with<'a, H: HashFunction, D: Dataset>(
+pub fn relabel_with<'a, H: HashFunction, D: SetDataset>(
     d: &'a D,
     depth_factor: f32,
     permutation_limit: usize,
@@ -497,6 +502,14 @@ fn smaller_path(path1: &str, path2: &str) -> bool {
     }
 }
 
+/// Iter over all the components of a [`Quad`] as Option.
+///
+/// Compared to [`iter_spog`], this function always return 4 components.
+fn iter_spog_opt<T: Quad>(q: T) -> impl Iterator<Item = Option<T::Term>> {
+    let (spo, g) = q.to_spog();
+    spo.into_iter().map(Some).chain(std::iter::once(g))
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -701,15 +714,15 @@ _:c14n4 <http://example.com/#p> _:c14n3 .
         assert!(got == exp);
     }
 
-    pub fn c14n_nquads<D: Dataset>(d: &D) -> Result<String, C14nError<D::Error>> {
+    pub fn c14n_nquads<D: SetDataset>(d: &D) -> Result<String, C14nError<D::Error>> {
         let mut output = Vec::<u8>::new();
         normalize(d, &mut output)?;
        Ok(unsafe { String::from_utf8_unchecked(output) })
     }
 
     /// Simplisitic Quad parser, useful for writing test cases.
     /// It is based on eq_quad below.
-    fn ez_quads<'a>(lines: &[&'a str]) -> Vec<Spog<SimpleTerm<'a>>> {
+    fn ez_quads<'a>(lines: &[&'a str]) -> std::collections::HashSet<Spog<SimpleTerm<'a>>> {
         lines.iter().map(|line| ez_quad(line)).collect()
     }
 
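The other half of the commit, tightening the Dataset bound to SetDataset and making the ez_quads test helper return a HashSet instead of a Vec, lines up with the duplicate-triples failures mentioned in the commit message: the canonicalization algorithm treats its input as a set of quads, so repeated N-Quads lines in a fixture must collapse into a single quad. A small illustration of the difference (generic Rust, not the sophia API):

    use std::collections::HashSet;

    fn main() {
        let lines = ["<s> <p> <o> .", "<s> <p> <o> .", "<s> <p> <o2> ."];
        let as_vec: Vec<&str> = lines.iter().copied().collect();
        let as_set: HashSet<&str> = lines.iter().copied().collect();
        assert_eq!(as_vec.len(), 3); // the duplicate line is kept
        assert_eq!(as_set.len(), 2); // the duplicate collapses, as in a set-based dataset
    }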