Skip to content

Commit

Permalink
improving version 0.0.3
Browse files Browse the repository at this point in the history
  • Loading branch information
angelip2303 committed Oct 22, 2023
1 parent 9c914bf commit 99a062f
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 35 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ edition = "2021"
[dependencies]
zarr3 = { git = "https://github.com/clbarnes/zarr3-rs.git" }
rdf-rs = { path = "./rdf-rs" }
rio_api = "0.8.4"
sophia = { version = "0.7.2" }
ndarray = { version = "0.15.6", features = [ "rayon" ] }
bimap = "0.6.3"
11 changes: 11 additions & 0 deletions resources/rdf.nt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<http://example.org/alan> <http://example.org/instanceOf> <http://example.org/Human> .
<http://example.org/alan> <http://example.org/placeOfBirth> <http://example.org/warrington> .
<http://example.org/alan> <http://example.org/placeOfDeath> <http://example.org/wilmslow> .
<http://example.org/alan> <http://example.org/dateOfBirth> "1912-06-23"^^<http://www.w3.org/2001/XMLSchema#date> .
<http://example.org/alan> <http://example.org/employer> <http://example.org/GCHQ> .
<http://example.org/warrington> <http://example.org/country> <http://example.org/uk> .
<http://example.org/wilmslow> <http://example.org/country> <http://example.org/uk> .
<http://example.org/wilmslow> <http://example.org/instanceOf> <http://example.org/town> .
<http://example.org/bombe> <http://example.org/discoverer> <http://example.org/alan> .
<http://example.org/bombe> <http://example.org/instanceOf> <http://example.org/computer> .
<http://example.org/bombe> <http://example.org/manufacturer> <http://example.org/GCHQ> .
76 changes: 42 additions & 34 deletions src/remote_hdt.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use ndarray::{ArcArray, ArcArray1, Array2, Axis, Ix3};
use bimap::BiHashMap;
use ndarray::{ArcArray, ArcArray1, Array2, ArrayBase, Axis, Dim, Ix3, IxDynImpl, OwnedArcRepr};
use rdf_rs::RdfParser;
use sophia::term::BoxTerm;
use std::path::PathBuf;
use std::str::FromStr;
use zarr3::codecs::bb::gzip_codec::GzipCodec;
use zarr3::prelude::smallvec::smallvec;
use zarr3::prelude::{
create_root_group, Array, ArrayMetadataBuilder, ArrayRegion, GroupMetadata, ReadableMetadata,
Expand All @@ -10,7 +13,7 @@ use zarr3::store::filesystem::FileSystemStore;
use zarr3::store::{NodeKey, NodeName};
use zarr3::{ArcArrayD, CoordVec};

type ArcArray3 = ArcArray<u8, Ix3>;
pub type ArcArray3 = ArcArray<u8, Ix3>;

#[derive(Default)]
pub struct Domain {
Expand Down Expand Up @@ -427,8 +430,9 @@ impl<'a> RemoteHDT<'a> {
// 4. Build the structure of the Array; as such, several parameters of it are
// tweaked. Namely, the size of the array, the size of the chunks, the name
// of the different dimensions and the default values
let arr_meta = ArrayMetadataBuilder::<bool>::new(&self.reference_system.shape_u64(domain))
let arr_meta = ArrayMetadataBuilder::<u8>::new(&self.reference_system.shape_u64(domain))
.dimension_names(self.reference_system.dimension_names())?
.push_bb_codec(GzipCodec::default())
.set_attribute(
"subjects".to_string(),
subjects
Expand Down Expand Up @@ -462,7 +466,7 @@ impl<'a> RemoteHDT<'a> {
Err(_) => return Err(String::from("Error parsing the NodeName")),
};

let arr = match root_group.create_array::<bool>(node_name, arr_meta) {
let arr = match root_group.create_array::<u8>(node_name, arr_meta) {
Ok(array) => array,
Err(_) => return Err(String::from("Error creating the Array")),
};
Expand All @@ -472,47 +476,44 @@ impl<'a> RemoteHDT<'a> {
// the provided values (second vector). What's more, an offset can be set;
// that is, we can insert the created array with an X and Y shift. Lastly,
// the region is written provided the aforementioned data and offset
let data = match ArcArrayD::from_shape_vec(self.reference_system.shape(domain).to_vec(), {
let mut v =
vec![false; domain.subjects_size * domain.predicates_size * domain.objects_size];
let data = self.create_array(domain, dump, subjects, predicates, objects)?;
let offset = smallvec![0, 0, 0];

// TODO: could this be done using rayon or a multi-threaded approach?
// Maybe using chunks instead of a region and having several chunks of
// the same size (e.g. 100x100). Then we write in parallel?
if arr.write_region(&offset, data).is_err() {
return Err(String::from("Error writing to the Array"));
};

Ok(self)
}

fn create_array(
&self,
domain: &Domain,
dump: RdfParser,
subjects: BiHashMap<BoxTerm, usize>,
predicates: BiHashMap<BoxTerm, usize>,
objects: BiHashMap<BoxTerm, usize>,
) -> Result<ArrayBase<OwnedArcRepr<u8>, Dim<IxDynImpl>>, String> {
match ArcArrayD::from_shape_vec(self.reference_system.shape(domain).to_vec(), {
let mut v: Vec<u8> =
vec![0u8; domain.subjects_size * domain.predicates_size * domain.objects_size];
let slice = v.as_mut_slice();
dump.graph.iter().for_each(|[subject, predicate, object]| {
slice[self.reference_system.index(
subjects.get_by_left(subject).unwrap().to_owned(),
predicates.get_by_left(predicate).unwrap().to_owned(),
objects.get_by_left(object).unwrap().to_owned(),
domain,
)] = true;
)] = 1u8;
});
slice.to_vec()
}) {
Ok(data) => data,
Ok(data) => Ok(data),
Err(_) => return Err(String::from("Error creating the data Array")),
};

println!("{}", data);

let offset = smallvec![0, 0, 0];

// TODO: could this be done using rayon or a multi-threaded approach.
// Maybe using chunks instead of a region and having several chunks of
// the same size (i.e 100x100). Then we write in parallel?
if arr.write_region(&offset, data).is_err() {
return Err(String::from("Error writing to the Array"));
};

println!("== Array ========================================================");
println!(
"{:?}",
arr.read_region(ArrayRegion::from_offset_shape(
&[0, 0, 0],
&self.reference_system.shape_u64(domain)
))
.unwrap()
.unwrap()
);

Ok(self)
}
}

pub fn parse(mut self) -> Result<Self, String> {
Expand Down Expand Up @@ -589,6 +590,13 @@ impl<'a> RemoteHDT<'a> {

Ok(self)
}

/// Consumes the `RemoteHDT` and hands back the array it holds.
///
/// # Errors
///
/// Returns `"Array is None"` when no array is currently stored in
/// `self.array`.
pub fn get_array(self) -> Result<ArcArray3, String> {
    self.array.ok_or_else(|| String::from("Array is None"))
}
}

impl Engine for RemoteHDT<'_> {
Expand Down
45 changes: 45 additions & 0 deletions tests/write_read_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use std::fs::remove_dir_all;

use remote_hdt::remote_hdt::{ArcArray3, RemoteHDTBuilder};

/// Round-trip test: serializes `resources/rdf.nt` into a Zarr store under
/// `root.zarr` using the SPO reference system, parses the store back and
/// checks that the recovered array matches the expected 4x8x9 array.
#[test]
fn write_read_test() {
    // Best-effort removal of leftovers from a previous (possibly failed) run.
    // On a fresh checkout the directory does not exist, so an error here is
    // expected and must not abort the test.
    let _ = remove_dir_all("root.zarr");

    // Fail right here if the store cannot be written; otherwise the problem
    // would only surface later as a misleading parse or assertion failure.
    let _ = RemoteHDTBuilder::new("root.zarr")
        .reference_system(remote_hdt::remote_hdt::ReferenceSystem::SPO)
        .rdf_path("resources/rdf.nt")
        .array_name("array_name")
        .build()
        .serialize()
        .expect("serializing resources/rdf.nt into root.zarr should succeed");

    // 4 * 8 * 9 = 288 cells; a 1 marks a triple present in the input file.
    let expected = ArcArray3::from_shape_vec(
        (4, 8, 9),
        vec![
            1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
        ],
    )
    .unwrap();

    let actual = RemoteHDTBuilder::new("root.zarr")
        .reference_system(remote_hdt::remote_hdt::ReferenceSystem::SPO)
        .array_name("array_name")
        .build()
        .parse()
        .unwrap()
        .get_array()
        .unwrap();

    assert_eq!(actual, expected);

    // Best-effort cleanup; a failure to remove the store does not affect the
    // outcome of the test.
    let _ = remove_dir_all("root.zarr");
}

0 comments on commit 99a062f

Please sign in to comment.