Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
n-dusan committed Feb 26, 2024
1 parent bcc9b2c commit 5664369
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 0 deletions.
144 changes: 144 additions & 0 deletions src/history/changes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
use crate::{
db::{self, DatabaseConnection},
stelae::{archive::Archive, types::repositories::Repository},
};
use sophia::{api::{ns::{rdf, NsTerm}, prelude::*, term::SimpleTerm}, turtle::parser::turtle::TurtleParser};
use sophia::inmem::graph::LightGraph;
use sophia::xml::parser;
use sophia::turtle::serializer::nt::NtSerializer;
use sophia::{api::ns::Namespace, inmem::graph::FastGraph};
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Inserts changes from the archive into the database
///
/// # Errors
/// Errors if the changes cannot be inserted into the archive
// const NAMESPACE_URL: &str = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#";
// const OLL: Namespace<&str> = Namespace::new(NAMESPACE_URL).unwrap();

// const DOCUMENT_VERSION: NsTerm = OLL.get("DocumentVersion").unwrap();

#[actix_web::main]
pub async fn insert(
raw_archive_path: &str,
archive_path: PathBuf,
stele: Option<String>,
) -> std::io::Result<()> {
let conn = match db::init::connect(&archive_path).await {
Ok(conn) => conn,
Err(err) => {
tracing::error!(
"error: could not connect to database. Confirm that DATABASE_URL env var is set correctly."
);
tracing::error!("Error: {:?}", err);
std::process::exit(1);
}
};
if let Some(stele) = stele {
insert_changes_single_stele()?;
} else {
insert_changes_archive(&conn, raw_archive_path, &archive_path).unwrap_or_else(|err| {
tracing::error!("Failed to insert changes into archive");
tracing::error!("{:?}", err);
});
}
Ok(())
}

fn insert_changes_single_stele() -> std::io::Result<()> {
Ok(())
}

/// Insert changes from the archive into the database
fn insert_changes_archive(
conn: &DatabaseConnection,
raw_archive_path: &str,
archive_path: &Path,
) -> anyhow::Result<()> {
let archive = Archive::parse(
archive_path.to_path_buf(),
&PathBuf::from(raw_archive_path),
false,
)?;

for (name, mut stele) in archive.stelae {
if let Some(repositories) = stele.get_repositories()? {
let Some(rdf_repo) = repositories.get_rdf_repository() else {
continue;
};
let rdf_repo_path = archive_path.to_path_buf().join(&rdf_repo.name);
if !rdf_repo_path.exists() {
anyhow::bail!(
"RDF repository should exist on disk but not found: {}",
rdf_repo_path.display()
);
}
insert_changes_from_rdf_repository(conn, rdf_repo_path, &name, rdf_repo)?;
}
}
Ok(())
}

/// Insert changes from the RDF repository into the database
fn insert_changes_from_rdf_repository(
conn: &DatabaseConnection,
rdf_repo_path: PathBuf,
name: &str,
rdf_repo: &Repository,
) -> anyhow::Result<()> {
tracing::info!("Inserting changes from RDF repository: {}", name);
tracing::info!("RDF repository path: {}", rdf_repo_path.display());

// let response = reqwest::get(NAMESPACE_URL).await?.text().await?;
let mut graph = FastGraph::new();

for entry in WalkDir::new(&rdf_repo_path) {
match entry {
Ok(entry) if is_rdf(&entry) => {
tracing::debug!("Parsing file: {:?}", entry.path());
let file = std::fs::File::open(entry.path())?;
let reader = std::io::BufReader::new(file);
parser::parse_bufread(reader).add_to_graph(&mut graph)?;
}
Ok(entry) => {
tracing::debug!("Skipping non-RDF file: {:?}", entry.path());
continue;
}
Err(err) => {
tracing::error!("Error reading file: {:?}", err);
}
}
}
let namespace_url = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#";
let oll: Namespace<&str> = Namespace::new(namespace_url).unwrap();

let document_version: NsTerm = oll.get("DocumentVersion").unwrap();
let doc_id = oll.get("docId").unwrap();
// println!("Graph: {:?}", graph);
let documents = graph.triples_matching(Any, Any, [document_version]);

for triple in documents {
let triple = triple.unwrap();
let document = triple.s();

let mut doc_id_triples = graph.triples_matching(
[document],
[doc_id],
Any,
);
for t in doc_id_triples {
// println!("t: {:?} {:?} {:?}", t?.s(), t?.p(), t?.o());
let object = t?.o();
if let SimpleTerm::LiteralDatatype(value, _) = object {
dbg!(&value);
}

}
}
Ok(())
}

/// Check if the entry is an RDF file
fn is_rdf(entry: &walkdir::DirEntry) -> bool {
entry.path().extension() == Some("rdf".as_ref())
}
2 changes: 2 additions & 0 deletions src/history/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod changes;
pub mod rdf_namespaces;
5 changes: 5 additions & 0 deletions src/history/rdf_namespaces.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// use sophia::api::ns::Namespace;

// const NAMESPACE_URL: &str = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#";

// pub const OLL: Namespace = Namespace::new(NAMESPACE_URL).unwrap();

0 comments on commit 5664369

Please sign in to comment.