diff --git a/src/history/changes.rs b/src/history/changes.rs new file mode 100644 index 0000000..aa6bd07 --- /dev/null +++ b/src/history/changes.rs @@ -0,0 +1,144 @@ +use crate::{ + db::{self, DatabaseConnection}, + stelae::{archive::Archive, types::repositories::Repository}, +}; +use sophia::{api::{ns::{rdf, NsTerm}, prelude::*, term::SimpleTerm}, turtle::parser::turtle::TurtleParser}; +use sophia::inmem::graph::LightGraph; +use sophia::xml::parser; +use sophia::turtle::serializer::nt::NtSerializer; +use sophia::{api::ns::Namespace, inmem::graph::FastGraph}; +use std::path::{Path, PathBuf}; +use walkdir::WalkDir; +/// Inserts changes from the archive into the database +/// +/// # Errors +/// Errors if the changes cannot be inserted into the archive +// const NAMESPACE_URL: &str = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#"; +// const OLL: Namespace<&str> = Namespace::new(NAMESPACE_URL).unwrap(); + +// const DOCUMENT_VERSION: NsTerm = OLL.get("DocumentVersion").unwrap(); + +#[actix_web::main] +pub async fn insert( + raw_archive_path: &str, + archive_path: PathBuf, + stele: Option, +) -> std::io::Result<()> { + let conn = match db::init::connect(&archive_path).await { + Ok(conn) => conn, + Err(err) => { + tracing::error!( + "error: could not connect to database. Confirm that DATABASE_URL env var is set correctly." + ); + tracing::error!("Error: {:?}", err); + std::process::exit(1); + } + }; + if let Some(stele) = stele { + insert_changes_single_stele()?; + } else { + insert_changes_archive(&conn, raw_archive_path, &archive_path).unwrap_or_else(|err| { + tracing::error!("Failed to insert changes into archive"); + tracing::error!("{:?}", err); + }); + } + Ok(()) +} + +fn insert_changes_single_stele() -> std::io::Result<()> { + Ok(()) +} + +/// Insert changes from the archive into the database +fn insert_changes_archive( + conn: &DatabaseConnection, + raw_archive_path: &str, + archive_path: &Path, +) -> anyhow::Result<()> { + let archive = Archive::parse( + archive_path.to_path_buf(), + &PathBuf::from(raw_archive_path), + false, + )?; + + for (name, mut stele) in archive.stelae { + if let Some(repositories) = stele.get_repositories()? { + let Some(rdf_repo) = repositories.get_rdf_repository() else { + continue; + }; + let rdf_repo_path = archive_path.to_path_buf().join(&rdf_repo.name); + if !rdf_repo_path.exists() { + anyhow::bail!( + "RDF repository should exist on disk but not found: {}", + rdf_repo_path.display() + ); + } + insert_changes_from_rdf_repository(conn, rdf_repo_path, &name, rdf_repo)?; + } + } + Ok(()) +} + +/// Insert changes from the RDF repository into the database +fn insert_changes_from_rdf_repository( + conn: &DatabaseConnection, + rdf_repo_path: PathBuf, + name: &str, + rdf_repo: &Repository, +) -> anyhow::Result<()> { + tracing::info!("Inserting changes from RDF repository: {}", name); + tracing::info!("RDF repository path: {}", rdf_repo_path.display()); + + // let response = reqwest::get(NAMESPACE_URL).await?.text().await?; + let mut graph = FastGraph::new(); + + for entry in WalkDir::new(&rdf_repo_path) { + match entry { + Ok(entry) if is_rdf(&entry) => { + tracing::debug!("Parsing file: {:?}", entry.path()); + let file = std::fs::File::open(entry.path())?; + let reader = std::io::BufReader::new(file); + parser::parse_bufread(reader).add_to_graph(&mut graph)?; + } + Ok(entry) => { + tracing::debug!("Skipping non-RDF file: {:?}", entry.path()); + continue; + } + Err(err) => { + tracing::error!("Error reading file: {:?}", err); + } + } + } + let namespace_url = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#"; + let oll: Namespace<&str> = Namespace::new(namespace_url).unwrap(); + + let document_version: NsTerm = oll.get("DocumentVersion").unwrap(); + let doc_id = oll.get("docId").unwrap(); + // println!("Graph: {:?}", graph); + let documents = graph.triples_matching(Any, Any, [document_version]); + + for triple in documents { + let triple = triple.unwrap(); + let document = triple.s(); + + let mut doc_id_triples = graph.triples_matching( + [document], + [doc_id], + Any, + ); + for t in doc_id_triples { + // println!("t: {:?} {:?} {:?}", t?.s(), t?.p(), t?.o()); + let object = t?.o(); + if let SimpleTerm::LiteralDatatype(value, _) = object { + dbg!(&value); + } + + } + } + Ok(()) +} + +/// Check if the entry is an RDF file +fn is_rdf(entry: &walkdir::DirEntry) -> bool { + entry.path().extension() == Some("rdf".as_ref()) +} diff --git a/src/history/mod.rs b/src/history/mod.rs new file mode 100644 index 0000000..12a0080 --- /dev/null +++ b/src/history/mod.rs @@ -0,0 +1,2 @@ +pub mod changes; +pub mod rdf_namespaces; \ No newline at end of file diff --git a/src/history/rdf_namespaces.rs b/src/history/rdf_namespaces.rs new file mode 100644 index 0000000..05261f3 --- /dev/null +++ b/src/history/rdf_namespaces.rs @@ -0,0 +1,5 @@ +// use sophia::api::ns::Namespace; + +// const NAMESPACE_URL: &str = "https://open.law/us/ngo/oll/_ontology/v0.1/ontology.owl#"; + +// pub const OLL: Namespace = Namespace::new(NAMESPACE_URL).unwrap();