Skip to content

Commit

Permalink
Merge pull request #44 from openlawlibrary/ndusan/optimize-history-da…
Browse files Browse the repository at this point in the history
…tabase

refact: optimize stelae database by hashing composite keys; small ergonomic improvements
  • Loading branch information
n-dusan authored Jun 19, 2024
2 parents 7f015fa + 461c50c commit c4a5936
Show file tree
Hide file tree
Showing 48 changed files with 1,603 additions and 1,361 deletions.
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "stelae"
description = "A collection of tools in Rust and Python for preserving, authenticating, and accessing laws in perpetuity."
version = "0.3.0-alpha.3"
version = "0.3.0-alpha.4"
edition = "2021"
readme = "README.md"
license = "AGPL-3.0"
Expand All @@ -16,6 +16,7 @@ actix-web = "4"
actix-service = "2.0"
actix-http = "3.2"
async-trait = "0.1.77"
md-5 = "0.10.6"
mime = "0.3.17"
mime_guess = "2.0.4"
anyhow = "1.0"
Expand Down
2 changes: 1 addition & 1 deletion migrations/sqlite/20240115152953_initial_db.down.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ DROP TABLE IF EXISTS publication_has_publication_versions;
DROP TABLE IF EXISTS publication_version;
DROP TABLE IF EXISTS publication;
DROP TABLE IF EXISTS version;
DROP TABLE IF EXISTS library_document;
DROP TABLE IF EXISTS library;
DROP TABLE IF EXISTS document_element;
DROP TABLE IF EXISTS document;
DROP TABLE IF EXISTS stele;

Expand Down
103 changes: 44 additions & 59 deletions migrations/sqlite/20240115152953_initial_db.up.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,113 +7,98 @@ CREATE TABLE stele (
CREATE TABLE document (
doc_id TEXT PRIMARY KEY
);
CREATE TABLE library (
mpath TEXT PRIMARY KEY
);
CREATE TABLE library_document (
collection_mpath TEXT,
CREATE TABLE document_element (
doc_mpath TEXT,
url TEXT,
doc_id TEXT,
start DATE,
end DATE,
CONSTRAINT fk_coll_mpath
FOREIGN KEY (collection_mpath)
REFERENCES library(mpath),
CONSTRAINT fk_doc_id
FOREIGN KEY (doc_id)
REFERENCES document(doc_id),
PRIMARY KEY (collection_mpath, doc_id)
PRIMARY KEY (doc_mpath)
);
CREATE TABLE library (
mpath TEXT PRIMARY KEY,
url TEXT
);
CREATE TABLE publication (
id TEXT,
name TEXT,
date INTEGER,
stele TEXT,
revoked INTEGER,
last_valid_publication_name TEXT,
last_valid_publication_id TEXT,
last_valid_version TEXT,
CONSTRAINT fk_last_valid_version
FOREIGN KEY (last_valid_version)
REFERENCES version(codified_date),
CONSTRAINT fk_last_valid_publication
FOREIGN KEY (last_valid_publication_name, stele)
REFERENCES publication(name, stele),
FOREIGN KEY (last_valid_publication_id)
REFERENCES publication(id),
CONSTRAINT fk_stele
FOREIGN KEY (stele)
REFERENCES stele(name)
ON DELETE CASCADE,
PRIMARY KEY (name, stele)
PRIMARY KEY (id)
);
CREATE TABLE version(
codified_date TEXT PRIMARY KEY
);
CREATE TABLE publication_version (
id TEXT,
version TEXT,
publication TEXT,
stele TEXT,
publication_id TEXT,
build_reason TEXT,
CONSTRAINT fk_publication
FOREIGN KEY (publication, stele)
REFERENCES publication(name, stele)
FOREIGN KEY (publication_id)
REFERENCES publication(id)
ON DELETE CASCADE,
CONSTRAINT fk_version
FOREIGN KEY (version)
REFERENCES version(codified_date),
PRIMARY KEY (publication, version, stele)
PRIMARY KEY (id)
);
CREATE TABLE publication_has_publication_versions (
publication TEXT,
referenced_publication TEXT,
referenced_version TEXT,
stele TEXT,
CONSTRAINT fk_publication FOREIGN KEY (publication, stele) REFERENCES publication(name, stele) ON DELETE CASCADE,
CONSTRAINT fk_referenced_publication FOREIGN KEY (referenced_publication, referenced_version, stele) REFERENCES publication_version(publication, version, stele) ON DELETE CASCADE,
PRIMARY KEY (publication, referenced_publication, referenced_version, stele)
);
CREATE TABLE version(
codified_date TEXT PRIMARY KEY
publication_id TEXT,
publication_version_id TEXT,
CONSTRAINT fk_publication FOREIGN KEY (publication_id) REFERENCES publication(id) ON DELETE CASCADE,
CONSTRAINT fk_referenced_publication_version FOREIGN KEY (publication_version_id) REFERENCES publication_version(id) ON DELETE CASCADE,
PRIMARY KEY (publication_id, publication_version_id)
);
CREATE TABLE document_change (
doc_mpath TEXT,
status TEXT,
url TEXT,
id TEXT,
status INTEGER,
change_reason TEXT,
publication TEXT,
version TEXT,
stele TEXT,
doc_id TEXT,
CONSTRAINT fk_doc_id
FOREIGN KEY (doc_id)
REFERENCES document(doc_id)
publication_version_id TEXT,
doc_mpath TEXT,
CONSTRAINT fk_doc_el
FOREIGN KEY (doc_mpath)
REFERENCES document_element(doc_mpath)
ON DELETE CASCADE,
CONSTRAINT fk_publication_version
FOREIGN KEY (publication, version, stele)
REFERENCES publication_version(publication, version, stele)
FOREIGN KEY (publication_version_id)
REFERENCES publication_version(id)
ON DELETE CASCADE,
PRIMARY KEY (doc_mpath, status, publication, version, stele)
PRIMARY KEY (id)
);
CREATE INDEX document_change_doc_mpath_idx ON document_change(doc_mpath COLLATE NOCASE);
CREATE TABLE library_change (
publication TEXT,
version TEXT,
stele TEXT,
publication_version_id TEXT,
status TEXT,
library_mpath TEXT,
url TEXT,
CONSTRAINT fk_publication_version
FOREIGN KEY (publication, version, stele)
REFERENCES publication_version(publication, version, stele)
FOREIGN KEY (publication_version_id)
REFERENCES publication_version(id)
ON DELETE CASCADE,
PRIMARY KEY (publication, version, stele, library_mpath, status)
PRIMARY KEY (publication_version_id, library_mpath, status)
);
CREATE TABLE changed_library_document (
publication TEXT,
version TEXT,
stele TEXT,
doc_mpath TEXT,
status TEXT,
library_mpath TEXT,
url TEXT,
document_change_id TEXT,
CONSTRAINT fk_document_change
FOREIGN KEY (publication, version, stele, doc_mpath, status)
REFERENCES document_change(publication, version, stele, doc_mpath, status)
FOREIGN KEY (document_change_id)
REFERENCES document_change(id)
ON DELETE CASCADE,
PRIMARY KEY (publication, version, stele, library_mpath, doc_mpath, status)
PRIMARY KEY (document_change_id, library_mpath)
);
CREATE INDEX library_change_library_mpath_idx ON library_change(library_mpath COLLATE NOCASE);
CREATE INDEX changed_library_document_library_mpath_idx ON changed_library_document(library_mpath COLLATE NOCASE);
Expand Down
40 changes: 38 additions & 2 deletions src/db/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Database related module.
#![allow(clippy::unreachable)]
use async_trait::async_trait;
use sqlx::Transaction;
use std::str::FromStr;

use sqlx::any::{self, AnyPoolOptions};
Expand All @@ -12,8 +13,6 @@ use tracing::instrument;
pub mod init;
/// Models for the database.
pub mod models;
/// Statements for the database.
pub mod statements;

#[async_trait]
/// Generic Database
Expand All @@ -25,6 +24,17 @@ pub trait Db {
async fn connect(url: &str) -> anyhow::Result<DatabaseConnection>;
}

#[async_trait]
/// Generic database transaction: started from a pool and finished with
/// either `commit` or `rollback`.
pub trait Tx {
/// Begin a transaction using the given connection pool.
///
/// # Errors
/// Returns an error if the transaction cannot be started.
async fn begin(pool: AnyPool) -> anyhow::Result<DatabaseTransaction>;
/// Commit the transaction, consuming it.
///
/// # Errors
/// Returns an error if the commit fails.
async fn commit(self) -> anyhow::Result<()>;
/// Roll back the transaction, consuming it.
///
/// # Errors
/// Returns an error if the rollback fails.
async fn rollback(self) -> anyhow::Result<()>;
}

/// Type of database connection.
#[derive(Debug, Clone)]
pub enum DatabaseKind {
Expand All @@ -43,6 +53,12 @@ pub struct DatabaseConnection {
pub kind: DatabaseKind,
}

/// Database transaction wrapper around an `sqlx` transaction on the `Any` driver.
pub struct DatabaseTransaction {
/// The underlying `sqlx` transaction that statements are executed against.
pub tx: Transaction<'static, sqlx::Any>,
}

#[async_trait]
impl Db for DatabaseConnection {
/// Connects to a database.
Expand Down Expand Up @@ -72,3 +88,23 @@ impl Db for DatabaseConnection {
Ok(connection)
}
}

#[async_trait]
impl Tx for DatabaseTransaction {
/// Begin a transaction.
async fn begin(pool: AnyPool) -> anyhow::Result<Self> {
let tx = pool.begin().await?;
Ok(Self { tx })
}
/// Commit a transaction.
async fn commit(self) -> anyhow::Result<()> {
self.tx.commit().await?;
Ok(())
}

/// Rollback a transaction.
async fn rollback(self) -> anyhow::Result<()> {
self.tx.rollback().await?;
Ok(())
}
}
21 changes: 0 additions & 21 deletions src/db/models/changed_library_document.rs

This file was deleted.

31 changes: 31 additions & 0 deletions src/db/models/changed_library_document/manager.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//! Manager for the changed library document model.
use super::ChangedLibraryDocument;
use crate::db::{models::BATCH_SIZE, DatabaseTransaction};
use async_trait::async_trait;
use sqlx::QueryBuilder;

#[async_trait]
impl super::TxManager for DatabaseTransaction {
    /// Insert a bulk of changed library documents into the database.
    ///
    /// Rows are written in batches of at most `BATCH_SIZE` using
    /// `INSERT OR IGNORE`; this is not an upsert, so rows that already exist
    /// are not updated. An empty input executes no statement.
    ///
    /// # Errors
    /// Errors if the changed library documents cannot be inserted into the database.
    async fn insert_bulk(
        &mut self,
        changed_library_document: Vec<ChangedLibraryDocument>,
    ) -> anyhow::Result<()> {
        let mut query_builder = QueryBuilder::new(
            "INSERT OR IGNORE INTO changed_library_document ( library_mpath, document_change_id ) ",
        );
        for chunk in changed_library_document.chunks(BATCH_SIZE) {
            query_builder.push_values(chunk, |mut bindings, cl| {
                bindings
                    .push_bind(&cl.library_mpath)
                    .push_bind(&cl.document_change_id);
            });
            let query = query_builder.build();
            query.execute(&mut *self.tx).await?;
            // `build()` leaves the builder unusable until it is reset. Without
            // this, the next chunk's `push_values` panics. `reset()` also
            // truncates the SQL back to the initial `INSERT ...` prefix.
            query_builder.reset();
        }
        Ok(())
    }
}
34 changes: 34 additions & 0 deletions src/db/models/changed_library_document/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use async_trait::async_trait;
use serde::{Deserialize, Serialize};

pub mod manager;

/// Trait for managing changed library documents within a transaction.
#[async_trait]
pub trait TxManager {
/// Insert a bulk of changed library documents.
///
/// # Errors
/// Returns an error if the rows cannot be inserted.
async fn insert_bulk(
&mut self,
changed_library_document: Vec<ChangedLibraryDocument>,
) -> anyhow::Result<()>;
}

#[derive(sqlx::FromRow, Deserialize, Serialize)]
/// Model for a changed library document: associates a document change
/// with the library it belongs to.
pub struct ChangedLibraryDocument {
/// Foreign key reference to `document_change` id.
pub document_change_id: String,
/// Materialized path to the library
pub library_mpath: String,
}

impl ChangedLibraryDocument {
/// Create a new changed library document from a `document_change` id and
/// the materialized path of the library.
#[must_use]
pub const fn new(document_change_id: String, library_mpath: String) -> Self {
Self {
document_change_id,
library_mpath,
}
}
}
8 changes: 0 additions & 8 deletions src/db/models/document.rs

This file was deleted.

23 changes: 23 additions & 0 deletions src/db/models/document/manager.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//! Manager for the document model.
use crate::db::DatabaseTransaction;
use async_trait::async_trait;

#[async_trait]
impl super::TxManager for DatabaseTransaction {
    /// Insert a document id into the `document` table with `INSERT OR IGNORE`.
    ///
    /// Returns the `last_insert_id` reported for the executed statement.
    ///
    /// # Errors
    /// Errors if the document cannot be inserted into the database.
    async fn create(&mut self, doc_id: &str) -> anyhow::Result<Option<i64>> {
        // The literal's line breaks are part of the statement text and are kept as written.
        let outcome = sqlx::query(
            "
INSERT OR IGNORE INTO document ( doc_id )
VALUES ( $1 )
",
        )
        .bind(doc_id)
        .execute(&mut *self.tx)
        .await?;
        Ok(outcome.last_insert_id())
    }
}
Loading

0 comments on commit c4a5936

Please sign in to comment.