diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..392d473 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,108 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 2 +indent_style = space +insert_final_newline = true +max_line_length = 120 +tab_width = 2 +trim_trailing_whitespace = true +ij_continuation_indent_size = 8 +ij_formatter_off_tag = @formatter:off +ij_formatter_on_tag = @formatter:on +ij_formatter_tags_enabled = false +ij_smart_tabs = false +ij_visual_guides = none +ij_wrap_on_typing = false + +[*.rs] +indent_size = 4 +max_line_length = 100 +ij_continuation_indent_size = 4 +ij_rust_align_multiline_chained_methods = false +ij_rust_align_multiline_parameters = true +ij_rust_align_multiline_parameters_in_calls = true +ij_rust_align_ret_type = true +ij_rust_align_type_params = false +ij_rust_align_where_bounds = true +ij_rust_align_where_clause = false +ij_rust_allow_one_line_match = false +ij_rust_block_comment_at_first_column = false +ij_rust_indent_where_clause = true +ij_rust_keep_blank_lines_in_code = 2 +ij_rust_keep_blank_lines_in_declarations = 2 +ij_rust_keep_indents_on_empty_lines = false +ij_rust_keep_line_breaks = true +ij_rust_line_comment_add_space = true +ij_rust_line_comment_at_first_column = false +ij_rust_min_number_of_blanks_between_items = 1 +ij_rust_preserve_punctuation = false +ij_rust_spaces_around_assoc_type_binding = false + +[.editorconfig] +ij_editorconfig_align_group_field_declarations = false +ij_editorconfig_space_after_colon = false +ij_editorconfig_space_after_comma = true +ij_editorconfig_space_before_colon = false +ij_editorconfig_space_before_comma = false +ij_editorconfig_spaces_around_assignment_operators = true + +[{*.har,*.json}] +ij_json_keep_blank_lines_in_code = 0 +ij_json_keep_indents_on_empty_lines = false +ij_json_keep_line_breaks = true +ij_json_space_after_colon = true +ij_json_space_after_comma = true +ij_json_space_before_colon = true +ij_json_space_before_comma = 
false +ij_json_spaces_within_braces = false +ij_json_spaces_within_brackets = false +ij_json_wrap_long_lines = false + +[{*.htm,*.html,*.sht,*.shtm,*.shtml}] +indent_size = 4 +ij_html_add_new_line_before_tags = body,div,p,form,h1,h2,h3 +ij_html_align_attributes = true +ij_html_align_text = false +ij_html_attribute_wrap = normal +ij_html_block_comment_at_first_column = true +ij_html_do_not_align_children_of_min_lines = 0 +ij_html_do_not_break_if_inline_tags = title,h1,h2,h3,h4,h5,h6,p +ij_html_do_not_indent_children_of_tags = html,body,thead,tbody,tfoot +ij_html_enforce_quotes = false +ij_html_inline_tags = a,abbr,acronym,b,basefont,bdo,big,br,cite,cite,code,dfn,em,font,i,img,input,kbd,label,q,s,samp,select,small,span,strike,strong,sub,sup,textarea,tt,u,var +ij_html_keep_blank_lines = 2 +ij_html_keep_indents_on_empty_lines = false +ij_html_keep_line_breaks = true +ij_html_keep_line_breaks_in_text = true +ij_html_keep_whitespaces = false +ij_html_keep_whitespaces_inside = span,pre,textarea +ij_html_line_comment_at_first_column = true +ij_html_new_line_after_last_attribute = never +ij_html_new_line_before_first_attribute = never +ij_html_quote_style = double +ij_html_remove_new_line_before_tags = br +ij_html_space_after_tag_name = false +ij_html_space_around_equality_in_attribute = false +ij_html_space_inside_empty_tag = false +ij_html_text_wrap = normal + +[{*.toml,Cargo.lock,Gopkg.lock,Pipfile}] +ij_toml_keep_indents_on_empty_lines = false + +[*.md] +trim_trailing_whitespace = true + +[{Makefile,*.mk}] +indent_size = 4 +tab_width = 4 + +[*.sparql] +indent_size = 4 + +[{*.yml,*.yaml}] +indent_size = 2 +trim_trailing_whitespace = true + diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..563dcf3 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +* @jgeluk + diff --git a/.github/workflows/check-commits.yml b/.github/workflows/check-commits.yml new file mode 100644 index 0000000..38521a1 --- /dev/null +++ 
b/.github/workflows/check-commits.yml @@ -0,0 +1,13 @@ +on: [push] + +jobs: + cog_check_job: + runs-on: ubuntu-latest + name: check conventional commit compliance + steps: + - uses: actions/checkout@main + with: + fetch-depth: 0 + + - name: Conventional commits check + uses: cocogitto/cocogitto-action@v3 diff --git a/.github/workflows/release-changelog.yml b/.github/workflows/release-changelog.yml new file mode 100644 index 0000000..e4868f7 --- /dev/null +++ b/.github/workflows/release-changelog.yml @@ -0,0 +1,28 @@ +on: + workflow_dispatch: + +jobs: + release: + runs-on: ubuntu-latest + steps: + - name: Perform release + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Cocogitto release + id: release + uses: cocogitto/cocogitto-action@v3 + with: + release: true + git-user: 'Release Bot' + git-user-email: 'jacobus.geluk@ekgf.org' + + - name: Generate Changelog + run: cog changelog --at ${{ steps.release.outputs.version }} -t full_hash > GITHUB_CHANGELOG.md + + - name: Upload github release + uses: softprops/action-gh-release@v1 + with: + body_path: GITHUB_CHANGELOG.md + tag_name: ${{ steps.release.outputs.version }} diff --git a/.gitignore b/.gitignore index d01bd1a..efe3eb1 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,8 @@ Cargo.lock # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ \ No newline at end of file +#.idea/ + +# Added by cargo + +/target diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..c89d7ef --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "editor.trimAutoWhitespace": true, + "files.autoSaveWorkspaceFilesOnly": true, + "files.trimTrailingWhitespace": true +} \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e7fd5b0 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,197 @@ +# Contributing to GraphArch + +Thank you for considering contributing to **GraphArch**! + +We appreciate your interest in improving this +project, and we're excited to work with you. +This document provides guidelines to help you +get started with contributing to GraphArch. + +## How Can You Contribute? + +There are several ways you can contribute to GraphArch: + +1. **Report Bugs**: + + If you come across a bug, please create an + issue describing the problem and how to + reproduce it. + +2. **Suggest New Features**: + + If you have an idea for a new feature or an + enhancement, feel free to open an issue to + discuss it. + +3. **Code Contributions**: + + You can contribute to the development of + GraphArch by fixing bugs, + adding new features, + or improving the documentation. + +4. **Improve Documentation**: + + Documentation is key to making GraphArch + accessible. + You can help by improving existing + documentation or writing new guides. + +## Contributor License Agreement (CLA) + +To contribute to this project, you must sign +the Contributor License Agreement (CLA). +We use [CLA assistant](https://cla-assistant.io/) to streamline this process. +You will be prompted to sign the CLA when you +make your first contribution. + +## Getting Started (as a developer) + +### Create an Issue + +Before creating a branch, please create an +issue describing your intent. 
+This helps the community understand the +purpose of your work and allows for +discussion and feedback before you +begin implementation. + +### Fork the Repository + +Start by forking the repository to your +GitHub account. + +### Clone the Repository + +Clone the forked repository to your +local machine. + +```shell +git clone https://github.com/ekgf/grapharch.git +cd grapharch +``` + +### Create a Branch + +Once your issue has been approved, create a new branch for your feature or bug fix. + +```shell +git switch -c feature/your-feature-name +``` + +### Make Your Changes + +Implement your changes, make sure they are +well-tested, and follow the coding standards +of the project. + +### Commit Your Changes + +Write a meaningful commit message — +compliant with the "conventional commits" standard, +see below — that explains +what your changes do. + +```shell +git add . +git commit -m "feat: add new feature" + +# In the commit body or footer, reference the issue: +Closes # +``` + +#### More detail + +We enforce the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) standard. +Please use clear and descriptive commit messages that follow this convention. +Additionally, every commit should reference the issue it addresses in the message body or footer, +using `Closes #` or `Fixes #`. + +Examples of valid commit messages: + +```text +feat(sparql): add support for SPARQL endpoint + +Closes #12 +``` + +```text +fix(docs): correct typo + +Fixes #34 +``` + +```text +chore: update dependencies to latest versions + +Related to #56 +``` + +- See more examples: https://gist.github.com/qoomon/5dfcdf8eec66a051ecd85625518cfd13 + + +### Push Your Branch + +Push your branch to your forked repository. + +```shell +git push origin feature/your-feature-name +``` + +### Create a Pull Request + +Go to the original repository on GitHub and +open a pull request from your forked repository. +Describe your changes thoroughly in the pull +request description. 
+ +## Why Use Conventional Commits + +- Automatically generating CHANGELOGs. +- Automatically determining a semantic version + bump (based on the types of commits landed). +- Communicating the nature of changes to + teammates, the public, and other stakeholders. +- Triggering build and publish processes. +- Making it easier for people to contribute to + your projects, by allowing them to explore a + more structured commit history. + +## Guidelines + +### Coding Standards + +Please ensure your code follows best practices +and is formatted correctly. +`rustfmt` must be used before committing, +and this will be verified by a +[GitHub Actions workflow](.github/workflows/check-formatting.yml), +just like the Conventional Commits check. + +### Testing + +Ensure that you write tests for new features +or bug fixes. +Use `cargo test` to run all tests and verify +that everything works as expected. + +## Code of Conduct + +We follow the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.md). +By participating, you are expected to uphold this code. +Please report any behavior that violates this code to +[info@ekgf.org](mailto:info@ekgf.org). + +## Issues and Feature Requests + +If you encounter any issues, have questions +— or have feature requests — please open an issue on the +[GitHub issues page](https://github.com/ekgf/grapharch/issues). + +## Contact + +If you have any questions, feel free to reach out at [info@ekgf.org](mailto:info@ekgf.org). + +--- + +Thank you for helping us make GraphArch a great tool for the community! 
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..96b5379 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "grapharch" +version = "0.0.1" +edition = "2024" +rust-version = "1.85" + +[dependencies] diff --git a/README.md b/README.md index 6f965c5..a356737 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,141 @@ -# graph-doc -A tool to generate documentation for a given graph (knowledge graph, shape graph or ontology) as website or PDF +# GraphArch + +**GraphArch** is an open-source tool developed +by the Enterprise Knowledge Graph Forum ([EKGF](https://ekgf.org)), +a Managed Community of the +Object Management Group ([OMG](https://www.omg.org/communities/enterprise-knowledge-graph-forum.htm)). +The tool aims to generate comprehensive +documentation for knowledge graphs, shape graphs, +labelled property graphs, semantic graphs, +or ontologies, +with outputs available as both websites +(markdown) and +PDFs ([typst](https://typst.app/docs/)). +This project is part of an effort to +streamline graph documentation and governance, +enhancing clarity and accessibility for +various stakeholders. + +## Project Overview + +GraphArch serves two primary use cases: + +1. **Ontology and Schema Documentation** + + GraphArch can document ontologies, + whether OWL-based or non-semantic tech, + such as LPG graph schemas from platforms + like Neo4j or TigerGraph. + This includes individual ontologies as + well as ontology families, + where multiple related ontologies might + be documented collectively, such as the + Financial Industry Business Ontology (FIBO). + +2. **Graph Database Endpoint or EKG Documentation** + + GraphArch can connect to graph database + endpoints (SPARQL, Cypher, GSQL, etc.) + and generate documentation through discovery. + It analyzes the data structures, provides + profiling, and creates reports based on the + detected entities. 
+ This could include a breakdown of the classes, distribution of instances, + identification of PII, and a summary of data models used, generating outputs like + dashboards or reports. + +## Key Features + +- **Ontology Precision**: + GraphArch emphasizes governance and precision in documenting ontologies, + schemas, and their relationships. It aims to deliver librarian-level + accuracy of definitions, domain ownership, and control for enterprise data management. +- **Business-Oriented Profiling**: + GraphArch offers capabilities for graph database endpoint analysis that + can be particularly useful for business users to understand and describe + their data in their own terms. +- **Flexible Output Formats**: + Documentation can be generated as user-friendly websites or professionally + formatted PDF reports. +- **Onboard Graph Engine**: + The tool includes [OxiGraph](https://github.com/oxigraph/oxigraph), + a graph database that can be used to generate data for further documentation, + integrating both input options seamlessly. + + [!NOTE] + It may be necessary, as a feature, to support other graph databases + as well for this intermediate processing. + [RDFox](https://www.oxfordsemantic.tech) would be particularly useful + since it can be baked into the Rust program that we intend to build and + has ultimate speed, OWL reasoning and rule capabilities (SHACL, Datalog). + +## Getting Started + +This repository will serve as the foundation for GraphArch's ongoing development. +Contributions are welcome to help grow this initiative! + +### Prerequisites + +- **Rust**: + Ensure you have Rust (Cargo) installed. +- **Graph Database**: + If you wish to connect GraphArch to an existing + graph database, ensure you have the relevant endpoint and credentials available. 
+ +### Installation + +To get started with GraphArch, clone this repository and install the necessary dependencies: + +```bash +# Clone the repository +git clone https://github.com/ekgf/grapharch.git + +# Navigate into the directory +cd grapharch + +# Install Rust and Cargo +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +``` + +### Run + +```bash +cargo run +``` + +### Usage + +GraphArch can be used in two primary modes: + +1. **Ontology Documentation Mode**: + To generate documentation for ontologies or schemas, use the following command: + + ```bash + cargo run grapharch --mode ontology --input + ``` + +2. **Graph Endpoint Discovery Mode**: + To connect to a graph database endpoint and generate reports: + + ```bash + cargo run grapharch --mode graph --url + ``` + +## Contributing + +We welcome contributions from the community! +Please refer to our [Contributing Guidelines](CONTRIBUTING.md) +for details on how to get involved. + +## License + +GraphArch is released under the [MIT License](LICENSE). + +## Contact + +For more information about GraphArch or the Enterprise Knowledge Graph Forum, +please contact us at [info@ekgf.org](mailto:info@ekgf.org). + +--- + +Join us in building a comprehensive tool for documenting and exploring the world of enterprise graphs! 
diff --git a/cog.toml b/cog.toml new file mode 100644 index 0000000..1ccbd2e --- /dev/null +++ b/cog.toml @@ -0,0 +1,38 @@ +from_latest_tag = false +ignore_merge_commits = true +generate_mono_repository_global_tag = true +branch_whitelist = ["main", "release/**"] +tag_prefix = "" + +pre_bump_hooks = [ + "cargo build --release", + "cargo set-version --workspace {{version}}", +] +post_bump_hooks = [ + "git push --no-verify", + "git push origin {{version}} --no-verify", +] + +[git_hooks] + +[git_hooks.commit-msg] +script = """#!/bin/sh +set -e +cog verify --file $1 +cog check +cargo +nightly fmt -v --all --check +cargo +nightly clippy +""" + + +[commit_types] +chore = { changelog_title = "Misc", omit_from_changelog = true } +release = { changelog_title = "Releases" } + +[changelog] +path = "CHANGELOG.md" +template = "remote" +remote = "github.com" +owner = "EKGF" +repository = "grapharch" +authors = [{ signature = "Jacobus Geluk", username = "jgeluk" }] diff --git a/docs/contribute/git-setup.md b/docs/contribute/git-setup.md new file mode 100644 index 0000000..7d7dd41 --- /dev/null +++ b/docs/contribute/git-setup.md @@ -0,0 +1,20 @@ +# Git setup + +## Set the right email address before committing + +```shell +git config user.email "youremail@yourdomain.com" +git config user.name "Your Name" +``` + +## Try to avoid merge commits + +```shell +git config --local branch.autosetuprebase always +git config --local merge.ff only +``` + +## References + +- [How to prevent merge conflicts](https://dev.to/github/how-to-prevent-merge-conflicts-or-at-least-have-less-of-them-109p) +- [Git rebase for preventing merge commits](https://jenchan.biz/blog/git-rebase-for-preventing-merge-commits) diff --git a/docs/contribute/macos.md b/docs/contribute/macos.md new file mode 100644 index 0000000..777ad4f --- /dev/null +++ b/docs/contribute/macos.md @@ -0,0 +1,52 @@ +# MacOS notes + +## install rust with brew + +On the Rust website, they recommend to [install 
rust](https://www.rust-lang.org/tools/install) +using this command: + +```shell +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +``` + +As you can see, it first installs `rustup` which is kind of a special install script +for all the various tools around rust, including rust itself. +They call that a "tool chain". +With `rustup` you can install or update or change your rust toolchain. + +Rustup itself does not really change much, so you can just as well install it +via homebrew instead and leave it to a regular 'brew upgrade' to upgrade rustup +now and then as well. + +```shell +brew install rustup +``` + +Then go to the git repo root directory and type + +```shell +rustup toolchain install nightly +rustup override set nightly +``` + +## yamlfmt + +Install yamlfmt — to format `.yml` or `.yaml` files — +by using `brew`: + +```shell +brew install yamlfmt +``` + +## cocogitto + +We recommend using cocogitto for anything to do with "conventional commits". + +Install it as follows: + +```shell +cargo install cocogitto +cog install-hook --all +``` + +Check the [Cocogitto User Guide](https://docs.cocogitto.io/guide/init.html) diff --git a/docs/feature/README.md b/docs/feature/README.md new file mode 100644 index 0000000..3eacd0f --- /dev/null +++ b/docs/feature/README.md @@ -0,0 +1,15 @@ +# GraphArch Features + +TODO: Just a few examples of features, there's so much more + +## Sources + +- [git repo](./source-git-repo.md) +- [sparql endpoint](./source-sparql-endpoint.md) + +## Targets + +### HTML + +- [markdown for mkdocs](./target-markdown-for-mkdocs.md) +- [markdown for sphinx](./target-markdown-for-sphinx.md) diff --git a/docs/feature/source-git-repo.md b/docs/feature/source-git-repo.md new file mode 100644 index 0000000..4a1aaaa --- /dev/null +++ b/docs/feature/source-git-repo.md @@ -0,0 +1,34 @@ +# Source — git repository + +[!NOTE] Status +Idea + +Allow users to specify a particular git repository to be their source +of input for any information about 
ontologies, schemas and the like. + +We could create a shallow bare clone of that repo, for caching purposes +and to provide fast access for various types of scans. + +## RDF, OWL, SHACL, SKOS + +Find any RDF files and try to document them. + +## Non-RDF + +Sky is the limit here. We could search for existing +documentation in the form of markdown, AsciiDoc, Word, etc. + +We could scan for Excel spreadsheets and document them. + +Erwin diagrams, UML, Mermaid, SQL schema, and so forth. + +## License + +Find the license and ownership info. + +## Git log + +Derive documentation from the git-log. +Contributors, change frequency, percentage contributed, +and so forth. + diff --git a/docs/feature/source-sparql-endpoint.md b/docs/feature/source-sparql-endpoint.md new file mode 100644 index 0000000..f83e693 --- /dev/null +++ b/docs/feature/source-sparql-endpoint.md @@ -0,0 +1,14 @@ +# Source — SPARQL endpoint + +[!NOTE] Status +Idea + +Scan a given database, pointed to with a given SPARQL endpoint URL, +for things to document such as: + +- OWL Ontologies +- SKOS Taxonomies +- SHACL Shapes + +Or, find "annotations" that people left behind in the graph, +that instruct grapharch how to generate documentation. diff --git a/docs/feature/target-markdown-for-mkdocs.md b/docs/feature/target-markdown-for-mkdocs.md new file mode 100644 index 0000000..e0fe88a --- /dev/null +++ b/docs/feature/target-markdown-for-mkdocs.md @@ -0,0 +1,6 @@ +# Target — Markdown for MkDocs + +[!NOTE] Status +Idea + +Generate MkDocs compliant markdown documentation. diff --git a/docs/feature/target-markdown-for-sphinx.md b/docs/feature/target-markdown-for-sphinx.md new file mode 100644 index 0000000..59f17dd --- /dev/null +++ b/docs/feature/target-markdown-for-sphinx.md @@ -0,0 +1,6 @@ +# Target — Markdown for Sphinx + +[!NOTE] Status +Idea + +Generate [Sphinx compliant markdown documentation](https://www.sphinx-doc.org/en/master/usage/markdown.html#markdown). 
diff --git a/grapharch.code-workspace b/grapharch.code-workspace new file mode 100644 index 0000000..4065757 --- /dev/null +++ b/grapharch.code-workspace @@ -0,0 +1,17 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "git.enableCommitSigning": true, + "git-graph.repository.commits.fetchAvatars": true, + "git-graph.repository.commits.showSignatureStatus": true, + "git-graph.repository.sign.commits": true, + "git.autofetch": "all", + "git.pullBeforeCheckout": true, + "evenBetterToml.taplo.bundled": true, + "editor.formatOnSave": true + } +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..3cfadd9 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,6 @@ +[toolchain] +# At this phase of the project we may just as well use nightly +channel = "nightly" +components = [ "rustfmt", "clippy" ] +# We may want to run grapharch in the browser +targets = [ "wasm32-unknown-unknown" ] diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..e7a11a9 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +}