Skip to content

Commit ae16f88

Browse files
author
github-actions
committed
Generate latest docs on CI, from commit ccac903.
0 parents  commit ae16f88

File tree

727 files changed

+268295
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

727 files changed

+268295
-0
lines changed

.devcontainer/devcontainer.json

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/*
2+
* Copyright 2022-2023 Alibaba Group Holding Limited.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
18+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
19+
// README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node
20+
{
21+
"name": "GraphAr",
22+
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
23+
"image": "registry.cn-hongkong.aliyuncs.com/graphscope/graphar-dev:latest",
24+
// "image": "ubuntu:22.04",
25+
26+
// Features to add to the dev container. More info: https://containers.dev/features.
27+
"features": {
28+
"ghcr.io/devcontainers/features/common-utils:2":{
29+
"installZsh": "true",
30+
"configureZshAsDefaultShell": "true",
31+
"installOhMyZsh": true,
32+
"upgradePackages": "false"
33+
}
34+
},
35+
// Configure tool-specific properties.
36+
"customizations": {
37+
// Configure properties specific to VS Code.
38+
"vscode": {
39+
"settings": {},
40+
"extensions": [
41+
"streetsidesoftware.code-spell-checker",
42+
"eamodio.gitlens",
43+
"github.copilot",
44+
"github.copilot-labs"
45+
]
46+
}
47+
},
48+
49+
// Set `remoteUser` to `root` to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
50+
"remoteUser": "graphar",
51+
52+
// Use 'initializeCommand' to run commands on the host machine before the container is created.
53+
"initializeCommand": "sudo docker pull registry.cn-hongkong.aliyuncs.com/graphscope/graphar-dev:latest",
54+
55+
// Uncomment this to enable C++ and Rust debugging in containers
56+
// "capAdd": ["SYS_PTRACE"],
57+
// "securityOpt": ["seccomp=unconfined"],
58+
59+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
60+
// "forwardPorts": [3000],
61+
62+
// Use 'portsAttributes' to set default properties for specific forwarded ports.
63+
// More info: https://containers.dev/implementors/json_reference/#port-attributes
64+
// "portsAttributes": {
65+
// "9000": {
66+
// "label": "Hello Remote World",
67+
// "onAutoForward": "notify"
68+
// }
69+
// },
70+
71+
// Use 'postCreateCommand' to run commands after the container is created.
72+
// "postCreateCommand": "yarn install"
73+
74+
// Improve performance
75+
76+
// Uncomment these to mount a folder to a volume
77+
// https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-targeted-named-volume
78+
// "mounts": [
79+
// "source=${localWorkspaceFolderBasename}-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume"
80+
// ],
81+
82+
83+
// Uncomment these to use a named volume for your entire source tree
84+
// https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-named-volume-for-your-entire-source-tree
85+
// "workspaceMount": "source=gs,target=/workspaces,type=volume",
86+
// "workspaceFolder": "/workspaces"
87+
"postCreateCommand": "sudo chown -R graphar /workspaces && bash pre-commit/install-hook.sh && bash pre-commit/prepare-commit-msg"
88+
}

.gitleaks.toml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
title = "Gitleaks for Vineyard"
2+
3+
[extend]
4+
useDefault = true
5+
6+
[[rules]]
7+
description = "Alibaba AccessKey ID"
8+
id = "alibaba-access-key-id"
9+
regex = '''(?i)((LTAI)[a-z0-9]+)'''
10+
keywords = [
11+
"ltai",
12+
]
13+
14+
[[rules]]
15+
description = "Alibaba AccessKey ID"
16+
id = "alibaba-access-id-in-config"
17+
regex = '''(?i)((access).?id\s*=\s*.+)'''
18+
keywords = [
19+
"access",
20+
]
21+
22+
[[rules]]
23+
description = "Alibaba AccessKey ID"
24+
id = "alibaba-access-key-in-config"
25+
regex = '''(?i)((access).?key\s*=\s*.+)'''
26+
keywords = [
27+
"access",
28+
]
29+
30+
[[rules]]
31+
description = "Alibaba AccessKey ID"
32+
id = "alibaba-access-secret-in-config"
33+
regex = '''(?i)((access).?secret\s*=\s*.+)'''
34+
keywords = [
35+
"access",
36+
"secret",
37+
]
38+
39+
[[rules]]
40+
description = "Alibaba AccessKey ID"
41+
id = "alibaba-access-key-id-in-config"
42+
regex = '''(?i)((access).?key.?id\s*=\s*.+)'''
43+
keywords = [
44+
"access",
45+
]
46+
47+
[rules.allowlist]
48+
paths = [
49+
'''modules/io/python/drivers/io/tests/test_open.py''',
50+
'''modules/io/python/drivers/io/tests/test_serialize.py''',
51+
]
52+
53+
[[rules]]
54+
description = "Alibaba AccessKey ID"
55+
id = "alibaba-access-key-secret-in-config"
56+
regex = '''(?i)((access).?key.?secret\s*=\s*.+)'''
57+
keywords = [
58+
"access",
59+
"secret",
60+
]
61+
62+
[rules.allowlist]
63+
paths = [
64+
'''modules/io/python/drivers/io/tests/test_open.py''',
65+
'''modules/io/python/drivers/io/tests/test_serialize.py''',
66+
]
67+
68+
[[rules]]
69+
description = "Alibaba AccessKey ID"
70+
id = "alibaba-secret-access-key-in-config"
71+
regex = '''(?i)((secret).?access.?key\s*=\s*.+)'''
72+
keywords = [
73+
"access",
74+
"secret",
75+
]
76+
77+
[allowlist]
78+
paths = [
79+
'''build''',
80+
'''docs/_build''',
81+
'''docs/_templates/footer.html''',
82+
'''thirdparty''',
83+
]

.licenserc.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
header:
2+
license:
3+
spdx-id: Apache-2.0
4+
copyright-owner: Apache Software Foundation
5+
content: |
6+
Copyright 2022-2023 Alibaba Group Holding Limited.
7+
8+
Licensed under the Apache License, Version 2.0 (the "License");
9+
you may not use this file except in compliance with the License.
10+
You may obtain a copy of the License at
11+
12+
http://www.apache.org/licenses/LICENSE-2.0
13+
14+
Unless required by applicable law or agreed to in writing, software
15+
distributed under the License is distributed on an "AS IS" BASIS,
16+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
See the License for the specific language governing permissions and
18+
limitations under the License.
19+
20+
paths-ignore:
21+
- 'dist'
22+
- 'licenses'
23+
- '**/*.md'
24+
- 'LICENSE'
25+
- 'NOTICE'
26+
- 'testing'
27+
- 'spark/src/test/resources'
28+
- 'java/src/test/resources'
29+
- '.licenserc.yaml'
30+
- '.gitignore'
31+
- '.gitleaks.toml'
32+
- '.gitmodules'
33+
- 'pre-commit-config.yaml'
34+
- 'docs'
35+
- '**/.gitignore'
36+
- 'spark/.scalafix.conf'
37+
- 'spark/.scalafmt.conf'
38+
- 'cpp/apidoc'
39+
- 'spark/src/main/scala/com/alibaba/graphar/datasources'
40+
- '*.md'
41+
- '*.rst'
42+
43+
comment: on-failure
44+
45+
# If you don't want to check dependencies' license compatibility, remove the following part
46+
dependency:
47+
files:
48+
- spark/pom.xml # If this is a maven project.
49+
- java/pom.xml # If this is a maven project.

.nojekyll

Whitespace-only changes.

.pre-commit-config.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright 2022-2023 Alibaba Group Holding Limited.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
repos:
16+
- repo: https://github.com/zricethezav/gitleaks
17+
rev: v8.15.0
18+
hooks:
19+
- id: gitleaks
20+
args:
21+
- '--verbose'
22+

_images/edge_logical_table.png

141 KB
Loading

_images/edge_physical_table1.png

233 KB
Loading

_images/edge_physical_table2.png

236 KB
Loading

_images/overview.png

328 KB
Loading

_images/property_graph.png

254 KB
Loading

_images/vertex_logical_table.png

64.3 KB
Loading

_images/vertex_physical_table.png

470 KB
Loading

_panels_static/panels-bootstrap.5fd3999ee7762ccc51105388f4a9d115.css

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
:root {
2+
--tabs-color-label-active: hsla(231, 99%, 66%, 1);
3+
--tabs-color-label-inactive: rgba(178, 206, 245, 0.62);
4+
--tabs-color-overline: rgb(207, 236, 238);
5+
--tabs-color-underline: rgb(207, 236, 238);
6+
--tabs-size-label: 1rem;
7+
}

_sources/cpp/examples/bgl.rst.txt

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
Co-Work with BGL
2+
============================
3+
4+
The `Boost Graph Library (BGL) <https://cs.brown.edu/~jwicks/boost/libs/graph/doc/>`_ is the first C++ library to apply the principles of generic programming to the construction of the advanced data structures and algorithms used in graph computations. The BGL graph interface and graph components are generic in the same sense as the Standard Template Library (STL). It also provides built-in algorithms that cover a core set of algorithm patterns and a larger set of graph algorithms.
5+
6+
We take calculating connected components (CC) as an example to demonstrate how BGL works with GraphAr. A weakly connected component is a maximal subgraph of a graph such that for every pair of vertices in it, there is an undirected path connecting them. The CC algorithm identifies all such components in a graph. Learn more about `the CC algorithm <https://en.wikipedia.org/wiki/Connected_component>`_.
7+
8+
The source code of CC based on BGL can be found at `bgl_example.cc`_. In this program, the graph information file is first read to get the metadata:
9+
10+
.. code:: C++
11+
12+
std::string path = ... // the path of the graph information file
13+
auto graph_info = GraphArchive::GraphInfo::Load(path).value();
14+
15+
And then, the vertex collection and the edge collection are established as the handles to access the graph data:
16+
17+
.. code:: C++
18+
19+
auto maybe_vertices = GraphArchive::VerticesCollection::Make(graph_info, "person");
20+
auto vertices = maybe_vertices.value();
21+
auto maybe_edges = GraphArchive::EdgesCollection::Make(graph_info, "person", "knows", "person", GraphArchive::AdjListType::ordered_by_source);
22+
auto edges = maybe_edges.value();
23+
24+
Next, we construct the in-memory graph data structure for BGL by traversing the vertices and edges via GraphAr's high-level reading interface (the vertex iterator and the edge iterator):
25+
26+
.. code:: C++
27+
28+
// define the Graph type in BGL
29+
typedef boost::adjacency_list<boost::vecS, // use vector to store edges
30+
boost::vecS, // use vector to store vertices
31+
boost::undirectedS, // undirected
32+
boost::property<boost::vertex_name_t, int64_t>, // vertex property
33+
boost::no_property> Graph; // no edge property
34+
// descriptors for vertex in BGL
35+
typedef typename boost::graph_traits<Graph>::vertex_descriptor Vertex;
36+
37+
// declare a graph object with (num_vertices) vertices and an edge iterator
38+
std::vector<std::pair<GraphArchive::IdType, GraphArchive::IdType>> edges_array;
39+
auto it_begin = edges->begin(), it_end = edges->end();
40+
for (auto it = it_begin; it != it_end; ++it)
41+
edges_array.push_back(std::make_pair(it.source(), it.destination()));
42+
Graph g(edges_array.begin(), edges_array.end(), num_vertices);
43+
44+
// define the internal vertex property "id"
45+
boost::property_map<Graph, boost::vertex_name_t>::type id = get(boost::vertex_name_t(), g);
46+
auto v_it_begin = vertices->begin(), v_it_end = vertices->end();
47+
for (auto it = v_it_begin; it != v_it_end; ++it) {
48+
auto vertex = *it;
49+
boost::put(id, vertex.id(), vertex.property<int64_t>("id").value());
50+
}
51+
52+
After that, an internal CC algorithm provided by BGL is called:
53+
54+
.. code:: C++
55+
56+
// define the external vertex property "component"
57+
std::vector<int> component(num_vertices);
58+
// call algorithm: cc
59+
int cc_num = boost::connected_components(g, &component[0]);
60+
std::cout << "Total number of components: " << cc_num << std::endl;
61+
62+
Finally, we can use a **VerticesBuilder** of GraphAr to write the results to newly generated GAR files:
63+
64+
.. code:: C++
65+
66+
// construct a new property group
67+
GraphArchive::Property cc = {"cc", GraphArchive::int32(), false};
68+
std::vector<GraphArchive::Property> property_vector = {cc};
69+
auto group = GraphArchive::CreatePropertyGroup(property_vector, GraphArchive::FileType::PARQUET);
70+
71+
// construct the new vertex info
72+
std::string vertex_label = "cc_result", vertex_prefix = "result/";
73+
int chunk_size = 100;
74+
auto new_info = GraphArchive::CreateVertexInfo(vertex_label, chunk_size, {group}, vertex_prefix);
75+
76+
// access the vertices via the index map and vertex iterator of BGL
77+
typedef boost::property_map<Graph, boost::vertex_index_t>::type IndexMap;
78+
IndexMap index = boost::get(boost::vertex_index, g);
79+
typedef boost::graph_traits<Graph>::vertex_iterator vertex_iter;
80+
std::pair<vertex_iter, vertex_iter> vp;
81+
82+
// dump the results through the VerticesBuilder
83+
GraphArchive::builder::VerticesBuilder builder(new_info, "/tmp/");
84+
for (vp = boost::vertices(g); vp.first!= vp.second; ++vp.first) {
85+
Vertex v = *vp.first;
86+
GraphArchive::builder::Vertex vertex(index[v]);
87+
vertex.AddProperty(cc.name, component[index[v]]);
88+
builder.AddVertex(vertex);
89+
}
90+
builder.Dump();
91+
92+
93+
.. _bgl_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/bgl_example.cc
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
Integrate into GraphScope
2+
============================
3+
4+
`GraphScope <https://graphscope.io/>`_ is a unified distributed graph computing platform that provides a one-stop environment for performing diverse graph operations on a cluster through a user-friendly Python interface. As an important application case of GraphAr, we have integrated it into GraphScope.
5+
6+
GraphScope works on a graph G fragmented via a partition strategy picked by the user, and each worker maintains a fragment of G. Given a query, it posts the same query to all the workers and computes following the BSP (Bulk Synchronous Parallel) model. More specifically, each worker first executes processing against its local fragment to compute partial answers in parallel. Then, each worker may exchange partial results with other workers via synchronous message passing.
7+
8+
To integrate GraphAr into GraphScope, we implemented *ArrowFragmentBuilder* and *ArrowFragmentWriter*. *ArrowFragmentBuilder* establishes the fragments for workers of GraphScope through reading GAR files in parallel. Conversely, *ArrowFragmentWriter* can take the GraphScope fragments and save them as GAR files. If you're interested in knowing more about the implementation, please refer to the `source code <https://github.com/v6d-io/v6d/commit/0eda2067e45fbb4ac46892398af0edc84fe1c27b>`_.
9+
10+
11+
Performance Report
12+
------------------------
13+
14+
Parameter settings
15+
``````````````````
16+
The time performance of *ArrowFragmentBuilder* and *ArrowFragmentWriter* in GraphScope is heavily dependent on the partitioning of the graph into GAR files, that is, the *vertex chunk size* and *edge chunk size*, which are specified in the vertex information file and in the edge information file, respectively. See `GraphAr File Format <../user-guide/file-format.html>`_ to understand the chunk size definitions in GAR.
17+
18+
Generally speaking, fewer chunks are created if the chunk size is large. On small graphs, this can be disadvantageous as it reduces the degree of parallelism, prolonging disk I/O time. On the other hand, having too many small files increases the overhead associated with the file system and the file parser.
19+
20+
We have conducted micro benchmarks to compare the time performance for reading/writing GAR files by *ArrowFragmentBuilder*/*ArrowFragmentWriter*, across different *vertex chunk size* and *edge chunk size* configurations. The settings we recommend for *vertex chunk size* and *edge chunk size* are **2^18** and **2^22**, respectively, which lead to efficient performance in most cases. These settings can be used as the reference values when integrating GraphAr into other systems besides GraphScope.
21+
22+
Time performance results
23+
````````````````````````
24+
Here we report the performance results of *ArrowFragmentBuilder*, and compare it with loading the same graph through the default loading strategy of GraphScope (through reading the CSV files in parallel). The execution time reported below includes loading the graph data from the disk into memory, as well as building GraphScope fragments from such data. The experiments are conducted on a cluster of 4 AliCloud ecs.r6.6xlarge instances (24vCPU, 192GB memory), using `com-friendster <https://snap.stanford.edu/data/com-Friendster.html>`_ (a simple graph) and `ldbc-snb-30 <https://ldbcouncil.org/benchmarks/snb/>`_ (a multi-labeled property graph) as datasets.
25+
26+
+----------------+---------+-----------------+-----------------+
27+
| Dataset | Workers | Default Loading | GraphAr Loading |
28+
+================+=========+=================+=================+
29+
| com-friendster | 4 | 282s | 54s |
30+
+----------------+---------+-----------------+-----------------+
31+
| ldbc-snb-30 | 4 | 196s | 40s |
32+
+----------------+---------+-----------------+-----------------+

_sources/cpp/examples/index.rst.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Examples
2+
---------
3+
4+
.. toctree::
5+
:maxdepth: 2
6+
7+
bgl
8+
graphscope
9+
out-of-core

0 commit comments

Comments
 (0)