From a1e001c26acd8ba2e27a26a0be899a2f2f9d70d1 Mon Sep 17 00:00:00 2001 From: Jiaqi Liu Date: Wed, 6 Nov 2024 20:36:08 +0800 Subject: [PATCH] Support 2 types of expand - apoc & plain DFS (#7) --- .gitignore | 3 + .lycheeignore | 1 + README.md | 50 ++++-- jetty-start.sh | 30 ++++ src/main/java/org/qubitpi/wilhelm/Graph.java | 155 ++++++++++++++++ .../java/org/qubitpi/wilhelm/Language.java | 52 +++++- src/main/java/org/qubitpi/wilhelm/Link.java | 165 ++++++++++++++++++ src/main/java/org/qubitpi/wilhelm/Node.java | 150 ++++++++++++++++ .../{DataServlet.java => Neo4JServlet.java} | 150 ++++++++++------ src/main/resources/logback.xml | 7 +- .../org/qubitpi/wilhelm/GraphSpec.groovy | 77 ++++++++ .../org/qubitpi/wilhelm/LinkSpec.groovy | 78 +++++++++ .../org/qubitpi/wilhelm/NodeSpec.groovy | 74 ++++++++ ...TSpec.groovy => Neo4JServletITSpec.groovy} | 23 +-- ...letSpec.groovy => Neo4JServletSpec.groovy} | 6 +- 15 files changed, 924 insertions(+), 97 deletions(-) create mode 100755 jetty-start.sh create mode 100644 src/main/java/org/qubitpi/wilhelm/Graph.java create mode 100644 src/main/java/org/qubitpi/wilhelm/Link.java create mode 100644 src/main/java/org/qubitpi/wilhelm/Node.java rename src/main/java/org/qubitpi/wilhelm/web/endpoints/{DataServlet.java => Neo4JServlet.java} (70%) create mode 100644 src/test/groovy/org/qubitpi/wilhelm/GraphSpec.groovy create mode 100644 src/test/groovy/org/qubitpi/wilhelm/LinkSpec.groovy create mode 100644 src/test/groovy/org/qubitpi/wilhelm/NodeSpec.groovy rename src/test/groovy/org/qubitpi/wilhelm/web/endpoints/{DataServletITSpec.groovy => Neo4JServletITSpec.groovy} (89%) rename src/test/groovy/org/qubitpi/wilhelm/web/endpoints/{DataServletSpec.groovy => Neo4JServletSpec.groovy} (95%) diff --git a/.gitignore b/.gitignore index a4498b0..943bdbd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,7 @@ target/ logback/ .DS_Store +jetty-base/ +jetty-home-11.0.15/ application.properties +jetty-home-11.0.15.tar.gz diff --git a/.lycheeignore 
b/.lycheeignore index 39c854f..86a6cfa 100644 --- a/.lycheeignore +++ b/.lycheeignore @@ -2,3 +2,4 @@ file:///* https://querydsl.com/* https://uel.java.net/ https://www.openapis.org/ +https://api.paion-data.dev/* diff --git a/README.md b/README.md index 228c938..1ac38d5 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,29 @@ Aristotle ========= ![Java Version Badge][Java Version Badge] -![HashiCorp Packer Badge][HashiCorp Packer Badge] -![HashiCorp Terraform Badge][HashiCorp Terraform Badge] [![Apache License Badge]][Apache License, Version 2.0] Aristotle is a [JSR 370] [JAX-RS] webservice of CRUD operations against a graph database. It supports Neo4J now. -Configuration -------------- +Start Locally in Jetty +---------------------- -- `NEO4J_URI` -- `NEO4J_USERNAME` -- `NEO4J_PASSWORD` -- `NEO4J_DATABASE` +Navigate to a dedicated directory; make sure port 8080 is not occupied and the following environment variables are set: + +```console +export NEO4J_URI= +export NEO4J_USERNAME= +export NEO4J_PASSWORD= +export NEO4J_DATABASE= +``` + +Then start webservice with: + +```bash +./jetty-start.sh +``` + +Press `Ctr-C` to stop the webservice and delete generated directories if needed when done. Test ---- @@ -26,25 +36,31 @@ mvn clean verify Deployment ---------- -```bash -mvn clean package -``` +This is a one-person project. Agility outplays team scaling, so deployment is manual and pretty much follows +[jetty-start.sh](./jetty-start.sh) + +### Sending Logs to ELK Cloud + +Simply add Logstash integration and install agent on the production server. The logs will be available on integration +dashboard. 
### Gateway Registration ```bash export GATEWAY_PUBLIC_IP=52.53.186.26 +# vocabulary paged & count curl -v -i -s -k -X POST https://api.paion-data.dev:8444/services \ --data name=wilhelm-ws-languages \ - --data url="http://${GATEWAY_PUBLIC_IP}:8080/v1/data/languages" + --data url="http://${GATEWAY_PUBLIC_IP}:8080/v1/neo4j/languages" curl -i -k -X POST https://api.paion-data.dev:8444/services/wilhelm-ws-languages/routes \ --data "paths[]=/wilhelm/languages" \ --data name=wilhelm-ws-languages +# expand curl -v -i -s -k -X POST https://api.paion-data.dev:8444/services \ --data name=wilhelm-ws-expand \ - --data url="http://${GATEWAY_PUBLIC_IP}:8080/v1/data/expand" + --data url="http://${GATEWAY_PUBLIC_IP}:8080/v1/neo4j/expand" curl -i -k -X POST https://api.paion-data.dev:8444/services/wilhelm-ws-expand/routes \ --data "paths[]=/wilhelm/expand" \ --data name=wilhelm-ws-expand @@ -54,8 +70,9 @@ We should see `HTTP/1.1 201 Created` as signs of success. #### Example requests: -- https://api.paion-data.dev/wilhelm/languages/german?perPage=100&page=1 -- https://api.paion-data.dev/wilhelm/expand/nämlich +- query vocabulary paged: https://api.paion-data.dev/wilhelm/languages/german?perPage=100&page=1 +- vocabulary count: https://api.paion-data.dev/wilhelm/languages/german/count +- expand: https://api.paion-data.dev/wilhelm/expand/nämlich License ------- @@ -65,9 +82,6 @@ The use and distribution terms for [Aristotle]() are covered by the [Apache Lice [Apache License Badge]: https://img.shields.io/badge/Apache%202.0-F25910.svg?style=for-the-badge&logo=Apache&logoColor=white [Apache License, Version 2.0]: https://www.apache.org/licenses/LICENSE-2.0 -[HashiCorp Packer Badge]: https://img.shields.io/badge/Packer-02A8EF?style=for-the-badge&logo=Packer&logoColor=white -[HashiCorp Terraform Badge]: https://img.shields.io/badge/Terraform-7B42BC?style=for-the-badge&logo=terraform&logoColor=white - [Java Version Badge]:
https://img.shields.io/badge/Java-17-brightgreen?style=for-the-badge&logo=OpenJDK&logoColor=white [JAX-RS]: https://jcp.org/en/jsr/detail?id=370 [JSR 370]: https://jcp.org/en/jsr/detail?id=370 diff --git a/jetty-start.sh b/jetty-start.sh new file mode 100755 index 0000000..3aca33f --- /dev/null +++ b/jetty-start.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -x +set -e + +# Copyright Jiaqi Liu +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mvn clean package -Dcheckstyle.skip -DskipTests + +wget -O jetty-home-11.0.15.tar.gz https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-home/11.0.15/jetty-home-11.0.15.tar.gz +tar -xzvf jetty-home-11.0.15.tar.gz +export JETTY_HOME=$(pwd)/jetty-home-11.0.15 + +mkdir -p jetty-base +cd jetty-base +java -jar $JETTY_HOME/start.jar --add-module=annotations,server,http,deploy,servlet,webapp,resources,jsp + +mv ../target/wilhelm-ws-1.0-SNAPSHOT.war webapps/ROOT.war +java -jar $JETTY_HOME/start.jar diff --git a/src/main/java/org/qubitpi/wilhelm/Graph.java b/src/main/java/org/qubitpi/wilhelm/Graph.java new file mode 100644 index 0000000..230c08a --- /dev/null +++ b/src/main/java/org/qubitpi/wilhelm/Graph.java @@ -0,0 +1,155 @@ +/* + * Copyright Jiaqi Liu + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.qubitpi.wilhelm; + +import com.fasterxml.jackson.annotation.JsonIncludeProperties; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import jakarta.validation.constraints.NotNull; +import net.jcip.annotations.Immutable; +import net.jcip.annotations.ThreadSafe; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * A JSON-serializable object representation of a knowledge graph in wilhelm-ws. + */ +@Immutable +@ThreadSafe +@SuppressWarnings("ClassCanBeRecord") +@JsonIncludeProperties({ "nodes", "links" }) +public class Graph { + + private static final Logger LOG = LoggerFactory.getLogger(Graph.class); + private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); + + private final Set nodes; + private final Set links; + + /** + * All-args constructor. + * + * @param nodes The set of all nodes contained in this Graph, cannot be {@code null} + * @param links The set of all links contained in this Graph, cannot be {@code null} + */ + public Graph(@NotNull final Set nodes, @NotNull final Set links) { + this.nodes = new HashSet<>(Objects.requireNonNull(nodes)); + this.links = new HashSet<>(Objects.requireNonNull(links)); + } + + /** + * Creates a new {@link Graph} instance with no initial nodes or links in it. 
+ * + * @return a new instance + */ + public static Graph emptyGraph() { + return new Graph(new HashSet<>(), new HashSet<>()); + } + + /** + * Returns whether or not this {@link Graph} has neither nodes nor links. + * + * @return {@code true} if no nodes or links exist in this {@link Graph}, or {@code false} otherwise. + */ + public boolean isEmpty() { + return getNodes().isEmpty() && getLinks().isEmpty(); + } + + /** + * Returns all weakly connected neighbors of a specified node. + *

+ * If the node has no such neighbors, this method returns an empty set. + * + * @param node a node from this {@link Graph} + * + * @return all nodes each of which has a link between it and the provided node. + */ + @NotNull + public Set getUndirectedNeighborsOf(final Node node) { + final Set neighborIds = getLinks().stream() + .filter(link -> + node.getId().equals(link.getSourceNodeId()) || node.getId().equals(link.getTargetNodeId()) + ) + .flatMap(link -> Stream.of(link.getSourceNodeId(), link.getTargetNodeId())) + .filter(id -> !node.getId().equals(id)) + .collect(Collectors.toUnmodifiableSet()); + + return getNodes().stream() + .filter(it -> neighborIds.contains(it.getId())) + .collect(Collectors.toUnmodifiableSet()); + } + + /** + * Combines the nodes and links from this {@link Graph} instance and the other one and returns a new {@link Graph}. + * + * @param that the other {@link Graph} instance to be merged with this {@link Graph} + * + * @return a new instance + */ + public Graph merge(@NotNull final Graph that) { + return new Graph( + Stream.of(this.getNodes(), that.getNodes()).flatMap(Set::stream).collect(Collectors.toSet()), + Stream.of(this.getLinks(), that.getLinks()).flatMap(Set::stream).collect(Collectors.toSet()) + ); + } + + /** + * Returns an unmodifiable view of all the nodes in this Graph instance. + * + * @return an immutable set of nodes + */ + @NotNull + public Set getNodes() { + return Collections.unmodifiableSet(nodes); + } + + /** + * Returns an unmodifiable view of all the links in this Graph instance. + * + * @return an immutable set of links + */ + @NotNull + public Set getLinks() { + return Collections.unmodifiableSet(links); + } + + /** + * Returns a JSON serialization of this Graph instance. It contains 2 fields: nodes and links, each of which is a + * list of nodes and links respectively. Each list element is itself a JSON object whose structure is defined by + * Jackson's serialization on {@link Node} and {@link Link}.
+ * + * @return a JSON string + */ + @NotNull + @Override + public String toString() { + try { + return JSON_MAPPER.writeValueAsString(this); + } catch (final JsonProcessingException exception) { + LOG.error(exception.getMessage()); + throw new IllegalStateException(exception); + } + } +} diff --git a/src/main/java/org/qubitpi/wilhelm/Language.java b/src/main/java/org/qubitpi/wilhelm/Language.java index ce5ebee..df31869 100644 --- a/src/main/java/org/qubitpi/wilhelm/Language.java +++ b/src/main/java/org/qubitpi/wilhelm/Language.java @@ -20,6 +20,7 @@ import net.jcip.annotations.ThreadSafe; import java.util.Arrays; +import java.util.function.Function; import java.util.stream.Collectors; /** @@ -59,7 +60,7 @@ public enum Language { } /** - * Constructs a {@link Language} from its client-side name. + * Constructs a {@link Language} from its {@link #getDatabaseName() database name}. * * @param language The client-side requested language name * @@ -67,17 +68,53 @@ public enum Language { * * @throws IllegalArgumentException if the language name is not a valid one */ + @NotNull + public static Language ofDatabaseName(@NotNull final String language) { + return valueOf(language, Language::getDatabaseName); + } + + /** + * Constructs a {@link Language} from its {@link #getPathName() client-side name}. + * + * @param language The client-side requested language name + * + * @return a new instance + * + * @throws IllegalArgumentException if the language name is not a valid one + */ + @NotNull public static Language ofClientValue(@NotNull final String language) throws IllegalArgumentException { + return valueOf(language, Language::getPathName); + } + + /** + * Converts a string value to a {@link Language} object. + *

+ * The string value must match one of the {@link #getDatabaseName() database name} or + * {@link #getPathName() client API name}. + * + * @param language A string whose value is equal to either {@link #getDatabaseName() database name} or + * {@link #getPathName() client API name} + * @param nameExtractor If {@code language} matches {@link #getDatabaseName()}, use {@link #getDatabaseName()}; + * otherwise use {@link #getPathName()} + * + * @return a new instance + * + * @throws IllegalArgumentException if the language name is not a valid one + */ + private static Language valueOf(@NotNull final String language, final Function nameExtractor) { return Arrays.stream(values()) - .filter(value -> value.pathName.equals(language)) + .filter(value -> nameExtractor.apply(value).equals(language)) .findFirst() .orElseThrow(() -> new IllegalArgumentException( String.format( "'%s' is not a recognized language. Acceptable ones are %s", language, - Arrays.stream(values()).map(Language::getPathName).collect(Collectors.joining(", ") - ) - ))); + Arrays.stream(values()) + .map(nameExtractor) + .collect(Collectors.joining(", ")) + ) + )); } @NotNull @@ -89,4 +126,9 @@ public String getPathName() { public String getDatabaseName() { return databaseName; } + + @Override + public String toString() { + return getDatabaseName(); + } } diff --git a/src/main/java/org/qubitpi/wilhelm/Link.java b/src/main/java/org/qubitpi/wilhelm/Link.java new file mode 100644 index 0000000..3f69838 --- /dev/null +++ b/src/main/java/org/qubitpi/wilhelm/Link.java @@ -0,0 +1,165 @@ +/* + * Copyright Jiaqi Liu + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.qubitpi.wilhelm; + +import com.fasterxml.jackson.annotation.JsonIncludeProperties; + +import org.neo4j.driver.types.Relationship; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import jakarta.validation.constraints.NotNull; +import net.jcip.annotations.Immutable; +import net.jcip.annotations.ThreadSafe; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * A JSON-serializable object representation of a directed link of knowledge graph in wilhelm-ws. + *

+ * A {@link Link} has 4 public attributes: + *

    + *
  1. A string used as the caption for rendering the link + *
  2. The {@link Node#getId() unique identifier of node} originating this link + *
  3. The {@link Node#getId() unique identifier of node} pointed to by this link + *
  4. A map containing all other information encapsulated + *
+ */ +@Immutable +@ThreadSafe +@JsonIncludeProperties({ "label", "sourceNodeId", "targetNodeId", "attributes" }) +public class Link { + + private static final Logger LOG = LoggerFactory.getLogger(Link.class); + + private final String label; + private final String sourceNodeId; + private final String targetNodeId; + private final Map attributes; + + /** + * All-args constructor. + * + * @param label The caption for the rendering of the link + * @param sourceNodeId The {@link Node#getId() ID} of the node originating this directed {@link Link} + * @param targetNodeId The {@link Node#getId() ID} of the node pointed to by this directed {@link Link} + * @param attributes The fields attached to this node other than label + * + * @throws NullPointerException if any argument is {@code null} + */ + private Link( + @NotNull final String label, + @NotNull final String sourceNodeId, + @NotNull final String targetNodeId, + @NotNull final Map attributes + ) { + this.label = Objects.requireNonNull(label); + this.sourceNodeId = Objects.requireNonNull(sourceNodeId); + this.targetNodeId = Objects.requireNonNull(targetNodeId); + this.attributes = new HashMap<>(Objects.requireNonNull(attributes)); // defensive copy + } + + /** + * Converts a Neo4J API relationship to a wilhelm-ws {@link Link}. + *

+ * The Neo4J relationship must contain a property called "name", otherwise an unchecked exception is thrown. The + * {@link Relationship#startNodeElementId()} would be the {@link Node#getId() source node ID}; the + * {@link Relationship#endNodeElementId()} would be the {@link Node#getId() target node ID} the "name" property + * would be the {@link #getLabel() label of this node.}; the rest of the properties would be the + * {@link #getAttributes() attributes} of this node + * + * @param relationship A Neo4J Java driver API relationship + * + * @return a new instance of converted {@link Link} + * + * @throws NullPointerException if {@code relationship} is {@code null} + * @throws IllegalStateException if {@code relationship} is missing a "name" property + */ + public static Link valueOf(final Relationship relationship) { + final String labelKey = "name"; + if (!Objects.requireNonNull(relationship).asMap().containsKey(labelKey)) { + LOG.error("Neo4J relationship does not contain '{}' attribute: {}", labelKey, relationship.asMap()); + throw new IllegalStateException( + "There seems to be a data format mismatch between Wilhelm webservice and Neo4J database. " + + "Please file an issue at https://github.com/QubitPi/wilhelm-ws/issues for a fix" + ); + } + + final String label = relationship.asMap().get(labelKey).toString(); + final Map attributes = relationship.asMap().entrySet().stream() + .filter(entry -> !labelKey.equals(entry.getKey())) + .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); + + return new Link(label, relationship.startNodeElementId(), relationship.endNodeElementId(), attributes); + } + + @NotNull + public String getLabel() { + return label; + } + + @NotNull + public String getSourceNodeId() { + return sourceNodeId; + } + + @NotNull + public String getTargetNodeId() { + return targetNodeId; + } + + /** + * Returns an immutable view of the attributes of this Link. 
+ * + * @return an unmodifiable map + */ + @NotNull + public Map getAttributes() { + return Collections.unmodifiableMap(attributes); + } + + @Override + public boolean equals(final Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + final Link that = (Link) other; + return Objects.equals(getLabel(), that.getLabel()) && Objects.equals( + getSourceNodeId(), + that.getSourceNodeId() + ) && Objects.equals( + getTargetNodeId(), + that.getTargetNodeId() + ) && Objects.equals(getAttributes(), that.getAttributes()); + } + + @Override + public int hashCode() { + return Objects.hash(getLabel(), getSourceNodeId(), getTargetNodeId(), getAttributes()); + } + + @Override + public String toString() { + return String.format("(%s)-%s-(%s)", getSourceNodeId(), getLabel(), getTargetNodeId()); + } +} diff --git a/src/main/java/org/qubitpi/wilhelm/Node.java b/src/main/java/org/qubitpi/wilhelm/Node.java new file mode 100644 index 0000000..82554ce --- /dev/null +++ b/src/main/java/org/qubitpi/wilhelm/Node.java @@ -0,0 +1,150 @@ +/* + * Copyright Jiaqi Liu + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.qubitpi.wilhelm; + +import com.fasterxml.jackson.annotation.JsonIncludeProperties; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import jakarta.validation.constraints.NotNull; +import net.jcip.annotations.Immutable; +import net.jcip.annotations.ThreadSafe; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +/** + * A JSON-serializable object representation of a knowledge graph node in wilhelm-ws. + *

+ * A {@link Node} has 3 public attributes: + *

    + *
  1. A unique string identifier within a knowledge graph + *
  2. A string used as the caption for rendering the node + *
  3. A map containing all other information encapsulated + *
+ */ +@Immutable +@ThreadSafe +@JsonIncludeProperties({ "id", "label", "attributes" }) +public class Node { + + private static final Logger LOG = LoggerFactory.getLogger(Node.class); + + private final String id; + private final String label; + private final Map attributes; + + /** + * All-args constructor. + * + * @param id The unique identifier of a node within a graph. Does not need to be unique across the database. + * @param label The caption for the rendering of the node + * @param attributes The fields attached to this node other than label + * + * @throws NullPointerException if any argument is {@code null} + */ + private Node(@NotNull final String id, @NotNull final String label, @NotNull final Map attributes) { + this.id = Objects.requireNonNull(id); + this.label = Objects.requireNonNull(label); + this.attributes = new HashMap<>(Objects.requireNonNull(attributes)); // defensive copy + } + + /** + * Converts a Neo4J API node to a wilhelm-ws {@link Node}. + *

+ * The Neo4J node must contain a property called "name", otherwise an unchecked exception is thrown. The + * {@link org.neo4j.driver.types.Node#elementId()} would be the {@link #getId() ID of this node}; the "name" + * property would be the {@link #getLabel() label of this node.}; the rest of the properties would be the + * {@link #getAttributes() attributes} of this node + * + * @param node A Neo4J Java driver API node + * + * @return a new instance of converted {@link Node} + * + * @throws NullPointerException if {@code node} is {@code null} + * @throws IllegalStateException if {@code node} is missing a "name" property + */ + public static Node valueOf(@NotNull final org.neo4j.driver.types.Node node) { + final String labelKey = "name"; + if (!Objects.requireNonNull(node).asMap().containsKey(labelKey)) { + LOG.error("Neo4J node does not contain '{}' attribute: {}", labelKey, node.asMap()); + throw new IllegalStateException( + "There seems to be a data format mismatch between Wilhelm webservice and Neo4J database. " + + "Please file an issue at https://github.com/QubitPi/wilhelm-ws/issues for a fix" + ); + } + + final String label = node.asMap().get(labelKey).toString(); + final Map attributes = node.asMap().entrySet().stream() + .filter(entry -> !labelKey.equals(entry.getKey())) + .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); + + return new Node(node.elementId(), label, attributes); + } + + @NotNull + public String getId() { + return id; + } + + @NotNull + public String getLabel() { + return label; + } + + /** + * Returns an immutable view of the attributes of this Node. 
+ * + * @return an unmodifiable map + */ + @NotNull + public Map getAttributes() { + return Collections.unmodifiableMap(attributes); + } + + @Override + public boolean equals(final Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + final Node that = (Node) other; + return Objects.equals(getId(), that.getId()); + } + + @Override + public int hashCode() { + return Objects.hash(getId()); + } + + /** + * Returns a string representation of this Node. + *

+ * The content of the string equals to {@link #getLabel() the label} of this Node. + * + * @return a human-readable caption of this node + */ + @Override + public String toString() { + return getLabel(); + } +} diff --git a/src/main/java/org/qubitpi/wilhelm/web/endpoints/DataServlet.java b/src/main/java/org/qubitpi/wilhelm/web/endpoints/Neo4JServlet.java similarity index 70% rename from src/main/java/org/qubitpi/wilhelm/web/endpoints/DataServlet.java rename to src/main/java/org/qubitpi/wilhelm/web/endpoints/Neo4JServlet.java index 53b687a..c0c6c31 100644 --- a/src/main/java/org/qubitpi/wilhelm/web/endpoints/DataServlet.java +++ b/src/main/java/org/qubitpi/wilhelm/web/endpoints/Neo4JServlet.java @@ -23,13 +23,19 @@ import org.neo4j.driver.QueryConfig; import org.neo4j.driver.Value; import org.neo4j.driver.internal.types.InternalTypeSystem; +import org.qubitpi.wilhelm.Graph; import org.qubitpi.wilhelm.Language; import org.qubitpi.wilhelm.LanguageCheck; +import org.qubitpi.wilhelm.Link; +import org.qubitpi.wilhelm.Node; import org.qubitpi.wilhelm.config.ApplicationConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import jakarta.inject.Inject; import jakarta.inject.Singleton; import jakarta.validation.constraints.NotNull; +import jakarta.ws.rs.DefaultValue; import jakarta.ws.rs.GET; import jakarta.ws.rs.Path; import jakarta.ws.rs.PathParam; @@ -41,12 +47,10 @@ import net.jcip.annotations.ThreadSafe; import java.util.AbstractMap; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; -import java.util.stream.Stream; import java.util.stream.StreamSupport; /** @@ -55,38 +59,25 @@ @Singleton @Immutable @ThreadSafe -@Path("/data") +@Path("/neo4j") @Produces(MediaType.APPLICATION_JSON) -public class DataServlet { +public class Neo4JServlet { + private static final Logger LOG = LoggerFactory.getLogger(Neo4JServlet.class); 
private static final ApplicationConfig APPLICATION_CONFIG = ConfigFactory.create(ApplicationConfig.class); private static final String NEO4J_URL = APPLICATION_CONFIG.neo4jUrl(); private static final String NEO4J_USERNAME = APPLICATION_CONFIG.neo4jUsername(); private static final String NEO4J_PASSWORD = APPLICATION_CONFIG.neo4jPassword(); private static final String NEO4J_DATABASE = APPLICATION_CONFIG.neo4jDatabase(); - /** * Constructor for dependency injection. */ @Inject - public DataServlet() { + public Neo4JServlet() { // intentionally left blank } - /** - * A webservice sanity-check endpoint. - * - * @return 200 OK response - */ - @GET - @Path("/healthcheck") - public Response healthcheck() { - return Response - .status(Response.Status.OK) - .build(); - } - /** * Returns the total number of terms of a specified langauges. * @@ -158,56 +149,107 @@ public Response getVocabularyByLanguagePaged( @Produces(MediaType.APPLICATION_JSON) @SuppressWarnings("MultipleStringLiterals") public Response expand(@NotNull @PathParam("word") final String word) { + return expandApoc(word, "3"); + } + + /** + * Recursively find all related terms and definitions of a word using multiple Cypher queries with a plain DFS + * algorithm. + *

+ * This is good for large sub-graph expand because it breaks huge memory consumption into sub-expand queries. But + * this endpoint sends multiple queries to database which incurs roundtrips and large Network I/O. + * + * @param word The word to expand + * + * @return a JSON representation of the expanded sub-graph + */ + @GET + @Path("/expandDfs/{word}") + @Produces(MediaType.APPLICATION_JSON) + @SuppressWarnings("MultipleStringLiterals") + public Response expandDfs(@NotNull @PathParam("word") final String word) { + return Response + .status(Response.Status.OK) + .entity(expandDfs(word, new HashSet<>())) + .build(); + } + + /** + * Recursively find all related terms and definitions of a word using multiple Cypher queries with a plain DFS + * algorithm. + * + * @param label The word to expand + * @param visited A record that keeps track of visited nodes + * + * @return the expanded sub-graph + */ + private Graph expandDfs(@NotNull final String label, @NotNull final Set<String> visited) { + if (visited.contains(label)) { + return Graph.emptyGraph(); + } + + visited.add(label); + final Graph oneHopExpand = (Graph) expandApoc(label, "1").getEntity(); + final Node wordNode = oneHopExpand.getNodes().stream() + .filter(node -> label.equals(node.getLabel())) + .findFirst() + .orElseThrow(() -> { + final String message = String.format("'%s' was not found in graph %s", label, oneHopExpand); + LOG.error(message); + return new IllegalArgumentException(message); + }); + return oneHopExpand.getUndirectedNeighborsOf(wordNode).stream() + .map(neighbor -> expandDfs(neighbor.getLabel(), visited)) + .reduce(oneHopExpand, Graph::merge); + } + + /** + * Recursively find all related terms and definitions of a word using a single Cypher query with apoc extension. + *

+ * This is bad for large sub-graph expand because it will exhaust memories allocated for the query in database. This + * is good for small-subgraph expand when WS and database are far away from each other. + * + * @param word The word to expand + * @param maxHops The max length of expanded path, as an integer. Use "-1" for a path of unlimited length. + * + * @return a JSON representation of the expanded sub-graph, i.e. an object with two fields: "nodes" and "links" + */ + @GET + @Path("/expandApoc/{word}") + @Produces(MediaType.APPLICATION_JSON) + @SuppressWarnings("MultipleStringLiterals") + public Response expandApoc( + @NotNull @PathParam("word") final String word, + @NotNull @QueryParam("maxHops") @DefaultValue("-1") final String maxHops + ) { + LOG.info("apoc expanding '{}' with max hops of {}", word, maxHops); + final String query = String.format( """ - MATCH (term:Term{name:'%s'}) - CALL apoc.path.expand(term, "RELATED|DEFINITION", null, 1, -1) + MATCH (node{name:'%s'}) + CALL apoc.path.expand(node, "LINK", null, 1, %s) YIELD path RETURN path, length(path) AS hops ORDER BY hops; """, - word + word.replace("\\", "\\\\").replace("'", "\\'"), Integer.parseInt(maxHops.trim()) ); final EagerResult result = executeNativeQuery(query); - final Map>> responseBody = Map.of( - "nodes", new ArrayList<>(), - "links", new ArrayList<>() - ); + final Set<Node> nodes = new HashSet<>(); + final Set<Link> links = new HashSet<>(); result.records().stream() .map(record -> record.get("path").asPath()) .forEach(path -> { - path.nodes().forEach(node -> responseBody.get("nodes").add( - Stream.of( - node.asMap(), - Collections.singletonMap("id", node.elementId()) - ) - .flatMap(map -> map.entrySet().stream()) - .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)) - )); - path.relationships().forEach(relationship -> responseBody.get("links").add( - Stream.of( - relationship.asMap(), - Collections.singletonMap( - "sourceNodeId", - relationship.startNodeElementId() - ), - Collections.singletonMap( - "targetNodeId", - relationship.endNodeElementId() - ) - )
- .flatMap(map -> map.entrySet().stream()) - .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)) - )); + path.nodes().forEach(node -> nodes.add(Node.valueOf(node))); + path.relationships().forEach(relationship -> links.add(Link.valueOf(relationship))); }); - return Response .status(Response.Status.OK) - .entity(responseBody) + .entity(new Graph(nodes, links)) .build(); } @@ -239,6 +281,8 @@ record -> record.keys() * @param query A standard cypher query string * * @return query's native result + * + * @throws IllegalStateException if a query execution error occurs */ @NotNull private EagerResult executeNativeQuery(@NotNull final String query) { diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index 696f341..b9979b2 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -23,11 +23,14 @@ limitations under the License. - + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} -%kvp- %msg%n + - + diff --git a/src/test/groovy/org/qubitpi/wilhelm/GraphSpec.groovy b/src/test/groovy/org/qubitpi/wilhelm/GraphSpec.groovy new file mode 100644 index 0000000..1f9f70d --- /dev/null +++ b/src/test/groovy/org/qubitpi/wilhelm/GraphSpec.groovy @@ -0,0 +1,77 @@ +/* + * Copyright Jiaqi Liu + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.qubitpi.wilhelm + +import com.fasterxml.jackson.databind.ObjectMapper + +import groovy.json.JsonSlurper +import spock.lang.Specification + +class GraphSpec extends Specification { + + def "JSON serialization of Graph includes 2 attributes - nodes and links"() { + when: "a Graph object is serialized to a JSON" + def actual = new JsonSlurper().parseText( + new ObjectMapper().writeValueAsString( + new Graph(new HashSet(), new HashSet()) + ) + ) + + then: "the JSON has 2 fields" + actual.size() == 2 + + and: "the fields are nodes and links" + actual.keySet().contains("nodes") + actual.keySet().contains("links") + } + + @SuppressWarnings('GroovyAccessibility') + def "When a graph has 4 nodes, one being isolated, 1 node (A) has 2 neighbors with one being the source and the other being the target node, getting neighbors on A returns 2 nodes"() { + given: "an isolated node" + Node isolated = new Node( "isolated", "isolated", [:]) + + and: "a node with 2 neighbors" + Node node = new Node( "node", "node", [:]) + Node neighbor1 = new Node( "neighbor1", "neighbor1", [:]) + Node neighbor2 = new Node( "neighbor2", "neighbor2", [:]) + + Link link1 = new Link("pointing from node to neighbor1", "node", "neighbor1", [:]) + Link link2 = new Link("pointing from neighbor2 to node", "neighbor2", "node", [:]) + + and: "the 4 nodes and 2 links belong to a graph under test" + Graph graph = new Graph([isolated, node, neighbor1, neighbor2] as Set, [link1, link2] as Set) + + when: "retrieving the neighbors of the node" + Set actual = graph.getUndirectedNeighborsOf(node) + + then: "both neighbor1 and neighbor2 are within the result" + actual == [neighbor1, neighbor2] as Set + } + + @SuppressWarnings('GroovyAccessibility') + def "Merging 2 graphs combines their nodes and links"() { + given: "2 graphs to be merged" + Graph graph1 = new Graph([new Node( "node1", "node1", [:])] as Set, [new Link("link1", "", "", [:])] as Set) + Graph graph2 = new Graph([new Node( "node2", 
"node2", [:])] as Set, [new Link("link2", "", "", [:])] as Set) + + when: "2 graphs are merged" + Graph actual = graph1.merge(graph2) + + then: "the new graph contains all nodes and links from both graph1 and graph2" + actual.nodes == [new Node( "node1", "node1", [:]), new Node( "node2", "node2", [:])] as Set + actual.links == [new Link("link1", "", "", [:]), new Link("link2", "", "", [:])] as Set + } +} diff --git a/src/test/groovy/org/qubitpi/wilhelm/LinkSpec.groovy b/src/test/groovy/org/qubitpi/wilhelm/LinkSpec.groovy new file mode 100644 index 0000000..0b85ed8 --- /dev/null +++ b/src/test/groovy/org/qubitpi/wilhelm/LinkSpec.groovy @@ -0,0 +1,78 @@ +/* + * Copyright Jiaqi Liu + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.qubitpi.wilhelm + +import com.fasterxml.jackson.databind.ObjectMapper + +import org.neo4j.driver.types.Relationship + +import groovy.json.JsonSlurper +import spock.lang.Specification + +class LinkSpec extends Specification { + + @SuppressWarnings('GroovyAccessibility') + def "JSON serialization of Link includes 4 attributes, one of them is '#attribute'"() { + when: "a Node object is serialized to a JSON" + def actual = new JsonSlurper().parseText( + new ObjectMapper().writeValueAsString( + new Link( "my link", "node1", "node2", [type: "follows"]) + ) + ) + + then: "the JSON has 4 fields" + actual.size() == 4 + + and: "the fields contains one of the required fields" + actual.keySet().contains(attribute) + + where: + _ | attribute + _ | "label" + _ | "sourceNodeId" + _ | "targetNodeId" + _ | "attributes" + } + + def "when a Neo4J relationship does not contain 'name' property, an error is thrown"() { + when: "a Neo4J node has no properties" + Link.valueOf(Mock(Relationship) {asMap() >> [:]}) + + then: "IllegalStateException is thrown complaining about the missing required property" + Exception exception = thrown() + exception instanceof IllegalStateException + exception.message == "There seems to be a data format mismatch between Wilhelm webservice and Neo4J database. 
" + + "Please file an issue at https://github.com/QubitPi/wilhelm-ws/issues for a fix" + } + + def "Neo4J relationship gets converted to a wilhelm-ws link"() { + when: "a happy path Neo4J relationship is being converted to a transparent link" + Link actual = Link.valueOf(Mock(Relationship) { + asMap() >> [ + name: "my node", + type: "follows" + ] + startNodeElementId() >> "node1" + endNodeElementId() >> "node2" + }) + + then: "the transparent link is fully initialized" + actual.label == "my node" + actual.sourceNodeId == "node1" + actual.targetNodeId == "node2" + actual.attributes == [type: "follows"] + } +} diff --git a/src/test/groovy/org/qubitpi/wilhelm/NodeSpec.groovy b/src/test/groovy/org/qubitpi/wilhelm/NodeSpec.groovy new file mode 100644 index 0000000..9a1d3d2 --- /dev/null +++ b/src/test/groovy/org/qubitpi/wilhelm/NodeSpec.groovy @@ -0,0 +1,74 @@ +/* + * Copyright Jiaqi Liu + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.qubitpi.wilhelm + +import com.fasterxml.jackson.databind.ObjectMapper + +import groovy.json.JsonSlurper +import spock.lang.Specification + +class NodeSpec extends Specification { + + @SuppressWarnings('GroovyAccessibility') + def "JSON serialization of Node includes 3 attributes, one of them is '#attribute'"() { + when: "a Node object is serialized to a JSON" + def actual = new JsonSlurper().parseText( + new ObjectMapper().writeValueAsString( + new Node( "my ID", "my node", [color: "blue", size: "medium"]) + ) + ) + + then: "the JSON has 3 fields" + actual.size() == 3 + + and: "the fields contains one of the required fields" + actual.keySet().contains(attribute) + + where: + _ | attribute + _ | "id" + _ | "label" + _ | "attributes" + } + + def "when a Neo4J node does not contain 'name' property, an error is thrown"() { + when: "a Neo4J node has no properties" + Node.valueOf(Mock(org.neo4j.driver.types.Node) {asMap() >> [:]}) + + then: "IllegalStateException is thrown complaining about the missing required property" + Exception exception = thrown() + exception instanceof IllegalStateException + exception.message == "There seems to be a data format mismatch between Wilhelm webservice and Neo4J database. 
" + + "Please file an issue at https://github.com/QubitPi/wilhelm-ws/issues for a fix" + } + + def "Neo4J node gets converted to a wilhelm-ws node"() { + when: "a happy path Neo4J node is being converted to a transparent node" + Node actual = Node.valueOf(Mock(org.neo4j.driver.types.Node) { + asMap() >> [ + name: "my node", + color: "blue", + size: "medium" + ] + elementId() >> "my id" + }) + + then: "the transparent node is fully initialized" + actual.id == "my id" + actual.label == "my node" + actual.attributes == [color: "blue", size: "medium"] + } +} diff --git a/src/test/groovy/org/qubitpi/wilhelm/web/endpoints/DataServletITSpec.groovy b/src/test/groovy/org/qubitpi/wilhelm/web/endpoints/Neo4JServletITSpec.groovy similarity index 89% rename from src/test/groovy/org/qubitpi/wilhelm/web/endpoints/DataServletITSpec.groovy rename to src/test/groovy/org/qubitpi/wilhelm/web/endpoints/Neo4JServletITSpec.groovy index d72137e..7a94b98 100644 --- a/src/test/groovy/org/qubitpi/wilhelm/web/endpoints/DataServletITSpec.groovy +++ b/src/test/groovy/org/qubitpi/wilhelm/web/endpoints/Neo4JServletITSpec.groovy @@ -46,7 +46,7 @@ import java.time.Duration import java.time.temporal.ChronoUnit @Testcontainers -class DataServletITSpec extends Specification { +class Neo4JServletITSpec extends Specification { static final int PORT = 8080 @@ -86,15 +86,6 @@ class DataServletITSpec extends Specification { server.stop() } - def "Healthchecking endpoints returns 200"() { - expect: - RestAssured.given() - .when() - .get("/data/healthcheck") - .then() - .statusCode(200) - } - def "Get count by language returns a list of one map entry, whose key is 'count' and value is the total"() { expect: RestAssured @@ -102,7 +93,7 @@ class DataServletITSpec extends Specification { .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON) .when() - .get("/data/languages/german/count") + .get("/neo4j/languages/german/count") .then() .statusCode(200) .body("[0].count", greaterThan(1)) @@ 
-116,7 +107,7 @@ class DataServletITSpec extends Specification { .accept(MediaType.APPLICATION_JSON) .queryParams([perPage: "10", page: "1"]) .when() - .get("/data/languages/german") + .get("/neo4j/languages/german") .then() .statusCode(200) .body("[0]", hasKey("term")) @@ -131,15 +122,15 @@ class DataServletITSpec extends Specification { .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON) .when() - .get("/data/languages/myInvalidLanguage") + .get("/neo4j/languages/myInvalidLanguage") .then() .statusCode(400) .body(equalTo("'myInvalidLanguage' is not a recognized language. Acceptable ones are german, ancientGreek, latin")) where: _ | endpoint - _ | "/data/languages/myInvalidLanguage" - _ | "/data/languages/myInvalidLanguage/count" + _ | "/neo4j/languages/myInvalidLanguage" + _ | "/neo4j/languages/myInvalidLanguage/count" } def "Expand a word returns a map of two keys - 'nodes' & 'links'"() { @@ -149,7 +140,7 @@ class DataServletITSpec extends Specification { .contentType(MediaType.APPLICATION_JSON) .accept(MediaType.APPLICATION_JSON) .when() - .get("/data/expand/nämlich") + .get("/neo4j/expand/nämlich") .then() .statusCode(200) .body("", hasKey("nodes")) diff --git a/src/test/groovy/org/qubitpi/wilhelm/web/endpoints/DataServletSpec.groovy b/src/test/groovy/org/qubitpi/wilhelm/web/endpoints/Neo4JServletSpec.groovy similarity index 95% rename from src/test/groovy/org/qubitpi/wilhelm/web/endpoints/DataServletSpec.groovy rename to src/test/groovy/org/qubitpi/wilhelm/web/endpoints/Neo4JServletSpec.groovy index 7d29293..3dbb781 100644 --- a/src/test/groovy/org/qubitpi/wilhelm/web/endpoints/DataServletSpec.groovy +++ b/src/test/groovy/org/qubitpi/wilhelm/web/endpoints/Neo4JServletSpec.groovy @@ -22,7 +22,7 @@ import org.neo4j.driver.internal.types.InternalTypeSystem import spock.lang.Specification import spock.lang.Unroll -class DataServletSpec extends Specification { +class Neo4JServletSpec extends Specification { 
@SuppressWarnings('GroovyAccessibility') def "Embedded Neo4J Value objects are recursively expanded to become plain JSONable map"() { @@ -53,7 +53,7 @@ class DataServletSpec extends Specification { } expect: - DataServlet.expand(value) == [ + Neo4JServlet.expand(value) == [ term: [ name: "Hallo", language: "German" @@ -73,7 +73,7 @@ class DataServletSpec extends Specification { } expect: - DataServlet.isTerminalValue(value) == isTerminalType + Neo4JServlet.isTerminalValue(value) == isTerminalType where: valueType || isTerminalType