diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.html b/README.html new file mode 100644 index 0000000..58a81b0 --- /dev/null +++ b/README.html @@ -0,0 +1,93 @@ +Untitled Document.md +

OntoNetHub

+

OntoNetHub is a Web-based application meant to deal with the management of ontology networks.
+This includes the upload, deletion, storage, and indexing of the ontologies that are part of a network.

+

Requirements

+

OntoNetHub is designed as an extension of Apache Stanbol and released as a Docker component. Hence, users need Docker to build and run OntoNetHub.

+

Building and Running

+

First run your Docker instance. Then type the following command in a terminal from the root of the project (i.e. the folder ontonethub) to build the components:

+
docker-compose build
+
+

Finally, type the following command in a terminal to run OntoNetHub.

+
docker-compose up
+
+

After that, OntoNetHub is available in your browser at http://localhost:8000/stanbol/ontonethub.
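To quickly check from the command line that the service is up, a plain GET against the same URL can be used (a sketch; the exact response body depends on the deployment):

curl -i http://localhost:8000/stanbol/ontonethub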

+

Usage

+

The following sections describe the functionalities provided by the OntoNetHub. More details about the usage of the OntoNetHub via its HTTP REST API can be found in the Swagger descriptor (i.e. ontonethub.yaml) included in this release.

+
Uploading an ontology
+

An ontology can be uploaded by performing an HTTP POST request to the path /stanbol/ontonethub/ontology. The upload triggers the indexing of ontological terms and the physical storage of the ontology itself. The indexing is performed by using rdfs:label and rdfs:comment annotations associated with the OWL entities that are part of the ontology. The index resulting from an indexing process is a Solr index. The OntoNetHub relies on the Stanbol EntityHub for managing everything associated with index generation and index searching.
+The following is an example of a curl request for uploading an ontology to the OntoNetHub.

+
curl -X POST \
+  -H "Content-type: multipart/form-data; boundary=----WebKitFormBoundaryzeZR8KqAYJyI2jPL" \
+  -H "Accept: application/json" \
+  -F name=person \
+  -F description="The ontology for representing persons." \
+  -F baseUri=http://data.gov.it/ontologies/person \
+  -F data=@person.rdf \
+  http://localhost:8000/stanbol/ontonethub/ontology
+
+

The request above returns the following JSON.

+
{
+  "monitoringService":"http:\/\/localhost:8000\/stanbol\/jobs\/tGEjuGTUi8b_Vm5OneRSkg",
+  "ontologyId":"tGEjuGTUi8b_Vm5OneRSkg"
+}
+
+

Where:

+
+- the monitoringService represents the resource to query for obtaining the status (i.e. finished, running, aborted) of the job associated with the upload of an ontology;
+- the ontologyId is the identifier of the ontology within the OntoNetHub.
+

The URL provided by the monitoringService can be queried via HTTP GET. The following is a curl example.

+
curl -H "Accept: application/json" http://localhost:8000/stanbol/jobs/tGEjuGTUi8b_Vm5OneRSkg
+
+

The request above returns the following JSON:

+
{
+  "status": "finished",
+  "outputLocation": "http://localhost:8000/stanbol/ontonethub/ontology/tGEjuGTUi8b_Vm5OneRSkg",
+  "messages": [
+    "You can remove this job using DELETE"
+  ]
+}
+
+
Accessing an ontology within the OntoNetHub
+

The outputLocation, part of the JSON returned by the job service, provides the URL to access the information about the specific ontology indexed. The following is a curl example.

+
curl -H "Accept: application/json" http://localhost:8000/stanbol/ontonethub/ontology/tGEjuGTUi8b_Vm5OneRSkg
+
+

The output of the request above is the following:

+
{
+  "id":"tGEjuGTUi8b_Vm5OneRSkg",
+  "ontologySource":"http:\/\/localhost:8000\/stanbol\/\/ontonethub\/ontology\/tGEjuGTUi8b_Vm5OneRSkg\/source",
+  "name":"person",
+  "description":"The ontology for representing persons.",
+  "ontologyIRI":"http:\/\/data.gov.it\/ontologies\/person",
+  "owlClasses":66,
+  "objectProperties":60,
+  "datatypeProperties":9,
+  "annotationProperties":22,
+  "individuals":0,
+  "importedOntologies":7
+}
+
+

The response reports metadata (i.e. name, description, and ontologyIRI) and statistics (i.e. the
number of owlClasses, objectProperties, datatypeProperties, annotationProperties, individuals, and importedOntologies). The attribute ontologySource provides the URL to access the OWL source of the ontology. For example, the following curl request returns the JSON-LD serialisation of the person ontology.

+
curl -H "Accept: application/json-ld" http://localhost:8000/stanbol/ontonethub/ontology/tGEjuGTUi8b_Vm5OneRSkg/source
+
+
Deleting an ontology from the OntoNetHub
+

An ontology can be deleted from the OntoNetHub by performing an HTTP DELETE request to the resource representing the ontology within the OntoNetHub. The following is an example:

+
curl -X DELETE http://localhost:8000/stanbol/ontonethub/ontology/44HDRw9NEKK4gAfQprG_ZQ
+
+
Querying the OntoNetHub
+

It is possible to query the OntoNetHub for retrieving OWL entities from the ontologies managed by the OntoNetHub. The following is an example of a query that searches for all the OWL entities having an annotation (i.e. rdfs:label or rdfs:comment) that matches the string Persona in Italian.

+
curl -X POST "http://localhost:8000/ontonethub/ontologies/find" -H "accept: application/json" -H "content-type: application/x-www-form-urlencoded" -d "name=Persona&lang=it"
+
+

It is possible to use wildcards (i.e. *) in queries. Hence, if we want to find all possible terms starting with the word Pers the example above is converted to the following:

+
curl -X POST "http://localhost:8000/ontonethub/ontologies/find" -H "accept: application/json" -H "content-type: application/x-www-form-urlencoded" -d "name=Pers*&lang=it"
+
+

In order to query a specific ontology instead of the whole set of ontologies managed by the OntoNetHub, the path of the requests has to be set to http://localhost:8000/ontonethub/ontology/{ontologyID}/find, where ontologyID has to be replaced with a proper ontology identifier, e.g. 44HDRw9NEKK4gAfQprG_ZQ as used in previous examples.
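The following sketch instantiates that path template with the identifier from the previous examples (a hedged illustration: replace the identifier and query parameters with values from your own instance):

curl -X POST "http://localhost:8000/ontonethub/ontology/44HDRw9NEKK4gAfQprG_ZQ/find" -H "accept: application/json" -H "content-type: application/x-www-form-urlencoded" -d "name=Pers*&lang=it"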

+

Compiling from source code

+

The OntoNetHub is released along with the source code, which is available in the folder ontonethub-src. The source code is written in Java and can be built by using Maven. The following command can be used to build the source code when executed from the command line at the root of the ontonethub-src folder:

+
mvn clean install
+
+

Once the compilation process finishes, the WAR application stanbol.war is available in the folder ontonethub-src/ontonethub-war/target. The stanbol.war can be deployed in any application server (e.g. Tomcat). We remark that the Docker component that is part of this release provides a Tomcat service.

+
+
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c2532f9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,119 @@
+# OntoNetHub
+
+OntoNetHub is a Web-based application meant to deal with the management of ontology networks.
+This includes the upload, deletion, storage, and indexing of the ontologies that are part of a network.
+
+### Requirements
+
+OntoNetHub is designed as an extension of [`Apache Stanbol`](https://stanbol.apache.org/) and released as a Docker component. Hence, users need [`Docker`](https://docs.docker.com/) to build and run OntoNetHub.
+
+### Building and Running
+
+First run your Docker instance. Then type the following command in a terminal from the root of the project (i.e. the folder `ontonethub`) to build the components:
+
+```
+docker-compose build
+```
+
+Finally, type the following command in a terminal to run OntoNetHub.
+
+```
+docker-compose up
+```
+
+After that, OntoNetHub is available in your browser at [`http://localhost:8000/stanbol/ontonethub`](http://localhost:8000/stanbol/ontonethub).
+
+### Usage
+The following sections describe the functionalities provided by the OntoNetHub. More details about the usage of the OntoNetHub via its HTTP REST API can be found in the Swagger descriptor (i.e. `ontonethub.yaml`) included in this release.
+
+##### Uploading an ontology
+An ontology can be uploaded by performing an HTTP POST request to the path `/stanbol/ontonethub/ontology`. The upload triggers the indexing of ontological terms and the physical storage of the ontology itself. The indexing is performed by using `rdfs:label` and `rdfs:comment` annotations associated with the OWL entities that are part of the ontology. The index resulting from an indexing process is a Solr index. The OntoNetHub relies on the Stanbol EntityHub for managing everything associated with index generation and index searching.
+The following is an example of a `curl` request for uploading an ontology to the OntoNetHub.
+
+```
+curl -X POST \
+  -H "Content-type: multipart/form-data; boundary=----WebKitFormBoundaryzeZR8KqAYJyI2jPL" \
+  -H "Accept: application/json" \
+  -F name=person \
+  -F description="The ontology for representing persons." \
+  -F baseUri=http://data.gov.it/ontologies/person \
+  -F data=@person.rdf \
+  http://localhost:8000/stanbol/ontonethub/ontology
+```
+The request above returns the following JSON.
+```
+{
+  "monitoringService":"http:\/\/localhost:8000\/stanbol\/jobs\/tGEjuGTUi8b_Vm5OneRSkg",
+  "ontologyId":"tGEjuGTUi8b_Vm5OneRSkg"
+}
+```
+Where:
+ - the `monitoringService` represents the resource to query for obtaining the status (i.e. finished, running, aborted) of the job associated with the upload of an ontology;
+ - the `ontologyId` is the identifier of the ontology within the OntoNetHub.
+
+The URL provided by the `monitoringService` can be queried via HTTP GET. The following is a curl example.
+
+```
+curl -H "Accept: application/json" http://localhost:8000/stanbol/jobs/tGEjuGTUi8b_Vm5OneRSkg
+```
+The request above returns the following JSON:
+```
+{
+  "status": "finished",
+  "outputLocation": "http://localhost:8000/stanbol/ontonethub/ontology/tGEjuGTUi8b_Vm5OneRSkg",
+  "messages": [
+    "You can remove this job using DELETE"
+  ]
+}
+```
+
+##### Accessing an ontology within the OntoNetHub
+The `outputLocation`, part of the JSON returned by the job service, provides the URL to access the information about the specific ontology indexed.
The following is a curl example.
+```
+curl -H "Accept: application/json" http://localhost:8000/stanbol/ontonethub/ontology/tGEjuGTUi8b_Vm5OneRSkg
+```
+The output of the request above is the following:
+```
+{
+  "id":"tGEjuGTUi8b_Vm5OneRSkg",
+  "ontologySource":"http:\/\/localhost:8000\/stanbol\/\/ontonethub\/ontology\/tGEjuGTUi8b_Vm5OneRSkg\/source",
+  "name":"person",
+  "description":"The ontology for representing persons.",
+  "ontologyIRI":"http:\/\/data.gov.it\/ontologies\/person",
+  "owlClasses":66,
+  "objectProperties":60,
+  "datatypeProperties":9,
+  "annotationProperties":22,
+  "individuals":0,
+  "importedOntologies":7
+}
+```
+The response reports metadata (i.e. `name`, `description`, and `ontologyIRI`) and statistics (i.e. the
+number of `owlClasses`, `objectProperties`, `datatypeProperties`, `annotationProperties`, `individuals`, and `importedOntologies`). The attribute `ontologySource` provides the URL to access the OWL source of the ontology. For example, the following curl request returns the [`JSON-LD`](https://json-ld.org/) serialisation of the person ontology.
+```
+curl -H "Accept: application/json-ld" http://localhost:8000/stanbol/ontonethub/ontology/tGEjuGTUi8b_Vm5OneRSkg/source
+```
+
+##### Deleting an ontology from the OntoNetHub
+An ontology can be deleted from the OntoNetHub by performing an HTTP DELETE request to the resource representing the ontology within the OntoNetHub. The following is an example:
+```
+curl -X DELETE http://localhost:8000/stanbol/ontonethub/ontology/44HDRw9NEKK4gAfQprG_ZQ
+```
+
+##### Querying the OntoNetHub
+It is possible to query the OntoNetHub for retrieving OWL entities from the ontologies managed by the OntoNetHub. The following is an example of a query that searches for all the OWL entities having an annotation (i.e. `rdfs:label` or `rdfs:comment`) that matches the string `Persona` in Italian.
+```
+curl -X POST "http://localhost:8000/ontonethub/ontologies/find" -H "accept: application/json" -H "content-type: application/x-www-form-urlencoded" -d "name=Persona&lang=it"
+```
+It is possible to use wildcards (i.e. `*`) in queries. Hence, if we want to find all possible terms starting with the word `Pers` the example above is converted to the following:
+```
+curl -X POST "http://localhost:8000/ontonethub/ontologies/find" -H "accept: application/json" -H "content-type: application/x-www-form-urlencoded" -d "name=Pers*&lang=it"
+```
+In order to query a specific ontology instead of the whole set of ontologies managed by the OntoNetHub, the path of the requests has to be set to `http://localhost:8000/ontonethub/ontology/{ontologyID}/find`, where `ontologyID` has to be replaced with a proper ontology identifier, e.g. 44HDRw9NEKK4gAfQprG_ZQ as used in previous examples.
+
+### Compiling from source code
+The OntoNetHub is released along with the source code, which is available in the folder `ontonethub-src`. The source code is written in Java and can be built by using [`Maven`](https://maven.apache.org/). The following command can be used to build the source code when executed from the command line at the root of the `ontonethub-src` folder:
+```
+mvn clean install
+```
+Once the compilation process finishes, the WAR application `stanbol.war` is available in the folder `ontonethub-src/ontonethub-war/target`. The `stanbol.war` can be deployed in any application server (e.g. [`Tomcat`](https://tomcat.apache.org/)). We remark that the Docker component that is part of this release provides a Tomcat service.
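+
+For a manual deployment outside Docker, the following is a minimal sketch (it assumes a local Tomcat installation whose `webapps` folder is at `/usr/local/tomcat/webapps`, as in the `docker-compose.yml` of this release; adjust the path to your installation):
+```
+# Copy the WAR produced by the build into Tomcat's webapps folder;
+# Tomcat then auto-deploys the application under the /stanbol context.
+cp ontonethub-src/ontonethub-war/target/stanbol.war /usr/local/tomcat/webapps/
+```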
\ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100755 index 0000000..7c7b354 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,23 @@ +version: '2' +services: + ontonethub: + build: ./ontonethub-src + container_name: ontonethub-src + depends_on: + - tomcat + entrypoint: /ontonethub-src/startup.sh + volumes: + - myshare:/usr/local/tomcat/webapps + tomcat: + image: tomcat + ports: + - 8000:8080 + environment: + - CATALINA_OPTS=-Xmx1g -Xms1g -XX:MaxPermSize=1024m + volumes: + - myshare:/usr/local/tomcat/webapps +volumes: + myshare: + driver: local + + diff --git a/ontonethub-src/Dockerfile b/ontonethub-src/Dockerfile new file mode 100755 index 0000000..63eb5b9 --- /dev/null +++ b/ontonethub-src/Dockerfile @@ -0,0 +1,10 @@ +FROM maven:3.5.0-jdk-8 +ADD . /ontonethub-src + +WORKDIR /ontonethub-src/indexing +RUN ["mvn", "clean", "install"] + +RUN ["mv", "target/indexing-genericrdf-0.1.jar", "../ontonethub/src/main/resources/executables/indexing-genericrdf.jar"] + +WORKDIR /ontonethub-src +RUN ["mvn", "clean", "install"] diff --git a/ontonethub-src/base.jersey/pom.xml b/ontonethub-src/base.jersey/pom.xml new file mode 100644 index 0000000..62bb4f6 --- /dev/null +++ b/ontonethub-src/base.jersey/pom.xml @@ -0,0 +1,150 @@ + + + + 4.0.0 + + + org.apache.stanbol + stanbol-parent + 6 + ../../../parent + + + org.apache.stanbol + org.apache.stanbol.commons.web.base.jersey + 1.0.0 + bundle + + Apache Stanbol Commons Web Base Jersey + Stanbol Jersey based implementation of HTTP application bundle. + + + Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + + scm:svn:http://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/commons/web/base.jersey + + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/commons/web/base.jersey + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/stanbol.apache.org/branches/release-1.0.0-branch + + + + + + + org.apache.felix + maven-scr-plugin + + + org.apache.felix + maven-bundle-plugin + true + + + + org.apache.stanbol.commons.web.base.jersey;version=${project.version} + + + org.osgi.service.http; version="[1.2.0,2)", + * + + + + + + + + + + + org.slf4j + slf4j-api + + + org.apache.stanbol + org.apache.stanbol.commons.web.base + 1.0.0 + + + + + org.glassfish.jersey.containers + + jersey-container-servlet-core + 2.2 + + + + javax.annotation + javax.annotation-api + 1.2-b04 + + + + javax.servlet + servlet-api + + + + + + org.osgi + org.osgi.core + + + org.osgi + org.osgi.compendium + + + org.apache.felix + org.apache.felix.scr.annotations + + + + junit + junit + test + + + org.slf4j + slf4j-simple + test + + + + + diff --git a/ontonethub-src/base.jersey/src/license/THIRD-PARTY.properties b/ontonethub-src/base.jersey/src/license/THIRD-PARTY.properties new file mode 100644 index 0000000..5459b26 --- /dev/null +++ b/ontonethub-src/base.jersey/src/license/THIRD-PARTY.properties @@ -0,0 +1,28 @@ +# Generated by org.codehaus.mojo.license.AddThirdPartyMojo +#------------------------------------------------------------------------------- +# Already used licenses in project : +# - Apache Software License +# - Apache Software License, Version 2.0 +# - BSD License +# - CDDL + GPLv2 with classpath exception +# - CDDL+GPL License +# - Common Development And Distribution License (CDDL), Version 1.0 +# - Common Development And Distribution License (CDDL), Version 1.1 +# - Common Public License, Version 1.0 
+# - Eclipse Public License 1.0 +# - Eclipse Public License, Version 1.0 +# - GNU General Public License (GPL), Version 2 with classpath exception +# - GNU Lesser General Public License (LGPL) +# - GNU Lesser General Public License (LGPL), Version 2.1 +# - ICU License +# - MIT License +# - MPL 1.1 +# - New BSD License +# - Public Domain License +#------------------------------------------------------------------------------- +# Please fill the missing licenses for dependencies : +# +# +#Mon Sep 05 12:24:48 CEST 2016 +javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0 +xerces--xercesImpl--2.7.1=The Apache Software License, Version 2.0 diff --git a/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/DefaultApplication.java b/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/DefaultApplication.java new file mode 100644 index 0000000..6d1c292 --- /dev/null +++ b/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/DefaultApplication.java @@ -0,0 +1,69 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.stanbol.commons.web.base.jersey; + +import java.util.HashSet; +import java.util.Set; + +import javax.ws.rs.core.Application; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Define the list of available resources and providers to be used by the Stanbol JAX-RS Endpoint. 
+ */
+public class DefaultApplication extends Application {
+
+    @SuppressWarnings("unused")
+    private static final Logger log = LoggerFactory.getLogger(DefaultApplication.class);
+
+    protected final Set<Class<?>> contributedClasses = new HashSet<Class<?>>();
+
+    protected final Set<Object> contributedSingletons = new HashSet<Object>();
+
+    @Override
+    public Set<Class<?>> getClasses() {
+        Set<Class<?>> classes = new HashSet<Class<?>>();
+        // resources contributed by other bundles
+        classes.addAll(contributedClasses);
+        // TODO check if the Clerezza rdf.jaxrs provider fits the purpose
+        // message body writers, hard-coded for now
+        //classes.add(GraphWriter.class);
+        //classes.add(JenaModelWriter.class);
+        //classes.add(ResultSetWriter.class);
+        return classes;
+    }
+
+    @Override
+    public Set<Object> getSingletons() {
+        Set<Object> singletons = new HashSet<Object>();
+        singletons.addAll(contributedSingletons);
+        return singletons;
+    }
+
+    public void contributeClasses(Set<Class<?>> classes) {
+        contributedClasses.addAll(classes);
+    }
+
+    public void contributeSingletons(Set<Object> singletons) {
+        contributedSingletons.addAll(singletons);
+    }
+
+}
diff --git a/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/EditableLayoutConfiguration.java b/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/EditableLayoutConfiguration.java
new file mode 100644
index 0000000..d517640
--- /dev/null
+++ b/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/EditableLayoutConfiguration.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.stanbol.commons.web.base.jersey; + +import java.util.List; + +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.Service; +import org.apache.stanbol.commons.web.base.LinkResource; +import org.apache.stanbol.commons.web.base.NavigationLink; +import org.apache.stanbol.commons.web.base.ScriptResource; +import org.apache.stanbol.commons.web.base.resource.LayoutConfiguration; + +/** + * This class is here for supporting legacy templates + * + * @deprecated this sets global css-links and script imports, they should be set + * specifically in templates + */ +@Component +@Service({LayoutConfiguration.class, EditableLayoutConfiguration.class}) +public class EditableLayoutConfiguration implements LayoutConfiguration { + + private List navigationLinks; + private String rootUrl; + + public static final String SYSTEM_CONSOLE = "system/console"; + private String staticResourcesRootUrl; + private List linkResources; + private List scriptResources; + + + @Override + public List getNavigationLinks() { + return navigationLinks; + } + + + @Override + public String getRootUrl() { + return rootUrl; + } + + void setRootUrl(String rootUrl) { + this.rootUrl = rootUrl; + } + + @Override + public String getStaticResourcesRootUrl() { + return staticResourcesRootUrl; + } + + @Override + public List getRegisteredLinkResources() { + return linkResources; + } + + @Override + public List getRegisteredScriptResources() { + return scriptResources; + } + + void setStaticResourcesRootUrl(String staticResourcesRootUrl) { + this.staticResourcesRootUrl = staticResourcesRootUrl; + } + + void setLinkResources(List linkResources) { + this.linkResources = linkResources; + } + + void setScriptResources(List scriptResources) { + this.scriptResources = scriptResources; + } + + void setNavigationsLinks(List navigationLinks) { + this.navigationLinks = navigationLinks; + } +} diff --git a/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/JerseyEndpoint.java b/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/JerseyEndpoint.java new file mode 100644 index 0000000..7f97b97 --- /dev/null +++ b/ontonethub-src/base.jersey/src/main/java/org/apache/stanbol/commons/web/base/jersey/JerseyEndpoint.java @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.stanbol.commons.web.base.jersey; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Dictionary; +import java.util.HashSet; +import java.util.Hashtable; +import java.util.List; +import java.util.Set; + +import javax.servlet.ServletContext; +import javax.servlet.ServletException; + +import org.apache.felix.scr.annotations.Activate; +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.Deactivate; +import org.apache.felix.scr.annotations.Property; +import org.apache.felix.scr.annotations.Reference; +import org.apache.felix.scr.annotations.ReferenceCardinality; +import org.apache.felix.scr.annotations.ReferencePolicy; +import org.osgi.framework.Bundle; +import org.osgi.framework.BundleContext; +import org.osgi.service.cm.ConfigurationException; +import org.osgi.service.component.ComponentContext; +import org.osgi.service.http.HttpService; +import org.osgi.service.http.NamespaceException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import org.apache.felix.scr.annotations.References; +import org.apache.stanbol.commons.web.base.LinkResource; +import org.apache.stanbol.commons.web.base.NavigationLink; +import org.apache.stanbol.commons.web.base.ScriptResource; +import org.apache.stanbol.commons.web.base.WebFragment; +import org.glassfish.jersey.server.ResourceConfig; +import org.glassfish.jersey.servlet.ServletContainer; + +/** + * Jersey-based RESTful endpoint for the Stanbol Enhancer engines and store. + *

+ * This OSGi component serves as a bridge between the OSGi context and the Servlet context available to JAX-RS + * resources. + */ +@Component(immediate = true, metatype = true) +@References({ + @Reference(name = "webFragment", + referenceInterface = WebFragment.class, + cardinality = ReferenceCardinality.OPTIONAL_MULTIPLE, + policy = ReferencePolicy.DYNAMIC), + @Reference(name="component", referenceInterface=Object.class, + target="(javax.ws.rs=true)", + cardinality=ReferenceCardinality.OPTIONAL_MULTIPLE, + policy=ReferencePolicy.DYNAMIC), + @Reference(name="navigationLink", referenceInterface=NavigationLink.class, + cardinality=ReferenceCardinality.OPTIONAL_MULTIPLE, + policy=ReferencePolicy.DYNAMIC)}) +public class JerseyEndpoint { + + private final Logger log = LoggerFactory.getLogger(getClass()); + + @Property(value = "/") + public static final String ALIAS_PROPERTY = "org.apache.stanbol.commons.web.alias"; + + @Property(value = "/static") + public static final String STATIC_RESOURCES_URL_ROOT_PROPERTY = "org.apache.stanbol.commons.web.static.url"; + + + @Reference + private EditableLayoutConfiguration layoutConfiguration; + + /** + * The origins allowed for multi-host requests + */ + @Property(cardinality = 100, value = {"*"}) + public static final String CORS_ORIGIN = "org.apache.stanbol.commons.web.cors.origin"; + + @Property(cardinality = 100, value = {"Location"}) + public static final String CORS_ACCESS_CONTROL_EXPOSE_HEADERS = "org.apache.stanbol.commons.web.cors.access_control_expose_headers"; + + @Reference + HttpService httpService; + + protected ComponentContext componentContext; + + protected ServletContext servletContext; + + protected final List webFragments = new ArrayList(); + + protected final List registeredAliases = new ArrayList(); + + protected Set corsOrigins; + + protected Set exposedHeaders; + private Set components = new HashSet(); + private List navigationLinks = new ArrayList(); + + public Dictionary getInitParams() { + Dictionary initParams = new Hashtable(); + // make jersey automatically turn resources into Viewable models and + // hence lookup matching freemarker templates + initParams.put("com.sun.jersey.config.feature.ImplicitViewables", "true"); + return initParams; + } + + @Activate + protected void activate(ComponentContext ctx) throws IOException, + ServletException, + NamespaceException, + ConfigurationException { + componentContext = ctx; + // init corsOrigins + Object values = componentContext.getProperties().get(CORS_ORIGIN); + if (values instanceof String && !((String) values).isEmpty()) { + corsOrigins = Collections.singleton((String) values); + } else if (values instanceof String[]) { + corsOrigins = new HashSet(Arrays.asList((String[]) values)); + } else if (values instanceof Iterable) { + corsOrigins = new HashSet(); + for (Object value : (Iterable) values) { + if (value != null && !value.toString().isEmpty()) { + corsOrigins.add(value.toString()); + } + } + } else { + throw new ConfigurationException(CORS_ORIGIN, + "CORS origin(s) MUST be a String, String[], Iterable (value:" + values + ")"); + } + + // parse headers to be exposed + values = componentContext.getProperties().get(CORS_ACCESS_CONTROL_EXPOSE_HEADERS); + if (values instanceof String && !((String) values).isEmpty()) { + exposedHeaders = Collections.singleton((String) values); + } else if (values instanceof String[]) { + exposedHeaders = new HashSet(Arrays.asList((String[]) values)); + } else if (values instanceof Iterable) { + exposedHeaders = new HashSet(); + for (Object 
value : (Iterable) values) { + if (value != null && !value.toString().isEmpty()) { + exposedHeaders.add(value.toString()); + } + } + } else { + exposedHeaders = new HashSet(); + } + if (!webFragments.isEmpty()) { + initJersey(); + } + } + + /** Initialize the Jersey subsystem */ + private synchronized void initJersey() throws NamespaceException, ServletException { + if (componentContext == null) { + //we have not yet been activated + return; + } + //end of STANBOL-1073 work around + if (componentContext == null) { + log.debug(" ... can not init Jersey Endpoint - Component not yet activated!"); + //throw new IllegalStateException("Null ComponentContext, not activated?"); + return; + } + + shutdownJersey(); + + log.info("(Re)initializing the Stanbol Jersey subsystem"); + + // register all the JAX-RS resources into a a JAX-RS application and bind it to a configurable URL + // prefix + DefaultApplication app = new DefaultApplication(); + String staticUrlRoot = (String) componentContext.getProperties().get( + STATIC_RESOURCES_URL_ROOT_PROPERTY); + String applicationAlias = (String) componentContext.getProperties().get(ALIAS_PROPERTY); + + // incrementally contribute fragment resources + List linkResources = new ArrayList(); + List scriptResources = new ArrayList(); + for (WebFragment fragment : webFragments) { + log.debug("Registering web fragment '{}' into jaxrs application", fragment.getName()); + linkResources.addAll(fragment.getLinkResources()); + scriptResources.addAll(fragment.getScriptResources()); + navigationLinks.removeAll(fragment.getNavigationLinks()); + navigationLinks.addAll(fragment.getNavigationLinks()); + app.contributeClasses(fragment.getJaxrsResourceClasses()); + app.contributeSingletons(fragment.getJaxrsResourceSingletons()); + } + app.contributeSingletons(components); + Collections.sort(linkResources); + Collections.sort(scriptResources); + Collections.sort(navigationLinks); + + // bind the aggregate JAX-RS application to a dedicated servlet + ServletContainer container = new ServletContainer( + ResourceConfig.forApplication(app)); + Bundle appBundle = componentContext.getBundleContext().getBundle(); + httpService.registerServlet(applicationAlias, container, getInitParams(), null); + registeredAliases.add(applicationAlias); + + // forward the main Stanbol OSGi runtime context so that JAX-RS resources can lookup arbitrary + // services + servletContext = container.getServletContext(); + servletContext.setAttribute(BundleContext.class.getName(), componentContext.getBundleContext()); + layoutConfiguration.setRootUrl(applicationAlias); + //servletContext.setAttribute(BaseStanbolResource.ROOT_URL, applicationAlias); + layoutConfiguration.setStaticResourcesRootUrl(staticUrlRoot); + //servletContext.setAttribute(BaseStanbolResource.STATIC_RESOURCES_ROOT_URL, staticUrlRoot); + layoutConfiguration.setLinkResources(linkResources); + //servletContext.setAttribute(BaseStanbolResource.LINK_RESOURCES, linkResources); + layoutConfiguration.setScriptResources(scriptResources); + //servletContext.setAttribute(BaseStanbolResource.SCRIPT_RESOURCES, scriptResources); + layoutConfiguration.setNavigationsLinks(navigationLinks); + //servletContext.setAttribute(BaseStanbolResource.NAVIGATION_LINKS, navigationLinks); + servletContext.setAttribute(CORS_ORIGIN, corsOrigins); + servletContext.setAttribute(CORS_ACCESS_CONTROL_EXPOSE_HEADERS, exposedHeaders); + + log.info("JerseyEndpoint servlet registered at {}", applicationAlias); + } + + /** Shutdown Jersey, if there's anything to do */ + private 
synchronized void shutdownJersey() { + log.debug("Unregistering aliases {}", registeredAliases); + for (String alias : registeredAliases) { + httpService.unregister(alias); + } + registeredAliases.clear(); + } + + @Deactivate + protected void deactivate(ComponentContext ctx) { + shutdownJersey(); + servletContext = null; + componentContext = null; + } + + protected void bindWebFragment(WebFragment webFragment) throws IOException, + ServletException, + NamespaceException { + // TODO: support some ordering for jax-rs resource and template overrides? + webFragments.add(webFragment); + initJersey(); + } + + protected void unbindWebFragment(WebFragment webFragment) throws IOException, + ServletException, + NamespaceException { + navigationLinks.removeAll(webFragment.getNavigationLinks()); + webFragments.remove(webFragment); + initJersey(); + } + + protected void bindComponent(Object component) throws IOException, + ServletException, + NamespaceException { + components.add(component); + initJersey(); + } + + protected void unbindComponent(Object component) throws IOException, + ServletException, + NamespaceException { + components.remove(component); + initJersey(); + } + + protected void bindNavigationLink(NavigationLink navigationLink) { + navigationLinks.add(navigationLink); + } + + protected void unbindNavigationLink(NavigationLink navigationLink) { + navigationLinks.remove(navigationLink); + } + + public List getWebFragments() { + return webFragments; + } +} diff --git a/ontonethub-src/base.jersey/src/main/resources/OSGI-INF/metatype/metatype.properties b/ontonethub-src/base.jersey/src/main/resources/OSGI-INF/metatype/metatype.properties new file mode 100644 index 0000000..2ea7a1a --- /dev/null +++ b/ontonethub-src/base.jersey/src/main/resources/OSGI-INF/metatype/metatype.properties @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#=============================================================================== +#Properties and Options used to configure +#=============================================================================== + +org.apache.stanbol.commons.web.base.jersey.JerseyEndpoint.name=Apache Stanbol Web Application +org.apache.stanbol.commons.web.base.jersey.JerseyEndpoint.description=The main Apache Stanbol Web Endpoint where all the different modules (Enhancer, Entityhub, ...) register itself. 
+ + +org.apache.stanbol.commons.web.alias.name=Alias +org.apache.stanbol.commons.web.alias.description=The Alias for the Stanbol Web Application + +org.apache.stanbol.commons.web.static.url.name=Static +org.apache.stanbol.commons.web.static.url.description=The path to the static resources + +org.apache.stanbol.commons.web.cors.origin.name=CORS origins +org.apache.stanbol.commons.web.cors.origin.description=The CORS origins: This list defines the hosts content returned by Stanbol supports multi-site-scripting. Add '*' to allow any domain. Note: As soon as '*' is added all the other restrictions are ignored. + +org.apache.stanbol.commons.web.cors.access_control_expose_headers.name=CORS Access-Control-Expose-Headers +org.apache.stanbol.commons.web.cors.access_control_expose_headers.description=This is a list containing the headers whose values to be accessed through the XMLHttpRequest instance in Java Script. \ No newline at end of file diff --git a/ontonethub-src/bundlelists/ontonethub/pom.xml b/ontonethub-src/bundlelists/ontonethub/pom.xml new file mode 100755 index 0000000..2f61fe8 --- /dev/null +++ b/ontonethub-src/bundlelists/ontonethub/pom.xml @@ -0,0 +1,44 @@ + + + + + 4.0.0 + + + org.apache.stanbol + apache-stanbol-bundlelists + 1.0.0 + .. + + + org.apache.stanbol + org.apache.stanbol.launchers.bundlelists.ontonethub + 1.0.0 + partialbundlelist + + Apache Stanbol Bundlelist for OntoNet hub + + + + + org.apache.sling + maven-launchpad-plugin + + + + diff --git a/ontonethub-src/bundlelists/ontonethub/src/license/THIRD-PARTY.properties b/ontonethub-src/bundlelists/ontonethub/src/license/THIRD-PARTY.properties new file mode 100644 index 0000000..e0901b9 --- /dev/null +++ b/ontonethub-src/bundlelists/ontonethub/src/license/THIRD-PARTY.properties @@ -0,0 +1,25 @@ +# Generated by org.codehaus.mojo.license.AddThirdPartyMojo +#------------------------------------------------------------------------------- +# Already used licenses in project : +# - Apache License Version 2.0, January 2004 +# - Apache Software License +# - Apache Software License, Version 2.0 +# - BSD License +# - Common Development And Distribution License (CDDL), Version 1.0 +# - Common Development And Distribution License (CDDL), Version 1.1 +# - Common Public License, Version 1.0 +# - Eclipse Public License, Version 1.0 +# - GNU General Public License (GPL), Version 2 with classpath exception +# - GNU Lesser General Public License (LGPL) +# - GNU Lesser General Public License (LGPL), Version 2.1 +# - GNU Lesser General Public License, Version 2.1 +# - ICU License +# - MIT License +# - Public Domain License +# - Revised BSD License +#------------------------------------------------------------------------------- +# Please fill the missing licenses for dependencies : +# +# +#Wed Sep 07 09:18:16 CEST 2016 +xerces--xercesImpl--2.7.1=The Apache Software License, Version 2.0 diff --git a/ontonethub-src/bundlelists/ontonethub/src/main/bundles/list.xml b/ontonethub-src/bundlelists/ontonethub/src/main/bundles/list.xml new file mode 100644 index 0000000..94ebd7e --- /dev/null +++ b/ontonethub-src/bundlelists/ontonethub/src/main/bundles/list.xml @@ -0,0 +1,35 @@ + + + + + + + org.apache.clerezza + rdf.jena.tdb.storage + 1.1.1 + + + + + + it.cnr.istc.stlab + ontonethub.web + 0.1 + + + diff --git a/ontonethub-src/indexing/.project b/ontonethub-src/indexing/.project new file mode 100644 index 0000000..0d387f0 --- /dev/null +++ b/ontonethub-src/indexing/.project @@ -0,0 +1,23 @@ + + + org.apache.stanbol.entityhub.indexing.genericrdf + + + 
+ + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature org.eclipse.m2e.core.maven2Nature + +
diff --git a/ontonethub-src/indexing/README.md b/ontonethub-src/indexing/README.md
new file mode 100644
index 0000000..162ed09
--- /dev/null
+++ b/ontonethub-src/indexing/README.md
@@ -0,0 +1,248 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+# Default Indexing Tool for RDF
+
+This tool provides a default configuration for creating a Solr index of RDF
+files (e.g. a SKOS export of a thesaurus or a set of FOAF files).
+
+## Building
+
+If not yet built during the build process of the Entityhub, call
+
+    mvn install
+
+to build the jar with all the dependencies used later for indexing.
+
+If the build succeeds, go to the /target directory and copy the
+
+    org.apache.stanbol.entityhub.indexing.genericrdf-*.jar
+
+to the directory in which you would like to start the indexing.
+
+## Indexing
+
+### (1) Initialize the configuration
+
+The default configuration is initialized by calling
+
+    java -jar org.apache.stanbol.entityhub.indexing.genericrdf-*.jar init
+
+This will create a sub-folder "indexing" in the current directory.
+Within this folder all the
+
+* configurations (indexing/config)
+* source files (indexing/resources)
+* created files (indexing/destination)
+* distribution files (indexing/distribution)
+
+will be located.
+
+### (2) Adapt the configuration
+
+The configuration is located within the
+
+    indexing/config
+
+directory.
+
+The indexer supports two indexing modes:
+
+1. Iterate over the data and look up the scores for entities (default).
+For this mode the "entityDataIterable" and an "entityScoreProvider" MUST BE
+configured. If no entity scores are available, a default entityScoreProvider
+provides no entity scores. This mode is typically used to index all entities of
+a dataset.
+2. Iterate over the entity IDs and scores and look up the data. For this mode an
+"entityIdIterator" and an "entityDataProvider" MUST BE configured. This mode is
+typically used if only a small sub-set of a large dataset is indexed. This might
+be the case if Entity-Scores are available and users want to index only, e.g., the
+10000 most important Entities, or if a dataset contains Entities of many different
+types but one wants to include only entities of a specific type (e.g. Species in
+DBpedia).
+
+The configuration of the mentioned components is contained in the main indexing
+configuration file explained below.
+
+#### Main indexing configuration (indexing.properties)
+
+This file contains the main configuration for the indexing process.
+
+* the "name" property MUST BE set to the name of the referenced site to be created
+by the indexing process
+* the "entityDataIterable" is used to configure the component iterating over the
+RDF data to be indexed. The "source" parameter refers to the directory in which the RDF
+files to be indexed are searched. The RDF files can be compressed with 'gz',
+'bz2' or 'zip'. It is even supported to load multiple RDF files contained in a
+single ZIP archive.
+* the "entityScoreProvider" is used to provide the ranking for entities. A
+typical example is the number of incoming links. Such rankings are typically
+used to weight recommendations and sort result lists (e.g. for a query for
+"Paris" it is much more likely that a user refers to Paris in France than to one
+of the two places named Paris in Texas). If no rankings are available, you should use the
+"org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider".
+* the "scoreNormalizer" is only useful in case entity scores are available.
+This component is used to normalize rankings or also to filter entities with
+low rankings.
+* the "entityProcessor" is used to process (map, convert, filter) information
+of entities before indexing. The mapping configuration is provided in a separate
+file (default "mapping.txt").
+* the "entityPostProcessor" is used to process already indexed entities in a
+2nd iteration. This has the advantage that processors used in the post-processing
+can assume that all raw data are already present within the IndexingDestination.
+For this step the IndexingDestination is used for both source and destination.
+See also [STANBOL-591](https://issues.apache.org/jira/browse/STANBOL-591)
+* Indexes need to provide the configurations used to store entities. The
+"fieldConfiguration" allows specifying this. Typically it is the same mapping
+file as used for the "entityProcessor"; however, this is not a requirement.
+* the "indexingDestination" property is used to configure the target for the
+indexing. Currently there is only a single implementation that stores the indexed
+data within a SolrYard. The "boosts" parameter can be used to boost (see the Solr
+documentation for details) specific fields (typically labels) for full-text
+searches.
+* all properties starting with "org.apache.stanbol.entityhub.site." are used for
+ the configuration of the referenced site.
+
+Please note also the documentation within the "indexing.properties" file for details.
+
+#### Mapping configuration (mappings.txt)
+
+Mappings are used for three different purposes:
+
+1. During the indexing process by the "entityProcessor" to process the
+information of each entity
+2. At runtime by the local Cache to process single Entities that are updated in the cache.
+3. At runtime by the Entityhub when importing an Entity from a referenced Site.
+
+The configurations for (1) and (2) are typically identical. For (3) one might
+want to use a different configuration. The default configuration uses
+the same configuration (mappings.txt) for (1) and (2) and no specific
+configuration for (3).
+
+The mappings.txt in its default form already includes mappings for popular ontologies
+such as Dublin Core, SKOS and FOAF. Domain-specific mappings can be added to
+this configuration.
+
+#### Score Normalizer configuration
+
+The default configuration also provides examples for configurations of the
+different score normalisers. However, by default they are not used.
+ +* "minscore.properties": Example of how to configure minimum score for Entities +to be indexed +* "scorerange.properties": Example of how to normalise the maximum/minimum score + of Entities to the configured range. + +NOTE: + +* To use score normalisation, scores need to be provided for Entities. This means +an "entityScoreProvider" or an "entityIdIterator" needs to be configured +(indexing.properties). +* Multiple score normalisers can be used. The call order is determined by the +configuration of the "scoreNormalizer" property (indexing.properties). + +### (3) Provide the RDF files to be indexed + +All sources for the indexing process need to be located within the the + + indexing/resources + +directory + +By default the RDF files need to be located within + + indexing/resources/rdfdata + +however this can be changed via the "source" parameter of the "entityDataIterable" +or "entityDataProvider" property in the main indexing configuration (indexing.properties). + + +Supported RDF files are: + +* RDF/XML (by using one of "rdf", "owl", "xml" as extension): Note that this +encoding is not well suited for importing large RDF datasets. +* N-Triples (by using "nt" as extension): This is the preferred format for +importing (especially large) RDF datasets. +* NTurtle (by using "ttl" as extension) +* N3 (by using "n3" as extension) +* NQuards (by using "nq" as extension): Note that all named graphs will be +imported into the same index. +* Trig (by using "trig" as extension) + +Supported compression formats are: + +* "gz" and "bz2" files: One need to use double file extensions to indicate both +the used compression and RDF file format (e.g. myDump.nt.bz2) +* "zip": For ZIP archives all files within the archive are treated separately. +That means that even if a ZIP archive contains multiple RDF files, all of them +will be imported. + +### (4) Create the Index + + java -Xmx1024m -jar org.apache.stanbol.entityhub.indexing.genericrdf-*.jar index + +Note that calling the utility with the option -h will print the help. + + +## Use the created index with the Entityhub + +After the indexing completes the distribution folder + + /indexing/dist + +will contain two files + +1. org.apache.stanbol.data.site.{name}-{version}.jar: This is a Bundle that can +be installed to any OSGI environment running the Apache Stanbol Entityhub. When +Started it will create and configure + + * a "ReferencedSite" accessible at "http://{host}/{root}/entityhub/site/{name}" + * a "Cache" used to connect the ReferencedSite with your Data and + * a "SolrYard" that managed the data indexed by this utility. + + When installing this bundle the Site will not be yet work, because this Bundle + does not contain the indexed data but only the configuration for the Solr Index. + +2. {name}.solrindex.zip: This is the ZIP archive with the indexed data. This +file will be requested by the Apache Stanbol Data File Provider after installing +the Bundle described above. To install the data you need copy this file to the +"/sling/datafiles" folder within the working directory of your Stanbol Server. + + If you copy the ZIP archive before installing the bundle, the data will be + picked up during the installation of the bundle automatically. If you provide + the file afterwards you will also need to restart the SolrYard installed by the + Bundle. + +{name} denotes to the value you configured for the "name" property within the +"indexing.properties" file. 
+ +### A note about blank nodes + +If your input data sets contain large numbers of blank nodes, you may run out of +heap space during indexing. This is because Jena (like many semantic stores) +keeps a store of blank nodes in core memory while importing. Keeping in mind +that the Entityhub does not support the use of blank nodes, there is a means of +indexing such data sets nonetheless: you can convert the blank nodes to named +nodes and then index. There is a convenient tool packaged with Stanbol for +this purpose, called "Urify" (org.apache.stanbol.entityhub.indexing.Urify). +It is available in the runnable JAR file built by this indexer. To use it, put that +JAR on your classpath, and you can execute Urify, giving it a list of files to process. +Use the "-h" or "--help" flag to see options for Urify: + + java -Xmx1024m -cp org.apache.stanbol.entityhub.indexing.genericrdf-*.jar \ + org.apache.stanbol.entityhub.indexing.Urify --help + + \ No newline at end of file diff --git a/ontonethub-src/indexing/dependency-reduced-pom.xml b/ontonethub-src/indexing/dependency-reduced-pom.xml new file mode 100644 index 0000000..3d1519a --- /dev/null +++ b/ontonethub-src/indexing/dependency-reduced-pom.xml @@ -0,0 +1,61 @@ + + + 4.0.0 + it.cnr.it.stlab + indexing-genericrdf + Entityhub Indexing Generic RDF + 0.1 + This Indexing Tool provides a default configuration to index any + kind of RDF data. Users might want to adapt some configurations to the specifics + of the RDF data. + + + Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + scm:svn:http://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/entityhub/indexing/genericrdf + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/entityhub/indexing/genericrdf + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/stanbol.apache.org/branches/release-1.0.0-branch + + + + + maven-jar-plugin + + + + true + org.apache.stanbol.entityhub.indexing.Main + + + + + + maven-shade-plugin + + + package + + shade + + + + + + + * + + + + + + + + + + + diff --git a/ontonethub-src/indexing/pom.xml b/ontonethub-src/indexing/pom.xml new file mode 100644 index 0000000..489c56c --- /dev/null +++ b/ontonethub-src/indexing/pom.xml @@ -0,0 +1,87 @@ + + + + + 4.0.0 + + it.cnr.it.stlab + indexing-genericrdf + 0.1 + jar + + Entityhub Indexing Generic RDF + + This Indexing Tool provides a default configuration to index any + kind of RDF data. Users might want to adapt some configurations to the specifics + of the RDF data.
+ + + + + Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + + scm:svn:http://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/entityhub/indexing/genericrdf + + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/entityhub/indexing/genericrdf + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/stanbol.apache.org/branches/release-1.0.0-branch + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + true + org.apache.stanbol.entityhub.indexing.Main + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + * + + + + + + + + + package + + shade + + + + + + + + + + + org.apache.stanbol + org.apache.stanbol.entityhub.indexing.genericrdf + 1.0.0 + runtime + + + + diff --git a/ontonethub-src/indexing/src/license/THIRD-PARTY.properties b/ontonethub-src/indexing/src/license/THIRD-PARTY.properties new file mode 100644 index 0000000..7115201 --- /dev/null +++ b/ontonethub-src/indexing/src/license/THIRD-PARTY.properties @@ -0,0 +1,40 @@ +# Generated by org.codehaus.mojo.license.AddThirdPartyMojo +#------------------------------------------------------------------------------- +# Already used licenses in project : +# - All files contained in this JAR are licensed under the Apache 2.0 license, unless noted differently in their source (see swing2swt). +# - Apache Software License +# - Apache Software License, Version 2.0 +# - BSD License +# - BSD-style +# - Common Development And Distribution License (CDDL), Version 1.0 +# - Common Development And Distribution License (CDDL), Version 1.1 +# - Common Public License, Version 1.0 +# - Eclipse Public License 1.0 +# - Eclipse Public License, Version 1.0 +# - GNU General Public License (GPL), Version 2 with classpath exception +# - GNU Lesser General Public License (LGPL) +# - GNU Lesser General Public License (LGPL), Version 2.1 +# - GNU Lesser General Public License, Version 2.1 +# - ICU License +# - MIT License +# - New BSD License +# - New BSD license +# - Public Domain License +# - Revised BSD License +# - iCal4j - License +#------------------------------------------------------------------------------- +# Please fill the missing licenses for dependencies : +# +# +#Mon Sep 05 14:09:53 CEST 2016 +antlr--antlr--2.7.2=Public Domain +commons-beanutils--commons-beanutils--1.7.0=The Apache Software License, Version 2.0 +dom4j--dom4j--1.1=BSD-style +jakarta-regexp--jakarta-regexp--1.4=The Apache Software License, Version 2.0 +javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0 +javax.servlet.jsp--jsp-api--2.1=Common Development And Distribution License (CDDL), Version 1.0 +org.apache.zookeeper--zookeeper--3.4.5=The Apache Software License, Version 2.0 +org.restlet.jee--org.restlet--2.1.1=The Apache Software License, Version 2.0 +org.restlet.jee--org.restlet.ext.servlet--2.1.1=The Apache Software License, Version 2.0 +oro--oro--2.0.8=The Apache Software License, Version 2.0 +xerces--xercesImpl--2.7.1=The Apache Software License, Version 2.0 diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/entityTypes.properties b/ontonethub-src/indexing/src/main/resources/indexing/config/entityTypes.properties new file mode 100644 index 0000000..57521ea --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/entityTypes.properties @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor 
license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#NOTE: This configuration file can be used for both +# * org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter +# * org.apache.stanbol.entityhub.indexing.source.jenatdb.ResourceFilterIterator +# however users need to consider that the Jena TDB ResourceFilterIterator does +# not support wildcards '*'. So the default configuration used by this file +# will not work. + +#Configuration for the FieldValueFilter + +#This can be used to configure specific rdf:types to be indexed. Entities with +#other types will be filtered out and not be included in the created +#index + +#How to configure + +#The key 'field' can be used to configure the field the filter is applied to +# - 'rdf:type' is used as default for the field +# - Only a single field is supported. However one can configure multiple instances +# with different configurations in the 'indexing.properties' file. +# - It is possible to use a full URI or prefix:localname for all prefixes registered +# in 'org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum' + +#field=rdf:type + +#The key 'values' is used to specify the filter +# - If NOT present, then Entities with NO values for the field are filtered. All +# others are accepted +# - The value '*' deactivates filtering +# - Multiple types are supported. Configurations are separated by ';' +# - It is possible to use full URIs or prefix:localname for all prefixes registered +# in 'org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum' +# - null can be used to explicitly include Entities with no value + +#Examples + +#This deactivates filtering +values=* + +#This activates filtering for Persons, Places and Organisations and also includes +#all entities with no type +#values=null;dbp-ont:Person;dbp-ont:Place;dbp-ont:Organisation; + +#The following two configurations would only index entities with no values for the +#configured field +#values=null +#values= + diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/fieldboosts.properties b/ontonethub-src/indexing/src/main/resources/indexing/config/fieldboosts.properties new file mode 100644 index 0000000..68b6a67 --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/fieldboosts.properties @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#This file can be used to configure field boosts for Solr/Lucene +#use the fully qualified URI of the field as key and the float boost factor +#for the field as value. A value of 1.0 is the default. + +#NOTE: full UTF-8 is supported for keys! + +#This defines boosts for "label" like properties of typically used ontologies +http://www.w3.org/2000/01/rdf-schema#label=3 +http://purl.org/dc/terms/title=3 +http://purl.org/dc/elements/1.1/title=3 +http://xmlns.com/foaf/0.1/name=3 +http://schema.org/name=3 +http://www.w3.org/2004/02/skos/core#prefLabel=3 +http://www.w3.org/2004/02/skos/core#altLabel=1.5 \ No newline at end of file diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/fst.config b/ontonethub-src/indexing/src/main/resources/indexing/config/fst.config new file mode 100644 index 0000000..39b00dd --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/fst.config @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#Syntax +#index={indexField};[store={storeField}] +index=rdfs:label \ No newline at end of file diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/iditerator.properties b/ontonethub-src/indexing/src/main/resources/indexing/config/iditerator.properties new file mode 100644 index 0000000..82247ba --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/iditerator.properties @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +# DEFAULT CONFIGURATION FOR THE +# org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator +# This implementation reads Entity IDs and Scores from the lines of a text file + +# NOTE: All values provided in this file are the default values + +# the text file with the data (located relative to the resource +# (indexing/resource) directory). The default name for the file is +# "entityScores.tsv" +source=entityScores.tsv + +# configure the position of the score and the entity id +id-pos=1 +score-pos=2 + +# if the Id field only contains the local name of the entity the id-namespace +# property can be used to configure the namespace. The default is to use no +# namespace -> meaning the ID contains the fully qualified name +#id-namespace=http://example.org/entities/ + +# separator between the id and score (default TAB) +separator= + +#URL encoding/decoding of entity IDs +encodeIds=false +decodeIds=false + +# trimming of lines (default is false) +trimLine=false +# trimming the entity (default is true) +trimEntity=true + +#the charset used to read the data from the file (default UTF-8) +charset=UTF-8 diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/indexing.properties b/ontonethub-src/indexing/src/main/resources/indexing/config/indexing.properties new file mode 100644 index 0000000..af908c0 --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/indexing.properties @@ -0,0 +1,317 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ------------ +# Index Metadata +# ------------ + +# Here the name of the dataset MUST be specified by the user +# It MUST BE a single word with no spaces. +name=changeme + +# an optional short description may be used. If missing, a default description +# is created. +description=short description (http://www.example.org) + +# The "Synchronized" property is supported by some Entityhub Yard implementations +# to automatically update/delete local data as soon as the created archive with +# the indexed data is updated/deleted in the /datafiles folder of Apache Stanbol. +# By default this feature is activated. For very big indexes users might want to +# disable this feature to allow the deletion of the archive after the index was +# initialised successfully. Set the property below to false to deactivate it. +Synchronized=true + +# ------------ +# Indexing Mode dependent Configurations: (see readme.md for details) +# ------------ + +# The indexing Tool supports two modes. See (1) and (2) for details.
+ +# (1) Iterate over Data and lookup scores: (default) +# ------------ + +# use Jena TDB as source for indexing the RDF data located within +# "indexing/resource/rdfdata" +entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata + +#NOTE: if you want to index Bnodes you need to activate the bnode parameter +# see STANBOL-765 for details (and documentation) +#entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata,bnode:true + +# The EntityScore Provider needs to provide the scores for indexed entities +# use the NoEntityScoreProvider if no scores are available +entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider + +# The EntityFieldScoreProvider can be used to use the value of a property as score +# the property can be configured by the "field" parameter +# Scores are parsed from numbers and strings that can be converted to numbers. +#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityFieldScoreProvider,field:http://www.example.org/myOntology#score + +# The EntityIneratorToScoreProviderAdapter can be used to adapt any configured +# "entityIdIterator" to an "entityScoreProvider". See also the comments for +# "entityIdIterator". +#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter + +# Indexing VCARD + +# Alternative configuration for indexing vCard files +# change the config for the vcard indexer in the "vcard.properties" file +#entityDataIterable=org.apache.stanbol.entityhub.indexing.source.vcard.VcardIndexingSource,config:vcard +#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider + + + +# (2) Iterate over Entity IDs and lookup Data +# ------------ + +# First one needs to provide an EntityIterator +# Typically the LineBasedEntityIterator implementation is used. The configuration +# for this implementation is typically provided in its own file. A default +# configuration is provided by the iditerator.properties file. +#entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,config:iditerator + +# This EntityIterator allows using a simple Triple filter to select entities for Indexing. +# It uses the same configuration as "FieldValueFilter" but DOES NOT support +# Wildcards. See "FieldValueFilter" for details on how to configure! +# +# NOTE: Can only be used if Jena TDB (jenatdb.RdfIndexingSource) is used as +# indexing source! +#entityIdIterator=org.apache.stanbol.entityhub.indexing.source.jenatdb.ResourceFilterIterator,config:entityTypes.properties + +# Second, an entityDataProvider needs to be specified. Here we use the Jena TDB +# Note that this implementation implements both entityDataIterable AND +# entityDataProvider.
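+# Illustrative example (not part of the default configuration): a complete
+# mode (2) setup pairs the entityIdIterator with an entityDataProvider, e.g.:
+#entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,config:iditerator
+#entityDataProvider=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata
+# (both lines also appear individually in this file; keep them commented
+# unless mode (2) is actually used)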
+# RDF data needs to be located in the "indexing/resource/rdfdata" +# entityDataProvider=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata + +# ------------ +#Score Normalizer +# ------------ + +# Entity Scores are normalised by the ScoreNormalizer +# if no score normaliser is configured the scores will be used as provided by +# the entities +#scoreNormalizer= + +# ScoreNormalizers can be chained as shown by the following example configuration +# The score for an entity is first processed by the last normalizer +#scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:scorerange;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser;org.apache.stanbol.entityhub.indexing.core.normaliser.MinScoreNormalizer,config:minscore + +# Different Implementations: +# - RangeNormaliser: allows to define a range for score values. see +# "scorerange.properties" for possible configurations +# - NaturalLogNormaliser: Should be used if the score represents the number of +# incoming links. +# - MinScoreNormalizer: allows to prevent indexing of all entities with a score +# lower than the configured minimum. see "minscore.properties" for possible +# configurations. + +# ------------ +# Entity Processor +# ------------ + +# Multiple Entity processors can be used for indexing entities. They are separated by ';' +# and are executed in the order of definition. + +# FiledMapperProcessor: +# +# entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor +# +# This processor ensures that "field mappings" are executed while indexing entities. +# By default it will use the mappings configured by the "fieldConfiguration" +# property. To use other mappings one can use the "mappings" parameter (e.g. +# mappings:otherMappings.txt) + +# FieldValueFilter +# +#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes +# +# This allows to define a field and values that are used to filter entities. Only Entities +# that have one of the defined values as an actual value of the defined field will +# get indexed. This is typically used to filter entities by rdf:type, but can be used +# for any URI property. See the default entityTypes.properties file for more information + +# ResourceUriFilter +# +#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.ResourceUriFilter,prefix:http://www.example.org/ +# +# This allows to filter out all resources that do not start with the configured prefix + +# LdpathProcessor +# +# This allows to use simple LDpath statements to process entities, such as mapping +# only properties of entities with a specific type +# +# skos:prefLabel = .[rdf:type is ]/rdfs:label; +# +# Parameters: +# * append:[true/false] (default=true) Whether the result of the LDpath program is +# appended to the processed entity or the processed entity is +# replaced with the results of the LDpath program +# * ldpath:{file} (required, no default) The {file} containing the LDpath +# program used by this processor. {file} is relative to the config directory. +# +# NOTEs: +# * The LdpathProcessor only has access to the local properties of the currently +# indexed entity. LDpath statements that refer to other information, such as paths +# with a length > 1 or inverse properties, will not work +# * Processors can be chained by defining multiple Processor instances in the +# configuration and separating them with ';'.
This allows to use multiple +# LdpathProcessor instances and/or to chain LdpathProcessor(s) with others +# such as the "FiledMapperProcessor". Processors are executed as defined +# within the configuration of the "entityProcessor" property. +# * When using the FiledMapperProcessor on results of the LdpathProcessor make +# sure that the fields defined in the LDpath statements are indexed by the +# FiledMapperProcessor. Otherwise such values will NOT be indexed! +# org.apache.stanbol.entityhub.indexing.core.processor.LdpathProcessor,ldpath:ldpath-mapping.txt,append:true + +# GeonamesUriProcessor +# +# A typical case is that geonames URIs are missing the trailing '/'. This processor +# will search for geonames URIs and correct them. +# org.apache.stanbol.entityhub.indexing.core.processor.GeonamesUriProcessor + +# WikipediaToDBPediaUriProcessor +# +# This processor will rewrite Wikipedia URIs to DBPedia URIs +# (e.g. "http://de.wikipedia.org/wiki/Hawaii" to "http://de.dbpedia.org/resource/Hawaii" +# org.apache.stanbol.entityhub.indexing.core.processor.WikipediaToDBPediaUriProcessor + +# EmptyProcessor +# +#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.EmptyProcessor +# +# This processor can be used to deactivate EntityProcessing + +# Default Entity Processor configuration +entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes;org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor + +# ------------ +# Index Field Configuration +# ------------ + +# An index needs to provide its configuration. This is used at runtime if +# entities are updated. +fieldConfiguration=mappings.txt + + +# ------------ +# Post-Processing +# ------------ + +# The indexing tool now supports a post-processing step that is executed after +# all entities have been indexed to the indexing destination. +# For this step the configured IndexingDestination is used as both the source +# and the target. +# The post-processing allows to apply an additional set of EntityProcessor +# instances to each indexed Entity. +# In principle all EntityProcessor implementations as used for entity processing +# during the normal indexing phase can also be used for post-processing. However, +# as all information is already available within the IndexingDestination, +# this phase can also be used to perform processing steps that would not be +# easily possible during the indexing phase. + +# LdpathPostProcessor +# +# EntityProcessor that wraps the IndexingDestination as RDF Backend and +# therefore allows to execute any kind of LDpath program based on the indexed +# Entity data. +# Typical use cases of this processor include: +# * indexing transitive closures +# skos:broaderTransitive = (skos:broader)* +# * collecting labels of referenced entities to be used for disambiguation (e.g. +# use labels of linked concepts in a SKOS concept scheme: +# = *[rdf:type is skos:Concept]/(skos:prefLabel | skos:altLabel) +# * advanced indexing rules that need paths longer than one (e.g. adding +# labels of redirects pointing to an entity) +# rdfs:label = rdfs:label | (^rdfs:seeAlso/rdfs:label) +# +# Parameters: This uses the same parameters as the LdpathProcessor.
+ +#entityPostProcessor=org.apache.stanbol.entityhub.indexing.core.processor.LdpathPostProcessor,ldpath:ldpath-post-mapping.txt,append:true + + +# ------------ +# Indexing Destination +# ------------ + +# A SolrYard is used as the destination for indexing +# To boost some fields (typically labels) one can use the fieldboosts.properties +# A default field boost configuration is provided. +# A default fst.config is also provided (see STANBOL-1167) +indexingDestination=org.apache.stanbol.entityhub.indexing.destination.solryard.SolrYardIndexingDestination,boosts:fieldboosts,fstConf:fst.config + + +# ------------ +# Additional configurations for ReferencedSite +# ------------ + +# All the following properties are optional, but can be used to configure +# the referenced site used to access the indexed data within the Entityhub + +# The entity prefixes are used to determine if an entity needs to be searched +# on a referenced site. If not specified, requests for any entity will be +# forwarded to this referenced site. +# use ';' to separate multiple values +#org.apache.stanbol.entityhub.site.entityPrefix=http://example.org/resource;urn:mycompany: + +# Configuration of the remote Service +# If the indexed data are also available remotely (e.g. via a Linked Data endpoint) +# then it is possible to also allow direct access to such entities +# (a) retrieving entities (access URI and EntityDereferencer implementation) +#org.apache.stanbol.entityhub.site.accessUri="http://example.org/resource" +#org.apache.stanbol.entityhub.site.dereferencerType= +# available EntityDereferencer implementations +# - org.apache.stanbol.entityhub.dereferencer.CoolUriDereferencer +# - org.apache.stanbol.entityhub.dereferencer.SparqlDereferencer + +# (b) search entities (queryUri and EntitySearcher implementation) +#org.apache.stanbol.entityhub.site.queryUri=http://example.org/sparql +#org.apache.stanbol.entityhub.site.searcherType= +# available EntitySearcher implementations +# - org.apache.stanbol.entityhub.searcher.SparqlSearcher (generic SPARQL) +# - org.apache.stanbol.entityhub.searcher.LarqSearcher (Larq SPARQL extensions) +# - org.apache.stanbol.entityhub.searcher.VirtuosoSearcher (Virtuoso SPARQL extensions) + +# The referenced site can also specify additional mappings to be used in the +# case an entity of this site is imported to the Entityhub. +# Typically the same mappings as used for the indexing are a good start. +# However one might want to copy some values (e.g. labels) to commonly used +# fields used by the Entityhub +org.apache.stanbol.entityhub.site.fieldMappings=mappings.txt + + +# License(s) +# Add here the name and URLs of the license to be used for all entities +# provided by this referenced site +# NOTE: licenseName and licenseUrl MUST use the ordering as below! +# This example shows dual licensing with "cc by-sa" and GNU +#org.apache.stanbol.entityhub.site.licenseName=Creative Commons Attribution-ShareAlike 3.0;GNU Free Documentation License +#org.apache.stanbol.entityhub.site.licenseUrl=http://creativecommons.org/licenses/by-sa/3.0/;http://www.gnu.org/licenses/fdl.html + +# Attribution +# Some Licenses require attributions. These properties can be used to provide a +# link to the site with the attribution and the attribution text +#org.apache.stanbol.entityhub.site.attributionUrl=http://example.org/About.html +#org.apache.stanbol.entityhub.site.attribution=To the universe + + +# Fail on Error loading Resource Files +# The indexing tool can be configured to fail on error loading resource files.
+# By default, a resource file will be ignored when an error occurs while trying to load it. +failOnErrorLoadingResource=false + + diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/mappings.txt b/ontonethub-src/indexing/src/main/resources/indexing/config/mappings.txt new file mode 100644 index 0000000..dbbbe92 --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/mappings.txt @@ -0,0 +1,175 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#NOTE: THIS IS A DEFAULT MAPPING SPECIFICATION THAT INCLUDES MAPPINGS FOR +# COMMON ONTOLOGIES. USERS MIGHT WANT TO ADAPT THIS CONFIGURATION BY +# COMMENTING/UNCOMMENTING AND/OR ADDING NEW MAPPINGS + +# --- Define the Languages for all fields --- +# Uncomment to restrict indexing to a specific list of languages; otherwise all +# languages are indexed +#| @=null;en;de;fr;it + +#NOTE: null is used to import labels with no specified language + +# --- RDF RDFS and OWL Mappings --- +# This configuration only indexes properties that are typically used to store +# instance data defined by such namespaces. This excludes ontology definitions + +# NOTE that nearly all other ontologies are using properties of these three +# schemas, therefore it is strongly recommended to include such information! + +rdf:type | d=entityhub:ref + +rdfs:label +rdfs:comment +rdfs:seeAlso | d=entityhub:ref + + +owl:sameAs | d=entityhub:ref + +#If one likes to also index ontologies one should add the following statements +#owl:* +#rdfs:* + +# --- Dublin Core (DC) --- +# The default configuration imports all dc-terms data and copies values for the +# old dc-elements standard over to the corresponding properties of the dc-terms +# standard. + +# NOTE that a lot of other ontologies are also using DC for some of their data; +# therefore it is strongly recommended to include such information!
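+# Quick summary of the mapping syntax used in this file (illustrative; the
+# "myont" prefix is a hypothetical placeholder for a domain specific ontology):
+# myont:* index all properties of the namespace
+# myont:title > rdfs:label additionally copy values over to another field
+# myont:related | d=entityhub:ref enforce a datatype for the values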
+ +#mapping for all dc-terms properties +dc:* + +# copy dc:title to rdfs:label +dc:title > rdfs:label + +# deactivated by default, because such mappings are mapped to dc-terms +#dc-elements:* + +# mappings for the dc-elements properties to the dc-terms +dc-elements:contributor > dc:contributor +dc-elements:coverage > dc:coverage +dc-elements:creator > dc:creator +dc-elements:date > dc:date +dc-elements:description > dc:description +dc-elements:format > dc:format +dc-elements:identifier > dc:identifier +dc-elements:language > dc:language +dc-elements:publisher > dc:publisher +dc-elements:relation > dc:relation +dc-elements:rights > dc:rights +dc-elements:source > dc:source +dc-elements:subject > dc:subject +dc-elements:title > dc:title +dc-elements:type > dc:type +#also use dc-elements:title as label +dc-elements:title > rdfs:label + +# --- Social Networks (via foaf) --- +#The Friend of a Friend schema is often used to describe social relations between people +foaf:* + +# copy the name of a person over to rdfs:label +foaf:name > rdfs:label + +# additional data type checks +foaf:knows | d=entityhub:ref +foaf:made | d=entityhub:ref +foaf:maker | d=entityhub:ref +foaf:member | d=entityhub:ref +foaf:homepage | d=xsd:anyURI +foaf:depiction | d=xsd:anyURI +foaf:img | d=xsd:anyURI +foaf:logo | d=xsd:anyURI +#page about the entity +foaf:page | d=xsd:anyURI + + +# --- Schema.org -- + +# Defines an ontology used by search engines (Google, Yahoo and Bing) for +# indexing websites. + +schema:* +# Copy all names of schema instances over to rdfs:label +schema:name > rdfs:label + +# --- Simple Knowledge Organization System (SKOS) --- + +# A common data model for sharing and linking knowledge organization systems +# via the Semantic Web. Typically used to encode controlled vocabularies as +# a thesaurus +skos:* + +# copy all SKOS labels (preferred, alternative and hidden) over to rdfs:label +skos:prefLabel > rdfs:label +skos:altLabel > rdfs:label +skos:hiddenLabel > rdfs:label + +# copy values of **Match relations to the corresponding related, broader and narrower +skos:relatedMatch > skos:related +skos:broadMatch > skos:broader +skos:narrowMatch > skos:narrower + +#similar mappings for transitive variants are not contained, because transitive +#reasoning is not directly supported by the Entityhub. + +# Some SKOS thesauri use "skos:transitiveBroader" and "skos:transitiveNarrower", +# however such properties are only intended to be used by reasoners to +# calculate transitive closures over broader/narrower hierarchies. +# see http://www.w3.org/TR/skos-reference/#L2413 for details +# to correct such cases we will copy transitive relations to their counterparts +skos:narrowerTransitive > skos:narrower +skos:broaderTransitive > skos:broader + + +# --- Semantically-Interlinked Online Communities (SIOC) --- + +# An ontology for describing the information in online communities. +# This information can be used to export information from online communities +# and to link them together. The scope of the application areas that SIOC can +# be used for includes (and is not limited to) weblogs, message boards, +# mailing lists and chat channels. +sioc:* + +# --- biographical information (bio) +# A vocabulary for describing biographical information about people, both living +# and dead.
(see http://vocab.org/bio/0.1/) +bio:* + +# --- Rich Site Summary (rss) --- +rss:* + +# --- GoodRelations (gr) --- +# GoodRelations is a standardised vocabulary for product, price, and company data +gr:* + +# --- Creative Commons Rights Expression Language (cc) +# The Creative Commons Rights Expression Language (CC REL) lets you describe +# copyright licenses in RDF. +cc:* + + + + + + diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/minscore.properties b/ontonethub-src/indexing/src/main/resources/indexing/config/minscore.properties new file mode 100644 index 0000000..9df8944 --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/minscore.properties @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#to include entities with the configured min-score +inclusive=true +#the required minimum number of incoming links +min-score=2 \ No newline at end of file diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/scorerange.properties b/ontonethub-src/indexing/src/main/resources/indexing/config/scorerange.properties new file mode 100644 index 0000000..ba33bf4 --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/scorerange.properties @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# the range is always [0..{upper-bound}] +upper-bound=1 \ No newline at end of file diff --git a/ontonethub-src/indexing/src/main/resources/indexing/config/vcard.properties b/ontonethub-src/indexing/src/main/resources/indexing/config/vcard.properties new file mode 100644 index 0000000..5440cc7 --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/indexing/config/vcard.properties @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#Configurations for the vcard indexing source. +#This file only configures this component. To activate (actually use) it you need +#to activate it in the 'indexing.properties' file by activating: +#'entityDataIterable=org.apache.stanbol.entityhub.indexing.source.vcard.VcardIndexingSource' +# and deactivating all other entityDataIterable and entityIdIterator +#configurations. +#An example configuration is contained in the 'indexing.properties' file. +#Search for the 'Indexing VCARD' section + +#name of the folder with the vcard files (relative to /indexing/resource) +#'vcard' is the default. You can add multiple folders by splitting them with ',' +source=vcard +#The prefix used for the created instances. +#URIs will use {prefix}{type}/{name} where +# {prefix} is the configured value +# {type} is "person" or "organization" +# {name} is the value of FN for persons and ORG for organizations. However +# spaces are replaced with '-' and URL encoded +prefix=http://www.example.com/changeme/ +#The encoding used to read the vCard file +#pass an empty value to use the platform encoding +#default is UTF8 +encoding=UTF8 diff --git a/ontonethub-src/indexing/src/main/resources/log4j.properties b/ontonethub-src/indexing/src/main/resources/log4j.properties new file mode 100644 index 0000000..dc63190 --- /dev/null +++ b/ontonethub-src/indexing/src/main/resources/log4j.properties @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +# Generally print only WARN and ERROR messages +log4j.rootLogger=WARN, A1 +# to the console +log4j.appender.A1=org.apache.log4j.ConsoleAppender +#using the pattern layout +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +# with this configuration +log4j.appender.A1.layout.ConversionPattern=%d{HH:mm:ss,SSS} [%t] %-5p %c{2} - %m%n +# however, also log INFO messages of the indexing components +log4j.logger.org.apache.stanbol.entityhub.indexing=INFO +# for logging during the import of RDF data +log4j.logger.com.hp.hpl.jena=INFO +#solrtexttagger is very verbose (even on WARN level) +log4j.logger.org.opensextant.solrtexttagger=ERROR \ No newline at end of file diff --git a/ontonethub-src/ontonethub-war/pom.xml b/ontonethub-src/ontonethub-war/pom.xml new file mode 100644 index 0000000..a14620b --- /dev/null +++ b/ontonethub-src/ontonethub-war/pom.xml @@ -0,0 +1,208 @@ + + + + 4.0.0 + + org.apache.stanbol + stanbol-parent + 6 + ../../parent + + + org.apache.stanbol + org.apache.stanbol.launchers.full-war + 0.1 + war + + OntoNetHub WAR + WAR packaging for OntoNetHub WAR Launcher + + + + scm:svn:http://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/launchers/full + + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/launchers/full + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/release-1.0.0-branch/stanbol.apache.org + + + + /stanbol + 8080 + ${project.build.directory}${file.separator}stanbol + + + + stanbol + + + org.apache.sling + maven-launchpad-plugin + + + prepare-package + + prepare-package + + + + + + false + + + + org.apache.maven.plugins + maven-jar-plugin + + + org.apache.maven.plugins + maven-war-plugin + + WEB-INF/classes/META-INF/* + stanbol + + + + ${project.build.directory}/launchpad-bundles + + WEB-INF + + + ${project.build.outputDirectory}/META-INF + META-INF + + + + + + org.apache.rat + apache-rat-plugin + + + + + + + org.apache.tomcat.maven + tomcat7-maven-plugin + 2.2 + + ${stanbol.context} + ${stanbol.port} + + ${stanbol.home} + + true + ${project.build.directory}/${project.build.finalName}/ + + + + org.apache.maven.plugins + maven-deploy-plugin + + + true + + + + + + + + org.apache.sling + org.apache.sling.launchpad.base + webapp + war + runtime + + + + + org.apache.stanbol + org.apache.stanbol.launchers.bundlelists.osgiframework + 1.0.0 + partialbundlelist + provided + + + + + org.apache.clerezza.provisioning + rdf + partialbundlelist + provided + + + + + org.apache.stanbol + org.apache.stanbol.launchers.bundlelists.stanbolcommons + 1.0.0 + partialbundlelist + provided + + + + + + + + org.apache.stanbol + org.apache.stanbol.launchers.bundlelists.entityhub.core + 1.0.0 + partialbundlelist + provided + + + org.apache.stanbol + org.apache.stanbol.launchers.bundlelists.entityhub.clerezza + 1.0.0 + partialbundlelist + provided + + + org.apache.stanbol + org.apache.stanbol.launchers.bundlelists.ontonethub + 1.0.0 + partialbundlelist + provided + + + + + + + diff --git a/ontonethub-src/ontonethub-war/src/license/THIRD-PARTY.properties b/ontonethub-src/ontonethub-war/src/license/THIRD-PARTY.properties new file mode 100644 index 0000000..749e184 --- /dev/null +++ b/ontonethub-src/ontonethub-war/src/license/THIRD-PARTY.properties @@ -0,0 +1,27 @@ +# Generated by org.codehaus.mojo.license.AddThirdPartyMojo +#------------------------------------------------------------------------------- +# Already used licenses in project : +# - Apache Software License +# - Apache Software License, Version 2.0 +# - BSD License
+# - Common Development And Distribution License (CDDL), Version 1.0 +# - Common Development And Distribution License (CDDL), Version 1.1 +# - Common Public License, Version 1.0 +# - Eclipse Public License 1.0 +# - Eclipse Public License, Version 1.0 +# - GNU General Public License (GPL), Version 2 with classpath exception +# - GNU Lesser General Public License (LGPL) +# - GNU Lesser General Public License (LGPL), Version 2.1 +# - ICU License +# - MIT License +# - New BSD License +# - Public Domain License +# - Revised BSD License +# - The SAX License +# - The W3C License +#------------------------------------------------------------------------------- +# Please fill the missing licenses for dependencies : +# +# +#Wed Sep 07 09:24:10 CEST 2016 +xerces--xercesImpl--2.7.1=The Apache Software License, Version 2.0 diff --git a/ontonethub-src/ontonethub-war/src/main/bundles/list.xml b/ontonethub-src/ontonethub-war/src/main/bundles/list.xml new file mode 100644 index 0000000..2e37beb --- /dev/null +++ b/ontonethub-src/ontonethub-war/src/main/bundles/list.xml @@ -0,0 +1,65 @@ + + + + + + + + + + + + org.apache.stanbol + org.apache.stanbol.commons.web.sparql + 1.0.0 + + + + + + + + + + + + + + + + + diff --git a/ontonethub-src/ontonethub-war/src/main/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg b/ontonethub-src/ontonethub-war/src/main/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg new file mode 100644 index 0000000..30642b3 --- /dev/null +++ b/ontonethub-src/ontonethub-war/src/main/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Example default OSGi config that's loaded +# at startup if Sling's org.apache.sling.launchpad.installer +# bundle is active. +# +# The filename defines the config PID. + +# Factory configs should be named like foo.bar-X.cfg +# where foo.bar is the factory PID and X a unique value +# for that PID. The actual config PID is then automatically +# generated, and the value of X is stored as an alias +# property in the configuration. + +message=This test config should be loaded at startup + +anotherValue = This is AnotherValue. \ No newline at end of file diff --git a/ontonethub-src/ontonethub-war/src/main/sling/common.properties b/ontonethub-src/ontonethub-war/src/main/sling/common.properties new file mode 100644 index 0000000..ec136a7 --- /dev/null +++ b/ontonethub-src/ontonethub-war/src/main/sling/common.properties @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This file is loaded by Apache Sling during startup. Properties defined +# in this file are copied over to the sling.properties file in the {sling.home} +# directory. + +# The stanbol home directory +# by default this is set to the same value as sling.home +org.osgi.framework.startlevel.beginning=40 +clerezza.shell.disable=true +# set the sling file installer dir to {working-dir}/stanbol/fileinstall +sling.fileinstall.dir=${sling.home}/fileinstall +sling.system.packages.servletdescription=javax.servlet.descriptor;version=3.0.0 \ No newline at end of file diff --git a/ontonethub-src/ontonethub-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg b/ontonethub-src/ontonethub-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg new file mode 100644 index 0000000..30642b3 --- /dev/null +++ b/ontonethub-src/ontonethub-war/src/main/webapp/WEB-INF/resources/config/org.apache.stanbol.examples.ExampleBootstrapConfig.cfg @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Example default OSGi config that's loaded +# at startup if Sling's org.apache.sling.launchpad.installer +# bundle is active. +# +# The filename defines the config PID. + +# Factory configs should be named like foo.bar-X.cfg +# where foo.bar is the factory PID and X a unique value +# for that PID. The actual config PID is then automatically +# generated, and the value of X is stored as an alias +# property in the configuration. + +message=This test config should be loaded at startup + +anotherValue = This is AnotherValue.
\ No newline at end of file diff --git a/ontonethub-src/ontonethub-war/src/main/webapp/WEB-INF/web.xml b/ontonethub-src/ontonethub-war/src/main/webapp/WEB-INF/web.xml new file mode 100644 index 0000000..abc85d9 --- /dev/null +++ b/ontonethub-src/ontonethub-war/src/main/webapp/WEB-INF/web.xml @@ -0,0 +1,73 @@ + + + + Stanbol Full Web Application + + + + + org.apache.sling.launchpad.webapp.SlingSessionListener + + + + + Stanbol Servlet + stanbol + org.apache.sling.launchpad.webapp.SlingServlet + + + sling.home + ${user.dir}/stanbol/${context.path} + + + org.osgi.framework.startlevel.beginning + 40 + + + stanbol.home + ${sling.home} + + + sling.fileinstall.dir + ${sling.home}/fileinstall + + 100 + + + + + stanbol + /* + + diff --git a/ontonethub-src/ontonethub/.classpath b/ontonethub-src/ontonethub/.classpath new file mode 100644 index 0000000..d9d5fdd --- /dev/null +++ b/ontonethub-src/ontonethub/.classpath @@ -0,0 +1,666 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/ontonethub-src/ontonethub/.project b/ontonethub-src/ontonethub/.project new file mode 100644 index 0000000..b4303d8 --- /dev/null +++ b/ontonethub-src/ontonethub/.project @@ -0,0 +1,25 @@ + + + it.cnr.istc.stlab.ontonethub.web + This Indexing Tool provides a default configuration to index any + kind of RDF data. Users might want to adapt some configurations to the specifica + of the RDF data. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse. 
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+	</natures>
+</projectDescription>
diff --git a/ontonethub-src/ontonethub/.settings/org.eclipse.core.resources.prefs b/ontonethub-src/ontonethub/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..abdea9a
--- /dev/null
+++ b/ontonethub-src/ontonethub/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,4 @@
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding//src/main/resources=UTF-8
+encoding/<project>=UTF-8
diff --git a/ontonethub-src/ontonethub/.settings/org.eclipse.jdt.core.prefs b/ontonethub-src/ontonethub/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..e990b8e
--- /dev/null
+++ b/ontonethub-src/ontonethub/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+#Mon Jul 31 12:34:33 CEST 2017
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.source=1.8
+org.eclipse.jdt.core.compiler.compliance=1.8
diff --git a/ontonethub-src/ontonethub/.settings/org.eclipse.m2e.core.prefs b/ontonethub-src/ontonethub/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/ontonethub-src/ontonethub/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/ontonethub-src/ontonethub/README.md b/ontonethub-src/ontonethub/README.md
new file mode 100644
index 0000000..162ed09
--- /dev/null
+++ b/ontonethub-src/ontonethub/README.md
@@ -0,0 +1,248 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+# Default Indexing Tool for RDF
+
+This tool provides a default configuration for creating a Solr index of RDF
+files (e.g. a SKOS export of a thesaurus or a set of FOAF files).
+
+## Building
+
+If it was not yet built during the build process of the Entityhub, call
+
+    mvn install
+
+to build the jar with all the dependencies used later for indexing.
+
+If the build succeeds, go to the /target directory and copy the
+
+    org.apache.stanbol.entityhub.indexing.genericrdf-*.jar
+
+to the directory from which you would like to start the indexing.
+
+## Indexing
+
+### (1) Initialize the configuration
+
+The default configuration is initialized by calling
+
+    java -jar org.apache.stanbol.entityhub.indexing.genericrdf-*.jar init
+
+This will create a sub-folder "indexing" in the current directory.
+Within this folder all the
+
+* configurations (indexing/config)
+* source files (indexing/resources)
+* created files (indexing/destination)
+* distribution files (indexing/distribution)
+
+will be located.
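+
+For example, after initialization the working directory should contain a layout
+like the following (a sketch based on the list above; the actual listing may
+differ slightly):
+
+    java -jar org.apache.stanbol.entityhub.indexing.genericrdf-*.jar init
+    ls indexing
+    config  destination  distribution  resources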
+
+### (2) Adapt the configuration
+
+The configuration is located within the
+
+    indexing/config
+
+directory.
+
+The indexer supports two indexing modes:
+
+1. Iterate over the data and look up the scores for entities (default).
+For this mode the "entityDataIterable" and an "entityScoreProvider" MUST BE
+configured. If no entity scores are available, a default entityScoreProvider
+that provides no scores can be used. This mode is typically used to index all
+entities of a dataset.
+2. Iterate over the entity IDs and scores and look up the data. For this mode an
+"entityIdIterator" and an "entityDataProvider" MUST BE configured. This mode is
+typically used if only a small sub-set of a large dataset is indexed. This might
+be the case if entity scores are available and one only wants to index e.g. the
+10000 most important entities, or if a dataset contains entities of many different
+types but one only wants to include entities of a specific type (e.g. Species in
+DBpedia).
+
+The configuration of the mentioned components is contained in the main indexing
+configuration file explained below.
+
+#### Main indexing configuration (indexing.properties)
+
+This file contains the main configuration for the indexing process.
+
+* the "name" property MUST BE set to the name of the referenced site to be created
+by the indexing process
+* the "entityDataIterable" is used to configure the component iterating over the
+RDF data to be indexed. The "source" parameter refers to the directory in which
+the RDF files to be indexed are searched. The RDF files can be compressed with
+'gz', 'bz2' or 'zip'. It is even supported to load multiple RDF files contained
+in a single ZIP archive.
+* the "entityScoreProvider" is used to provide the ranking for entities. A
+typical example is the number of incoming links. Such rankings are typically
+used to weight recommendations and sort result lists (e.g. for a query for
+"Paris" it is much more likely that a user refers to Paris in France than to
+one of the two cities named Paris in Texas). If no rankings are available you
+should use the
+"org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider".
+* the "scoreNormalizer" is only useful in case entity scores are available.
+This component is used to normalize rankings or to filter out entities with
+low rankings.
+* the "entityProcessor" is used to process (map, convert, filter) the information
+of entities before indexing. The mapping configuration is provided in a separate
+file (default "mappings.txt").
+* the "entityPostProcessor" is used to process already indexed entities in a
+2nd iteration. This has the advantage that processors used in the post-processing
+can assume that all raw data are already present within the IndexingDestination.
+For this step the IndexingDestination is used as both source and destination.
+See also [STANBOL-591](https://issues.apache.org/jira/browse/STANBOL-591)
+* indexes need to provide the configuration used to store entities. The
+"fieldConfiguration" property allows specifying this. Typically it is the same
+mapping file as used for the "entityProcessor", however this is not a requirement.
+* the "indexingDestination" property is used to configure the target of the
+indexing. Currently there is only a single implementation that stores the
+indexed data within a SolrYard. The "boosts" parameter can be used to boost
+(see the Solr documentation for details) specific fields (typically labels)
+for full text searches.
+* all properties starting with "org.apache.stanbol.entityhub.site." are used
+for the configuration of the referenced site.
+
+Please note also the documentation within the "indexing.properties" file for details.
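+
+As an illustration, the relevant entries of an "indexing.properties" for the
+default mode (1) might look like the following sketch. Except for the
+NoEntityScoreProvider mentioned above, the class names, parameters and values
+here are illustrative assumptions and need to be checked against the comments
+in the generated "indexing.properties":
+
+    name=myThesaurus
+    entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata
+    entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider
+    fieldConfiguration=mappings.txt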
+
+#### Mapping configuration (mappings.txt)
+
+Mappings are used for three different purposes:
+
+1. During the indexing process by the "entityProcessor" to process the
+information of each entity
+2. At runtime by the local Cache to process single Entities that are updated in the cache.
+3. At runtime by the Entityhub when importing an Entity from a referenced Site.
+
+The configurations for (1) and (2) are typically identical. For (3) one might
+want to use a different configuration. The default configuration uses the same
+configuration (mappings.txt) for (1) and (2) and no specific configuration
+for (3).
+
+The default mappings.txt already includes mappings for popular ontologies
+such as Dublin Core, SKOS and FOAF. Domain specific mappings can be added to
+this configuration.
+
+#### Score Normalizer configuration
+
+The default configuration also provides examples for configurations of the
+different score normalisers. However, by default they are not used.
+
+* "minscore.properties": Example of how to configure a minimum score for Entities
+to be indexed
+* "scorerange.properties": Example of how to normalise the maximum/minimum score
+  of Entities to the configured range.
+
+NOTE:
+
+* To use score normalisation, scores need to be provided for Entities. This means
+an "entityScoreProvider" or an "entityIdIterator" needs to be configured
+(indexing.properties).
+* Multiple score normalisers can be used. The call order is determined by the
+configuration of the "scoreNormalizer" property (indexing.properties).
+
+### (3) Provide the RDF files to be indexed
+
+All sources for the indexing process need to be located within the
+
+    indexing/resources
+
+directory.
+
+By default the RDF files need to be located within
+
+    indexing/resources/rdfdata
+
+however this can be changed via the "source" parameter of the "entityDataIterable"
+or "entityDataProvider" property in the main indexing configuration (indexing.properties).
+
+Supported RDF formats are:
+
+* RDF/XML (by using one of "rdf", "owl", "xml" as extension): Note that this
+encoding is not well suited for importing large RDF datasets.
+* N-Triples (by using "nt" as extension): This is the preferred format for
+importing (especially large) RDF datasets.
+* Turtle (by using "ttl" as extension)
+* N3 (by using "n3" as extension)
+* N-Quads (by using "nq" as extension): Note that all named graphs will be
+imported into the same index.
+* TriG (by using "trig" as extension)
+
+Supported compression formats are:
+
+* "gz" and "bz2" files: One needs to use double file extensions to indicate both
+the used compression and the RDF format (e.g. myDump.nt.bz2)
+* "zip": For ZIP archives all files within the archive are treated separately.
+That means that even if a ZIP archive contains multiple RDF files, all of them
+will be imported.
+
+### (4) Create the Index
+
+    java -Xmx1024m -jar org.apache.stanbol.entityhub.indexing.genericrdf-*.jar index
+
+Note that calling the utility with the option -h will print the help.
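+
+Putting steps (3) and (4) together, a typical run might look like the following
+sketch (the dump file name is illustrative):
+
+    cp myThesaurus.nt.bz2 indexing/resources/rdfdata/
+    java -Xmx1024m -jar org.apache.stanbol.entityhub.indexing.genericrdf-*.jar index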
+
+
+## Use the created index with the Entityhub
+
+After the indexing completes, the distribution folder
+
+    /indexing/dist
+
+will contain two files:
+
+1. org.apache.stanbol.data.site.{name}-{version}.jar: This is a Bundle that can
+be installed to any OSGi environment running the Apache Stanbol Entityhub. When
+started it will create and configure
+
+ * a "ReferencedSite" accessible at "http://{host}/{root}/entityhub/site/{name}"
+ * a "Cache" used to connect the ReferencedSite with your Data and
+ * a "SolrYard" that manages the data indexed by this utility.
+
+ When installing this bundle the Site will not yet work, because this Bundle
+ does not contain the indexed data but only the configuration for the Solr Index.
+
+2. {name}.solrindex.zip: This is the ZIP archive with the indexed data. This
+file will be requested by the Apache Stanbol Data File Provider after installing
+the Bundle described above. To install the data you need to copy this file to the
+"/sling/datafiles" folder within the working directory of your Stanbol Server.
+
+ If you copy the ZIP archive before installing the bundle, the data will be
+ picked up during the installation of the bundle automatically. If you provide
+ the file afterwards you will also need to restart the SolrYard installed by the
+ Bundle.
+
+{name} denotes the value you configured for the "name" property within the
+"indexing.properties" file.
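+
+For example, deploying the created index might look like this (the working
+directory path is illustrative; {name} as configured in "indexing.properties"):
+
+    cp indexing/dist/{name}.solrindex.zip {stanbol-working-dir}/sling/datafiles/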
+
+### A note about blank nodes
+
+If your input data sets contain large numbers of blank nodes, you may find that
+you run out of heap space during indexing. This is because Jena
+(like many semantic stores) keeps a store of blank nodes in core memory while
+importing. Keeping in mind that the Entityhub does not support the use of blank
+nodes, there is a means of indexing such data sets nonetheless: you can convert
+them to named nodes and then index. There is a convenient tool packaged with
+Stanbol for this purpose, called "Urify" (org.apache.stanbol.entityhub.indexing.Urify).
+It is available in the runnable JAR file built by this indexer. To use it, put that
+JAR on your classpath, and you can execute Urify, giving it a list of files to process.
+Use the "-h" or "--help" flag to see options for Urify:
+
+    java -Xmx1024m -cp org.apache.stanbol.entityhub.indexing.genericrdf-*.jar \
+        org.apache.stanbol.entityhub.indexing.Urify --help
+
\ No newline at end of file
diff --git a/ontonethub-src/ontonethub/dependency-reduced-pom.xml b/ontonethub-src/ontonethub/dependency-reduced-pom.xml
new file mode 100644
index 0000000..62a17a4
--- /dev/null
+++ b/ontonethub-src/ontonethub/dependency-reduced-pom.xml
@@ -0,0 +1,64 @@
+
+
+ apache-stanbol-entityhub-indexing
+ org.apache.stanbol
+ 1.0.0
+
+ 4.0.0
+ org.apache.stanbol.entityhub.indexing.genericrdf
+ Apache Stanbol Entityhub Indexing Generic RDF
+ This Indexing Tool provides a default configuration to index any
+ kind of RDF data. Users might want to adapt some configurations to the specifics
+ of the RDF data.
+
+
+ Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+ A business-friendly OSS license
+
+
+
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/entityhub/indexing/genericrdf
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/entityhub/indexing/genericrdf
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/stanbol.apache.org/branches/release-1.0.0-branch
+
+
+
+
+ maven-jar-plugin
+
+
+
+ true
+ org.apache.stanbol.entityhub.indexing.Main
+
+
+
+
+
+ maven-shade-plugin
+
+
+ package
+
+ shade
+
+
+
+
+
+
+ *
+
+
+
+
+
+
+
+
+
+
diff --git a/ontonethub-src/ontonethub/pom.xml b/ontonethub-src/ontonethub/pom.xml
new file mode 100644
index 0000000..0238a1c
--- /dev/null
+++ b/ontonethub-src/ontonethub/pom.xml
@@ -0,0 +1,344 @@
+
+
+
+
+ 4.0.0
+
+ org.apache.stanbol
+ stanbol-parent
+ 6
+ ..
+
+
+ it.cnr.istc.stlab
+ ontonethub.web
+ 0.1
+ bundle
+
+ Apache Stanbol Extension - OntonetHub
+
+
+ This Stanbol extension provides a bundle that allows one to manage and index ontologies.
+
+
+
+
+ Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+ A business-friendly OSS license
+
+
+
+
+
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/entityhub/indexing/genericrdf
+
+
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/entityhub/indexing/genericrdf
+
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/stanbol.apache.org/branches/release-1.0.0-branch
+
+
+
+
+
+ org.apache.felix
+ maven-bundle-plugin
+ true
+
+
+
+ guava,
+ protobuf-java,
+ cal10n-api;scope=compile|runtime
+
+ true
+
+
+
+ !org.glassfish.jersey.internal.*,
+ !org.glassfish.jersey.message.*,
+ !org.jvnet.mimepull,
+ !arq.*,
+ !net.sf.ehcache,
+ !net.spy.memcached,
+ !org.opensextant.*,
+ !org.apache.log4j.jmx,
+ !org.jboss.logging,
+
+ !com.jcraft.jzlib,
+ !org.eclipse.*,
+ !junit.*,
+ !javax.servlet.jsp,
+ !org.jets3t.service,
+ !org.mortbay.*,
+ !org.apache.tools.ant.*,
+ !org.jdom.*,
+ !com.sun.jersey.spi.container.servlet,
+ !com.sun.jndi.ldap,
+ !com.thoughtworks.paranamer,
+ !org.jets3t.service.*,
+ !org.kosmix.kosmosfs.access,
+ !org.osgi.service.component.annotations,
+ !org.restlet.*,
+ !org.slf4j.impl,
+ !org.xerial.snappy,
+ !org.znerd.xmlenc,
+ !sun.net.dns,
+ !sun.net.util,
+ !org.apache.commons.net.ftp,
+ !org.apache.commons.net.util,
+
+ *
+
+
+
+
+
+ org.apache.felix
+ maven-scr-plugin
+
+
+ org.apache.rat
+ apache-rat-plugin
+
+
+
+ src/test/resources/*.txt
+ src/license/THIRD-PARTY.properties
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.6.1
+
+ 1.8
+ 1.8
+
+
+
+
+
+
+
+
+
+
+
+ org.apache.stanbol
+ org.apache.stanbol.commons.namespaceprefix.provider.stanbol
+ 1.0.0
+
+
+ org.apache.stanbol
+ org.apache.stanbol.commons.namespaceprefix.provider.prefixcc
+ 1.0.0
+
+
+
+
+
+
+
+ org.apache.stanbol
+ org.apache.stanbol.entityhub.indexing.source.jenatdb
+ 1.0.0
+ runtime
+
+
+ log4j-over-slf4j
+ org.slf4j
+
+
+
+
+ org.apache.jena
+ jena-osgi
+ 2.13.0
+
+
+
+
+ org.freemarker
+ freemarker
+ 2.3.20
+
+
+
+
+ org.apache.stanbol
+ org.apache.stanbol.commons.jobs.api
+ 1.0.0
+
+
+
+
+ org.apache.stanbol
+ org.apache.stanbol.commons.web.base
+ 1.0.0
+
+
+ org.apache.stanbol
+ org.apache.stanbol.commons.web.viewable
+ 1.0.0
+
+
+
+
+ org.apache.stanbol
+ org.apache.stanbol.entityhub.servicesapi
+ 1.0.0
+
+
+ org.apache.stanbol
+ org.apache.stanbol.entityhub.model.clerezza
+ 1.0.0
+
+
+ org.apache.stanbol
+ org.apache.stanbol.entityhub.ldpath
+ 1.0.0
+
+
+
+
+ org.glassfish.jersey.media
+ jersey-media-multipart
+ 2.7
+
+
+
+
+ org.codehaus.jettison
+ jettison
+
+
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.6.3
+
+
+
+ commons-fileupload
+ commons-fileupload
+ 1.3.1
+
+
+
+ org.apache.jena
+ jena-arq
+ 2.13.0
+
+
+ org.apache.jena
+ jena-tdb
+ 1.0.2
+
+
+
+ com.google.guava
+ guava
+ 18.0
+
+
+
+ com.google.protobuf
+ protobuf-java
+ 2.4.0a
+
+
+
+ ch.qos.cal10n
+ cal10n-api
+ 0.8.1
+
+
+
+
+ org.apache.stanbol
+ org.apache.stanbol.ontologymanager.servicesapi
+ 1.0.0
+
+
+ org.apache.stanbol
+ org.apache.stanbol.ontologymanager.sources.owlapi
+ 1.0.0
+
+
+
+
diff --git a/ontonethub-src/ontonethub/prov.rdf b/ontonethub-src/ontonethub/prov.rdf
new file mode 100644
index 0000000..a4cb97f
--- /dev/null
+++ b/ontonethub-src/ontonethub/prov.rdf
@@ -0,0 +1,2382 @@
+[prov.rdf: a bundled copy of the W3C PROV ontology (PROV-O, Recommendation version 2013-04-30, with its Dictionary, Dublin Core, Access-and-Query, and PROV-LINKS extensions), 2382 lines of RDF/XML; the XML markup of this file was not preserved in this extract]
As such, there are numerous ways in which location can be expressed, such as by a coordinate, address, landmark, and so forth. + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-attribute + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-attribute-location + Location + + + + + + + hadPrimarySource property is a particular case of wasDerivedFrom (see http://www.w3.org/TR/prov-dm/#term-original-source) that aims to give credit to the source that originated some information. + + + + + + + + + + + + + + + + + + + + wasGeneratedBy + + + starting-point + + + generated + + + entities-activities + + + 1 + + + + + + agents-responsibility + + + + + + + + wasAssociateFor + wasAssociatedWith + starting-point + An prov:Agent that had some (unspecified) responsibility for the occurrence of this prov:Activity. + + + + + + + + + expanded + The Location of any resource. + This property has multiple RDFS domains to suit multiple OWL Profiles. See <a href="#owl-profile">PROV-O OWL Profile</a>. + locationOf + + This property is not functional because the many values could be at a variety of granularies (In this building, in this room, in that chair). + The naming of prov:atLocation parallels prov:atTime, and is not named prov:hadLocation to avoid conflicting with the convention that prov:had* properties are used on prov:Influence classes. + + + atLocation + + + + + + + qualifiedCommunicationOf + qualified + + qualifiedCommunication + + + + entities-activities + + If this Activity prov:wasInformedBy Activity :a, then it can qualify how it was influenced using prov:qualifiedCommunication [ a prov:Communication; prov:activity :a; :foo :bar ]. + + + entityOfInfluence + + + + qualified + This property behaves in spirit like rdf:object; it references the object of a prov:wasInfluencedBy triple. + The prov:entity property references an prov:Entity which influenced a resource. This property applies to an prov:EntityInfluence, which is given by a subproperty of prov:qualifiedInfluence from the influenced prov:Entity, prov:Activity or prov:Agent. + entity + + + + + Activity that identifies the replacement of a resource. + + Replace + + + + + influencer + hadInfluence + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-influence + + This property is used as part of the qualified influence pattern. Subclasses of prov:Influence use these subproperties to reference the resource (Entity, Agent, or Activity) whose influence is being qualified. + + qualified + Subproperties of prov:influencer are used to cite the object of an unqualified PROV-O triple whose predicate is a subproperty of prov:wasInfluencedBy (e.g. prov:used, prov:wasGeneratedBy). prov:influencer is used much like rdf:object is used. + This property and its subproperties are used in the same way as the rdf:object property, i.e. to reference the object of an unqualified prov:wasInfluencedBy or prov:influenced triple. + + + + + contextOf + + asInBundle + + + prov:asInBundle is used to specify which bundle the general entity of a prov:mentionOf property is described. + +When :x prov:mentionOf :y and :y is described in Bundle :b, the triple :x prov:asInBundle :b is also asserted to cite the Bundle in which :y was described. + + + + qualified + + activityOfInfluence + + + This property behaves in spirit like rdf:object; it references the object of a prov:wasInfluencedBy triple. + + The prov:activity property references an prov:Activity which influenced a resource. 
This property applies to an prov:ActivityInfluence, which is given by a subproperty of prov:qualifiedInfluence from the influenced prov:Entity, prov:Activity or prov:Agent. + activity + + + + invalidatedAtTime + The time at which an entity was invalidated (i.e., no longer usable). + entities-activities + + + It is the intent that the property chain holds: (prov:qualifiedInvalidation o prov:atTime) rdfs:subPropertyOf prov:invalidatedAtTime. + + + + expanded + + + + Activity that identifies the issuance (e.g., publication) of a resource. + + Submit + + + + + ended + + End is when an activity is deemed to have ended. An end may refer to an entity, known as trigger, that terminated the activity. + entities-activities + + + + + + expanded + + wasEndedBy + + + 1 + + + + + An object property to express the accountability of an agent towards another agent. The subordinate agent acted on behalf of the responsible agent in an actual activity. + + + + agents-responsibility + + actedOnBehalfOf + + hadDelegate + + + + starting-point + + + + + + + + + + + qualifiedUsingActivity + + + + + + + started + wasStartedBy + + expanded + + + entities-activities + Start is when an activity is deemed to have started. A start may refer to an entity, known as trigger, that initiated the activity. + + + + + + + + + entities-activities + + If this Activity prov:used Entity :e, then it can qualify how it used it using prov:qualifiedUsage [ a prov:Usage; prov:entity :e; :foo :bar ]. + qualifiedUsingActivity + + qualifiedUsage + + + qualified + + + + + + starting-point + + entities-activities + An activity is something that occurs over a period of time and acts upon or with entities; it may include consuming, processing, transforming, modifying, relocating, using, or generating entities. + Activity + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Activity + + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Activity + + + + + + + + + + If this Activity prov:generated Entity :e, then it can qualify how it performed the Generation using prov:qualifiedGeneration [ a prov:Generation; prov:entity :e; :foo :bar ]. + + qualified + entities-activities + qualifiedGenerationOf + qualifiedGeneration + + + + + + + + + + + + qualifiedDelegationOf + + + + + qualifiedCommunicationOf + + + + + + + PROV-O does not define all property inverses. The directionalities defined in PROV-O should be given preference over those not defined. However, if users wish to name the inverse of a PROV-O property, the local name given by prov:inverse should be used. + + + + derivations + + Influence + Influence is the capacity of an entity, activity, or agent to have an effect on the character, development, or behavior of another by means of usage, start, end, generation, invalidation, communication, derivation, attribution, association, or delegation. + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-influence + + + An instance of prov:Influence provides additional descriptions about the binary prov:wasInfluencedBy relation from some influenced Activity, Entity, or Agent to the influencing Activity, Entity, or Agent. For example, :stomach_ache prov:wasInfluencedBy :spoon; prov:qualifiedInfluence [ a prov:Influence; prov:entity :spoon; :foo :bar ] . Because prov:Influence is a broad relation, the more specific relations (Communication, Delegation, End, etc.) should be used when applicable. 
+ Because prov:Influence is a broad relation, its most specific subclasses (e.g. prov:Communication, prov:Delegation, prov:End, prov:Revision, etc.) should be used when applicable. + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-influence + qualified + + + + + + + hadDelegate + + + + + hadInfluence + + + + + + + + + The more specific subproperties of prov:wasDerivedFrom (i.e., prov:wasQuotedFrom, prov:wasRevisionOf, prov:hadPrimarySource) should be used when applicable. + + hadDerivation + + + derivations + + + + starting-point + wasDerivedFrom + A derivation is a transformation of an entity into another, an update of an entity resulting in a new one, or the construction of a new entity based on a pre-existing entity. + + + + Generation is the completion of production of a new entity by an activity. This entity did not exist before generation and becomes available for usage after this generation. + qualified + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Generation + entities-activities + Generation + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Generation + An instance of prov:Generation provides additional descriptions about the binary prov:wasGeneratedBy relation from a generated prov:Entity to the prov:Activity that generated it. For example, :cake prov:wasGeneratedBy :baking; prov:qualifiedGeneration [ a prov:Generation; prov:activity :baking; :foo :bar ]. + + + + + + wasUsedInDerivation + + + + + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-insertion + + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + prov:qualifiedInsertion shows the details of an insertion, in particular the inserted key-entity pairs. + collections + collections + + qualifiedInsertion + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-insertion + + + + + + + + hadDerivation + + + + 1 + + + + + + + + + wasMemberOf + + + + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + Start is when an activity is deemed to have been started by an entity, known as trigger. The activity did not exist before its start. Any usage, generation, or invalidation involving an activity follows the activity's start. A start may refer to a trigger entity that set off the activity, or to an activity, known as starter, that generated the trigger. + + entities-activities + An instance of prov:Start provides additional descriptions about the binary prov:wasStartedBy relation from some started prov:Activity to an prov:Entity that started it. For example, :foot_race prov:wasStartedBy :bang; prov:qualifiedStart [ a prov:Start; prov:entity :bang; :foo :bar; prov:atTime '2012-03-09T08:05:08-05:00'^^xsd:dateTime ] . + qualified + + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Start + + Start + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Start + + + + qualifiedSourceOf + + + + + + + + + entities-activities + + invalidated + + expanded + wasInvalidatedBy + + + + + alternateOf + + + + alternate + + Two alternate entities present aspects of the same thing. These aspects may be the same or different, and the alternate entities may or may not overlap in time. 
+ http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-alternate + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-alternate + expanded + alternateOf + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-removal + The property used by a prov:Insertion and prov:Removal to cite the prov:Dictionary that was prov:derivedByInsertionFrom or prov:derivedByRemovalFrom another dictionary. + dictionary + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-removal + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-insertion + + + collections + + collections + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-insertion + + + + derivations + qualified + + + hadGeneration + + generatedAsDerivation + + The _optional_ Generation involved in an Entity's Derivation. + + + Activity that identifies the acceptance of a resource (e.g., an article in a conference) + + Accept + + + + + wasRoleIn + + + + + + It is not recommended that the type AgentInfluence be asserted without also asserting one of its more specific subclasses. + qualified + AgentInfluence provides additional descriptions of an Agent's binary influence upon any other kind of resource. Instances of AgentInfluence use the prov:agent property to cite the influencing Agent. + AgentInfluence is the capacity of an agent to have an effect on the character, development, or behavior of another by means of attribution, association, delegation, or other. + + AgentInfluence + + + + entities-activities + The time at which an entity was completely created and is available for use. + + + + It is the intent that the property chain holds: (prov:qualifiedGeneration o prov:atTime) rdfs:subPropertyOf prov:generatedAtTime. + generatedAtTime + + + + expanded + + + + + qualified + The prov:agent property references an prov:Agent which influenced a resource. This property applies to an prov:AgentInfluence, which is given by a subproperty of prov:qualifiedInfluence from the influenced prov:Entity, prov:Activity or prov:Agent. + + + + This property behaves in spirit like rdf:object; it references the object of a prov:wasInfluencedBy triple. + agentOfInfluence + agent + + + + + + + + + + + + + + + wasInfluencedBy + + + + + + + + A reference to the principal section of the PROV-DM document that describes this concept. + + + + + access-and-query + Relates a resource to a provenance pingback service that may receive additional provenance links about the resource. + http://www.w3.org/TR/2013/NOTE-prov-aq-20130430/#provenance-pingback + provenance pingback + + + + The key of a prov:KeyEntityPair, which is an element of a prov:Dictionary. + + collections + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-membership + + collections + + pairKey + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-membership + + + + revisedEntity + + + + + + + Revision is a derivation (see http://www.w3.org/TR/prov-dm/#term-Revision). 
Moreover, according to +http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#term-Revision 23 April 2012 'wasRevisionOf is a strict sub-relation of wasDerivedFrom since two entities e2 and e1 may satisfy wasDerivedFrom(e2,e1) without being a variant of each other.' + + + + + wasInvalidatedBy + + + + + + + + + + + + agentOfInfluence + + + + + + + + + qualifiedAttributionOf + + + + + + + + + wasGeneratedBy + + + + + + + Classify prov-o terms into three categories, including 'starting-point', 'qualifed', and 'extended'. This classification is used by the prov-o html document to gently introduce prov-o terms to its users. + + + + + Type for a generic provenance query service. Mainly for use in RDF provenance query service descriptions, to facilitate discovery in linked data environments. + access-and-query + http://www.w3.org/TR/2013/NOTE-prov-aq-20130430/#provenance-query-service-discovery + + ProvenanceService + + + + + + + + + + + An instance of prov:Delegation provides additional descriptions about the binary prov:actedOnBehalfOf relation from a performing prov:Agent to some prov:Agent for whom it was performed. For example, :mixing prov:wasAssociatedWith :toddler . :toddler prov:actedOnBehalfOf :mother; prov:qualifiedDelegation [ a prov:Delegation; prov:entity :mother; :foo :bar ]. + qualified + agents-responsibility + + + + Delegation is the assignment of authority and responsibility to an agent (by itself or by another agent) to carry out a specific activity as a delegate or representative, while the agent it acts on behalf of retains some responsibility for the outcome of the delegated work. + +For example, a student acted on behalf of his supervisor, who acted on behalf of the department chair, who acted on behalf of the university; all those agents are responsible in some way for the activity that took place but we do not say explicitly who bears responsibility and to what degree. + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-delegation + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-delegation + Delegation + + + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-End + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + entities-activities + + + qualified + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-End + An instance of prov:End provides additional descriptions about the binary prov:wasEndedBy relation from some ended prov:Activity to an prov:Entity that ended it. For example, :ball_game prov:wasEndedBy :buzzer; prov:qualifiedEnd [ a prov:End; prov:entity :buzzer; :foo :bar; prov:atTime '2012-03-09T08:05:08-05:00'^^xsd:dateTime ]. + End + + End is when an activity is deemed to have been ended by an entity, known as trigger. The activity no longer exists after its end. Any usage, generation, or invalidation involving an activity precedes the activity's end. An end may refer to a trigger entity that terminated the activity, or to an activity, known as ender that generated the trigger. + + + + + + + + + + + + + + + + + + This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). + +If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All feedback is welcome. 
+ Recommendation version 2013-04-30 + W3C PROVenance Interchange Ontology (PROV-O) + + + + + This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). + +If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All feedback is welcome. + Dublin Core extensions of the W3C PROVenance Interchange Ontology (PROV-O) + + + + + If this Activity prov:wasAssociatedWith Agent :ag, then it can qualify the Association using prov:qualifiedAssociation [ a prov:Association; prov:agent :ag; :foo :bar ]. + agents-responsibility + qualifiedAssociation + qualifiedAssociationOf + + + + + + + qualified + + + + activityOfInfluence + + + + + + + + hadRevision + derivations + expanded + wasRevisionOf + + A revision is a derivation that revises an entity into a revised version. + + + + + + + + A collection is an entity that provides a structure to some constituents, which are themselves entities. These constituents are said to be member of the collections. + expanded + collections + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-collection + + Collection + + + + + + + + + + + + locationOf + + + + + + + + + + + + + + + + + + When the prov-o term does not have a definition drawn from prov-dm, and the prov-o editor provides one. + + + + + derivations + + Because prov:qualifiedInfluence is a broad relation, the more specific relations (qualifiedCommunication, qualifiedDelegation, qualifiedEnd, etc.) should be used when applicable. + + qualifiedInfluence + + qualifiedInfluenceOf + + qualified + + + + The entity of a prov:KeyEntityPair, which is an element of a prov:Dictionary. + + collections + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-membership + pairKey + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-membership + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + + collections + + + + + + + + qualifiedEndOf + + + + + + + + + + An empty collection is a collection without members. + collections + expanded + + EmptyCollection + + + + + + + + + + + expanded + + expanded + + hadMember + wasMemberOf + + + + + + + qualifiedStartOf + + + + + relates a generic provenance query service resource (type prov:ServiceDescription) to a specific query service description (e.g. a prov:DirectQueryService or a sd:Service). + access-and-query + serviceDescribedBy + http://www.w3.org/TR/2013/NOTE-prov-aq-20130430/rovenance-query-service-description + describesService + + + + + wasPlanOf + + + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-removal + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-removal + collections + + collections + Removal + Removal is a derivation that describes the transformation of a dictionary into another, by removing one or more keys. + + + + + + + + + + + + + + + + + + + + + agents-responsibility + + qualified + This property has multiple RDFS domains to suit multiple OWL Profiles. See <a href="#owl-profile">PROV-O OWL Profile</a>. + wasInfluencedBy + + + + influenced + The sub-properties of prov:wasInfluencedBy can be elaborated in more detail using the Qualification Pattern. 
For example, the binary relation :baking prov:used :spoon can be qualified by asserting :baking prov:qualifiedUsage [ a prov:Usage; prov:entity :spoon; prov:atLocation :kitchen ] . + +Subproperties of prov:wasInfluencedBy may also be asserted directly without being qualified. + +prov:wasInfluencedBy should not be used without also using one of its subproperties. + + Because prov:wasInfluencedBy is a broad relation, its more specific subproperties (e.g. prov:wasInformedBy, prov:actedOnBehalfOf, prov:wasEndedBy, etc.) should be used when applicable. + + + + + + W3C PROVenance Interchange + + + + + This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). + +If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/ +Archives/Public/public-prov-comments/). All feedback is welcome. + + + + + + + + + + + + + + + + + + wasPrimarySourceOf + + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-removal + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + + + collections + removedKey + The key removed in a Removal. + + collections + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-removal + + + 0 + + + + + + contributed + + + + + + + If this Entity prov:wasInvalidatedBy Activity :a, then it can qualify how it was invalidated using prov:qualifiedInvalidation [ a prov:Invalidation; prov:activity :a; :foo :bar ]. + qualifiedInvalidationOf + + + + + qualified + entities-activities + qualifiedInvalidation + + + + + A definition quoted from PROV-DM or PROV-CONSTRAINTS that describes the concept expressed with this OWL term. + + + + + qualified + The PROV data model is implicitly based on a notion of instantaneous events (or just events), that mark transitions in the world. Events include generation, usage, or invalidation of entities, as well as starting or ending of activities. This notion of event is not first-class in the data model, but it is useful for explaining its other concepts and its semantics. + An instantaneous event, or event for short, happens in the world and marks a change in the world, in its activities and in its entities. The term 'event' is commonly used in process algebra with a similar meaning. Events represent communications or interactions; they are assumed to be atomic and instantaneous. + entities-activities + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#dfn-event + InstantaneousEvent + + + + + + + + + agents-responsibility + + + SoftwareAgent + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-agent + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-types + A software agent is running software. + http://www.w3.org/TR/2012/WD-prov-dm-20120703/prov-n.html#expression-types + + http://www.w3.org/TR/2012/WD-prov-dm-20120703/prov-dm.html#term-agent + expanded + + + + + + + Activity that identifies the Copyrighting activity associated to a resource. + + Copyright + + + + + + + specializationOf + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-specialization + + + http://www.w3.org/TR/2012/WD-prov-dm-20120703/prov-n.html#expression-specialization + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-specialization + expanded + An entity that is a specialization of another shares all aspects of the latter, and additionally presents more specific aspects of the same thing as the latter. 
In particular, the lifetime of the entity being specialized contains that of any specialization. Examples of aspects include a time period, an abstraction, and a context associated with the entity. + http://www.w3.org/TR/2012/WD-prov-dm-20120703/prov-constraints.html#prov-dm-constraints-fig + + + generalizationOf + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + alternate + http://www.w3.org/TR/2012/WD-prov-dm-20120703/prov-dm.html#term-specialization + + + + access-and-query + Type for a generic provenance query service. Mainly for use in RDF provenance query service descriptions, to facilitate discovery in linked data environments. + http://www.w3.org/TR/2013/NOTE-prov-aq-20130430/#provenance-query-service-discovery + + ServiceDescription + + + + + + + + + + + + + + + qualified + A quotation is the repeat of (some or all of) an entity, such as text or image, by someone who may or may not be its original author. Quotation is a particular case of derivation. + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-quotation + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-quotation + An instance of prov:Quotation provides additional descriptions about the binary prov:wasQuotedFrom relation from some taken prov:Entity from an earlier, larger prov:Entity. For example, :here_is_looking_at_you_kid prov:wasQuotedFrom :casablanca_script; prov:qualifiedQuotation [ a prov:Quotation; prov:entity :casablanca_script; :foo :bar ]. + + derivations + Quotation + + + + + + + + + + + + + + + + + qualifiedDerivationOf + + derivations + + + + + If this Entity prov:wasDerivedFrom Entity :e, then it can qualify how it was derived using prov:qualifiedDerivation [ a prov:Derivation; prov:entity :e; :foo :bar ]. + + qualifiedDerivation + qualified + + + + + + + + + + generalizationOf + + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-insertion + + collections + + collections + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-insertion + An object property to refer to the prov:KeyEntityPair inserted into a prov:Dictionary. + insertedKeyEntityPair + + + + + + Role with the function of publishing a resource. The Agent assigned to this role is associated with a Publish Activity + + Publisher + + + + + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-influence + influencee: an identifier (o2) for an entity, activity, or agent; + + + + qualifiedAttributionOf + + + + + agents-responsibility + + + + qualified + qualifiedAttribution + If this Entity prov:wasAttributedTo Agent :ag, then it can qualify how it was influenced using prov:qualifiedAttribution [ a prov:Attribution; prov:agent :ag; :foo :bar ]. + + + 1 + + + + + + + + Derivation is a particular case of trace (see http://www.w3.org/TR/prov-dm/#term-trace), since it links an entity to another entity that contributed to its existence. + + + + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-attribution + Attribution + + An instance of prov:Attribution provides additional descriptions about the binary prov:wasAttributedTo relation from an prov:Entity to some prov:Agent that had some responsible for it. For example, :cake prov:wasAttributedTo :baker; prov:qualifiedAttribution [ a prov:Attribution; prov:entity :baker; :foo :bar ]. 
+ agents-responsibility + qualified + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-attribution + + Attribution is the ascribing of an entity to an agent. + +When an entity e is attributed to agent ag, entity e was generated by some unspecified activity that in turn was associated to agent ag. Thus, this relation is useful when the activity is not known, or irrelevant. + + + Activity that identifies the modification of a resource. + + Modify + + + + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + Entity + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-entity + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Entity + starting-point + entities-activities + + An entity is a physical, digital, conceptual, or other kind of thing with some fixed aspects; entities may be real or imaginary. + + + + + + + + + + + + qualified + prov:hadRole references the Role (i.e. the function of an entity with respect to an activity), in the context of an instantaneous usage, generation, association, start, and end. + + The _optional_ Role that an Entity assumed in the context of an Activity. For example, :baking prov:used :spoon; prov:qualified [ a prov:Usage; prov:entity :spoon; prov:hadRole roles:mixing_implement ]. + + + + hadRole + + This property has multiple RDFS domains to suit multiple OWL Profiles. See <a href="#owl-profile">PROV-O OWL Profile</a>. + wasRoleIn + agents-responsibility + + + + + + + + + + + + + + + access-and-query + Indicates a provenance-URI for a resource; the resource identified by this property presents a provenance record about its subject or anchor resource. + provenanceOf + http://www.w3.org/TR/2013/NOTE-prov-aq-20130430/#resource-represented-as-html + has_provenance + + + + + + + Quotation is a particular case of derivation (see http://www.w3.org/TR/prov-dm/#term-quotation) in which an entity is derived from an original entity by copying, or "quoting", some or all of it. + + + + + An instance of prov:Revision provides additional descriptions about the binary prov:wasRevisionOf relation from some newer prov:Entity to an earlier prov:Entity. For example, :draft_2 prov:wasRevisionOf :draft_1; prov:qualifiedRevision [ a prov:Revision; prov:entity :draft_1; :foo :bar ]. + derivations + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-revision + + A revision is a derivation for which the resulting entity is a revised version of some original. The implication here is that the resulting entity contains substantial content from the original. Revision is a particular case of derivation. + Revision + qualified + + + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Revision + + + + qualifiedStartOf + + + If this Activity prov:wasStartedBy Entity :e1, then it can qualify how it was started using prov:qualifiedStart [ a prov:Start; prov:entity :e1; :foo :bar ]. + + + + entities-activities + qualifiedStart + + qualified + + + + anchorOf + Indicates anchor URI for a potentially dynamic resource instance. + access-and-query + http://www.w3.org/TR/2013/NOTE-prov-aq-20130430/#resource-represented-as-html + has_anchor + + + + + + + + + Indicates a provenance query service that can access provenance related to its subject or anchor resource. + provenanceQueryServiceOf + access-and-query + http://www.w3.org/TR/2013/NOTE-prov-aq-20130430/ + hasProvenanceService + + + + + + + + The dictionary was derived from the other by insertion. 
Can be qualified with prov:qualifiedInsertion, which shows details of the insertion, in particular the inserted key-entity pairs. + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-insertion + collections + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + collections + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-insertion + + + derivedByInsertionFrom + + + 1 + + + + + + A reference to the principal section of the PROV-M document that describes this concept. + + + A reference to the principal section of the PROV-DM document that describes this concept. + + + + + agents-responsibility + starting-point + An agent is something that bears some form of responsibility for an activity taking place, for the existence of an entity, or for another agent's activity. + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Agent + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-agent + + Agent + + + + + wasAssociateFor + + + + + + + + + + + Activity that identifies the creation of a resource + + Create + + + + + + + + + + + This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). + +If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All feedback is welcome. + 0.2 + PROV Access and Query Ontology + + + + + Person agents are people. + agents-responsibility + expanded + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-types + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-agent + + Person + + + + + + + + + + + + + It is the intent that the property chain holds: (prov:qualifiedEnd o prov:atTime) rdfs:subPropertyOf prov:endedAtTime. + The time at which an activity ended. See also prov:startedAtTime. + + + + starting-point + + entities-activities + endedAtTime + + + + + + + + + + + + + + + + + + + qualifiedQuotationOf + + + + + ended + + + + + + + + + + A note by the OWL development team about how this term expresses the PROV-DM concept, or how it should be used in context of semantic web or linked data. + + + + + + + + + prov:generated is one of few inverse property defined, to allow Activity-oriented assertions in addition to Entity-oriented assertions. + + + entities-activities + expanded + + + wasGeneratedBy + + generated + + + + qualifiedInvalidationOf + + + + + + + qualifiedDelegation + qualified + + agents-responsibility + + + + If this Agent prov:actedOnBehalfOf Agent :ag, then it can qualify how with prov:qualifiedResponsibility [ a prov:Responsibility; prov:agent :ag; :foo :bar ]. + qualifiedDelegationOf + + + + + + + + Usage + An instance of prov:Usage provides additional descriptions about the binary prov:used relation from some prov:Activity to an prov:Entity that it used. For example, :keynote prov:used :podium; prov:qualifiedUsage [ a prov:Usage; prov:entity :podium; :foo :bar ]. + Usage is the beginning of utilizing an entity by an activity. Before usage, the activity had not begun to utilize this entity and could not have been affected by the entity. 
+ qualified + entities-activities + + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Usage + + + + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Usage + + + + The position that this OWL term should be listed within documentation. The scope of the documentation (e.g., among all terms, among terms within a prov:category, among properties applying to a particular class, etc.) is unspecified. + + + + + + + + + + This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). + +If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All feedback is welcome. + W3C PROVenance Interchange Ontology (PROV-O) Dictionary Extension + + + + + + + + + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-influence + influencer: an identifier (o1) for an ancestor entity, activity, or agent that the former depends on; + + + + + + + Classes and properties used to qualify relationships are annotated with prov:unqualifiedForm to indicate the property used to assert an unqualified provenance relation. + + + + + + qualifiedRevision + + + + revisedEntity + derivations + qualified + + + If this Entity prov:wasRevisionOf Entity :e, then it can qualify how it was revised using prov:qualifiedRevision [ a prov:Revision; prov:entity :e; :foo :bar ]. + + + + + + + + + + + + qualified + It is not recommended that the type EntityInfluence be asserted without also asserting one of its more specific subclasses. + EntityInfluence provides additional descriptions of an Entity's binary influence upon any other kind of resource. Instances of EntityInfluence use the prov:entity property to cite the influencing Entity. + EntityInfluence is the capacity of an entity to have an effect on the character, development, or behavior of another by means of usage, start, end, derivation, or other. + + EntityInfluence + + + + + Note that there are kinds of bundles (e.g. handwritten letters, audio recordings, etc.) that are not expressed in PROV-O, but can be still be described by PROV-O. + A bundle is a named set of provenance descriptions, and is itself an Entity, so allowing provenance of provenance to be expressed. + expanded + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-bundle-declaration + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-bundle-entity + + Bundle + + + + + + + + + + + + + + + If this Entity prov:wasQuotedFrom Entity :e, then it can qualify how using prov:qualifiedQuotation [ a prov:Quotation; prov:entity :e; :foo :bar ]. + + + + + + qualifiedQuotation + qualified + + + derivations + qualifiedQuotationOf + + + + + + + influenced + + + + + wasQuotedFrom + quotedAs + + + + + + + + + An entity is derived from an original entity by copying, or 'quoting', some or all of it. + derivations + expanded + + + + wasActivityOfInfluence + + + + + alternateOf + + + + + + generated + + + + + + entityOfInfluence + + + + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + An instance of prov:Communication provides additional descriptions about the binary prov:wasInformedBy relation from an informed prov:Activity to the prov:Activity that informed it. 
For example, :you_jumping_off_bridge prov:wasInformedBy :everyone_else_jumping_off_bridge; prov:qualifiedCommunication [ a prov:Communication; prov:activity :everyone_else_jumping_off_bridge; :foo :bar ]. + qualified + + + entities-activities + + Communication is the exchange of an entity by two activities, one activity using the entity generated by the other. + + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-wasInformedBy + Communication + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Communication + + + + + + expanded + agents-responsibility + wasInfluencedBy + influenced + + + + + + + This annotation property links a subproperty of prov:wasInfluencedBy with the subclass of prov:Influence and the qualifying property that are used to qualify it. + +Example annotation: + + prov:wasGeneratedBy prov:qualifiedForm prov:qualifiedGeneration, prov:Generation . + +Then this unqualified assertion: + + :entity1 prov:wasGeneratedBy :activity1 . + +can be qualified by adding: + + :entity1 prov:qualifiedGeneration :entity1Gen . + :entity1Gen + a prov:Generation, prov:Influence; + prov:activity :activity1; + :customValue 1337 . + +Note how the value of the unqualified influence (prov:wasGeneratedBy :activity1) is mirrored as the value of the prov:activity (or prov:entity, or prov:agent) property on the influence class. + + + + Role with the function of creating a resource. The Agent assigned to this role is associated with a Create Activity + + Creator + + + + + + + + + + + + + + + + + + agents-responsibility + qualified + A role is the function of an entity or agent with respect to an activity, in the context of a usage, generation, invalidation, association, start, and end. + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-attribute + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-attribute-role + Role + + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-primary-source + PrimarySource + A primary source for a topic refers to something produced by some agent with direct experience and knowledge about the topic, at the time of the topic's study, without benefit from hindsight. + +Because of the directness of primary sources, they 'speak for themselves' in ways that cannot be captured through the filter of secondary sources. As such, it is important for secondary sources to reference those primary sources from which they were derived, so that their reliability can be investigated. + +A primary source relation is a particular case of derivation of secondary materials from their primary sources. It is recognized that the determination of primary sources can be up to interpretation, and should be done according to conventions accepted within the application's domain. + + An instance of prov:PrimarySource provides additional descriptions about the binary prov:hadPrimarySource relation from some secondary prov:Entity to an earlier, primary prov:Entity. For example, :blog prov:hadPrimarySource :newsArticle; prov:qualifiedPrimarySource [ a prov:PrimarySource; prov:entity :newsArticle; :foo :bar ] . + derivations + + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-original-source + qualified + + + + + hadMention + + + mentionOf + + + prov:mentionOf is used to specialize an entity as described in another bundle. It is to be used in conjuction with prov:asInBundle. + +prov:asInBundle is used to cite the Bundle in which the generalization was mentioned. 
+ + + + + informed + + + + + quotedAs + + + + An instance of prov:Invalidation provides additional descriptions about the binary prov:wasInvalidatedBy relation from an invalidated prov:Entity to the prov:Activity that invalidated it. For example, :uncracked_egg prov:wasInvalidatedBy :baking; prov:qualifiedInvalidation [ a prov:Invalidation; prov:activity :baking; :foo :bar ]. + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Invalidation + + + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + + Invalidation + qualified + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Invalidation + Invalidation is the start of the destruction, cessation, or expiry of an existing entity by an activity. The entity is no longer available for use (or further invalidation) after invalidation. Any generation or usage of an entity precedes its invalidation. + + + entities-activities + + + A derivation is a transformation of an entity into another, an update of an entity resulting in a new one, or the construction of a new entity based on a pre-existing entity. + + qualified + derivations + An instance of prov:Derivation provides additional descriptions about the binary prov:wasDerivedFrom relation from some derived prov:Entity to another prov:Entity from which it was derived. For example, :chewed_bubble_gum prov:wasDerivedFrom :unwrapped_bubble_gum; prov:qualifiedDerivation [ a prov:Derivation; prov:entity :unwrapped_bubble_gum; :foo :bar ]. + http://www.w3.org/TR/2013/REC-prov-n-20130430/#Derivation-Relation + Derivation + + + + The more specific forms of prov:Derivation (i.e., prov:Revision, prov:Quotation, prov:PrimarySource) should be asserted if they apply. + http://www.w3.org/TR/2013/REC-prov-constraints-20130430/#prov-dm-constraints-fig + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Derivation + + + Role with the function of having responsibility for making contributions to a resource. The Agent assigned to this role is associated with a Modify or Create Activities + + Contributor + + + + + + + + + + + + + + + + + + + + + + + qualifiedDerivationOf + + + + contributed + + + + agents-responsibility + starting-point + Attribution is the ascribing of an entity to an agent. + + + + wasAttributedTo + + Attribution is the ascribing of an entity to an agent. + + + + + + + Insertion + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + + + collections + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-insertion + Insertion is a derivation that describes the transformation of a dictionary into another, by insertion of one or more key-entity pairs. + + collections + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-insertion + + + + + Relates a provenance service to a URI template string for constructing provenance-URIs. + access-and-query + http://www.w3.org/TR/2013/NOTE-prov-aq-20130430/ + provenanceUriTemplate + + + + + qualifiedAssociationOf + + + + + + + IF wasAttributedTo(e2,ag1,aAttr) holds, THEN wasInfluencedBy(e2,ag1) also holds. + Attribution is a particular case of trace (see http://www.w3.org/TR/prov-dm/#concept-trace), in the sense that it links an entity to the agent that ascribed it. + + + + + collections + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-conceptual-definition + + collections + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + An empty dictionary (i.e. has no members). 
+ http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary + Empty Dictionary + + + + + + + + + + + + + The _optional_ Plan adopted by an Agent in Association with some Activity. Plan specifications are out of the scope of this specification. + agents-responsibility + qualified + + + + hadPlan + wasPlanOf + + + + wasUsedInDerivation + + + + hadUsage + + + derivations + + qualified + The _optional_ Usage involved in an Entity's Derivation. + + + wasPrimarySourceOf + + expanded + + derivations + + + + hadPrimarySource + + + + + + + + + + + + + + + started + + + + + + + + + + + + + collections + Key-Entity Pair + + A key-entity pair. Part of a prov:Dictionary through prov:hadDictionaryMember. The key is any RDF Literal, the value is a prov:Entity. + collections + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-membership + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-membership + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + + + invalidated + + + + + + + + + An activity association is an assignment of responsibility to an agent for an activity, indicating that the agent had a role in the activity. It further allows for a plan to be specified, which is the plan intended by the agent to achieve some goals in the context of this activity. + qualified + Association + + + agents-responsibility + + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Association + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Association + An instance of prov:Association provides additional descriptions about the binary prov:wasAssociatedWith relation from an prov:Activity to some prov:Agent that had some responsiblity for it. For example, :baking prov:wasAssociatedWith :baker; prov:qualifiedAssociation [ a prov:Association; prov:agent :baker; :foo :bar ]. + + + Role with the function of owning or managing rights over a resource. The Agent assigned to this role is associated with a RightsAssignment Activity + + RightsHolder + + + + Activity that identifies the rights assignment of a resource. + + RightsAssignment + + + + + + + wasUsedBy + + + used + A prov:Entity that was used by this prov:Activity. For example, :baking prov:used :spoon, :egg, :oven . + + starting-point + entities-activities + + + + + Activity that identifies any contribution of an agent to a resource. + + Contribute + + + + + + + + + + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-collection + A collection is an entity that provides a structure to some constituents, which are themselves entities. These constituents are said to be member of the collections. + + + + + + + + + This concept allows for the provenance of the dictionary, but also of its constituents to be expressed. Such a notion of dictionary corresponds to a wide variety of concrete data structures, such as a maps or associative arrays. + A given dictionary forms a given structure for its members. A different structure (obtained either by insertion or removal of members) constitutes a different dictionary. + A dictionary is an entity that provides a structure to some constituents, which are themselves entities. These constituents are said to be member of the dictionary. 
+ http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + Dictionary + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary + collections + collections + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-conceptual-definition + + + collections + + The dictionary was derived from the other by removal. Can be qualified with prov:qualifiedRemoval, which shows details of the removal, in particular the removed keys. + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-removal + + derivedByRemovalFrom + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-removal + collections + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + + + + + + + + + + starting-point + + An activity a2 is dependent on or informed by another activity a1, by way of some unspecified entity that is generated by a1 and used by a2. + informed + wasInformedBy + + + + entities-activities + + + + + + + expanded + qualified + + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-Association + There exist no prescriptive requirement on the nature of plans, their representation, the actions or steps they consist of, or their intended goals. Since plans may evolve over time, it may become necessary to track their provenance, so plans themselves are entities. Representing the plan explicitly in the provenance can be useful for various tasks: for example, to validate the execution as represented in the provenance record, to manage expectation failures, or to provide explanations. + Plan + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-Association + A plan is an entity that represents a set of actions or steps intended by one or more agents to achieve some goals. + + agents-responsibility + + + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#expression-dictionary-membership + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#dictionary-constraints + + hadDictionaryMember + Describes the key-entity pair that was member of a prov:Dictionary. A dictionary can have multiple members. + + + collections + + http://www.w3.org/TR/2013/NOTE-prov-dictionary-20130430/#term-dictionary-membership + collections + + + + + + + + + + + + + + + + + + + + + generatedAsDerivation + + + + + + + + qualifiedGenerationOf + + + + ActivityInfluence + + + ActivitiyInfluence is the capacity of an activity to have an effect on the character, development, or behavior of another by means of generation, invalidation, communication, or other. + + It is not recommended that the type ActivityInfluence be asserted without also asserting one of its more specific subclasses. + qualified + + ActivityInfluence provides additional descriptions of an Activity's binary influence upon any other kind of resource. Instances of ActivityInfluence use the prov:activity property to cite the influencing Activity. + + + + + + hadRevision + + + + + + qualifiedEnd + + qualifiedEndOf + If this Activity prov:wasEndedBy Entity :e1, then it can qualify how it was ended using prov:qualifiedEnd [ a prov:End; prov:entity :e1; :foo :bar ]. + entities-activities + + + + qualified + + + + + + + + + entities-activities + + prov:invalidated is one of few inverse property defined, to allow Activity-oriented assertions in addition to Entity-oriented assertions. 
+ invalidated + + expanded + + + wasInvalidatedBy + + + + + + + + + entities-activities + It is the intent that the property chain holds: (prov:qualifiedStart o prov:atTime) rdfs:subPropertyOf prov:startedAtTime. + + starting-point + + startedAtTime + + + The time at which an activity started. See also prov:endedAtTime. + + + + + + qualifiedInfluenceOf + + + + + + Classify prov-o terms into six components according to prov-dm, including 'agents-responsibility', 'alternate', 'annotations', 'collections', 'derivations', and 'entities-activities'. This classification is used so that readers of prov-o specification can find its correspondence with the prov-dm specification. + + + + + wasUsedBy + + + + + agents-responsibility + expanded + An organization is a social or legal institution such as a company, society, etc. + http://www.w3.org/TR/2013/REC-prov-n-20130430/#expression-types + http://www.w3.org/TR/2013/REC-prov-dm-20130430/#term-agent + + Organization + + + + Activity that identifies the publication of a resource + + Publish + + + + + + + + + + + atTime + + + The time at which an InstantaneousEvent occurred, in the form of xsd:dateTime. + + + + + + + entities-activities + qualified + + + + + + + diff --git a/ontonethub-src/ontonethub/src/license/THIRD-PARTY.properties b/ontonethub-src/ontonethub/src/license/THIRD-PARTY.properties new file mode 100644 index 0000000..7115201 --- /dev/null +++ b/ontonethub-src/ontonethub/src/license/THIRD-PARTY.properties @@ -0,0 +1,40 @@ +# Generated by org.codehaus.mojo.license.AddThirdPartyMojo +#------------------------------------------------------------------------------- +# Already used licenses in project : +# - All files contained in this JAR are licensed under the Apache 2.0 license, unless noted differently in their source (see swing2swt). 
# - Apache Software License +# - Apache Software License, Version 2.0 +# - BSD License +# - BSD-style +# - Common Development And Distribution License (CDDL), Version 1.0 +# - Common Development And Distribution License (CDDL), Version 1.1 +# - Common Public License, Version 1.0 +# - Eclipse Public License 1.0 +# - Eclipse Public License, Version 1.0 +# - GNU General Public License (GPL), Version 2 with classpath exception +# - GNU Lesser General Public License (LGPL) +# - GNU Lesser General Public License (LGPL), Version 2.1 +# - GNU Lesser General Public License, Version 2.1 +# - ICU License +# - MIT License +# - New BSD License +# - New BSD license +# - Public Domain License +# - Revised BSD License +# - iCal4j - License +#------------------------------------------------------------------------------- +# Please fill in the missing licenses for dependencies : +# +# +#Mon Sep 05 14:09:53 CEST 2016 +antlr--antlr--2.7.2=Public Domain +commons-beanutils--commons-beanutils--1.7.0=The Apache Software License, Version 2.0 +dom4j--dom4j--1.1=BSD-style +jakarta-regexp--jakarta-regexp--1.4=The Apache Software License, Version 2.0 +javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0 +javax.servlet.jsp--jsp-api--2.1=Common Development And Distribution License (CDDL), Version 1.0 +org.apache.zookeeper--zookeeper--3.4.5=The Apache Software License, Version 2.0 +org.restlet.jee--org.restlet--2.1.1=The Apache Software License, Version 2.0 +org.restlet.jee--org.restlet.ext.servlet--2.1.1=The Apache Software License, Version 2.0 +oro--oro--2.0.8=The Apache Software License, Version 2.0 +xerces--xercesImpl--2.7.1=The Apache Software License, Version 2.0 diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJob.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJob.java new file mode 100644 index 0000000..c1f7ab1 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJob.java @@ -0,0 +1,322 @@ +package it.cnr.istc.stlab.ontonethub.web; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.util.Properties; +import java.util.Set; + +import org.apache.clerezza.commons.rdf.Graph; +import org.apache.clerezza.commons.rdf.IRI; +import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl; +import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl; +import org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl; +import org.apache.clerezza.rdf.core.access.TcManager; +import org.apache.clerezza.rdf.ontologies.DC; +import org.apache.clerezza.rdf.ontologies.RDFS; +import org.apache.clerezza.rdf.ontologies.XSD; +import org.apache.commons.io.FileUtils; +import org.apache.stanbol.commons.jobs.api.Job; +import org.apache.stanbol.commons.jobs.api.JobResult; +import org.apache.stanbol.commons.jobs.impl.JobManagerImpl; +import org.osgi.framework.Bundle; +import org.osgi.framework.BundleContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.io.Files; +import com.hp.hpl.jena.ontology.AnnotationProperty; +import com.hp.hpl.jena.ontology.DatatypeProperty; +import com.hp.hpl.jena.ontology.Individual; +import com.hp.hpl.jena.ontology.ObjectProperty; +import com.hp.hpl.jena.ontology.OntClass; +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.ontology.OntModelSpec;
+import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.util.iterator.ExtendedIterator; + +import freemarker.cache.ClassTemplateLoader; +import freemarker.cache.TemplateLoader; +import freemarker.core.ParseException; +import freemarker.template.Configuration; +import freemarker.template.Template; +import freemarker.template.TemplateException; +import freemarker.template.TemplateExceptionHandler; +import it.cnr.istc.stlab.ontonethub.web.resources.OntonethubIndexingResource; + +/** + * Implementation of the Stanbol Job interface that executes the indexing of an ontology. + * + * @author Andrea Nuzzolese + * + */ + +public class IndexingJob implements Job { + + private String ontologyName, ontologyDescription, baseURI, ontologyURI, vocabNs; + private Model data; + + private Logger log = LoggerFactory.getLogger(getClass()); + private BundleContext ctx; + private String stanbolHome; + private TcManager tcManager; + + private String bundleNamePattern = "org.apache.stanbol.data.site.{$name}-1.0.0.jar"; + private String zippedIndexNamePattern = "{$name}.solrindex.zip"; + private File ontologiesFolder; + + public IndexingJob(String ontologyName, String ontologyDescription, String baseURI, Model data, BundleContext ctx, TcManager tcManager, File ontologiesFolder, String ontologyURI, String vocabNs) { + this.ontologyName = ontologyName; + this.ontologyDescription = ontologyDescription; + this.baseURI = baseURI; + this.data = data; + this.ctx = ctx; + this.stanbolHome = ctx.getProperty("stanbol.home"); + this.tcManager = tcManager; + this.ontologiesFolder = ontologiesFolder; + this.ontologyURI = ontologyURI; + this.vocabNs = vocabNs; + + } + + @Override + public JobResult call() throws Exception { + Configuration cfg = new Configuration(); + + TemplateLoader loader = new ClassTemplateLoader(getClass(), "/templates"); + cfg.setTemplateLoader(loader); + cfg.setDefaultEncoding("UTF-8"); + cfg.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); + + Properties props = new Properties(); + props.setProperty("name", ontologyName); + props.setProperty("description", ontologyDescription); + + boolean error = false; + String errorMessage = null; + Template template; + File tempFolder = null; + + try { + File folder = new File(stanbolHome + File.separator + OntonethubIndexingResource.RUNNABLE_INDEXER_EXECUTABLES_FOLDER); + String tempFolderName = "_" + System.currentTimeMillis(); + tempFolder = new File(folder, tempFolderName); + + Process initProcess = Runtime.getRuntime().exec("java -jar " + stanbolHome + File.separator + OntonethubIndexingResource.RUNNABLE_INDEXER_EXECUTABLES + " init " + tempFolder.getPath()); + initProcess.waitFor(); + + template = cfg.getTemplate("indexing.ftl"); + File configFolder = new File(tempFolder, "indexing" + File.separator + "config"); + Writer writer = new FileWriter(new File(configFolder, "indexing.properties")); + template.process(props, writer); + + } catch (ParseException e) { + log.error(e.getMessage(), e); + errorMessage = "Indexing failed because of the following error: " + e.getMessage(); + error = true; + } catch (IOException e) { + log.error(e.getMessage(), e); + errorMessage = "Indexing failed because of the following error: " + e.getMessage(); + error = true; + } catch (TemplateException e) { + log.error(e.getMessage(), e); + errorMessage = "Indexing failed because of the following error: " + e.getMessage(); + error = true; + } + + IndexingJobResult indexingJobResult =
null; + if(!error){ + + File rdfDataFolder = new File(tempFolder, "indexing" + File.separator + "resources" + File.separator + "rdfdata"); + + String tempFileName = "_" + System.currentTimeMillis() + ".rdf"; + data.write(new FileOutputStream(new File(rdfDataFolder, tempFileName)), "RDF/XML"); + + Process indexingProcess = Runtime.getRuntime().exec("java -jar " + stanbolHome + File.separator + OntonethubIndexingResource.RUNNABLE_INDEXER_EXECUTABLES + " index " + tempFolder.getPath()); + indexingProcess.waitFor(); + + String bundleFileName = bundleNamePattern.replace("{$name}", ontologyName); + File bundleFile = new File(tempFolder, "indexing" + File.separator + "dist" + File.separator + bundleFileName); + + String zippedIndexFileName = zippedIndexNamePattern.replace("{$name}", ontologyName); + File zippedIndexFile = new File(tempFolder, "indexing" + File.separator + "dist" + File.separator + zippedIndexFileName); + + log.info("bundleFile {}", bundleFile.getPath()); + if(bundleFile.exists() && zippedIndexFile.exists()){ + + File stanbolDatafiles = new File(stanbolHome + File.separator + "datafiles"); + File deployedIndex = new File(stanbolDatafiles, zippedIndexFileName); + Files.copy(zippedIndexFile, deployedIndex); + + try{ + log.info("Bundle URI: {} - URL: {}", bundleFile.toURI(), bundleFile.toURI().toURL()); + Bundle bundle = ctx.installBundle(bundleFile.toURI().toString()); + bundle.start(); + long bundleId = bundle.getBundleId(); + + String jobId = JobManagerImpl.buildId(this); + IRI jobIRI = new IRI(ontologyURI + "/" + jobId); + + Graph g = tcManager.getMGraph(new IRI("ontonethub-graph")); + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "hasBundle"), + new PlainLiteralImpl(String.valueOf(bundleId)))); + + /* + * Ontology name + */ + g.add(new TripleImpl( + jobIRI, + RDFS.label, + new PlainLiteralImpl(ontologyName))); + + /* + * Ontology description + */ + g.add(new TripleImpl( + jobIRI, + DC.description, + new PlainLiteralImpl(ontologyDescription))); + + /* + * Store ontology file + */ + File ontologyFile = new File(ontologiesFolder, jobId + ".rdf"); + data.write(new FileOutputStream(ontologyFile)); + + OntModel ontModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM); + ontModel.add(data); + + /* + * Ontology IRI + */ + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "hasOntologyIRI"), + new IRI(baseURI))); + + /* + * OWL classes + */ + int classes = 0; + ExtendedIterator<OntClass> classesIt = ontModel.listClasses(); + while(classesIt.hasNext()) { + classesIt.next(); + classes++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "owlClasses"), + new TypedLiteralImpl(String.valueOf(classes), XSD.int_))); + + /* + * Object properties + */ + int objectProperties = 0; + ExtendedIterator<ObjectProperty> objPropertiesIt = ontModel.listObjectProperties(); + while(objPropertiesIt.hasNext()) { + objPropertiesIt.next(); + objectProperties++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "objectProperties"), + new TypedLiteralImpl(String.valueOf(objectProperties), XSD.int_))); + + /* + * Datatype properties + */ + int dataProperties = 0; + ExtendedIterator<DatatypeProperty> dataPropertiesIt = ontModel.listDatatypeProperties(); + while(dataPropertiesIt.hasNext()) { + dataPropertiesIt.next(); + dataProperties++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "datatypeProperties"), + new TypedLiteralImpl(String.valueOf(dataProperties), XSD.int_))); + + /* + * Annotation properties + */ + int annotationProperties = 0; + ExtendedIterator<AnnotationProperty> annotationPropertiesIt = ontModel.listAnnotationProperties(); + while(annotationPropertiesIt.hasNext()) { + annotationPropertiesIt.next(); + annotationProperties++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "annotationProperties"), + new TypedLiteralImpl(String.valueOf(annotationProperties), XSD.int_))); + + /* + * OWL individuals + */ + int individuals = 0; + ExtendedIterator<Individual> individualsIt = ontModel.listIndividuals(); + while(individualsIt.hasNext()) { + individualsIt.next(); + individuals++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "individuals"), + new TypedLiteralImpl(String.valueOf(individuals), XSD.int_))); + + /* + * Closure of imported ontologies + */ + Set<String> importedOntologies = ontModel.listImportedOntologyURIs(true); + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "importedOntologies"), + new TypedLiteralImpl(String.valueOf(importedOntologies.size()), XSD.int_))); + + } catch(Exception e){ + log.error(e.getMessage(), e); + } + + } + + String message = "Indexing of " + ontologyName + " completed."; + indexingJobResult = new IndexingJobResult(message, true); + } + else indexingJobResult = new IndexingJobResult(errorMessage, false); + + if(tempFolder != null && tempFolder.exists()){ + FileUtils.deleteDirectory(tempFolder); + } + + return indexingJobResult; + } + + @Override + public String buildResultLocation(String jobId) { + + return "ontonethub/ontology/" + jobId; + } + +}
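The five statistics blocks in call() repeat one iterate-and-count pattern over Jena ExtendedIterators. A possible refactoring is a small helper that drains an iterator and closes it; this is a sketch, not part of the patch, and the class and method names are illustrative:

```java
import com.hp.hpl.jena.util.iterator.ExtendedIterator;

/**
 * Hypothetical helper: drains a Jena ExtendedIterator and returns the
 * number of elements it yielded. Each statistics loop in
 * IndexingJob.call() could then reduce to a single call such as
 * count(ontModel.listClasses()).
 */
public final class IteratorUtils {

    private IteratorUtils() {
        // static utility, not instantiable
    }

    public static int count(ExtendedIterator<?> it) {
        int n = 0;
        try {
            while (it.hasNext()) {
                it.next();
                n++;
            }
        } finally {
            it.close(); // ExtendedIterators should be closed when abandoned
        }
        return n;
    }
}
```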
+ + "rdf"); + data.write(new FileOutputStream(ontologyFile)); + + OntModel ontModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM); + ontModel.add(data); + + /* + * Ontology IRI + */ + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "hasOntologyIRI"), + new IRI(baseURI))); + + /* + * OWL classes + */ + int classes = 0; + ExtendedIterator classesIt = ontModel.listClasses(); + while(classesIt.hasNext()) { + classesIt.next(); + classes++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "owlClasses"), + new TypedLiteralImpl(String.valueOf(classes), XSD.int_))); + + /* + * Object properties + */ + int objectProperties = 0; + ExtendedIterator objPropertiesIt = ontModel.listObjectProperties(); + while(objPropertiesIt.hasNext()) { + objPropertiesIt.next(); + objectProperties++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "objectProperties"), + new TypedLiteralImpl(String.valueOf(objectProperties), XSD.int_))); + + /* + * Datatype properties + */ + int dataProperties = 0; + ExtendedIterator dataPropertiesIt = ontModel.listDatatypeProperties(); + while(dataPropertiesIt.hasNext()) { + dataPropertiesIt.next(); + dataProperties++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "datatypeProperties"), + new TypedLiteralImpl(String.valueOf(dataProperties), XSD.int_))); + + + /* + * Annotation properties + */ + int annotationProperties = 0; + ExtendedIterator annotationPropertiesIt = ontModel.listAnnotationProperties(); + while(annotationPropertiesIt.hasNext()) { + annotationPropertiesIt.next(); + annotationProperties++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "annotationProperties"), + new TypedLiteralImpl(String.valueOf(annotationProperties), XSD.int_))); + + + /* + * OWL individuals + */ + int individuals = 0; + ExtendedIterator individualsIt = ontModel.listIndividuals(); + while(individualsIt.hasNext()) { + individualsIt.next(); + individuals++; + } + + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "individuals"), + new TypedLiteralImpl(String.valueOf(individuals), XSD.int_))); + + + /* + * Closure of imported ontologies + */ + Set importedOntologies = ontModel.listImportedOntologyURIs(true); + g.add(new TripleImpl( + jobIRI, + new IRI(vocabNs + "importedOntologies"), + new TypedLiteralImpl(String.valueOf(importedOntologies.size()), XSD.int_))); + + + } catch(Exception e){ + log.error(e.getMessage(), e); + } + + + + + + } + + String message = "Indexing of " + ontologyName + " completed."; + indexingJobResult = new IndexingJobResult(message, true); + } + else indexingJobResult = new IndexingJobResult(errorMessage, false); + + if(tempFolder != null && tempFolder.exists()){ + FileUtils.deleteDirectory(tempFolder); + } + + return indexingJobResult; + } + + + + @Override + public String buildResultLocation(String jobId) { + + return "ontonethub/ontology/" + jobId; + } + +} diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJobInput.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJobInput.java new file mode 100644 index 0000000..2d9d111 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJobInput.java @@ -0,0 +1,59 @@ +package it.cnr.istc.stlab.ontonethub.web; + +import com.hp.hpl.jena.rdf.model.Model; + +/** + * The IndexingJobInput represents the input for an IndexingJob. 
+ * + * @author Andrea Nuzzolese + * + */ +public class IndexingJobInput { + + private String name, description, baseURI; + private Model data; + + public IndexingJobInput() { + + } + + public IndexingJobInput(String name, String description, String baseURI, Model data) { + this.name = name; + this.description = description; + this.baseURI = baseURI; + this.data = data; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getBaseURI() { + return baseURI; + } + + public void setBaseURI(String baseURI) { + this.baseURI = baseURI; + } + + public Model getData() { + return data; + } + + public void setData(Model data) { + this.data = data; + } + +} diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJobResult.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJobResult.java new file mode 100644 index 0000000..31d8da8 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/IndexingJobResult.java @@ -0,0 +1,33 @@ +package it.cnr.istc.stlab.ontonethub.web; + +import org.apache.stanbol.commons.jobs.api.JobResult; + +/** + * + * This class represents the output of an indexing job. + * + * @author Andrea Nuzzolese + * + */ + +public class IndexingJobResult implements JobResult { + + private String message; + private boolean success; + + public IndexingJobResult(String message, boolean success) { + this.message = message; + this.success = success; + } + + @Override + public String getMessage() { + return message; + } + + @Override + public boolean isSuccess() { + return success; + } + +} diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/readers/IndexingJobInputReader.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/readers/IndexingJobInputReader.java new file mode 100644 index 0000000..0280b61 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/readers/IndexingJobInputReader.java @@ -0,0 +1,179 @@ +package it.cnr.istc.stlab.ontonethub.web.readers; + +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.lang.annotation.Annotation; +import java.lang.reflect.Type; + +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.ext.MessageBodyReader; +import javax.ws.rs.ext.Provider; + +import org.apache.commons.fileupload.FileItemIterator; +import org.apache.commons.fileupload.FileItemStream; +import org.apache.commons.fileupload.FileUpload; +import org.apache.commons.fileupload.FileUploadException; +import org.apache.commons.fileupload.RequestContext; +import org.apache.commons.io.IOUtils; +import org.apache.felix.scr.annotations.Activate; +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.Deactivate; +import org.apache.felix.scr.annotations.Property; +import org.apache.felix.scr.annotations.Service; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.system.StreamRDF; +import org.apache.jena.riot.system.StreamRDFLib; +import org.apache.jena.riot.system.StreamRDFWrapper; +import org.osgi.service.component.ComponentContext; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException; +import com.hp.hpl.jena.graph.Graph; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.sparql.graph.GraphFactory; +import com.hp.hpl.jena.util.FileManager; + +import it.cnr.istc.stlab.ontonethub.web.IndexingJobInput; + +@Component +@Service(Object.class) +@Property(name = "javax.ws.rs", boolValue = true) +@Provider +public class IndexingJobInputReader implements MessageBodyReader<IndexingJobInput> { + + private ObjectMapper mapper; + + private final Logger log = LoggerFactory.getLogger(getClass()); + + private FileUpload fu; + + @Override + public boolean isReadable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return IndexingJobInput.class.isAssignableFrom(type); + } + + @Override + public IndexingJobInput readFrom(Class<IndexingJobInput> type, Type genericType, Annotation[] annotations, + MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) + throws IOException, WebApplicationException { + + IndexingJobInput indexingJobInput = new IndexingJobInput(); + try{ + + FileItemIterator fileItemIterator = fu.getItemIterator(new MessageBodyReaderContext(entityStream, mediaType)); + + String rdf = null; + + while(fileItemIterator.hasNext()){ + FileItemStream fis = fileItemIterator.next(); + if(fis.getFieldName().equals("name")){ + StringWriter writer = new StringWriter(); + IOUtils.copy(fis.openStream(), writer); + String name = writer.toString(); + indexingJobInput.setName(name); + log.info("Name: " + name); + } + else if(fis.getFieldName().equals("description")){ + StringWriter writer = new StringWriter(); + IOUtils.copy(fis.openStream(), writer); + String description = writer.toString(); + indexingJobInput.setDescription(description); + log.info("Description: " + description); + } + else if(fis.getFieldName().equals("baseURI")){ + StringWriter writer = new StringWriter(); + IOUtils.copy(fis.openStream(), writer); + String baseURI = writer.toString(); + indexingJobInput.setBaseURI(baseURI); + log.info("Base URI: " + baseURI); + } + else if(fis.getFieldName().equals("data")){ + //Model model = ModelFactory.createDefaultModel(); + /*StringWriter writer = new StringWriter(); + IOUtils.copy(fis.openStream(), writer); + rdf = writer.toString();*/ + + Graph graph = GraphFactory.createDefaultGraph(); + StreamRDF sink = new StreamRDFWrapper(StreamRDFLib.graph(graph)); + RDFDataMgr.parse(sink, fis.openStream(), "http://localhost/", Lang.RDFXML); + + Model model = ModelFactory.createModelForGraph(graph); + log.info("Model contains {} triples.", model.size()); + //model.read(fis.openStream(), "http://localhost/", "RDF/XML"); + //dataContent = fis.openStream(); + indexingJobInput.setData(model); + } + } + + return indexingJobInput; + } catch (UnrecognizedPropertyException e){ + log.error(e.getMessage(), e); + return null; + } catch (FileUploadException e) { + log.error(e.getMessage(), e); + return null; + } + } + + @Activate + protected void activate(ComponentContext ctx) { + this.mapper = new ObjectMapper(); + this.fu = new FileUpload(); + log.info(getClass() + " activated."); + } + + @Deactivate + protected void deactivate(ComponentContext ctx){ + this.mapper = null; + log.info(getClass() + " deactivated."); + } + + /** + * Adapter from the parameters present in a {@link MessageBodyReader} to + * the {@link RequestContext} as used by the commons.fileupload framework + * @author rwesten + * + */ + private static class MessageBodyReaderContext implements RequestContext{ + + private final InputStream in; + private final String contentType; + private final String charEncoding; + + public MessageBodyReaderContext(InputStream in, MediaType mediaType){ + this.in = in; + this.contentType = mediaType.toString(); + String charset = mediaType.getParameters().get("charset"); + this.charEncoding = charset == null ? "UTF-8" : charset; + } + + @Override + public String getCharacterEncoding() { + return charEncoding; + } + + @Override + public String getContentType() { + return contentType; + } + + @Override + public int getContentLength() { + return -1; + } + + @Override + public InputStream getInputStream() throws IOException { + return in; + } + + } + +}
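For reference, the streaming parse that readFrom() performs for the data part can be reproduced standalone as below. This is a minimal sketch under stated assumptions: the file name and base URI are placeholders, and the StreamRDFWrapper used above is omitted because it only delegates to the wrapped sink:

```java
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.system.StreamRDF;
import org.apache.jena.riot.system.StreamRDFLib;

import com.hp.hpl.jena.graph.Graph;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.sparql.graph.GraphFactory;

public class StreamingParseExample {
    public static void main(String[] args) throws Exception {
        try (InputStream in = new FileInputStream("ontology.rdf")) {
            Graph graph = GraphFactory.createDefaultGraph();
            // triples are streamed straight into the graph, without an
            // intermediate in-memory string as in the commented-out code
            StreamRDF sink = StreamRDFLib.graph(graph);
            RDFDataMgr.parse(sink, in, "http://localhost/", Lang.RDFXML);
            Model model = ModelFactory.createModelForGraph(graph);
            System.out.println("Parsed " + model.size() + " triples");
        }
    }
}
```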
diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/readers/JenaModelReader.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/readers/JenaModelReader.java new file mode 100644 index 0000000..9ad4317 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/readers/JenaModelReader.java @@ -0,0 +1,33 @@ +package it.cnr.istc.stlab.ontonethub.web.readers; + +import java.io.IOException; +import java.io.InputStream; +import java.lang.annotation.Annotation; +import java.lang.reflect.Type; + +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.ext.MessageBodyReader; + +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +public class JenaModelReader implements MessageBodyReader<Model> { + + @Override + public boolean isReadable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return Model.class.isAssignableFrom(type); + } + + @Override + public Model readFrom(Class<Model> type, Type genericType, Annotation[] annotations, + MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) + throws IOException, WebApplicationException { + + Model model = ModelFactory.createDefaultModel(); + model.read(entityStream, null); + return model; + } + +} diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubIndexingResource.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubIndexingResource.java new file mode 100644 index 0000000..6b09330 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubIndexingResource.java @@ -0,0 +1,773 @@ +package it.cnr.istc.stlab.ontonethub.web.resources; + +import static it.cnr.istc.stlab.ontonethub.web.utils.JerseyUtils.ENTITY_SUPPORTED_MEDIA_TYPES; +import static it.cnr.istc.stlab.ontonethub.web.utils.JerseyUtils.createFieldQueryForFindRequest; +import static it.cnr.istc.stlab.ontonethub.web.utils.LDPathHelper.getLDPathParseExceptionMessage; +import static it.cnr.istc.stlab.ontonethub.web.utils.LDPathHelper.prepareQueryLDPathProgram; +import static it.cnr.istc.stlab.ontonethub.web.utils.LDPathHelper.transformQueryResults; +import static javax.ws.rs.core.MediaType.APPLICATION_JSON_TYPE; +import static javax.ws.rs.core.MediaType.TEXT_HTML; +import static org.apache.stanbol.commons.web.base.utils.MediaTypeUtil.getAcceptableMediaType; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import
java.io.InputStream; +import java.net.URI; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Properties; +import java.util.Set; + +import javax.ws.rs.Consumes; +import javax.ws.rs.DELETE; +import javax.ws.rs.FormParam; +import javax.ws.rs.GET; +import javax.ws.rs.OPTIONS; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.HttpHeaders; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.ResponseBuilder; +import javax.ws.rs.core.Response.Status; +import javax.ws.rs.core.UriInfo; + +import org.apache.clerezza.commons.rdf.Graph; +import org.apache.clerezza.commons.rdf.IRI; +import org.apache.clerezza.commons.rdf.Literal; +import org.apache.clerezza.commons.rdf.Triple; +import org.apache.clerezza.rdf.core.access.EntityAlreadyExistsException; +import org.apache.clerezza.rdf.core.access.TcManager; +import org.apache.clerezza.rdf.ontologies.DC; +import org.apache.clerezza.rdf.ontologies.RDFS; +import org.apache.commons.io.IOUtils; +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.Property; +import org.apache.felix.scr.annotations.Reference; +import org.apache.felix.scr.annotations.Service; +import org.apache.marmotta.ldpath.exception.LDPathParseException; +import org.apache.marmotta.ldpath.model.programs.Program; +import org.apache.stanbol.commons.indexedgraph.IndexedGraph; +import org.apache.stanbol.commons.jobs.api.JobManager; +import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils; +import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService; +import org.apache.stanbol.commons.web.base.format.KRFormat; +import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource; +import org.apache.stanbol.commons.web.viewable.Viewable; +import org.apache.stanbol.entityhub.core.query.QueryResultListImpl; +import org.apache.stanbol.entityhub.ldpath.EntityhubLDPath; +import org.apache.stanbol.entityhub.ldpath.backend.SiteBackend; +import org.apache.stanbol.entityhub.ldpath.query.LDPathSelect; +import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory; +import org.apache.stanbol.entityhub.servicesapi.model.Entity; +import org.apache.stanbol.entityhub.servicesapi.model.Representation; +import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory; +import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery; +import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList; +import org.apache.stanbol.entityhub.servicesapi.site.ManagedSite; +import org.apache.stanbol.entityhub.servicesapi.site.Site; +import org.apache.stanbol.entityhub.servicesapi.site.SiteException; +import org.apache.stanbol.entityhub.servicesapi.site.SiteManager; +import org.apache.stanbol.entityhub.servicesapi.util.AdaptingIterator; +import org.codehaus.jettison.json.JSONException; +import org.codehaus.jettison.json.JSONObject; +import org.osgi.framework.Bundle; +import org.osgi.framework.BundleException; +import org.osgi.service.cm.ConfigurationException; +import org.osgi.service.component.ComponentContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.util.FileManager; + +import 
it.cnr.istc.stlab.ontonethub.web.IndexingJob; +import it.cnr.istc.stlab.ontonethub.web.IndexingJobInput; +import it.cnr.istc.stlab.ontonethub.web.utils.JerseyUtils; + +/** + * Web resource that manages the ontologies of the ontology network. + * It provides the framework with HTTP REST capabilities for indexing and storing the ontologies + * that are part of the network. + * + * @author Andrea Nuzzolese + * + */ +@Component +@Service(Object.class) +@Property(name = "javax.ws.rs", boolValue = true) +@Path("/ontonethub/ontology") +public class OntonethubIndexingResource extends BaseStanbolResource { + + private Logger log = LoggerFactory.getLogger(getClass()); + + public static final String RUNNABLE_INDEXER_EXECUTABLES_FOLDER = "ontonethub-indexing" + File.separator + "executables"; + public static final String RUNNABLE_INDEXER_EXECUTABLES = RUNNABLE_INDEXER_EXECUTABLES_FOLDER + File.separator + "indexing-genericrdf.jar"; + public static final String INNER_INDEXER_EXECUTABLES = "executables" + File.separator + "indexing-genericrdf.jar"; + + /** + * The Field used for find requests if not specified TODO: This will be replaced by the EntitySearch. With + * this search the Site is responsible to decide what properties to use for label based searches. + */ + private static final String DEFAULT_FIND_FIELD = RDFS.label.getUnicodeString(); + + /** + * The Field used as default as selected fields for find requests TODO: Make configurable via the + * {@link ConfiguredSite} interface! NOTE: This feature is deactivated, because OPTIONAL selects do have + * very weak performance when using SPARQL endpoints + */ + // private static final Collection DEFAULT_FIND_SELECTED_FIELDS = + // Arrays.asList(RDFS.comment.getUnicodeString()); + + /** + * The default number of maximal results.
+ */ + private static final int DEFAULT_FIND_RESULT_LIMIT = 5; + + @Reference + private JobManager jobManager; + + @Reference + private SiteManager siteManager; + + @Reference + private TcManager tcManager; + + @Reference + private NamespacePrefixService nsPrefixService; + + @Context + private UriInfo uriInfo; + + private File ontologiesFolder; + + private ComponentContext ctx; + //private Scope onScope; + + @OPTIONS + @Path("/{id}") + public Response handleCorsPreflightOntology(@PathParam(value = "id") String id, + @Context HttpHeaders headers){ + ResponseBuilder res = Response.ok(); + return res.build(); + } + + @OPTIONS + @Path("/{id}/source") + public Response handleCorsPreflightOntologySource(@PathParam(value = "id") String id, + @Context HttpHeaders headers){ + ResponseBuilder res = Response.ok(); + return res.build(); + } + + @OPTIONS + @Path("/{id}/find") + public Response handleCorsPreflightOntologyFind(@PathParam(value = "id") String id, + @Context HttpHeaders headers){ + ResponseBuilder res = Response.ok(); + return res.build(); + } + + @GET + @Consumes(MediaType.WILDCARD) + @Produces(MediaType.TEXT_PLAIN) + public Response sayHello(){ + return Response.ok("Hello, it's the OntoNetHub!").build(); + } + + @GET + @Consumes(MediaType.WILDCARD) + @Produces(MediaType.APPLICATION_JSON) + @Path("/{id}") + public Response getOntologyInfo(@PathParam("id") String id){ + boolean found = true; + ResponseBuilder responseBuilder = null; + + String ontologyURI = uriInfo.getBaseUri() + uriInfo.getPath(); + String vocabNs = uriInfo.getBaseUri() + "ontonethub/vocab/"; + + Graph g = tcManager.getMGraph(new IRI("ontonethub-graph")); + Iterator<Triple> tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "hasBundle"), + null); + + String bundleId = null; + if(tripleIt.hasNext()){ + Literal bundleIdLiteral = (Literal) tripleIt.next().getObject(); + bundleId = bundleIdLiteral.getLexicalForm(); + } + + log.info("Bundle ID {} - Ontology ID {}", bundleId, ontologyURI); + if(bundleId != null){ + Bundle bundle = ctx.getBundleContext().getBundle(Long.valueOf(bundleId)); + if(bundle != null){ + JSONObject json = new JSONObject(); + try { + json.put("id", id); + + String ontologyName = null; + String ontologyDescription = null; + String ontologyIRI = null; + + tripleIt = g.filter(new IRI(ontologyURI), + RDFS.label, + null); + if(tripleIt.hasNext()){ + Literal literal = (Literal) tripleIt.next().getObject(); + ontologyName = literal.getLexicalForm(); + } + + tripleIt = g.filter(new IRI(ontologyURI), + DC.description, + null); + if(tripleIt.hasNext()){ + Literal literal = (Literal) tripleIt.next().getObject(); + ontologyDescription = literal.getLexicalForm(); + } + + String sourceIRI = ontologyURI + "/source"; + json.put("ontologySource", sourceIRI); + + json.put("name", ontologyName); + json.put("description", ontologyDescription); + + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "hasOntologyIRI"), + null); + if(tripleIt.hasNext()){ + IRI iri = (IRI) tripleIt.next().getObject(); + ontologyIRI = iri.toString().replace("<", "").replace(">", ""); + } + json.put("ontologyIRI", ontologyIRI); + + /* + * OWL classes + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "owlClasses"), + null); + int owlClasses = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + owlClasses = Integer.valueOf(lexicalForm); + } + json.put("owlClasses", owlClasses); + + /* + * Object properties + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "objectProperties"), + null); + int objectProperties = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + objectProperties = Integer.valueOf(lexicalForm); + } + json.put("objectProperties", objectProperties); + + /* + * Datatype properties + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "datatypeProperties"), + null); + int datatypeProperties = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + datatypeProperties = Integer.valueOf(lexicalForm); + } + json.put("datatypeProperties", datatypeProperties); + + /* + * Annotation properties + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "annotationProperties"), + null); + int annotationProperties = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + annotationProperties = Integer.valueOf(lexicalForm); + } + json.put("annotationProperties", annotationProperties); + + /* + * Individuals + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "individuals"), + null); + int individuals = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + individuals = Integer.valueOf(lexicalForm); + } + json.put("individuals", individuals); + + /* + * Imported ontologies + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "importedOntologies"), + null); + int importedOntologies = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + importedOntologies = Integer.valueOf(lexicalForm); + } + json.put("importedOntologies", importedOntologies); + + responseBuilder = Response.ok(json.toString()); + } catch (JSONException e) { + try { + json.put("error", e.getMessage()); + } catch (JSONException e1) { + log.error(e1.getMessage(), e1); + } + responseBuilder = Response.status(Status.INTERNAL_SERVER_ERROR).entity(json.toString()); + } + } + else found = false; + } + else found = false; + + if(!found) { + JSONObject json = new JSONObject(); + try { + json.put("error", "No ontology exists with the ID provided."); + } catch (JSONException e) { + log.error(e.getMessage(), e); + } + responseBuilder = Response.status(Status.NOT_FOUND).entity(json.toString()); + } + + return responseBuilder.build(); + }
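A successful GET /ontonethub/ontology/{id} therefore answers with a JSON document assembled from the catalogue graph; the shape follows the json.put calls above, while the values below are purely illustrative:

```json
{
  "id": "job-42",
  "ontologySource": "http://localhost:8080/ontonethub/ontology/job-42/source",
  "name": "foaf",
  "description": "Friend of a Friend vocabulary",
  "ontologyIRI": "http://xmlns.com/foaf/0.1/",
  "owlClasses": 14,
  "objectProperties": 35,
  "datatypeProperties": 27,
  "annotationProperties": 6,
  "individuals": 0,
  "importedOntologies": 1
}
```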
new IRI(vocabNs + "objectProperties"), + null); + int objectProperties = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + objectProperties = Integer.valueOf(lexicalForm); + } + json.put("objectProperties", objectProperties); + + /* + * Datatype properties + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "datatypeProperties"), + null); + int datatypeProperties = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + datatypeProperties = Integer.valueOf(lexicalForm); + } + json.put("datatypeProperties", datatypeProperties); + + /* + * Annotation properties + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "annotationProperties"), + null); + int annotationProperties = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + annotationProperties = Integer.valueOf(lexicalForm); + } + json.put("annotationProperties", annotationProperties); + + /* + * Individuals + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "individuals"), + null); + int individuals = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + individuals = Integer.valueOf(lexicalForm); + } + json.put("individuals", individuals); + + /* + * Individuals + */ + tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "importedOntologies"), + null); + int importedOntologies = 0; + if(tripleIt.hasNext()){ + String lexicalForm = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + importedOntologies = Integer.valueOf(lexicalForm); + } + json.put("importedOntologies", importedOntologies); + + responseBuilder = Response.ok(json.toString()); + } catch (JSONException e) { + try { + json.put("error", e.getMessage()); + } catch (JSONException e1) { + log.error(e1.getMessage(), e1); + } + responseBuilder = Response.status(Status.INTERNAL_SERVER_ERROR).entity(json.toString()); + } + } + else found = false; + } + else found = false; + + if(!found) { + JSONObject json = new JSONObject(); + try { + json.put("error", "No ontology exists with the ID provided."); + } catch (JSONException e) { + log.error(e.getMessage(), e); + } + responseBuilder = Response.status(Status.NOT_FOUND).entity(json.toString()); + } + + return responseBuilder.build(); + } + + @GET + @Consumes(MediaType.WILDCARD) + @Produces({ + KRFormat.RDF_XML, + KRFormat.RDF_JSON, + KRFormat.TURTLE, + KRFormat.N_TRIPLE, + KRFormat.N3, + "application/json-ld" + }) + @Path("/{id}/source") + public Response getOntologySource(@PathParam("id") String id){ + + ResponseBuilder responseBuilder = null; + Model model; + try { + model = FileManager.get().loadModel(new File(ontologiesFolder, id + ".rdf").getCanonicalPath()); + } catch (IOException e) { + model = null; + } + if(model != null) responseBuilder = Response.ok(model); + else { + JSONObject json = new JSONObject(); + try { + json.put("error", "No ontology exists with the ID provided."); + } catch (JSONException e) { + log.error(e.getMessage(), e); + } + responseBuilder = Response.status(Status.NOT_FOUND).entity(json); + } + + return responseBuilder.build(); + } + + @POST + @Consumes(MediaType.WILDCARD) + @Produces(MediaType.APPLICATION_JSON) + public Response indexOntology(IndexingJobInput input){ + + ResponseBuilder responseBuilder = null; + JSONObject jsonObject = new JSONObject(); + + + Site site = siteManager.getSite(input.getName()); + 
log.info("Site {} and ontology name {}", site, input.getName()); + if(site == null){ + String ontologyURI = uriInfo.getBaseUri() + uriInfo.getPath(); + log.info(" Ontology IRI {}", ontologyURI); + String vocabNs = uriInfo.getBaseUri() + "ontonethub/vocab/"; + IndexingJob job = new IndexingJob(input.getName(), input.getDescription(), input.getBaseURI(), input.getData(), ctx.getBundleContext(), tcManager, ontologiesFolder, ontologyURI, vocabNs); + String jid = jobManager.execute(job); + + URI location = URI.create(getPublicBaseUri() + "jobs/" + jid); + + try { + jsonObject.put("monitoringService", location.toString()); + jsonObject.put("ontologyId", jid); + } catch (JSONException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + responseBuilder = Response.ok(jsonObject.toString()); + } + else{ + responseBuilder = Response.status(Status.CONFLICT); + } + + return responseBuilder.build(); + + } + + @DELETE + @Produces(MediaType.APPLICATION_JSON) + @Path("/{id}") + public Response deleteOntologyIndex(@PathParam("id") String id){ + ResponseBuilder responseBuilder = null; + + boolean found = true; + + String ontologyURI = uriInfo.getBaseUri() + uriInfo.getPath(); + String vocabNs = uriInfo.getBaseUri() + "ontonethub/vocab/"; + + Graph g = tcManager.getMGraph(new IRI("ontonethub-graph")); + Iterator tripleIt = g.filter(new IRI(ontologyURI), + new IRI(vocabNs + "hasBundle"), + null); + + String bundleId = null; + Triple tripleToRemove = null; + if(tripleIt.hasNext()){ + tripleToRemove = tripleIt.next(); + Literal bundleIdLiteral = (Literal) tripleToRemove.getObject(); + bundleId = bundleIdLiteral.getLexicalForm(); + } + + if(bundleId != null){ + + Bundle bundle = ctx.getBundleContext().getBundle(Long.valueOf(bundleId)); + if(bundle != null){ + JSONObject json = new JSONObject(); + String symbolicName = bundle.getSymbolicName(); + String siteName = symbolicName.replace("org.apache.stanbol.data.site.", ""); + String path = "org.apache.stanbol.data.site.".replaceAll("\\.", File.separator) + siteName; + URL resourceURL = bundle.getResource(path + File.separator + "org.apache.stanbol.entityhub.site.referencedSite-" + siteName + ".config"); + if(resourceURL != null){ + InputStream is; + try { + is = resourceURL.openStream(); + + Properties properties = new Properties(); + properties.load(is); + + String ontologyName = properties.getProperty("org.apache.stanbol.entityhub.site.id"); + if(ontologyName != null){ + /* + * Uninstall the bundle managing the index. + */ + bundle.uninstall(); + /* + * Remove the triple from the graph that represents the ontology catalogue. + */ + g.remove(tripleToRemove); + + /* + * Remove the ontology from OntoNet + */ + + File ontologyFile = new File(ontologiesFolder, id + ".rdf"); + ontologyFile.delete(); + + /* + * Clean the catalogue. 
+ */ + tripleIt = g.filter(new IRI(ontologyURI), null, null); + List<Triple> triplesToRemove = new ArrayList<Triple>(); + while(tripleIt.hasNext()) + triplesToRemove.add(tripleIt.next()); + + for(Triple triple : triplesToRemove) + g.remove(triple); + + responseBuilder = Response.ok(); + } + else found = false; + } catch (IOException e) { + try { + json.put("error", e.getMessage()); + } catch (JSONException e1) { + log.error(e1.getMessage(), e1); + } + responseBuilder = Response.status(Status.INTERNAL_SERVER_ERROR).entity(json.toString()); + } catch (BundleException e) { + try { + json.put("error", e.getMessage()); + } catch (JSONException e1) { + log.error(e1.getMessage(), e1); + } + responseBuilder = Response.status(Status.INTERNAL_SERVER_ERROR).entity(json.toString()); + } + } + else found = false; + } + else found = false; + } + else found = false; + + if(!found) { + JSONObject json = new JSONObject(); + try { + json.put("error", "No ontology exists with the ID provided."); + } catch (JSONException e) { + log.error(e.getMessage(), e); + } + responseBuilder = Response.status(Status.NOT_FOUND).entity(json.toString()); + } + + return responseBuilder.build(); + } + + @POST + @Path("/{id}/find") + public Response findEntity(@PathParam(value = "id") String id, + @FormParam(value = "name") String name, + @FormParam(value = "field") String parsedField, + @FormParam(value = "lang") String language, + // @FormParam(value="select") String select, + @FormParam(value = "limit") Integer limit, + @FormParam(value = "offset") Integer offset, + @FormParam(value = "ldpath") String ldpath, + @Context HttpHeaders headers) { + + String ontologyURI = uriInfo.getBaseUri() + uriInfo.getPath().replaceAll("\\/find$", ""); + Graph g = tcManager.getMGraph(new IRI("ontonethub-graph")); + Iterator<Triple> tripleIt = g.filter(new IRI(ontologyURI), + RDFS.label, + null); + String ontologyName = null; + if(tripleIt.hasNext()){ + ontologyName = ((Literal)tripleIt.next().getObject()).getLexicalForm(); + } + if(ontologyName == null) return Response.status(Status.NOT_FOUND).build(); + else{ + Site site = getSite(ontologyName); + log.debug("site/{}/find Request",site.getId()); + Collection<String> supported = new HashSet<String>(JerseyUtils.QUERY_RESULT_SUPPORTED_MEDIA_TYPES); + supported.add(TEXT_HTML); + final MediaType acceptedMediaType = getAcceptableMediaType( + headers, supported, MediaType.APPLICATION_JSON_TYPE); + if(name == null || name.isEmpty()){ + if(MediaType.TEXT_HTML_TYPE.isCompatible(acceptedMediaType)){ + ResponseBuilder rb = Response.ok(new Viewable("find", new SiteResultData(site))); + rb.header(HttpHeaders.CONTENT_TYPE, TEXT_HTML+"; charset=utf-8"); + //addCORSOrigin(servletContext, rb, headers); + return rb.build(); + } else { + return Response.status(Status.BAD_REQUEST) + .entity("The name must not be null nor empty for find requests. Missing parameter name.\n") + .header(HttpHeaders.ACCEPT, acceptedMediaType).build(); + } + } + final String property; + if (parsedField == null) { + property = DEFAULT_FIND_FIELD; + } else { + parsedField = parsedField.trim(); + if (parsedField.isEmpty()) { + property = DEFAULT_FIND_FIELD; + } else { + property = nsPrefixService.getFullName(parsedField); + if(property == null){ + String message = String.format("The prefix '%s' of the parsed field '%s' is not " + + "mapped to any namespace.
Please parse the full URI instead!\n", + NamespaceMappingUtils.getPrefix(parsedField),parsedField); + return Response.status(Status.BAD_REQUEST) + .entity(messsage) + .header(HttpHeaders.ACCEPT, acceptedMediaType).build(); + } + } + } + return executeQuery(site, createFieldQueryForFindRequest( + name, property, language, + limit == null || limit < 1 ? DEFAULT_FIND_RESULT_LIMIT : limit, + offset,ldpath), + headers); + } + } + + protected void activate(ComponentContext ctx) throws ConfigurationException, FileNotFoundException, IOException { + this.ctx = ctx; + + String stanbolHome = ctx.getBundleContext().getProperty("stanbol.home"); + + URL indexerExecutablesUrl = ctx.getBundleContext().getBundle().getResource(INNER_INDEXER_EXECUTABLES); + File outFolder = new File(stanbolHome + File.separator + "ontonethub-indexing" + File.separator + "executables"); + outFolder.mkdirs(); + File outFile = new File(outFolder, "indexing-genericrdf.jar"); + + try{ + tcManager.createGraph(new IRI("ontonethub-graph")); + } catch(EntityAlreadyExistsException e){ + log.info("The graph managed by the OntonetHub already exists."); + } + + /* + try { + this.onScope = scopeManager.createOntologyScope("ontonethub-scope", + new BlankOntologySource()); + } catch (DuplicateIDException e) { + log.info("The ontology scope already exists."); + this.onScope = scopeManager.getScope("ontonethub-scope"); + } + */ + + this.ontologiesFolder = new File(stanbolHome + File.separator + "ontonethub-indexing" + File.separator + "ontologies"); + ontologiesFolder.mkdirs(); + if(indexerExecutablesUrl != null){ + IOUtils.copy(indexerExecutablesUrl.openStream(), new FileOutputStream(outFile)); + } + } + + protected void deactivate(ComponentContext ctx) { + this.ctx= null; + } + + + private Site getSite(String siteId) { + Site site = siteManager.getSite(siteId); + if (site == null) { + log.error("Site {} not found (no referenced site with that ID is present within the Entityhub", + siteId); + throw new WebApplicationException(Response.Status.NOT_FOUND); + } + if(site instanceof ManagedSite){ + log.debug(" ... 
init ManagedSite"); + } + return site; + } + + /** + * Executes the query parsed by {@link #queryEntities(String, File, HttpHeaders)} or created based + * {@link #findEntity(String, String, String, int, int, HttpHeaders)} + * + * @param query + * The query to execute + * @param headers the request headers + * @return the response (results of error) + */ + private Response executeQuery(Site site, FieldQuery query, HttpHeaders headers) throws WebApplicationException { + MediaType mediaType = getAcceptableMediaType(headers, ENTITY_SUPPORTED_MEDIA_TYPES, + APPLICATION_JSON_TYPE); + if(query instanceof LDPathSelect && ((LDPathSelect)query).getLDPathSelect() != null){ + //use the LDPath variant to process this query + return executeLDPathQuery(site, query, ((LDPathSelect)query).getLDPathSelect(), + mediaType, headers); + } else { //use the default query execution + QueryResultList result; + try { + result = site.find(query); + } catch (SiteException e) { + String message = String.format("Unable to Query Site '%s' (message: %s)", + site.getId(),e.getMessage()); + log.error(message, e); + return Response.status(Status.INTERNAL_SERVER_ERROR) + .entity(message) + .header(HttpHeaders.ACCEPT, mediaType).build(); + } + ResponseBuilder rb = Response.ok(result); + rb.header(HttpHeaders.CONTENT_TYPE, mediaType+"; charset=utf-8"); + //addCORSOrigin(servletContext, rb, headers); + return rb.build(); + } + } + + /** + * Execute a Query that uses LDPath to process results. + * @param query the query + * @param mediaType the mediaType for the response + * @param headers the http headers of the request + * @return the response + */ + private Response executeLDPathQuery(Site site, FieldQuery query, String ldpathProgramString, MediaType mediaType, HttpHeaders headers) { + QueryResultList result; + ValueFactory vf = new RdfValueFactory(new IndexedGraph()); + SiteBackend backend = new SiteBackend(site,vf); + EntityhubLDPath ldPath = new EntityhubLDPath(backend,vf); + //copy the selected fields, because we might need to delete some during + //the preparation phase + Set selectedFields = new HashSet(query.getSelectedFields()); + //first prepare (only execute the query if the parameters are valid) + Program program; + try { + program = prepareQueryLDPathProgram(ldpathProgramString, selectedFields, backend, ldPath); + } catch (LDPathParseException e) { + log.warn("Unable to parse LDPath program used as select for Query:"); + log.warn("FieldQuery: \n {}",query); + log.warn("LDPath: \n {}",((LDPathSelect)query).getLDPathSelect()); + log.warn("Exception:",e); + return Response.status(Status.BAD_REQUEST) + .entity(("Unable to parse LDPath program (Messages: "+ + getLDPathParseExceptionMessage(e)+")!\n")) + .header(HttpHeaders.ACCEPT, mediaType).build(); + } catch (IllegalStateException e) { + log.warn("parsed LDPath program is not compatible with parsed Query!",e); + return Response.status(Status.BAD_REQUEST) + .entity(e.getMessage()) + .header(HttpHeaders.ACCEPT, mediaType).build(); + } + //2. 
execute the query + Iterator resultIt; + try { // we need to adapt from Entity to Representation + resultIt = new AdaptingIterator(site.findEntities(query).iterator(), + new AdaptingIterator.Adapter() { + @Override + public Representation adapt(Entity value, Class type) { + return value.getRepresentation(); + }},Representation.class); + } catch (SiteException e) { + String message = String.format("Unable to Query Site '%s' (message: %s)", + site.getId(),e.getMessage()); + log.error(message, e); + return Response.status(Status.INTERNAL_SERVER_ERROR) + .entity(message) + .header(HttpHeaders.ACCEPT, mediaType).build(); + } + //process the results + Collection transformedResults = transformQueryResults(resultIt, program, + selectedFields, ldPath, backend, vf); + result = new QueryResultListImpl(query, transformedResults, Representation.class); + ResponseBuilder rb = Response.ok(result); + rb.header(HttpHeaders.CONTENT_TYPE, mediaType+"; charset=utf-8"); + //addCORSOrigin(servletContext, rb, headers); + return rb.build(); + } + + public class SiteResultData extends ResultData { + + private Site site; + + public SiteResultData(Site site) { + this.site = site; + } + + public boolean isManagedSite() { + return site instanceof ManagedSite; + } + + public Site getSite() { + return site; + } + } + +} diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubOntologiesResource.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubOntologiesResource.java new file mode 100644 index 0000000..4fa6e46 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubOntologiesResource.java @@ -0,0 +1,278 @@ +package it.cnr.istc.stlab.ontonethub.web.resources; + +import static it.cnr.istc.stlab.ontonethub.web.utils.LDPathHelper.getLDPathParseExceptionMessage; +import static it.cnr.istc.stlab.ontonethub.web.utils.LDPathHelper.prepareQueryLDPathProgram; +import static it.cnr.istc.stlab.ontonethub.web.utils.LDPathHelper.transformQueryResults; +import static javax.ws.rs.core.MediaType.TEXT_HTML; +import static org.apache.stanbol.commons.web.base.utils.MediaTypeUtil.getAcceptableMediaType; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import javax.ws.rs.Consumes; +import javax.ws.rs.FormParam; +import javax.ws.rs.GET; +import javax.ws.rs.OPTIONS; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.HttpHeaders; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.ResponseBuilder; +import javax.ws.rs.core.Response.Status; +import javax.ws.rs.core.UriInfo; + +import org.apache.clerezza.commons.rdf.BlankNodeOrIRI; +import org.apache.clerezza.commons.rdf.Graph; +import org.apache.clerezza.commons.rdf.IRI; +import org.apache.clerezza.commons.rdf.Triple; +import org.apache.clerezza.rdf.core.access.TcManager; +import org.apache.clerezza.rdf.ontologies.RDFS; +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.Property; +import org.apache.felix.scr.annotations.Reference; +import org.apache.felix.scr.annotations.Service; +import org.apache.marmotta.ldpath.exception.LDPathParseException; +import 
org.apache.marmotta.ldpath.model.programs.Program;
+import org.apache.stanbol.commons.indexedgraph.IndexedGraph;
+import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
+import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource;
+import org.apache.stanbol.commons.web.viewable.Viewable;
+import org.apache.stanbol.entityhub.core.query.QueryResultListImpl;
+import org.apache.stanbol.entityhub.ldpath.EntityhubLDPath;
+import org.apache.stanbol.entityhub.ldpath.backend.SiteManagerBackend;
+import org.apache.stanbol.entityhub.ldpath.query.LDPathSelect;
+import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.Entity;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteManager;
+import org.apache.stanbol.entityhub.servicesapi.util.AdaptingIterator;
+import org.codehaus.jettison.json.JSONArray;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import it.cnr.istc.stlab.ontonethub.web.utils.JerseyUtils;
+
+@Component
+@Service(Object.class)
+@Property(name = "javax.ws.rs", boolValue = true)
+@Path("/ontonethub/ontologies")
+public class OntonethubOntologiesResource extends BaseStanbolResource {
+
+	private Logger log = LoggerFactory.getLogger(getClass());
+
+	/**
+	 * The field used for find requests if not specified. TODO: will be deprecated as soon as EntityQuery is
+	 * implemented
+	 */
+	private static final String DEFAULT_FIND_FIELD = RDFS.label.getUnicodeString();
+
+	/**
+	 * The default number of maximal results of searched sites.
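+	 * For example, a find request that parses no limit (or a limit &lt; 1) is
+	 * executed with at most five results.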
+	 */
+	private static final int DEFAULT_FIND_RESULT_LIMIT = 5;
+
+
+	@Reference
+	private TcManager tcManager;
+
+	@Reference
+	private NamespacePrefixService nsPrefixService;
+
+	@Reference
+	private SiteManager referencedSiteManager;
+
+	@Context
+	private UriInfo uriInfo;
+
+	private ComponentContext ctx;
+
+	@OPTIONS
+	public Response handleCorsPreflight(@Context HttpHeaders headers){
+		ResponseBuilder res = Response.ok();
+		return res.build();
+	}
+	@OPTIONS
+	@Path("/find")
+	public Response handleCorsPreflightFind(@Context HttpHeaders headers){
+		ResponseBuilder res = Response.ok();
+		return res.build();
+	}
+
+	@GET
+	@Consumes(MediaType.WILDCARD)
+	@Produces(MediaType.APPLICATION_JSON)
+	public Response listOntologies(){
+
+		String vocabNs = uriInfo.getBaseUri() + "ontonethub/vocab/";
+
+		Graph g = tcManager.getMGraph(new IRI("ontonethub-graph"));
+		Iterator tripleIt = g.filter(null,
+				new IRI(vocabNs + "hasBundle"),
+				null);
+
+		JSONArray array = new JSONArray();
+		while(tripleIt.hasNext()){
+			Triple triple = tripleIt.next();
+			BlankNodeOrIRI subject = triple.getSubject();
+			String uri = subject.toString();
+			uri = uri.substring(1, uri.length()-1);
+			array.put(uri);
+		}
+		return Response.ok(array.toString()).build();
+	}
+
+	@POST
+	@Path("/find")
+	public Response findEntity(@FormParam(value = "name") String name,
+			@FormParam(value = "field") String parsedField,
+			@FormParam(value = "lang") String language,
+			// @FormParam(value="select") String select,
+			@FormParam(value = "limit") Integer limit,
+			@FormParam(value = "offset") Integer offset,
+			@FormParam(value = "ldpath") String ldpath,
+			@Context HttpHeaders headers) {
+		log.debug("findEntity() Request");
+		Collection supported = new HashSet(JerseyUtils.QUERY_RESULT_SUPPORTED_MEDIA_TYPES);
+		supported.add(TEXT_HTML);
+		final MediaType acceptedMediaType = getAcceptableMediaType(
+			headers, supported, MediaType.APPLICATION_JSON_TYPE);
+		if(name == null || name.isEmpty()){
+			if(MediaType.TEXT_HTML_TYPE.isCompatible(acceptedMediaType)){
+				ResponseBuilder rb = Response.ok(new Viewable("find", this));
+				rb.header(HttpHeaders.CONTENT_TYPE, TEXT_HTML+"; charset=utf-8");
+				//addCORSOrigin(servletContext, rb, headers);
+				return rb.build();
+			} else {
+				return Response.status(Status.BAD_REQUEST)
+					.entity("The name must not be null nor empty for find requests. Missing parameter name.\n")
+					.header(HttpHeaders.ACCEPT, acceptedMediaType).build();
+			}
+		}
+		final String property;
+		if (parsedField == null) {
+			property = DEFAULT_FIND_FIELD;
+		} else {
+			parsedField = parsedField.trim();
+			if (parsedField.isEmpty()) {
+				property = DEFAULT_FIND_FIELD;
+			} else {
+				property = nsPrefixService.getFullName(parsedField);
+				if(property == null){
+					String message = String.format("The prefix '%s' of the parsed field '%s' is not "
+						+ "mapped to any namespace. Please parse the full URI instead!\n",
+						NamespaceMappingUtils.getPrefix(parsedField),parsedField);
+					return Response.status(Status.BAD_REQUEST)
+						.entity(message)
+						.header(HttpHeaders.ACCEPT, acceptedMediaType).build();
+				}
+			}
+		}
+		FieldQuery query = JerseyUtils.createFieldQueryForFindRequest(name, property, language,
+			limit == null || limit < 1 ? 
DEFAULT_FIND_RESULT_LIMIT : limit, offset,ldpath);
+		return executeQuery(referencedSiteManager, query, acceptedMediaType, headers);
+	}
+
+	protected void activate(ComponentContext ctx) throws ConfigurationException, FileNotFoundException, IOException {
+		this.ctx = ctx;
+	}
+
+	protected void deactivate(ComponentContext ctx) {
+		this.ctx = null;
+	}
+
+	/**
+	 * Executes the query parsed by {@link #queryEntities(String, File, HttpHeaders)} or created based on
+	 * {@link #findEntity(String, String, String, int, int, HttpHeaders)}
+	 *
+	 * @param manager The {@link SiteManager}
+	 * @param query
+	 *            The query to execute
+	 * @param headers the request headers
+	 * @return the response (results or error)
+	 */
+	private Response executeQuery(SiteManager manager,
+			FieldQuery query, MediaType mediaType,
+			HttpHeaders headers) throws WebApplicationException {
+		if(query instanceof LDPathSelect && ((LDPathSelect)query).getLDPathSelect() != null){
+			//use the LDPath variant to process this query
+			return executeLDPathQuery(manager, query, ((LDPathSelect)query).getLDPathSelect(),
+				mediaType, headers);
+		} else { //use the default query execution
+			QueryResultList result = manager.find(query);
+			ResponseBuilder rb = Response.ok(result);
+			rb.header(HttpHeaders.CONTENT_TYPE, mediaType+"; charset=utf-8");
+			//addCORSOrigin(servletContext, rb, headers);
+			return rb.build();
+		}
+	}
+
+	/**
+	 * Execute a Query that uses LDPath to process results.
+	 * @param query the query
+	 * @param mediaType the mediaType for the response
+	 * @param headers the http headers of the request
+	 * @return the response
+	 */
+	private Response executeLDPathQuery(SiteManager manager,FieldQuery query, String ldpathProgramString, MediaType mediaType, HttpHeaders headers) {
+		QueryResultList result;
+		ValueFactory vf = new RdfValueFactory(new IndexedGraph());
+		SiteManagerBackend backend = new SiteManagerBackend(manager);
+		EntityhubLDPath ldPath = new EntityhubLDPath(backend,vf);
+		//copy the selected fields, because we might need to delete some during
+		//the preparation phase
+		Set selectedFields = new HashSet(query.getSelectedFields());
+		//first prepare (only execute the query if the parameters are valid)
+		Program program;
+		try {
+			program = prepareQueryLDPathProgram(ldpathProgramString, selectedFields, backend, ldPath);
+		} catch (LDPathParseException e) {
+			log.warn("Unable to parse LDPath program used as select for a Query to the '/sites' endpoint:");
+			log.warn("FieldQuery: \n {}",query);
+			log.warn("LDPath: \n {}",((LDPathSelect)query).getLDPathSelect());
+			log.warn("Exception:",e);
+			return Response.status(Status.BAD_REQUEST)
+				.entity(("Unable to parse LDPath program (Messages: "+
+					getLDPathParseExceptionMessage(e)+")!\n"))
+				.header(HttpHeaders.ACCEPT, mediaType).build();
+		} catch (IllegalStateException e) {
+			log.warn("parsed LDPath program is not compatible with the Query "
+				+ "parsed to the '/sites' endpoint!",e);
+			return Response.status(Status.BAD_REQUEST)
+				.entity(e.getMessage())
+				.header(HttpHeaders.ACCEPT, mediaType).build();
+		}
+		//2. execute the query
+		// we need to adapt from Entity to Representation
+		//TODO: should we add the metadata to the result? 
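+		// The AdaptingIterator below lazily unwraps each Entity into its
+		// Representation while the results are iterated. A hand-rolled
+		// equivalent (an illustrative sketch, not the code used here) would
+		// eagerly copy the results instead:
+		//   List<Representation> reps = new ArrayList<>();
+		//   for(Iterator<Entity> it = manager.findEntities(query).iterator(); it.hasNext();){
+		//       reps.add(it.next().getRepresentation());
+		//   }
+		// The adapter avoids materialising that intermediate list.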
+ Iterator resultIt = new AdaptingIterator(manager.findEntities(query).iterator(), + new AdaptingIterator.Adapter() { + @Override + public Representation adapt(Entity value, Class type) { + return value.getRepresentation(); + }},Representation.class); + //process the results + Collection transformedResults = transformQueryResults(resultIt, program, + selectedFields, ldPath, backend, vf); + result = new QueryResultListImpl(query, transformedResults, Representation.class); + ResponseBuilder rb = Response.ok(result); + rb.header(HttpHeaders.CONTENT_TYPE, mediaType+"; charset=utf-8"); + //addCORSOrigin(servletContext, rb, headers); + return rb.build(); + } + +} diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubResource.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubResource.java new file mode 100644 index 0000000..daf5599 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/resources/OntonethubResource.java @@ -0,0 +1,49 @@ +package it.cnr.istc.stlab.ontonethub.web.resources; + +import javax.ws.rs.Consumes; +import javax.ws.rs.GET; +import javax.ws.rs.OPTIONS; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.HttpHeaders; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.ResponseBuilder; +import javax.ws.rs.core.UriInfo; + +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.Property; +import org.apache.felix.scr.annotations.Service; +import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource; + +/** + * Base resource for the OntoNetHub. + * + * @author Andrea Nuzzolese + * + */ +@Component +@Service(Object.class) +@Property(name = "javax.ws.rs", boolValue = true) +@Path("/ontonethub") +public class OntonethubResource extends BaseStanbolResource { + + @Context + private UriInfo uriInfo; + + @OPTIONS + public Response handleCorsPreflightOntology(@PathParam(value = "id") String id, + @Context HttpHeaders headers){ + ResponseBuilder res = Response.ok(); + return res.build(); + } + + @GET + @Consumes(MediaType.WILDCARD) + @Produces({MediaType.TEXT_PLAIN}) + public Response sayHello(){ + return Response.ok("Hello, it's the OntoNetHub!").build(); + } +} diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/utils/JerseyUtils.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/utils/JerseyUtils.java new file mode 100644 index 0000000..58155b1 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/utils/JerseyUtils.java @@ -0,0 +1,414 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package it.cnr.istc.stlab.ontonethub.web.utils; + +import static javax.ws.rs.core.MediaType.APPLICATION_JSON; +import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.N3; +import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.N_TRIPLE; +import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.RDF_JSON; +import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.RDF_XML; +import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.TEXT_RDF_NT; +import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.TURTLE; +import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.X_TURTLE; + +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.GenericArrayType; +import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; +import java.lang.reflect.WildcardType; +import java.net.URLDecoder; +import java.nio.charset.UnsupportedCharsetException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; + +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.ext.MessageBodyReader; + +import org.apache.commons.io.IOUtils; +import org.apache.stanbol.commons.web.base.utils.MediaTypeUtil; +import org.apache.stanbol.entityhub.core.query.DefaultQueryFactory; +import org.apache.stanbol.entityhub.ldpath.query.LDPathFieldQueryImpl; +import org.apache.stanbol.entityhub.servicesapi.model.Entity; +import org.apache.stanbol.entityhub.servicesapi.model.Representation; +import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery; +import org.apache.stanbol.entityhub.servicesapi.query.FieldQueryFactory; +import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList; +import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint; +import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint.PatternType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility methods used by several of the RESTful service endpoints of the + * Entityhub. + * @author Rupert Westenthaler + * + */ +public final class JerseyUtils { + + private static Logger log = LoggerFactory.getLogger(JerseyUtils.class); + /** + * Unmodifiable Set with the Media Types supported for {@link Representation} + */ + public static final Set REPRESENTATION_SUPPORTED_MEDIA_TYPES = + Collections.unmodifiableSet(new HashSet(Arrays.asList( + APPLICATION_JSON,RDF_XML,N3,TURTLE,X_TURTLE,RDF_JSON,N_TRIPLE, TEXT_RDF_NT, + MediaTypeUtil.JSON_LD))); + /** + * Unmodifiable Set with the Media Types supported for {@link Entity} + */ + public static final Set ENTITY_SUPPORTED_MEDIA_TYPES = + REPRESENTATION_SUPPORTED_MEDIA_TYPES; + + /** + * Unmodifiable Set with the Media Types supported for {@link QueryResultList} + */ + public static final Set QUERY_RESULT_SUPPORTED_MEDIA_TYPES = + REPRESENTATION_SUPPORTED_MEDIA_TYPES; + /** + * This utility class used the {@link DefaultQueryFactory} as + * {@link FieldQueryFactory} instance. 
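+     * For example: {@code FieldQuery query = queryFactory.createFieldQuery();}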
+ */ + private static FieldQueryFactory queryFactory = DefaultQueryFactory.getInstance(); + + private JerseyUtils() {/* do not create instance of Util Classes */} + + +// /** +// * Returns the {@link FieldQuery} based on the JSON formatted String (in case +// * of "application/x-www-form-urlencoded" requests) or file (in case of +// * "multipart/form-data" requests).

+// * @param query the string containing the JSON serialised FieldQuery or
+// * null in case of a "multipart/form-data" request
+// * @param file the temporary file holding the data parsed by the request to
+// * the web server in case of a "multipart/form-data" request or null
+// * in case of the "application/x-www-form-urlencoded" request.
+// * @return the FieldQuery parsed from the string provided by one of the two
+// * parameters
+// * @throws WebApplicationException if both parameters are null or
+// * if the string provided by both parameters could not be used to parse a
+// * {@link FieldQuery} instance.
+// */
+//    public static FieldQuery parseFieldQuery(String query, File file) throws WebApplicationException {
+//        if(query == null && file == null) {
+//            throw new WebApplicationException(new IllegalArgumentException("Query Requests MUST define the \"query\" parameter"), Response.Status.BAD_REQUEST);
+//        }
+//        FieldQuery fieldQuery = null;
+//        JSONException exception = null;
+//        if(query != null){
+//            try {
+//                fieldQuery = JSONToFieldQuery.fromJSON(queryFactory,query);
+//            } catch (JSONException e) {
+//                log.warn("unable to parse FieldQuery from \"application/x-www-form-urlencoded\" encoded query string "+query,e);
+//                fieldQuery = null;
+//                exception = e;
+//            }
+//        } //else no query via application/x-www-form-urlencoded parsed
+//        if(fieldQuery == null && file != null){
+//            try {
+//                query = FileUtils.readFileToString(file);
+//                fieldQuery = JSONToFieldQuery.fromJSON(queryFactory,query);
+//            } catch (IOException e) {
+//                throw new WebApplicationException(e, Response.Status.INTERNAL_SERVER_ERROR);
+//            } catch (JSONException e) {
+//                log.warn("unable to parse FieldQuery from \"multipart/form-data\" encoded query string "+query,e);
+//                exception = e;
+//            }
+//        }//fieldquery already initialised or no query via multipart/form-data parsed
+//        if(fieldQuery == null){
+//            throw new WebApplicationException(new IllegalArgumentException("Unable to parse FieldQuery from the parsed query String:"+query, exception),Response.Status.BAD_REQUEST);
+//        }
+//        return fieldQuery;
+//    }
+    /**
+     * Creates a {@link FieldQuery} for parameters parsed by the /find requests
+     * supported by the /symbol, /sites and {siteId} RESTful endpoints.
+     * TODO: This has to be refactored to use "EntityQuery" as soon as Multiple
+     * query types are implemented for the Entityhub.
+     * @param name the name pattern to search entities for (required)
+     * @param field the field used to search for entities (required)
+     * @param language the language of the parsed name pattern (optional)
+     * @param limit the maximum number of results (optional)
+     * @param offset the offset of the first result (optional)
+     * @return the {@link FieldQuery} representing the parsed parameters
+     * @throws WebApplicationException in case the parsed name pattern is invalid.
+     * The validation of this required parameter provided by the Request is done
+     * by this method.
+     * @throws IllegalArgumentException in case the parsed field is invalid. Callers
+     * of this method need to ensure that this parameter is set to a valid value.
+     */
+    public static FieldQuery createFieldQueryForFindRequest(String name, String field,
+                                                            String language, Integer limit,
+                                                            Integer offset, String ldpath) throws WebApplicationException, IllegalArgumentException{
+        if(name == null || name.trim().isEmpty()){
+            // This throws a WebApplicationException, because the search name is
+            // provided by the caller. So an empty or missing name is interpreted
+            // as a bad request sent by the user
+            throw new WebApplicationException(
+                new IllegalArgumentException(
+                    "The parsed \"name\" pattern to search entities for MUST NOT be NULL nor EMPTY"),
+                Response.Status.BAD_REQUEST);
+        } else {
+            name = name.trim();
+        }
+        if(field == null || field.trim().isEmpty()){
+            // This throws no WebApplicationException, because "field" is an
+            // optional parameter and callers of this method MUST provide a
+            // valid default value in case the request does not provide any or
+            // valid data.
+            throw new IllegalArgumentException("The parsed search \"field\" MUST NOT be NULL nor EMPTY");
+        } else {
+            field = field.trim();
+        }
+        log.debug("Process Find Request:");
+        log.debug("  > name  : " + name);
+        log.debug("  > field : " + field);
+        log.debug("  > lang  : " + language);
+        log.debug("  > limit : " + limit);
+        log.debug("  > offset: " + offset);
+        log.debug("  > ldpath: " + ldpath);
+        FieldQuery query;
+        if(ldpath != null && !ldpath.isEmpty()){ //STANBOL-417
+            query = new LDPathFieldQueryImpl();
+            ((LDPathFieldQueryImpl)query).setLDPathSelect(ldpath);
+        } else { //if no LDPath is parsed select the default field
+            query = queryFactory.createFieldQuery();
+            Collection selectedFields = new ArrayList();
+            selectedFields.add(field); //select also the field used to find entities
+            query.addSelectedFields(selectedFields);
+        }
+        if (language == null || language.trim().isEmpty()) {
+            query.setConstraint(field, new TextConstraint(name, PatternType.wildcard, false));
+        } else {
+            query.setConstraint(field, new TextConstraint(name, PatternType.wildcard, false, language));
+        }
+        if (limit != null && limit > 0) {
+            query.setLimit(limit);
+        }
+        if(offset != null && offset > 0) {
+            query.setOffset(offset);
+        }
+        return query;
+    }
+//    /**
+//     * Getter for a Service from the {@link ServletContext} by using the
+//     * {@link Class#getName()} as key for {@link ServletContext#getAttribute(String)}.
+//     * In case the Service can not be found a {@link WebApplicationException} is
+//     * thrown with the message that the Service is currently not available.
+//     * @param The type of the Service
+//     * @param service the Service interface
+//     * @param context the context used to search the service
+//     * @return the Service instance
+//     * @throws WebApplicationException in case the service instance was not found
+//     * in the parsed servlet context
+//     * @throws IllegalArgumentException if null is parsed as
+//     * service or context
+//     */
+//    @SuppressWarnings("unchecked")
+//    public static T getService(Class service, ServletContext context) throws WebApplicationException, IllegalArgumentException {
+//        if(service == null){
+//            throw new IllegalArgumentException("The parsed ServiceInterface MUST NOT be NULL!");
+//        }
+//        if(context == null){
+//            throw new IllegalArgumentException("The parsed ServletContext MUST NOT be NULL");
+//        }
+//        T serviceInstance = (T) context.getAttribute(service.getName());
+//        if(serviceInstance == null){
+//            throw new WebApplicationException(new IllegalStateException(
+//                "The "+service.getSimpleName()+" Service is currently not available " +
+//                "(full name= "+service+"| " +
+//                "servlet context name = "+context.getServletContextName()+")"),
+//                Response.Status.INTERNAL_SERVER_ERROR);
+//        }
+//        return serviceInstance;
+//    }
+    /**
+     * Tests if a generic type (may be <?>, <? extends {required}>
+     * or <? super {required}>) is compatible with the required one. 
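+     * For example (illustrative): with required = Representation.class the
+     * test succeeds for Representation.class or RdfRepresentation.class but
+     * fails for InputStream.class, following the class hierarchy sketched in
+     * the implementation comments below.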
+     * TODO: Should be moved to a utility class
+     * @param required the required class the generic type MUST BE compatible with
+     * @param type the generic type to test
+     * @return if the generic type is compatible with the required class
+     */
+    public static boolean testType(Class required, Type type) {
+        //for the examples let's assume that a Set is the raw type and the
+        //requested generic type is a Representation with the following class
+        //hierarchy:
+        // Object
+        //     -> Representation
+        //         -> RdfRepresentation
+        //         -> InMemoryRepresentation
+        //     -> InputStream
+        //     -> Collection
+        boolean typeOK = false;
+//        while(type != null && !typeOK){
+//            types.add(type);
+        if(type instanceof Class){
+            typeOK = required.isAssignableFrom((Class) type);
+            type = ((Class)type).getGenericSuperclass();
+        } else if(type instanceof WildcardType){
+            //In cases <? extends {required}>, <? super {required}>
+            WildcardType wildcardSetType = (WildcardType) type;
+            if(wildcardSetType.getLowerBounds().length > 0){
+                Type lowerBound = wildcardSetType.getLowerBounds()[0];
+                //OK
+                //  Set<? super Representation>
+                //  Set<? super RdfRepresentation>
+                //NOT OK
+                //  Set<? super Object>
+                //  Set<? super Collection<Representation>>
+                typeOK = lowerBound instanceof Class &&
+                    required.isAssignableFrom((Class)lowerBound);
+            } else if (wildcardSetType.getUpperBounds().length > 0){
+                Type upperBound = wildcardSetType.getUpperBounds()[0];
+                //OK
+                //  Set<? extends Representation>
+                //  Set<? extends Object>
+                //NOT OK
+                //  Set<? extends RdfRepresentation>
+                //  Set<? extends InputStream>
+                //  Set<? extends Collection>
+                typeOK = upperBound instanceof Class &&
+                    ((Class)upperBound).isAssignableFrom(required);
+            } else { //no upper nor lower bound
+                // Set<?>
+                typeOK = true;
+            }
+        } else if(required.isArray() && type instanceof GenericArrayType){
+            //In case the required type is an array we need also to support
+            //possible generic Array specifications
+            GenericArrayType arrayType = (GenericArrayType)type;
+            typeOK = testType(required.getComponentType(), arrayType.getGenericComponentType());
+        } else if(type instanceof ParameterizedType){
+            ParameterizedType pType = ((ParameterizedType)type);
+            typeOK = pType.getRawType() instanceof Class &&
+                required.isAssignableFrom((Class)pType.getRawType());
+            type = null;
+        } else {
+            //GenericArrayType but !required.isArray() -> incompatible
+            //TypeVariable -> no variables defined -> incompatible
+            typeOK = false;
+//            type = null; //end
+        }
+//        }
+        return typeOK;
+    }
+    /**
+     * Tests the parsed type against the raw type and parsed Type parameters.
+     * This allows e.g. to check for Map<String,Number> but
+     * also works with classes that extend generic types such as
+     * Dummy extends {@link HashMap}<String,String>.
+     * @param rawType the raw type to test against
+     * @param parameterTypes the types of the parameters
+     * @param type the type to test
+     * @return if the type is compatible or not
+     */
+    public static boolean testParameterizedType(Class rawType, Class[] parameterTypes, Type type) {
+        // first check the raw type
+        if (!testType(rawType, type)) {
+            return false;
+        }
+        while (type != null) {
+            // types.add(type);
+            Type[] parameters = null;
+            if (type instanceof ParameterizedType) {
+                parameters = ((ParameterizedType) type).getActualTypeArguments();
+                // the number of type arguments MUST BE the same as parameter types
+                if (parameters.length == parameterTypes.length) {
+                    boolean compatible = true;
+                    // All parameters MUST BE compatible! 
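+                    // e.g. (illustrative) rawType=Map.class with
+                    // parameterTypes={String.class, Number.class} accepts a
+                    // field of type Map<String,Integer> but rejects
+                    // Map<String,String>, failing on the second type argument.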
+                    for (int i = 0; compatible && i < parameters.length; i++) {
+                        compatible = testType(parameterTypes[i], parameters[i]);
+                    }
+                    if (compatible) {
+                        return true;
+                    }
+                } // else check parent types
+
+            } // else not parameterised
+            if (type instanceof Class) {
+                type = ((Class) type).getGenericSuperclass();
+            } else {
+                return false;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * This Method is intended to parse form data from
+     * {@link MediaType#APPLICATION_FORM_URLENCODED} requests. This functionality
+     * is usually needed when writing a {@link MessageBodyReader} to get the
+     * data from the "{@link InputStream} entityStream" parameter of the
+     * {@link MessageBodyReader#readFrom(Class, Type, java.lang.annotation.Annotation[], MediaType, javax.ws.rs.core.MultivaluedMap, InputStream)}
+     * method.
+     * @param entityStream the stream with the form data
+     * @param charset The charset used for the request (if null or
+     * empty, UTF-8 is used as default).
+     * @return the parsed form data as key value map
+     * @throws IOException on any exception while reading the data from the stream
+     */
+    public static Map parseForm(InputStream entityStream,String charset) throws IOException {
+        /* TODO: Question:
+         * If I get a Post Request with "application/x-www-form-urlencoded"
+         * and a charset (let's assume "iso-2022-kr") do I need to use the
+         * charset to read the String from the Stream, or to URL decode the
+         * String or both?
+         *
+         * This code assumes that it needs to be used for both, but this needs
+         * validation!
+         */
+        if(charset == null || charset.isEmpty()){
+            charset = "UTF-8";
+        }
+        String data;
+        try {
+            data = IOUtils.toString(entityStream,charset);
+        } catch (UnsupportedCharsetException e) {
+            throw new IOException(e.getMessage(),e);
+        }
+        Map form = new HashMap();
+        StringTokenizer tokenizer = new StringTokenizer(data, "&");
+        String token;
+        try {
+            while (tokenizer.hasMoreTokens()) {
+                token = tokenizer.nextToken();
+                int index = token.indexOf('=');
+                if (index < 0) {
+                    form.put(URLDecoder.decode(token,charset), null);
+                } else if (index > 0) {
+                    form.put(URLDecoder.decode(token.substring(0, index),charset),
+                        URLDecoder.decode(token.substring(index+1),charset));
+                }
+            }
+        } catch (UnsupportedCharsetException e) {
+            throw new IOException(e.getMessage(),e);
+        }
+        return form;
+    }
+
+}
diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/utils/LDPathHelper.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/utils/LDPathHelper.java
new file mode 100644
index 0000000..52ef62b
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/utils/LDPathHelper.java
@@ -0,0 +1,293 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. 
+*/ +package it.cnr.istc.stlab.ontonethub.web.utils; + +import static javax.ws.rs.core.MediaType.TEXT_HTML; +import static javax.ws.rs.core.MediaType.TEXT_HTML_TYPE; +import static org.apache.stanbol.commons.web.base.utils.MediaTypeUtil.getAcceptableMediaType; +import static org.apache.stanbol.entityhub.ldpath.LDPathUtils.getReader; + +import java.net.URI; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.Map; +import java.util.Set; + +import javax.ws.rs.core.HttpHeaders; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.ResponseBuilder; +import javax.ws.rs.core.Response.Status; + +import org.apache.clerezza.commons.rdf.Graph; +import org.apache.marmotta.ldpath.LDPath; +import org.apache.marmotta.ldpath.api.backend.RDFBackend; +import org.apache.marmotta.ldpath.exception.LDPathParseException; +import org.apache.marmotta.ldpath.model.fields.FieldMapping; +import org.apache.marmotta.ldpath.model.programs.Program; +import org.apache.marmotta.ldpath.model.selectors.PropertySelector; +import org.apache.marmotta.ldpath.model.transformers.DoubleTransformer; +import org.apache.stanbol.commons.indexedgraph.IndexedGraph; +import org.apache.stanbol.commons.web.base.resource.BaseStanbolResource; +import org.apache.stanbol.commons.web.viewable.Viewable; +import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory; +import org.apache.stanbol.entityhub.ldpath.EntityhubLDPath; +import org.apache.stanbol.entityhub.ldpath.backend.AbstractBackend; +import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory; +import org.apache.stanbol.entityhub.servicesapi.model.Reference; +import org.apache.stanbol.entityhub.servicesapi.model.Representation; +import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory; +import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class LDPathHelper { + + /** + * Restrict instantiation + */ + private LDPathHelper() {} + + private static final Logger log = LoggerFactory.getLogger(LDPathHelper.class); + + /** + * LDPath {@link FieldMapping} for the {@link RdfResourceEnum#resultScore} + * property used for the score of query results + */ + public static final FieldMapping RESULT_SCORE_MAPPING = + new FieldMapping(RdfResourceEnum.resultScore.getUri(), + URI.create("http://www.w3.org/2001/XMLSchema#double"), + new PropertySelector( + InMemoryValueFactory.getInstance().createReference( + RdfResourceEnum.resultScore.getUri())), + new DoubleTransformer(), null); + + /** + * Executes the LDPath program on the contexts stored in the backend and + * returns the result as an RDF graph + * @param contexts the contexts to execute the program on + * @param ldpath the LDPath program to execute + * @param backend the {@link RDFBackend} to use + * @return The results stored within an RDF graph + * @throws LDPathParseException if the parsed LDPath program is invalid + */ + private static Graph executeLDPath(RDFBackend backend, + String ldpath, + Set contexts ) throws LDPathParseException { + Graph data = new IndexedGraph(); + RdfValueFactory vf = new RdfValueFactory(data); + EntityhubLDPath ldPath = new EntityhubLDPath(backend,vf); + Program program = ldPath.parseProgram(getReader(ldpath)); + if(log.isDebugEnabled()){ + log.debug("Execute on Context(s) '{}' LDPath program: \n{}", + 
contexts,program.getPathExpression(backend));
+        }
+        /*
+         * NOTE: We do not need to process the Representations returned by
+         * EntityhubLDPath#execute, because the RdfValueFactory used here uses
+         * the local variable "Graph data" to back up all created
+         * RdfRepresentation. Because of this all converted data will be
+         * automatically added to the Graph. The only thing we need to do is to
+         * wrap the Graph in the response.
+         */
+        for(String context : contexts){
+            ldPath.execute(vf.createReference(context), program);
+        }
+        return data;
+    }
+    /**
+     * Utility that gets the messages of the parsing error. The message about the
+     * problem is contained in some parent Exception. Therefore this follows
+     * {@link Exception#getCause()}s. The toString method of the returned map
+     * will print the "exception: message" in the correct order.
+     * @param e the exception
+     * @return the info useful to reply in BAD_REQUEST responses
+     */
+    public static Map getLDPathParseExceptionMessage(LDPathParseException e) {
+        Map messages = new LinkedHashMap();
+        Throwable t = e;
+        do { // the real parsing error is in some cause ...
+            messages.put(t.getClass().getSimpleName(),t.getMessage()); // ... so collect all messages
+            t = t.getCause();
+        } while (t != null);
+        return messages;
+    }
+    /**
+     * Processes LDPath requests as supported by the {@link SiteManagerRootResource},
+     * {@link ReferencedSiteRootResource}, {@link EntityhubRootResource}.
+     * @param resource The resource used as context when sending RESTful Service API
+     * {@link Viewable} as response entity.
+     * @param backend The {@link RDFBackend} implementation
+     * @param ldpath the parsed LDPath program
+     * @param contexts the Entities to execute the LDPath program
+     * @param headers the parsed HTTP headers (used to determine the accepted
+     * content type for the response)
+     * @return the Response {@link Status#BAD_REQUEST} or {@link Status#OK}.
+     */
+    public static Response handleLDPathRequest(BaseStanbolResource resource,
+                                               RDFBackend backend,
+                                               String ldpath,
+                                               Set contexts,
+                                               HttpHeaders headers) {
+        Collection supported = new HashSet(JerseyUtils.ENTITY_SUPPORTED_MEDIA_TYPES);
+        supported.add(TEXT_HTML);
+        final MediaType acceptedMediaType = getAcceptableMediaType(headers,
+            supported, MediaType.APPLICATION_JSON_TYPE);
+        boolean printDocu = false;
+        //remove null and "" elements (guarding against a null Set first)
+        if(contexts != null){
+            contexts.remove(null);
+            contexts.remove("");
+        }
+        if(contexts == null || contexts.isEmpty()){
+            if(MediaType.TEXT_HTML_TYPE.isCompatible(acceptedMediaType)){
+                printDocu = true;
+            } else {
+                return Response.status(Status.BAD_REQUEST)
+                    .entity("No context was provided by the Request. Missing parameter context.\n")
+                    .header(HttpHeaders.ACCEPT, acceptedMediaType).build();
+            }
+        }
+        if(!printDocu && (ldpath == null || ldpath.isEmpty())){
+            if(MediaType.TEXT_HTML_TYPE.isCompatible(acceptedMediaType)){
+                printDocu = true;
+            } else {
+                return Response.status(Status.BAD_REQUEST)
+                    .entity("No ldpath program was provided by the Request. 
Missing or empty parameter ldpath.\n") + .header(HttpHeaders.ACCEPT, acceptedMediaType).build(); + } + } + if(printDocu){ //a missing parameter and the content type is compatible to HTML + ResponseBuilder rb = Response.ok(new Viewable("ldpath", resource)); + rb.header(HttpHeaders.CONTENT_TYPE, TEXT_HTML+"; charset=utf-8"); + //addCORSOrigin(servletContext, rb, headers); + return rb.build(); + } else if(acceptedMediaType.equals(TEXT_HTML_TYPE)){ + //HTML is only supported for documentation + return Response.status(Status.NOT_ACCEPTABLE) + .entity("The requested content type "+TEXT_HTML+" is not supported.\n") + .header(HttpHeaders.ACCEPT, acceptedMediaType).build(); + } + Graph data; + try { + data = executeLDPath(backend, ldpath, contexts); + } catch (LDPathParseException e) { + log.warn("Unable to parse LDPath program:\n"+ldpath,e); + return Response.status(Status.BAD_REQUEST) + .entity(("Unable to parse LDPath program (Messages: "+ + getLDPathParseExceptionMessage(e)+")!\n")) + .header(HttpHeaders.ACCEPT, acceptedMediaType).build(); + } + ResponseBuilder rb = Response.ok(data); + rb.header(HttpHeaders.CONTENT_TYPE, acceptedMediaType+"; charset=utf-8"); + //addCORSOrigin(servletContext, rb, headers); + return rb.build(); + } + + + /** + * Transform the results of a query + * @param resultIt The Iterator over the results + * @param program the LDPath {@link Program} to execute on the results + * @param selectedFields additional selected fields of the query + * @param ldPath the Entityhub LDPath + * @param backend the {@link AbstractBackend} mainly used to + * {@link AbstractBackend#addLocal(Representation) add representations} of + * the query to the local cache + * @param vf the {@link ValueFactory} used create {@link Reference}s for the + * String {@link Representation#getId() id}s of the {@link Representation} in + * the query results + * @return A collection with the transformed Representations in the processed + * order. + */ + public static Collection transformQueryResults(Iterator resultIt, + Program program, + Set selectedFields, + EntityhubLDPath ldPath, + AbstractBackend backend, + ValueFactory vf) { + Collection transformedResults = new LinkedHashSet(); + while(resultIt.hasNext()){ + Representation rep = resultIt.next(); + backend.addLocal(rep); //add results to local cache + Representation transformed = ldPath.execute(vf.createReference(rep.getId()), program); + //also add additional selected fields + for(String selected : selectedFields){ + Iterator values = rep.get(selected); + if(values != null){ + while(values.hasNext()){ + transformed.add(selected, values.next()); + } + } + } + transformedResults.add(transformed); + } + return transformedResults; + } + + + /** + * + * @param ldpathProgram the LDPath program as string + * @param selectedFields the selected fields of the query + * @param backend the RDFBackend (only needed for logging) + * @param ldPath the {@link LDPath} used to parse the program. + * @return the pre-processed and validated program + * @throws LDPathParseException if the parsed LDPath program string is not + * valid + * @throws IllegalStateException if the fields selected by the LDPath + * program conflict with the fields selected by the query. 
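+     * For example (illustrative): if the query already selects rdfs:label and
+     * the parsed LDPath program also defines a mapping for rdfs:label, the
+     * field is removed from the selected fields and reported as a conflict.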
+     */
+    public static Program prepareQueryLDPathProgram(String ldpathProgram,
+                                                    Set selectedFields,
+                                                    AbstractBackend backend,
+                                                    EntityhubLDPath ldPath) throws LDPathParseException {
+        Program program = ldPath.parseProgram(getReader(ldpathProgram));
+
+        //We need to do two things:
+        // 1) ensure that no fields defined by LDPath are also selected
+        StringBuilder conflicting = null;
+        // 2) add the field of the result score if not defined by LDPath
+        String resultScoreProperty = RdfResourceEnum.resultScore.getUri();
+        boolean foundResultRankingField = false;
+        for(FieldMapping ldPathField : program.getFields()){
+            String field = ldPathField.getFieldName();
+            if(!foundResultRankingField && resultScoreProperty.equals(field)){
+                foundResultRankingField = true;
+            }
+            //remove from selected fields -> if we decide later that
+            //this should not be a BAD_REQUEST
+            if(selectedFields.remove(ldPathField.getFieldName())){
+                if(conflicting == null){
+                    conflicting = new StringBuilder();
+                }
+                conflicting.append('\n').append("  > ")
+                    .append(ldPathField.getPathExpression(backend));
+            }
+        }
+        if(conflicting != null){ //there are conflicts
+            throw new IllegalStateException("Selected Fields conflict with Fields defined by "
+                + "the LDPath program! Conflicts: "+conflicting.toString());
+        }
+        if(!foundResultRankingField){ //if no mapping for the result score
+            program.addMapping(RESULT_SCORE_MAPPING); //add the default mapping
+        }
+        return program;
+    }
+}
diff --git a/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/writers/ModelWriter.java b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/writers/ModelWriter.java
new file mode 100644
index 0000000..eb33812
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/java/it/cnr/istc/stlab/ontonethub/web/writers/ModelWriter.java
@@ -0,0 +1,86 @@
+package it.cnr.istc.stlab.ontonethub.web.writers;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFDataMgr;
+import org.apache.stanbol.commons.web.base.format.KRFormat;
+
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.rdf.model.Resource;
+import com.hp.hpl.jena.rdf.model.ResourceFactory;
+import com.hp.hpl.jena.sparql.vocabulary.FOAF;
+import com.hp.hpl.jena.vocabulary.RDF;
+
+/**
+ *
+ * @author Andrea Nuzzolese
+ *
+ */
+
+@Component
+@Service(Object.class)
+@Property(name="javax.ws.rs", boolValue=true)
+@Provider
+public class ModelWriter implements MessageBodyWriter{
+
+	public static final Set supportedMediaTypes;
+	static {
+		Set types = new HashSet();
+		types.add(KRFormat.N3);
+		types.add(KRFormat.N_TRIPLE);
+		types.add(KRFormat.RDF_JSON);
+		types.add(KRFormat.TURTLE);
+		types.add("application/json-ld");
+		supportedMediaTypes = Collections.unmodifiableSet(types);
+	}
+
+	@Override
+	public boolean isWriteable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) {
+		String mediaTypeString = 
mediaType.getType()+'/'+mediaType.getSubtype(); + return Model.class.isAssignableFrom(type) && supportedMediaTypes.contains(mediaTypeString); + } + + @Override + public long getSize(Model t, Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return -1; + } + + @Override + public void writeTo(Model t, Class type, Type genericType, Annotation[] annotations, MediaType mediaType, + MultivaluedMap httpHeaders, OutputStream entityStream) + throws IOException, WebApplicationException { + + Lang lang = null; + + if(mediaType.equals(KRFormat.N3_TYPE)) + lang = Lang.N3; + else if(mediaType.equals(KRFormat.N_TRIPLE_TYPE)) + lang = Lang.NTRIPLES; + else if(mediaType.equals(KRFormat.RDF_JSON_TYPE)) + lang = Lang.RDFJSON; + else if(mediaType.equals(new MediaType("application", "json-ld"))) + lang = Lang.JSONLD; + else lang = Lang.TURTLE; + + RDFDataMgr.write(entityStream, t, lang); + + } + +} diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/com.fasterxml.jackson.core.JsonFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/com.fasterxml.jackson.core.JsonFactory new file mode 100644 index 0000000..239a78a --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/com.fasterxml.jackson.core.JsonFactory @@ -0,0 +1 @@ +com.fasterxml.jackson.core.JsonFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/com.fasterxml.jackson.core.ObjectCodec b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/com.fasterxml.jackson.core.ObjectCodec new file mode 100644 index 0000000..f126bb4 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/com.fasterxml.jackson.core.ObjectCodec @@ -0,0 +1 @@ +com.fasterxml.jackson.databind.ObjectMapper diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.annotation.processing.Processor b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.annotation.processing.Processor new file mode 100644 index 0000000..ccab8b7 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.annotation.processing.Processor @@ -0,0 +1 @@ +ch.qos.cal10n.verifier.processor.CAL10NAnnotationProcessor diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.datatype.DatatypeFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.datatype.DatatypeFactory new file mode 100644 index 0000000..c1c1855 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.datatype.DatatypeFactory @@ -0,0 +1 @@ +org.apache.xerces.jaxp.datatype.DatatypeFactoryImpl diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.parsers.DocumentBuilderFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.parsers.DocumentBuilderFactory new file mode 100644 index 0000000..3845cc1 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.parsers.DocumentBuilderFactory @@ -0,0 +1 @@ +org.apache.xerces.jaxp.DocumentBuilderFactoryImpl diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.parsers.SAXParserFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.parsers.SAXParserFactory new file mode 100644 index 0000000..88b247c --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.parsers.SAXParserFactory @@ -0,0 +1 @@ 
+org.apache.xerces.jaxp.SAXParserFactoryImpl diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLEventFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLEventFactory new file mode 100644 index 0000000..5cf6974 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLEventFactory @@ -0,0 +1 @@ +com.ctc.wstx.stax.WstxEventFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLInputFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLInputFactory new file mode 100644 index 0000000..db49e7a --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLInputFactory @@ -0,0 +1 @@ +com.ctc.wstx.stax.WstxInputFactory \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLOutputFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLOutputFactory new file mode 100644 index 0000000..a15830d --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.stream.XMLOutputFactory @@ -0,0 +1 @@ +com.ctc.wstx.stax.WstxOutputFactory \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.validation.SchemaFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.validation.SchemaFactory new file mode 100644 index 0000000..ec3f1f4 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/javax.xml.validation.SchemaFactory @@ -0,0 +1 @@ +org.apache.xerces.jaxp.validation.XMLSchemaFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.commons.logging.LogFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.commons.logging.LogFactory new file mode 100644 index 0000000..50a7c3b --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.commons.logging.LogFactory @@ -0,0 +1,5 @@ +org.apache.commons.logging.impl.SLF4JLogFactory + +# Axis gets at JCL through its own mechanism as defined by Commons Discovery, which +# in turn follows the instructions found at: +# http://java.sun.com/j2se/1.3/docs/guide/jar/jar.html#Service Provider diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.felix.scrplugin.annotations.AnnotationProcessor b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.felix.scrplugin.annotations.AnnotationProcessor new file mode 100644 index 0000000..4a11882 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.felix.scrplugin.annotations.AnnotationProcessor @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +org.apache.felix.scrplugin.processing.SCRAnnotationProcessor +org.apache.felix.scrplugin.processing.SlingAnnotationProcessor diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem new file mode 100644 index 0000000..815e724 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hadoop.fs.LocalFileSystem +org.apache.hadoop.fs.viewfs.ViewFileSystem +org.apache.hadoop.fs.s3.S3FileSystem +org.apache.hadoop.fs.s3native.NativeS3FileSystem +org.apache.hadoop.fs.kfs.KosmosFileSystem +org.apache.hadoop.fs.ftp.FTPFileSystem +org.apache.hadoop.fs.HarFileSystem +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hadoop.hdfs.DistributedFileSystem +org.apache.hadoop.hdfs.HftpFileSystem +org.apache.hadoop.hdfs.HsftpFileSystem +org.apache.hadoop.hdfs.web.WebHdfsFileSystem diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.io.compress.CompressionCodec b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.io.compress.CompressionCodec new file mode 100644 index 0000000..df46e32 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.io.compress.CompressionCodec @@ -0,0 +1,20 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.hadoop.io.compress.BZip2Codec +org.apache.hadoop.io.compress.DefaultCodec +org.apache.hadoop.io.compress.DeflateCodec +org.apache.hadoop.io.compress.GzipCodec +org.apache.hadoop.io.compress.Lz4Codec +org.apache.hadoop.io.compress.SnappyCodec + diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.SecurityInfo b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.SecurityInfo new file mode 100644 index 0000000..f7f3ec2 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.SecurityInfo @@ -0,0 +1,14 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.hadoop.security.AnnotatedSecurityInfo diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier new file mode 100644 index 0000000..59603a9 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier @@ -0,0 +1,15 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier +org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer new file mode 100644 index 0000000..5889c12 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer @@ -0,0 +1,17 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.hadoop.hdfs.DFSClient$Renewer +org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier$Renewer +org.apache.hadoop.hdfs.HftpFileSystem$TokenManager +org.apache.hadoop.hdfs.web.WebHdfsFileSystem$DtRenewer diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory new file mode 100644 index 0000000..bdc5750 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ja.JapaneseIterationMarkCharFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory +org.apache.lucene.analysis.charfilter.MappingCharFilterFactory +org.apache.lucene.analysis.fa.PersianCharFilterFactory +org.apache.lucene.analysis.pattern.PatternReplaceCharFilterFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory new file mode 100644 index 0000000..679fc70 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory @@ -0,0 +1,185 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ja.JapaneseBaseFormFilterFactory +org.apache.lucene.analysis.ja.JapaneseKatakanaStemFilterFactory +org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilterFactory +org.apache.lucene.analysis.ja.JapaneseReadingFormFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.icu.ICUFoldingFilterFactory +org.apache.lucene.analysis.icu.ICUNormalizer2FilterFactory +org.apache.lucene.analysis.icu.ICUTransformFilterFactory +org.apache.lucene.collation.ICUCollationKeyFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.stempel.StempelPolishStemFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ar.ArabicNormalizationFilterFactory +org.apache.lucene.analysis.ar.ArabicStemFilterFactory +org.apache.lucene.analysis.bg.BulgarianStemFilterFactory +org.apache.lucene.analysis.br.BrazilianStemFilterFactory +org.apache.lucene.analysis.cjk.CJKBigramFilterFactory +org.apache.lucene.analysis.cjk.CJKWidthFilterFactory +org.apache.lucene.analysis.cn.ChineseFilterFactory +org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory +org.apache.lucene.analysis.commongrams.CommonGramsQueryFilterFactory +org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilterFactory +org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilterFactory +org.apache.lucene.analysis.core.LowerCaseFilterFactory +org.apache.lucene.analysis.core.StopFilterFactory +org.apache.lucene.analysis.core.TypeTokenFilterFactory +org.apache.lucene.analysis.cz.CzechStemFilterFactory +org.apache.lucene.analysis.de.GermanLightStemFilterFactory +org.apache.lucene.analysis.de.GermanMinimalStemFilterFactory +org.apache.lucene.analysis.de.GermanNormalizationFilterFactory +org.apache.lucene.analysis.de.GermanStemFilterFactory +org.apache.lucene.analysis.el.GreekLowerCaseFilterFactory +org.apache.lucene.analysis.el.GreekStemFilterFactory +org.apache.lucene.analysis.en.EnglishMinimalStemFilterFactory +org.apache.lucene.analysis.en.EnglishPossessiveFilterFactory +org.apache.lucene.analysis.en.KStemFilterFactory +org.apache.lucene.analysis.en.PorterStemFilterFactory +org.apache.lucene.analysis.es.SpanishLightStemFilterFactory +org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory +org.apache.lucene.analysis.fi.FinnishLightStemFilterFactory +org.apache.lucene.analysis.fr.FrenchLightStemFilterFactory +org.apache.lucene.analysis.fr.FrenchMinimalStemFilterFactory +org.apache.lucene.analysis.ga.IrishLowerCaseFilterFactory +org.apache.lucene.analysis.gl.GalicianMinimalStemFilterFactory +org.apache.lucene.analysis.gl.GalicianStemFilterFactory +org.apache.lucene.analysis.hi.HindiNormalizationFilterFactory +org.apache.lucene.analysis.hi.HindiStemFilterFactory +org.apache.lucene.analysis.hu.HungarianLightStemFilterFactory +org.apache.lucene.analysis.hunspell.HunspellStemFilterFactory +org.apache.lucene.analysis.id.IndonesianStemFilterFactory +org.apache.lucene.analysis.in.IndicNormalizationFilterFactory +org.apache.lucene.analysis.it.ItalianLightStemFilterFactory +org.apache.lucene.analysis.lv.LatvianStemFilterFactory +org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory 
+org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory +org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory +org.apache.lucene.analysis.miscellaneous.KeepWordFilterFactory +org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilterFactory +org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory +org.apache.lucene.analysis.miscellaneous.LengthFilterFactory +org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory +org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilterFactory +org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory +org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory +org.apache.lucene.analysis.miscellaneous.TrimFilterFactory +org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory +org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory +org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilterFactory +org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory +org.apache.lucene.analysis.ngram.NGramFilterFactory +org.apache.lucene.analysis.no.NorwegianLightStemFilterFactory +org.apache.lucene.analysis.no.NorwegianMinimalStemFilterFactory +org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory +org.apache.lucene.analysis.pattern.PatternCaptureGroupFilterFactory +org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory +org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterFactory +org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory +org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory +org.apache.lucene.analysis.position.PositionFilterFactory +org.apache.lucene.analysis.pt.PortugueseLightStemFilterFactory +org.apache.lucene.analysis.pt.PortugueseMinimalStemFilterFactory +org.apache.lucene.analysis.pt.PortugueseStemFilterFactory +org.apache.lucene.analysis.reverse.ReverseStringFilterFactory +org.apache.lucene.analysis.ru.RussianLightStemFilterFactory +org.apache.lucene.analysis.shingle.ShingleFilterFactory +org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory +org.apache.lucene.analysis.standard.ClassicFilterFactory +org.apache.lucene.analysis.standard.StandardFilterFactory +org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory +org.apache.lucene.analysis.synonym.SynonymFilterFactory +org.apache.lucene.analysis.th.ThaiWordFilterFactory +org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory +org.apache.lucene.analysis.util.ElisionFilterFactory +org.apache.lucene.collation.CollationKeyFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory +org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilterFactory +org.apache.lucene.analysis.phonetic.PhoneticFilterFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory new file mode 100644 index 0000000..a92489c --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ja.JapaneseTokenizerFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.icu.segmentation.ICUTokenizerFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ar.ArabicLetterTokenizerFactory +org.apache.lucene.analysis.cjk.CJKTokenizerFactory +org.apache.lucene.analysis.cn.ChineseTokenizerFactory +org.apache.lucene.analysis.core.KeywordTokenizerFactory +org.apache.lucene.analysis.core.LetterTokenizerFactory +org.apache.lucene.analysis.core.LowerCaseTokenizerFactory +org.apache.lucene.analysis.core.WhitespaceTokenizerFactory +org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory +org.apache.lucene.analysis.ngram.NGramTokenizerFactory +org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory +org.apache.lucene.analysis.pattern.PatternTokenizerFactory +org.apache.lucene.analysis.ru.RussianLetterTokenizerFactory +org.apache.lucene.analysis.standard.ClassicTokenizerFactory +org.apache.lucene.analysis.standard.StandardTokenizerFactory +org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory +org.apache.lucene.analysis.wikipedia.WikipediaTokenizerFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec new file mode 100644 index 0000000..9795e81 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.lucene40.Lucene40Codec +org.apache.lucene.codecs.lucene3x.Lucene3xCodec +org.apache.lucene.codecs.lucene41.Lucene41Codec +org.apache.lucene.codecs.lucene42.Lucene42Codec +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.simpletext.SimpleTextCodec +org.apache.lucene.codecs.appending.AppendingCodec diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat new file mode 100644 index 0000000..7345d7b --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.diskdv.DiskDocValuesFormat +org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat new file mode 100644 index 0000000..d5e62bc --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat +org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat +org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat +org.apache.lucene.codecs.memory.MemoryPostingsFormat +org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat +org.apache.lucene.codecs.memory.DirectPostingsFormat diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.marmotta.ldpath.api.functions.SelectorFunction b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.marmotta.ldpath.api.functions.SelectorFunction new file mode 100644 index 0000000..6bb0ed3 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.marmotta.ldpath.api.functions.SelectorFunction @@ -0,0 +1,5 @@ +org.apache.marmotta.ldpath.model.functions.ConcatenateFunction +org.apache.marmotta.ldpath.model.functions.FirstFunction +org.apache.marmotta.ldpath.model.functions.LastFunction +org.apache.marmotta.ldpath.model.functions.SortFunction +org.apache.marmotta.ldpath.model.functions.CountFunction \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.marmotta.ldpath.api.functions.TestFunction b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.marmotta.ldpath.api.functions.TestFunction new file mode 100644 index 0000000..3a72d08 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.marmotta.ldpath.api.functions.TestFunction @@ -0,0 +1,6 @@ +org.apache.marmotta.ldpath.model.tests.functions.EqualTest +org.apache.marmotta.ldpath.model.tests.functions.NotEqualTest +org.apache.marmotta.ldpath.model.tests.functions.GreaterEqualTest +org.apache.marmotta.ldpath.model.tests.functions.GreaterThanTest +org.apache.marmotta.ldpath.model.tests.functions.LessEqualTest +org.apache.marmotta.ldpath.model.tests.functions.LessThanTest \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider new file mode 100644 index 
0000000..3c05728 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider @@ -0,0 +1,2 @@ +org.apache.stanbol.commons.namespaceprefix.provider.stanbol.DefaultNamespaceMappingsProvider +org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.PrefixccProvider \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.solr.SolrServerProvider b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.solr.SolrServerProvider new file mode 100644 index 0000000..4d838d5 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.solr.SolrServerProvider @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.commons.solr.impl.RESTfulSolrServerProvider + +# TODO: adding here the StandaloneEmbeddedSolrServerProvider of the +# solr.managed bundle as workaround for the maven assembly plugin +# overriding multiple files with the same name +org.apache.stanbol.commons.solr.managed.standalone.StandaloneEmbeddedSolrServerProvider + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.commons.solr.managed.standalone.StandaloneEmbeddedSolrServerProvider diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.solr.managed.ManagedSolrServer b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.solr.managed.ManagedSolrServer new file mode 100644 index 0000000..eec2647 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.solr.managed.ManagedSolrServer @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.commons.solr.managed.standalone.DefaultStandaloneManagedSolrServerWrapper diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider new file mode 100644 index 0000000..e7c924c --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.stanbol.commons.solr.managed.standalone.ClassPathDataFileProvider diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.dtd b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.dtd new file mode 100644 index 0000000..2c7037e --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.dtd @@ -0,0 +1 @@ +com.ctc.wstx.dtd.DTDSchemaFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.relaxng b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.relaxng new file mode 100644 index 0000000..681466f --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.relaxng @@ -0,0 +1 @@ +com.ctc.wstx.msv.RelaxNGSchemaFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.w3c.dom.DOMImplementationSourceList b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.w3c.dom.DOMImplementationSourceList new file mode 100644 index 0000000..7a52dd1 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.w3c.dom.DOMImplementationSourceList @@ -0,0 +1 @@ +org.apache.xerces.dom.DOMXSImplementationSourceImpl \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.xml.sax.driver b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.xml.sax.driver new file mode 100644 index 0000000..409dd43 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/META-INF/services/org.xml.sax.driver @@ -0,0 +1,2 @@ +org.apache.xerces.parsers.SAXParser + diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/entityTypes.properties b/ontonethub-src/ontonethub/src/main/resources/indexing/config/entityTypes.properties new file mode 100644 index 0000000..57521ea --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/entityTypes.properties @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#NOTE: This configuration file can be used for both +# * org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter +# * org.apache.stanbol.entityhub.indexing.source.jenatdb.ResourceFilterIterator +# however users need to consider that the Jena TDB ResourceFilterIterator does +# not support wildcards '*'. So the default configuration used by this file +# will not work. 
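+
+# For example, a wildcard-free configuration along the following lines would
+# also work with the Jena TDB ResourceFilterIterator (the type values are
+# illustrative only, not defaults):
+#field=rdf:type
+#values=dbp-ont:Person;dbp-ont:Place;dbp-ont:Organisation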
+
+#Configuration for the FieldValueFilter
+
+#This can be used to configure specific rdf:types to be indexed. Entities with
+#other types will be filtered and not be included in the local DBpedia.org
+#index
+
+#How to configure
+
+#The key 'field' can be used to configure the field the filters are applied to
+# - 'rdf:type' is used as default for the field
+# - Only a single field is supported. However one can configure multiple instances
+# with different configurations in the 'indexing.properties' file.
+# - It is possible to use a full URI or prefix:localname for all prefixes registered
+# in 'org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum'
+
+#field=rdf:type
+
+#The key 'values' is used to specify the filter
+# - If NOT present, then Entities with NO values for the field are filtered. All
+# others are accepted
+# - The value '*' deactivates filtering
+# - Multiple types are supported. Configurations are separated by ';'
+# - It is possible to use full URIs or prefix:localname for all prefixes registered
+# in 'org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum'
+# - null can be used to explicitly include Entities with no value
+
+#Examples
+
+#This deactivates filtering
+values=*
+
+#This activates filtering for Persons, Places and Organisations and also includes
+#all entities with no type
+#values=null;dbp-ont:Person;dbp-ont:Place;dbp-ont:Organisation;
+
+#The following two configurations would only index entities with no values for the
+#configured field
+#values=null
+#values=
+
diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/fieldboosts.properties b/ontonethub-src/ontonethub/src/main/resources/indexing/config/fieldboosts.properties new file mode 100644 index 0000000..68b6a67 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/fieldboosts.properties @@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#This file can be used to configure field boosts for Solr/Lucene
+#use the fully qualified URI of the field as key and the float boost factor
+#for the field as value. A value of 1.0 is the default.
+
+#NOTE: full UTF-8 is supported for keys!
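+
+#For example (hypothetical property, for illustration only): a boost of 2.0
+#would make matches on that field count twice as much as matches on a field
+#with the default boost of 1.0
+#http://www.example.org/myOntology#headline=2.0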
+
+#This defines boosts for "label" like properties of typically used ontologies
+http://www.w3.org/2000/01/rdf-schema#label=3
+http://purl.org/dc/terms/title=3
+http://purl.org/dc/elements/1.1/title=3
+http://xmlns.com/foaf/0.1/name=3
+http://schema.org/name=3
+http://www.w3.org/2004/02/skos/core#prefLabel=3
+http://www.w3.org/2004/02/skos/core#altLabel=1.5
\ No newline at end of file
diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/fst.config b/ontonethub-src/ontonethub/src/main/resources/indexing/config/fst.config new file mode 100644 index 0000000..39b00dd --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/fst.config @@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#Syntax
+#index={indexField};[store={storeField}]
+index=rdfs:label
\ No newline at end of file
diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/iditerator.properties b/ontonethub-src/ontonethub/src/main/resources/indexing/config/iditerator.properties new file mode 100644 index 0000000..82247ba --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/iditerator.properties @@ -0,0 +1,49 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# DEFAULT CONFIGURATION FOR THE
+# org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator
+# This implementation reads Entity IDs and Scores from a line of a text file
+
+# NOTE: All values provided in this file are the default values
+
+# the text file with the data (located relative to the resource
+# (indexing/resource) directory). The default name for the file is
+# "entityScores.tsv"
+source=entityScores.tsv
+
+# configure the position of the score and the entity id
+id-pos=1
+score-pos=2
+
+# if the Id field only contains the local name of the entity the id-namespace
+# property can be used to configure the namespace. The default is to use no
+# namespace -> meaning that the ID contains the fully qualified name
+#id-namespace=http://example.org/entities/
+
+# separator between the id and score (default TAB)
+separator=
+
+#URL encoding/decoding of entity IDs
+encodeIds=false
+decodeIds=false
+
+# trimming of lines (default is false)
+trimLine=false
+# trimming the entity (default is true)
+trimEntity=true
+
+#the charset used to read the data from the file (default UTF-8)
+charset=UTF-8
diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/indexing.properties b/ontonethub-src/ontonethub/src/main/resources/indexing/config/indexing.properties new file mode 100644 index 0000000..af908c0 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/indexing.properties @@ -0,0 +1,317 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ------------
+# Index Metadata
+# ------------
+
+# Here the name of the dataset MUST be specified by the user
+# It MUST BE a single word with no spaces.
+name=changeme
+
+# an optional short description may be used. If missing, default descriptions are
+# created.
+description=short description (http://www.example.org)
+
+# The "Synchronized" property is supported by some Entityhub Yard implementations
+# to automatically update/delete local data as soon as the created archive with
+# the indexed data is updated/deleted in the /datafiles folder of Apache Stanbol
+# By default this feature is activated. For very big indexes users might want to
+# disable this feature to allow the deletion of the archive after the index was
+# initialised successfully.
+# By default this feature is enabled. Uncomment the next line to deactivate it.
+Synchronized=true
+
+# ------------
+# Indexing Mode dependent Configurations: (see readme.md for details)
+# ------------
+
+# The indexing Tool supports two modes. See (1) and (2) for details.
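+
+# For illustration, mode (1) with the defaults described below boils down to a
+# sketch like the following (values taken from the examples in this file):
+#name=changeme
+#entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata
+#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider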
+ +# (1) Iterate over Data and lookup scores: (default) +# ------------ + +# use the Jena TDB as source for indexing the RDF data located within +# "indexing/resource/rdfdata" +entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata + +#NOTE: if you want to index Bnodes you need to activate the bnode parameter +# see STANBOL-765 for details (and documentation) +#entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata,bnode:true + +# The EntityScore Provider needs to provide the scores for indexed entities +# use the NoEntityScoreProvider if no scores are available +entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider + +# The EntityFieldScoreProvider can be used to use the value of an property as score +# the property can be configured by the "field" parameter +# Scores are parsed from numbers and strings that can be converted to numbers. +#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityFieldScoreProvider,field:http://www.example.org/myOntology#score + +# The EntityIneratorToScoreProviderAdapter can be used to adapt any configured +# "entityIdIterator" to an "entityScoreProvider". See also the comments for +# "entityIdIterator". +#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter + +# Indexing VCARD + +# Alternative configuration for indexing vCard files +# change the config for the vcard indexer in the "vcard.properties" file +#entityDataIterable=org.apache.stanbol.entityhub.indexing.source.vcard.VcardIndexingSource,config:vcard +#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider + + + +# (2) Iterate over Entity IDs and lookup Data +# ------------ + +# First one needs to provide an EntityIterator +# Typically the LineBasedEntityIterator implementation is used. The configuration +# for this implementation is typically provided in an own file. A default +# configuration is provided by the iditerator.properties file. +#entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,config:iditerator + +# This EntityIterator allows to use a simple Triple filter to select entities for Indexing. +# It uses the same configuration as "FieldValueFilter" but DOES NOT support +# Wildcards. See "FieldValueFilter" for details on how to configure! +# +# NOTE: Can only be used if Jena TDB (jenatdb.RdfIndexingSource) is used as +# indexing source! +#entityIdIterator=org.apache.stanbol.entityhub.indexing.source.jenatdb.ResourceFilterIterator,config:entityTypes.properties + +# Second a entityDataProvide needs to be specified. Here we use the Jena TDB +# Note that this implementation implements both entityDataIterable AND +# entityDataProvider. 
+# RDF data needs to be located in the "indexing/resource/rdfdata" +# entityDataProvider=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata + +# ------------ +#Score Normalizer +# ------------ + +# Entity Scores are normalised by the ScoreNormalizer +# if no score normaliser is configured the scores will be used as provided by +# the entities +#scoreNormalizer= + +# ScoreNormalizer can be chained as shown by the following example configuration +# The score for an entity is first processed by the last normalizer +#scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:scorerange;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser;org.apache.stanbol.entityhub.indexing.core.normaliser.MinScoreNormalizer,config:minscore + +# Different Implementations: +# - RangeNormaliser: allows to define a range for score values. see +# "scorerange.properties" for possible configurations +# - NaturalLogNormaliser: Should be used if the score represents the number of +# incommings links. +# - MinScoreNormalizer: allows to prevent indexing of all entities with a score +# lower than the configured minimum. see "minscore.properties" for possible +# configurations. + +# ------------ +# Entity Processor +# ------------ + +# Multiple Entity processors can be used for indexing entities. The are separated by ';' +# and are executed in the order of definition. + +# FiledMapperProcessor: +# +# entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor +# +# This processor ensures that "field mappings" are executed while indexing entities. +# By default it will use the mappings configured by the "fieldConfiguraton" +# property. To use other mappings one can use the "mappings" parameter (e.g. +# mappings:otherMappings.txt + +# FieldValueFilter +# +#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes +# +# This allows to define a field and values that are used to filter entities. Only Entities +# that do have one of the defined values as actual value of the defined field will +# get indexed. This is typically used to filter entities by rdf:type, but can be used +# for any URI property. See the default entityTypes.properties file for more information + +# ResourceUriFilter +# +#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.ResourceUriFilter,prefix:http://www.eample.org/ +# +# This allows to filter all resource that do not start with the configured prefix + +# LdpathProcessor +# +# This allows to use simple LDpath statements to process entities. Such as mapping +# only properties of entities with a specific type +# +# skos:prefLabel = .[rdf:type is ]/rdfs:label; +# +# Praameters: +# * append:[true/flase] (default=true) If the result of the LDpath program is +# appended to the processed entity or if the processed entity should be +# replaced with the results of the LDpath program +# * ldpath:{file} (required, no default) The {file} containing the LDpath +# program used by this processor. {file} is relative to the config directory. +# +# NOTEs: +# * The LdpathProcessor has only access to the local properties of the currently +# indexed entity. LDPath statements that refer other information such as paths +# with a lengths > 1 or inverse properties will not work +# * Processors can be chained by defining multiple Processor instances in the +# configuration and separating them with ';'. 
+
+# ------------
+# Entity Processor
+# ------------
+
+# Multiple entity processors can be used for indexing entities. They are
+# separated by ';' and are executed in the order of their definition.
+
+# FiledMapperProcessor:
+#
+# entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+#
+# This processor ensures that "field mappings" are executed while indexing
+# entities. By default it will use the mappings configured by the
+# "fieldConfiguration" property. To use other mappings one can use the
+# "mappings" parameter (e.g. mappings:otherMappings.txt).
+
+# FieldValueFilter
+#
+#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes
+#
+# This allows defining a field and values that are used to filter entities. Only
+# entities that have one of the defined values as an actual value of the defined
+# field will get indexed. This is typically used to filter entities by rdf:type,
+# but can be used for any URI property. See the default entityTypes.properties
+# file for more information.
+
+# ResourceUriFilter
+#
+#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.ResourceUriFilter,prefix:http://www.example.org/
+#
+# This filters out all resources that do not start with the configured prefix.
+
+# LdpathProcessor
+#
+# This allows using simple LDpath statements to process entities, such as
+# mapping only properties of entities with a specific type:
+#
+# skos:prefLabel = .[rdf:type is skos:Concept]/rdfs:label;
+#
+# Parameters:
+# * append:[true/false] (default=true) Whether the result of the LDpath program
+#   is appended to the processed entity or whether the processed entity is
+#   replaced with the results of the LDpath program.
+# * ldpath:{file} (required, no default) The {file} containing the LDpath
+#   program used by this processor. {file} is relative to the config directory.
+#
+# NOTEs:
+# * The LdpathProcessor only has access to the local properties of the currently
+#   indexed entity. LDpath statements that refer to other information, such as
+#   paths with a length > 1 or inverse properties, will not work.
+# * Processors can be chained by defining multiple processor instances in the
+#   configuration and separating them with ';'. This allows using multiple
+#   LdpathProcessor instances and/or chaining LdpathProcessor(s) with others
+#   such as the "FiledMapperProcessor". Processors are executed as defined
+#   within the configuration of the "entityProcessor" property.
+# * When using the FiledMapperProcessor on results of the LdpathProcessor make
+#   sure that the fields defined in the LDpath statements are indexed by the
+#   FiledMapperProcessor. Otherwise such values will NOT be indexed!
+# org.apache.stanbol.entityhub.indexing.core.processor.LdpathProcessor,ldpath:ldpath-mapping.txt,append:true
+
+# GeonamesUriProcessor
+#
+# A typical case is that geonames URIs are missing the trailing '/'. This
+# processor will search for geonames URIs and correct them.
+# org.apache.stanbol.entityhub.indexing.core.processor.GeonamesUriProcessor
+
+# WikipediaToDBPediaUriProcessor
+#
+# This processor will rewrite Wikipedia URIs to DBPedia URIs
+# (e.g. "http://de.wikipedia.org/wiki/Hawaii" to "http://de.dbpedia.org/resource/Hawaii")
+# org.apache.stanbol.entityhub.indexing.core.processor.WikipediaToDBPediaUriProcessor
+
+# EmptyProcessor
+#
+#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.EmptyProcessor
+#
+# This processor can be used to deactivate entity processing.
+
+# Default Entity Processor configuration
+entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes;org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor
+
+# ------------
+# Index Field Configuration
+# ------------
+
+# An index needs to provide its configuration. This is used at runtime if
+# entities are updated.
+fieldConfiguration=mappings.txt
+
+
+# ------------
+# Post-Processing
+# ------------
+
+# The indexing tool now supports a post-processing step that is executed after
+# all entities have been indexed to the indexing destination.
+# For this step the configured IndexingDestination is used as both the source
+# and the target.
+# The post-processing allows applying an additional set of EntityProcessor
+# instances to each indexed entity.
+# In principle all EntityProcessor implementations used for entity processing
+# during the normal indexing phase can also be used for post-processing.
+# However, as all information is already available within the
+# IndexingDestination, this phase can also be used to perform processing steps
+# that would not be easily possible during the indexing phase.
+
+# LdpathPostProcessor
+#
+# EntityProcessor that wraps the IndexingDestination as RDF backend and
+# therefore allows executing any kind of LDpath program based on the indexed
+# entity data.
+# Typical use cases of this processor include:
+# * indexing transitive closures
+#     skos:broaderTransitive = (skos:broader)*
+# * collecting labels of referenced entities to be used for disambiguation (e.g.
+#   use labels of linked concepts in a SKOS concept scheme:
+#     = *[rdf:type is skos:Concept]/(skos:prefLabel | skos:altLabel)
+# * advanced indexing rules that need paths longer than one (e.g. adding
+#   labels of redirects pointing to an entity:
+#     rdfs:label = rdfs:label | (^rdfs:seeAlso/rdfs:label)
+#
+# Parameters: This uses the same parameters as the LdpathProcessor.
+
+#entityPostProcessor=org.apache.stanbol.entityhub.indexing.core.processor.LdpathPostProcessor,ldpath:ldpath-post-mapping.txt,append:true
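+
+# A minimal sketch of what "ldpath-post-mapping.txt" could contain, reusing the
+# transitive-closure use case listed above (illustrative only; the file name is
+# just the one from the commented example):
+#
+#   skos:broaderTransitive = (skos:broader)*;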
+
+
+# ------------
+# Indexing Destination
+# ------------
+
+# A SolrYard is used as the destination for indexing.
+# To boost some fields (typically labels) one can use the fieldboosts.properties.
+# A default field boost configuration is provided.
+# A default fst.config is also provided (see STANBOL-1167)
+indexingDestination=org.apache.stanbol.entityhub.indexing.destination.solryard.SolrYardIndexingDestination,boosts:fieldboosts,fstConf:fst.config
+
+
+# ------------
+# Additional configurations for ReferencedSite
+# ------------
+
+# All the following properties are optional, but can be used to configure
+# the referenced site used to access the indexed data within the Entityhub.
+
+# The entity prefixes are used to determine if an entity needs to be searched
+# on a referenced site. If not specified, requests for any entity will be
+# forwarded to this referenced site.
+# Use ';' to separate multiple values.
+#org.apache.stanbol.entityhub.site.entityPrefix=http://example.org/resource;urn:mycompany:
+
+# Configuring the remote service:
+# If the indexed data are also available remotely (e.g. via a Linked Data
+# endpoint) then it is possible to also allow direct access to such entities.
+# (a) retrieving entities (access URI and EntityDereferencer implementation)
+#org.apache.stanbol.entityhub.site.accessUri="http://example.org/resource"
+#org.apache.stanbol.entityhub.site.dereferencerType=
+# available EntityDereferencer implementations:
+# - org.apache.stanbol.entityhub.dereferencer.CoolUriDereferencer
+# - org.apache.stanbol.entityhub.dereferencer.SparqlDereferencer
+
+# (b) searching entities (queryUri and EntitySearcher implementation)
+#org.apache.stanbol.entityhub.site.queryUri=http://example.org/sparql
+#org.apache.stanbol.entityhub.site.searcherType=
+# available EntitySearcher implementations:
+# - org.apache.stanbol.entityhub.searcher.SparqlSearcher (generic SPARQL)
+# - org.apache.stanbol.entityhub.searcher.LarqSearcher (Larq SPARQL extensions)
+# - org.apache.stanbol.entityhub.searcher.VirtuosoSearcher (Virtuoso SPARQL extensions)
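+
+# A hypothetical sketch enabling both remote access options via the SPARQL based
+# implementations listed above (the endpoint URLs are placeholders taken from
+# the commented examples):
+#org.apache.stanbol.entityhub.site.accessUri=http://example.org/resource
+#org.apache.stanbol.entityhub.site.dereferencerType=org.apache.stanbol.entityhub.dereferencer.SparqlDereferencer
+#org.apache.stanbol.entityhub.site.queryUri=http://example.org/sparql
+#org.apache.stanbol.entityhub.site.searcherType=org.apache.stanbol.entityhub.searcher.SparqlSearcher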
+
+# The referenced site can also specify additional mappings to be used in case
+# an entity of this site is imported to the Entityhub.
+# Typically the same mappings as used for the indexing are a good start.
+# However, one might want to copy some values (e.g. labels) to fields commonly
+# used by the Entityhub.
+org.apache.stanbol.entityhub.site.fieldMappings=mappings.txt
+
+
+# License(s)
+# Add here the names and URLs of the licenses to be used for all entities
+# provided by this referenced site.
+# NOTE: licenseName and licenseUrl MUST use the same ordering, as shown below!
+# This example shows dual licensing with "cc by-sa" and GNU
+#org.apache.stanbol.entityhub.site.licenseName=Creative Commons Attribution-ShareAlike 3.0;GNU Free Documentation License
+#org.apache.stanbol.entityhub.site.licenseUrl=http://creativecommons.org/licenses/by-sa/3.0/;http://www.gnu.org/licenses/fdl.html
+
+# Attribution
+# Some licenses require attribution. These properties can be used to provide a
+# link to the site with the attribution and the attribution text.
+#org.apache.stanbol.entityhub.site.attributionUrl=http://example.org/About.html
+#org.apache.stanbol.entityhub.site.attribution=To the universe
+
+
+# Fail on Error loading Resource Files
+# The indexing tool can be configured to fail on errors loading resource files.
+# By default, a resource file will be ignored when an error occurs while trying
+# to load it.
+failOnErrorLoadingResource=false
+
+
diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/mappings.txt b/ontonethub-src/ontonethub/src/main/resources/indexing/config/mappings.txt
new file mode 100644
index 0000000..dbbbe92
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/mappings.txt
@@ -0,0 +1,175 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#NOTE: THIS IS A DEFAULT MAPPING SPECIFICATION THAT INCLUDES MAPPINGS FOR
+# COMMON ONTOLOGIES. USERS MIGHT WANT TO ADAPT THIS CONFIGURATION BY
+# COMMENTING/UNCOMMENTING AND/OR ADDING NEW MAPPINGS
+
+# --- Define the Languages for all fields ---
+# Uncomment to restrict the languages to be imported (for all fields); otherwise
+# all languages are indexed.
+#| @=null;en;de;fr;it
+
+#NOTE: null is used to import labels with no specified language
+
+# --- RDF, RDFS and OWL Mappings ---
+# This configuration only indexes properties that are typically used to store
+# instance data defined by such namespaces. This excludes ontology definitions.
+
+# NOTE that nearly all other ontologies are using properties of these three
+# schemas, therefore it is strongly recommended to include such information!
+
+rdf:type | d=entityhub:ref
+
+rdfs:label
+rdfs:comment
+rdfs:seeAlso | d=entityhub:ref
+
+
+owl:sameAs | d=entityhub:ref
+
+#If one also wants to index ontologies one should add the following statements
+#owl:*
+#rdfs:*
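+
+# A small illustrative sketch (assuming the language-filter syntax used for the
+# global rule above can also be applied to a single field): import rdfs:label
+# values only for English and Italian.
+#rdfs:label | @=en;it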
+
+# --- Dublin Core (DC) ---
+# The default configuration imports all dc-terms data and copies values of the
+# old dc-elements standard over to the corresponding properties of the dc-terms
+# standard.
+
+# NOTE that a lot of other ontologies are also using DC for some of their data,
+# therefore it is strongly recommended to include such information!
+
+#mapping for all dc-terms properties
+dc:*
+
+# copy dc:title to rdfs:label
+dc:title > rdfs:label
+
+# deactivated by default, because the dc-elements values are mapped to the
+# corresponding dc-terms properties (see below)
+#dc-elements:*
+
+# mappings for the dc-elements properties to the dc-terms
+dc-elements:contributor > dc:contributor
+dc-elements:coverage > dc:coverage
+dc-elements:creator > dc:creator
+dc-elements:date > dc:date
+dc-elements:description > dc:description
+dc-elements:format > dc:format
+dc-elements:identifier > dc:identifier
+dc-elements:language > dc:language
+dc-elements:publisher > dc:publisher
+dc-elements:relation > dc:relation
+dc-elements:rights > dc:rights
+dc-elements:source > dc:source
+dc-elements:subject > dc:subject
+dc-elements:title > dc:title
+dc-elements:type > dc:type
+#also use dc-elements:title as label
+dc-elements:title > rdfs:label
+
+# --- Social Networks (via foaf) ---
+#The Friend of a Friend schema is often used to describe social relations
+#between people.
+foaf:*
+
+# copy the name of a person over to rdfs:label
+foaf:name > rdfs:label
+
+# additional data type checks
+foaf:knows | d=entityhub:ref
+foaf:made | d=entityhub:ref
+foaf:maker | d=entityhub:ref
+foaf:member | d=entityhub:ref
+foaf:homepage | d=xsd:anyURI
+foaf:depiction | d=xsd:anyURI
+foaf:img | d=xsd:anyURI
+foaf:logo | d=xsd:anyURI
+#page about the entity
+foaf:page | d=xsd:anyURI
+
+
+# --- Schema.org ---
+
+# Defines an ontology used by search engines (Google, Yahoo and Bing) for
+# indexing websites.
+
+schema:*
+# Copy all names of schema instances over to rdfs:label
+schema:name > rdfs:label
+
+# --- Simple Knowledge Organization System (SKOS) ---
+
+# A common data model for sharing and linking knowledge organization systems
+# via the Semantic Web. Typically used to encode controlled vocabularies such
+# as thesauri.
+skos:*
+
+# copy all SKOS labels (preferred, alternative and hidden) over to rdfs:label
+skos:prefLabel > rdfs:label
+skos:altLabel > rdfs:label
+skos:hiddenLabel > rdfs:label
+
+# copy values of the **Match relations to the corresponding related, broader
+# and narrower properties
+skos:relatedMatch > skos:related
+skos:broadMatch > skos:broader
+skos:narrowMatch > skos:narrower
+
+#similar mappings for the transitive variants are not contained, because
+#transitive reasoning is not directly supported by the Entityhub.
+
+# Some SKOS thesauri do use "skos:broaderTransitive" and "skos:narrowerTransitive";
+# however, such properties are only intended to be used by reasoners to
+# calculate transitive closures over broader/narrower hierarchies.
+# see http://www.w3.org/TR/skos-reference/#L2413 for details
+# to correct such cases we will copy transitive relations to their counterpart
+skos:narrowerTransitive > skos:narrower
+skos:broaderTransitive > skos:broader
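+
+# A sketch of how a further, custom vocabulary could be added following the same
+# pattern (the "myont" prefix and its name property are hypothetical):
+#myont:*
+#myont:name > rdfs:label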
+
+
+# --- Semantically-Interlinked Online Communities (SIOC) ---
+
+# An ontology for describing the information in online communities.
+# This information can be used to export information from online communities
+# and to link them together. The scope of the application areas that SIOC can
+# be used for includes (and is not limited to) weblogs, message boards,
+# mailing lists and chat channels.
+sioc:*
+
+# --- biographical information (bio) ---
+# A vocabulary for describing biographical information about people, both living
+# and dead. (see http://vocab.org/bio/0.1/)
+bio:*
+
+# --- Rich Site Summary (rss) ---
+rss:*
+
+# --- GoodRelations (gr) ---
+# GoodRelations is a standardised vocabulary for product, price, and company data.
+gr:*
+
+# --- Creative Commons Rights Expression Language (cc) ---
+# The Creative Commons Rights Expression Language (CC REL) lets you describe
+# copyright licenses in RDF.
+cc:*
+
+
+
diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/minscore.properties b/ontonethub-src/ontonethub/src/main/resources/indexing/config/minscore.properties
new file mode 100644
index 0000000..9df8944
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/minscore.properties
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#whether entities with a score equal to the configured min-score are included
+inclusive=true
+#the required minimum number of incoming links
+min-score=2
\ No newline at end of file
diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/scorerange.properties b/ontonethub-src/ontonethub/src/main/resources/indexing/config/scorerange.properties
new file mode 100644
index 0000000..ba33bf4
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/scorerange.properties
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# the range is always [0..{upper-bound}]
+upper-bound=1
\ No newline at end of file
diff --git a/ontonethub-src/ontonethub/src/main/resources/indexing/config/vcard.properties b/ontonethub-src/ontonethub/src/main/resources/indexing/config/vcard.properties
new file mode 100644
index 0000000..5440cc7
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/indexing/config/vcard.properties
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#Configurations for the vCard indexing source.
+#This file only configures this component. To activate (actually use) it you
+#need to enable it in the 'indexing.properties' file by activating:
+#'entityDataIterable=org.apache.stanbol.entityhub.indexing.source.vcard.VcardIndexingSource'
+# and deactivating all other entityDataIterable and entityIdIterator
+#configurations.
+#An example configuration is contained in the 'indexing.properties' file.
+#Search for the 'Indexing VCARD' section.
+
+#name of the folder with the vCard files (relative to /indexing/resource)
+#'vcard' is the default. You can add multiple folders by separating them with ','
+source=vcard
+#The prefix used for the created instances.
+#URIs will use {prefix}{type}/{name} where
+# {prefix} is the configured value
+# {type}   is "person" or "organization"
+# {name}   is the value of FN for persons and ORG for organizations. However,
+#          spaces are replaced with '-' and the value is URL encoded.
+prefix=http://www.example.com/changeme/
+#The encoding used to read the vCard files.
+#Pass an empty value to use the platform encoding.
+#default is UTF8
encoding=UTF8
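+
+# A sketch of a resulting instance URI (assuming the default prefix above and a
+# vCard with FN "John Doe"):
+#   http://www.example.com/changeme/person/John-Doe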
diff --git a/ontonethub-src/ontonethub/src/main/resources/log4j.properties b/ontonethub-src/ontonethub/src/main/resources/log4j.properties
new file mode 100644
index 0000000..dc63190
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/log4j.properties
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Generally print only WARN and ERROR messages
+log4j.rootLogger=WARN, A1
+# to the console
+log4j.appender.A1=org.apache.log4j.ConsoleAppender
+# using the pattern layout
+log4j.appender.A1.layout=org.apache.log4j.PatternLayout
+# with this conversion pattern
+log4j.appender.A1.layout.ConversionPattern=%d{HH:mm:ss,SSS} [%t] %-5p %c{2} - %m%n
+# however, also log INFO messages of the indexing components
+log4j.logger.org.apache.stanbol.entityhub.indexing=INFO
+# for logging during the import of RDF data
+log4j.logger.com.hp.hpl.jena=INFO
+# solrtexttagger is very verbose (even on WARN level)
+log4j.logger.org.opensextant.solrtexttagger=ERROR
\ No newline at end of file
diff --git a/ontonethub-src/ontonethub/src/main/resources/services/com.fasterxml.jackson.core.JsonFactory b/ontonethub-src/ontonethub/src/main/resources/services/com.fasterxml.jackson.core.JsonFactory
new file mode 100644
index 0000000..239a78a
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/services/com.fasterxml.jackson.core.JsonFactory
@@ -0,0 +1 @@
+com.fasterxml.jackson.core.JsonFactory
diff --git a/ontonethub-src/ontonethub/src/main/resources/services/com.fasterxml.jackson.core.ObjectCodec b/ontonethub-src/ontonethub/src/main/resources/services/com.fasterxml.jackson.core.ObjectCodec
new file mode 100644
index 0000000..f126bb4
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/services/com.fasterxml.jackson.core.ObjectCodec
@@ -0,0 +1 @@
+com.fasterxml.jackson.databind.ObjectMapper
diff --git a/ontonethub-src/ontonethub/src/main/resources/services/javax.annotation.processing.Processor b/ontonethub-src/ontonethub/src/main/resources/services/javax.annotation.processing.Processor
new file mode 100644
index 0000000..ccab8b7
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/services/javax.annotation.processing.Processor
@@ -0,0 +1 @@
+ch.qos.cal10n.verifier.processor.CAL10NAnnotationProcessor
diff --git a/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.datatype.DatatypeFactory b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.datatype.DatatypeFactory
new file mode 100644
index 0000000..c1c1855
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.datatype.DatatypeFactory
@@ -0,0 +1 @@
+org.apache.xerces.jaxp.datatype.DatatypeFactoryImpl
diff --git a/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.parsers.DocumentBuilderFactory b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.parsers.DocumentBuilderFactory
new file mode 100644
index 0000000..3845cc1
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.parsers.DocumentBuilderFactory
@@ -0,0 +1 @@
+org.apache.xerces.jaxp.DocumentBuilderFactoryImpl
diff --git a/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.parsers.SAXParserFactory b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.parsers.SAXParserFactory
new file mode 100644
index 0000000..88b247c
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.parsers.SAXParserFactory
@@ -0,0 +1 @@
+org.apache.xerces.jaxp.SAXParserFactoryImpl
diff --git a/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLEventFactory b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLEventFactory
new file mode 100644
index 0000000..5cf6974
--- /dev/null
+++ b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLEventFactory
@@ -0,0 +1 @@
+com.ctc.wstx.stax.WstxEventFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLInputFactory b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLInputFactory new file mode 100644 index 0000000..db49e7a --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLInputFactory @@ -0,0 +1 @@ +com.ctc.wstx.stax.WstxInputFactory \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLOutputFactory b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLOutputFactory new file mode 100644 index 0000000..a15830d --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.stream.XMLOutputFactory @@ -0,0 +1 @@ +com.ctc.wstx.stax.WstxOutputFactory \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.validation.SchemaFactory b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.validation.SchemaFactory new file mode 100644 index 0000000..ec3f1f4 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/javax.xml.validation.SchemaFactory @@ -0,0 +1 @@ +org.apache.xerces.jaxp.validation.XMLSchemaFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.commons.logging.LogFactory b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.commons.logging.LogFactory new file mode 100644 index 0000000..50a7c3b --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.commons.logging.LogFactory @@ -0,0 +1,5 @@ +org.apache.commons.logging.impl.SLF4JLogFactory + +# Axis gets at JCL through its own mechanism as defined by Commons Discovery, which +# in turn follows the instructions found at: +# http://java.sun.com/j2se/1.3/docs/guide/jar/jar.html#Service Provider diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.felix.scrplugin.annotations.AnnotationProcessor b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.felix.scrplugin.annotations.AnnotationProcessor new file mode 100644 index 0000000..4a11882 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.felix.scrplugin.annotations.AnnotationProcessor @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +org.apache.felix.scrplugin.processing.SCRAnnotationProcessor +org.apache.felix.scrplugin.processing.SlingAnnotationProcessor diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.fs.FileSystem b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.fs.FileSystem new file mode 100644 index 0000000..815e724 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.fs.FileSystem @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hadoop.fs.LocalFileSystem +org.apache.hadoop.fs.viewfs.ViewFileSystem +org.apache.hadoop.fs.s3.S3FileSystem +org.apache.hadoop.fs.s3native.NativeS3FileSystem +org.apache.hadoop.fs.kfs.KosmosFileSystem +org.apache.hadoop.fs.ftp.FTPFileSystem +org.apache.hadoop.fs.HarFileSystem +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hadoop.hdfs.DistributedFileSystem +org.apache.hadoop.hdfs.HftpFileSystem +org.apache.hadoop.hdfs.HsftpFileSystem +org.apache.hadoop.hdfs.web.WebHdfsFileSystem diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.io.compress.CompressionCodec b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.io.compress.CompressionCodec new file mode 100644 index 0000000..df46e32 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.io.compress.CompressionCodec @@ -0,0 +1,20 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +org.apache.hadoop.io.compress.BZip2Codec +org.apache.hadoop.io.compress.DefaultCodec +org.apache.hadoop.io.compress.DeflateCodec +org.apache.hadoop.io.compress.GzipCodec +org.apache.hadoop.io.compress.Lz4Codec +org.apache.hadoop.io.compress.SnappyCodec + diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.SecurityInfo b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.SecurityInfo new file mode 100644 index 0000000..f7f3ec2 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.SecurityInfo @@ -0,0 +1,14 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.hadoop.security.AnnotatedSecurityInfo diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.token.TokenIdentifier b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.token.TokenIdentifier new file mode 100644 index 0000000..59603a9 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.token.TokenIdentifier @@ -0,0 +1,15 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier +org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.token.TokenRenewer b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.token.TokenRenewer new file mode 100644 index 0000000..5889c12 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.hadoop.security.token.TokenRenewer @@ -0,0 +1,17 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +org.apache.hadoop.hdfs.DFSClient$Renewer +org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier$Renewer +org.apache.hadoop.hdfs.HftpFileSystem$TokenManager +org.apache.hadoop.hdfs.web.WebHdfsFileSystem$DtRenewer diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.CharFilterFactory b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.CharFilterFactory new file mode 100644 index 0000000..bdc5750 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.CharFilterFactory @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ja.JapaneseIterationMarkCharFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory +org.apache.lucene.analysis.charfilter.MappingCharFilterFactory +org.apache.lucene.analysis.fa.PersianCharFilterFactory +org.apache.lucene.analysis.pattern.PatternReplaceCharFilterFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.TokenFilterFactory b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.TokenFilterFactory new file mode 100644 index 0000000..679fc70 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.TokenFilterFactory @@ -0,0 +1,185 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ja.JapaneseBaseFormFilterFactory +org.apache.lucene.analysis.ja.JapaneseKatakanaStemFilterFactory +org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilterFactory +org.apache.lucene.analysis.ja.JapaneseReadingFormFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.icu.ICUFoldingFilterFactory +org.apache.lucene.analysis.icu.ICUNormalizer2FilterFactory +org.apache.lucene.analysis.icu.ICUTransformFilterFactory +org.apache.lucene.collation.ICUCollationKeyFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.cn.smart.SmartChineseWordTokenFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.stempel.StempelPolishStemFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ar.ArabicNormalizationFilterFactory +org.apache.lucene.analysis.ar.ArabicStemFilterFactory +org.apache.lucene.analysis.bg.BulgarianStemFilterFactory +org.apache.lucene.analysis.br.BrazilianStemFilterFactory +org.apache.lucene.analysis.cjk.CJKBigramFilterFactory +org.apache.lucene.analysis.cjk.CJKWidthFilterFactory +org.apache.lucene.analysis.cn.ChineseFilterFactory +org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory +org.apache.lucene.analysis.commongrams.CommonGramsQueryFilterFactory +org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilterFactory +org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilterFactory +org.apache.lucene.analysis.core.LowerCaseFilterFactory +org.apache.lucene.analysis.core.StopFilterFactory +org.apache.lucene.analysis.core.TypeTokenFilterFactory +org.apache.lucene.analysis.cz.CzechStemFilterFactory +org.apache.lucene.analysis.de.GermanLightStemFilterFactory +org.apache.lucene.analysis.de.GermanMinimalStemFilterFactory +org.apache.lucene.analysis.de.GermanNormalizationFilterFactory +org.apache.lucene.analysis.de.GermanStemFilterFactory +org.apache.lucene.analysis.el.GreekLowerCaseFilterFactory +org.apache.lucene.analysis.el.GreekStemFilterFactory +org.apache.lucene.analysis.en.EnglishMinimalStemFilterFactory +org.apache.lucene.analysis.en.EnglishPossessiveFilterFactory +org.apache.lucene.analysis.en.KStemFilterFactory +org.apache.lucene.analysis.en.PorterStemFilterFactory +org.apache.lucene.analysis.es.SpanishLightStemFilterFactory +org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory +org.apache.lucene.analysis.fi.FinnishLightStemFilterFactory +org.apache.lucene.analysis.fr.FrenchLightStemFilterFactory +org.apache.lucene.analysis.fr.FrenchMinimalStemFilterFactory +org.apache.lucene.analysis.ga.IrishLowerCaseFilterFactory +org.apache.lucene.analysis.gl.GalicianMinimalStemFilterFactory +org.apache.lucene.analysis.gl.GalicianStemFilterFactory +org.apache.lucene.analysis.hi.HindiNormalizationFilterFactory +org.apache.lucene.analysis.hi.HindiStemFilterFactory +org.apache.lucene.analysis.hu.HungarianLightStemFilterFactory +org.apache.lucene.analysis.hunspell.HunspellStemFilterFactory +org.apache.lucene.analysis.id.IndonesianStemFilterFactory +org.apache.lucene.analysis.in.IndicNormalizationFilterFactory +org.apache.lucene.analysis.it.ItalianLightStemFilterFactory +org.apache.lucene.analysis.lv.LatvianStemFilterFactory +org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory +org.apache.lucene.analysis.miscellaneous.CapitalizationFilterFactory +org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilterFactory +org.apache.lucene.analysis.miscellaneous.KeepWordFilterFactory +org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilterFactory 
+org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory +org.apache.lucene.analysis.miscellaneous.LengthFilterFactory +org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory +org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilterFactory +org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory +org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory +org.apache.lucene.analysis.miscellaneous.TrimFilterFactory +org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory +org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory +org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilterFactory +org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory +org.apache.lucene.analysis.ngram.NGramFilterFactory +org.apache.lucene.analysis.no.NorwegianLightStemFilterFactory +org.apache.lucene.analysis.no.NorwegianMinimalStemFilterFactory +org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory +org.apache.lucene.analysis.pattern.PatternCaptureGroupFilterFactory +org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory +org.apache.lucene.analysis.payloads.NumericPayloadTokenFilterFactory +org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory +org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory +org.apache.lucene.analysis.position.PositionFilterFactory +org.apache.lucene.analysis.pt.PortugueseLightStemFilterFactory +org.apache.lucene.analysis.pt.PortugueseMinimalStemFilterFactory +org.apache.lucene.analysis.pt.PortugueseStemFilterFactory +org.apache.lucene.analysis.reverse.ReverseStringFilterFactory +org.apache.lucene.analysis.ru.RussianLightStemFilterFactory +org.apache.lucene.analysis.shingle.ShingleFilterFactory +org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory +org.apache.lucene.analysis.standard.ClassicFilterFactory +org.apache.lucene.analysis.standard.StandardFilterFactory +org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory +org.apache.lucene.analysis.synonym.SynonymFilterFactory +org.apache.lucene.analysis.th.ThaiWordFilterFactory +org.apache.lucene.analysis.tr.TurkishLowerCaseFilterFactory +org.apache.lucene.analysis.util.ElisionFilterFactory +org.apache.lucene.collation.CollationKeyFilterFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.lucene.analysis.phonetic.BeiderMorseFilterFactory +org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilterFactory +org.apache.lucene.analysis.phonetic.PhoneticFilterFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.TokenizerFactory b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.TokenizerFactory new file mode 100644 index 0000000..a92489c --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.analysis.util.TokenizerFactory @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ja.JapaneseTokenizerFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.icu.segmentation.ICUTokenizerFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.cn.smart.SmartChineseSentenceTokenizerFactory +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.analysis.ar.ArabicLetterTokenizerFactory +org.apache.lucene.analysis.cjk.CJKTokenizerFactory +org.apache.lucene.analysis.cn.ChineseTokenizerFactory +org.apache.lucene.analysis.core.KeywordTokenizerFactory +org.apache.lucene.analysis.core.LetterTokenizerFactory +org.apache.lucene.analysis.core.LowerCaseTokenizerFactory +org.apache.lucene.analysis.core.WhitespaceTokenizerFactory +org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory +org.apache.lucene.analysis.ngram.NGramTokenizerFactory +org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory +org.apache.lucene.analysis.pattern.PatternTokenizerFactory +org.apache.lucene.analysis.ru.RussianLetterTokenizerFactory +org.apache.lucene.analysis.standard.ClassicTokenizerFactory +org.apache.lucene.analysis.standard.StandardTokenizerFactory +org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory +org.apache.lucene.analysis.wikipedia.WikipediaTokenizerFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.Codec b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.Codec new file mode 100644 index 0000000..9795e81 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.Codec @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.lucene40.Lucene40Codec +org.apache.lucene.codecs.lucene3x.Lucene3xCodec +org.apache.lucene.codecs.lucene41.Lucene41Codec +org.apache.lucene.codecs.lucene42.Lucene42Codec +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.lucene.codecs.simpletext.SimpleTextCodec +org.apache.lucene.codecs.appending.AppendingCodec diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.DocValuesFormat b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.DocValuesFormat new file mode 100644 index 0000000..7345d7b --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.DocValuesFormat @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.lucene42.Lucene42DocValuesFormat +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.diskdv.DiskDocValuesFormat +org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.PostingsFormat b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.PostingsFormat new file mode 100644 index 0000000..d5e62bc --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.lucene.codecs.PostingsFormat @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat +org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat +org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat +org.apache.lucene.codecs.memory.MemoryPostingsFormat +org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat +org.apache.lucene.codecs.memory.DirectPostingsFormat diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.marmotta.ldpath.api.functions.SelectorFunction b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.marmotta.ldpath.api.functions.SelectorFunction new file mode 100644 index 0000000..6bb0ed3 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.marmotta.ldpath.api.functions.SelectorFunction @@ -0,0 +1,5 @@ +org.apache.marmotta.ldpath.model.functions.ConcatenateFunction +org.apache.marmotta.ldpath.model.functions.FirstFunction +org.apache.marmotta.ldpath.model.functions.LastFunction +org.apache.marmotta.ldpath.model.functions.SortFunction +org.apache.marmotta.ldpath.model.functions.CountFunction \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.marmotta.ldpath.api.functions.TestFunction b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.marmotta.ldpath.api.functions.TestFunction new file mode 100644 index 0000000..3a72d08 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.marmotta.ldpath.api.functions.TestFunction @@ -0,0 +1,6 @@ +org.apache.marmotta.ldpath.model.tests.functions.EqualTest +org.apache.marmotta.ldpath.model.tests.functions.NotEqualTest +org.apache.marmotta.ldpath.model.tests.functions.GreaterEqualTest +org.apache.marmotta.ldpath.model.tests.functions.GreaterThanTest +org.apache.marmotta.ldpath.model.tests.functions.LessEqualTest +org.apache.marmotta.ldpath.model.tests.functions.LessThanTest \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider new file mode 100644 index 0000000..3c05728 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider @@ -0,0 +1,2 @@ +org.apache.stanbol.commons.namespaceprefix.provider.stanbol.DefaultNamespaceMappingsProvider +org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.PrefixccProvider \ No newline at end of file diff --git 
a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.solr.SolrServerProvider b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.solr.SolrServerProvider new file mode 100644 index 0000000..4d838d5 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.solr.SolrServerProvider @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.commons.solr.impl.RESTfulSolrServerProvider + +# TODO: adding here the StandaloneEmbeddedSolrServerProvider of the +# solr.managed bundle as workaround for the maven assembly plugin +# overriding multiple files with the same name +org.apache.stanbol.commons.solr.managed.standalone.StandaloneEmbeddedSolrServerProvider + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.commons.solr.managed.standalone.StandaloneEmbeddedSolrServerProvider diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.solr.managed.ManagedSolrServer b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.solr.managed.ManagedSolrServer new file mode 100644 index 0000000..eec2647 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.solr.managed.ManagedSolrServer @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.commons.solr.managed.standalone.DefaultStandaloneManagedSolrServerWrapper diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider new file mode 100644 index 0000000..e7c924c --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.commons.solr.managed.standalone.ClassPathDataFileProvider diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.dtd b/ontonethub-src/ontonethub/src/main/resources/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.dtd new file mode 100644 index 0000000..2c7037e --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.dtd @@ -0,0 +1 @@ +com.ctc.wstx.dtd.DTDSchemaFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.relaxng b/ontonethub-src/ontonethub/src/main/resources/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.relaxng new file mode 100644 index 0000000..681466f --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.codehaus.stax2.validation.XMLValidationSchemaFactory.relaxng @@ -0,0 +1 @@ +com.ctc.wstx.msv.RelaxNGSchemaFactory diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.w3c.dom.DOMImplementationSourceList b/ontonethub-src/ontonethub/src/main/resources/services/org.w3c.dom.DOMImplementationSourceList new file mode 100644 index 0000000..7a52dd1 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.w3c.dom.DOMImplementationSourceList @@ -0,0 +1 @@ +org.apache.xerces.dom.DOMXSImplementationSourceImpl \ No newline at end of file diff --git a/ontonethub-src/ontonethub/src/main/resources/services/org.xml.sax.driver b/ontonethub-src/ontonethub/src/main/resources/services/org.xml.sax.driver new file mode 100644 index 0000000..409dd43 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/services/org.xml.sax.driver @@ -0,0 +1,2 @@ +org.apache.xerces.parsers.SAXParser + diff --git a/ontonethub-src/ontonethub/src/main/resources/solr/README.md b/ontonethub-src/ontonethub/src/main/resources/solr/README.md new file mode 100644 index 0000000..c6a56a7 --- /dev/null +++ 
b/ontonethub-src/ontonethub/src/main/resources/solr/README.md @@ -0,0 +1,25 @@ +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Solr Server Configuration Directory +----------------------------------- + +The "solr" directory includes the default configuration of a Solr Server used +with the SolrYard configuration. + +Currently this only includes an empty solr.xml file, because all other +configurations (such as adding Solr cores, copying Solr schemas or whole +Solr indexes - config and data) are handled internally by the +ManagedSolrServer. diff --git a/ontonethub-src/ontonethub/src/main/resources/solr/conf/solr.xml b/ontonethub-src/ontonethub/src/main/resources/solr/conf/solr.xml new file mode 100644 index 0000000..93417f3 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/solr/conf/solr.xml @@ -0,0 +1,29 @@ + + + + + + + + + diff --git a/ontonethub-src/ontonethub/src/main/resources/solr/core/allextras.solrindex.zip b/ontonethub-src/ontonethub/src/main/resources/solr/core/allextras.solrindex.zip new file mode 100644 index 0000000..7600c4f Binary files /dev/null and b/ontonethub-src/ontonethub/src/main/resources/solr/core/allextras.solrindex.zip differ diff --git a/ontonethub-src/ontonethub/src/main/resources/solr/core/default.solrindex.zip b/ontonethub-src/ontonethub/src/main/resources/solr/core/default.solrindex.zip new file mode 100644 index 0000000..52a2673 Binary files /dev/null and b/ontonethub-src/ontonethub/src/main/resources/solr/core/default.solrindex.zip differ diff --git a/ontonethub-src/ontonethub/src/main/resources/solr/core/entityhub.solrindex.zip b/ontonethub-src/ontonethub/src/main/resources/solr/core/entityhub.solrindex.zip new file mode 100644 index 0000000..c748ff6 Binary files /dev/null and b/ontonethub-src/ontonethub/src/main/resources/solr/core/entityhub.solrindex.zip differ diff --git a/ontonethub-src/ontonethub/src/main/resources/solr/core/kuromoji.solrindex.zip b/ontonethub-src/ontonethub/src/main/resources/solr/core/kuromoji.solrindex.zip new file mode 100644 index 0000000..951a98b Binary files /dev/null and b/ontonethub-src/ontonethub/src/main/resources/solr/core/kuromoji.solrindex.zip differ diff --git a/ontonethub-src/ontonethub/src/main/resources/solr/core/paoding.solrindex.outdated b/ontonethub-src/ontonethub/src/main/resources/solr/core/paoding.solrindex.outdated new file mode 100644 index 0000000..ec17e9c Binary files /dev/null and b/ontonethub-src/ontonethub/src/main/resources/solr/core/paoding.solrindex.outdated differ diff --git a/ontonethub-src/ontonethub/src/main/resources/solr/core/smartcn.solrindex.zip b/ontonethub-src/ontonethub/src/main/resources/solr/core/smartcn.solrindex.zip new file mode 100644 index 0000000..a62dec3 Binary files /dev/null and b/ontonethub-src/ontonethub/src/main/resources/solr/core/smartcn.solrindex.zip
differ diff --git a/ontonethub-src/ontonethub/src/main/resources/templates/indexing.ftl b/ontonethub-src/ontonethub/src/main/resources/templates/indexing.ftl new file mode 100644 index 0000000..6ca5c14 --- /dev/null +++ b/ontonethub-src/ontonethub/src/main/resources/templates/indexing.ftl @@ -0,0 +1,317 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ------------ +# Index Metadata +# ------------ + +# Here the name of the dataset MUST be specified by the user. +# It MUST BE a single word with no spaces. +name=${name} + +# An optional short description may be used. If missing, default descriptions +# are created. +description=${description} + +# The "Synchronized" property is supported by some Entityhub Yard implementations +# to automatically update/delete local data as soon as the created archive with +# the indexed data is updated/deleted in the /datafiles folder of Apache Stanbol. +# By default this feature is enabled. For very big indexes users might want to +# disable this feature to allow the deletion of the archive after the index was +# initialised successfully. Uncomment the next line to deactivate it. +Synchronized=true + +# ------------ +# Indexing Mode dependent Configurations: (see readme.md for details) +# ------------ + +# The indexing tool supports two modes. See (1) and (2) for details. + +# (1) Iterate over Data and lookup scores: (default) +# ------------ + +# use the Jena TDB as source for indexing the RDF data located within +# "indexing/resource/rdfdata" +entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata + +# NOTE: if you want to index BNodes you need to activate the bnode parameter; +# see STANBOL-765 for details (and documentation) +#entityDataIterable=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata,bnode:true + +# The EntityScore Provider needs to provide the scores for indexed entities. +# Use the NoEntityScoreProvider if no scores are available. +entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider + +# The EntityFieldScoreProvider can be used to use the value of a property as score. +# The property can be configured by the "field" parameter. +# Scores are parsed from numbers and strings that can be converted to numbers. +#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityFieldScoreProvider,field:http://www.example.org/myOntology#score + +# The EntityIneratorToScoreProviderAdapter can be used to adapt any configured +# "entityIdIterator" to an "entityScoreProvider". See also the comments for +# "entityIdIterator".
+#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.EntityIneratorToScoreProviderAdapter + +# Indexing VCARD + +# Alternative configuration for indexing vCard files +# change the config for the vCard indexer in the "vcard.properties" file +#entityDataIterable=org.apache.stanbol.entityhub.indexing.source.vcard.VcardIndexingSource,config:vcard +#entityScoreProvider=org.apache.stanbol.entityhub.indexing.core.source.NoEntityScoreProvider + + + +# (2) Iterate over Entity IDs and lookup Data +# ------------ + +# First one needs to provide an EntityIterator. +# Typically the LineBasedEntityIterator implementation is used. The configuration +# for this implementation is typically provided in an own file. A default +# configuration is provided by the iditerator.properties file. +#entityIdIterator=org.apache.stanbol.entityhub.indexing.core.source.LineBasedEntityIterator,config:iditerator + +# This EntityIterator allows using a simple triple filter to select entities for indexing. +# It uses the same configuration as "FieldValueFilter" but DOES NOT support +# wildcards. See "FieldValueFilter" for details on how to configure! +# +# NOTE: Can only be used if Jena TDB (jenatdb.RdfIndexingSource) is used as +# indexing source! +#entityIdIterator=org.apache.stanbol.entityhub.indexing.source.jenatdb.ResourceFilterIterator,config:entityTypes.properties + +# Second, an entityDataProvider needs to be specified. Here we use the Jena TDB. +# Note that this implementation implements both entityDataIterable AND +# entityDataProvider. +# RDF data needs to be located in the "indexing/resource/rdfdata" +# entityDataProvider=org.apache.stanbol.entityhub.indexing.source.jenatdb.RdfIndexingSource,source:rdfdata + +# ------------ +# Score Normalizer +# ------------ + +# Entity scores are normalised by the ScoreNormalizer. +# If no score normaliser is configured, the scores will be used as provided by +# the entities. +#scoreNormalizer= + +# ScoreNormalizers can be chained as shown by the following example configuration. +# The score for an entity is first processed by the last normalizer. +#scoreNormalizer=org.apache.stanbol.entityhub.indexing.core.normaliser.RangeNormaliser,config:scorerange;org.apache.stanbol.entityhub.indexing.core.normaliser.NaturalLogNormaliser;org.apache.stanbol.entityhub.indexing.core.normaliser.MinScoreNormalizer,config:minscore + +# Different Implementations: +# - RangeNormaliser: allows to define a range for score values. See +# "scorerange.properties" for possible configurations. +# - NaturalLogNormaliser: should be used if the score represents the number of +# incoming links. +# - MinScoreNormalizer: allows to prevent indexing of all entities with a score +# lower than the configured minimum. See "minscore.properties" for possible +# configurations. + +# ------------ +# Entity Processor +# ------------ + +# Multiple entity processors can be used for indexing entities. They are separated by ';' +# and are executed in the order of definition. + +# FiledMapperProcessor: +# +# entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor +# +# This processor ensures that "field mappings" are executed while indexing entities. +# By default it will use the mappings configured by the "fieldConfiguration" +# property. To use other mappings one can use the "mappings" parameter (e.g.
+# mappings:otherMappings.txt) + +# FieldValueFilter +# +#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes +# +# This allows to define a field and values that are used to filter entities. Only entities +# that have one of the defined values as actual value of the defined field will +# get indexed. This is typically used to filter entities by rdf:type, but can be used +# for any URI property. See the default entityTypes.properties file for more information. + +# ResourceUriFilter +# +#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.ResourceUriFilter,prefix:http://www.example.org/ +# +# This filters out all resources that do not start with the configured prefix. + +# LdpathProcessor +# +# This allows to use simple LDpath statements to process entities, such as mapping +# only properties of entities with a specific type: +# +# skos:prefLabel = .[rdf:type is ]/rdfs:label; +# +# Parameters: +# * append:[true/false] (default=true) Whether the result of the LDpath program is +# appended to the processed entity or the processed entity is +# replaced with the results of the LDpath program. +# * ldpath:{file} (required, no default) The {file} containing the LDpath +# program used by this processor. {file} is relative to the config directory. +# +# NOTEs: +# * The LdpathProcessor has only access to the local properties of the currently +# indexed entity. LDPath statements that refer to other information, such as paths +# with a length > 1 or inverse properties, will not work. +# * Processors can be chained by defining multiple processor instances in the +# configuration and separating them with ';'. This allows to use multiple +# LdpathProcessor instances and/or to chain LdpathProcessor(s) with others +# such as the "FiledMapperProcessor". Processors are executed as defined +# within the configuration of the "entityProcessor" property. +# * When using the FiledMapperProcessor on results of the LdpathProcessor make +# sure that the fields defined in the LDpath statements are indexed by the +# FiledMapperProcessor. Otherwise such values will NOT be indexed! +# org.apache.stanbol.entityhub.indexing.core.processor.LdpathProcessor,ldpath:ldpath-mapping.txt,append:true + +# GeonamesUriProcessor +# +# A typical case is that geonames URIs are missing the trailing '/'. This processor +# will search for geonames URIs and correct them. +# org.apache.stanbol.entityhub.indexing.core.processor.GeonamesUriProcessor + +# WikipediaToDBPediaUriProcessor +# +# This processor will rewrite Wikipedia URIs to DBPedia URIs +# (e.g. "http://de.wikipedia.org/wiki/Hawaii" to "http://de.dbpedia.org/resource/Hawaii") +# org.apache.stanbol.entityhub.indexing.core.processor.WikipediaToDBPediaUriProcessor + +# EmptyProcessor +# +#entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.EmptyProcessor +# +# This processor can be used to deactivate entity processing. + +# Default Entity Processor configuration +entityProcessor=org.apache.stanbol.entityhub.indexing.core.processor.FieldValueFilter,config:entityTypes;org.apache.stanbol.entityhub.indexing.core.processor.FiledMapperProcessor + +# ------------ +# Index Field Configuration +# ------------ + +# An index needs to provide its configuration. This is used at runtime if +# entities are updated.
+fieldConfiguration=mappings.txt + + +# ------------ +# Post-Processing +# ------------ + +# The indexing tool now supports a post-processing step that is executed after +# all entities are already indexed to the indexing destination. +# For this step the configured IndexingDestination is used as both the source +# and the target. +# The post-processing allows applying an additional set of EntityProcessor +# instances to each indexed entity. +# In principle all EntityProcessor implementations as used for entity processing +# during the normal indexing phase can also be used for post-processing. However, +# as all information is already available within the IndexingDestination, this +# phase can also be used to perform processing steps that would not be +# easily possible during the indexing phase. + +# LdpathPostProcessor +# +# EntityProcessor that wraps the IndexingDestination as RDF backend and +# therefore allows executing any kind of LDpath program based on the indexed +# entity data. +# Typical use cases of this processor include: +# * indexing transitive closures +# skos:broaderTransitive = (skos:broader)* +# * collecting labels of referenced entities to be used for disambiguation (e.g. +# use labels of linked concepts in a SKOS concept scheme: +# = *[rdf:type is skos:Concept]/(skos:prefLabel | skos:altLabel) +# * advanced indexing rules that need paths longer than one (e.g. adding +# labels of redirects pointing to an entity: +# rdfs:label = rdfs:label | (^rdfs:seeAlso/rdfs:label) +# +# Parameter: This uses the same parameters as the LdpathProcessor. + +#entityPostProcessor=org.apache.stanbol.entityhub.indexing.core.processor.LdpathPostProcessor,ldpath:ldpath-post-mapping.txt,append:true + + +# ------------ +# Indexing Destination +# ------------ + +# A SolrYard is used as destination for indexing. +# To boost some fields (typically labels) one can use the fieldboosts.properties. +# A default field boost configuration is provided. +# A default fst.config is also provided (see STANBOL-1167). +indexingDestination=org.apache.stanbol.entityhub.indexing.destination.solryard.SolrYardIndexingDestination,boosts:fieldboosts,fstConf:fst.config + + +# ------------ +# Additional configurations for ReferencedSite +# ------------ + +# All the following properties are optional, but can be used to configure +# the referenced site used to access the indexed data within the Entityhub. + +# The entity prefixes are used to determine if an entity needs to be searched +# on a referenced site. If not specified, requests for any entity will be +# forwarded to this referenced site. +# Use ';' to separate multiple values. +#org.apache.stanbol.entityhub.site.entityPrefix=http://example.org/resource;urn:mycompany: + +# Configuration of the remote service +# If the indexed data are also available remotely (e.g.
by a Linked Data endpoint) +# then it is possible to also allow direct access to such entities: +# (a) retrieving entities (access URI and EntityDereferencer implementation) +#org.apache.stanbol.entityhub.site.accessUri="http://example.org/resource" +#org.apache.stanbol.entityhub.site.dereferencerType= +# available EntityDereferencer implementations: +# - org.apache.stanbol.entityhub.dereferencer.CoolUriDereferencer +# - org.apache.stanbol.entityhub.dereferencer.SparqlDereferencer + +# (b) search entities (queryUri and EntitySearcher implementation) +#org.apache.stanbol.entityhub.site.queryUri=http://example.org/sparql +#org.apache.stanbol.entityhub.site.searcherType= +# available EntitySearcher implementations: +# - org.apache.stanbol.entityhub.searcher.SparqlSearcher (generic SPARQL) +# - org.apache.stanbol.entityhub.searcher.LarqSearcher (Larq SPARQL extensions) +# - org.apache.stanbol.entityhub.searcher.VirtuosoSearcher (Virtuoso SPARQL extensions) + +# The referenced site can also specify additional mappings to be used in the +# case an entity of this site is imported to the Entityhub. +# Typically the same mappings as used for the indexing are a good start. +# However, one might want to copy some values (e.g. labels) to fields commonly +# used by the Entityhub. +org.apache.stanbol.entityhub.site.fieldMappings=mappings.txt + + +# License(s) +# Add here the name and URLs of the license to be used for all entities +# provided by this referenced site. +# NOTE: licenseName and licenseUrl MUST use the ordering as below! +# This example shows dual licensing with "cc by-sa" and GNU. +#org.apache.stanbol.entityhub.site.licenseName=Creative Commons Attribution-ShareAlike 3.0;GNU Free Documentation License +#org.apache.stanbol.entityhub.site.licenseUrl=http://creativecommons.org/licenses/by-sa/3.0/;http://www.gnu.org/licenses/fdl.html + +# Attribution +# Some licenses require attribution. These properties can be used to provide a +# link to the site with the attribution and the attribution text. +#org.apache.stanbol.entityhub.site.attributionUrl=http://example.org/About.html +#org.apache.stanbol.entityhub.site.attribution=To the universe + + +# Fail on Error loading Resource Files +# The indexing tool can be configured to fail on errors when loading resource files. +# By default, a resource file will be ignored when an error occurs while trying to load it. +failOnErrorLoadingResource=false + + diff --git a/ontonethub-src/pom.xml b/ontonethub-src/pom.xml new file mode 100644 index 0000000..9ac2b26 --- /dev/null +++ b/ontonethub-src/pom.xml @@ -0,0 +1,162 @@ + + + + + 4.0.0 + + org.apache.stanbol + stanbol-parent + 6 + parent + + + org.apache.stanbol + apache-stanbol + 1.0.0 + pom + + Apache Stanbol + Pseudo project to build Apache Stanbol.
+ + 2010 + + + Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + + scm:svn:http://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0 + + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0 + + http://stanbol.apache.org/tags/apache-stanbol-1.0.0 + + + + apache-stanbol-${project.version} + + + + + prefixccprovider + ontonethub + bundlelists/ontonethub + ontonethub-war + + + + + it + + true + + + + rat + + false + + + + + org.apache.rat + apache-rat-plugin + + false + + + **/.* + **/.*/* + **/.*/**/* + + + **/target/** + **/*.log + launchers/full/sling/** + launchers/full/factstore/** + + + DEPENDENCIES + DEPENDENCIES-BY-LICENSE + **/src/license/THIRD-PARTY.properties + + + **/*.config + **/*.cfg + **/*.ref + **/*.txt + **/*.tsv + **/*.sem + + + **/*.bin + **/test/**/*.eml + **/test/**/*.nt + **/test/**/*.html + **/test/**/*.xhtml + **/test/**/*.rdf + **/test/**/*.rtf + **/test/**/*.rules + **/test/**/*.odt + + + commons/web/home/src/main/resources/org/apache/stanbol/commons/web/home/static/scripts/jquery-1.4.2.js + contenthub/web/src/main/resources/org/apache/stanbol/contenthub/web/static/scripts/jit.js + contenthub/web/src/main/resources/org/apache/stanbol/contenthub/web/static/style/jquery-ui-1.8.11.custom.css + contenthub/web/src/main/resources/org/apache/stanbol/contenthub/web/static/scripts/jquery-ui-1.8.11.custom.min.js + contenthub/web/src/main/resources/org/apache/stanbol/contenthub/web/static/scripts/jquery-1.5.1.min.js + contenthub/web/src/main/resources/org/apache/stanbol/contenthub/web/static/scripts/jquery-1.4.2.js + factstore/factstore/src/main/resources/org/apache/stanbol/factstore/web/static/scripts/json2.js + ontologymanager/web/src/main/resources/org/apache/stanbol/ontologymanager/web/static/scripts/jquery-1.6.1 + reasoners/web/src/main/resources/org/apache/stanbol/reasoners/web/static/jquery/jquery-1.6.1 + rules/web/src/main/resources/org/apache/stanbol/rules/web/static/jquery/jquery-1.6.1 + enhancer/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/static/openlayers-2.9/** + commons/owl/** + + + PLAYGROUND.txt + conventions/*.* + demos/** + enhancer/data/text-examples/** + + + + + + org.codehaus.mojo + license-maven-plugin + + + + aggregate-add-third-party + + + + + + + + + + diff --git a/ontonethub-src/prefixccprovider/README.md b/ontonethub-src/prefixccprovider/README.md new file mode 100644 index 0000000..3a64928 --- /dev/null +++ b/ontonethub-src/prefixccprovider/README.md @@ -0,0 +1,32 @@ + + +Namespace Prefix Provider for prefix.cc +--------------------------------- + +This provides an implementation of the NamespacePrefixProvider interface that +provides all namespace prefixes defined/managed by [prefix.cc](http://prefix.cc). + +This implementation can be used within and outside of an OSGI environment. +When running within OSGI it will deactivate itself if the Stanbol OfflineMode +is active. + +Mappings are periodically updated but held locally in memory. The default update +cycle is one hour, but can be configured by manually constructing an instance +or via the OSGI component configuration.
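Outside of OSGi the provider can be constructed directly, as the README above notes. A minimal usage sketch, assuming the `PrefixccProvider` class introduced later in this patch and its dependencies (commons-io, slf4j, the namespaceprefix service bundle) are on the classpath; the two-hour interval and the `foaf` lookup are illustrative only:

```java
import java.util.concurrent.TimeUnit;

import org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.PrefixccProvider;

public class PrefixccUsageSketch {
    public static void main(String[] args) {
        // Third argument = true: mappings are fetched synchronously from
        // http://prefix.cc before the constructor returns.
        PrefixccProvider provider = new PrefixccProvider(2, TimeUnit.HOURS, true);
        if (provider.isAvailable()) {
            System.out.println(provider.getNamespace("foaf"));                    // prefix -> namespace
            System.out.println(provider.getPrefix("http://xmlns.com/foaf/0.1/")); // namespace -> prefix
            System.out.println("cached at: " + provider.getCacheTimeStamp());
        }
        provider.close(); // stop the scheduled refresh and drop the in-memory cache
    }
}
```

Note that lookups simply return `null` while no data has been loaded yet (or after `close()`), so callers do not need to guard against a partially initialised provider.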
+ + diff --git a/ontonethub-src/prefixccprovider/pom.xml b/ontonethub-src/prefixccprovider/pom.xml new file mode 100644 index 0000000..24d2fba --- /dev/null +++ b/ontonethub-src/prefixccprovider/pom.xml @@ -0,0 +1,134 @@ + + + + 4.0.0 + + + org.apache.stanbol + stanbol-parent + 6 + ../../../parent + + + org.apache.stanbol + org.apache.stanbol.commons.namespaceprefix.provider.prefixcc + 1.0.0 + bundle + + Apache Stanbol Commons prefix.cc based Namespace Prefix Provider + + prefix.cc is a service where users can define namespace prefix mappings + + + + scm:svn:http://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/commons/namespaceprefix/prefixccprovider + + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/commons/namespaceprefix/prefixccprovider + + scm:svn:https://svn.apache.org/repos/asf/stanbol/tags/apache-stanbol-1.0.0/stanbol.apache.org/branches/release-1.0.0-branch + + + + + + org.apache.felix + maven-scr-plugin + + + org.apache.felix + maven-bundle-plugin + true + + + + org.apache.stanbol.commons.namespaceprefix; provide:=true; version="[0.11,1.1)", + * + + + org.apache.stanbol.commons.namespaceprefix.provider.prefixcc, + org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.component + + + + + + org.apache.rat + apache-rat-plugin + + false + + src/license/THIRD-PARTY.properties + + + src/test/resources/testnamespaceprefix.mappings + src/main/resources/META-INF/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider + + + + + + + + + org.apache.stanbol + org.apache.stanbol.commons.namespaceprefix.service + 1.0.0 + + + org.apache.stanbol + org.apache.stanbol.commons.stanboltools.offline + 1.0.0 + true + + + org.slf4j + slf4j-api + + + commons-io + commons-io + + + org.osgi + org.osgi.core + + + org.osgi + org.osgi.compendium + + + org.apache.felix + org.apache.felix.scr.annotations + + + + + junit + junit + test + + + org.slf4j + slf4j-log4j12 + test + + + + + diff --git a/ontonethub-src/prefixccprovider/src/main/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/PrefixccProvider.java b/ontonethub-src/prefixccprovider/src/main/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/PrefixccProvider.java new file mode 100644 index 0000000..3837877 --- /dev/null +++ b/ontonethub-src/prefixccprovider/src/main/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/PrefixccProvider.java @@ -0,0 +1,192 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +package org.apache.stanbol.commons.namespaceprefix.provider.prefixcc; + +import java.io.IOException; +import java.io.InputStream; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Date; +import java.util.List; +import java.util.ServiceLoader; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.io.IOUtils; +import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider; +import org.apache.stanbol.commons.namespaceprefix.impl.NamespacePrefixProviderImpl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PrefixccProvider implements NamespacePrefixProvider { + + private static final Logger log = LoggerFactory.getLogger(PrefixccProvider.class); + + public static final URL GET_ALL; + static { + try { + GET_ALL = new URL("http://prefix.cc/popular/all.file.txt"); + } catch (MalformedURLException e) { + throw new IllegalStateException("Unable to create http://prefix.cc URL",e); + } + } + private final ScheduledExecutorService scheduler = + Executors.newScheduledThreadPool(1); + + + private NamespacePrefixProvider cache; + private long cacheStamp; + + /** + * Intended to be used by the {@link ServiceLoader} utility. + * Uses a 1 hour delay and does a synchronous initial load of the mappings + * before returning. + */ + public PrefixccProvider(){ //by default update once every hour + this(1,TimeUnit.HOURS, true); + } + /** + * Creates a prefix.cc provider with the specified delay. The initial + * load of the mappings is done immediately but asynchronously. That means + * that the mappings will not be available when the constructor returns.

+ * While this implementation does not restrict the configured delay, expected + * values are on the order of hours. + * @param delay the delay + * @param unit the unit of the delay + */ + public PrefixccProvider(int delay,TimeUnit unit){ + this(delay,unit,false); + } + /** + * Creates a prefix.cc provider. If syncInitialLoad is enabled the initial + * load of the data is done before the constructor returns. Otherwise + * mappings are loaded asynchronously as specified by the parsed delay.

+ * While this implementation does not restrict the configured delay, expected + * values are on the order of hours. + * @param delay the delay + * @param unit the time unit of the delay + * @param syncInitialLoad if true mappings are loaded before + * the constructor returns. Otherwise mappings are loaded asynchronously + */ + public PrefixccProvider(int delay,TimeUnit unit, boolean syncInitialLoad){ + if(delay <= 0){ + throw new IllegalArgumentException("The parsed delay '" + +delay+"' MUST NOT be <= 0"); + } + if(unit == null){ + unit = TimeUnit.SECONDS; + } + int initialDelay; + if(syncInitialLoad){ + loadMappings(); + initialDelay = delay; + } else { + initialDelay = 0; + } + scheduler.scheduleWithFixedDelay( + new Runnable() { + + @Override + public void run() { + loadMappings(); + } + }, initialDelay, delay, unit); + } + + protected final void loadMappings() { + try { + log.info("Load Namespace Prefix Mappings from {}",GET_ALL); + HttpURLConnection con = (HttpURLConnection)GET_ALL.openConnection(); + con.setReadTimeout(5000); //set the max connect & read timeout to 5sec + con.setConnectTimeout(5000); + con.connect(); + String contentType = con.getContentType().split(";")[0]; + if("text/plain".equalsIgnoreCase(contentType)){ + InputStream in = con.getInputStream(); + try { + cache = new NamespacePrefixProviderImpl(in); + cacheStamp = System.currentTimeMillis(); + log.info(" ... completed"); + } finally { + IOUtils.closeQuietly(in); + } + } else { + log.warn("Response from prefix.cc has the wrong content type '" + + contentType + "' (expected: text/plain). This indicates that the " + + "service is currently unavailable!"); + } + con.disconnect(); //we connect once every {long-period} + } catch (IOException e) { + log.warn("Unable to load prefix.cc NamespaceMappings (Message: " + + e.getMessage() +")",e); + } + } + /** + * Deletes the local cache and stops the periodic updates of the cache. + */ + public void close(){ + scheduler.shutdown(); + cache = null; + } + + /** + * @return true if prefix.cc data are available + */ + public boolean isAvailable(){ + return cache != null; + } + /** + * The Date when the locally cached data was last synced with + * prefix.cc. Will return null if no data was received from + * prefix.cc ({@link #isAvailable()} == false) + * @return the date when the local cache was received from prefix.cc + */ + public Date getCacheTimeStamp(){ + if(cache != null){ + return new Date(cacheStamp); + } else { + return null; + } + } + + @Override + protected void finalize() throws Throwable { + close(); + super.finalize(); + } + + @Override + public String getNamespace(String prefix) { + NamespacePrefixProvider npp = cache; + return npp == null ? null : npp.getNamespace(prefix); + } + + @Override + public String getPrefix(String namespace) { + NamespacePrefixProvider npp = cache; + return npp == null ? null : npp.getPrefix(namespace); + } + @Override + public List<String> getPrefixes(String namespace) { + NamespacePrefixProvider npp = cache; + return npp == null ?
null : npp.getPrefixes(namespace); + } +} diff --git a/ontonethub-src/prefixccprovider/src/main/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/component/PrefixccProviderComponent.java b/ontonethub-src/prefixccprovider/src/main/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/component/PrefixccProviderComponent.java new file mode 100644 index 0000000..54af230 --- /dev/null +++ b/ontonethub-src/prefixccprovider/src/main/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/component/PrefixccProviderComponent.java @@ -0,0 +1,159 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.component; + +import java.math.BigDecimal; +import java.util.Dictionary; +import java.util.Hashtable; +import java.util.concurrent.TimeUnit; + +import org.apache.felix.scr.annotations.Activate; +import org.apache.felix.scr.annotations.Component; +import org.apache.felix.scr.annotations.ConfigurationPolicy; +import org.apache.felix.scr.annotations.Deactivate; +import org.apache.felix.scr.annotations.Properties; +import org.apache.felix.scr.annotations.Property; +import org.apache.felix.scr.annotations.Reference; +import org.apache.felix.scr.annotations.ReferenceCardinality; +import org.apache.felix.scr.annotations.ReferencePolicy; +import org.apache.felix.scr.annotations.ReferenceStrategy; +import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider; +import org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.PrefixccProvider; +import org.apache.stanbol.commons.stanboltools.offline.OfflineMode; +import org.osgi.framework.BundleContext; +import org.osgi.framework.Constants; +import org.osgi.framework.ServiceRegistration; +import org.osgi.service.cm.ConfigurationException; +import org.osgi.service.component.ComponentContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * OSGI component configuring and registering the {@link PrefixccProvider}. + * Non-OSGI users do not need to bother with this.

+ * This class mainly exists to keep the {@link PrefixccProvider} independent of + * the Stanbol {@link OfflineMode} switch. + * + * @author Rupert Westenthaler + * + */ +@Component(immediate=true,policy=ConfigurationPolicy.OPTIONAL,metatype = true) +@Properties(value={ + @Property(name=PrefixccProviderComponent.UPDATE_INTERVAL,intValue=60), + @Property(name=Constants.SERVICE_RANKING,intValue=-100) +}) +public class PrefixccProviderComponent { + + private Logger log = LoggerFactory.getLogger(PrefixccProviderComponent.class); + + /** + * The update interval (in minutes) used to reload mappings from prefix.cc + */ + public static final String UPDATE_INTERVAL = "stanbol.commons.namespaceprovider.prefixcc.update-interval"; + public static final int DEFAULT_UPDATE_INTERVAL = 60; + + ServiceRegistration providerRegistration; + + @Reference(cardinality=ReferenceCardinality.OPTIONAL_UNARY, + policy=ReferencePolicy.DYNAMIC,strategy=ReferenceStrategy.EVENT, + bind="bindOfflineMode",unbind="unbindOfflineMode") + private OfflineMode offlineMode; + + int updateInterval; + private Dictionary<String,Object> providerProperties = null; + private BundleContext bc; + private PrefixccProvider provider; + + protected void bindOfflineMode(OfflineMode mode){ + this.offlineMode = mode; + updateProviderState(); + } + protected void unbindOfflineMode(OfflineMode mode){ + this.offlineMode = null; + updateProviderState(); + } + + @Activate + protected void activate(ComponentContext ctx) throws ConfigurationException { + bc = ctx.getBundleContext(); + Object value = ctx.getProperties().get(UPDATE_INTERVAL); + if(value instanceof Number){ + updateInterval = ((Number)value).intValue(); + } else if(value != null && !value.toString().isEmpty()){ + try { + updateInterval = new BigDecimal(value.toString()).intValue(); + } catch (NumberFormatException e) { + throw new ConfigurationException(UPDATE_INTERVAL, + "Unable to parse integer value from the configured value '" + + value +"' (type: "+value.getClass()+")"); + } + } else { + updateInterval = DEFAULT_UPDATE_INTERVAL; + } + if(updateInterval < 0){ + log.warn("Negative update interval '{}' configured. Will use default '{}'!", + updateInterval,DEFAULT_UPDATE_INTERVAL); + updateInterval = DEFAULT_UPDATE_INTERVAL; + } else if(updateInterval == 0){ + updateInterval = DEFAULT_UPDATE_INTERVAL; + } + //we need to copy over the service ranking + providerProperties = new Hashtable<String,Object>(); + Object ranking = ctx.getProperties().get(Constants.SERVICE_RANKING); + if(ranking != null){ + providerProperties.put(Constants.SERVICE_RANKING, ranking); + } + updateProviderState(); + } + + @Deactivate + protected void deactivate(ComponentContext ctx){ + providerProperties = null; + updateProviderState(); + bc = null; + } + + /** + * Uses the {@link #providerProperties} and {@link #offlineMode} + * state to decide if the {@link PrefixccProvider} should be registered as + * a service or not. If the current state differs from the desired state it + * creates and registers / unregisters and destroys the {@link #provider}.

+ * Consumes: {@link #providerProperties} and {@link #offlineMode}
+ * Manages: {@link #provider} and {@link #providerRegistration} + */ + private synchronized void updateProviderState(){ + if(providerProperties != null && offlineMode == null){ //register + if(providerRegistration == null){ + provider = new PrefixccProvider(updateInterval, TimeUnit.MINUTES); + providerRegistration = bc.registerService( + NamespacePrefixProvider.class.getName(), provider, providerProperties); + log.info("registered prefix.cc NamespacePrefixProvider ..."); + } + } else { //unregister + if(providerRegistration != null){ + providerRegistration.unregister(); + log.info("unregistered prefix.cc NamespacePrefixProvider ..."); + } + if(provider != null){ + provider.close(); + provider = null; + } + } + } + + +} diff --git a/ontonethub-src/prefixccprovider/src/main/resources/META-INF/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider b/ontonethub-src/prefixccprovider/src/main/resources/META-INF/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider new file mode 100644 index 0000000..229506d --- /dev/null +++ b/ontonethub-src/prefixccprovider/src/main/resources/META-INF/services/org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider @@ -0,0 +1 @@ +org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.PrefixccProvider \ No newline at end of file diff --git a/ontonethub-src/prefixccprovider/src/main/resources/OSGI-INF/metatype/metatype.properties b/ontonethub-src/prefixccprovider/src/main/resources/OSGI-INF/metatype/metatype.properties new file mode 100644 index 0000000..ca3736c --- /dev/null +++ b/ontonethub-src/prefixccprovider/src/main/resources/OSGI-INF/metatype/metatype.properties @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.component.PrefixccProviderComponent.name=Stanbol \ +Commons Namespace Prefix Provider for http://prefix.cc +org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.component.PrefixccProviderComponent.description=This \ +Namespace Prefix Provider provides mappings as managed by http://prefix.cc. It periodically downloads all \ +available mappings and caches them locally in-memory. This provider is only available if OfflineMode is NOT active. + +stanbol.commons.namespaceprovider.prefixcc.update-interval.name=Update Interval +stanbol.commons.namespaceprovider.prefixcc.update-interval.description=The interval in minutes \ +used by the provider to reload mappings from http://prefix.cc + +service.ranking.name=Service Ranking +service.ranking.description=Namespace prefix mappings from providers with a higher ranking will \ +override those with lower rankings.
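Since the component above is metatype-enabled, the update interval can also be set programmatically through OSGi Configuration Admin instead of via a config file. A hedged sketch, assuming the component PID equals the fully qualified class name (the SCR default when no explicit name is set); the 120-minute value is illustrative:

```java
import java.util.Dictionary;
import java.util.Hashtable;

import org.osgi.service.cm.Configuration;
import org.osgi.service.cm.ConfigurationAdmin;

public class UpdateIntervalConfigSketch {
    static void configure(ConfigurationAdmin configAdmin) throws Exception {
        Configuration config = configAdmin.getConfiguration(
                "org.apache.stanbol.commons.namespaceprefix.provider.prefixcc.component.PrefixccProviderComponent",
                null); // null location: bind the config to the first bundle that uses it
        Dictionary<String, Object> props = new Hashtable<String, Object>();
        // Property key as defined by PrefixccProviderComponent.UPDATE_INTERVAL;
        // reload mappings every 120 minutes instead of the default 60.
        props.put("stanbol.commons.namespaceprovider.prefixcc.update-interval", 120);
        config.update(props);
    }
}
```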
\ No newline at end of file diff --git a/ontonethub-src/prefixccprovider/src/test/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/PrefixccProviderTest.java b/ontonethub-src/prefixccprovider/src/test/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/PrefixccProviderTest.java new file mode 100644 index 0000000..e5c6602 --- /dev/null +++ b/ontonethub-src/prefixccprovider/src/test/java/org/apache/stanbol/commons/namespaceprefix/provider/prefixcc/PrefixccProviderTest.java @@ -0,0 +1,128 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.stanbol.commons.namespaceprefix.provider.prefixcc; + + +import java.io.File; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.Date; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.io.IOUtils; +import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService; +import org.apache.stanbol.commons.namespaceprefix.service.StanbolNamespacePrefixService; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PrefixccProviderTest { + + private static final Logger log = LoggerFactory.getLogger(PrefixccProviderTest.class); + + private static String foaf_ns = "http://xmlns.com/foaf/0.1/"; + private static String foaf_pf = "foaf"; + + + @Test + public void test(){ + Date date = new Date(); + PrefixccProvider pcp = new PrefixccProvider(10,TimeUnit.SECONDS); + Assert.assertNull(pcp.getPrefix(foaf_ns)); //async loading + for(int i =0; i<5 && !pcp.isAvailable();i++){ + try { + Thread.sleep(1000); + } catch (InterruptedException e) {} + } + if(!pcp.isAvailable()){ + log.warn("Unable to obtain prefix.cc data after 5sec .. skipping further tests"); + return; + } + //assertMappings + Assert.assertEquals(foaf_pf, pcp.getPrefix(foaf_ns)); + Assert.assertEquals(foaf_ns, pcp.getNamespace(foaf_pf)); + //assert cache time stamp + Date cacheDate = pcp.getCacheTimeStamp(); + Assert.assertTrue(date.compareTo(cacheDate) == -1); + Assert.assertTrue(new Date().compareTo(cacheDate) >= 0); + //assert close + pcp.close(); + Assert.assertFalse(pcp.isAvailable()); + Assert.assertNull(pcp.getCacheTimeStamp()); + } + /** + * Checks if the service is reachable (test is performed online) and if + * prefix.cc sends information with the correct content type. 
+ * @return true if the service is reachable and answers with the expected content type + */ + private boolean checkServiceAvailable(){ + try { + HttpURLConnection con = (HttpURLConnection)PrefixccProvider.GET_ALL.openConnection(); + con.setReadTimeout(5000); //set the max connect & read timeout to 5sec + con.setConnectTimeout(5000); + con.connect(); + String contentType = con.getContentType(); + IOUtils.closeQuietly(con.getInputStream()); //close the stream + if("text/plain".equalsIgnoreCase(contentType)){ + return true; + } else { + log.info("Request to http://prefix.cc ... returned an unexpected " + + "ContentType "+contentType+ " (expected: text/plain) " + + " ... deactivating " + PrefixccProvider.class.getSimpleName() + + " test"); + return false; //service seems to be down ... skip tests + } + } catch (IOException e) { + log.info("Unable to connect to http://prefix.cc ... deactivating " + + PrefixccProvider.class.getSimpleName()+ " test"); + return false; + } + } + + @Test + public void testServiceLoader() throws IOException{ + //this test works only if online + if(!checkServiceAvailable()){ + return; //skip test + } + //this test for now does not use predefined mappings + + URL mappingURL = PrefixccProviderTest.class.getClassLoader() + .getResource("testnamespaceprefix.mappings"); + //Assert.assertNotNull(mappingURL); + File mappingFile; + if(mappingURL != null){ + try { + mappingFile = new File(mappingURL.toURI()); + } catch(URISyntaxException e) { + mappingFile = new File(mappingURL.getPath()); + } + //Assert.assertTrue(mappingFile.isFile()); + } else { + mappingFile = new File("testnamespaceprefix.mappings"); + } + NamespacePrefixService service = new StanbolNamespacePrefixService(mappingFile); + //assertMappings + Assert.assertEquals(foaf_pf, service.getPrefix(foaf_ns)); + Assert.assertEquals(foaf_ns, service.getNamespace(foaf_pf)); + + } + +} diff --git a/ontonethub-src/prefixccprovider/src/test/resources/log4j.properties b/ontonethub-src/prefixccprovider/src/test/resources/log4j.properties new file mode 100644 index 0000000..a7d5b65 --- /dev/null +++ b/ontonethub-src/prefixccprovider/src/test/resources/log4j.properties @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +# Root logger option +log4j.rootLogger=INFO, stdout + +# Direct log messages to stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target=System.out +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n +log4j.logger.org.apache.stanbol.enhancer.engines.keywordextraction=DEBUG \ No newline at end of file diff --git a/ontonethub-src/prefixccprovider/src/test/resources/testnamespaceprefix.mappings b/ontonethub-src/prefixccprovider/src/test/resources/testnamespaceprefix.mappings new file mode 100644 index 0000000..6056c42 --- /dev/null +++ b/ontonethub-src/prefixccprovider/src/test/resources/testnamespaceprefix.mappings @@ -0,0 +1 @@ +dummy http://test.dummy.org/ \ No newline at end of file diff --git a/ontonethub-src/startup.sh b/ontonethub-src/startup.sh new file mode 100755 index 0000000..87d6d36 --- /dev/null +++ b/ontonethub-src/startup.sh @@ -0,0 +1,8 @@ +#!/bin/bash +if [ ! -f /usr/local/tomcat/webapps/stanbol.war ]; then + cp ontonethub-war/target/stanbol.war /usr/local/tomcat/webapps/ +fi +while : +do + sleep 1 +done \ No newline at end of file diff --git a/ontonethub.yaml b/ontonethub.yaml new file mode 100644 index 0000000..4435e77 --- /dev/null +++ b/ontonethub.yaml @@ -0,0 +1,550 @@ +{ + "swagger" : "2.0", + "info" : { + "description" : "HTTP REST services for accessing the entities defined in the ontology network.", + "version" : "0.1", + "title" : "OntoNetHub", + "contact" : { + "name" : "andrea.nuzzolese@istc.cnr.it" + }, + "license" : { + "name" : "Apache 2.0", + "url" : "http://www.apache.org/licenses/LICENSE-2.0.html" + } + }, + "host" : "localhost:8000", + "basePath" : "/", + "tags" : [ { + "name" : "find" + }, + { + "name" : "ontology catalogue" + } + ], + "schemes" : [ "http" ], + "paths" : { + "/stanbol/ontonethub/ontologies" : { + "get" : { + "tags" : [ "info", "ontology catalogue", "/ontonethub/ontologies" ], + "summary" : "Returns the list of ontologies indexed by the OntoNetHub.", + "operationId" : "list", + "produces" : ["application/json"], + "responses" : { + "200" : { + "description" : "The list of indexes for the ontologies. The list is provided as a JSON array.", + "schema" : { + "type" : "array", + "items" : { + "type" : "string" + } + } + } + } + } + }, + "/stanbol/ontonethub/ontology" : { + "post" : { + "consumes" : ["multipart/form-data"], + "tags" : [ "Add", "/ontonethub/ontology"], + "summary" : "Add an ontology for OntoNet management.", + "description" : "", + "operationId" : "add", + "produces" : [ "application/json"], + "parameters" : [ { + "name" : "name", + "in" : "formData", + "description" : "The name of the ontology.", + "required" : true, + "type" : "string" + }, + {"name" : "description", + "in" : "formData", + "description" : "A textual description of the ontology.", + "required" : true, + "type" : "string" + }, + {"name" : "baseURI", + "in" : "formData", + "description" : "The URI associated in the Web with the ontology being added.", + "required" : true, + "type" : "string" + }, + {"name" : "data", + "in" : "formData", + "description" : "The OWL ontology to add.", + "required" : true, + "type" : "file" + } + ], + "responses" : { + "200" : { + "description" : "Ontology successfully added.
The URL of the job that is performing the indexing is returned in the JSON provided as output.", + "schema" : { + "$ref" : "#/definitions/JobLink" + } + }, + "409" : { + "description" : "The ontology cannot be added as it already exists in the OntoNetHub." + } + } + } + }, + "/stanbol/ontonethub/ontology/{id}" : { + "get" : { + "consumes" : ["*/*"], + "tags" : [ "info", "/ontonethub/ontology"], + "summary" : "Return the information associated with the ontology identified by the ID.", + "description" : "", + "operationId" : "OntologyInfo", + "produces" : [ "application/json"], + "parameters" : [ + { + "name" : "id", + "in" : "path", + "description" : "The ID of the ontology for which information is requested.", + "required" : true, + "type" : "string" + } + ], + "responses" : { + "200" : { + "description" : "The ontology exists and the information is returned as JSON.", + "schema" : { + "$ref" : "#/definitions/OntologyInfo" + } + }, + "404" : { + "description" : "The ontology cannot be found in OntoNetHub.", + "schema" : { + "$ref" : "#/definitions/Error" + } + }, + "500" : { + "description" : "An error occurred when retrieving the information associated with the ontology. The description of the error is returned in the body of the response.", + "schema" : { + "$ref" : "#/definitions/Error" + } + } + } + }, + "delete" : { + "consumes" : ["*/*"], + "tags" : [ "delete", "/ontonethub/ontology"], + "summary" : "Delete from the OntoNetHub the ontology identified by the ID.", + "description" : "", + "operationId" : "OntologyDelete", + "produces" : [ "application/json"], + "parameters" : [ + { + "name" : "id", + "in" : "path", + "description" : "The ID of the ontology to delete.", + "required" : true, + "type" : "string" + } + ], + "responses" : { + "200" : { + "description" : "The ontology has been deleted.", + "schema" : { + "$ref" : "#/definitions/OntologyInfo" + } + }, + "404" : { + "description" : "The ontology cannot be found in OntoNetHub.", + "schema" : { + "$ref" : "#/definitions/Error" + } + } + } + } + }, + "/stanbol/ontonethub/ontology/{id}/source" : { + "get" : { + "consumes" : ["*/*"], + "tags" : [ "ontology source", "/ontonethub/ontology"], + "summary" : "Return the OWL source of an ontology managed by the OntoNetHub.", + "description" : "", + "operationId" : "OntologySource", + "produces" : [ + "application/rdf+xml", + "text/turtle", + "text/rdf+n3", + "application/rdf+json", + "application/json-ld"], + "parameters" : [ + { + "name" : "id", + "in" : "path", + "description" : "The ID of the ontology for which information is requested.", + "required" : true, + "type" : "string" + } + ], + "responses" : { + "200" : { + "description" : "The OWL source is returned as output by using the requested notation as syntax." + }, + "404" : { + "description" : "The ontology cannot be found in OntoNetHub.", + "schema" : { + "$ref" : "#/definitions/Error" + } + } + } + } + }, + "/stanbol/jobs/{id}" : { + "get" : { + "consumes" : ["*/*"], + "tags" : [ "processing jobs", "/jobs", "info"], + "summary" : "Return the status information of a job (identified by the specific ID provided as input).", + "description" : "", + "operationId" : "JobStatus", + "produces" : [ + "application/json", + "text/html" + ], + "parameters" : [ + { + "name" : "id", + "in" : "path", + "description" : "The ID of the job that processes the ontology (i.e.
+          "description" : "The ID of the job that processes the ontology (i.e. indexing and storage).",
+          "required" : true,
+          "type" : "string"
+        } ],
+        "responses" : {
+          "200" : {
+            "description" : "The indexing job started. The information about the status of the job can be retrieved from the output location provided in the response.",
+            "schema" : {
+              "$ref" : "#/definitions/Job"
+            }
+          },
+          "404" : {
+            "description" : "The job does not exist."
+          }
+        }
+      },
+      "delete" : {
+        "consumes" : [ "*/*" ],
+        "tags" : [ "processing jobs", "/jobs", "delete" ],
+        "summary" : "Delete a job. Jobs can be deleted only when they are in the status 'finished'.",
+        "description" : "",
+        "operationId" : "JobDelete",
+        "produces" : [ ],
+        "parameters" : [ {
+          "name" : "id",
+          "in" : "path",
+          "description" : "The ID of the job that processes the ontology (i.e. indexing and storage).",
+          "required" : true,
+          "type" : "string"
+        } ],
+        "responses" : {
+          "200" : {
+            "description" : "The indexing job has been deleted.",
+            "schema" : {
+              "$ref" : "#/definitions/Job"
+            }
+          },
+          "404" : {
+            "description" : "The job does not exist."
+          }
+        }
+      }
+    },
+    "/stanbol/ontonethub/ontologies/find" : {
+      "post" : {
+        "consumes" : [ "application/x-www-form-urlencoded" ],
+        "tags" : [ "find" ],
+        "summary" : "Find the entities of the ontology network that match a specific query string.",
+        "description" : "",
+        "operationId" : "FindOnts",
+        "produces" : [ "application/json", "application/rdf+xml", "text/turtle", "application/x-turtle", "text/rdf+nt", "text/rdf+n3", "application/rdf+json" ],
+        "parameters" : [ {
+          "name" : "name",
+          "in" : "formData",
+          "description" : "The name of the entity (supports wildcards, e.g. \"Accu*\" for \"Accuracy\").",
+          "required" : true,
+          "type" : "string"
+        }, {
+          "name" : "field",
+          "in" : "formData",
+          "description" : "The name of the field used for the query. One MUST pass the full field name; namespace prefixes are not supported yet (default is rdfs:label).",
+          "required" : false,
+          "type" : "string"
+        }, {
+          "name" : "lang",
+          "in" : "formData",
+          "description" : "The language of the passed name can be specified.",
+          "required" : false,
+          "type" : "string"
+        }, {
+          "name" : "ldpath",
+          "in" : "formData",
+          "description" : "LDPath (http://marmotta.apache.org/ldpath/language.html) programs can be used to specify what information to return for the entities selected by the /find request.",
+          "required" : false,
+          "type" : "string"
+        }, {
+          "name" : "limit",
+          "in" : "formData",
+          "description" : "The maximum number of results.",
+          "required" : false,
+          "type" : "integer"
+        }, {
+          "name" : "offset",
+          "in" : "formData",
+          "description" : "The offset of the first result.",
+          "required" : false,
+          "type" : "integer"
+        } ],
+        "responses" : {
+          "200" : {
+            "description" : "Successful operation.",
+            "schema" : {
+              "type" : "array",
+              "items" : {
+                "$ref" : "#/definitions/ResultSet"
+              }
+            }
+          }
+        }
+      }
+    },
+    "/stanbol/ontonethub/ontology/{id}/find" : {
+      "post" : {
+        "consumes" : [ "application/x-www-form-urlencoded" ],
+        "tags" : [ "find", "/ontonethub/ontology" ],
+        "summary" : "Find the entities of a specific ontology that match a specific query string.",
+        "description" : "",
+        "operationId" : "FindOnt",
+        "produces" : [ "application/json", "application/rdf+xml", "text/turtle", "application/x-turtle", "text/rdf+nt", "text/rdf+n3", "application/rdf+json" ],
+        "parameters" : [ {
+          "name" : "id",
+          "in" : "path",
+          "description" : "The ID of the ontology to query.",
+          "required" : true,
+          "type" : "string"
+        }, {
+          "name" : "name",
+          "in" : "formData",
+          "description" : "The name of the entity (supports wildcards, e.g. \"Accu*\" for \"Accuracy\").",
+          "required" : true,
+          "type" : "string"
+        }, {
+          "name" : "field",
+          "in" : "formData",
+          "description" : "The name of the field used for the query. One MUST pass the full field name; namespace prefixes are not supported yet (default is rdfs:label).",
+          "required" : false,
+          "type" : "string"
+        }, {
+          "name" : "lang",
+          "in" : "formData",
+          "description" : "The language of the passed name can be specified.",
+          "required" : false,
+          "type" : "string"
+        }, {
+          "name" : "ldpath",
+          "in" : "formData",
+          "description" : "LDPath (http://marmotta.apache.org/ldpath/language.html) programs can be used to specify what information to return for the entities selected by the /find request.",
+          "required" : false,
+          "type" : "string"
+        }, {
+          "name" : "limit",
+          "in" : "formData",
+          "description" : "The maximum number of results.",
+          "required" : false,
+          "type" : "integer"
+        }, {
+          "name" : "offset",
+          "in" : "formData",
+          "description" : "The offset of the first result.",
+          "required" : false,
+          "type" : "integer"
+        } ],
+        "responses" : {
+          "200" : {
+            "description" : "Successful operation.",
+            "schema" : {
+              "type" : "array",
+              "items" : {
+                "$ref" : "#/definitions/ResultSet"
+              }
+            }
+          },
+          "404" : {
+            "description" : "The ontology with the specified ID does not exist."
+          }
+        }
+      }
+    }
+  },
+  "definitions" : {
+    "ResultSet" : {
+      "type" : "object",
+      "properties" : {
+        "query" : {
+          "type" : "object",
+          "properties" : {
+            "selected" : {
+              "type" : "array",
+              "items" : {
+                "type" : "string"
+              }
+            },
+            "constraints" : {
+              "type" : "array",
+              "items" : {
+                "$ref" : "#/definitions/Constraint"
+              }
+            },
+            "limit" : {
+              "type" : "integer"
+            },
+            "offset" : {
+              "type" : "integer"
+            }
+          }
+        },
+        "results" : {
+          "type" : "array",
+          "items" : {
+            "$ref" : "#/definitions/Result"
+          }
+        }
+      }
+    },
+    "Constraint" : {
+      "type" : "object",
+      "properties" : {
+        "type" : {
+          "type" : "string"
+        },
+        "patternType" : {
+          "type" : "string"
+        },
+        "text" : {
+          "type" : "string"
+        },
+        "proximityRanking" : {
+          "type" : "boolean"
+        },
+        "field" : {
+          "type" : "string"
+        },
+        "boost" : {
+          "type" : "integer"
+        }
+      }
+    },
+    "Result" : {
+      "type" : "object",
+      "properties" : {
+        "id" : {
+          "type" : "string"
+        },
+        "constraint" : {
+          "type" : "array",
+          "items" : {
+            "type" : "object",
+            "properties" : {
+              "type" : {
+                "type" : "string"
+              },
+              "xml:lang" : {
+                "type" : "string"
+              },
+              "value" : {
+                "type" : "string"
+              }
+            }
+          }
+        }
+      }
+    },
+    "JobLink" : {
+      "type" : "object",
+      "properties" : {
+        "monitoringService" : {
+          "type" : "string"
+        }
+      }
+    },
+    "Job" : {
+      "type" : "object",
+      "properties" : {
+        "status" : {
+          "type" : "string"
+        },
+        "outputLocation" : {
+          "type" : "string"
+        },
+        "messages" : {
+          "type" : "array",
+          "items" : {
+            "type" : "string"
+          }
+        }
+      }
+    },
+    "OntologyInfo" : {
+      "type" : "object",
+      "properties" : {
+        "id" : {
+          "type" : "string"
+        },
+        "ontologySource" : {
+          "type" : "string"
+        },
+        "name" : {
+          "type" : "string"
+        },
+        "ontologyIRI" : {
+          "type" : "string"
+        },
+        "owlClasses" : {
+          "type" : "integer"
+        },
+        "objectProperties" : {
+          "type" : "integer"
+        },
+        "datatypeProperties" : {
+          "type" : "integer"
+        },
+        "annotationProperties" : {
+          "type" : "integer"
+        },
+        "importedOntologies" : {
+          "type" : "integer"
+        }
+      }
+    },
+    "Error" : {
+      "type" : "object",
+      "properties" : {
+        "error" : {
+          "type" : "string"
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
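
A minimal usage sketch for the API described in ontonethub.yaml, assuming the service is reachable at localhost:8000 (the host declared in the spec). The file name example.owl, the form values, and the <job-id> placeholder are illustrative, not part of the project.

#!/bin/bash
# Add an ontology via multipart/form-data. On success (200) the service
# returns a JobLink JSON object whose "monitoringService" field points at
# the /stanbol/jobs/{id} resource that tracks the indexing job.
curl -s -X POST "http://localhost:8000/stanbol/ontonethub/ontology" \
  -F "name=example" \
  -F "description=An example ontology" \
  -F "baseURI=http://example.org/example-ontology" \
  -F "data=@example.owl"

# Poll the job status; the response is a Job object with "status",
# "outputLocation", and "messages". Replace <job-id> with the ID taken
# from the JobLink returned above.
curl -s -H "Accept: application/json" \
  "http://localhost:8000/stanbol/jobs/<job-id>"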
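
A second sketch, under the same assumptions, for the /find endpoint. The full field name is passed because namespace prefixes are not supported; the rdfs:label URI used below is the documented default field, and --data-urlencode ensures the '#' and '*' characters are encoded correctly in the form body.

#!/bin/bash
# Find entities whose rdfs:label starts with "Accu" across the whole
# indexed ontology network (use /ontology/{id}/find to restrict the
# search to a single ontology).
curl -s -H "Accept: application/json" \
  -X POST "http://localhost:8000/stanbol/ontonethub/ontologies/find" \
  --data-urlencode "name=Accu*" \
  --data-urlencode "field=http://www.w3.org/2000/01/rdf-schema#label" \
  --data-urlencode "lang=en" \
  --data-urlencode "limit=10"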