diff --git a/.travis.yml.future b/.github/workflows/cypress_ui.yml.future
similarity index 74%
rename from .travis.yml.future
rename to .github/workflows/cypress_ui.yml.future
index 8bd747625e4..b38ae2f9558 100644
--- a/.travis.yml.future
+++ b/.github/workflows/cypress_ui.yml.future
@@ -1,16 +1,17 @@
+###############################################################################
+#
+# THIS IS AN OLD TRAVIS-CI.ORG JOB FILE
+# To use it with GitHub Actions, it would need to be refactored.
+# It is kept as a future example, as it was before.
+# See also #5846
+#
+###############################################################################
+
services:
- docker
jobs:
include:
- # Execute java unit- and integration tests
- - stage: test
- language: java
- jdk:
- - oraclejdk8
- script: mvn -DcompilerArgument=-Xlint:unchecked test -P all-unit-tests
- after_success: mvn jacoco:report coveralls:report
-
# Execute Cypress for UI testing
# see https://docs.cypress.io/guides/guides/continuous-integration.html
- stage: test
diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml
new file mode 100644
index 00000000000..464d60c2db6
--- /dev/null
+++ b/.github/workflows/maven_unit_test.yml
@@ -0,0 +1,42 @@
+name: Maven Unit Tests
+
+on:
+ push:
+ paths:
+ - "**.java"
+ pull_request:
+ paths:
+ - "**.java"
+
+jobs:
+ unittest:
+ name: (JDK ${{ matrix.jdk }} / ${{ matrix.os }}) Unit Tests
+ strategy:
+ fail-fast: false
+ matrix:
+ os: [ ubuntu-latest ]
+ jdk: [ '11' ]
+ #include:
+ # - os: ubuntu-latest
+ # jdk: '16'
+ runs-on: ${{ matrix.os }}
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up JDK ${{ matrix.jdk }}
+ uses: actions/setup-java@v2
+ with:
+ java-version: ${{ matrix.jdk }}
+ distribution: 'adopt'
+ - name: Cache Maven packages
+ uses: actions/cache@v2
+ with:
+ path: ~/.m2
+ key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
+ restore-keys: ${{ runner.os }}-m2
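+ # Runs the full unit test suite; the all-unit-tests Maven profile also includes
+ # tests tagged as non-essential (see the Testing section of the Developer Guide).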
+ - name: Build with Maven
+ run: mvn -DcompilerArgument=-Xlint:unchecked -P all-unit-tests clean test
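+ # Generates the Jacoco coverage report and pushes it to Coveralls;
+ # GITHUB_TOKEN is supplied automatically by GitHub Actions.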
+ - name: Maven Code Coverage
+ env:
+ CI_NAME: github
+ COVERALLS_SECRET: ${{ secrets.GITHUB_TOKEN }}
+ run: mvn -V -B jacoco:report coveralls:report -DrepoToken=${COVERALLS_SECRET} -DpullRequest=${{ github.event.number }}
\ No newline at end of file
diff --git a/.github/workflows/reviewdog_checkstyle.yml b/.github/workflows/reviewdog_checkstyle.yml
new file mode 100644
index 00000000000..90a0dd7d06b
--- /dev/null
+++ b/.github/workflows/reviewdog_checkstyle.yml
@@ -0,0 +1,21 @@
+name: Maven CheckStyle Task
+on:
+ pull_request:
+ paths:
+ - "**.java"
+
+jobs:
+ checkstyle_job:
+ runs-on: ubuntu-latest
+ name: Checkstyle job
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
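+ # Runs Checkstyle through reviewdog: findings are posted as pull request review
+ # comments (reporter: github-pr-review), and fail_on_error fails the job on violations.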
+ - name: Run check style
+ uses: nikitasavinov/checkstyle-action@master
+ with:
+ fail_on_error: true
+ reporter: github-pr-review
+ checkstyle_config: checkstyle.xml
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 67de6619add..00000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-language: java
-jdk:
- - openjdk11
-script: mvn -DcompilerArgument=-Xlint:unchecked test -P all-unit-tests
-after_success: mvn jacoco:report coveralls:report
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2227286d4d1..cb7e5f9d123 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,12 +6,11 @@ We aren't just looking for developers. There are many ways to contribute to Data
## Ideas/Feature Requests
-Your idea or feature request might already be captured in the Dataverse [issue tracker] on GitHub but if not, the best way to bring it to the community's attention is by posting on the [dataverse-community Google Group][] or bringing it up on a [Community Call][]. You're also welcome make some noise in the [#dataverse IRC channel][] (which is [logged][]) or cram your idea into 280 characters and mention [@dataverseorg][] on Twitter. To discuss your idea privately, please email it to support@dataverse.org
+Your idea or feature request might already be captured in the Dataverse [issue tracker] on GitHub but if not, the best way to bring it to the community's attention is by posting on the [dataverse-community Google Group][] or bringing it up on a [Community Call][]. You're also welcome to make some noise in [chat.dataverse.org][] or cram your idea into 280 characters and mention [@dataverseorg][] on Twitter. To discuss your idea privately, please email it to support@dataverse.org
There's a chance your idea is already on our roadmap, which is available at https://www.iq.harvard.edu/roadmap-dataverse-project
-[#dataverse IRC channel]: http://chat.dataverse.org
-[logged]: http://irclog.iq.harvard.edu/dataverse/today
+[chat.dataverse.org]: http://chat.dataverse.org
[issue tracker]: https://github.com/IQSS/dataverse/issues
[@dataverseorg]: https://twitter.com/dataverseorg
@@ -55,7 +54,7 @@ We love code contributions. Developers are not limited to the main Dataverse cod
[API Guide]: http://guides.dataverse.org/en/latest/api
[Installation Guide]: http://guides.dataverse.org/en/latest/installation
-If you are interested in working on the main Dataverse code, great! Before you start coding, please reach out to us either on the [dataverse-community Google Group][], the [dataverse-dev Google Group][], [IRC][] (#dataverse on freenode), or via support@dataverse.org to make sure the effort is well coordinated and we avoid merge conflicts. We maintain a list of [community contributors][] and [dev efforts][] the community is working on so please let us know if you'd like to be added or removed from either list.
+If you are interested in working on the main Dataverse code, great! Before you start coding, please reach out to us either on the [dataverse-community Google Group][], the [dataverse-dev Google Group][], [chat.dataverse.org][], or via support@dataverse.org to make sure the effort is well coordinated and we avoid merge conflicts. We maintain a list of [community contributors][] and [dev efforts][] the community is working on so please let us know if you'd like to be added or removed from either list.
Please read http://guides.dataverse.org/en/latest/developers/version-control.html to understand how we use the "git flow" model of development and how we will encourage you to create a GitHub issue (if it doesn't exist already) to associate with your pull request. That page also includes tips on making a pull request.
@@ -66,6 +65,5 @@ Thanks for your contribution!
[dataverse-community Google Group]: https://groups.google.com/group/dataverse-community
[Community Call]: https://dataverse.org/community-calls
[dataverse-dev Google Group]: https://groups.google.com/group/dataverse-dev
-[IRC]: http://chat.dataverse.org
[community contributors]: https://docs.google.com/spreadsheets/d/1o9DD-MQ0WkrYaEFTD5rF_NtyL8aUISgURsAXSL7Budk/edit?usp=sharing
[dev efforts]: https://github.com/orgs/IQSS/projects/2#column-5298405
diff --git a/README.md b/README.md
index 3f3b8c2de90..6fd11374353 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Dataverse is a trademark of President and Fellows of Harvard College and is regi
[![API Test Status](https://jenkins.dataverse.org/buildStatus/icon?job=IQSS-dataverse-develop&subject=API%20Test%20Status)](https://jenkins.dataverse.org/job/IQSS-dataverse-develop/)
[![API Test Coverage](https://img.shields.io/jenkins/coverage/jacoco?jobUrl=https%3A%2F%2Fjenkins.dataverse.org%2Fjob%2FIQSS-dataverse-develop&label=API%20Test%20Coverage)](https://jenkins.dataverse.org/job/IQSS-dataverse-develop/ws/target/coverage-it/index.html)
-[![Unit Test Status](https://img.shields.io/travis/IQSS/dataverse?label=Unit%20Test%20Status)](https://travis-ci.org/IQSS/dataverse)
+[![Unit Test Status](https://github.com/IQSS/dataverse/actions/workflows/maven_unit_test.yml/badge.svg?branch=develop)](https://github.com/IQSS/dataverse/actions/workflows/maven_unit_test.yml)
[![Unit Test Coverage](https://img.shields.io/coveralls/github/IQSS/dataverse?label=Unit%20Test%20Coverage)](https://coveralls.io/github/IQSS/dataverse?branch=develop)
[![Guides Build Status](https://github.com/IQSS/dataverse/actions/workflows/guides_build_sphinx.yml/badge.svg)](https://github.com/IQSS/dataverse/actions/workflows/guides_build_sphinx.yml)
diff --git a/checkstyle.xml b/checkstyle.xml
index 5a864136fea..99185e15e97 100644
--- a/checkstyle.xml
+++ b/checkstyle.xml
@@ -97,7 +97,9 @@
-->
[Content lost in extraction: the XML added to checkstyle.xml in this hunk, plus the markup of a following hunk whose only surviving text reads "Deposit and share your data. Get academic credit." and "Harvard Dataverse is a repository for research data. Deposit data and code here."]
diff --git a/doc/sphinx-guides/source/_static/util/createsequence.sql b/doc/sphinx-guides/source/_static/util/createsequence.sql
index 2677832abd8..7ac1968de2c 100644
--- a/doc/sphinx-guides/source/_static/util/createsequence.sql
+++ b/doc/sphinx-guides/source/_static/util/createsequence.sql
@@ -1,14 +1,14 @@
--- A script for creating a numeric identifier sequence, and an external
--- stored procedure, for accessing the sequence from inside the application,
--- in a non-hacky, JPA way.
+-- A script for creating a numeric identifier sequence, and an external
+-- stored procedure, for accessing the sequence from inside the application,
+-- in a non-hacky, JPA way.
-- NOTE:
-- 1. The database user name "dvnapp" is hard-coded here - it may
-- need to be changed to match your database user name;
-
+
-- 2. In the code below, the sequence starts with 1, but it can be adjusted by
--- changing the MINVALUE as needed.
+-- changing the MINVALUE as needed.
CREATE SEQUENCE datasetidentifier_seq
INCREMENT 1
@@ -22,12 +22,12 @@ ALTER TABLE datasetidentifier_seq OWNER TO "dvnapp";
-- And now create a PostgreSQL FUNCTION, for JPA to
-- access as a NamedStoredProcedure:
-CREATE OR REPLACE FUNCTION generateIdentifierAsSequentialNumber(
- OUT identifier int)
- RETURNS int AS
-$BODY$
+CREATE OR REPLACE FUNCTION generateIdentifierFromStoredProcedure()
+RETURNS varchar AS $$
+DECLARE
+ identifier varchar;
BEGIN
- select nextval('datasetidentifier_seq') into identifier;
+ identifier := nextval('datasetidentifier_seq')::varchar;
+ RETURN identifier;
END;
-$BODY$
- LANGUAGE plpgsql;
+$$ LANGUAGE plpgsql IMMUTABLE;
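+
+-- To verify the function after running this script (a sketch; run against your
+-- Dataverse database):
+--   SELECT generateIdentifierFromStoredProcedure();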
diff --git a/doc/sphinx-guides/source/_static/util/identifier_from_timestamp.sql b/doc/sphinx-guides/source/_static/util/identifier_from_timestamp.sql
new file mode 100644
index 00000000000..a755b5ecd4a
--- /dev/null
+++ b/doc/sphinx-guides/source/_static/util/identifier_from_timestamp.sql
@@ -0,0 +1,46 @@
+-- A script for creating, through a database stored procedure, sequential
+-- 8 character identifiers from a base36 representation of current timestamp.
+
+CREATE OR REPLACE FUNCTION base36_encode(
+ IN digits bigint, IN min_width int = 0)
+RETURNS varchar AS $$
+DECLARE
+ chars char[];
+ ret varchar;
+ val bigint;
+BEGIN
+ chars := ARRAY[
+ '0','1','2','3','4','5','6','7','8','9',
+ 'a','b','c','d','e','f','g','h','i','j',
+ 'k','l','m','n','o','p','q','r','s','t',
+ 'u','v','w','x','y','z'];
+ val := digits;
+ ret := '';
+ IF val < 0 THEN
+ val := val * -1;
+ END IF;
+ WHILE val != 0 LOOP
+ ret := chars[(val % 36)+1] || ret;
+ val := val / 36;
+ END LOOP;
+
+ IF min_width > 0 AND char_length(ret) < min_width THEN
+ ret := lpad(ret, min_width, '0');
+ END IF;
+
+ RETURN ret;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE;
+
+
+CREATE OR REPLACE FUNCTION generateIdentifierFromStoredProcedure()
+RETURNS varchar AS $$
+DECLARE
+ curr_time_msec bigint;
+ identifier varchar;
+BEGIN
+ curr_time_msec := extract(epoch from now())*1000;
+ identifier := base36_encode(curr_time_msec);
+ RETURN identifier;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE;
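+
+-- Example (a sketch): preview the kind of identifier this produces by encoding
+-- the current millisecond timestamp directly:
+--   SELECT base36_encode((extract(epoch from now()) * 1000)::bigint);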
diff --git a/doc/sphinx-guides/source/admin/dashboard.rst b/doc/sphinx-guides/source/admin/dashboard.rst
index 5129552b706..3f77729d0ab 100644
--- a/doc/sphinx-guides/source/admin/dashboard.rst
+++ b/doc/sphinx-guides/source/admin/dashboard.rst
@@ -22,7 +22,7 @@ This dashboard tool allows you to define sets of local datasets to make availabl
Metadata Export
---------------
-This part of the Dashboard is simply a reminder message that metadata export happens through the Dataverse Software API. See the :doc:`metadataexport` section and the :doc:`/api/native-api` section of the API Guide for more details.
+This part of the Dashboard is simply a reminder message that metadata export happens through the Dataverse Software API. See the :doc:`/admin/metadataexport` section and the :doc:`/api/native-api` section of the API Guide for more details.
Users
-----
diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst
index a55c90d2eb3..a18204588c2 100644
--- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst
+++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst
@@ -41,7 +41,7 @@ Recursively assigns the users and groups having a role(s),that are in the set co
curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/addRoleAssignmentsToChildren
Configure a Dataverse Collection to store all new files in a specific file store
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Only accessible to superusers. ::
@@ -110,6 +110,8 @@ Mints a new identifier for a dataset previously registered with a handle. Only a
curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/admin/$dataset-id/reregisterHDLToPID
+.. _send-metadata-to-pid-provider:
+
Send Dataset metadata to PID provider
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/doc/sphinx-guides/source/api/client-libraries.rst b/doc/sphinx-guides/source/api/client-libraries.rst
index af8b2f19015..a47eb2d58aa 100755
--- a/doc/sphinx-guides/source/api/client-libraries.rst
+++ b/doc/sphinx-guides/source/api/client-libraries.rst
@@ -27,9 +27,10 @@ It was created and is maintained by `The Agile Monkeys `_.
+https://github.com/IQSS/dataverse-client-r is the official R package for Dataverse Software APIs. The latest release can be installed from `CRAN `_.
+The R client can search and download datasets. It is useful for automating the download of data files as part of a script. For bulk edit and upload operations, we currently recommend pyDataverse.
-The package is currently maintained by `Will Beasley `_. It was created by `Thomas Leeper `_ whose Dataverse collection can be found at https://dataverse.harvard.edu/dataverse/leeper
+The package is currently maintained by `Shiro Kuriwaki `_. It was originally created by `Thomas Leeper `_ and later maintained by `Will Beasley `_.
Java
----
diff --git a/doc/sphinx-guides/source/api/intro.rst b/doc/sphinx-guides/source/api/intro.rst
index 101c6c2bfaa..933932cd7b9 100755
--- a/doc/sphinx-guides/source/api/intro.rst
+++ b/doc/sphinx-guides/source/api/intro.rst
@@ -204,6 +204,15 @@ Please note that some APIs are only documented in other guides that are more sui
- :doc:`/installation/config`
+- Developer Guide
+
+ - :doc:`/developers/aux-file-support`
+ - :doc:`/developers/big-data-support`
+ - :doc:`/developers/dataset-migration-api`
+ - :doc:`/developers/dataset-semantic-metadata-api`
+ - :doc:`/developers/s3-direct-upload-api`
+ - :doc:`/developers/workflows`
+
Client Libraries
~~~~~~~~~~~~~~~~
diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index 9fb019ab9dd..823efe05669 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -580,10 +580,32 @@ The fully expanded example above (without environment variables) looks like this
.. code-block:: bash
- curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/actions/:publish
+ curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/actions/:publish
You should expect a 200 ("OK") response and JSON output.
+Retrieve Guestbook Responses for a Dataverse Collection
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To retrieve a file containing a list of Guestbook Responses in CSV format for a Dataverse collection, you must know either its "alias" (which the GUI calls an "identifier") or its database ID. If the Dataverse collection has more than one guestbook, you may provide the ID of a single guestbook as an optional parameter. If no guestbook ID is provided, the results are the same as pressing the "Download All Responses" button on the Manage Dataset Guestbook page. If a guestbook ID is provided, only the responses from that guestbook are included in the file.
+
+.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below.
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export SERVER_URL=https://demo.dataverse.org
+ export ID=root
+ export GUESTBOOK_ID=1
+
+ curl -O -J -f -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/guestbookResponses?guestbookId=$GUESTBOOK_ID
+
+The fully expanded example above (without environment variables) looks like this:
+
+.. code-block:: bash
+
+ curl -O -J -f -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/guestbookResponses?guestbookId=1
+
Datasets
--------
@@ -609,13 +631,13 @@ Example: Getting the dataset whose DOI is *10.5072/FK2/J8SJZB*:
export SERVER_URL=https://demo.dataverse.org
export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB
- curl $SERVER_URL/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER
+ curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER
The fully expanded example above (without environment variables) looks like this:
.. code-block:: bash
- curl https://demo.dataverse.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB
+ curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB
Getting its draft version:
@@ -624,13 +646,13 @@ Getting its draft version:
export SERVER_URL=https://demo.dataverse.org
export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB
- curl http://$SERVER/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER
+ curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER
The fully expanded example above (without environment variables) looks like this:
.. code-block:: bash
- curl https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB
+ curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB
|CORS| Show the dataset whose id is passed:
@@ -825,14 +847,20 @@ through the Dataverse application.
For example, if you have a dataset version with 2 files, one with the folder named "subfolder":
+|image1|
+
.. |image1| image:: ./img/dataset_page_files_view.png
or, as viewed as a tree on the dataset page:
+|image2|
+
.. |image2| image:: ./img/dataset_page_tree_view.png
The output of the API for the top-level folder (``/api/datasets/{dataset}/dirindex/``) will be as follows:
+|image3|
+
.. |image3| image:: ./img/index_view_top.png
with the underlying html source:
@@ -851,6 +879,8 @@ with the underlying html source:
The ``/dirindex/?folder=subfolder`` link above will produce the following view:
+|image4|
+
.. |image4| image:: ./img/index_view_subfolder.png
with the html source as follows:
@@ -1201,6 +1231,14 @@ The fully expanded example above (without environment variables) looks like this
.. code-block:: bash
curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/24/privateUrl
+
+If Anonymized Access has been enabled on a Dataverse instance (see the :ref:`:AnonymizedFieldTypeNames` setting), an optional 'anonymizedAccess' query parameter is allowed.
+Setting anonymizedAccess=true in your call will create a PrivateURL that only allows an anonymized view of the Dataset (see :ref:`privateurl`).
+
+.. code-block:: bash
+
+ curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/24/privateUrl?anonymizedAccess=true
+
Get the Private URL for a Dataset
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2078,6 +2116,49 @@ The fully expanded example above (without environment variables) looks like this
Note: The ``id`` returned in the json response is the id of the file metadata version.
+
+
+Adding File Metadata
+~~~~~~~~~~~~~~~~~~~~
+
+This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds the file metadata to the database for files that have already been copied to storage.
+
+The jsonData object includes values for:
+
+* "description" - A description of the file
+* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset
+* "storageIdentifier" - String
+* "fileName" - String
+* "mimeType" - String
+* "fixity/checksum" either:
+
+ * "md5Hash" - String with MD5 hash value, or
+ * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings
+
+.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below.
+
+A curl example using a ``PERSISTENT_IDENTIFIER``
+
+* ``SERVER_URL`` - e.g. https://demo.dataverse.org
+* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section.
+* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV``
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export SERVER_URL=https://demo.dataverse.org
+ export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV
+ export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \
+ {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]"
+
+ curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA"
+
+The fully expanded example above (without environment variables) looks like this:
+
+.. code-block:: bash
+
+ curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]'
+
Updating File Metadata
~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/sphinx-guides/source/api/sword.rst b/doc/sphinx-guides/source/api/sword.rst
index 23d6588bd86..d853994f073 100755
--- a/doc/sphinx-guides/source/api/sword.rst
+++ b/doc/sphinx-guides/source/api/sword.rst
@@ -34,11 +34,15 @@ HTTP Basic Authentication commonly makes use of both a username and a password b
Clients such as ``curl`` expect both a username and a password separated by a colon. With ``curl``, the way to indicate that the password should be blank or empty is to include the colon at the end of the username (the API token) like this:
-``curl -u 54b143b5-d001-4254-afc0-a1c0f6a5b5a7:``
+.. code-block:: bash
+
+ curl -u 54b143b5-d001-4254-afc0-a1c0f6a5b5a7:
All the curl examples below take this form but instead of showing an API token like above, a Bash environment variable called ``$API_TOKEN`` is shown instead like this:
-``curl -u $API_TOKEN:``
+.. code-block:: bash
+
+ curl -u $API_TOKEN:
.. _RFC 7617: https://tools.ietf.org/html/rfc7617
@@ -53,9 +57,9 @@ Differences in Dataverse Software 4 from DVN 3.x lead to a few minor backward in
- Newly required fields when creating/editing datasets for compliance with the `Joint Declaration for Data Citation principles `_.
- - dcterms:creator (maps to authorName)
+ - ``dcterms:creator`` (maps to authorName)
- - dcterms:description
+ - ``dcterms:description``
- Deaccessioning is no longer supported. An alternative will be developed at https://github.com/IQSS/dataverse/issues/778
@@ -72,11 +76,11 @@ New features as of v1.1
- Datasets versions will only be increased to the next minor version (i.e. 1.1) rather than a major version (2.0) if possible. This depends on the nature of the change. Adding or removing a file, for example, requires a major version bump.
-- "Author Affiliation" can now be populated with an XML attribute. For example: Stumptown, Jane
+- "Author Affiliation" can now be populated with an XML attribute. For example: ``Stumptown, Jane``
-- "Contributor" can now be populated and the "Type" (Editor, Funder, Researcher, etc.) can be specified with an XML attribute. For example: CaffeineForAll
+- "Contributor" can now be populated and the "Type" (Editor, Funder, Researcher, etc.) can be specified with an XML attribute. For example: ``CaffeineForAll``
-- "License" can now be set with dcterms:license and the possible values are "CC0" and "NONE". "License" interacts with "Terms of Use" (dcterms:rights) in that if you include dcterms:rights in the XML, the license will be set to "NONE". If you don't include dcterms:rights, the license will default to "CC0". It is invalid to specify "CC0" as a license and also include dcterms:rights; an error will be returned. For backwards compatibility, dcterms:rights is allowed to be blank (i.e. ) but blank values will not be persisted to the database and the license will be set to "NONE".
+- "License" can now be set with ``dcterms:license`` and the possible values are "CC0" and "NONE". "License" interacts with "Terms of Use" (``dcterms:rights``) in that if you include ``dcterms:rights`` in the XML, the license will be set to "NONE". If you don't include ``dcterms:rights``, the license will default to "CC0". It is invalid to specify "CC0" as a license and also include ``dcterms:rights``; an error will be returned. For backwards compatibility, ``dcterms:rights`` is allowed to be blank (i.e. ````) but blank values will not be persisted to the database and the license will be set to "NONE".
- "Contact E-mail" is automatically populated from dataset owner's email.
@@ -94,18 +98,23 @@ Retrieve SWORD service document
The service document enumerates the Dataverse collections (also "collections" from a SWORD perspective) the user can deposit data into. The "collectionPolicy" element for each Dataverse collections contains the Terms of Use. Any user with an API token can use this API endpoint. Institution-wide Shibboleth groups are not respected because membership in such a group can only be set via a browser.
-``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/service-document``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/service-document
Create a dataset with an Atom entry
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
To create a dataset, you must have the "Dataset Creator" role (the ``AddDataset`` permission) on a Dataverse collection. Practically speaking, you should first retrieve the service document to list the Dataverse collections into which you are authorized to deposit data.
-``curl -u $API_TOKEN: --data-binary "@path/to/atom-entry-study.xml" -H "Content-Type: application/atom+xml" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: --data-binary "@path/to/atom-entry-study.xml" -H "Content-Type: application/atom+xml" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS
Example Atom entry (XML)
.. literalinclude:: sword-atom-entry.xml
+ :language: xml
Dublin Core Terms (DC Terms) Qualified Mapping - Dataverse Project DB Element Crosswalk
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -146,14 +155,18 @@ List datasets in a Dataverse Collection
You must have permission to add datasets in a Dataverse collection (the Dataverse collection should appear in the service document) to list the datasets inside. Institution-wide Shibboleth groups are not respected because membership in such a group can only be set via a browser.
-``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS
Add files to a dataset with a zip file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You must have ``EditDataset`` permission (Contributor role or above such as Curator or Admin) on the dataset to add files.
-``curl -u $API_TOKEN: --data-binary @path/to/example.zip -H "Content-Disposition: filename=example.zip" -H "Content-Type: application/zip" -H "Packaging: http://purl.org/net/sword/package/SimpleZip" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/study/doi:TEST/12345``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: --data-binary @path/to/example.zip -H "Content-Disposition: filename=example.zip" -H "Content-Type: application/zip" -H "Packaging: http://purl.org/net/sword/package/SimpleZip" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/study/doi:TEST/12345
Display a dataset atom entry
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -162,56 +175,72 @@ You must have ``ViewUnpublishedDataset`` permission (Contributor role or above s
Contains data citation (bibliographicCitation), alternate URI (persistent URI of study), edit URI, edit media URI, statement URI.
-``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345
Display a dataset statement
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Contains title, author, feed of file entries, latestVersionState, locked boolean, updated timestamp. You must have ``ViewUnpublishedDataset`` permission (Contributor role or above such as Curator or Admin) on the dataset to display the statement.
-``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/statement/study/doi:TEST/12345``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/statement/study/doi:TEST/12345
Delete a file by database id
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You must have ``EditDataset`` permission (Contributor role or above such as Curator or Admin) on the dataset to delete files.
-``curl -u $API_TOKEN: -X DELETE https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/123``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: -X DELETE https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/123
Replacing metadata for a dataset
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Please note that **ALL** metadata (title, author, etc.) will be replaced, including fields that can not be expressed with "dcterms" fields. You must have ``EditDataset`` permission (Contributor role or above such as Curator or Admin) on the dataset to replace metadata.
-``curl -u $API_TOKEN: --upload-file "path/to/atom-entry-study2.xml" -H "Content-Type: application/atom+xml" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: --upload-file "path/to/atom-entry-study2.xml" -H "Content-Type: application/atom+xml" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345
Delete a dataset
~~~~~~~~~~~~~~~~
You must have the ``DeleteDatasetDraft`` permission (Contributor role or above such as Curator or Admin) on the dataset to delete it. Please note that if the dataset has never been published you will be able to delete it completely but if the dataset has already been published you will only be able to delete post-publication drafts, never a published version.
-``curl -u $API_TOKEN: -i -X DELETE https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: -i -X DELETE https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345
Determine if a Dataverse Collection has been published
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This API endpoint is the same as the "list datasets in a Dataverse collection" endpoint documented above and the same permissions apply but it is documented here separately to point out that you can look for a boolean called ``dataverseHasBeenReleased`` to know if a Dataverse collection has been released, which is required for publishing a dataset.
-``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS``
+.. code-block:: bash
+
+ curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS
Publish a Dataverse Collection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``cat /dev/null`` and ``--data-binary @-`` arguments are used to send zero-length content to the API, which is required by the upstream library to process the ``In-Progress: false`` header. You must have the ``PublishDataverse`` permission (Admin role) on the Dataverse collection to publish it.
-``cat /dev/null | curl -u $API_TOKEN: -X POST -H "In-Progress: false" --data-binary @- https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/dataverse/$DATAVERSE_ALIAS``
+.. code-block:: bash
+
+ cat /dev/null | curl -u $API_TOKEN: -X POST -H "In-Progress: false" --data-binary @- https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/dataverse/$DATAVERSE_ALIAS
Publish a dataset
~~~~~~~~~~~~~~~~~
The ``cat /dev/null`` and ``--data-binary @-`` arguments are used to send zero-length content to the API, which is required by the upstream library to process the ``In-Progress: false`` header. You must have the ``PublishDataset`` permission (Curator or Admin role) on the dataset to publish it.
-``cat /dev/null | curl -u $API_TOKEN: -X POST -H "In-Progress: false" --data-binary @- https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345``
+.. code-block:: bash
+
+ cat /dev/null | curl -u $API_TOKEN: -X POST -H "In-Progress: false" --data-binary @- https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345
.. _known-issues:
@@ -249,4 +278,3 @@ Client libraries
- R: https://github.com/IQSS/dataverse-client-r
- Ruby: https://github.com/swordapp/sword2ruby
- PHP: https://github.com/swordapp/swordappv2-php-library
-
diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py
index 30c53f9febf..a68a623d24e 100755
--- a/doc/sphinx-guides/source/conf.py
+++ b/doc/sphinx-guides/source/conf.py
@@ -65,9 +65,9 @@
# built documents.
#
# The short X.Y version.
-version = '5.5'
+version = '5.6'
# The full version, including alpha/beta/rc tags.
-release = '5.5'
+release = '5.6'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/doc/sphinx-guides/source/developers/dataset-migration-api.rst b/doc/sphinx-guides/source/developers/dataset-migration-api.rst
new file mode 100644
index 00000000000..1dc8f7866e0
--- /dev/null
+++ b/doc/sphinx-guides/source/developers/dataset-migration-api.rst
@@ -0,0 +1,58 @@
+Dataset Migration API
+=====================
+
+The Dataverse software includes several ways to add Datasets originally created elsewhere (not to mention Harvesting capabilities). These include the Sword API (see the :doc:`/api/sword` guide) and the /dataverses/{id}/datasets/:import methods (json and ddi) (see the :doc:`/api/native-api` guide).
+
+This experimental migration API offers an additional option with some potential advantages:
+
+* metadata can be specified using the json-ld format used in the OAI-ORE metadata export
+* existing publication dates and PIDs are maintained (currently limited to the case where the PID can be managed by the Dataverse software, e.g. where the authority and shoulder match those the software is configured for)
+* updating the PID at the provider can be done immediately or later (with other existing APIs)
+* adding files can be done via the standard APIs, including using direct-upload to S3
+
+This API consists of 2 calls: one to create an initial Dataset version, and one to 'republish' the dataset through Dataverse with a specified publication date.
+Both calls require super-admin privileges.
+
+These calls can be used in concert with other API calls to add files, update metadata, etc. before the 'republish' step is done.
+
+
+Start Migrating a Dataset into a Dataverse Collection
+-----------------------------------------------------
+
+.. note:: This action requires a Dataverse installation account with superuser permissions.
+
+To import a dataset with an existing persistent identifier (PID), the provided json-ld metadata should include it.
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export SERVER_URL=https://demo.dataverse.org
+ export DATAVERSE_ID=root
+
+ curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$DATAVERSE_ID/datasets/:startmigration --upload-file dataset-migrate.jsonld
+
+An example jsonld file is available at :download:`dataset-migrate.jsonld <../_static/api/dataset-migrate.jsonld>` . Note that you would need to replace the PID in the sample file with one supported in your Dataverse instance. (Also note that `Issue #8028 `_ currently breaks testing this API with DataCite test DOIs.)
+
+Publish a Migrated Dataset
+--------------------------
+
+The call above creates a Dataset. Once it is created, other APIs can be used to add files, add additional metadata, etc. When a version is complete, the following call can be used to publish it with its original publication date.
+
+.. note:: This action requires a Dataverse installation account with superuser permissions.
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export SERVER_URL=https://demo.dataverse.org
+
+ curl -H 'Content-Type: application/ld+json' -H X-Dataverse-key:$API_TOKEN -X POST -d '{"schema:datePublished": "2020-10-26","@context":{ "schema":"http://schema.org/"}}' "$SERVER_URL/api/datasets/{id}/actions/:releasemigrated"
+
+datePublished is the only metadata supported in this call.
+
+An optional query parameter, ``updatepidatprovider`` (default is false), can be set to true to automatically update the metadata and targetUrl of the PID at the provider. With this set to true, the PID will redirect to this dataset rather than to the dataset in the source repository.
+
+.. code-block:: bash
+
+ curl -H 'Content-Type: application/ld+json' -H X-Dataverse-key:$API_TOKEN -X POST -d '{"schema:datePublished": "2020-10-26","@context":{ "schema":"http://schema.org/"}}' "$SERVER_URL/api/datasets/{id}/actions/:releasemigrated?updatepidatprovider=true"
+
+If the parameter is not set to true, other existing APIs can be used to update the PID at the provider later, e.g. :ref:`send-metadata-to-pid-provider`.
\ No newline at end of file
diff --git a/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst b/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst
new file mode 100644
index 00000000000..da28cc60c53
--- /dev/null
+++ b/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst
@@ -0,0 +1,103 @@
+Dataset Semantic Metadata API
+=============================
+
+The OAI_ORE metadata export format represents Dataset metadata using json-ld (see the :doc:`/admin/metadataexport` section). As part of an RDA-supported effort to allow import of Datasets exported as Bags with an included OAI_ORE metadata file,
+an experimental API has been created that provides a json-ld alternative to the v1.0 API calls to get/set/delete Dataset metadata in the :doc:`/api/native-api`.
+
+You may prefer to work with this API if you are building a tool to import from a Bag/OAI-ORE source or already work with json-ld representations of metadata, or if you prefer the flatter json-ld representation to Dataverse software's json representation (which includes structure related to the metadata blocks involved and the type/multiplicity of the metadata fields.)
+You may not want to use this API if you need stability and backward compatibility (the 'experimental' designation for this API implies that community feedback is desired and that, in future Dataverse software versions, the API may be modified based on that feedback).
+
+Note: The examples use the 'application/ld+json' mimetype. For compatibility reasons, the APIs can also be used with the mimetype "application/json-ld".
+
+Get Dataset Metadata
+--------------------
+
+To get the json-ld formatted metadata for a Dataset, specify the Dataset ID (DATASET_ID) or Persistent identifier (DATASET_PID), and, for specific versions, the version number.
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export DATASET_ID='12345'
+ export DATASET_PID='doi:10.5072/FK2A1B2C3'
+ export VERSION='1.0'
+ export SERVER_URL=https://demo.dataverse.org
+
+ # Example 1: Get metadata for version '1.0'
+
+ curl -H X-Dataverse-key:$API_TOKEN -H 'Accept: application/ld+json' "$SERVER_URL/api/datasets/$DATASET_ID/versions/$VERSION/metadata"
+
+ # Example 2: Get metadata for the latest version using the DATASET PID
+
+ curl -H X-Dataverse-key:$API_TOKEN -H 'Accept: application/ld+json' "$SERVER_URL/api/datasets/:persistentId/metadata?persistentId=$DATASET_PID"
+
+You should expect a 200 ("OK") response and JSON-LD mirroring the OAI-ORE representation in the returned 'data' object.
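+
+To view only the returned metadata, the response can be piped through ``jq`` (a sketch, assuming ``jq`` is installed):
+
+.. code-block:: bash
+
+ curl -s -H X-Dataverse-key:$API_TOKEN -H 'Accept: application/ld+json' "$SERVER_URL/api/datasets/:persistentId/metadata?persistentId=$DATASET_PID" | jq .data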
+
+
+Add Dataset Metadata
+--------------------
+
+To add json-ld formatted metadata for a Dataset, specify the Dataset ID (DATASET_ID) or Persistent identifier (DATASET_PID). Adding '?replace=true' will overwrite an existing metadata value. The default (replace=false) will only add new metadata or add a new value to a multi-valued field.
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export DATASET_ID='12345'
+ export DATASET_PID='doi:10.5072/FK2A1B2C3'
+ export VERSION='1.0'
+ export SERVER_URL=https://demo.dataverse.org
+
+ # Example: Change the Dataset title
+
+ curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"Title": "Submit menu test", "@context":{"Title": "http://purl.org/dc/terms/title"}}' "$SERVER_URL/api/datasets/$DATASET_ID/metadata?replace=true"
+
+ # Example 2: Add a description using the DATASET PID
+
+ curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"citation:Description": {"dsDescription:Text": "New description"}, "@context":{"citation": "https://dataverse.org/schema/citation/","dsDescription": "https://dataverse.org/schema/citation/dsDescription#"}}' "$SERVER_URL/api/datasets/:persistentId/metadata?persistentId=$DATASET_PID"
+
+You should expect a 200 ("OK") response indicating whether a draft Dataset version was created or an existing draft was updated.
+
+
+Delete Dataset Metadata
+-----------------------
+
+To delete metadata for a Dataset, send a json-ld representation of the fields to delete and specify the Dataset ID (DATASET_ID) or Persistent identifier (DATASET_PID).
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export DATASET_ID='12345'
+ export DATASET_PID='doi:10.5072/FK2A1B2C3'
+ export VERSION='1.0'
+ export SERVER_URL=https://demo.dataverse.org
+
+ # Example: Delete the TermsOfUseAndAccess 'restrictions' value 'No restrictions' for the latest version using the DATASET PID
+
+ curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"https://dataverse.org/schema/core#restrictions":"No restrictions"}' "$SERVER_URL/api/datasets/:persistentId/metadata/delete?persistentId=$DATASET_PID"
+
+Note that this example uses the term URI directly rather than adding an '@context' element. You can use either form in any of these API calls.
+
+You should expect a 200 ("OK") response indicating whether a draft Dataset version was created or an existing draft was updated.
+
+
+Create a Dataset
+----------------
+
+Specifying the Content-Type as application/ld+json with the existing /api/dataverses/{id}/datasets API call (see :ref:`create-dataset-command`) supports using the same metadata format when creating a Dataset.
+
+With curl, this is done by adding the following header:
+
+.. code-block:: bash
+
+ -H 'Content-Type: application/ld+json'
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export SERVER_URL=https://demo.dataverse.org
+ export DATAVERSE_ID=root
+ export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV
+
+ curl -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -X POST $SERVER_URL/api/dataverses/$DATAVERSE_ID/datasets --upload-file dataset-create.jsonld
+
+An example jsonld file is available at :download:`dataset-create.jsonld <../_static/api/dataset-create.jsonld>`
+
diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst
index ed1849e6059..61ab98bf292 100755
--- a/doc/sphinx-guides/source/developers/dev-environment.rst
+++ b/doc/sphinx-guides/source/developers/dev-environment.rst
@@ -85,9 +85,9 @@ To install Payara, run the following commands:
``cd /usr/local``
-``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2020.6/payara-5.2020.6.zip``
+``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.5/payara-5.2021.5.zip``
-``sudo unzip payara-5.2020.6.zip``
+``sudo unzip payara-5.2021.5.zip``
``sudo chown -R $USER /usr/local/payara5``
@@ -139,8 +139,6 @@ To install Solr, execute the following commands:
``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.8.1/schema_dv_mdb_fields.xml``
-``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.8.1/schema_dv_mdb_copies.xml``
-
``mv schema*.xml collection1/conf``
``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.8.1/solrconfig.xml``
diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst
index eebfd50ba35..78d4a6f378a 100755
--- a/doc/sphinx-guides/source/developers/index.rst
+++ b/doc/sphinx-guides/source/developers/index.rst
@@ -35,4 +35,6 @@ Developer Guide
big-data-support
aux-file-support
s3-direct-upload-api
+ dataset-semantic-metadata-api
+ dataset-migration-api
workflows
diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst
index 29be6ab1d93..8fc0c679a8b 100755
--- a/doc/sphinx-guides/source/developers/intro.rst
+++ b/doc/sphinx-guides/source/developers/intro.rst
@@ -19,7 +19,7 @@ To get started, you'll want to set up your :doc:`dev-environment` and make sure
Getting Help
------------
-If you have any questions at all, please reach out to other developers via the channels listed in https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md such as http://chat.dataverse.org (#dataverse on freenode), the `dataverse-dev `_ mailing list, `community calls `_, or support@dataverse.org.
+If you have any questions at all, please reach out to other developers via the channels listed in https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md such as http://chat.dataverse.org, the `dataverse-dev `_ mailing list, `community calls `_, or support@dataverse.org.
Core Technologies
-----------------
diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst
index 9f2386facb1..d1a71c313ca 100644
--- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst
+++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst
@@ -7,7 +7,7 @@ Direct upload involves a series of three activities, each involving interacting
* Requesting initiation of a transfer from the server
* Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3
-* A call to the server to register the file as part of the dataset/replace a file in the dataset or to cancel the transfer
+* A call to the server to register the file(s) as part of the dataset, replace a file in the dataset, or cancel the transfer
This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload.
Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.)
@@ -116,6 +116,38 @@ The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.Data
Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method.
With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above.
+To add multiple Uploaded Files to the Dataset
+-------------------------------------------------
+
+Once the files exist in the S3 bucket, a final API call is needed to add them to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter.
+jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for:
+
+* "description" - A description of the file
+* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset
+* "storageIdentifier" - String
+* "fileName" - String
+* "mimeType" - String
+* "fixity/checksum" either:
+
+ * "md5Hash" - String with MD5 hash value, or
+ * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings
+
+The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512
+
+.. code-block:: bash
+
+ export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ export SERVER_URL=https://demo.dataverse.org
+ export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV
+ export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \
+ {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]"
+
+ curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA"
+
+Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exist in S3/have been uploaded via some out-of-band method.
+With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above.
+
+
Replacing an existing file in the Dataset
-----------------------------------------
diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst
index bbfac33fcda..7bde4055e33 100755
--- a/doc/sphinx-guides/source/developers/testing.rst
+++ b/doc/sphinx-guides/source/developers/testing.rst
@@ -37,11 +37,9 @@ A unit test should execute an operation of your code in a controlled fashion. Yo
Unit Test Automation Overview
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We use a variety of tools to write, execute, and measure the code coverage of unit tests, including Maven, JUnit, Jacoco, GitHub, Travis, and Coveralls. We'll explain the role of each tool below, but here's an overview of what you can expect from the automation we've set up.
+We use a variety of tools to write, execute, and measure the code coverage of unit tests, including Maven, JUnit, Jacoco, GitHub, and Coveralls. We'll explain the role of each tool below, but here's an overview of what you can expect from the automation we've set up.
-As you prepare to make a pull request, as described in the :doc:`version-control` section, you will be working on a new branch you create from the "develop" branch. Let's say your branch is called ``1012-private-url``. As you work, you are constantly invoking Maven to build the war file. When you do a "clean and build" in Netbeans, Maven runs all the unit tests (anything ending with ``Test.java``) and the runs the results through a tool called Jacoco that calculates code coverage. When you push your branch to GitHub and make a pull request, a web service called Travis CI runs Maven and Jacoco on your branch and pushes the results to Coveralls, which is a web service that tracks changes to code coverage over time.
-
-To make this more concrete, observe that https://github.com/IQSS/dataverse/pull/3111 has comments from a GitHub user called ``coveralls`` saying things like "Coverage increased (+0.5%) to 5.547% when pulling dd6ceb1 on 1012-private-url into be5b26e on develop." Clicking on the comment should lead you to a URL such as https://coveralls.io/builds/7013870 which shows how code coverage has gone up or down. That page links to a page such as https://travis-ci.org/IQSS/dataverse/builds/144840165 which shows the build on the Travis side that pushed the results to Coveralls. Note that we have configured Coveralls to not mark small decreases in code coverage as a failure.
+As you prepare to make a pull request, as described in the :doc:`version-control` section, you will be working on a new branch you create from the "develop" branch. Let's say your branch is called ``1012-private-url``. As you work, you are constantly invoking Maven to build the war file. When you do a "clean and build" in Netbeans, Maven runs all the unit tests (anything ending with ``Test.java``) and then runs the results through a tool called Jacoco that calculates code coverage. When you push your branch to GitHub and make a pull request, GitHub Actions runs Maven and Jacoco on your branch and pushes the results to Coveralls, which is a web service that tracks changes to code coverage over time. Note that we have configured Coveralls to not mark small decreases in code coverage as a failure. You can find the Coveralls reports at https://coveralls.io/github/IQSS/dataverse
The main takeaway should be that we care about unit testing enough to measure the changes to code coverage over time using automation. Now let's talk about how you can help keep our code coverage up by writing unit tests with JUnit.
@@ -102,12 +100,10 @@ In addition, there is a writeup on "The Testable Command" at https://github.com/
Running Non-Essential (Excluded) Unit Tests
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-You should be aware that some unit tests have been deemed "non-essential" and have been annotated with ``@Category(NonEssentialTests.class)`` and are excluded from the "dev" Maven profile, which is the default profile. All unit tests (that have not been annotated with ``@Ignore``), including these non-essential tests, are run from continuous integration systems such as Jenkins and Travis CI with the following ``mvn`` command that invokes a non-default profile:
+You should be aware that some unit tests have been deemed "non-essential" and have been annotated with ``@Category(NonEssentialTests.class)`` and are excluded from the "dev" Maven profile, which is the default profile. All unit tests (that have not been annotated with ``@Ignore``), including these non-essential tests, are run from continuous integration systems such as Jenkins and GitHub Actions with the following ``mvn`` command that invokes a non-default profile:
``mvn test -P all-unit-tests``
-Typically https://travis-ci.org/IQSS/dataverse will show a higher number of unit tests executed because it uses the profile above.
-
Generally speaking, unit tests have been flagged as non-essential because they are slow or because they require an Internet connection. You should not feel obligated to run these tests continuously but you can use the ``mvn`` command above to run them. To iterate on the unit test in Netbeans and execute it with "Run -> Test File", you must temporarily comment out the annotation flagging the test as non-essential.
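As a hypothetical illustration only, a non-essential test might be flagged like the sketch below; the class and test names are invented, and the package in the ``NonEssentialTests`` import is an assumption about where that marker lives in the code base::

    import static org.junit.Assert.assertTrue;

    import org.junit.Test;
    import org.junit.experimental.categories.Category;

    // Assumed location of the NonEssentialTests marker within the Dataverse code base.
    import edu.harvard.iq.dataverse.NonEssentialTests;

    // Excluded from the default "dev" profile; executed by "mvn test -P all-unit-tests".
    @Category(NonEssentialTests.class)
    public class SlowExternalLookupTest {

        @Test
        public void lookupThatNeedsTheInternet() {
            // slow or Internet-dependent assertions would go here
            assertTrue(true);
        }
    }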
Integration Tests
@@ -246,7 +242,7 @@ Once installed, you may run commands with ``mvn [options] [] [`.
+If you are adding a new test class, be sure to add it to :download:`tests/integration-tests.txt <../../../../tests/integration-tests.txt>` so that our automated testing knows about it.
Writing and Using a Testcontainers Test
@@ -393,12 +389,12 @@ The script requires a file called ``files.txt`` to operate and database IDs for
Continuous Integration
----------------------
-The Dataverse Project currently makes use of two Continuous Integration platforms, Travis and Jenkins.
-
-Travis builds are configured via :download:`.travis.yml <../../../../.travis.yml>` and a `GitHub webhook `; build output is viewable at https://travis-ci.org/IQSS/dataverse/builds
+The Dataverse Project currently makes use of two Continuous Integration platforms, Jenkins and GitHub Actions.
Our Jenkins config is a work in progress and may be viewed at https://github.com/IQSS/dataverse-jenkins/ A corresponding GitHub webhook is required. Build output is viewable at https://jenkins.dataverse.org/
+GitHub Actions jobs can be found in ``.github/workflows``.
+
As always, pull requests to improve our continuous integration configurations are welcome.
Enhance build time by caching dependencies
@@ -438,13 +434,6 @@ How to Run the Phoenix Tests
- Log into Jenkins and click "Build Now" at https://build.hmdc.harvard.edu:8443/job/phoenix.dataverse.org-build-develop/
- Wait for all three chained Jenkins jobs to complete and note if they passed or failed. If you see a failure, open a GitHub issue or at least get the attention of some developers.
-List of Tests Run Against the Phoenix Server
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-We haven't thought much about a good way to publicly list the "IT" classes that are executed against the phoenix server. (Currently your best bet is to look at the ``Executing Maven`` line at the top of the "Full Log" of "Console Output" of ``phoenix.dataverse.org-apitest-develop`` Jenkins job mentioned above.) We endeavor to keep the list of tests in the "all-in-one" Docker environment described above in sync with the list of tests configured in Jenkins. That is to say, refer to :download:`run-test-suite.sh <../../../../conf/docker-aio/run-test-suite.sh>` mentioned in ``conf/docker-aio/readme.md`` for the current list of IT tests that are expected to pass. Here's a dump of that file:
-
-.. literalinclude:: ../../../../conf/docker-aio/run-test-suite.sh
-
Accessibility Testing
---------------------
@@ -482,7 +471,6 @@ Future Work on Integration Tests
- Automate testing of dataverse-client-python: https://github.com/IQSS/dataverse-client-python/issues/10
- Work with @leeper on testing the R client: https://github.com/IQSS/dataverse-client-r
- Review and attempt to implement "API Test Checklist" from @kcondon at https://docs.google.com/document/d/199Oq1YwQ4pYCguaeW48bIN28QAitSk63NbPYxJHCCAE/edit?usp=sharing
-- Attempt to use @openscholar approach for running integration tests using Travis https://github.com/openscholar/openscholar/blob/SCHOLAR-3.x/.travis.yml (probably requires using Ubuntu rather than CentOS)
- Generate code coverage reports for **integration** tests: https://github.com/pkainulainen/maven-examples/issues/3 and http://www.petrikainulainen.net/programming/maven/creating-code-coverage-reports-for-unit-and-integration-tests-with-the-jacoco-maven-plugin/
- Consistent logging of API Tests. Show test name at the beginning and end and status codes returned.
- expected passing and known/expected failing integration tests: https://github.com/IQSS/dataverse/issues/4438
@@ -495,15 +483,14 @@ Browser-Based Testing
Installation Testing
~~~~~~~~~~~~~~~~~~~~
-- Run `vagrant up` on a server to test the installer: http://guides.dataverse.org/en/latest/developers/tools.html#vagrant . We haven't been able to get this working in Travis: https://travis-ci.org/IQSS/dataverse/builds/96292683 . Perhaps it would be possible to use AWS as a provider from Vagrant judging from https://circleci.com/gh/critical-alert/circleci-vagrant/6 .
-- Work with @lwo to automate testing of https://github.com/IQSS/dataverse-puppet . Consider using Travis: https://github.com/IQSS/dataverse-puppet/issues/10
-- Work with @donsizemore to automate testing of https://github.com/GlobalDataverseCommunityConsortium/dataverse-ansible with Travis or similar.
+- Run `vagrant up` on a server to test the installer
+- Work with @donsizemore to automate testing of https://github.com/GlobalDataverseCommunityConsortium/dataverse-ansible
Future Work on Load/Performance Testing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Clean up and copy stress tests code, config, and docs into main repo from https://github.com/IQSS/dataverse-helper-scripts/tree/master/src/stress_tests
-- Marcel Duran created a command-line wrapper for the WebPagetest API that can be used to test performance in your continuous integration pipeline (TAP, Jenkins, Travis-CI, etc): https://github.com/marcelduran/webpagetest-api/wiki/Test-Specs#jenkins-integration
+- Marcel Duran created a command-line wrapper for the WebPagetest API that can be used to test performance in your continuous integration pipeline (TAP, Jenkins, etc.): https://github.com/marcelduran/webpagetest-api/wiki/Test-Specs#jenkins-integration
- Create top-down checklist, building off the "API Test Coverage" spreadsheet at https://github.com/IQSS/dataverse/issues/3358#issuecomment-256400776
Future Work on Accessibility Testing
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index 39f27f749dc..072a8df0183 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -205,6 +205,7 @@ Here are the configuration options for handles:
- :ref:`:IdentifierGenerationStyle <:IdentifierGenerationStyle>` (optional)
- :ref:`:DataFilePIDFormat <:DataFilePIDFormat>` (optional)
- :ref:`:IndependentHandleService <:IndependentHandleService>` (optional)
+- :ref:`:HandleAuthHandle <:HandleAuthHandle>` (optional)
Note: If you are **minting your own handles** and plan to set up your own handle service, please refer to `Handle.Net documentation `_.
@@ -528,24 +529,25 @@ Lastly, go ahead and restart your Payara server. With Dataverse deployed and the
S3 Storage Options
##################
-=========================================== ================== ========================================================================= =============
-JVM Option Value Description Default value
-=========================================== ================== ========================================================================= =============
-dataverse.files.storage-driver-id Enable as the default storage driver. ``file``
-dataverse.files..bucket-name > The bucket name. See above. (none)
-dataverse.files..download-redirect ``true``/``false`` Enable direct download or proxy through Dataverse. ``false``
-dataverse.files..upload-redirect ``true``/``false`` Enable direct upload of files added to a dataset to the S3 store. ``false``
-dataverse.files..ingestsizelimit Maximum size of directupload files that should be ingested (none)
-dataverse.files..url-expiration-minutes > If direct uploads/downloads: time until links expire. Optional. 60
-dataverse.files..min-part-size > Multipart direct uploads will occur for files larger than this. Optional. ``1024**3``
-dataverse.files..custom-endpoint-url > Use custom S3 endpoint. Needs URL either with or without protocol. (none)
-dataverse.files..custom-endpoint-region > Only used when using custom endpoint. Optional. ``dataverse``
-dataverse.files..proxy-url > URL of a proxy protecting the S3 store. Optional. (none)
-dataverse.files..path-style-access ``true``/``false`` Use path style buckets instead of subdomains. Optional. ``false``
-dataverse.files..payload-signing ``true``/``false`` Enable payload signing. Optional ``false``
-dataverse.files..chunked-encoding ``true``/``false`` Disable chunked encoding. Optional ``true``
-dataverse.files..connection-pool-size > The maximum number of open connections to the S3 server ``256``
-=========================================== ================== ========================================================================= =============
+=========================================== ================== ========================================================================== =============
+JVM Option Value Description Default value
+=========================================== ================== ========================================================================== =============
+dataverse.files.storage-driver-id Enable as the default storage driver. ``file``
+dataverse.files..bucket-name > The bucket name. See above. (none)
+dataverse.files..download-redirect ``true``/``false`` Enable direct download or proxy through Dataverse. ``false``
+dataverse.files..upload-redirect ``true``/``false`` Enable direct upload of files added to a dataset to the S3 store. ``false``
+dataverse.files..ingestsizelimit Maximum size of directupload files that should be ingested (none)
+dataverse.files..url-expiration-minutes > If direct uploads/downloads: time until links expire. Optional. 60
+dataverse.files..min-part-size > Multipart direct uploads will occur for files larger than this. Optional. ``1024**3``
+dataverse.files..custom-endpoint-url > Use custom S3 endpoint. Needs URL either with or without protocol. (none)
+dataverse.files..custom-endpoint-region > Only used when using custom endpoint. Optional. ``dataverse``
+dataverse.files..profile > Allows the use of AWS profiles for storage spanning multiple AWS accounts. (none)
+dataverse.files..proxy-url > URL of a proxy protecting the S3 store. Optional. (none)
+dataverse.files..path-style-access ``true``/``false`` Use path style buckets instead of subdomains. Optional. ``false``
+dataverse.files..payload-signing ``true``/``false`` Enable payload signing. Optional ``false``
+dataverse.files..chunked-encoding ``true``/``false`` Disable chunked encoding. Optional ``true``
+dataverse.files..connection-pool-size > The maximum number of open connections to the S3 server ``256``
+=========================================== ================== ========================================================================== =============
Reported Working S3-Compatible Storage
######################################
@@ -604,9 +606,9 @@ Once you have the location of your custom homepage HTML file, run this curl comm
``curl -X PUT -d '/var/www/dataverse/branding/custom-homepage.html' http://localhost:8080/api/admin/settings/:HomePageCustomizationFile``
-If you prefer to start with less of a blank slate, you can download the :download:`custom-homepage-dynamic.html ` template which was built for the Harvard Dataverse Repository, and includes branding messaging, action buttons, search input, subject links, and recent dataset links. This page was built to utilize the :doc:`/api/metrics` to deliver dynamic content to the page via javascript.
+If you prefer to start with less of a blank slate, you can review the custom homepage used by the Harvard Dataverse Repository, which includes branding messaging, action buttons, search input, subject links, and recent dataset links. This page was built to utilize the :doc:`/api/metrics` to deliver dynamic content to the page via JavaScript. The files can be found at https://github.com/IQSS/dataverse.harvard.edu
-Note that the ``custom-homepage.html`` and ``custom-homepage-dynamic.html`` files provided have multiple elements that assume your root Dataverse collection still has an alias of "root". While you were branding your root Dataverse collection, you may have changed the alias to "harvard" or "librascholar" or whatever and you should adjust the custom homepage code as needed.
+Note that the ``custom-homepage.html`` file provided has multiple elements that assume your root Dataverse collection still has an alias of "root". While you were branding your root Dataverse collection, you may have changed the alias to "harvard" or "librascholar" or whatever and you should adjust the custom homepage code as needed.
For more background on what this curl command above is doing, see the "Database Settings" section below. If you decide you'd like to remove this setting, use the following curl command:
@@ -1476,49 +1478,96 @@ Out of the box, the DOI shoulder is set to "FK2/" but this is for testing only!
:IdentifierGenerationStyle
++++++++++++++++++++++++++
-By default, the Dataverse Software generates a random 6 character string, pre-pended by the Shoulder if set, to use as the identifier
-for a Dataset. Set this to ``sequentialNumber`` to use sequential numeric values
-instead (again pre-pended by the Shoulder if set). (the assumed default setting is ``randomString``).
-In addition to this setting, a database sequence must be created in the database.
-We provide the script below (downloadable :download:`here `).
-You may need to make some changes to suit your system setup, see the comments for more information:
+By default, the Dataverse Software generates a random 6 character string,
+pre-pended by the Shoulder if set, to use as the identifier for a Dataset.
+Set this to ``storedProcGenerated`` to generate instead a custom *unique*
+identifier (again pre-pended by the Shoulder if set) through a database
+stored procedure or function (the assumed default setting is ``randomString``).
+In addition to this setting, a stored procedure or function must be created in
+the database.
+
+As a first example, the script below (downloadable
+:download:`here `) produces
+sequential numerical values. You may need to make some changes to suit your
+system setup, see the comments for more information:
.. literalinclude:: ../_static/util/createsequence.sql
+ :language: plpgsql
+
+As a second example, the script below (downloadable
+:download:`here `) produces
+sequential 8 character identifiers from a base36 representation of the
+current timestamp.
+
+.. literalinclude:: ../_static/util/identifier_from_timestamp.sql
+ :language: plpgsql
-Note that the SQL above is Postgres-specific. If necessary, it can be reimplemented
-in any other SQL flavor - the standard JPA code in the application simply expects
-the database to have a saved function ("stored procedure") named ``generateIdentifierAsSequentialNumber``
-with the single return argument ``identifier``.
+Note that the SQL in these example scripts is Postgres-specific.
+If necessary, it can be reimplemented in any other SQL flavor - the standard
+JPA code in the application simply expects the database to have a saved
+function ("stored procedure") named ``generateIdentifierFromStoredProcedure()``
+returning a single ``varchar`` argument.
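
For reference, the sketch below shows roughly how the application declares and calls such a procedure through JPA (mirroring the ``@NamedStoredProcedureQuery`` on the ``Dataset`` entity in this changeset); the wrapper class and method are condensed here for illustration and are not the actual application code::

    import javax.persistence.EntityManager;
    import javax.persistence.NamedStoredProcedureQuery;
    import javax.persistence.ParameterMode;
    import javax.persistence.StoredProcedureParameter;
    import javax.persistence.StoredProcedureQuery;

    // Normally declared on the Dataset entity; shown on a plain class here only to keep the sketch short.
    @NamedStoredProcedureQuery(
            name = "Dataset.generateIdentifierFromStoredProcedure",
            procedureName = "generateIdentifierFromStoredProcedure",
            parameters = {
                @StoredProcedureParameter(mode = ParameterMode.OUT, type = String.class)
            })
    class StoredProcedureIdentifierSketch {

        String nextIdentifier(EntityManager em) {
            StoredProcedureQuery query =
                    em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure");
            query.execute();
            // The single OUT parameter holds the generated identifier string.
            return (String) query.getOutputParameterValue(1);
        }
    }

Because the contract is simply "a saved function with this name that returns a varchar", the SQL side can be swapped (sequential numbers, timestamps, or any other scheme) without touching the Java code.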
-Please note that ``:IdentifierGenerationStyle`` also plays a role for the "identifier" for files. See the section on ``:DataFilePIDFormat`` below for more details.
+Please note that ``:IdentifierGenerationStyle`` also plays a role for the
+"identifier" for files. See the section on :ref:`:DataFilePIDFormat` below for
+more details.
.. _:DataFilePIDFormat:
:DataFilePIDFormat
++++++++++++++++++
-This setting controls the way that the "identifier" component of a file's persistent identifier (PID) relates to the PID of its "parent" dataset.
-
-By default the identifier for a file is dependent on its parent dataset. For example, if the identifier of a dataset is "TJCLKP", the identifier for a file within that dataset will consist of the parent dataset's identifier followed by a slash ("/"), followed by a random 6 character string, yielding "TJCLKP/MLGWJO". Identifiers in this format are what you should expect if you leave ``:DataFilePIDFormat`` undefined or set it to ``DEPENDENT`` and have not changed the ``:IdentifierGenerationStyle`` setting from its default.
-
-Alternatively, the identifier for File PIDs can be configured to be independent of Dataset PIDs using the setting "``INDEPENDENT``". In this case, file PIDs will not contain the PIDs of their parent datasets, and their PIDs will be generated the exact same way that datasets' PIDs are, based on the ``:IdentifierGenerationStyle`` setting described above (random 6 character strings or sequential numbers, pre-pended by any shoulder).
-
-The chart below shows examples from each possible combination of parameters from the two settings. ``:IdentifierGenerationStyle`` can be either ``randomString`` (the default) or ``sequentialNumber`` and ``:DataFilePIDFormat`` can be either ``DEPENDENT`` (the default) or ``INDEPENDENT``. In the examples below the "identifier" for the dataset is "TJCLKP" for "randomString" and "100001" for "sequentialNumber".
-
-+-----------------+---------------+------------------+
-| | randomString | sequentialNumber |
-| | | |
-+=================+===============+==================+
-| **DEPENDENT** | TJCLKP/MLGWJO | 100001/1 |
-+-----------------+---------------+------------------+
-| **INDEPENDENT** | MLGWJO | 100002 |
-+-----------------+---------------+------------------+
-
-As seen above, in cases where ``:IdentifierGenerationStyle`` is set to *sequentialNumber* and ``:DataFilePIDFormat`` is set to *DEPENDENT*, each file within a dataset will be assigned a number *within* that dataset starting with "1".
-
-Otherwise, if ``:DataFilePIDFormat`` is set to *INDEPENDENT*, then each file will be assigned a PID with the next number in the overall sequence, regardless of what dataset it is in. If the file is created after a dataset with the PID 100001, then the file will be assigned the PID 100002. This option is functional, but it is not a recommended use case.
-
-Note that in either case, when using the ``sequentialNumber`` option, datasets and files share the same database sequence that was created as part of the setup described in ``:IdentifierGenerationStyle`` above.
+This setting controls the way that the "identifier" component of a file's
+persistent identifier (PID) relates to the PID of its "parent" dataset.
+
+By default the identifier for a file is dependent on its parent dataset.
+For example, if the identifier of a dataset is "TJCLKP", the identifier for
+a file within that dataset will consist of the parent dataset's identifier
+followed by a slash ("/"), followed by a random 6 character string,
+yielding "TJCLKP/MLGWJO". Identifiers in this format are what you should
+expect if you leave ``:DataFilePIDFormat`` undefined or set it to
+``DEPENDENT`` and have not changed the ``:IdentifierGenerationStyle``
+setting from its default.
+
+Alternatively, the identifier for File PIDs can be configured to be
+independent of Dataset PIDs using the setting ``INDEPENDENT``.
+In this case, file PIDs will not contain the PIDs of their parent datasets,
+and their PIDs will be generated the exact same way that datasets' PIDs are,
+based on the ``:IdentifierGenerationStyle`` setting described above
+(random 6 character strings or custom unique identifiers through a stored
+procedure, pre-pended by any shoulder).
+
+The chart below shows examples from each possible combination of parameters
+from the two settings. ``:IdentifierGenerationStyle`` can be either
+``randomString`` (the default) or ``storedProcGenerated`` and
+``:DataFilePIDFormat`` can be either ``DEPENDENT`` (the default) or
+``INDEPENDENT``. In the examples below the "identifier" for the dataset is
+"TJCLKP" for ``randomString`` and "100001" for ``storedProcGenerated`` (when
+using sequential numerical values, as described in
+:ref:`:IdentifierGenerationStyle` above), or "krby26qt" for
+``storedProcGenerated`` (when using base36 timestamps, as described in
+:ref:`:IdentifierGenerationStyle` above).
+
++-----------------+---------------+----------------------+---------------------+
+| | randomString | storedProcGenerated | storedProcGenerated |
+| | | | |
+| | | (sequential numbers) | (base36 timestamps) |
++=================+===============+======================+=====================+
+| **DEPENDENT** | TJCLKP/MLGWJO | 100001/1 | krby26qt/1 |
++-----------------+---------------+----------------------+---------------------+
+| **INDEPENDENT** | MLGWJO | 100002 | krby27pz |
++-----------------+---------------+----------------------+---------------------+
+
+As seen above, in cases where ``:IdentifierGenerationStyle`` is set to
+``storedProcGenerated`` and ``:DataFilePIDFormat`` is set to ``DEPENDENT``,
+each file within a dataset will be assigned a number *within* that dataset
+starting with "1".
+
+Otherwise, if ``:DataFilePIDFormat`` is set to ``INDEPENDENT``, each file
+within the dataset is assigned a new PID, which is the next available
+identifier provided by the database stored procedure. In our example:
+"100002" when using sequential numbers or "krby27pz" when using base36
+timestamps.
.. _:FilePIDsEnabled:
@@ -1543,6 +1592,17 @@ By default this setting is absent and the Dataverse Software assumes it to be fa
``curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:IndependentHandleService``
+.. _:HandleAuthHandle:
+
+:HandleAuthHandle
++++++++++++++++++++++++++
+
+Specific to Handle PIDs. Set this setting to the handle (in prefix/suffix form) to be used on a global handle service when the public key is NOT stored in the default handle.
+By default this setting is absent and the Dataverse Software assumes it to be not set. For example, if the public key is stored in the handle 21.T12996/USER01,
+the prefix is '21.T12996' and the suffix is 'USER01'. The command to execute is then:
+
+``curl -X PUT -d '21.T12996/USER01' http://localhost:8080/api/admin/settings/:HandleAuthHandle``
+
.. _:FileValidationOnPublishEnabled:
:FileValidationOnPublishEnabled
@@ -1677,10 +1737,12 @@ Notes:
:ZipDownloadLimit
+++++++++++++++++
-For performance reasons, your Dataverse installation will only create zip files on the fly up to 100 MB but the limit can be increased. Here's an example of raising the limit to 1 GB:
+For performance reasons, your Dataverse installation will only allow creation of zip files up to 100 MB, but the limit can be increased. Here's an example of raising the limit to 1 GB:
``curl -X PUT -d 1000000000 http://localhost:8080/api/admin/settings/:ZipDownloadLimit``
+In the UI, users trying to download a zip file larger than the Dataverse installation's ``:ZipDownloadLimit`` will see a message explaining that the zip file is too large, and will be presented with alternate access options.
+
:TabularIngestSizeLimit
+++++++++++++++++++++++
@@ -2236,3 +2298,13 @@ By default, the name of the root Dataverse collection is used as the 'brandname'
++++++++++++++++++++++++++++++++++++++++++++++
In the DDI metadata exports, the default behavior is to always add the repository (using its brandname - the root collection name or the value of :ref:`:InstallationName <:InstallationName>`) to the stdyDscr/distStmt/distrbtr element. If this setting is true, this will only be done when a Distributor is not already defined in the Dataset metadata. (Note that, since metadata export files are cached, they will have to be reexported (see :doc:`/admin/metadataexport`) before they incorporate a change in this setting.)
+
+.. _:AnonymizedFieldTypeNames:
+
+:AnonymizedFieldTypeNames
++++++++++++++++++++++++++
+
+A comma-separated list of field type names that should be 'withheld' when dataset access occurs via a Private URL with Anonymized Access (e.g. to support anonymized review).
+A suggested minimum includes author, datasetContact, and contributor, but additional fields such as depositor, grantNumber, and publication might also need to be included.
+
+``curl -X PUT -d 'author, datasetContact, contributor, depositor, grantNumber, publication' http://localhost:8080/api/admin/settings/:AnonymizedFieldTypeNames``
diff --git a/doc/sphinx-guides/source/installation/intro.rst b/doc/sphinx-guides/source/installation/intro.rst
index 6c6199af02d..4dd5f9e8795 100644
--- a/doc/sphinx-guides/source/installation/intro.rst
+++ b/doc/sphinx-guides/source/installation/intro.rst
@@ -36,7 +36,7 @@ Getting Help
To get help installing or configuring a Dataverse installation, please try one or more of:
- posting to the `dataverse-community `_ Google Group.
-- asking at http://chat.dataverse.org (#dataverse on the freenode IRC network)
+- asking at http://chat.dataverse.org
- emailing support@dataverse.org to open a private ticket at https://help.hmdc.harvard.edu
Improving this Guide
diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst
index f0f7fab3511..e3dc04ac70b 100644
--- a/doc/sphinx-guides/source/installation/prerequisites.rst
+++ b/doc/sphinx-guides/source/installation/prerequisites.rst
@@ -44,7 +44,7 @@ On RHEL/derivative you can make Java 11 the default with the ``alternatives`` co
Payara
------
-Payara 5.2020.6 is recommended. Newer versions might work fine, regular updates are recommended.
+Payara 5.2021.5 is recommended. Newer versions might work fine, regular updates are recommended.
Installing Payara
=================
@@ -55,8 +55,8 @@ Installing Payara
- Download and install Payara (installed in ``/usr/local/payara5`` in the example commands below)::
- # wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2020.6/payara-5.2020.6.zip
- # unzip payara-5.2020.6.zip
+ # wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.5/payara-5.2021.5.zip
+ # unzip payara-5.2021.5.zip
# mv payara5 /usr/local
If you intend to install and run Payara under a service account (and we hope you do), chown -R the Payara hierarchy to root to protect it but give the service account access to the below directories:
@@ -362,7 +362,14 @@ The Dataverse Software uses `Rserve `_ to communicat
to R. Rserve is installed as a library package, as described in the
step above. It runs as a daemon process on the server, accepting
network connections on a dedicated port. This requires some extra
-configuration and we provide a script (:fixedwidthplain:`scripts/r/rserve/rserve-setup.sh`) for setting it up.
+configuration and we provide a script for setting it up.
+
+You'll want to obtain local copies of the Rserve setup files found in
+https://github.com/IQSS/dataverse/tree/master/scripts/r/rserve
+either by cloning a local copy of the IQSS repository:
+:fixedwidthplain:`git clone -b master https://github.com/IQSS/dataverse.git`
+or by downloading the files individually.
+
Run the script as follows (as root)::
cd /scripts/r/rserve
diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst
index 262862f29fc..393f9ac202a 100755
--- a/doc/sphinx-guides/source/user/dataset-management.rst
+++ b/doc/sphinx-guides/source/user/dataset-management.rst
@@ -508,10 +508,11 @@ Creating a Private URL for your dataset allows you to share your dataset (for vi
#. Go to your unpublished dataset
#. Select the “Edit” button
#. Select “Private URL” in the dropdown menu
-#. In the pop-up select “Create Private URL”
+#. In the pop-up select “Create Private URL” or “Create URL for Anonymized Access”. The latter supports anonymous review by removing author names and other potentially identifying information from citations, version history tables, and some metadata fields (as configured by the administrator).
#. Copy the Private URL which has been created for this dataset and it can now be shared with anyone you wish to have access to view or download files in your unpublished dataset.
To disable a Private URL and to revoke access, follow the same steps as above until step #3 when you return to the popup, click the “Disable Private URL” button.
+Note that only one Private URL (normal or with anonymized access) can be configured per dataset at a time.
Dataset Versions
================
diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst
index 9c640a99aa7..99f088db41c 100755
--- a/doc/sphinx-guides/source/versions.rst
+++ b/doc/sphinx-guides/source/versions.rst
@@ -6,8 +6,9 @@ Dataverse Software Documentation Versions
This list provides a way to refer to the documentation for previous versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo.
-- 5.5
+- 5.6
+- `5.5 `__
- `5.4.1 `__
- `5.4 `__
- `5.3 `__
diff --git a/doc/theTestableCommand/TheTestableCommand.md b/doc/theTestableCommand/TheTestableCommand.md
index e60faa313f0..5a9fc259d4a 100644
--- a/doc/theTestableCommand/TheTestableCommand.md
+++ b/doc/theTestableCommand/TheTestableCommand.md
@@ -21,7 +21,7 @@ While they can't replace end-to-end tests, unit tests are a great way to validat
Because unit tests are easy to create (Java only, no configuration needed) and quick to run, it is possible to write many of them, such that many aspects of the code are tested. Normally, a single unit test would test a single use case of the unit. This way, when a unit test fails, the failure describes exactly what part stopped functioning. Other unit tests are not blocked by the failure, and so by running the entire test suite, the developer can get a good overview of which parts are broken and which parts are functioning well.
-Because unit tests are easy to execute, it is recommended to get in the habit of running them prior to committing code changes to the repository. These tests are also integrated into Dataverse's automatic build processes (on [Travis-ci](https://travis-ci.org/IQSS/dataverse)). A failed test halts the build. Dataverse's build process also collects data about code coverage during the unit tests, using [Coveralls](https://coveralls.io/github/IQSS/dataverse). While code coverage is a problematic measure for Java EE applications (and has some inherent problems as well), generally speaking larger coverage means better testing.
+Because unit tests are easy to execute, it is recommended to get in the habit of running them prior to committing code changes to the repository. These tests are also integrated into Dataverse's automatic build processes. A failed test halts the build. Dataverse's build process also collects data about code coverage during the unit tests, using [Coveralls](https://coveralls.io/github/IQSS/dataverse). While code coverage is a problematic measure for Java EE applications (and has some inherent problems as well), generally speaking larger coverage means better testing.
Unit Testing of application logic in Java EE applications is normally hard to do, as the application logic lives in the service beans, which rely on dependency injections. Writing unit tests for service beans is possible, but as it involves a test container, and a persistent context (read: in-memory database) these unit tests are not very unit-y.
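
As a generic, hypothetical illustration (not actual Dataverse code) of why logic that receives its collaborators explicitly is much easier to unit test than container-managed beans:

```java
import static org.junit.Assert.assertEquals;

import org.junit.Test;

// Generic illustration (not Dataverse code): logic that receives its collaborator through the
// constructor can be unit tested with a hand-written fake, without a container or a database.
public class GreetingServiceTest {

    interface UserLookup {
        String displayNameOf(long userId);
    }

    static class GreetingService {
        private final UserLookup lookup;

        GreetingService(UserLookup lookup) {
            this.lookup = lookup;
        }

        String greet(long userId) {
            return "Hello, " + lookup.displayNameOf(userId) + "!";
        }
    }

    @Test
    public void greetsUserByDisplayName() {
        // A lambda stands in for the real lookup service.
        GreetingService service = new GreetingService(id -> "Ada");
        assertEquals("Hello, Ada!", service.greet(42L));
    }
}
```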
diff --git a/downloads/download.sh b/downloads/download.sh
index 8c2b51dd4c7..33476c24b76 100755
--- a/downloads/download.sh
+++ b/downloads/download.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-curl -L -O https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2020.6/payara-5.2020.6.zip
+curl -L -O https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.5/payara-5.2021.5.zip
curl -L -O https://archive.apache.org/dist/lucene/solr/8.8.1/solr-8.8.1.tgz
curl -L -O https://search.maven.org/remotecontent?filepath=org/jboss/weld/weld-osgi-bundle/2.2.10.Final/weld-osgi-bundle-2.2.10.Final-glassfish4.jar
curl -s -L http://sourceforge.net/projects/schemaspy/files/schemaspy/SchemaSpy%205.0.0/schemaSpy_5.0.0.jar/download > schemaSpy_5.0.0.jar
diff --git a/pom.xml b/pom.xml
index 331c616f743..bb53d1efa64 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
-->
edu.harvard.iqdataverse
- 5.5
+ 5.6wardataverse
@@ -26,11 +26,13 @@
false8.0.0
- 5.2020.6
+ 5.2021.542.2.191.11.7621.2
- 4.5.5
+ 3.12.0
+ 4.5.13
+ 0.157.04.13.15.7.0${junit.jupiter.version}
@@ -58,6 +60,18 @@
+
+ payara-nexus-artifacts
+ Payara Nexus Artifacts
+ https://nexus.payara.fish/repository/payara-artifacts
+
+ true
+
+
+ false
+
+
+
payara-patched-externalsPayara Patched Externals
@@ -120,6 +134,11 @@
commons-logging${commons.logging.version}
+
+ org.apache.commons
+ commons-lang3
+ ${commons.lang3.version}
+ org.apache.httpcomponentshttpclient
@@ -128,7 +147,7 @@
com.google.cloudgoogle-cloud-bom
- 0.115.0-alpha
+ ${google.cloud.version}pomimport
@@ -155,9 +174,21 @@
org.glassfishjavax.json
- 1.0.4
+ 1.1.4test
+
+ org.skyscreamer
+ jsonassert
+ 1.5.0
+ test
+
+
+ com.vaadin.external.google
+ android-json
+
+
+ org.apache.httpcomponentshttpclient
@@ -200,6 +231,11 @@
aws-java-sdk-s3
+
+ com.apicatalog
+ titanium-json-ld
+ 0.8.6
+ org.apache.abdera
@@ -288,6 +324,11 @@
${jakartaee-api.version}provided
+
+ com.sun.mail
+ jakarta.mail
+ provided
+ org.glassfishjakarta.faces
@@ -296,7 +337,7 @@
org.primefacesprimefaces
- 8.0
+ 10.0.0org.primefaces.themes
@@ -318,9 +359,14 @@
provided
- commons-lang
- commons-lang
- 2.6
+ org.apache.commons
+ commons-lang3
+
+
+
+ org.apache.commons
+ commons-text
+ 1.9org.apache.solr
@@ -486,7 +532,7 @@
com.nimbusdsoauth2-oidc-sdk
- 6.18
+ 9.9.1
@@ -594,7 +640,6 @@
com.google.cloudgoogle-cloud-storage
- 1.97.0
@@ -772,7 +817,7 @@
org.eluder.coverallscoveralls-maven-plugin
- 4.0.0
+ 4.3.0javax.xml.bind
@@ -794,12 +839,19 @@
org.apache.maven.pluginsmaven-checkstyle-plugin
- 3.0.0
+ 3.1.2checkstyle.xmlUTF-8true
+
+
+ com.puppycrawl.tools
+ checkstyle
+ 8.42
+
+
diff --git a/scripts/installer/Makefile b/scripts/installer/Makefile
index 180e2cb03d5..fe26bb5d6c6 100644
--- a/scripts/installer/Makefile
+++ b/scripts/installer/Makefile
@@ -4,7 +4,7 @@ GLASSFISH_SETUP_SCRIPT=${INSTALLER_ZIP_DIR}/as-setup.sh
API_SCRIPTS=${INSTALLER_ZIP_DIR}/setup-datasetfields.sh ${INSTALLER_ZIP_DIR}/setup-users.sh ${INSTALLER_ZIP_DIR}/setup-builtin-roles.sh ${INSTALLER_ZIP_DIR}/setup-dvs.sh ${INSTALLER_ZIP_DIR}/data ${INSTALLER_ZIP_DIR}/setup-identity-providers.sh ${INSTALLER_ZIP_DIR}/setup-all.sh ${INSTALLER_ZIP_DIR}/post-install-api-block.sh
JHOVE_CONFIG=${INSTALLER_ZIP_DIR}/jhove.conf
JHOVE_SCHEMA=${INSTALLER_ZIP_DIR}/jhoveConfig.xsd
-SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_fields.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_copies.xml ${INSTALLER_ZIP_DIR}/updateSchemaMDB.sh
+SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_fields.xml ${INSTALLER_ZIP_DIR}/updateSchemaMDB.sh
SOLR_CONFIG=${INSTALLER_ZIP_DIR}/solrconfig.xml
PYTHON_FILES=${INSTALLER_ZIP_DIR}/README_python.txt ${INSTALLER_ZIP_DIR}/installConfig.py ${INSTALLER_ZIP_DIR}/installUtils.py ${INSTALLER_ZIP_DIR}/install.py ${INSTALLER_ZIP_DIR}/installAppServer.py ${INSTALLER_ZIP_DIR}/requirements.txt ${INSTALLER_ZIP_DIR}/default.config ${INSTALLER_ZIP_DIR}/interactive.config
INSTALL_SCRIPT=${INSTALLER_ZIP_DIR}/install
@@ -56,7 +56,7 @@ ${JHOVE_SCHEMA}: ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR}
@echo copying jhove schema file
/bin/cp ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR}
-${SOLR_SCHEMA}: ../../conf/solr/8.8.1/schema.xml ../../conf/solr/8.8.1/schema_dv_mdb_fields.xml ../../conf/solr/8.8.1/schema_dv_mdb_copies.xml ../../conf/solr/8.8.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR}
+${SOLR_SCHEMA}: ../../conf/solr/8.8.1/schema.xml ../../conf/solr/8.8.1/schema_dv_mdb_fields.xml ../../conf/solr/8.8.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR}
@echo copying Solr schema file
/bin/cp ../../conf/solr/8.8.1/schema*.xml ../../conf/solr/8.8.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR}
diff --git a/scripts/installer/README.txt b/scripts/installer/README.txt
index 70f08550a75..f4f316dc958 100644
--- a/scripts/installer/README.txt
+++ b/scripts/installer/README.txt
@@ -38,6 +38,5 @@ jhove.conf
SOLR schema and config files, from conf/solr/8.8.1:
schema.xml
-schema_dv_cmb_copies.xml
-schema_dv_cmb_fields.xml
+schema_dv_mdb_fields.xml
solrconfig.xml
diff --git a/scripts/issues/7451/PRE-RELEASE-INFO.txt b/scripts/issues/7451/PRE-RELEASE-INFO.txt
new file mode 100644
index 00000000000..c14c25aa93f
--- /dev/null
+++ b/scripts/issues/7451/PRE-RELEASE-INFO.txt
@@ -0,0 +1,39 @@
+In the next release another constraint is being added to existing
+databases, to prevent any possibility of creating datafile objects
+referencing the same file. This was originally planned for v4.20, but
+in that release the constraint was only added to newly created
+databases, and was not enforced on databases that already
+existed. If your current database was originally created by version
+4.20 or newer, you don't need to do anything.
+
+If you do have an older database, it MUST BE RE-CHECKED for any
+existing duplicates before the next release (5.6, presumably) can be
+deployed. Hopefully there are no such inconsistencies in your
+database, but if there are any, they will need to be resolved, or the
+next version of the application WILL FAIL TO DEPLOY, with an error
+message from FlyWay. Please run the following script:
+
+https://github.com/IQSS/dataverse/raw/develop/scripts/issues/7451/check_datafiles_7451.sh
+
+The script relies on the PostgreSQL utility psql to access the
+database. You will need to edit the credentials at the top of the script
+to match your database configuration.
+
+The script will check your database for any duplicated storage
+identifiers that would violate the new constraint.
+
+For harvested files, it will directly resolve any conflicts.
+
+For local files, rather than attempt to make any changes right away
+(this being an issue of data integrity with a potential to affect your
+users) it will instruct you to send the produced diagnostic
+information to support@dataverse.org so that we can assist you
+in resolving the issues in your database.
+
+If no inconsistencies are found, the script will report that the database
+is ready to be upgraded to the next release.
+
+(Please note that there's a very good chance that your database does
+not have any conflicts of this nature. But we want to do this to be
+absolutely sure. We apologize for any inconvenience.)
+
diff --git a/scripts/issues/7451/check_datafiles_7451.sh b/scripts/issues/7451/check_datafiles_7451.sh
new file mode 100755
index 00000000000..1e4c95c69f4
--- /dev/null
+++ b/scripts/issues/7451/check_datafiles_7451.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+
+# begin config
+# PostgreSQL credentials:
+# edit the following lines so that psql can talk to your database
+pg_host=localhost
+pg_port=5432
+pg_user=dvnapp
+pg_db=dvndb
+# you can leave the password blank, if Postgres is configured
+# to accept connections without auth:
+pg_pass=
+# psql executable - add full path, if necessary:
+PSQL_EXEC=psql
+
+# end config
+
+# check for duplicate storageidentifiers in harvested datafiles:
+
+PG_QUERY_0="SELECT COUNT(DISTINCT o.id) FROM datafile f, dataset s, dvobject p, dvobject o WHERE s.id = p.id AND o.id = f.id AND o.owner_id = s.id AND s.harvestingclient_id IS NOT null AND o.storageidentifier IS NOT null"
+
+PG_QUERY_1="SELECT s.id, o.storageidentifier FROM datafile f, dataset s, dvobject o WHERE o.id = f.id AND o.owner_id = s.id AND s.harvestingclient_id IS NOT null AND o.storageidentifier IS NOT null ORDER by s.id, o.storageidentifier"
+
+PG_QUERY_FIX_0="UPDATE dvobject SET storageidentifier=NULL WHERE dtype='DataFile' AND (storageidentifier='file://' OR storageidentifier='http://' OR storageidentifier='s3://')"
+
+PG_QUERY_FIX_1="UPDATE dvobject SET storageidentifier=CONCAT(storageidentifier, ' ', id) WHERE owner_id = %d AND storageidentifier='%s'"
+
+PGPASSWORD=$pg_pass; export PGPASSWORD
+
+echo "Checking the total number of storageidentifiers in harvested datafiles..."
+
+NUM_DATAFILES=`${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_0}"`
+if [ $? != 0 ]
+then
+ echo "FAILED to execute psql! Check the credentials and try again?"
+ echo "exiting..."
+ echo
+ echo "the command line that failed:"
+ echo "${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c \"${PG_QUERY_0}\""
+ exit 1
+fi
+
+echo $NUM_DATAFILES total.
+echo
+
+# Before we do anything else, reset the storageidentifiers of the datafiles (harvested and otherwise) that
+# may have ended up set to invalid, prefix-only values like "file://" back to NULL:
+
+${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -q -c "${PG_QUERY_FIX_0}"
+
+
+echo "Let's check if any harvested storage identifiers are referenced more than once within the same dataset:"
+
+${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_1}" |
+uniq -c |
+awk '{if ($1 > 1) print $0}' | sort -u > /tmp/harvestedidentifiers.tmp
+
+NUM_CONFIRMED=`cat /tmp/harvestedidentifiers.tmp | wc -l`
+
+if [ $NUM_CONFIRMED == 0 ]
+then
+ echo
+ echo "Good news - it appears that there are NO duplicate storageidentifiers in your harvested datafiles;"
+ echo "nothing to fix."
+ echo
+else
+
+ echo "Found ${NUM_CONFIRMED} harvested files with identical storageidentifiers; fixing in place..."
+
+ cat /tmp/harvestedidentifiers.tmp | sed 's:\\:\\\\:g' | while read howmany dataset storageidentifier
+ do
+ # Harvard prod. db had a few harvested storage identifiers consisting of a single space (" "),
+ # which would confuse the shell. Extremely unlikely to be found in any other installation.
+ if [[ "x${storageidentifier}" = "x" ]]
+ then
+ storageidentifier=" "
+ fi
+
+ PG_QUERY_SI=`printf "${PG_QUERY_FIX_1}" $dataset "$storageidentifier"`
+ ${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_SI}"
+ done
+
+ echo "... done."
+ echo
+
+ echo -n "Let's confirm that all these dupes have been fixed... "
+ ${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_1}" |
+ uniq -c |
+ awk '{if ($1 > 1) print $0}' | sort -u > /tmp/harvestedidentifiers.tmp
+
+ NUM_CONFIRMED=`cat /tmp/harvestedidentifiers.tmp | wc -l`
+
+ if [ $NUM_CONFIRMED == 0 ]
+ then
+ echo "Looks good."
+ echo
+ else
+ echo "Oops!"
+ echo "Unfortunately, the script failed to fix some of the harvested duplicates."
+ echo "Please send the contents of the file /tmp/harvestedidentifiers.tmp"
+ echo "to Dataverse support at support@dataverse.org."
+ echo "Apologies for the extra trouble..."
+ echo
+ exit 1
+ fi
+
+fi
+
+
+# now, check for duplicate storageidentifiers in local datafiles:
+
+PG_QUERY_3="SELECT COUNT(DISTINCT o.id) FROM datafile f, dataset s, dvobject p, dvobject o WHERE s.id = p.id AND o.id = f.id AND o.owner_id = s.id AND s.harvestingclient_id IS null AND o.storageidentifier IS NOT null"
+
+PG_QUERY_4="SELECT s.id, o.storageidentifier FROM datafile f, dataset s, dvobject o WHERE o.id = f.id AND o.owner_id = s.id AND s.harvestingclient_id IS null AND o.storageidentifier IS NOT null ORDER by s.id, o.storageidentifier"
+
+PG_QUERY_5="SELECT p.authority, p.identifier, o.storageidentifier, o.id, o.createdate, f.contenttype FROM datafile f, dvobject p, dvobject o WHERE o.id = f.id AND o.owner_id = p.id AND p.id = %d AND o.storageidentifier='%s' ORDER by o.id"
+
+echo "Checking the number of non-harvested datafiles in the database..."
+
+NUM_DATAFILES=`${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_3}"`
+echo $NUM_DATAFILES total.
+echo
+
+echo "Let's check if any storage identifiers are referenced more than once within the same dataset:"
+
+${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_4}" |
+uniq -c |
+awk '{if ($1 > 1) print $0}' > /tmp/storageidentifiers.tmp
+
+NUM_CONFIRMED=`cat /tmp/storageidentifiers.tmp | wc -l`
+
+if [ $NUM_CONFIRMED == 0 ]
+then
+ echo
+ echo "Good news - it appears that there are NO duplicate DataFile objects in your database."
+ echo "Your installation is ready to be upgraded to Dataverse 5.5"
+ echo
+else
+
+ echo "The following storage identifiers appear to be referenced from multiple non-harvested DvObjects:"
+ cat /tmp/storageidentifiers.tmp
+ echo "(output saved in /tmp/storageidentifiers.tmp)"
+
+ echo "Looking up details for the affected datafiles:"
+
+ cat /tmp/storageidentifiers.tmp | while read howmany dataset storageidentifier
+ do
+ PG_QUERY_SI=`printf "${PG_QUERY_5}" $dataset "$storageidentifier"`
+ ${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_SI}"
+ done | tee /tmp/duplicates_info.tmp
+
+ echo "(output saved in /tmp/duplicates_info.tmp)"
+
+ echo
+ echo "Please send the output above to Dataverse support at support@dataverse.org."
+ echo "We will assist you in the process of cleaning up the affected files above."
+ echo "We apologize for any inconvenience."
+ echo
+fi
+
+
diff --git a/scripts/search/tests/data/dataset-finch1.jsonld b/scripts/search/tests/data/dataset-finch1.jsonld
new file mode 100644
index 00000000000..be39c9f14b2
--- /dev/null
+++ b/scripts/search/tests/data/dataset-finch1.jsonld
@@ -0,0 +1,26 @@
+
+{
+ "http://purl.org/dc/terms/title": "Darwin's Finches",
+ "http://purl.org/dc/terms/subject": "Medicine, Health and Life Sciences",
+ "http://purl.org/dc/terms/creator": {
+ "https://dataverse.org/schema/citation/author#Name": "Finch, Fiona",
+ "https://dataverse.org/schema/citation/author#Affiliation": "Birds Inc."
+ },
+ "https://dataverse.org/schema/citation/Contact": {
+ "https://dataverse.org/schema/citation/datasetContact#E-mail": "finch@mailinator.com",
+ "https://dataverse.org/schema/citation/datasetContact#Name": "Finch, Fiona"
+ },
+ "https://dataverse.org/schema/citation/Description": {
+ "https://dataverse.org/schema/citation/dsDescription#Text": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds."
+ },
+ "@type": [
+ "http://www.openarchives.org/ore/terms/Aggregation",
+ "http://schema.org/Dataset"
+ ],
+ "http://schema.org/version": "DRAFT",
+ "http://schema.org/name": "Darwin's Finches",
+ "https://dataverse.org/schema/core#fileTermsOfAccess": {
+ "https://dataverse.org/schema/core#fileRequestAccess": false
+ },
+ "http://schema.org/includedInDataCatalog": "Root"
+}
\ No newline at end of file
diff --git a/scripts/vagrant/setup.sh b/scripts/vagrant/setup.sh
index 14f12cea692..24bac307709 100644
--- a/scripts/vagrant/setup.sh
+++ b/scripts/vagrant/setup.sh
@@ -35,16 +35,15 @@ echo "export MAVEN_HOME=/opt/maven" >> /etc/profile.d/maven.sh
echo "export PATH=/opt/maven/bin:${PATH}" >> /etc/profile.d/maven.sh
chmod 0755 /etc/profile.d/maven.sh
-# disable centos8 postgresql module and install postgresql10-server
-# note: postgresql10 because 9.6 isn't backwards compatible with python3-psycopg2
+# disable centos8 postgresql module and install postgresql13-server
dnf -qy module disable postgresql
dnf install -qy https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm
-dnf install -qy postgresql10-server
-/usr/pgsql-10/bin/postgresql-10-setup initdb
-/usr/bin/systemctl stop postgresql-10
-cp /dataverse/conf/vagrant/var/lib/pgsql/data/pg_hba.conf /var/lib/pgsql/10/data/pg_hba.conf
-/usr/bin/systemctl start postgresql-10
-/usr/bin/systemctl enable postgresql-10
+dnf install -qy postgresql13-server
+/usr/pgsql-13/bin/postgresql-13-setup initdb
+/usr/bin/systemctl stop postgresql-13
+cp /dataverse/conf/vagrant/var/lib/pgsql/data/pg_hba.conf /var/lib/pgsql/13/data/pg_hba.conf
+/usr/bin/systemctl start postgresql-13
+/usr/bin/systemctl enable postgresql-13
PAYARA_USER=dataverse
echo "Ensuring Unix user '$PAYARA_USER' exists"
@@ -53,7 +52,7 @@ SOLR_USER=solr
echo "Ensuring Unix user '$SOLR_USER' exists"
useradd $SOLR_USER || :
DOWNLOAD_DIR='/dataverse/downloads'
-PAYARA_ZIP="$DOWNLOAD_DIR/payara-5.2020.6.zip"
+PAYARA_ZIP="$DOWNLOAD_DIR/payara-5.2021.5.zip"
SOLR_TGZ="$DOWNLOAD_DIR/solr-8.8.1.tgz"
if [ ! -f $PAYARA_ZIP ] || [ ! -f $SOLR_TGZ ]; then
echo "Couldn't find $PAYARA_ZIP or $SOLR_TGZ! Running download script...."
diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java
index ce32c18fa7a..218e4c85474 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java
@@ -23,7 +23,7 @@
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.TypedQuery;
-import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.text.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
@@ -175,7 +175,7 @@ public static String getMetadataFromDvObject(String identifier, Map from HTML, it leaves '&' (at least so we need to xml escape as well
- String description = StringEscapeUtils.escapeXml(dataset.getLatestVersion().getDescriptionPlainText());
+ String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText());
if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) {
description = AbstractGlobalIdServiceBean.UNAVAILABLE;
}
@@ -185,7 +185,7 @@ public static String getMetadataFromDvObject(String identifier, Map citationList = new ArrayList<>();
- citationList.add(formatString(getAuthorsString(), html));
+ if(anonymized) {
+ citationList.add(BundleUtil.getStringFromBundle("file.anonymized.authorsWithheld"));
+ } else {
+ citationList.add(formatString(getAuthorsString(), html));
+ }
citationList.add(year);
if ((fileTitle != null) && isDirect()) {
citationList.add(formatString(fileTitle, html, "\""));
@@ -643,7 +650,7 @@ private String formatString(String value, boolean escapeHtml, String wrapperFron
private String formatString(String value, boolean escapeHtml, String wrapperStart, String wrapperEnd) {
if (!StringUtils.isEmpty(value)) {
- return new StringBuilder(wrapperStart).append(escapeHtml ? StringEscapeUtils.escapeHtml(value) : value)
+ return new StringBuilder(wrapperStart).append(escapeHtml ? StringEscapeUtils.escapeHtml4(value) : value)
.append(wrapperEnd).toString();
}
return null;
@@ -655,7 +662,7 @@ private String formatURL(String text, String url, boolean html) {
}
if (html && url != null) {
- return "" + StringEscapeUtils.escapeHtml(text) + "";
+ return "" + StringEscapeUtils.escapeHtml4(text) + "";
} else {
return text;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
index 706b8d9f4e5..83a65110be2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
@@ -38,7 +38,7 @@
import javax.persistence.Query;
import javax.persistence.StoredProcedureQuery;
import javax.persistence.TypedQuery;
-import org.apache.commons.lang.RandomStringUtils;
+import org.apache.commons.lang3.RandomStringUtils;
/**
*
@@ -1428,11 +1428,11 @@ public String generateDataFileIdentifier(DataFile datafile, GlobalIdServiceBean
switch (doiIdentifierType) {
case "randomString":
return generateIdentifierAsRandomString(datafile, idServiceBean, prepend);
- case "sequentialNumber":
+ case "storedProcGenerated":
if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){
- return generateIdentifierAsIndependentSequentialNumber(datafile, idServiceBean, prepend);
+ return generateIdentifierFromStoredProcedureIndependent(datafile, idServiceBean, prepend);
} else {
- return generateIdentifierAsDependentSequentialNumber(datafile, idServiceBean, prepend);
+ return generateIdentifierFromStoredProcedureDependent(datafile, idServiceBean, prepend);
}
default:
/* Should we throw an exception instead?? -- L.A. 4.6.2 */
@@ -1450,24 +1450,24 @@ private String generateIdentifierAsRandomString(DataFile datafile, GlobalIdServi
}
- private String generateIdentifierAsIndependentSequentialNumber(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) {
+ private String generateIdentifierFromStoredProcedureIndependent(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) {
String identifier;
do {
- StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierAsSequentialNumber");
+ StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure");
query.execute();
- Integer identifierNumeric = (Integer) query.getOutputParameterValue(1);
+ String identifierFromStoredProcedure = (String) query.getOutputParameterValue(1);
// some diagnostics here maybe - is it possible to determine that it's failing
// because the stored procedure hasn't been created in the database?
- if (identifierNumeric == null) {
+ if (identifierFromStoredProcedure == null) {
return null;
}
- identifier = prepend + identifierNumeric.toString();
+ identifier = prepend + identifierFromStoredProcedure;
} while (!isGlobalIdUnique(identifier, datafile, idServiceBean));
return identifier;
}
- private String generateIdentifierAsDependentSequentialNumber(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) {
+ private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) {
String identifier;
Long retVal;
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java b/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java
index 02d3da128f1..275d47cf1de 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java
@@ -20,7 +20,7 @@
import javax.persistence.JoinColumn;
import javax.persistence.ManyToOne;
import javax.persistence.Table;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
*
diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java
index cd40e76a304..0f12d02fb3e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java
+++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java
@@ -60,29 +60,22 @@
})
/*
- Below is the stored procedure for getting a numeric value from a database
- sequence. Used when the Dataverse is (optionally) configured to use
- incremental numeric values for dataset ids, instead of the default
+ Below is the database stored procedure for getting a string dataset id.
+ Used when the Dataverse is (optionally) configured to use
+ procedurally generated values for dataset ids, instead of the default
random strings.
- Unfortunately, there's no standard EJB way of handling sequences. So in the
- past we would simply use a NativeQuery to call a proprietary Postgres
- sequence query. A better way of handling this however is to define any
- proprietary SQL functionality outside of the application, in the database,
- and call it using the standard JPA @StoredProcedureQuery.
-
- The identifier sequence and the stored procedure for accessing it are currently
- implemented with PostgresQL "CREATE SEQUENCE ..." and "CREATE FUNCTION ...";
- (we explain how to create these in the installation documentation and supply
- a script). If necessary, it can be implemented using other SQL flavors -
+ The use of a stored procedure to create an identifier is explained in the
+ installation documentation (where an example script is supplied).
+ The stored procedure can be implemented using other SQL flavors -
without having to modify the application code.
- -- L.A. 4.6.2
+ -- L.A. 4.6.2 (modified by C.S. for version 5.4.1+)
*/
@NamedStoredProcedureQuery(
- name = "Dataset.generateIdentifierAsSequentialNumber",
- procedureName = "generateIdentifierAsSequentialNumber",
+ name = "Dataset.generateIdentifierFromStoredProcedure",
+ procedureName = "generateIdentifierFromStoredProcedure",
parameters = {
- @StoredProcedureParameter(mode = ParameterMode.OUT, type = Integer.class)
+ @StoredProcedureParameter(mode = ParameterMode.OUT, type = String.class)
}
)
@Entity
@@ -649,7 +642,11 @@ public String getCitation(DatasetVersion version) {
}
public String getCitation(boolean isOnlineVersion, DatasetVersion version) {
- return version.getCitation(isOnlineVersion);
+ return getCitation(isOnlineVersion, version, false);
+ }
+
+ public String getCitation(boolean isOnlineVersion, DatasetVersion version, boolean anonymized) {
+ return version.getCitation(isOnlineVersion, anonymized);
}
public String getPublicationDateFormattedYYYYMMDD() {
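Dataset keeps its existing citation overloads and adds an anonymized flag that is threaded down to DatasetVersion and, further below in this patch, to DataCitation.toString(html, anonymized). A trimmed sketch of the delegation chain, with DataCitation stubbed out:

```java
// Trimmed sketch of the overload chain this patch introduces; only the
// citation methods are shown and DataCitation is stubbed out.
class DataCitationStub {
    DataCitationStub(Object version) { }
    String toString(boolean html, boolean anonymized) {
        // the real DataCitation is expected to withhold identifying fields
        // when anonymized is true
        return "citation(html=" + html + ", anonymized=" + anonymized + ")";
    }
}

class DatasetVersionSketch {
    String getCitation(boolean html) {
        return getCitation(html, false);   // existing callers are unchanged
    }
    String getCitation(boolean html, boolean anonymized) {
        return new DataCitationStub(this).toString(html, anonymized);
    }
}

class DatasetSketch {
    String getCitation(boolean isOnlineVersion, DatasetVersionSketch version) {
        return getCitation(isOnlineVersion, version, false);
    }
    String getCitation(boolean isOnlineVersion, DatasetVersionSketch version, boolean anonymized) {
        return version.getCitation(isOnlineVersion, anonymized);
    }
}
```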
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetField.java b/src/main/java/edu/harvard/iq/dataverse/DatasetField.java
index 79f8916deb9..228cedd8663 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetField.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetField.java
@@ -33,7 +33,7 @@
import javax.persistence.OrderBy;
import javax.persistence.Table;
import javax.persistence.Transient;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
@Entity
@ValidateDatasetFieldType
@@ -241,7 +241,7 @@ public String getValue() {
return datasetFieldValues.get(0).getValue();
} else if (controlledVocabularyValues != null && !controlledVocabularyValues.isEmpty()) {
if (controlledVocabularyValues.get(0) != null){
- return controlledVocabularyValues.get(0).getStrValue();
+ return controlledVocabularyValues.get(0).getLocaleStrValue();
}
}
return null;
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java
index 221922ea004..407a1d57bd3 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java
@@ -25,7 +25,7 @@
import javax.persistence.OneToMany;
import javax.persistence.OrderBy;
import javax.persistence.Table;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
*
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java
index 00b7be97b83..c685fcb3e54 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java
@@ -11,7 +11,7 @@
import edu.harvard.iq.dataverse.util.BundleUtil;
import java.util.Collections;
import java.util.List;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java
index 27929dd3a39..2447a6478fd 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java
@@ -21,7 +21,7 @@
import javax.persistence.ManyToOne;
import javax.persistence.Table;
import javax.persistence.Transient;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
*
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java
index e571fd89627..0b1619e6851 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java
@@ -18,7 +18,7 @@
import java.util.regex.Pattern;
import javax.validation.ConstraintValidator;
import javax.validation.ConstraintValidatorContext;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
*
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index dbd762bba6e..be960082bd6 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -115,7 +115,7 @@
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
-import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.text.StringEscapeUtils;
import org.apache.commons.lang3.mutable.MutableBoolean;
import org.apache.commons.io.IOUtils;
@@ -438,16 +438,6 @@ public void setRemoveUnusedTags(boolean removeUnusedTags) {
private String fileSortField;
private String fileSortOrder;
- private LazyFileMetadataDataModel lazyModel;
-
- public LazyFileMetadataDataModel getLazyModel() {
- return lazyModel;
- }
-
- public void setLazyModel(LazyFileMetadataDataModel lazyModel) {
- this.lazyModel = lazyModel;
- }
-
public List<Entry<String, String>> getCartList() {
if (session.getUser() instanceof AuthenticatedUser) {
return ((AuthenticatedUser) session.getUser()).getCart().getContents();
@@ -1862,7 +1852,8 @@ private String init(boolean initFull) {
}
// init the citation
- displayCitation = dataset.getCitation(true, workingVersion);
+ displayCitation = dataset.getCitation(true, workingVersion, isAnonymizedAccess());
+ logger.fine("Citation: " + displayCitation);
if(workingVersion.isPublished()) {
MakeDataCountEntry entry = new MakeDataCountEntry(FacesContext.getCurrentInstance(), dvRequestService, workingVersion);
@@ -1899,7 +1890,8 @@ private String init(boolean initFull) {
this.guestbookResponse = guestbookResponseService.initGuestbookResponseForFragment(workingVersion, null, session);
logger.fine("Checking if rsync support is enabled.");
if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods))
- && dataset.getFiles().isEmpty()) { //only check for rsync if no files exist
+ && dataset.getFiles().isEmpty() && this.canUpdateDataset() ) { //only check for rsync if no files exist
+ //and user can update dataset
try {
ScriptRequestResponse scriptRequestResponse = commandEngine.submit(new RequestRsyncScriptCommand(dvRequestService.getDataverseRequest(), dataset));
logger.fine("script: " + scriptRequestResponse.getScript());
@@ -1912,9 +1904,11 @@ private String init(boolean initFull) {
setHasRsyncScript(false);
}
} catch (RuntimeException ex) {
- logger.warning("Problem getting rsync script: " + ex.getLocalizedMessage());
+ logger.warning("Problem getting rsync script(RuntimeException): " + ex.getLocalizedMessage());
+ FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Problem getting rsync script:", ex.getLocalizedMessage()));
} catch (CommandException cex) {
logger.warning("Problem getting rsync script (Command Exception): " + cex.getLocalizedMessage());
+ FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Problem getting rsync script:", cex.getLocalizedMessage()));
}
}
@@ -1988,8 +1982,10 @@ private String init(boolean initFull) {
} catch (CommandException ex) {
// No big deal. The user simply doesn't have access to create or delete a Private URL.
}
+ logger.fine("PrivateUser: " + (session.getUser() instanceof PrivateUrlUser));
if (session.getUser() instanceof PrivateUrlUser) {
PrivateUrlUser privateUrlUser = (PrivateUrlUser) session.getUser();
+ logger.fine("Anon: " + privateUrlUser.hasAnonymizedAccess());
if (dataset != null && dataset.getId().equals(privateUrlUser.getDatasetId())) {
JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.privateurl.header"),
BundleUtil.getStringFromBundle("dataset.privateurl.infoMessageReviewer"));
@@ -2027,8 +2023,6 @@ private String init(boolean initFull) {
datasetExploreTools = externalToolService.findDatasetToolsByType(ExternalTool.Type.EXPLORE);
rowsPerPage = 10;
-
-
return null;
}
@@ -2783,14 +2777,22 @@ public String editFileMetadata(){
public String deleteDatasetVersion() {
DeleteDatasetVersionCommand cmd;
+
+ Map<Long, String> deleteStorageLocations = datafileService.getPhysicalFilesToDelete(dataset.getLatestVersion());
+ boolean deleteCommandSuccess = false;
try {
cmd = new DeleteDatasetVersionCommand(dvRequestService.getDataverseRequest(), dataset);
commandEngine.submit(cmd);
JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetVersion.message.deleteSuccess"));
+ deleteCommandSuccess = true;
} catch (CommandException ex) {
JH.addMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("dataset.message.deleteFailure"));
logger.severe(ex.getMessage());
}
+
+ if (deleteCommandSuccess && !deleteStorageLocations.isEmpty()) {
+ datafileService.finalizeFileDeletes(deleteStorageLocations);
+ }
return returnToDatasetOnly();
}
@@ -3107,8 +3109,8 @@ public void updateFileCounts(){
private List<String> getSuccessMessageArguments() {
List<String> arguments = new ArrayList<>();
String dataverseString = "";
- arguments.add(StringEscapeUtils.escapeHtml(dataset.getDisplayName()));
- dataverseString += " " + StringEscapeUtils.escapeHtml(selectedDataverseForLinking.getDisplayName()) + "";
+ arguments.add(StringEscapeUtils.escapeHtml4(dataset.getDisplayName()));
+ dataverseString += " " + StringEscapeUtils.escapeHtml4(selectedDataverseForLinking.getDisplayName()) + "";
arguments.add(dataverseString);
return arguments;
}
@@ -5065,23 +5067,16 @@ public void setShowLinkingPopup(boolean showLinkingPopup) {
//
- /*
- public void setSelectedGroup(ExplicitGroup selectedGroup) {
- setShowDeletePopup(true);
- this.selectedGroup = selectedGroup;
- }
- */
-
- public void createPrivateUrl() {
+ public void createPrivateUrl(boolean anonymizedAccess) {
try {
- PrivateUrl createdPrivateUrl = commandEngine.submit(new CreatePrivateUrlCommand(dvRequestService.getDataverseRequest(), dataset));
+ PrivateUrl createdPrivateUrl = commandEngine.submit(new CreatePrivateUrlCommand(dvRequestService.getDataverseRequest(), dataset, anonymizedAccess));
privateUrl = createdPrivateUrl;
JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("dataset.privateurl.header"),
BundleUtil.getStringFromBundle("dataset.privateurl.infoMessageAuthor", Arrays.asList(getPrivateUrlLink(privateUrl))));
privateUrlWasJustCreated = true;
} catch (CommandException ex) {
String msg = BundleUtil.getStringFromBundle("dataset.privateurl.noPermToCreate", PrivateUrlUtil.getRequiredPermissions(ex));
- logger.info("Unable to create a Private URL for dataset id " + dataset.getId() + ". Message to user: " + msg + " Exception: " + ex);
+ logger.warning("Unable to create a Private URL for dataset id " + dataset.getId() + ". Message to user: " + msg + " Exception: " + ex);
JH.addErrorMessage(msg);
}
}
@@ -5103,8 +5098,31 @@ public boolean isUserCanCreatePrivateURL() {
public String getPrivateUrlLink(PrivateUrl privateUrl) {
return privateUrl.getLink();
}
-
-
+
+ public boolean isAnonymizedAccess() {
+ if (session.getUser() instanceof PrivateUrlUser) {
+ return ((PrivateUrlUser)session.getUser()).hasAnonymizedAccess();
+ } else {
+ return false;
+ }
+ }
+
+ public boolean isAnonymizedPrivateUrl() {
+ if(privateUrl != null) {
+ return privateUrl.isAnonymizedAccess();
+ } else {
+ return false;
+ }
+ }
+
+ public boolean isAnonymizedAccessEnabled() {
+ if (settingsWrapper.getValueForKey(SettingsServiceBean.Key.AnonymizedFieldTypeNames) != null) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
// todo: we should be able to remove - this is passed in the html pages to other fragments, but they could just access this service bean directly.
public FileDownloadServiceBean getFileDownloadService() {
return fileDownloadService;
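deleteDatasetVersion now snapshots the physical storage locations before submitting the delete command and only removes the bytes once the command has succeeded, so a failed database delete cannot leave files removed out of order. A minimal sketch of that two-phase pattern, with hypothetical service and command types standing in for the Dataverse beans:

```java
import java.util.Map;

// Hypothetical stand-ins for the beans used in deleteDatasetVersion().
interface FileStorageService {
    Map<Long, String> getPhysicalFilesToDelete();         // fileId -> storage location
    void finalizeFileDeletes(Map<Long, String> toDelete); // removes the bytes
}

interface VersionDeleteCommand {
    void execute() throws Exception; // deletes the version metadata in the database
}

class TwoPhaseDeleteSketch {
    // 1) capture locations, 2) run the metadata delete, 3) only then delete bytes
    static void deleteVersion(FileStorageService storage, VersionDeleteCommand cmd) {
        Map<Long, String> deleteStorageLocations = storage.getPhysicalFilesToDelete();
        boolean deleteCommandSuccess = false;
        try {
            cmd.execute();
            deleteCommandSuccess = true;
        } catch (Exception ex) {
            // surface the failure to the user/log; physical files are left untouched
        }
        if (deleteCommandSuccess && !deleteStorageLocations.isEmpty()) {
            storage.finalizeFileDeletes(deleteStorageLocations);
        }
    }
}
```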
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
index 224ccfd22f3..2cf1f0d094f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -49,7 +49,7 @@
import javax.persistence.Query;
import javax.persistence.StoredProcedureQuery;
import javax.persistence.TypedQuery;
-import org.apache.commons.lang.RandomStringUtils;
+import org.apache.commons.lang3.RandomStringUtils;
import org.ocpsoft.common.util.Strings;
/**
@@ -273,8 +273,8 @@ public String generateDatasetIdentifier(Dataset dataset, GlobalIdServiceBean idS
switch (identifierType) {
case "randomString":
return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder);
- case "sequentialNumber":
- return generateIdentifierAsSequentialNumber(dataset, idServiceBean, shoulder);
+ case "storedProcGenerated":
+ return generateIdentifierFromStoredProcedure(dataset, idServiceBean, shoulder);
default:
/* Should we throw an exception instead?? -- L.A. 4.6.2 */
return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder);
@@ -290,19 +290,19 @@ private String generateIdentifierAsRandomString(Dataset dataset, GlobalIdService
return identifier;
}
- private String generateIdentifierAsSequentialNumber(Dataset dataset, GlobalIdServiceBean idServiceBean, String shoulder) {
+ private String generateIdentifierFromStoredProcedure(Dataset dataset, GlobalIdServiceBean idServiceBean, String shoulder) {
String identifier;
do {
- StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierAsSequentialNumber");
+ StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure");
query.execute();
- Integer identifierNumeric = (Integer) query.getOutputParameterValue(1);
+ String identifierFromStoredProcedure = (String) query.getOutputParameterValue(1);
// some diagnostics here maybe - is it possible to determine that it's failing
// because the stored procedure hasn't been created in the database?
- if (identifierNumeric == null) {
+ if (identifierFromStoredProcedure == null) {
return null;
}
- identifier = shoulder + identifierNumeric.toString();
+ identifier = shoulder + identifierFromStoredProcedure;
} while (!isIdentifierLocallyUnique(identifier, dataset));
return identifier;
@@ -732,21 +732,30 @@ public void exportAllDatasets(boolean forceReExport) {
//depends on dataset state and user privileges
public String getReminderString(Dataset dataset, boolean canPublishDataset) {
+ String reminderString;
+
if(!dataset.isReleased() ){
//messages for draft state.
if (canPublishDataset){
- return BundleUtil.getStringFromBundle("dataset.message.publish.remind.draft");
+ reminderString = BundleUtil.getStringFromBundle("dataset.message.publish.remind.draft");
} else {
- return BundleUtil.getStringFromBundle("dataset.message.submit.remind.draft");
+ reminderString = BundleUtil.getStringFromBundle("dataset.message.submit.remind.draft");
}
} else{
//messages for new version - post-publish
if (canPublishDataset){
- return BundleUtil.getStringFromBundle("dataset.message.publish.remind.version");
+ reminderString = BundleUtil.getStringFromBundle("dataset.message.publish.remind.version");
} else {
- return BundleUtil.getStringFromBundle("dataset.message.submit.remind.version");
+ reminderString = BundleUtil.getStringFromBundle("dataset.message.submit.remind.version");
}
}
+
+ if (reminderString != null) {
+ return reminderString;
+ } else {
+ logger.warning("Unable to get reminder string from bundle. Returning empty string.");
+ return "";
+ }
}
public void updateLastExportTimeStamp(Long datasetId) {
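With the rename, the identifier style setting now accepts storedProcGenerated in place of the old sequentialNumber, and anything unrecognized still falls back to random strings. A sketch of that dispatch, using lang3's RandomStringUtils for the random branch (the length and character set below are illustrative assumptions, not Dataverse's exact values):

```java
import org.apache.commons.lang3.RandomStringUtils;
import java.util.function.Supplier;

class IdentifierDispatchSketch {
    // Illustrative only: length and charset are assumptions.
    static String randomIdentifier(String shoulder) {
        return shoulder + RandomStringUtils.randomAlphanumeric(6).toUpperCase();
    }

    static String generate(String identifierType, String shoulder, Supplier<String> storedProc) {
        switch (identifierType) {
            case "randomString":
                return randomIdentifier(shoulder);
            case "storedProcGenerated": // was "sequentialNumber" before this patch
                String fromProc = storedProc.get();
                return fromProc == null ? null : shoulder + fromProc;
            default:
                // unknown setting: keep the old behavior of falling back to random
                return randomIdentifier(shoulder);
        }
    }
}
```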
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
index d28ce5175d4..78c1687a7b7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
@@ -49,7 +49,7 @@
import javax.validation.Validator;
import javax.validation.ValidatorFactory;
import javax.validation.constraints.Size;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
*
@@ -1340,7 +1340,11 @@ public String getCitation() {
}
public String getCitation(boolean html) {
- return new DataCitation(this).toString(html);
+ return getCitation(html, false);
+ }
+
+ public String getCitation(boolean html, boolean anonymized) {
+ return new DataCitation(this).toString(html, anonymized);
}
public Date getCitationDate() {
@@ -1703,11 +1707,11 @@ public String getJsonLd() {
JsonArrayBuilder authors = Json.createArrayBuilder();
for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) {
JsonObjectBuilder author = Json.createObjectBuilder();
- String name = datasetAuthor.getName().getValue();
+ String name = datasetAuthor.getName().getDisplayValue();
DatasetField authorAffiliation = datasetAuthor.getAffiliation();
String affiliation = null;
if (authorAffiliation != null) {
- affiliation = datasetAuthor.getAffiliation().getValue();
+ affiliation = datasetAuthor.getAffiliation().getDisplayValue();
}
// We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization".
//author.add("@type", "Person");
@@ -1859,7 +1863,7 @@ public String getJsonLd() {
JsonObjectBuilder license = Json.createObjectBuilder().add("@type", "Dataset");
if (TermsOfUseAndAccess.License.CC0.equals(terms.getLicense())) {
- license.add("text", "CC0").add("url", "https://creativecommons.org/publicdomain/zero/1.0/");
+ license.add("text", "CC0").add("url", TermsOfUseAndAccess.CC0_URI);
} else {
String termsOfUse = terms.getTermsOfUse();
// Terms of use can be null if you create the dataset with JSON.
@@ -1945,6 +1949,10 @@ public String getJsonLd() {
job.add("distribution", fileArray);
}
jsonLd = job.build().toString();
+
+ //Most fields above should be stripped/sanitized but, since this is output in the dataset page as header metadata, do a final sanitize step to make sure
+ jsonLd = MarkupChecker.stripAllTags(jsonLd);
+
return jsonLd;
}
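The new final pass runs the already-built JSON-LD string through MarkupChecker.stripAllTags before it is emitted as dataset-page header metadata, as defense in depth on top of per-field sanitizing. A self-contained sketch of the idea, with a simple regex stand-in for MarkupChecker (the real class lives in the Dataverse codebase) and the javax.json API used in this file:

```java
import javax.json.Json;
import javax.json.JsonObjectBuilder;

public class JsonLdSanitizeSketch {
    // Regex stand-in for MarkupChecker.stripAllTags; a production implementation
    // would delegate to a real HTML sanitizer.
    static String stripAllTags(String in) {
        return in == null ? null : in.replaceAll("<[^>]*>", "");
    }

    public static void main(String[] args) {
        JsonObjectBuilder job = Json.createObjectBuilder()
                .add("@context", "http://schema.org")
                .add("@type", "Dataset")
                .add("name", "Example <script>alert(1)</script> title");
        String jsonLd = job.build().toString();
        // Defense in depth: fields should already be sanitized, but strip any
        // surviving markup before the string reaches the page header.
        jsonLd = stripAllTags(jsonLd);
        System.out.println(jsonLd);
    }
}
```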
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java
index 8cc25f5148e..fee8a66a290 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java
@@ -12,7 +12,7 @@
import java.util.List;
import java.util.Set;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import edu.harvard.iq.dataverse.util.BundleUtil;
import java.util.Arrays;
import java.util.Date;
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
index 3c1ae3abf38..32dd32b643f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
@@ -34,7 +34,7 @@
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
import javax.persistence.TypedQuery;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrServerException;
/**
diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
index b46333a4287..342aaec187a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
+++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
@@ -33,7 +33,7 @@
import javax.validation.constraints.Pattern;
import javax.validation.constraints.Size;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.hibernate.validator.constraints.NotBlank;
import org.hibernate.validator.constraints.NotEmpty;
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java b/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java
index 7e9655b3970..b806ef8e22d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java
@@ -25,7 +25,7 @@
import javax.faces.view.ViewScoped;
import javax.inject.Inject;
import javax.inject.Named;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
*
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
index c7f816ce219..1e2d3f507a1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
@@ -52,8 +52,8 @@
import javax.ejb.EJBException;
import javax.faces.event.ValueChangeEvent;
import javax.faces.model.SelectItem;
-import org.apache.commons.lang.StringEscapeUtils;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.text.StringEscapeUtils;
+import org.apache.commons.lang3.StringUtils;
import org.primefaces.PrimeFaces;
import org.primefaces.event.TransferEvent;
@@ -805,8 +805,8 @@ public String saveLinkedDataverse() {
private List<String> getSuccessMessageArguments() {
List<String> arguments = new ArrayList<>();
- arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName()));
- String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + "";
+ arguments.add(StringEscapeUtils.escapeHtml4(dataverse.getDisplayName()));
+ String linkString = "" + StringEscapeUtils.escapeHtml4(linkingDataverse.getDisplayName()) + "";
arguments.add(linkString);
return arguments;
}
@@ -867,7 +867,7 @@ public String saveSavedSearch() {
commandEngine.submit(cmd);
List<String> arguments = new ArrayList<>();
- String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + "";
+ String linkString = "" + StringEscapeUtils.escapeHtml4(linkingDataverse.getDisplayName()) + "";
arguments.add(linkString);
String successMessageString = BundleUtil.getStringFromBundle("dataverse.saved.search.success", arguments);
JsfHelper.addSuccessMessage(successMessageString);
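These call sites move from commons-lang's escapeHtml to commons-text's escapeHtml4 (lang3's own StringEscapeUtils is deprecated in favor of commons-text). A self-contained example of the replacement call:

```java
import org.apache.commons.text.StringEscapeUtils;

public class EscapeHtml4Example {
    public static void main(String[] args) {
        String displayName = "R&D \"Alpha\" <Dataverse>";
        // escapeHtml4 escapes &, <, >, and " using HTML 4 entities,
        // matching what the old commons-lang escapeHtml did for these characters.
        System.out.println(StringEscapeUtils.escapeHtml4(displayName));
        // -> R&amp;D &quot;Alpha&quot; &lt;Dataverse&gt;
    }
}
```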
diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java
index f6b396f4c00..ad72f3819fb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java
@@ -2,7 +2,7 @@
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import javax.persistence.MappedSuperclass;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
* A {@link DvObject} that can contain other {@link DvObject}s.
diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java
index 4830c422d05..01b0890d588 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java
@@ -19,7 +19,7 @@
import javax.persistence.NonUniqueResultException;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.ocpsoft.common.util.Strings;
/**
diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
index aef3f7d3446..d2620d9a240 100644
--- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
@@ -76,7 +76,7 @@
import javax.faces.event.FacesEvent;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletResponse;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.mutable.MutableBoolean;
import org.primefaces.PrimeFaces;
@@ -598,7 +598,7 @@ public String init() {
}
if (mode == FileEditMode.UPLOAD) {
- if (settingsWrapper.getUploadMethodsCount() == 1){
+ if (settingsWrapper.getUploadMethodsCount() == 1){
JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("dataset.message.uploadFiles.label"), BundleUtil.getStringFromBundle("dataset.message.uploadFilesSingle.message", Arrays.asList(systemConfig.getGuidesBaseUrl(), systemConfig.getGuidesVersion())));
} else if (settingsWrapper.getUploadMethodsCount() > 1) {
JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("dataset.message.uploadFiles.label"), BundleUtil.getStringFromBundle("dataset.message.uploadFilesMultiple.message", Arrays.asList(systemConfig.getGuidesBaseUrl(), systemConfig.getGuidesVersion())));
@@ -1527,10 +1527,22 @@ private void setUpRsync() {
}
} catch (EJBException ex) {
logger.warning("Problem getting rsync script (EJBException): " + EjbUtil.ejbExceptionToString(ex));
+ FacesContext.getCurrentInstance().addMessage(uploadComponentId,
+ new FacesMessage(FacesMessage.SEVERITY_ERROR,
+ "Problem getting rsync script (EJBException): " + EjbUtil.ejbExceptionToString(ex),
+ "Problem getting rsync script (EJBException):"));
} catch (RuntimeException ex) {
logger.warning("Problem getting rsync script (RuntimeException): " + ex.getLocalizedMessage());
+ FacesContext.getCurrentInstance().addMessage(uploadComponentId,
+ new FacesMessage(FacesMessage.SEVERITY_ERROR,
+ "Problem getting rsync script (RuntimeException): " + ex.getMessage(),
+ "Problem getting rsync script (RuntimeException):"));
} catch (CommandException cex) {
logger.warning("Problem getting rsync script (Command Exception): " + cex.getLocalizedMessage());
+ FacesContext.getCurrentInstance().addMessage(uploadComponentId,
+ new FacesMessage(FacesMessage.SEVERITY_ERROR,
+ "Problem getting rsync script (Command Exception): " + cex.getMessage(),
+ "Problem getting rsync script (Command Exception):"));
}
}
}
@@ -2037,7 +2049,9 @@ public void handleExternalUpload() {
// -----------------------------------------------------------
if (this.isFileReplaceOperation()){
this.handleReplaceFileUpload(storageLocation, fileName, contentType, checksumValue, checksumType);
- this.setFileMetadataSelectedForTagsPopup(fileReplacePageHelper.getNewFileMetadatasBeforeSave().get(0));
+ if (fileReplacePageHelper.getNewFileMetadatasBeforeSave() != null){
+ this.setFileMetadataSelectedForTagsPopup(fileReplacePageHelper.getNewFileMetadatasBeforeSave().get(0));
+ }
return;
}
// -----------------------------------------------------------
@@ -2967,8 +2981,15 @@ public boolean rsyncUploadSupported() {
// ToDo - rsync was written before multiple store support and currently is hardcoded to use the "s3" store.
// When those restrictions are lifted/rsync can be configured per store, the test in the
// Dataset Util method should be updated
+ if(settingsWrapper.isRsyncUpload() && !DatasetUtil.isAppropriateStorageDriver(dataset) ){
+ //dataset.file.upload.setUp.rsync.failed.detail
+ FacesMessage message = new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.upload.setUp.rsync.failed"), BundleUtil.getStringFromBundle("dataset.file.upload.setUp.rsync.failed.detail"));
+ FacesContext.getCurrentInstance().addMessage(null, message);
+ }
+
- return settingsWrapper.isRsyncUpload() && DatasetUtil.isAppropriateStorageDriver(dataset);
+
+ return settingsWrapper.isRsyncUpload() && DatasetUtil.isAppropriateStorageDriver(dataset);
}
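The rsync error handling now pairs each log warning with a FacesMessage so the failure is visible in the UI instead of only in the server log. A sketch of that pattern, assuming a JSF request is in scope and that uploadComponentId names the component the message is attached to:

```java
import javax.faces.application.FacesMessage;
import javax.faces.context.FacesContext;

class RsyncErrorMessageSketch {
    // Log-and-surface pattern used above; "uploadComponentId" is an assumed
    // client id scoping where the message is rendered.
    static void reportRsyncFailure(String uploadComponentId, Exception ex) {
        String text = "Problem getting rsync script: " + ex.getLocalizedMessage();
        // a real bean would also call logger.warning(text);
        FacesContext.getCurrentInstance().addMessage(uploadComponentId,
                new FacesMessage(FacesMessage.SEVERITY_ERROR, text, text));
    }
}
```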
diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java
index 7b0fb0fd76c..0b2a92fe06a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java
+++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java
@@ -374,23 +374,19 @@ public String getFileDateToDisplay() {
}
return "";
}
-
+
public String getFileCitation(){
- return getFileCitation(false);
- }
-
+ return getFileCitation(false, false);
+ }
-
-
- public String getFileCitation(boolean html){
- return new DataCitation(this).toString(html);
- }
-
- public String getDirectFileCitation(boolean html){
- return new DataCitation(this, true).toString(html);
+ public String getFileCitation(boolean html, boolean anonymized){
+ return new DataCitation(this).toString(html, anonymized);
}
-
-
+
+ public String getDirectFileCitation(boolean html, boolean anonymized){
+ return new DataCitation(this, true).toString(html, anonymized);
+ }
+
public DatasetVersion getDatasetVersion() {
return datasetVersion;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java
index d863500d137..045ac1f934a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java
@@ -971,5 +971,11 @@ public boolean isFileAccessRequest() {
public void setFileAccessRequest(boolean fileAccessRequest) {
this.fileAccessRequest = fileAccessRequest;
- }
+ }
+ public boolean isAnonymizedAccess() {
+ if(session.getUser() instanceof PrivateUrlUser) {
+ return ((PrivateUrlUser)session.getUser()).hasAnonymizedAccess();
+ }
+ return false;
+ }
}
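Both DatasetPage and FilePage now answer "is this request anonymized?" the same way: only a PrivateUrlUser can carry anonymized access, so the check is an instanceof test plus hasAnonymizedAccess(). A trimmed sketch of that guard, with a stub user type standing in for the Dataverse session classes:

```java
// Stub types standing in for the Dataverse user classes; only the pieces
// needed for the guard are sketched here.
interface User { }

class PrivateUrlUserStub implements User {
    private final boolean anonymizedAccess;
    PrivateUrlUserStub(boolean anonymizedAccess) { this.anonymizedAccess = anonymizedAccess; }
    boolean hasAnonymizedAccess() { return anonymizedAccess; }
}

class AnonymizedAccessGuard {
    // true only for private-URL users that were created with anonymized access
    static boolean isAnonymizedAccess(User sessionUser) {
        if (sessionUser instanceof PrivateUrlUserStub) {
            return ((PrivateUrlUserStub) sessionUser).hasAnonymizedAccess();
        }
        return false;
    }
}
```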
diff --git a/src/main/java/edu/harvard/iq/dataverse/Guestbook.java b/src/main/java/edu/harvard/iq/dataverse/Guestbook.java
index 742e73403c1..18913bfd5bf 100644
--- a/src/main/java/edu/harvard/iq/dataverse/Guestbook.java
+++ b/src/main/java/edu/harvard/iq/dataverse/Guestbook.java
@@ -23,7 +23,7 @@
import javax.persistence.Transient;
import edu.harvard.iq.dataverse.util.DateUtil;
-import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.text.StringEscapeUtils;
import org.hibernate.validator.constraints.NotBlank;
/**
diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java
index a7fb2b5a3fd..37d7169b959 100644
--- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java
@@ -24,7 +24,7 @@
import javax.faces.view.ViewScoped;
import javax.inject.Inject;
import javax.inject.Named;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
/**
*
diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java
index 809417e3f9c..f2d290215da 100644
--- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java
@@ -128,6 +128,18 @@ public void streamResponsesByDataverseIdAndGuestbookId(OutputStream out, Long da
// of queries now) -- L.A.
Map customQandAs = mapCustomQuestionAnswersAsStrings(dataverseId, guestbookId);
+
+ List