diff --git a/.travis.yml.future b/.github/workflows/cypress_ui.yml.future similarity index 74% rename from .travis.yml.future rename to .github/workflows/cypress_ui.yml.future index 8bd747625e4..b38ae2f9558 100644 --- a/.travis.yml.future +++ b/.github/workflows/cypress_ui.yml.future @@ -1,16 +1,17 @@ +############################################################################### +# +# THIS IS AN OLD TRAVIS-CI.ORG JOB FILE +# To be used with Github Actions, it would be necessary to refactor it. +# Keeping it as the future example it has been before. +# See also #5846 +# +############################################################################### + services: - docker jobs: include: - # Execute java unit- and integration tests - - stage: test - language: java - jdk: - - oraclejdk8 - script: mvn -DcompilerArgument=-Xlint:unchecked test -P all-unit-tests - after_success: mvn jacoco:report coveralls:report - # Execute Cypress for UI testing # see https://docs.cypress.io/guides/guides/continuous-integration.html - stage: test diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml new file mode 100644 index 00000000000..464d60c2db6 --- /dev/null +++ b/.github/workflows/maven_unit_test.yml @@ -0,0 +1,42 @@ +name: Maven Unit Tests + +on: + push: + paths: + - "**.java" + pull_request: + paths: + - "**.java" + +jobs: + unittest: + name: (JDK ${{ matrix.jdk }} / ${{ matrix.os }}) Unit Tests + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest ] + jdk: [ '11' ] + #include: + # - os: ubuntu-latest + # jdk: '16' + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - name: Set up JDK ${{ matrix.jdk }} + uses: actions/setup-java@v2 + with: + java-version: ${{ matrix.jdk }} + distribution: 'adopt' + - name: Cache Maven packages + uses: actions/cache@v2 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + - name: Build with Maven + run: mvn -DcompilerArgument=-Xlint:unchecked -P all-unit-tests clean test + - name: Maven Code Coverage + env: + CI_NAME: github + COVERALLS_SECRET: ${{ secrets.GITHUB_TOKEN }} + run: mvn -V -B jacoco:report coveralls:report -DrepoToken=${COVERALLS_SECRET} -DpullRequest=${{ github.event.number }} \ No newline at end of file diff --git a/.github/workflows/reviewdog_checkstyle.yml b/.github/workflows/reviewdog_checkstyle.yml new file mode 100644 index 00000000000..90a0dd7d06b --- /dev/null +++ b/.github/workflows/reviewdog_checkstyle.yml @@ -0,0 +1,21 @@ +name: Maven CheckStyle Task +on: + pull_request: + paths: + - "**.java" + +jobs: + checkstyle_job: + runs-on: ubuntu-latest + name: Checkstyle job + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Run check style + uses: nikitasavinov/checkstyle-action@master + with: + fail_on_error: true + reporter: github-pr-review + checkstyle_config: checkstyle.xml + github_token: ${{ secrets.GITHUB_TOKEN }} + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 67de6619add..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,5 +0,0 @@ -language: java -jdk: - - openjdk11 -script: mvn -DcompilerArgument=-Xlint:unchecked test -P all-unit-tests -after_success: mvn jacoco:report coveralls:report diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2227286d4d1..cb7e5f9d123 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,12 +6,11 @@ We aren't just looking for developers. 
There are many ways to contribute to Data ## Ideas/Feature Requests -Your idea or feature request might already be captured in the Dataverse [issue tracker] on GitHub but if not, the best way to bring it to the community's attention is by posting on the [dataverse-community Google Group][] or bringing it up on a [Community Call][]. You're also welcome make some noise in the [#dataverse IRC channel][] (which is [logged][]) or cram your idea into 280 characters and mention [@dataverseorg][] on Twitter. To discuss your idea privately, please email it to support@dataverse.org +Your idea or feature request might already be captured in the Dataverse [issue tracker] on GitHub but if not, the best way to bring it to the community's attention is by posting on the [dataverse-community Google Group][] or bringing it up on a [Community Call][]. You're also welcome to make some noise in [chat.dataverse.org][] or cram your idea into 280 characters and mention [@dataverseorg][] on Twitter. To discuss your idea privately, please email it to support@dataverse.org There's a chance your idea is already on our roadmap, which is available at https://www.iq.harvard.edu/roadmap-dataverse-project -[#dataverse IRC channel]: http://chat.dataverse.org -[logged]: http://irclog.iq.harvard.edu/dataverse/today +[chat.dataverse.org]: http://chat.dataverse.org [issue tracker]: https://github.com/IQSS/dataverse/issues [@dataverseorg]: https://twitter.com/dataverseorg @@ -55,7 +54,7 @@ We love code contributions. Developers are not limited to the main Dataverse cod [API Guide]: http://guides.dataverse.org/en/latest/api [Installation Guide]: http://guides.dataverse.org/en/latest/installation -If you are interested in working on the main Dataverse code, great! Before you start coding, please reach out to us either on the [dataverse-community Google Group][], the [dataverse-dev Google Group][], [IRC][] (#dataverse on freenode), or via support@dataverse.org to make sure the effort is well coordinated and we avoid merge conflicts. We maintain a list of [community contributors][] and [dev efforts][] the community is working on so please let us know if you'd like to be added or removed from either list. +If you are interested in working on the main Dataverse code, great! Before you start coding, please reach out to us either on the [dataverse-community Google Group][], the [dataverse-dev Google Group][], [chat.dataverse.org][], or via support@dataverse.org to make sure the effort is well coordinated and we avoid merge conflicts. We maintain a list of [community contributors][] and [dev efforts][] the community is working on so please let us know if you'd like to be added or removed from either list. Please read http://guides.dataverse.org/en/latest/developers/version-control.html to understand how we use the "git flow" model of development and how we will encourage you to create a GitHub issue (if it doesn't exist already) to associate with your pull request. That page also includes tips on making a pull request. @@ -66,6 +65,5 @@ Thanks for your contribution! 
[dataverse-community Google Group]: https://groups.google.com/group/dataverse-community [Community Call]: https://dataverse.org/community-calls [dataverse-dev Google Group]: https://groups.google.com/group/dataverse-dev -[IRC]: http://chat.dataverse.org [community contributors]: https://docs.google.com/spreadsheets/d/1o9DD-MQ0WkrYaEFTD5rF_NtyL8aUISgURsAXSL7Budk/edit?usp=sharing [dev efforts]: https://github.com/orgs/IQSS/projects/2#column-5298405 diff --git a/README.md b/README.md index 3f3b8c2de90..6fd11374353 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Dataverse is a trademark of President and Fellows of Harvard College and is regi [![API Test Status](https://jenkins.dataverse.org/buildStatus/icon?job=IQSS-dataverse-develop&subject=API%20Test%20Status)](https://jenkins.dataverse.org/job/IQSS-dataverse-develop/) [![API Test Coverage](https://img.shields.io/jenkins/coverage/jacoco?jobUrl=https%3A%2F%2Fjenkins.dataverse.org%2Fjob%2FIQSS-dataverse-develop&label=API%20Test%20Coverage)](https://jenkins.dataverse.org/job/IQSS-dataverse-develop/ws/target/coverage-it/index.html) -[![Unit Test Status](https://img.shields.io/travis/IQSS/dataverse?label=Unit%20Test%20Status)](https://travis-ci.org/IQSS/dataverse) +[![Unit Test Status](https://github.com/IQSS/dataverse/actions/workflows/maven_unit_test.yml/badge.svg?branch=develop)](https://github.com/IQSS/dataverse/actions/workflows/maven_unit_test.yml) [![Unit Test Coverage](https://img.shields.io/coveralls/github/IQSS/dataverse?label=Unit%20Test%20Coverage)](https://coveralls.io/github/IQSS/dataverse?branch=develop) [![Guides Build Status](https://github.com/IQSS/dataverse/actions/workflows/guides_build_sphinx.yml/badge.svg)](https://github.com/IQSS/dataverse/actions/workflows/guides_build_sphinx.yml) diff --git a/checkstyle.xml b/checkstyle.xml index 5a864136fea..99185e15e97 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -97,7 +97,9 @@ --> - + + + -
[HTML diff hunk garbled in extraction — markup stripped, file path lost. The hunk consists of deletions from a Harvard Dataverse homepage page whose visible text included the hero copy ("Deposit and share your data. Get academic credit."; "Harvard Dataverse is a repository for research data. Deposit data and code here."), dataset/dataverse/download counts with "Add a dataset" and "Add a dataverse" links, "Browse by subject" and "ALL DATA" search entry points, "Datasets from journal dataverses" and "Datasets from other dataverses" listings with "Loading..." placeholders, an "Activity" metrics table (Datasets and Files; "All Activity" vs. "Past 30 Days"; Total, Deposited, Harvested, Downloaded), and the closing prompt "Looking for other online repositories at Harvard?".]
- - diff --git a/doc/sphinx-guides/source/_static/util/createsequence.sql b/doc/sphinx-guides/source/_static/util/createsequence.sql index 2677832abd8..7ac1968de2c 100644 --- a/doc/sphinx-guides/source/_static/util/createsequence.sql +++ b/doc/sphinx-guides/source/_static/util/createsequence.sql @@ -1,14 +1,14 @@ --- A script for creating a numeric identifier sequence, and an external --- stored procedure, for accessing the sequence from inside the application, --- in a non-hacky, JPA way. +-- A script for creating a numeric identifier sequence, and an external +-- stored procedure, for accessing the sequence from inside the application, +-- in a non-hacky, JPA way. -- NOTE: -- 1. The database user name "dvnapp" is hard-coded here - it may -- need to be changed to match your database user name; - + -- 2. In the code below, the sequence starts with 1, but it can be adjusted by --- changing the MINVALUE as needed. +-- changing the MINVALUE as needed. CREATE SEQUENCE datasetidentifier_seq INCREMENT 1 @@ -22,12 +22,12 @@ ALTER TABLE datasetidentifier_seq OWNER TO "dvnapp"; -- And now create a PostgreSQL FUNCTION, for JPA to -- access as a NamedStoredProcedure: -CREATE OR REPLACE FUNCTION generateIdentifierAsSequentialNumber( - OUT identifier int) - RETURNS int AS -$BODY$ +CREATE OR REPLACE FUNCTION generateIdentifierFromStoredProcedure() +RETURNS varchar AS $$ +DECLARE + identifier varchar; BEGIN - select nextval('datasetidentifier_seq') into identifier; + identifier := nextval('datasetidentifier_seq')::varchar; + RETURN identifier; END; -$BODY$ - LANGUAGE plpgsql; +$$ LANGUAGE plpgsql IMMUTABLE; diff --git a/doc/sphinx-guides/source/_static/util/identifier_from_timestamp.sql b/doc/sphinx-guides/source/_static/util/identifier_from_timestamp.sql new file mode 100644 index 00000000000..a755b5ecd4a --- /dev/null +++ b/doc/sphinx-guides/source/_static/util/identifier_from_timestamp.sql @@ -0,0 +1,46 @@ +-- A script for creating, through a database stored procedure, sequential +-- 8 character identifiers from a base36 representation of current timestamp. 
+ +CREATE OR REPLACE FUNCTION base36_encode( + IN digits bigint, IN min_width int = 0) +RETURNS varchar AS $$ +DECLARE + chars char[]; + ret varchar; + val bigint; +BEGIN + chars := ARRAY[ + '0','1','2','3','4','5','6','7','8','9', + 'a','b','c','d','e','f','g','h','i','j', + 'k','l','m','n','o','p','q','r','s','t', + 'u','v','w','x','y','z']; + val := digits; + ret := ''; + IF val < 0 THEN + val := val * -1; + END IF; + WHILE val != 0 LOOP + ret := chars[(val % 36)+1] || ret; + val := val / 36; + END LOOP; + + IF min_width > 0 AND char_length(ret) < min_width THEN + ret := lpad(ret, min_width, '0'); + END IF; + + RETURN ret; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + + +CREATE OR REPLACE FUNCTION generateIdentifierFromStoredProcedure() +RETURNS varchar AS $$ +DECLARE + curr_time_msec bigint; + identifier varchar; +BEGIN + curr_time_msec := extract(epoch from now())*1000; + identifier := base36_encode(curr_time_msec); + RETURN identifier; +END; +$$ LANGUAGE plpgsql IMMUTABLE; diff --git a/doc/sphinx-guides/source/admin/dashboard.rst b/doc/sphinx-guides/source/admin/dashboard.rst index 5129552b706..3f77729d0ab 100644 --- a/doc/sphinx-guides/source/admin/dashboard.rst +++ b/doc/sphinx-guides/source/admin/dashboard.rst @@ -22,7 +22,7 @@ This dashboard tool allows you to define sets of local datasets to make availabl Metadata Export --------------- -This part of the Dashboard is simply a reminder message that metadata export happens through the Dataverse Software API. See the :doc:`metadataexport` section and the :doc:`/api/native-api` section of the API Guide for more details. +This part of the Dashboard is simply a reminder message that metadata export happens through the Dataverse Software API. See the :doc:`/admin/metadataexport` section and the :doc:`/api/native-api` section of the API Guide for more details. Users ----- diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index a55c90d2eb3..a18204588c2 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -41,7 +41,7 @@ Recursively assigns the users and groups having a role(s),that are in the set co curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/addRoleAssignmentsToChildren Configure a Dataverse Collection to store all new files in a specific file store -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Only accessible to superusers. :: @@ -110,6 +110,8 @@ Mints a new identifier for a dataset previously registered with a handle. Only a curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/admin/$dataset-id/reregisterHDLToPID +.. 
_send-metadata-to-pid-provider: + Send Dataset metadata to PID provider ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/api/client-libraries.rst b/doc/sphinx-guides/source/api/client-libraries.rst index af8b2f19015..a47eb2d58aa 100755 --- a/doc/sphinx-guides/source/api/client-libraries.rst +++ b/doc/sphinx-guides/source/api/client-libraries.rst @@ -27,9 +27,10 @@ It was created and is maintained by `The Agile Monkeys `_. +https://github.com/IQSS/dataverse-client-r is the official R package for Dataverse Software APIs. The latest release can be installed from `CRAN `_. +The R client can search and download datasets. It is useful when automatically (instead of manually) downloading data files as part of a script. For bulk edit and upload operations, we currently recommend pyDataverse. -The package is currently maintained by `Will Beasley `_. It was created by `Thomas Leeper `_ whose Dataverse collection can be found at https://dataverse.harvard.edu/dataverse/leeper +The package is currently maintained by `Shiro Kuriwaki `_. It was originally created by `Thomas Leeper `_ and then formerly maintained by `Will Beasley `_. Java ---- diff --git a/doc/sphinx-guides/source/api/intro.rst b/doc/sphinx-guides/source/api/intro.rst index 101c6c2bfaa..933932cd7b9 100755 --- a/doc/sphinx-guides/source/api/intro.rst +++ b/doc/sphinx-guides/source/api/intro.rst @@ -204,6 +204,15 @@ Please note that some APIs are only documented in other guides that are more sui - :doc:`/installation/config` +- Developer Guide + + - :doc:`/developers/aux-file-support` + - :doc:`/developers/big-data-support` + - :doc:`/developers/dataset-migration-api` + - :doc:`/developers/dataset-semantic-metadata-api` + - :doc:`/developers/s3-direct-upload-api` + - :doc:`/developers/workflows` + Client Libraries ~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 9fb019ab9dd..823efe05669 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -580,10 +580,32 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/actions/:publish + curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/actions/:publish You should expect a 200 ("OK") response and JSON output. +Retrieve Guestbook Responses for a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to retrieve a file containing a list of Guestbook Responses in csv format for Dataverse collection, you must know either its "alias" (which the GUI calls an "identifier") or its database ID. If the Dataverse collection has more than one guestbook you may provide the id of a single guestbook as an optional parameter. If no guestbook id is provided the results returned will be the same as pressing the "Download All Responses" button on the Manage Dataset Guestbook page. If the guestbook id is provided then only those responses from that guestbook will be included in the file. + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + export GUESTBOOK_ID=1 + + curl -O -J -f -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/guestbookResponses?guestbookId=$GUESTBOOK_ID + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -O -J -f -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/guestbookResponses?guestbookId=1 + Datasets -------- @@ -609,13 +631,13 @@ Example: Getting the dataset whose DOI is *10.5072/FK2/J8SJZB*: export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB - curl $SERVER_URL/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER + curl -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/datasets/:persistentId/?persistentId=$PERSISTENT_IDENTIFIER The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/datasets/:persistentId/?persistentId=doi:10.5072/FK2/J8SJZB Getting its draft version: @@ -624,13 +646,13 @@ Getting its draft version: export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/J8SJZB - curl http://$SERVER/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER + curl -H "X-Dataverse-key:$API_TOKEN" http://$SERVER/api/datasets/:persistentId/versions/:draft?persistentId=$PERSISTENT_IDENTIFIER The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB + curl -H "X-Dataverse-key:$API_TOKEN" https://demo.dataverse.org/api/datasets/:persistentId/versions/:draft?persistentId=doi:10.5072/FK2/J8SJZB |CORS| Show the dataset whose id is passed: @@ -825,14 +847,20 @@ through the Dataverse application. For example, if you have a dataset version with 2 files, one with the folder named "subfolder": +|image1| + .. |image1| image:: ./img/dataset_page_files_view.png or, as viewed as a tree on the dataset page: +|image2| + .. |image2| image:: ./img/dataset_page_tree_view.png The output of the API for the top-level folder (``/api/datasets/{dataset}/dirindex/``) will be as follows: +|image3| + .. |image3| image:: ./img/index_view_top.png with the underlying html source: @@ -851,6 +879,8 @@ with the underlying html source: The ``/dirindex/?folder=subfolder`` link above will produce the following view: +|image4| + .. |image4| image:: ./img/index_view_subfolder.png with the html source as follows: @@ -1201,6 +1231,14 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/24/privateUrl + +If Anonymized Access has been enabled on a Dataverse instance (see the :ref:`:AnonymizedFieldTypeNames` setting), an optional 'anonymizedAccess' query parameter is allowed. +Setting anonymizedAccess=true in your call will create a PrivateURL that only allows an anonymized view of the Dataset (see :ref:`privateurl`). + +.. 
code-block:: bash + + curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/24/privateUrl?anonymizedAccess=true + Get the Private URL for a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2078,6 +2116,49 @@ The fully expanded example above (without environment variables) looks like this Note: The ``id`` returned in the json response is the id of the file metadata version. + + +Adding File Metadata +~~~~~~~~~~~~~~~~~~~~ + +This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds file metadata to the database table where the file has already been copied to the storage. + +The jsonData object includes values for: + +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below. + +A curl example using an ``PERSISTENT_ID`` + +* ``SERVER_URL`` - e.g. https://demo.dataverse.org +* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section. +* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV`` + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ + {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' + Updating File Metadata ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/api/sword.rst b/doc/sphinx-guides/source/api/sword.rst index 23d6588bd86..d853994f073 100755 --- a/doc/sphinx-guides/source/api/sword.rst +++ b/doc/sphinx-guides/source/api/sword.rst @@ -34,11 +34,15 @@ HTTP Basic Authentication commonly makes use of both a username and a password b Clients such as ``curl`` expect both a username and a password separated by a colon. With ``curl``, the way to indicate that the password should be blank or empty is to include the colon at the end of the username (the API token) like this: -``curl -u 54b143b5-d001-4254-afc0-a1c0f6a5b5a7:`` +.. code-block:: bash + + curl -u 54b143b5-d001-4254-afc0-a1c0f6a5b5a7: All the curl examples below take this form but instead of showing an API token like above, a Bash environment variable called ``$API_TOKEN`` is shown instead like this: -``curl -u $API_TOKEN:`` +.. code-block:: bash + + curl -u $API_TOKEN: .. _RFC 7617: https://tools.ietf.org/html/rfc7617 @@ -53,9 +57,9 @@ Differences in Dataverse Software 4 from DVN 3.x lead to a few minor backward in - Newly required fields when creating/editing datasets for compliance with the `Joint Declaration for Data Citation principles `_. - - dcterms:creator (maps to authorName) + - ``dcterms:creator`` (maps to authorName) - - dcterms:description + - ``dcterms:description`` - Deaccessioning is no longer supported. An alternative will be developed at https://github.com/IQSS/dataverse/issues/778 @@ -72,11 +76,11 @@ New features as of v1.1 - Datasets versions will only be increased to the next minor version (i.e. 1.1) rather than a major version (2.0) if possible. This depends on the nature of the change. Adding or removing a file, for example, requires a major version bump. -- "Author Affiliation" can now be populated with an XML attribute. For example: Stumptown, Jane +- "Author Affiliation" can now be populated with an XML attribute. For example: ``Stumptown, Jane`` -- "Contributor" can now be populated and the "Type" (Editor, Funder, Researcher, etc.) can be specified with an XML attribute. For example: CaffeineForAll +- "Contributor" can now be populated and the "Type" (Editor, Funder, Researcher, etc.) can be specified with an XML attribute. For example: ``CaffeineForAll`` -- "License" can now be set with dcterms:license and the possible values are "CC0" and "NONE". "License" interacts with "Terms of Use" (dcterms:rights) in that if you include dcterms:rights in the XML, the license will be set to "NONE". If you don't include dcterms:rights, the license will default to "CC0". It is invalid to specify "CC0" as a license and also include dcterms:rights; an error will be returned. 
For backwards compatibility, dcterms:rights is allowed to be blank (i.e. ) but blank values will not be persisted to the database and the license will be set to "NONE". +- "License" can now be set with ``dcterms:license`` and the possible values are "CC0" and "NONE". "License" interacts with "Terms of Use" (``dcterms:rights``) in that if you include ``dcterms:rights`` in the XML, the license will be set to "NONE". If you don't include ``dcterms:rights``, the license will default to "CC0". It is invalid to specify "CC0" as a license and also include ``dcterms:rights``; an error will be returned. For backwards compatibility, ``dcterms:rights`` is allowed to be blank (i.e. ````) but blank values will not be persisted to the database and the license will be set to "NONE". - "Contact E-mail" is automatically populated from dataset owner's email. @@ -94,18 +98,23 @@ Retrieve SWORD service document The service document enumerates the Dataverse collections (also "collections" from a SWORD perspective) the user can deposit data into. The "collectionPolicy" element for each Dataverse collections contains the Terms of Use. Any user with an API token can use this API endpoint. Institution-wide Shibboleth groups are not respected because membership in such a group can only be set via a browser. -``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/service-document`` +.. code-block:: bash + + curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/service-document Create a dataset with an Atom entry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To create a dataset, you must have the "Dataset Creator" role (the ``AddDataset`` permission) on a Dataverse collection. Practically speaking, you should first retrieve the service document to list the Dataverse collections into which you are authorized to deposit data. -``curl -u $API_TOKEN: --data-binary "@path/to/atom-entry-study.xml" -H "Content-Type: application/atom+xml" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS`` +.. code-block:: bash + + curl -u $API_TOKEN: --data-binary "@path/to/atom-entry-study.xml" -H "Content-Type: application/atom+xml" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS Example Atom entry (XML) .. literalinclude:: sword-atom-entry.xml + :language: xml Dublin Core Terms (DC Terms) Qualified Mapping - Dataverse Project DB Element Crosswalk ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -146,14 +155,18 @@ List datasets in a Dataverse Collection You must have permission to add datasets in a Dataverse collection (the Dataverse collection should appear in the service document) to list the datasets inside. Institution-wide Shibboleth groups are not respected because membership in such a group can only be set via a browser. -``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS`` +.. code-block:: bash + + curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS Add files to a dataset with a zip file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You must have ``EditDataset`` permission (Contributor role or above such as Curator or Admin) on the dataset to add files. 
-``curl -u $API_TOKEN: --data-binary @path/to/example.zip -H "Content-Disposition: filename=example.zip" -H "Content-Type: application/zip" -H "Packaging: http://purl.org/net/sword/package/SimpleZip" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/study/doi:TEST/12345`` +.. code-block:: bash + + curl -u $API_TOKEN: --data-binary @path/to/example.zip -H "Content-Disposition: filename=example.zip" -H "Content-Type: application/zip" -H "Packaging: http://purl.org/net/sword/package/SimpleZip" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/study/doi:TEST/12345 Display a dataset atom entry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -162,56 +175,72 @@ You must have ``ViewUnpublishedDataset`` permission (Contributor role or above s Contains data citation (bibliographicCitation), alternate URI (persistent URI of study), edit URI, edit media URI, statement URI. -``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345`` +.. code-block:: bash + + curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345 Display a dataset statement ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Contains title, author, feed of file entries, latestVersionState, locked boolean, updated timestamp. You must have ``ViewUnpublishedDataset`` permission (Contributor role or above such as Curator or Admin) on the dataset to display the statement. -``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/statement/study/doi:TEST/12345`` +.. code-block:: bash + + curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/statement/study/doi:TEST/12345 Delete a file by database id ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You must have ``EditDataset`` permission (Contributor role or above such as Curator or Admin) on the dataset to delete files. -``curl -u $API_TOKEN: -X DELETE https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/123`` +.. code-block:: bash + + curl -u $API_TOKEN: -X DELETE https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit-media/file/123 Replacing metadata for a dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Please note that **ALL** metadata (title, author, etc.) will be replaced, including fields that can not be expressed with "dcterms" fields. You must have ``EditDataset`` permission (Contributor role or above such as Curator or Admin) on the dataset to replace metadata. -``curl -u $API_TOKEN: --upload-file "path/to/atom-entry-study2.xml" -H "Content-Type: application/atom+xml" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345`` +.. code-block:: bash + + curl -u $API_TOKEN: --upload-file "path/to/atom-entry-study2.xml" -H "Content-Type: application/atom+xml" https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345`` Delete a dataset ~~~~~~~~~~~~~~~~ You must have the ``DeleteDatasetDraft`` permission (Contributor role or above such as Curator or Admin) on the dataset to delete it. Please note that if the dataset has never been published you will be able to delete it completely but if the dataset has already been published you will only be able to delete post-publication drafts, never a published version. -``curl -u $API_TOKEN: -i -X DELETE https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345`` +.. 
code-block:: bash + + curl -u $API_TOKEN: -i -X DELETE https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345 Determine if a Dataverse Collection has been published ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This API endpoint is the same as the "list datasets in a Dataverse collection" endpoint documented above and the same permissions apply but it is documented here separately to point out that you can look for a boolean called ``dataverseHasBeenReleased`` to know if a Dataverse collection has been released, which is required for publishing a dataset. -``curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS`` +.. code-block:: bash + + curl -u $API_TOKEN: https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/$DATAVERSE_ALIAS Publish a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``cat /dev/null`` and ``--data-binary @-`` arguments are used to send zero-length content to the API, which is required by the upstream library to process the ``In-Progress: false`` header. You must have the ``PublishDataverse`` permission (Admin role) on the Dataverse collection to publish it. -``cat /dev/null | curl -u $API_TOKEN: -X POST -H "In-Progress: false" --data-binary @- https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/dataverse/$DATAVERSE_ALIAS`` +.. code-block:: bash + + cat /dev/null | curl -u $API_TOKEN: -X POST -H "In-Progress: false" --data-binary @- https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/dataverse/$DATAVERSE_ALIAS Publish a dataset ~~~~~~~~~~~~~~~~~ The ``cat /dev/null`` and ``--data-binary @-`` arguments are used to send zero-length content to the API, which is required by the upstream library to process the ``In-Progress: false`` header. You must have the ``PublishDataset`` permission (Curator or Admin role) on the dataset to publish it. -``cat /dev/null | curl -u $API_TOKEN: -X POST -H "In-Progress: false" --data-binary @- https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345`` +.. code-block:: bash + + cat /dev/null | curl -u $API_TOKEN: -X POST -H "In-Progress: false" --data-binary @- https://$HOSTNAME/dvn/api/data-deposit/v1.1/swordv2/edit/study/doi:TEST/12345 .. _known-issues: @@ -249,4 +278,3 @@ Client libraries - R: https://github.com/IQSS/dataverse-client-r - Ruby: https://github.com/swordapp/sword2ruby - PHP: https://github.com/swordapp/swordappv2-php-library - diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 30c53f9febf..a68a623d24e 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -65,9 +65,9 @@ # built documents. # # The short X.Y version. -version = '5.5' +version = '5.6' # The full version, including alpha/beta/rc tags. -release = '5.5' +release = '5.6' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/sphinx-guides/source/developers/dataset-migration-api.rst b/doc/sphinx-guides/source/developers/dataset-migration-api.rst new file mode 100644 index 00000000000..1dc8f7866e0 --- /dev/null +++ b/doc/sphinx-guides/source/developers/dataset-migration-api.rst @@ -0,0 +1,58 @@ +Dataset Migration API +===================== + +The Dataverse software includes several ways to add Datasets originally created elsewhere (not to mention Harvesting capabilities). 
These include the Sword API (see the :doc:`/api/sword` guide) and the /dataverses/{id}/datasets/:import methods (json and ddi) (see the :doc:`/api/native-api` guide). + +This experimental migration API offers an additional option with some potential advantages: + +* metadata can be specified using the json-ld format used in the OAI-ORE metadata export +* existing publication dates and PIDs are maintained (currently limited to the case where the PID can be managed by the Dataverse software, e.g. where the authority and shoulder match those the software is configured for) +* updating the PID at the provider can be done immediately or later (with other existing APIs) +* adding files can be done via the standard APIs, including using direct-upload to S3 + +This API consists of 2 calls: one to create an initial Dataset version, and one to 'republish' the dataset through Dataverse with a specified publication date. +Both calls require super-admin privileges. + +These calls can be used in concert with other API calls to add files, update metadata, etc. before the 'republish' step is done. + + +Start Migrating a Dataset into a Dataverse Collection +----------------------------------------------------- + +.. note:: This action requires a Dataverse installation account with superuser permissions. + +To import a dataset with an existing persistent identifier (PID), the provided json-ld metadata should include it. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export DATAVERSE_ID=root + + curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$DATAVERSE_ID/datasets/:startmigration --upload-file dataset-migrate.jsonld + +An example jsonld file is available at :download:`dataset-migrate.jsonld <../_static/api/dataset-migrate.jsonld>` . Note that you would need to replace the PID in the sample file with one supported in your Dataverse instance. (Also note that `Issue #8028 `_ currently breaks testing this API with DataCite test DOIs.) + +Publish a Migrated Dataset +-------------------------- + +The call above creates a Dataset. Once it is created, other APIs can be used to add files, add additional metadata, etc. When a version is complete, the following call can be used to publish it with its original publication date. + +.. note:: This action requires a Dataverse installation account with superuser permissions. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + + curl -H 'Content-Type: application/ld+json' -H X-Dataverse-key:$API_TOKEN -X POST -d '{"schema:datePublished": "2020-10-26","@context":{ "schema":"http://schema.org/"}}' "$SERVER_URL/api/datasets/{id}/actions/:releasemigrated" + +datePublished is the only metadata supported in this call. + +An optional query parameter: updatepidatprovider (default is false) can be set to true to automatically update the metadata and targetUrl of the PID at the provider. With this set true, the result of this call will be that the PID redirects to this dataset rather than the dataset in the source repository. + +.. 
code-block:: bash + + curl -H 'Content-Type: application/ld+json' -H X-Dataverse-key:$API_TOKEN -X POST -d '{"schema:datePublished": "2020-10-26","@context":{ "schema":"http://schema.org/"}}' "$SERVER_URL/api/datasets/{id}/actions/:releasemigrated?updatepidatprovider=true" + +If the parameter is not added and set to true, other existing APIs can be used to update the PID at the provider later, e.g. :ref:`send-metadata-to-pid-provider` \ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst b/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst new file mode 100644 index 00000000000..da28cc60c53 --- /dev/null +++ b/doc/sphinx-guides/source/developers/dataset-semantic-metadata-api.rst @@ -0,0 +1,103 @@ +Dataset Semantic Metadata API +============================= + +The OAI_ORE metadata export format represents Dataset metadata using json-ld (see the :doc:`/admin/metadataexport` section). As part of an RDA-supported effort to allow import of Datasets exported as Bags with an included OAI_ORE metadata file, +an experimental API has been created that provides a json-ld alternative to the v1.0 API calls to get/set/delete Dataset metadata in the :doc:`/api/native-api`. + +You may prefer to work with this API if you are building a tool to import from a Bag/OAI-ORE source or already work with json-ld representations of metadata, or if you prefer the flatter json-ld representation to Dataverse software's json representation (which includes structure related to the metadata blocks involved and the type/multiplicity of the metadata fields.) +You may not want to use this API if you need stability and backward compatibility (the 'experimental' designation for this API implies that community feedback is desired and that, in future Dataverse software versions, the API may be modified based on that feedback). + +Note: The examples use the 'application/ld+json' mimetype. For compatibility reasons, the APIs also be used with mimetype "application/json-ld" + +Get Dataset Metadata +-------------------- + +To get the json-ld formatted metadata for a Dataset, specify the Dataset ID (DATASET_ID) or Persistent identifier (DATASET_PID), and, for specific versions, the version number. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export DATASET_ID='12345' + export DATASET_PID='doi:10.5072/FK2A1B2C3' + export VERSION='1.0' + export SERVER_URL=https://demo.dataverse.org + + Example 1: Get metadata for version '1.0' + + curl -H X-Dataverse-key:$API_TOKEN -H 'Accept: application/ld+json' "$SERVER_URL/api/datasets/$DATASET_ID/versions/$VERSION/metadata" + + Example 2: Get metadata for the latest version using the DATASET PID + + curl -H X-Dataverse-key:$API_TOKEN -H 'Accept: application/ld+json' "$SERVER_URL/api/datasets/:persistentId/metadata?persistentId=$DATASET_PID" + +You should expect a 200 ("OK") response and JSON-LD mirroring the OAI-ORE representation in the returned 'data' object. + + +Add Dataset Metadata +-------------------- + +To add json-ld formatted metadata for a Dataset, specify the Dataset ID (DATASET_ID) or Persistent identifier (DATASET_PID). Adding '?replace=true' will overwrite an existing metadata value. The default (replace=false) will only add new metadata or add a new value to a multi-valued field. + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export DATASET_ID='12345' + export DATASET_PID='doi:10.5072/FK2A1B2C3' + export VERSION='1.0' + export SERVER_URL=https://demo.dataverse.org + + Example: Change the Dataset title + + curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"Title": "Submit menu test", "@context":{"Title": "http://purl.org/dc/terms/title"}}' "$SERVER_URL/api/datasets/$DATASET_ID/metadata?replace=true" + + Example 2: Add a description using the DATASET PID + + curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"citation:Description": {"dsDescription:Text": "New description"}, "@context":{"citation": "https://dataverse.org/schema/citation/","dsDescription": "https://dataverse.org/schema/citation/dsDescription#"}}' "$SERVER_URL/api/datasets/:persistentId/metadata?persistentId=$DATASET_PID" + +You should expect a 200 ("OK") response indicating whether a draft Dataset version was created or an existing draft was updated. + + +Delete Dataset Metadata +----------------------- + +To delete metadata for a Dataset, send a json-ld representation of the fields to delete and specify the Dataset ID (DATASET_ID) or Persistent identifier (DATASET_PID). + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export DATASET_ID='12345' + export DATASET_PID='doi:10.5072/FK2A1B2C3' + export VERSION='1.0' + export SERVER_URL=https://demo.dataverse.org + + Example: Delete the TermsOfUseAndAccess 'restrictions' value 'No restrictions' for the latest version using the DATASET PID + + curl -X PUT -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -d '{"https://dataverse.org/schema/core#restrictions":"No restrictions"}' "$SERVER_URL/api/datasets/:persistentId/metadata/delete?persistentId=$DATASET_PID" + +Note, this example uses the term URI directly rather than adding an '@context' element. You can use either form in any of these API calls. + +You should expect a 200 ("OK") response indicating whether a draft Dataset version was created or an existing draft was updated. + + +Create a Dataset +---------------- + +Specifying the Content-Type as application/ld+json with the existing /api/dataverses/{id}/datasets API call (see :ref:`create-dataset-command`) supports using the same metadata format when creating a Dataset. + +With curl, this is done by adding the following header: + +.. code-block:: bash + + -H 'Content-Type: application/ld+json' + + .. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export DATAVERSE_ID=root + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + + curl -H X-Dataverse-key:$API_TOKEN -H 'Content-Type: application/ld+json' -X POST $SERVER_URL/api/dataverses/$DATAVERSE_ID/datasets --upload-file dataset-create.jsonld + +An example jsonld file is available at :download:`dataset-create.jsonld <../_static/api/dataset-create.jsonld>` + diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst index ed1849e6059..61ab98bf292 100755 --- a/doc/sphinx-guides/source/developers/dev-environment.rst +++ b/doc/sphinx-guides/source/developers/dev-environment.rst @@ -85,9 +85,9 @@ To install Payara, run the following commands: ``cd /usr/local`` -``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2020.6/payara-5.2020.6.zip`` +``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.5/payara-5.2021.5.zip`` -``sudo unzip payara-5.2020.6.zip`` +``sudo unzip payara-5.2021.5.zip`` ``sudo chown -R $USER /usr/local/payara5`` @@ -139,8 +139,6 @@ To install Solr, execute the following commands: ``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.8.1/schema_dv_mdb_fields.xml`` -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.8.1/schema_dv_mdb_copies.xml`` - ``mv schema*.xml collection1/conf`` ``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/8.8.1/solrconfig.xml`` diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index eebfd50ba35..78d4a6f378a 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -35,4 +35,6 @@ Developer Guide big-data-support aux-file-support s3-direct-upload-api + dataset-semantic-metadata-api + dataset-migration-api workflows diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index 29be6ab1d93..8fc0c679a8b 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -19,7 +19,7 @@ To get started, you'll want to set up your :doc:`dev-environment` and make sure Getting Help ------------ -If you have any questions at all, please reach out to other developers via the channels listed in https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md such as http://chat.dataverse.org (#dataverse on freenode), the `dataverse-dev `_ mailing list, `community calls `_, or support@dataverse.org. +If you have any questions at all, please reach out to other developers via the channels listed in https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md such as http://chat.dataverse.org, the `dataverse-dev `_ mailing list, `community calls `_, or support@dataverse.org. 
Core Technologies ----------------- diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst index 9f2386facb1..d1a71c313ca 100644 --- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst +++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst @@ -7,7 +7,7 @@ Direct upload involves a series of three activities, each involving interacting * Requesting initiation of a transfer from the server * Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3 -* A call to the server to register the file as part of the dataset/replace a file in the dataset or to cancel the transfer +* A call to the server to register the file/files as part of the dataset/replace a file in the dataset or to cancel the transfer This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload. Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.) @@ -116,6 +116,38 @@ The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.Data Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. +To add multiple Uploaded Files to the Dataset +------------------------------------------------- + +Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: + +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ + {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + + Replacing an existing file in the Dataset ----------------------------------------- diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst index bbfac33fcda..7bde4055e33 100755 --- a/doc/sphinx-guides/source/developers/testing.rst +++ b/doc/sphinx-guides/source/developers/testing.rst @@ -37,11 +37,9 @@ A unit test should execute an operation of your code in a controlled fashion. Yo Unit Test Automation Overview ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We use a variety of tools to write, execute, and measure the code coverage of unit tests, including Maven, JUnit, Jacoco, GitHub, Travis, and Coveralls. We'll explain the role of each tool below, but here's an overview of what you can expect from the automation we've set up. +We use a variety of tools to write, execute, and measure the code coverage of unit tests, including Maven, JUnit, Jacoco, GitHub, and Coveralls. We'll explain the role of each tool below, but here's an overview of what you can expect from the automation we've set up. -As you prepare to make a pull request, as described in the :doc:`version-control` section, you will be working on a new branch you create from the "develop" branch. Let's say your branch is called ``1012-private-url``. As you work, you are constantly invoking Maven to build the war file. When you do a "clean and build" in Netbeans, Maven runs all the unit tests (anything ending with ``Test.java``) and the runs the results through a tool called Jacoco that calculates code coverage. When you push your branch to GitHub and make a pull request, a web service called Travis CI runs Maven and Jacoco on your branch and pushes the results to Coveralls, which is a web service that tracks changes to code coverage over time. - -To make this more concrete, observe that https://github.com/IQSS/dataverse/pull/3111 has comments from a GitHub user called ``coveralls`` saying things like "Coverage increased (+0.5%) to 5.547% when pulling dd6ceb1 on 1012-private-url into be5b26e on develop." 
Clicking on the comment should lead you to a URL such as https://coveralls.io/builds/7013870 which shows how code coverage has gone up or down. That page links to a page such as https://travis-ci.org/IQSS/dataverse/builds/144840165 which shows the build on the Travis side that pushed the results to Coveralls. Note that we have configured Coveralls to not mark small decreases in code coverage as a failure. +As you prepare to make a pull request, as described in the :doc:`version-control` section, you will be working on a new branch you create from the "develop" branch. Let's say your branch is called ``1012-private-url``. As you work, you are constantly invoking Maven to build the war file. When you do a "clean and build" in Netbeans, Maven runs all the unit tests (anything ending with ``Test.java``) and then runs the results through a tool called Jacoco that calculates code coverage. When you push your branch to GitHub and make a pull request, GitHub Actions runs Maven and Jacoco on your branch and pushes the results to Coveralls, which is a web service that tracks changes to code coverage over time. Note that we have configured Coveralls to not mark small decreases in code coverage as a failure. You can find the Coveralls reports at https://coveralls.io/github/IQSS/dataverse The main takeaway should be that we care about unit testing enough to measure the changes to code coverage over time using automation. Now let's talk about how you can help keep our code coverage up by writing unit tests with JUnit. @@ -102,12 +100,10 @@ In addition, there is a writeup on "The Testable Command" at https://github.com/ Running Non-Essential (Excluded) Unit Tests ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You should be aware that some unit tests have been deemed "non-essential" and have been annotated with ``@Category(NonEssentialTests.class)`` and are excluded from the "dev" Maven profile, which is the default profile. All unit tests (that have not been annotated with ``@Ignore``), including these non-essential tests, are run from continuous integration systems such as Jenkins and Travis CI with the following ``mvn`` command that invokes a non-default profile: +You should be aware that some unit tests have been deemed "non-essential" and have been annotated with ``@Category(NonEssentialTests.class)`` and are excluded from the "dev" Maven profile, which is the default profile. All unit tests (that have not been annotated with ``@Ignore``), including these non-essential tests, are run from continuous integration systems such as Jenkins and GitHub Actions with the following ``mvn`` command that invokes a non-default profile: ``mvn test -P all-unit-tests`` -Typically https://travis-ci.org/IQSS/dataverse will show a higher number of unit tests executed because it uses the profile above. - Generally speaking, unit tests have been flagged as non-essential because they are slow or because they require an Internet connection. You should not feel obligated to run these tests continuously but you can use the ``mvn`` command above to run them. To iterate on the unit test in Netbeans and execute it with "Run -> Test File", you must temporarily comment out the annotation flagging the test as non-essential. Integration Tests @@ -246,7 +242,7 @@ Once installed, you may run commands with ``mvn [options] [] [`. +If you are adding a new test class, be sure to add it to :download:`tests/integration-tests.txt <../../../../tests/integration-tests.txt>` so that our automated testing knows about it. 
Writing and Using a Testcontainers Test @@ -393,12 +389,12 @@ The script requires a file called ``files.txt`` to operate and database IDs for Continuous Integration ---------------------- -The Dataverse Project currently makes use of two Continuous Integration platforms, Travis and Jenkins. - -Travis builds are configured via :download:`.travis.yml <../../../../.travis.yml>` and a `GitHub webhook `; build output is viewable at https://travis-ci.org/IQSS/dataverse/builds +The Dataverse Project currently makes use of two Continuous Integration platforms, Jenkins and GitHub Actions. Our Jenkins config is a work in progress and may be viewed at https://github.com/IQSS/dataverse-jenkins/ A corresponding GitHub webhook is required. Build output is viewable at https://jenkins.dataverse.org/ +GitHub Actions jobs can be found in ``.github/workflows``. + As always, pull requests to improve our continuous integration configurations are welcome. Enhance build time by caching dependencies @@ -438,13 +434,6 @@ How to Run the Phoenix Tests - Log into Jenkins and click "Build Now" at https://build.hmdc.harvard.edu:8443/job/phoenix.dataverse.org-build-develop/ - Wait for all three chained Jenkins jobs to complete and note if they passed or failed. If you see a failure, open a GitHub issue or at least get the attention of some developers. -List of Tests Run Against the Phoenix Server -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We haven't thought much about a good way to publicly list the "IT" classes that are executed against the phoenix server. (Currently your best bet is to look at the ``Executing Maven`` line at the top of the "Full Log" of "Console Output" of ``phoenix.dataverse.org-apitest-develop`` Jenkins job mentioned above.) We endeavor to keep the list of tests in the "all-in-one" Docker environment described above in sync with the list of tests configured in Jenkins. That is to say, refer to :download:`run-test-suite.sh <../../../../conf/docker-aio/run-test-suite.sh>` mentioned in ``conf/docker-aio/readme.md`` for the current list of IT tests that are expected to pass. Here's a dump of that file: - -.. literalinclude:: ../../../../conf/docker-aio/run-test-suite.sh - Accessibility Testing --------------------- @@ -482,7 +471,6 @@ Future Work on Integration Tests - Automate testing of dataverse-client-python: https://github.com/IQSS/dataverse-client-python/issues/10 - Work with @leeper on testing the R client: https://github.com/IQSS/dataverse-client-r - Review and attempt to implement "API Test Checklist" from @kcondon at https://docs.google.com/document/d/199Oq1YwQ4pYCguaeW48bIN28QAitSk63NbPYxJHCCAE/edit?usp=sharing -- Attempt to use @openscholar approach for running integration tests using Travis https://github.com/openscholar/openscholar/blob/SCHOLAR-3.x/.travis.yml (probably requires using Ubuntu rather than CentOS) - Generate code coverage reports for **integration** tests: https://github.com/pkainulainen/maven-examples/issues/3 and http://www.petrikainulainen.net/programming/maven/creating-code-coverage-reports-for-unit-and-integration-tests-with-the-jacoco-maven-plugin/ - Consistent logging of API Tests. Show test name at the beginning and end and status codes returned. - expected passing and known/expected failing integration tests: https://github.com/IQSS/dataverse/issues/4438 @@ -495,15 +483,14 @@ Browser-Based Testing Installation Testing ~~~~~~~~~~~~~~~~~~~~ -- Run `vagrant up` on a server to test the installer: http://guides.dataverse.org/en/latest/developers/tools.html#vagrant . 
We haven't been able to get this working in Travis: https://travis-ci.org/IQSS/dataverse/builds/96292683 . Perhaps it would be possible to use AWS as a provider from Vagrant judging from https://circleci.com/gh/critical-alert/circleci-vagrant/6 . -- Work with @lwo to automate testing of https://github.com/IQSS/dataverse-puppet . Consider using Travis: https://github.com/IQSS/dataverse-puppet/issues/10 -- Work with @donsizemore to automate testing of https://github.com/GlobalDataverseCommunityConsortium/dataverse-ansible with Travis or similar. +- Run `vagrant up` on a server to test the installer +- Work with @donsizemore to automate testing of https://github.com/GlobalDataverseCommunityConsortium/dataverse-ansible Future Work on Load/Performance Testing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Clean up and copy stress tests code, config, and docs into main repo from https://github.com/IQSS/dataverse-helper-scripts/tree/master/src/stress_tests -- Marcel Duran created a command-line wrapper for the WebPagetest API that can be used to test performance in your continuous integration pipeline (TAP, Jenkins, Travis-CI, etc): https://github.com/marcelduran/webpagetest-api/wiki/Test-Specs#jenkins-integration +- Marcel Duran created a command-line wrapper for the WebPagetest API that can be used to test performance in your continuous integration pipeline (TAP, Jenkins, etc.): https://github.com/marcelduran/webpagetest-api/wiki/Test-Specs#jenkins-integration - Create top-down checklist, building off the "API Test Coverage" spreadsheet at https://github.com/IQSS/dataverse/issues/3358#issuecomment-256400776 Future Work on Accessibility Testing diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 39f27f749dc..072a8df0183 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -205,6 +205,7 @@ Here are the configuration options for handles: - :ref:`:IdentifierGenerationStyle <:IdentifierGenerationStyle>` (optional) - :ref:`:DataFilePIDFormat <:DataFilePIDFormat>` (optional) - :ref:`:IndependentHandleService <:IndependentHandleService>` (optional) +- :ref:`:HandleAuthHandle <:HandleAuthHandle>` (optional) Note: If you are **minting your own handles** and plan to set up your own handle service, please refer to `Handle.Net documentation `_. @@ -528,24 +529,25 @@ Lastly, go ahead and restart your Payara server. With Dataverse deployed and the S3 Storage Options ################## -=========================================== ================== ========================================================================= ============= -JVM Option Value Description Default value -=========================================== ================== ========================================================================= ============= -dataverse.files.storage-driver-id Enable as the default storage driver. ``file`` -dataverse.files..bucket-name The bucket name. See above. (none) -dataverse.files..download-redirect ``true``/``false`` Enable direct download or proxy through Dataverse. ``false`` -dataverse.files..upload-redirect ``true``/``false`` Enable direct upload of files added to a dataset to the S3 store. ``false`` -dataverse.files..ingestsizelimit Maximum size of directupload files that should be ingested (none) -dataverse.files..url-expiration-minutes If direct uploads/downloads: time until links expire. Optional. 
60 -dataverse.files..min-part-size Multipart direct uploads will occur for files larger than this. Optional. ``1024**3`` -dataverse.files..custom-endpoint-url Use custom S3 endpoint. Needs URL either with or without protocol. (none) -dataverse.files..custom-endpoint-region Only used when using custom endpoint. Optional. ``dataverse`` -dataverse.files..proxy-url URL of a proxy protecting the S3 store. Optional. (none) -dataverse.files..path-style-access ``true``/``false`` Use path style buckets instead of subdomains. Optional. ``false`` -dataverse.files..payload-signing ``true``/``false`` Enable payload signing. Optional ``false`` -dataverse.files..chunked-encoding ``true``/``false`` Disable chunked encoding. Optional ``true`` -dataverse.files..connection-pool-size The maximum number of open connections to the S3 server ``256`` -=========================================== ================== ========================================================================= ============= +=========================================== ================== ========================================================================== ============= +JVM Option Value Description Default value +=========================================== ================== ========================================================================== ============= +dataverse.files.storage-driver-id Enable as the default storage driver. ``file`` +dataverse.files..bucket-name The bucket name. See above. (none) +dataverse.files..download-redirect ``true``/``false`` Enable direct download or proxy through Dataverse. ``false`` +dataverse.files..upload-redirect ``true``/``false`` Enable direct upload of files added to a dataset to the S3 store. ``false`` +dataverse.files..ingestsizelimit Maximum size of directupload files that should be ingested (none) +dataverse.files..url-expiration-minutes If direct uploads/downloads: time until links expire. Optional. 60 +dataverse.files..min-part-size Multipart direct uploads will occur for files larger than this. Optional. ``1024**3`` +dataverse.files..custom-endpoint-url Use custom S3 endpoint. Needs URL either with or without protocol. (none) +dataverse.files..custom-endpoint-region Only used when using custom endpoint. Optional. ``dataverse`` +dataverse.files..profile Allows the use of AWS profiles for storage spanning multiple AWS accounts. (none) +dataverse.files..proxy-url URL of a proxy protecting the S3 store. Optional. (none) +dataverse.files..path-style-access ``true``/``false`` Use path style buckets instead of subdomains. Optional. ``false`` +dataverse.files..payload-signing ``true``/``false`` Enable payload signing. Optional ``false`` +dataverse.files..chunked-encoding ``true``/``false`` Disable chunked encoding. 
Optional ``true`` +dataverse.files..connection-pool-size The maximum number of open connections to the S3 server ``256`` +=========================================== ================== ========================================================================== ============= Reported Working S3-Compatible Storage ###################################### @@ -604,9 +606,9 @@ Once you have the location of your custom homepage HTML file, run this curl comm ``curl -X PUT -d '/var/www/dataverse/branding/custom-homepage.html' http://localhost:8080/api/admin/settings/:HomePageCustomizationFile`` -If you prefer to start with less of a blank slate, you can download the :download:`custom-homepage-dynamic.html ` template which was built for the Harvard Dataverse Repository, and includes branding messaging, action buttons, search input, subject links, and recent dataset links. This page was built to utilize the :doc:`/api/metrics` to deliver dynamic content to the page via javascript. +If you prefer to start with less of a blank slate, you can review the custom homepage used by the Harvard Dataverse Repository, which includes branding messaging, action buttons, search input, subject links, and recent dataset links. This page was built to utilize the :doc:`/api/metrics` to deliver dynamic content to the page via Javascript. The files can be found at https://github.com/IQSS/dataverse.harvard.edu -Note that the ``custom-homepage.html`` and ``custom-homepage-dynamic.html`` files provided have multiple elements that assume your root Dataverse collection still has an alias of "root". While you were branding your root Dataverse collection, you may have changed the alias to "harvard" or "librascholar" or whatever and you should adjust the custom homepage code as needed. +Note that the ``custom-homepage.html`` file provided has multiple elements that assume your root Dataverse collection still has an alias of "root". While you were branding your root Dataverse collection, you may have changed the alias to "harvard" or "librascholar" or whatever and you should adjust the custom homepage code as needed. For more background on what this curl command above is doing, see the "Database Settings" section below. If you decide you'd like to remove this setting, use the following curl command: @@ -1476,49 +1478,96 @@ Out of the box, the DOI shoulder is set to "FK2/" but this is for testing only! :IdentifierGenerationStyle ++++++++++++++++++++++++++ -By default, the Dataverse Software generates a random 6 character string, pre-pended by the Shoulder if set, to use as the identifier -for a Dataset. Set this to ``sequentialNumber`` to use sequential numeric values -instead (again pre-pended by the Shoulder if set). (the assumed default setting is ``randomString``). -In addition to this setting, a database sequence must be created in the database. -We provide the script below (downloadable :download:`here `). -You may need to make some changes to suit your system setup, see the comments for more information: +By default, the Dataverse Software generates a random 6 character string, +pre-pended by the Shoulder if set, to use as the identifier for a Dataset. +Set this to ``storedProcGenerated`` to generate instead a custom *unique* +identifier (again pre-pended by the Shoulder if set) through a database +stored procedure or function (the assumed default setting is ``randomString``). +In addition to this setting, a stored procedure or function must be created in +the database. 
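For reference, the setting itself is switched with the same curl pattern used for the other database settings on this page; a minimal sketch, assuming the standard admin settings endpoint on localhost:

.. code-block:: bash

    # Use the stored-procedure style for generating dataset identifiers
    # (the stored procedure must already exist in the database, as noted above).
    curl -X PUT -d 'storedProcGenerated' http://localhost:8080/api/admin/settings/:IdentifierGenerationStyle

    # Revert to the default behavior (random 6 character strings).
    curl -X PUT -d 'randomString' http://localhost:8080/api/admin/settings/:IdentifierGenerationStyle
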
+ +As a first example, the script below (downloadable +:download:`here `) produces +sequential numerical values. You may need to make some changes to suit your +system setup, see the comments for more information: .. literalinclude:: ../_static/util/createsequence.sql + :language: plpgsql + +As a second example, the script below (downloadable +:download:`here `) produces +sequential 8 character identifiers from a base36 representation of current +timestamp. + +.. literalinclude:: ../_static/util/identifier_from_timestamp.sql + :language: plpgsql -Note that the SQL above is Postgres-specific. If necessary, it can be reimplemented -in any other SQL flavor - the standard JPA code in the application simply expects -the database to have a saved function ("stored procedure") named ``generateIdentifierAsSequentialNumber`` -with the single return argument ``identifier``. +Note that the SQL in these examples scripts is Postgres-specific. +If necessary, it can be reimplemented in any other SQL flavor - the standard +JPA code in the application simply expects the database to have a saved +function ("stored procedure") named ``generateIdentifierFromStoredProcedure()`` +returning a single ``varchar`` argument. -Please note that ``:IdentifierGenerationStyle`` also plays a role for the "identifier" for files. See the section on ``:DataFilePIDFormat`` below for more details. +Please note that ``:IdentifierGenerationStyle`` also plays a role for the +"identifier" for files. See the section on :ref:`:DataFilePIDFormat` below for +more details. .. _:DataFilePIDFormat: :DataFilePIDFormat ++++++++++++++++++ -This setting controls the way that the "identifier" component of a file's persistent identifier (PID) relates to the PID of its "parent" dataset. - -By default the identifier for a file is dependent on its parent dataset. For example, if the identifier of a dataset is "TJCLKP", the identifier for a file within that dataset will consist of the parent dataset's identifier followed by a slash ("/"), followed by a random 6 character string, yielding "TJCLKP/MLGWJO". Identifiers in this format are what you should expect if you leave ``:DataFilePIDFormat`` undefined or set it to ``DEPENDENT`` and have not changed the ``:IdentifierGenerationStyle`` setting from its default. - -Alternatively, the identifier for File PIDs can be configured to be independent of Dataset PIDs using the setting "``INDEPENDENT``". In this case, file PIDs will not contain the PIDs of their parent datasets, and their PIDs will be generated the exact same way that datasets' PIDs are, based on the ``:IdentifierGenerationStyle`` setting described above (random 6 character strings or sequential numbers, pre-pended by any shoulder). - -The chart below shows examples from each possible combination of parameters from the two settings. ``:IdentifierGenerationStyle`` can be either ``randomString`` (the default) or ``sequentialNumber`` and ``:DataFilePIDFormat`` can be either ``DEPENDENT`` (the default) or ``INDEPENDENT``. In the examples below the "identifier" for the dataset is "TJCLKP" for "randomString" and "100001" for "sequentialNumber". 
- -+-----------------+---------------+------------------+ -| | randomString | sequentialNumber | -| | | | -+=================+===============+==================+ -| **DEPENDENT** | TJCLKP/MLGWJO | 100001/1 | -+-----------------+---------------+------------------+ -| **INDEPENDENT** | MLGWJO | 100002 | -+-----------------+---------------+------------------+ - -As seen above, in cases where ``:IdentifierGenerationStyle`` is set to *sequentialNumber* and ``:DataFilePIDFormat`` is set to *DEPENDENT*, each file within a dataset will be assigned a number *within* that dataset starting with "1". - -Otherwise, if ``:DataFilePIDFormat`` is set to *INDEPENDENT*, then each file will be assigned a PID with the next number in the overall sequence, regardless of what dataset it is in. If the file is created after a dataset with the PID 100001, then the file will be assigned the PID 100002. This option is functional, but it is not a recommended use case. - -Note that in either case, when using the ``sequentialNumber`` option, datasets and files share the same database sequence that was created as part of the setup described in ``:IdentifierGenerationStyle`` above. +This setting controls the way that the "identifier" component of a file's +persistent identifier (PID) relates to the PID of its "parent" dataset. + +By default the identifier for a file is dependent on its parent dataset. +For example, if the identifier of a dataset is "TJCLKP", the identifier for +a file within that dataset will consist of the parent dataset's identifier +followed by a slash ("/"), followed by a random 6 character string, +yielding "TJCLKP/MLGWJO". Identifiers in this format are what you should +expect if you leave ``:DataFilePIDFormat`` undefined or set it to +``DEPENDENT`` and have not changed the ``:IdentifierGenerationStyle`` +setting from its default. + +Alternatively, the identifier for File PIDs can be configured to be +independent of Dataset PIDs using the setting ``INDEPENDENT``. +In this case, file PIDs will not contain the PIDs of their parent datasets, +and their PIDs will be generated the exact same way that datasets' PIDs are, +based on the ``:IdentifierGenerationStyle`` setting described above +(random 6 character strings or custom unique identifiers through a stored +procedure, pre-pended by any shoulder). + +The chart below shows examples from each possible combination of parameters +from the two settings. ``:IdentifierGenerationStyle`` can be either +``randomString`` (the default) or ``storedProcGenerated`` and +``:DataFilePIDFormat`` can be either ``DEPENDENT`` (the default) or +``INDEPENDENT``. In the examples below the "identifier" for the dataset is +"TJCLKP" for ``randomString`` and "100001" for ``storedProcGenerated`` (when +using sequential numerical values, as described in +:ref:`:IdentifierGenerationStyle` above), or "krby26qt" for +``storedProcGenerated`` (when using base36 timestamps, as described in +:ref:`:IdentifierGenerationStyle` above). 
+ ++-----------------+---------------+----------------------+---------------------+ +| | randomString | storedProcGenerated | storedProcGenerated | +| | | | | +| | | (sequential numbers) | (base36 timestamps) | ++=================+===============+======================+=====================+ +| **DEPENDENT** | TJCLKP/MLGWJO | 100001/1 | krby26qt/1 | ++-----------------+---------------+----------------------+---------------------+ +| **INDEPENDENT** | MLGWJO | 100002 | krby27pz | ++-----------------+---------------+----------------------+---------------------+ + +As seen above, in cases where ``:IdentifierGenerationStyle`` is set to +``storedProcGenerated`` and ``:DataFilePIDFormat`` is set to ``DEPENDENT``, +each file within a dataset will be assigned a number *within* that dataset +starting with "1". + +Otherwise, if ``:DataFilePIDFormat`` is set to ``INDEPENDENT``, each file +within the dataset is assigned with a new PID which is the next available +identifier provided from the database stored procedure. In our example: +"100002" when using sequential numbers or "krby27pz" when using base36 +timestamps. .. _:FilePIDsEnabled: @@ -1543,6 +1592,17 @@ By default this setting is absent and the Dataverse Software assumes it to be fa ``curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:IndependentHandleService`` +.. _:HandleAuthHandle: + +:HandleAuthHandle ++++++++++++++++++++++++++ + +Specific for Handle PIDs. Set this setting to / to be used on a global handle service when the public key is NOT stored in the default handle. +By default this setting is absent and the Dataverse Software assumes it to be not set. If the public key for instance is stored in handle: 21.T12996/USER01. +For this handle the prefix is '21.T12996' and the suffix is 'USER01'. The command to execute is then: + +``curl -X PUT -d '21.T12996/USER01' http://localhost:8080/api/admin/settings/:HandleAuthHandle`` + .. _:FileValidationOnPublishEnabled: :FileValidationOnPublishEnabled @@ -1677,10 +1737,12 @@ Notes: :ZipDownloadLimit +++++++++++++++++ -For performance reasons, your Dataverse installation will only create zip files on the fly up to 100 MB but the limit can be increased. Here's an example of raising the limit to 1 GB: +For performance reasons, your Dataverse installation will only allow creation of zip files up to 100 MB, but the limit can be increased. Here's an example of raising the limit to 1 GB: ``curl -X PUT -d 1000000000 http://localhost:8080/api/admin/settings/:ZipDownloadLimit`` +In the UI, users trying to download a zip file larger than the Dataverse installation's :ZipDownloadLimit will receive messaging that the zip file is too large, and the user will be presented with alternate access options. + :TabularIngestSizeLimit +++++++++++++++++++++++ @@ -2236,3 +2298,13 @@ By default, the name of the root Dataverse collection is used as the 'brandname' ++++++++++++++++++++++++++++++++++++++++++++++ In the DDI metadata exports, the default behavior is to always add the repository (using its brandname - the root collection name or the value of :ref:`:InstallationName <:InstallationName>`) to the stdyDscr/distStmt/distrbtr element. If this setting is true, this will only be done when a Distributor is not already defined in the Dataset metadata. (Note that, since metadata export files are cached, they will have to be reexported (see :doc:`/admin/metadataexport`) before they incorporate a change in this setting.) + +.. 
_:AnonymizedFieldTypeNames: + +:AnonymizedFieldTypeNames ++++++++++++++++++++++++++ + +A comma-separated list of field type names that should be 'withheld' when dataset access occurs via a Private Url with Anonymized Access (e.g. to support anonymized review). +A suggested minimum includes author, datasetContact, and contributor, but additional fields such as depositor, grantNumber, and publication might also need to be included. + +``curl -X PUT -d 'author, datasetContact, contributor, depositor, grantNumber, publication' http://localhost:8080/api/admin/settings/:AnonymizedFieldTypeNames`` diff --git a/doc/sphinx-guides/source/installation/intro.rst b/doc/sphinx-guides/source/installation/intro.rst index 6c6199af02d..4dd5f9e8795 100644 --- a/doc/sphinx-guides/source/installation/intro.rst +++ b/doc/sphinx-guides/source/installation/intro.rst @@ -36,7 +36,7 @@ Getting Help To get help installing or configuring a Dataverse installation, please try one or more of: - posting to the `dataverse-community `_ Google Group. -- asking at http://chat.dataverse.org (#dataverse on the freenode IRC network) +- asking at http://chat.dataverse.org - emailing support@dataverse.org to open a private ticket at https://help.hmdc.harvard.edu Improving this Guide diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index f0f7fab3511..e3dc04ac70b 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -44,7 +44,7 @@ On RHEL/derivative you can make Java 11 the default with the ``alternatives`` co Payara ------ -Payara 5.2020.6 is recommended. Newer versions might work fine, regular updates are recommended. +Payara 5.2021.5 is recommended. Newer versions might work fine, regular updates are recommended. Installing Payara ================= @@ -55,8 +55,8 @@ Installing Payara - Download and install Payara (installed in ``/usr/local/payara5`` in the example commands below):: - # wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2020.6/payara-5.2020.6.zip - # unzip payara-5.2020.6.zip + # wget https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.5/payara-5.2021.5.zip + # unzip payara-5.2021.5.zip # mv payara5 /usr/local If you intend to install and run Payara under a service account (and we hope you do), chown -R the Payara hierarchy to root to protect it but give the service account access to the below directories: @@ -362,7 +362,14 @@ The Dataverse Software uses `Rserve `_ to communicat to R. Rserve is installed as a library package, as described in the step above. It runs as a daemon process on the server, accepting network connections on a dedicated port. This requires some extra -configuration and we provide a script (:fixedwidthplain:`scripts/r/rserve/rserve-setup.sh`) for setting it up. +configuration and we provide a script for setting it up. + +You'll want to obtain local copies of the Rserve setup files found in +https://github.com/IQSS/dataverse/tree/master/scripts/r/rserve +either by cloning a local copy of the IQSS repository: +:fixedwidthplain:`git clone -b master https://github.com/IQSS/dataverse.git` +or by downloading the files individually. 
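As a compact sketch of the clone-based option just described (the checkout directory is an assumption):

.. code-block:: bash

    # Obtain a local copy of the Rserve setup files from the master branch.
    git clone -b master https://github.com/IQSS/dataverse.git
    # The setup script (rserve-setup.sh) and related files live under scripts/r/rserve.
    ls dataverse/scripts/r/rserve
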
+ Run the script as follows (as root):: cd /scripts/r/rserve diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index 262862f29fc..393f9ac202a 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -508,10 +508,11 @@ Creating a Private URL for your dataset allows you to share your dataset (for vi #. Go to your unpublished dataset #. Select the “Edit” button #. Select “Private URL” in the dropdown menu -#. In the pop-up select “Create Private URL” +#. In the pop-up select “Create Private URL” or "Create URL for Anonymized Access". The latter supports anonymous review by removing author names and other potentially identifying information from citations, version history tables, and some metadata fields (as configured by the administrator). #. Copy the Private URL which has been created for this dataset and it can now be shared with anyone you wish to have access to view or download files in your unpublished dataset. To disable a Private URL and to revoke access, follow the same steps as above until step #3 when you return to the popup, click the “Disable Private URL” button. +Note that only one PrivateURL (normal or with anonymized access) can be configured per dataset at a time. Dataset Versions ================ diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 9c640a99aa7..99f088db41c 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -6,8 +6,9 @@ Dataverse Software Documentation Versions This list provides a way to refer to the documentation for previous versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. -- 5.5 +- 5.6 +- `5.5 `__ - `5.4.1 `__ - `5.4 `__ - `5.3 `__ diff --git a/doc/theTestableCommand/TheTestableCommand.md b/doc/theTestableCommand/TheTestableCommand.md index e60faa313f0..5a9fc259d4a 100644 --- a/doc/theTestableCommand/TheTestableCommand.md +++ b/doc/theTestableCommand/TheTestableCommand.md @@ -21,7 +21,7 @@ While they can't replace end-to-end tests, unit tests are a great way to validat Because unit tests are easy to create (Java only, no configuration needed) and quick to run, it is possible to write many of them, such that many aspects of the code are tested. Normally, a single unit test would test a single use case of the unit. This way, when a unit test fails, the failure describes exactly what part stopped functioning. Other unit tests are not blocked by the failure, and so by running the entire test suite, the developer can get a good overview of which parts are broken and which parts are functioning well. -Because unit tests are easy to execute, it is recommended to get in the habit of running them prior to committing code changes to the repository. These tests are also integrated into Dataverse's automatic build processes (on [Travis-ci](https://travis-ci.org/IQSS/dataverse)). A failed test halts the build. Dataverse's build process also collects data about code coverage during the unit tests, using [Coveralls](https://coveralls.io/github/IQSS/dataverse). While code coverage is a problematic measure for Java EE applications (and has some inherent problems as well), generally speaking larger coverage means better testing. 
+Because unit tests are easy to execute, it is recommended to get in the habit of running them prior to committing code changes to the repository. These tests are also integrated into Dataverse's automatic build processes. A failed test halts the build. Dataverse's build process also collects data about code coverage during the unit tests, using [Coveralls](https://coveralls.io/github/IQSS/dataverse). While code coverage is a problematic measure for Java EE applications (and has some inherent problems as well), generally speaking larger coverage means better testing. Unit Testing of application logic in Java EE applications is normally hard to do, as the application logic lives in the service beans, which rely on dependency injections. Writing unit tests for service beans is possible, but as it involves a test container, and a persistent context (read: in-memory database) these unit tests are not very unit-y. diff --git a/downloads/download.sh b/downloads/download.sh index 8c2b51dd4c7..33476c24b76 100755 --- a/downloads/download.sh +++ b/downloads/download.sh @@ -1,5 +1,5 @@ #!/bin/sh -curl -L -O https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2020.6/payara-5.2020.6.zip +curl -L -O https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2021.5/payara-5.2021.5.zip curl -L -O https://archive.apache.org/dist/lucene/solr/8.8.1/solr-8.8.1.tgz curl -L -O https://search.maven.org/remotecontent?filepath=org/jboss/weld/weld-osgi-bundle/2.2.10.Final/weld-osgi-bundle-2.2.10.Final-glassfish4.jar curl -s -L http://sourceforge.net/projects/schemaspy/files/schemaspy/SchemaSpy%205.0.0/schemaSpy_5.0.0.jar/download > schemaSpy_5.0.0.jar diff --git a/pom.xml b/pom.xml index 331c616f743..bb53d1efa64 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ --> edu.harvard.iq dataverse - 5.5 + 5.6 war dataverse @@ -26,11 +26,13 @@ false 8.0.0 - 5.2020.6 + 5.2021.5 42.2.19 1.11.762 1.2 - 4.5.5 + 3.12.0 + 4.5.13 + 0.157.0 4.13.1 5.7.0 ${junit.jupiter.version} @@ -58,6 +60,18 @@ + + payara-nexus-artifacts + Payara Nexus Artifacts + https://nexus.payara.fish/repository/payara-artifacts + + true + + + false + + + payara-patched-externals Payara Patched Externals @@ -120,6 +134,11 @@ commons-logging ${commons.logging.version} + + org.apache.commons + commons-lang3 + ${commons.lang3.version} + org.apache.httpcomponents httpclient @@ -128,7 +147,7 @@ com.google.cloud google-cloud-bom - 0.115.0-alpha + ${google.cloud.version} pom import @@ -155,9 +174,21 @@ org.glassfish javax.json - 1.0.4 + 1.1.4 test + + org.skyscreamer + jsonassert + 1.5.0 + test + + + com.vaadin.external.google + android-json + + + org.apache.httpcomponents httpclient @@ -200,6 +231,11 @@ aws-java-sdk-s3 + + com.apicatalog + titanium-json-ld + 0.8.6 + org.apache.abdera @@ -288,6 +324,11 @@ ${jakartaee-api.version} provided + + com.sun.mail + jakarta.mail + provided + org.glassfish jakarta.faces @@ -296,7 +337,7 @@ org.primefaces primefaces - 8.0 + 10.0.0 org.primefaces.themes @@ -318,9 +359,14 @@ provided - commons-lang - commons-lang - 2.6 + org.apache.commons + commons-lang3 + + + + org.apache.commons + commons-text + 1.9 org.apache.solr @@ -486,7 +532,7 @@ com.nimbusds oauth2-oidc-sdk - 6.18 + 9.9.1 @@ -594,7 +640,6 @@ com.google.cloud google-cloud-storage - 1.97.0 @@ -772,7 +817,7 @@ org.eluder.coveralls coveralls-maven-plugin - 4.0.0 + 4.3.0 javax.xml.bind @@ -794,12 +839,19 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.0.0 + 3.1.2 checkstyle.xml UTF-8 true + + + com.puppycrawl.tools + checkstyle + 8.42 + + diff 
--git a/scripts/installer/Makefile b/scripts/installer/Makefile index 180e2cb03d5..fe26bb5d6c6 100644 --- a/scripts/installer/Makefile +++ b/scripts/installer/Makefile @@ -4,7 +4,7 @@ GLASSFISH_SETUP_SCRIPT=${INSTALLER_ZIP_DIR}/as-setup.sh API_SCRIPTS=${INSTALLER_ZIP_DIR}/setup-datasetfields.sh ${INSTALLER_ZIP_DIR}/setup-users.sh ${INSTALLER_ZIP_DIR}/setup-builtin-roles.sh ${INSTALLER_ZIP_DIR}/setup-dvs.sh ${INSTALLER_ZIP_DIR}/data ${INSTALLER_ZIP_DIR}/setup-identity-providers.sh ${INSTALLER_ZIP_DIR}/setup-all.sh ${INSTALLER_ZIP_DIR}/post-install-api-block.sh JHOVE_CONFIG=${INSTALLER_ZIP_DIR}/jhove.conf JHOVE_SCHEMA=${INSTALLER_ZIP_DIR}/jhoveConfig.xsd -SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_fields.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_copies.xml ${INSTALLER_ZIP_DIR}/updateSchemaMDB.sh +SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_fields.xml ${INSTALLER_ZIP_DIR}/updateSchemaMDB.sh SOLR_CONFIG=${INSTALLER_ZIP_DIR}/solrconfig.xml PYTHON_FILES=${INSTALLER_ZIP_DIR}/README_python.txt ${INSTALLER_ZIP_DIR}/installConfig.py ${INSTALLER_ZIP_DIR}/installUtils.py ${INSTALLER_ZIP_DIR}/install.py ${INSTALLER_ZIP_DIR}/installAppServer.py ${INSTALLER_ZIP_DIR}/requirements.txt ${INSTALLER_ZIP_DIR}/default.config ${INSTALLER_ZIP_DIR}/interactive.config INSTALL_SCRIPT=${INSTALLER_ZIP_DIR}/install @@ -56,7 +56,7 @@ ${JHOVE_SCHEMA}: ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} @echo copying jhove schema file /bin/cp ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} -${SOLR_SCHEMA}: ../../conf/solr/8.8.1/schema.xml ../../conf/solr/8.8.1/schema_dv_mdb_fields.xml ../../conf/solr/8.8.1/schema_dv_mdb_copies.xml ../../conf/solr/8.8.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR} +${SOLR_SCHEMA}: ../../conf/solr/8.8.1/schema.xml ../../conf/solr/8.8.1/schema_dv_mdb_fields.xml ../../conf/solr/8.8.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR} @echo copying Solr schema file /bin/cp ../../conf/solr/8.8.1/schema*.xml ../../conf/solr/8.8.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR} diff --git a/scripts/installer/README.txt b/scripts/installer/README.txt index 70f08550a75..f4f316dc958 100644 --- a/scripts/installer/README.txt +++ b/scripts/installer/README.txt @@ -38,6 +38,5 @@ jhove.conf SOLR schema and config files, from conf/solr/8.8.1: schema.xml -schema_dv_cmb_copies.xml -schema_dv_cmb_fields.xml +schema_dv_mdb_fields.xml solrconfig.xml diff --git a/scripts/issues/7451/PRE-RELEASE-INFO.txt b/scripts/issues/7451/PRE-RELEASE-INFO.txt new file mode 100644 index 00000000000..c14c25aa93f --- /dev/null +++ b/scripts/issues/7451/PRE-RELEASE-INFO.txt @@ -0,0 +1,39 @@ +In the next release another constraint is being added to existing +databases, to prevent any possibility of creating datafile objects +referencing the same file. This was originally planned for v4.20, but +in that release the constraint was only added to newly created +databases, and was not forced on the databases that had already +existed. If your current database was originally created by version +4.20 or newer, you don't need to do anything. + +If you do have an older database, it MUST BE RE-CHECKED for any +existing duplicates before the next release (5.6, presumably) can be +deployed. Hopefully there are no such inconsistencies in your +database, but if there are any, they will need to be resolved, or the +next version of the application WILL FAIL TO DEPLOY, with an error +message from FlyWay. 
Please run the following script: + +https://github.com/IQSS/dataverse/raw/develop/scripts/issues/7451/check_datafiles_7451.sh + +The script relies on the PostgreSQL utility psql to access the +database. You will need to edit the credentials at the top of the script +to match your database configuration. + +The script will check your database for any duplicated storage +identifiers that would violate the new constraint. + +For harvested files, it will directly resolve any conflicts. + +For local files, rather than attempt to make any changes right away +(this being an issue of data integrity with a potential to affect your +users) it will instruct you to send the produced diagnostic +information to support@dataverse.org so that we can assist you +in resolving the issues in your database. + +If no inconsistencies are found, the script will report that the database +is ready to be upgraded to the next release. + +(Please note that there's a very good chance that your database does +not have any conflicts of this nature. But we want to do this to be +absolutely sure. We apologize for any inconvenience.) + diff --git a/scripts/issues/7451/check_datafiles_7451.sh b/scripts/issues/7451/check_datafiles_7451.sh new file mode 100755 index 00000000000..1e4c95c69f4 --- /dev/null +++ b/scripts/issues/7451/check_datafiles_7451.sh @@ -0,0 +1,161 @@ +#!/bin/bash + +# begin config +# PostgresQL credentials: +# edit the following lines so that psql can talk to your database +pg_host=localhost +pg_port=5432 +pg_user=dvnapp +pg_db=dvndb +# you can leave the password blank, if Postgres is configured +# to accept connections without auth: +pg_pass= +# psql executable - add full path, if necessary: +PSQL_EXEC=psql + +# end config + +# check for duplicate storageidentifiers in harvested datafiles: + +PG_QUERY_0="SELECT COUNT(DISTINCT o.id) FROM datafile f, dataset s, dvobject p, dvobject o WHERE s.id = p.id AND o.id = f.id AND o.owner_id = s.id AND s.harvestingclient_id IS NOT null AND o.storageidentifier IS NOT null" + +PG_QUERY_1="SELECT s.id, o.storageidentifier FROM datafile f, dataset s, dvobject o WHERE o.id = f.id AND o.owner_id = s.id AND s.harvestingclient_id IS NOT null AND o.storageidentifier IS NOT null ORDER by s.id, o.storageidentifier" + +PG_QUERY_FIX_0="UPDATE dvobject SET storageidentifier=NULL WHERE dtype='DataFile' AND (storageidentifier='file://' OR storageidentifier='http://' OR storageidentifier='s3://')" + +PG_QUERY_FIX_1="UPDATE dvobject SET storageidentifier=CONCAT(storageidentifier, ' ', id) WHERE owner_id = %d AND storageidentifier='%s'" + +PGPASSWORD=$pg_pass; export PGPASSWORD + +echo "Checking the total number of storageidentifiers in harvested datafiles..." + +NUM_DATAFILES=`${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_0}"` +if [ $? != 0 ] +then + echo "FAILED to execute psql! Check the credentials and try again?" + echo "exiting..." + echo + echo "the command line that failed:" + echo "${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c \"${PG_QUERY_0}\"" + exit 1 +fi + +echo $NUM_DATAFILES total. 
+echo + +# Before we do anything else, reset the storageidentifiers of the datafiles (harvested and otherwise) that +# may have ended up set to invalid, prefix-only values like "file://" back to NULL: + +${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -q -c "${PG_QUERY_FIX_0}" + + +echo "Let's check if any harvested storage identifiers are referenced more than once within the same dataset:" + +${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_1}" | +uniq -c | +awk '{if ($1 > 1) print $0}' | sort -u > /tmp/harvestedidentifiers.tmp + +NUM_CONFIRMED=`cat /tmp/harvestedidentifiers.tmp | wc -l` + +if [ $NUM_CONFIRMED == 0 ] +then + echo + echo "Good news - it appears that there are NO duplicate storageidentifiers in your harvested datafiles;" + echo "nothing to fix." + echo +else + + echo "Found ${NUM_CONFIRMED} harvested files with identical storageidentifiers; fixing in place..." + + cat /tmp/harvestedidentifiers.tmp | sed 's:\\:\\\\:g' | while read howmany dataset storageidentifier + do + # Harvard prod. db had a few harvested storage identifiers consisting of a single space (" "), + # which would confuse the shell. Extremely unlikely to be found in any other installation. + if [[ "x${storageidentifier}" = "x" ]] + then + storageidentifier=" " + fi + + PG_QUERY_SI=`printf "${PG_QUERY_FIX_1}" $dataset "$storageidentifier"` + ${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_SI}" + done + + echo "... done." + echo + + echo -n "Let's confirm that all these dupes have been fixed... " + ${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_1}" | + uniq -c | + awk '{if ($1 > 1) print $0}' | sort -u > /tmp/harvestedidentifiers.tmp + + NUM_CONFIRMED=`cat /tmp/harvestedidentifiers.tmp | wc -l` + + if [ $NUM_CONFIRMED == 0 ] + then + echo "Looks good." + echo + else + echo "Oops!" + echo "Unfortunately, the script failed to fix some of the harvested duplicates." + echo "Please send the contents of the file /tmp/harvestedidentifiers.tmp" + echo "to Dataverse support at support@dataverse.org." + echo "Apologies for the extra trouble..." + echo + exit 1 + fi + +fi + + +# now, check for duplicate storageidentifiers in local datafiles: + +PG_QUERY_3="SELECT COUNT(DISTINCT o.id) FROM datafile f, dataset s, dvobject p, dvobject o WHERE s.id = p.id AND o.id = f.id AND o.owner_id = s.id AND s.harvestingclient_id IS null AND o.storageidentifier IS NOT null" + +PG_QUERY_4="SELECT s.id, o.storageidentifier FROM datafile f, dataset s, dvobject o WHERE o.id = f.id AND o.owner_id = s.id AND s.harvestingclient_id IS null AND o.storageidentifier IS NOT null ORDER by s.id, o.storageidentifier" + +PG_QUERY_5="SELECT p.authority, p.identifier, o.storageidentifier, o.id, o.createdate, f.contenttype FROM datafile f, dvobject p, dvobject o WHERE o.id = f.id AND o.owner_id = p.id AND p.id = %d AND o.storageidentifier='%s' ORDER by o.id" + +echo "Checking the number of non-harvested datafiles in the database..." + +NUM_DATAFILES=`${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_3}"` +echo $NUM_DATAFILES total. 
+echo + +echo "Let's check if any storage identifiers are referenced more than once within the same dataset:" + +${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_4}" | +uniq -c | +awk '{if ($1 > 1) print $0}' > /tmp/storageidentifiers.tmp + +NUM_CONFIRMED=`cat /tmp/storageidentifiers.tmp | wc -l` + +if [ $NUM_CONFIRMED == 0 ] +then + echo + echo "Good news - it appears that there are NO duplicate DataFile objects in your database." + echo "Your installation is ready to be upgraded to Dataverse 5.5" + echo +else + + echo "The following storage identifiers appear to be referenced from multiple non-harvested DvObjects:" + cat /tmp/storageidentifiers.tmp + echo "(output saved in /tmp/storageidentifiers.tmp)" + + echo "Looking up details for the affected datafiles:" + + cat /tmp/storageidentifiers.tmp | while read howmany dataset storageidentifier + do + PG_QUERY_SI=`printf "${PG_QUERY_5}" $dataset "$storageidentifier"` + ${PSQL_EXEC} -h ${pg_host} -U ${pg_user} -d ${pg_db} -tA -F ' ' -c "${PG_QUERY_SI}" + done | tee /tmp/duplicates_info.tmp + + echo "(output saved in /tmp/duplicates_info.tmp)" + + echo + echo "Please send the output above to Dataverse support at support@dataverse.org." + echo "We will assist you in the process of cleaning up the affected files above." + echo "We apologize for any inconvenience." + echo +fi + + diff --git a/scripts/search/tests/data/dataset-finch1.jsonld b/scripts/search/tests/data/dataset-finch1.jsonld new file mode 100644 index 00000000000..be39c9f14b2 --- /dev/null +++ b/scripts/search/tests/data/dataset-finch1.jsonld @@ -0,0 +1,26 @@ + +{ + "http://purl.org/dc/terms/title": "Darwin's Finches", + "http://purl.org/dc/terms/subject": "Medicine, Health and Life Sciences", + "http://purl.org/dc/terms/creator": { + "https://dataverse.org/schema/citation/author#Name": "Finch, Fiona", + "https://dataverse.org/schema/citation/author#Affiliation": "Birds Inc." + }, + "https://dataverse.org/schema/citation/Contact": { + "https://dataverse.org/schema/citation/datasetContact#E-mail": "finch@mailinator.com", + "https://dataverse.org/schema/citation/datasetContact#Name": "Finch, Fiona" + }, + "https://dataverse.org/schema/citation/Description": { + "https://dataverse.org/schema/citation/dsDescription#Text": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds." 
+ }, + "@type": [ + "http://www.openarchives.org/ore/terms/Aggregation", + "http://schema.org/Dataset" + ], + "http://schema.org/version": "DRAFT", + "http://schema.org/name": "Darwin's Finches", + "https://dataverse.org/schema/core#fileTermsOfAccess": { + "https://dataverse.org/schema/core#fileRequestAccess": false + }, + "http://schema.org/includedInDataCatalog": "Root" +} \ No newline at end of file diff --git a/scripts/vagrant/setup.sh b/scripts/vagrant/setup.sh index 14f12cea692..24bac307709 100644 --- a/scripts/vagrant/setup.sh +++ b/scripts/vagrant/setup.sh @@ -35,16 +35,15 @@ echo "export MAVEN_HOME=/opt/maven" >> /etc/profile.d/maven.sh echo "export PATH=/opt/maven/bin:${PATH}" >> /etc/profile.d/maven.sh chmod 0755 /etc/profile.d/maven.sh -# disable centos8 postgresql module and install postgresql10-server -# note: postgresql10 because 9.6 isn't backwards compatible with python3-psycopg2 +# disable centos8 postgresql module and install postgresql13-server dnf -qy module disable postgresql dnf install -qy https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm -dnf install -qy postgresql10-server -/usr/pgsql-10/bin/postgresql-10-setup initdb -/usr/bin/systemctl stop postgresql-10 -cp /dataverse/conf/vagrant/var/lib/pgsql/data/pg_hba.conf /var/lib/pgsql/10/data/pg_hba.conf -/usr/bin/systemctl start postgresql-10 -/usr/bin/systemctl enable postgresql-10 +dnf install -qy postgresql13-server +/usr/pgsql-13/bin/postgresql-13-setup initdb +/usr/bin/systemctl stop postgresql-13 +cp /dataverse/conf/vagrant/var/lib/pgsql/data/pg_hba.conf /var/lib/pgsql/13/data/pg_hba.conf +/usr/bin/systemctl start postgresql-13 +/usr/bin/systemctl enable postgresql-13 PAYARA_USER=dataverse echo "Ensuring Unix user '$PAYARA_USER' exists" @@ -53,7 +52,7 @@ SOLR_USER=solr echo "Ensuring Unix user '$SOLR_USER' exists" useradd $SOLR_USER || : DOWNLOAD_DIR='/dataverse/downloads' -PAYARA_ZIP="$DOWNLOAD_DIR/payara-5.2020.6.zip" +PAYARA_ZIP="$DOWNLOAD_DIR/payara-5.2021.5.zip" SOLR_TGZ="$DOWNLOAD_DIR/solr-8.8.1.tgz" if [ ! -f $PAYARA_ZIP ] || [ ! -f $SOLR_TGZ ]; then echo "Couldn't find $PAYARA_ZIP or $SOLR_TGZ! Running download script...." 
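Looping back to the pre-release duplicate check added under ``scripts/issues/7451`` above, here is a hedged sketch of how an installation administrator might fetch and run it; the download URL is the one given in PRE-RELEASE-INFO.txt, and the remaining steps follow the configuration comments at the top of the script.

.. code-block:: bash

    # Download the check script referenced in PRE-RELEASE-INFO.txt.
    curl -L -O https://github.com/IQSS/dataverse/raw/develop/scripts/issues/7451/check_datafiles_7451.sh

    # Edit the pg_host/pg_port/pg_user/pg_db/pg_pass variables at the top of
    # the script so psql can reach your database, then run it and review the
    # output (and /tmp/duplicates_info.tmp, if local duplicates are reported).
    chmod +x check_datafiles_7451.sh
    ./check_datafiles_7451.sh
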
diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index ce32c18fa7a..218e4c85474 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -23,7 +23,7 @@ import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.TypedQuery; -import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -175,7 +175,7 @@ public static String getMetadataFromDvObject(String identifier, Map from HTML, it leaves '&' (at least so we need to xml escape as well - String description = StringEscapeUtils.escapeXml(dataset.getLatestVersion().getDescriptionPlainText()); + String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { description = AbstractGlobalIdServiceBean.UNAVAILABLE; } @@ -185,7 +185,7 @@ public static String getMetadataFromDvObject(String identifier, Map citationList = new ArrayList<>(); - citationList.add(formatString(getAuthorsString(), html)); + if(anonymized) { + citationList.add(BundleUtil.getStringFromBundle("file.anonymized.authorsWithheld")); + } else { + citationList.add(formatString(getAuthorsString(), html)); + } citationList.add(year); if ((fileTitle != null) && isDirect()) { citationList.add(formatString(fileTitle, html, "\"")); @@ -643,7 +650,7 @@ private String formatString(String value, boolean escapeHtml, String wrapperFron private String formatString(String value, boolean escapeHtml, String wrapperStart, String wrapperEnd) { if (!StringUtils.isEmpty(value)) { - return new StringBuilder(wrapperStart).append(escapeHtml ? StringEscapeUtils.escapeHtml(value) : value) + return new StringBuilder(wrapperStart).append(escapeHtml ? 
StringEscapeUtils.escapeHtml4(value) : value) .append(wrapperEnd).toString(); } return null; @@ -655,7 +662,7 @@ private String formatURL(String text, String url, boolean html) { } if (html && url != null) { - return "" + StringEscapeUtils.escapeHtml(text) + ""; + return "" + StringEscapeUtils.escapeHtml4(text) + ""; } else { return text; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 706b8d9f4e5..83a65110be2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -38,7 +38,7 @@ import javax.persistence.Query; import javax.persistence.StoredProcedureQuery; import javax.persistence.TypedQuery; -import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang3.RandomStringUtils; /** * @@ -1428,11 +1428,11 @@ public String generateDataFileIdentifier(DataFile datafile, GlobalIdServiceBean switch (doiIdentifierType) { case "randomString": return generateIdentifierAsRandomString(datafile, idServiceBean, prepend); - case "sequentialNumber": + case "storedProcGenerated": if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ - return generateIdentifierAsIndependentSequentialNumber(datafile, idServiceBean, prepend); + return generateIdentifierFromStoredProcedureIndependent(datafile, idServiceBean, prepend); } else { - return generateIdentifierAsDependentSequentialNumber(datafile, idServiceBean, prepend); + return generateIdentifierFromStoredProcedureDependent(datafile, idServiceBean, prepend); } default: /* Should we throw an exception instead?? -- L.A. 4.6.2 */ @@ -1450,24 +1450,24 @@ private String generateIdentifierAsRandomString(DataFile datafile, GlobalIdServi } - private String generateIdentifierAsIndependentSequentialNumber(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) { + private String generateIdentifierFromStoredProcedureIndependent(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) { String identifier; do { - StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierAsSequentialNumber"); + StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); query.execute(); - Integer identifierNumeric = (Integer) query.getOutputParameterValue(1); + String identifierFromStoredProcedure = (String) query.getOutputParameterValue(1); // some diagnostics here maybe - is it possible to determine that it's failing // because the stored procedure hasn't been created in the database? 
- if (identifierNumeric == null) { + if (identifierFromStoredProcedure == null) { return null; } - identifier = prepend + identifierNumeric.toString(); + identifier = prepend + identifierFromStoredProcedure; } while (!isGlobalIdUnique(identifier, datafile, idServiceBean)); return identifier; } - private String generateIdentifierAsDependentSequentialNumber(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) { + private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, GlobalIdServiceBean idServiceBean, String prepend) { String identifier; Long retVal; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java b/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java index 02d3da128f1..275d47cf1de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileTag.java @@ -20,7 +20,7 @@ import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; import javax.persistence.Table; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index cd40e76a304..0f12d02fb3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -60,29 +60,22 @@ }) /* - Below is the stored procedure for getting a numeric value from a database - sequence. Used when the Dataverse is (optionally) configured to use - incremental numeric values for dataset ids, instead of the default + Below is the database stored procedure for getting a string dataset id. + Used when the Dataverse is (optionally) configured to use + procedurally generated values for dataset ids, instead of the default random strings. - Unfortunately, there's no standard EJB way of handling sequences. So in the - past we would simply use a NativeQuery to call a proprietary Postgres - sequence query. A better way of handling this however is to define any - proprietary SQL functionality outside of the application, in the database, - and call it using the standard JPA @StoredProcedureQuery. - - The identifier sequence and the stored procedure for accessing it are currently - implemented with PostgresQL "CREATE SEQUENCE ..." and "CREATE FUNCTION ..."; - (we explain how to create these in the installation documentation and supply - a script). If necessary, it can be implemented using other SQL flavors - + The use of a stored procedure to create an identifier is explained in the + installation documentation (where an example script is supplied). + The stored procedure can be implemented using other SQL flavors - without having to modify the application code. - -- L.A. 4.6.2 + -- L.A. 4.6.2 (modified by C.S. 
for version 5.4.1+) */ @NamedStoredProcedureQuery( - name = "Dataset.generateIdentifierAsSequentialNumber", - procedureName = "generateIdentifierAsSequentialNumber", + name = "Dataset.generateIdentifierFromStoredProcedure", + procedureName = "generateIdentifierFromStoredProcedure", parameters = { - @StoredProcedureParameter(mode = ParameterMode.OUT, type = Integer.class) + @StoredProcedureParameter(mode = ParameterMode.OUT, type = String.class) } ) @Entity @@ -649,7 +642,11 @@ public String getCitation(DatasetVersion version) { } public String getCitation(boolean isOnlineVersion, DatasetVersion version) { - return version.getCitation(isOnlineVersion); + return getCitation(isOnlineVersion, version, false); + } + + public String getCitation(boolean isOnlineVersion, DatasetVersion version, boolean anonymized) { + return version.getCitation(isOnlineVersion, anonymized); } public String getPublicationDateFormattedYYYYMMDD() { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetField.java b/src/main/java/edu/harvard/iq/dataverse/DatasetField.java index 79f8916deb9..228cedd8663 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetField.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetField.java @@ -33,7 +33,7 @@ import javax.persistence.OrderBy; import javax.persistence.Table; import javax.persistence.Transient; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; @Entity @ValidateDatasetFieldType @@ -241,7 +241,7 @@ public String getValue() { return datasetFieldValues.get(0).getValue(); } else if (controlledVocabularyValues != null && !controlledVocabularyValues.isEmpty()) { if (controlledVocabularyValues.get(0) != null){ - return controlledVocabularyValues.get(0).getStrValue(); + return controlledVocabularyValues.get(0).getLocaleStrValue(); } } return null; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java index 221922ea004..407a1d57bd3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java @@ -25,7 +25,7 @@ import javax.persistence.OneToMany; import javax.persistence.OrderBy; import javax.persistence.Table; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java index 00b7be97b83..c685fcb3e54 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValidator.java @@ -11,7 +11,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.Collections; import java.util.List; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java index 27929dd3a39..2447a6478fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValue.java @@ -21,7 +21,7 @@ import javax.persistence.ManyToOne; import javax.persistence.Table; import javax.persistence.Transient; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index e571fd89627..0b1619e6851 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -18,7 +18,7 @@ import java.util.regex.Pattern; import javax.validation.ConstraintValidator; import javax.validation.ConstraintValidatorContext; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index dbd762bba6e..be960082bd6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -115,7 +115,7 @@ import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServletResponse; -import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.lang3.mutable.MutableBoolean; import org.apache.commons.io.IOUtils; @@ -438,16 +438,6 @@ public void setRemoveUnusedTags(boolean removeUnusedTags) { private String fileSortField; private String fileSortOrder; - private LazyFileMetadataDataModel lazyModel; - - public LazyFileMetadataDataModel getLazyModel() { - return lazyModel; - } - - public void setLazyModel(LazyFileMetadataDataModel lazyModel) { - this.lazyModel = lazyModel; - } - public List> getCartList() { if (session.getUser() instanceof AuthenticatedUser) { return ((AuthenticatedUser) session.getUser()).getCart().getContents(); @@ -1862,7 +1852,8 @@ private String init(boolean initFull) { } // init the citation - displayCitation = dataset.getCitation(true, workingVersion); + displayCitation = dataset.getCitation(true, workingVersion, isAnonymizedAccess()); + logger.fine("Citation: " + displayCitation); if(workingVersion.isPublished()) { MakeDataCountEntry entry = new MakeDataCountEntry(FacesContext.getCurrentInstance(), dvRequestService, workingVersion); @@ -1899,7 +1890,8 @@ private String init(boolean initFull) { this.guestbookResponse = guestbookResponseService.initGuestbookResponseForFragment(workingVersion, null, session); logger.fine("Checking if rsync support is enabled."); if (DataCaptureModuleUtil.rsyncSupportEnabled(settingsWrapper.getValueForKey(SettingsServiceBean.Key.UploadMethods)) - && dataset.getFiles().isEmpty()) { //only check for rsync if no files exist + && dataset.getFiles().isEmpty() && this.canUpdateDataset() ) { //only check for rsync if no files exist + //and user can update dataset try { ScriptRequestResponse scriptRequestResponse = commandEngine.submit(new RequestRsyncScriptCommand(dvRequestService.getDataverseRequest(), dataset)); logger.fine("script: " + scriptRequestResponse.getScript()); @@ -1912,9 +1904,11 @@ private String init(boolean initFull) { setHasRsyncScript(false); } } catch (RuntimeException ex) { - logger.warning("Problem getting rsync script: " + ex.getLocalizedMessage()); + logger.warning("Problem getting rsync script(RuntimeException): " + ex.getLocalizedMessage()); + FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Problem getting rsync script:", ex.getLocalizedMessage())); } catch (CommandException cex) { logger.warning("Problem getting rsync script (Command Exception): " + cex.getLocalizedMessage()); + 
FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_ERROR, "Problem getting rsync script:", cex.getLocalizedMessage())); } } @@ -1988,8 +1982,10 @@ private String init(boolean initFull) { } catch (CommandException ex) { // No big deal. The user simply doesn't have access to create or delete a Private URL. } + logger.fine("PrivateUser: " + (session.getUser() instanceof PrivateUrlUser)); if (session.getUser() instanceof PrivateUrlUser) { PrivateUrlUser privateUrlUser = (PrivateUrlUser) session.getUser(); + logger.fine("Anon: " + privateUrlUser.hasAnonymizedAccess()); if (dataset != null && dataset.getId().equals(privateUrlUser.getDatasetId())) { JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.privateurl.header"), BundleUtil.getStringFromBundle("dataset.privateurl.infoMessageReviewer")); @@ -2027,8 +2023,6 @@ private String init(boolean initFull) { datasetExploreTools = externalToolService.findDatasetToolsByType(ExternalTool.Type.EXPLORE); rowsPerPage = 10; - - return null; } @@ -2783,14 +2777,22 @@ public String editFileMetadata(){ public String deleteDatasetVersion() { DeleteDatasetVersionCommand cmd; + + Map deleteStorageLocations = datafileService.getPhysicalFilesToDelete(dataset.getLatestVersion()); + boolean deleteCommandSuccess = false; try { cmd = new DeleteDatasetVersionCommand(dvRequestService.getDataverseRequest(), dataset); commandEngine.submit(cmd); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetVersion.message.deleteSuccess")); + deleteCommandSuccess = true; } catch (CommandException ex) { JH.addMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("dataset.message.deleteFailure")); logger.severe(ex.getMessage()); } + + if (deleteCommandSuccess && !deleteStorageLocations.isEmpty()) { + datafileService.finalizeFileDeletes(deleteStorageLocations); + } return returnToDatasetOnly(); } @@ -3107,8 +3109,8 @@ public void updateFileCounts(){ private List getSuccessMessageArguments() { List arguments = new ArrayList<>(); String dataverseString = ""; - arguments.add(StringEscapeUtils.escapeHtml(dataset.getDisplayName())); - dataverseString += " " + StringEscapeUtils.escapeHtml(selectedDataverseForLinking.getDisplayName()) + ""; + arguments.add(StringEscapeUtils.escapeHtml4(dataset.getDisplayName())); + dataverseString += " " + StringEscapeUtils.escapeHtml4(selectedDataverseForLinking.getDisplayName()) + ""; arguments.add(dataverseString); return arguments; } @@ -5065,23 +5067,16 @@ public void setShowLinkingPopup(boolean showLinkingPopup) { // - /* - public void setSelectedGroup(ExplicitGroup selectedGroup) { - setShowDeletePopup(true); - this.selectedGroup = selectedGroup; - } - */ - - public void createPrivateUrl() { + public void createPrivateUrl(boolean anonymizedAccess) { try { - PrivateUrl createdPrivateUrl = commandEngine.submit(new CreatePrivateUrlCommand(dvRequestService.getDataverseRequest(), dataset)); + PrivateUrl createdPrivateUrl = commandEngine.submit(new CreatePrivateUrlCommand(dvRequestService.getDataverseRequest(), dataset, anonymizedAccess)); privateUrl = createdPrivateUrl; JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("dataset.privateurl.header"), BundleUtil.getStringFromBundle("dataset.privateurl.infoMessageAuthor", Arrays.asList(getPrivateUrlLink(privateUrl)))); privateUrlWasJustCreated = true; } catch (CommandException ex) { String msg = BundleUtil.getStringFromBundle("dataset.privateurl.noPermToCreate", 
PrivateUrlUtil.getRequiredPermissions(ex)); - logger.info("Unable to create a Private URL for dataset id " + dataset.getId() + ". Message to user: " + msg + " Exception: " + ex); + logger.warning("Unable to create a Private URL for dataset id " + dataset.getId() + ". Message to user: " + msg + " Exception: " + ex); JH.addErrorMessage(msg); } } @@ -5103,8 +5098,31 @@ public boolean isUserCanCreatePrivateURL() { public String getPrivateUrlLink(PrivateUrl privateUrl) { return privateUrl.getLink(); } - - + + public boolean isAnonymizedAccess() { + if (session.getUser() instanceof PrivateUrlUser) { + return ((PrivateUrlUser)session.getUser()).hasAnonymizedAccess(); + } else { + return false; + } + } + + public boolean isAnonymizedPrivateUrl() { + if(privateUrl != null) { + return privateUrl.isAnonymizedAccess(); + } else { + return false; + } + } + + public boolean isAnonymizedAccessEnabled() { + if (settingsWrapper.getValueForKey(SettingsServiceBean.Key.AnonymizedFieldTypeNames) != null) { + return true; + } else { + return false; + } + } + // todo: we should be able to remove - this is passed in the html pages to other fragments, but they could just access this service bean directly. public FileDownloadServiceBean getFileDownloadService() { return fileDownloadService; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 224ccfd22f3..2cf1f0d094f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -49,7 +49,7 @@ import javax.persistence.Query; import javax.persistence.StoredProcedureQuery; import javax.persistence.TypedQuery; -import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang3.RandomStringUtils; import org.ocpsoft.common.util.Strings; /** @@ -273,8 +273,8 @@ public String generateDatasetIdentifier(Dataset dataset, GlobalIdServiceBean idS switch (identifierType) { case "randomString": return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder); - case "sequentialNumber": - return generateIdentifierAsSequentialNumber(dataset, idServiceBean, shoulder); + case "storedProcGenerated": + return generateIdentifierFromStoredProcedure(dataset, idServiceBean, shoulder); default: /* Should we throw an exception instead?? -- L.A. 4.6.2 */ return generateIdentifierAsRandomString(dataset, idServiceBean, shoulder); @@ -290,19 +290,19 @@ private String generateIdentifierAsRandomString(Dataset dataset, GlobalIdService return identifier; } - private String generateIdentifierAsSequentialNumber(Dataset dataset, GlobalIdServiceBean idServiceBean, String shoulder) { + private String generateIdentifierFromStoredProcedure(Dataset dataset, GlobalIdServiceBean idServiceBean, String shoulder) { String identifier; do { - StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierAsSequentialNumber"); + StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); query.execute(); - Integer identifierNumeric = (Integer) query.getOutputParameterValue(1); + String identifierFromStoredProcedure = (String) query.getOutputParameterValue(1); // some diagnostics here maybe - is it possible to determine that it's failing // because the stored procedure hasn't been created in the database? 
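// (Illustrative note, not part of this patch: one way to answer the diagnostics question above
//  would be to catch the javax.persistence.PersistenceException that query.execute() raises when
//  the database function has not been created, and log an explicit hint, e.g.
//      } catch (PersistenceException pe) {
//          logger.warning("generateIdentifierFromStoredProcedure failed; was the stored procedure created? " + pe.getMessage());
//          return null;
//      }
//  The exception handling sketched here is an assumption, not code from the Dataverse code base.)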
- if (identifierNumeric == null) { + if (identifierFromStoredProcedure == null) { return null; } - identifier = shoulder + identifierNumeric.toString(); + identifier = shoulder + identifierFromStoredProcedure; } while (!isIdentifierLocallyUnique(identifier, dataset)); return identifier; @@ -732,21 +732,30 @@ public void exportAllDatasets(boolean forceReExport) { //depends on dataset state and user privleges public String getReminderString(Dataset dataset, boolean canPublishDataset) { + String reminderString; + if(!dataset.isReleased() ){ //messages for draft state. if (canPublishDataset){ - return BundleUtil.getStringFromBundle("dataset.message.publish.remind.draft"); + reminderString = BundleUtil.getStringFromBundle("dataset.message.publish.remind.draft"); } else { - return BundleUtil.getStringFromBundle("dataset.message.submit.remind.draft"); + reminderString = BundleUtil.getStringFromBundle("dataset.message.submit.remind.draft"); } } else{ //messages for new version - post-publish if (canPublishDataset){ - return BundleUtil.getStringFromBundle("dataset.message.publish.remind.version"); + reminderString = BundleUtil.getStringFromBundle("dataset.message.publish.remind.version"); } else { - return BundleUtil.getStringFromBundle("dataset.message.submit.remind.version"); + reminderString = BundleUtil.getStringFromBundle("dataset.message.submit.remind.version"); } } + + if (reminderString != null) { + return reminderString; + } else { + logger.warning("Unable to get reminder string from bundle. Returning empty string."); + return ""; + } } public void updateLastExportTimeStamp(Long datasetId) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index d28ce5175d4..78c1687a7b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -49,7 +49,7 @@ import javax.validation.Validator; import javax.validation.ValidatorFactory; import javax.validation.constraints.Size; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * @@ -1340,7 +1340,11 @@ public String getCitation() { } public String getCitation(boolean html) { - return new DataCitation(this).toString(html); + return getCitation(html, false); + } + + public String getCitation(boolean html, boolean anonymized) { + return new DataCitation(this).toString(html, anonymized); } public Date getCitationDate() { @@ -1703,11 +1707,11 @@ public String getJsonLd() { JsonArrayBuilder authors = Json.createArrayBuilder(); for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) { JsonObjectBuilder author = Json.createObjectBuilder(); - String name = datasetAuthor.getName().getValue(); + String name = datasetAuthor.getName().getDisplayValue(); DatasetField authorAffiliation = datasetAuthor.getAffiliation(); String affiliation = null; if (authorAffiliation != null) { - affiliation = datasetAuthor.getAffiliation().getValue(); + affiliation = datasetAuthor.getAffiliation().getDisplayValue(); } // We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization". 
//author.add("@type", "Person"); @@ -1859,7 +1863,7 @@ public String getJsonLd() { JsonObjectBuilder license = Json.createObjectBuilder().add("@type", "Dataset"); if (TermsOfUseAndAccess.License.CC0.equals(terms.getLicense())) { - license.add("text", "CC0").add("url", "https://creativecommons.org/publicdomain/zero/1.0/"); + license.add("text", "CC0").add("url", TermsOfUseAndAccess.CC0_URI); } else { String termsOfUse = terms.getTermsOfUse(); // Terms of use can be null if you create the dataset with JSON. @@ -1945,6 +1949,10 @@ public String getJsonLd() { job.add("distribution", fileArray); } jsonLd = job.build().toString(); + + //Most fields above should be stripped/sanitized but, since this is output in the dataset page as header metadata, do a final sanitize step to make sure + jsonLd = MarkupChecker.stripAllTags(jsonLd); + return jsonLd; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java index 8cc25f5148e..fee8a66a290 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java @@ -12,7 +12,7 @@ import java.util.List; import java.util.Set; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.Arrays; import java.util.Date; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 3c1ae3abf38..32dd32b643f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -34,7 +34,7 @@ import javax.persistence.PersistenceContext; import javax.persistence.Query; import javax.persistence.TypedQuery; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrServerException; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index b46333a4287..342aaec187a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -33,7 +33,7 @@ import javax.validation.constraints.Pattern; import javax.validation.constraints.Size; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.hibernate.validator.constraints.NotBlank; import org.hibernate.validator.constraints.NotEmpty; diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java b/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java index 7e9655b3970..b806ef8e22d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseHeaderFragment.java @@ -25,7 +25,7 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index c7f816ce219..1e2d3f507a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -52,8 +52,8 @@ import javax.ejb.EJBException; 
import javax.faces.event.ValueChangeEvent; import javax.faces.model.SelectItem; -import org.apache.commons.lang.StringEscapeUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.text.StringEscapeUtils; +import org.apache.commons.lang3.StringUtils; import org.primefaces.PrimeFaces; import org.primefaces.event.TransferEvent; @@ -805,8 +805,8 @@ public String saveLinkedDataverse() { private List getSuccessMessageArguments() { List arguments = new ArrayList<>(); - arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName())); - String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; + arguments.add(StringEscapeUtils.escapeHtml4(dataverse.getDisplayName())); + String linkString = "" + StringEscapeUtils.escapeHtml4(linkingDataverse.getDisplayName()) + ""; arguments.add(linkString); return arguments; } @@ -867,7 +867,7 @@ public String saveSavedSearch() { commandEngine.submit(cmd); List arguments = new ArrayList<>(); - String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; + String linkString = "" + StringEscapeUtils.escapeHtml4(linkingDataverse.getDisplayName()) + ""; arguments.add(linkString); String successMessageString = BundleUtil.getStringFromBundle("dataverse.saved.search.success", arguments); JsfHelper.addSuccessMessage(successMessageString); diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index f6b396f4c00..ad72f3819fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -2,7 +2,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import javax.persistence.MappedSuperclass; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * A {@link DvObject} that can contain other {@link DvObject}s. 
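The recurring import changes in these hunks are part of a single dependency migration: org.apache.commons.lang (commons-lang 2.x) is replaced by org.apache.commons.lang3, and StringEscapeUtils moves to org.apache.commons.text, where escapeHtml() is superseded by escapeHtml4(). Below is a minimal sketch of the swap, outside the patch itself; the class and method names are illustrative and assumed, not taken from the Dataverse code base.

    // A minimal sketch, assuming commons-lang3 and commons-text are on the classpath.
    // Old (commons-lang 2.x), as removed throughout this patch:
    //     import org.apache.commons.lang.StringUtils;
    //     import org.apache.commons.lang.StringEscapeUtils;
    //     String safe = StringEscapeUtils.escapeHtml(displayName);
    import org.apache.commons.lang3.StringUtils;
    import org.apache.commons.text.StringEscapeUtils;

    public class EscapeMigrationSketch { // illustrative class name, not in the code base

        // Escapes a display name for inclusion in an HTML message, the way the page beans
        // escape display names for their success-message arguments.
        public static String toSafeHtml(String displayName) {
            if (StringUtils.isBlank(displayName)) { // lang3 keeps the 2.x StringUtils API
                return "";
            }
            return StringEscapeUtils.escapeHtml4(displayName); // commons-text successor to escapeHtml
        }

        public static void main(String[] args) {
            System.out.println(toSafeHtml("Café & \"Friends\" <Dataverse>"));
            // -> Caf&eacute; &amp; &quot;Friends&quot; &lt;Dataverse&gt;
        }
    }

The intent is behavior-preserving: escapeHtml4() covers the same HTML 4.0 entity set as the removed escapeHtml(), and the lang3 StringUtils methods keep their 2.x signatures for the calls involved.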
diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index 4830c422d05..01b0890d588 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -19,7 +19,7 @@ import javax.persistence.NonUniqueResultException; import javax.persistence.PersistenceContext; import javax.persistence.Query; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.ocpsoft.common.util.Strings; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index aef3f7d3446..d2620d9a240 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -76,7 +76,7 @@ import javax.faces.event.FacesEvent; import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServletResponse; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.mutable.MutableBoolean; import org.primefaces.PrimeFaces; @@ -598,7 +598,7 @@ public String init() { } if (mode == FileEditMode.UPLOAD) { - if (settingsWrapper.getUploadMethodsCount() == 1){ + if (settingsWrapper.getUploadMethodsCount() == 1){ JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("dataset.message.uploadFiles.label"), BundleUtil.getStringFromBundle("dataset.message.uploadFilesSingle.message", Arrays.asList(systemConfig.getGuidesBaseUrl(), systemConfig.getGuidesVersion()))); } else if (settingsWrapper.getUploadMethodsCount() > 1) { JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("dataset.message.uploadFiles.label"), BundleUtil.getStringFromBundle("dataset.message.uploadFilesMultiple.message", Arrays.asList(systemConfig.getGuidesBaseUrl(), systemConfig.getGuidesVersion()))); @@ -1527,10 +1527,22 @@ private void setUpRsync() { } } catch (EJBException ex) { logger.warning("Problem getting rsync script (EJBException): " + EjbUtil.ejbExceptionToString(ex)); + FacesContext.getCurrentInstance().addMessage(uploadComponentId, + new FacesMessage(FacesMessage.SEVERITY_ERROR, + "Problem getting rsync script (EJBException): " + EjbUtil.ejbExceptionToString(ex), + "Problem getting rsync script (EJBException):")); } catch (RuntimeException ex) { logger.warning("Problem getting rsync script (RuntimeException): " + ex.getLocalizedMessage()); + FacesContext.getCurrentInstance().addMessage(uploadComponentId, + new FacesMessage(FacesMessage.SEVERITY_ERROR, + "Problem getting rsync script (RuntimeException): " + ex.getMessage(), + "Problem getting rsync script (RuntimeException):")); } catch (CommandException cex) { logger.warning("Problem getting rsync script (Command Exception): " + cex.getLocalizedMessage()); + FacesContext.getCurrentInstance().addMessage(uploadComponentId, + new FacesMessage(FacesMessage.SEVERITY_ERROR, + "Problem getting rsync script (Command Exception): " + cex.getMessage(), + "Problem getting rsync script (Command Exception):")); } } } @@ -2037,7 +2049,9 @@ public void handleExternalUpload() { // ----------------------------------------------------------- if (this.isFileReplaceOperation()){ this.handleReplaceFileUpload(storageLocation, fileName, contentType, checksumValue, checksumType); - 
this.setFileMetadataSelectedForTagsPopup(fileReplacePageHelper.getNewFileMetadatasBeforeSave().get(0)); + if (fileReplacePageHelper.getNewFileMetadatasBeforeSave() != null){ + this.setFileMetadataSelectedForTagsPopup(fileReplacePageHelper.getNewFileMetadatasBeforeSave().get(0)); + } return; } // ----------------------------------------------------------- @@ -2967,8 +2981,15 @@ public boolean rsyncUploadSupported() { // ToDo - rsync was written before multiple store support and currently is hardcoded to use the "s3" store. // When those restrictions are lifted/rsync can be configured per store, the test in the // Dataset Util method should be updated + if(settingsWrapper.isRsyncUpload() && !DatasetUtil.isAppropriateStorageDriver(dataset) ){ + //dataset.file.upload.setUp.rsync.failed.detail + FacesMessage message = new FacesMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("dataset.file.upload.setUp.rsync.failed"), BundleUtil.getStringFromBundle("dataset.file.upload.setUp.rsync.failed.detail")); + FacesContext.getCurrentInstance().addMessage(null, message); + } + - return settingsWrapper.isRsyncUpload() && DatasetUtil.isAppropriateStorageDriver(dataset); + + return settingsWrapper.isRsyncUpload() && DatasetUtil.isAppropriateStorageDriver(dataset); } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java index 7b0fb0fd76c..0b2a92fe06a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileMetadata.java @@ -374,23 +374,19 @@ public String getFileDateToDisplay() { } return ""; } - + public String getFileCitation(){ - return getFileCitation(false); - } - + return getFileCitation(false, false); + } - - - public String getFileCitation(boolean html){ - return new DataCitation(this).toString(html); - } - - public String getDirectFileCitation(boolean html){ - return new DataCitation(this, true).toString(html); + public String getFileCitation(boolean html, boolean anonymized){ + return new DataCitation(this).toString(html, anonymized); } - - + + public String getDirectFileCitation(boolean html, boolean anonymized){ + return new DataCitation(this, true).toString(html, anonymized); + } + public DatasetVersion getDatasetVersion() { return datasetVersion; } diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index d863500d137..045ac1f934a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -971,5 +971,11 @@ public boolean isFileAccessRequest() { public void setFileAccessRequest(boolean fileAccessRequest) { this.fileAccessRequest = fileAccessRequest; - } + } + public boolean isAnonymizedAccess() { + if(session.getUser() instanceof PrivateUrlUser) { + return ((PrivateUrlUser)session.getUser()).hasAnonymizedAccess(); + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/Guestbook.java b/src/main/java/edu/harvard/iq/dataverse/Guestbook.java index 742e73403c1..18913bfd5bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Guestbook.java +++ b/src/main/java/edu/harvard/iq/dataverse/Guestbook.java @@ -23,7 +23,7 @@ import javax.persistence.Transient; import edu.harvard.iq.dataverse.util.DateUtil; -import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import org.hibernate.validator.constraints.NotBlank; /** diff --git 
a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java index a7fb2b5a3fd..37d7169b959 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java @@ -24,7 +24,7 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 809417e3f9c..f2d290215da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -128,6 +128,18 @@ public void streamResponsesByDataverseIdAndGuestbookId(OutputStream out, Long da // of queries now) -- L.A. Map customQandAs = mapCustomQuestionAnswersAsStrings(dataverseId, guestbookId); + + List guestbookResults = getGuestbookResults( dataverseId, guestbookId ); + // the CSV header: + out.write("Guestbook, Dataset, Dataset PID, Date, Type, File Name, File Id, File PID, User Name, Email, Institution, Position, Custom Questions\n".getBytes()); + for (Object[] result : guestbookResults) { + StringBuilder sb = convertGuestbookResponsesToCSV(customQandAs, result); + out.write(sb.toString().getBytes()); + out.flush(); + } + } + + public List getGuestbookResults(Long dataverseId, Long guestbookId ){ String queryString = BASE_QUERY_STRING_FOR_DOWNLOAD_AS_CSV + " and o.owner_id = " @@ -137,15 +149,15 @@ public void streamResponsesByDataverseIdAndGuestbookId(OutputStream out, Long da queryString+= (" and r.guestbook_id = " + guestbookId.toString()); } - queryString += ";"; + queryString += " ORDER by r.id DESC;"; logger.fine("stream responses query: " + queryString); - List guestbookResults = em.createNativeQuery(queryString).getResultList(); - - // the CSV header: - out.write("Guestbook, Dataset, Dataset PID, Date, Type, File Name, File Id, File PID, User Name, Email, Institution, Position, Custom Questions\n".getBytes()); + return em.createNativeQuery(queryString).getResultList(); - for (Object[] result : guestbookResults) { + } + + public StringBuilder convertGuestbookResponsesToCSV ( Map customQandAs, Object[] result) throws IOException { + Integer guestbookResponseId = (Integer)result[0]; StringBuilder sb = new StringBuilder(); @@ -208,36 +220,17 @@ public void streamResponsesByDataverseIdAndGuestbookId(OutputStream out, Long da // Finally, custom questions and answers, if present: - // (the old implementation, below, would run one extra query FOR EVERY SINGLE - // guestbookresponse entry! -- instead, we are now pre-caching all the - // available custom question responses, with a single native query at - // the top of this method. -- L.A.) - - /*String cqString = "select q.questionstring, r.response from customquestionresponse r, customquestion q where q.id = r.customquestion_id and r.guestbookResponse_id = " + result[0]; - List customResponses = em.createNativeQuery(cqString).getResultList(); - if (customResponses != null) { - for (Object[] response : customResponses) { - sb.append(SEPARATOR); - sb.append(response[0]); - sb.append(SEPARATOR); - sb.append(response[1] == null ? 
"" : response[1]); - } - }*/ if (customQandAs.containsKey(guestbookResponseId)) { sb.append(customQandAs.get(guestbookResponseId)); } sb.append(NEWLINE); - - // Finally, write the line out: - // (i.e., we are writing one guestbook response at a time, thus allowing the - // whole thing to stream in real time -- L.A.) - out.write(sb.toString().getBytes()); - out.flush(); - } + return sb; + } + private String formatPersistentIdentifier(String protocol, String authority, String identifier) { // Note that the persistent id may be unavailable for this dvObject: if (StringUtil.nonEmpty(protocol) && StringUtil.nonEmpty(authority) && StringUtil.nonEmpty(identifier)) { @@ -349,7 +342,7 @@ private Map mapCustomQuestionAnswersAsLists(Long dataverseId, L return selectCustomQuestionAnswers(dataverseId, guestbookId, false, firstResponse, lastResponse); } - private Map mapCustomQuestionAnswersAsStrings(Long dataverseId, Long guestbookId) { + public Map mapCustomQuestionAnswersAsStrings(Long dataverseId, Long guestbookId) { return selectCustomQuestionAnswers(dataverseId, guestbookId, true, null, null); } diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index 0549baaf8ae..1a8ee8a85e8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ -48,7 +48,7 @@ import net.handle.hdllib.PublicKeyAuthenticationInfo; import net.handle.hdllib.ResolutionRequest; import net.handle.hdllib.Util; -import org.apache.commons.lang.NotImplementedException; +import org.apache.commons.lang3.NotImplementedException; /** * @@ -314,7 +314,9 @@ private String getAuthenticationHandle(DvObject dvObject){ private String getAuthenticationHandle(String handlePrefix) { logger.log(Level.FINE,"getAuthenticationHandle"); - if (systemConfig.isIndependentHandleService()) { + if (systemConfig.getHandleAuthHandle()!=null) { + return systemConfig.getHandleAuthHandle(); + } else if (systemConfig.isIndependentHandleService()) { return handlePrefix + "/ADMIN"; } else { return "0.NA/" + handlePrefix; diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java index a88c4833f54..bc83c15dcd7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java @@ -40,7 +40,7 @@ import javax.inject.Inject; import javax.inject.Named; import javax.servlet.http.HttpServletRequest; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java index 496050ca7f4..9781ff90732 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java @@ -35,7 +35,7 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/LazyFileMetadataDataModel.java b/src/main/java/edu/harvard/iq/dataverse/LazyFileMetadataDataModel.java deleted file mode 100644 index aaf87342417..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/LazyFileMetadataDataModel.java +++ /dev/null @@ -1,39 +0,0 @@ -/* 
- * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package edu.harvard.iq.dataverse; - -import java.util.List; -import java.util.Map; -import org.primefaces.model.FilterMeta; -import org.primefaces.model.LazyDataModel; -import org.primefaces.model.SortOrder; - -/** - * - * @author skraffmi - */ -public class LazyFileMetadataDataModel extends LazyDataModel { - - private final DataFileServiceBean fileServiceBean; - private final Long datasetVersionId; - - public LazyFileMetadataDataModel(Long datasetVersionId, DataFileServiceBean fileServiceBean) { - this.fileServiceBean = fileServiceBean; - this.datasetVersionId = datasetVersionId; - } - - - @Override - public List load(int first, int pageSize, String sortField, - SortOrder sortOrder, Map filters) { - - List listFileMetadata = null; //fileServiceBean.findFileMetadataByDatasetVersionIdLazy(datasetVersionId, pageSize, sortField, sortField, first); - //this.setRowCount(fileServiceBean.findCountByDatasetVersionId(datasetVersionId).intValue()); - return listFileMetadata; - } - - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 432f45e1af9..619f6577b61 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -38,7 +38,7 @@ import javax.mail.internet.AddressException; import javax.mail.internet.InternetAddress; import javax.mail.internet.MimeMessage; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java index 82e1681b37d..c728062a5a8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java @@ -34,7 +34,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.primefaces.event.SelectEvent; import org.primefaces.event.ToggleSelectEvent; import org.primefaces.event.UnselectEvent; @@ -115,6 +115,17 @@ public TreeMap> getFileAccessRequestMap() { } + private boolean backingShowDeleted = true; + + public void showDeletedCheckboxChange() { + + if (backingShowDeleted != showDeleted) { + initMaps(); + backingShowDeleted = showDeleted; + } + + } + public String init() { if (dataset.getId() != null) { dataset = datasetService.find(dataset.getId()); @@ -136,17 +147,22 @@ private void initMaps() { // initialize files and usergroup list roleAssigneeMap.clear(); fileMap.clear(); - fileAccessRequestMap.clear(); + fileAccessRequestMap.clear(); for (DataFile file : dataset.getFiles()) { - boolean fileIsDeleted = !((dataset.getLatestVersion().isDraft() && file.getFileMetadata().getDatasetVersion().isDraft()) - || (dataset.getLatestVersion().isReleased() && file.getFileMetadata().getDatasetVersion().equals(dataset.getLatestVersion()))); // only include if the file is restricted (or its draft version is restricted) //Added a null check in case there are files that have no metadata records SEK //for 6587 make sure that a file is in the current version befor adding to the fileMap SEK 2/11/2020 - if 
(file.getFileMetadata() != null && (file.isRestricted() || file.getFileMetadata().isRestricted()) - && (!fileIsDeleted || isShowDeleted())) { + if (file.getFileMetadata() != null && (file.isRestricted() || file.getFileMetadata().isRestricted())) { + //only test if file is deleted if it's restricted + boolean fileIsDeleted = !((dataset.getLatestVersion().isDraft() && file.getFileMetadata().getDatasetVersion().isDraft()) + || (dataset.getLatestVersion().isReleased() && file.getFileMetadata().getDatasetVersion().equals(dataset.getLatestVersion()))); + + if (!isShowDeleted() && fileIsDeleted) { + //if don't show deleted and is deleted go to next file... + continue; + } // we get the direct role assignments assigned to the file List ras = roleService.directRoleAssignments(file); List raList = new ArrayList<>(ras.size()); diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java index e86ce1e22ad..d08337ec832 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageGroupsPage.java @@ -33,7 +33,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * @author michaelsuo diff --git a/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java index 79a3ca800e2..4a734c11941 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java @@ -41,7 +41,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; -import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; /** * @@ -516,7 +516,7 @@ private void assignRole(RoleAssignee ra, DataverseRole r) { List args = Arrays.asList( r.getName(), ra.getDisplayInfo().getTitle(), - StringEscapeUtils.escapeHtml(dvObject.getDisplayName()) + StringEscapeUtils.escapeHtml4(dvObject.getDisplayName()) ); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("permission.roleAssignedToFor", args)); // don't notify if role = file downloader and object is not released @@ -530,7 +530,7 @@ private void assignRole(RoleAssignee ra, DataverseRole r) { List args = Arrays.asList( r.getName(), ra.getDisplayInfo().getTitle(), - StringEscapeUtils.escapeHtml(dvObject.getDisplayName()) + StringEscapeUtils.escapeHtml4(dvObject.getDisplayName()) ); String message = BundleUtil.getStringFromBundle("permission.roleNotAssignedFor", args); JsfHelper.addErrorMessage(message); diff --git a/src/main/java/edu/harvard/iq/dataverse/NavigationWrapper.java b/src/main/java/edu/harvard/iq/dataverse/NavigationWrapper.java index c9bc8722391..37a11396f37 100644 --- a/src/main/java/edu/harvard/iq/dataverse/NavigationWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/NavigationWrapper.java @@ -20,7 +20,7 @@ import javax.inject.Named; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java index 6b207ed0e75..c6f2b7f28a5 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java @@ -27,7 +27,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * The place to obtain {@link RoleAssignee}s, based on their identifiers. diff --git a/src/main/java/edu/harvard/iq/dataverse/RoleAssignment.java b/src/main/java/edu/harvard/iq/dataverse/RoleAssignment.java index 22d2679efb6..f053a449da4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/RoleAssignment.java +++ b/src/main/java/edu/harvard/iq/dataverse/RoleAssignment.java @@ -71,13 +71,21 @@ public class RoleAssignment implements java.io.Serializable { @Column(nullable = true) private String privateUrlToken; + @Column(nullable = true) + private Boolean privateUrlAnonymizedAccess; + public RoleAssignment() {} public RoleAssignment(DataverseRole aRole, RoleAssignee anAssignee, DvObject aDefinitionPoint, String privateUrlToken) { + this(aRole, anAssignee, aDefinitionPoint, privateUrlToken, false); + } + + public RoleAssignment(DataverseRole aRole, RoleAssignee anAssignee, DvObject aDefinitionPoint, String privateUrlToken, Boolean anonymizedAccess) { role = aRole; assigneeIdentifier = anAssignee.getIdentifier(); definitionPoint = aDefinitionPoint; this.privateUrlToken = privateUrlToken; + this.privateUrlAnonymizedAccess=anonymizedAccess; } public Long getId() { @@ -116,6 +124,10 @@ public String getPrivateUrlToken() { return privateUrlToken; } + public boolean isAnonymizedAccess(){ + return (privateUrlAnonymizedAccess==null) ? false: privateUrlAnonymizedAccess; + } + @Override public int hashCode() { int hash = 7; diff --git a/src/main/java/edu/harvard/iq/dataverse/RolePermissionFragment.java b/src/main/java/edu/harvard/iq/dataverse/RolePermissionFragment.java index 204806e7894..dd3044d3749 100644 --- a/src/main/java/edu/harvard/iq/dataverse/RolePermissionFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/RolePermissionFragment.java @@ -36,8 +36,8 @@ import javax.persistence.PersistenceContext; import edu.harvard.iq.dataverse.util.BundleUtil; -import org.apache.commons.lang.StringEscapeUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.text.StringEscapeUtils; +import org.apache.commons.lang3.StringUtils; /** * @@ -193,7 +193,7 @@ private void assignRole(RoleAssignee ra, DataverseRole r) { commandEngine.submit(new AssignRoleCommand(ra, r, dvObject, dvRequestService.getDataverseRequest(), privateUrlToken)); JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("permission.roleAssignedToOn" , - Arrays.asList( r.getName() , ra.getDisplayInfo().getTitle() , StringEscapeUtils.escapeHtml(dvObject.getDisplayName()) )) ); + Arrays.asList( r.getName() , ra.getDisplayInfo().getTitle() , StringEscapeUtils.escapeHtml4(dvObject.getDisplayName()) )) ); } catch (CommandException ex) { JH.addMessage(FacesMessage.SEVERITY_ERROR, BundleUtil.getStringFromBundle("permission.cannotAssignRole" , Arrays.asList( ex.getMessage()))); } diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index eb4527f2aaa..dab4435da6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -12,8 +12,12 @@ import edu.harvard.iq.dataverse.util.MailUtil; 
import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; + +import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import javax.ejb.EJB; import javax.faces.view.ViewScoped; @@ -275,6 +279,18 @@ public boolean displayChronologicalDateFacets() { return isTrueForKey(SettingsServiceBean.Key.ChronologicalDateFacets, true); } + + List anonymizedFieldTypes = null; + + public boolean shouldBeAnonymized(DatasetField df) { + // Set up once per view + if (anonymizedFieldTypes == null) { + anonymizedFieldTypes = new ArrayList(); + String names = get(SettingsServiceBean.Key.AnonymizedFieldTypeNames.toString(), ""); + anonymizedFieldTypes.addAll(Arrays.asList(names.split(",\\s"))); + } + return anonymizedFieldTypes.contains(df.getDatasetFieldType().getName()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/Shib.java b/src/main/java/edu/harvard/iq/dataverse/Shib.java index 4ad50320f23..b71fe3cd566 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Shib.java +++ b/src/main/java/edu/harvard/iq/dataverse/Shib.java @@ -15,7 +15,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.io.IOException; import java.sql.Timestamp; diff --git a/src/main/java/edu/harvard/iq/dataverse/TermsOfUseAndAccess.java b/src/main/java/edu/harvard/iq/dataverse/TermsOfUseAndAccess.java index ad6775d6efd..72f4ab54ee8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/TermsOfUseAndAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/TermsOfUseAndAccess.java @@ -280,7 +280,7 @@ public enum License { * API use? 
See also https://github.com/IQSS/dataverse/issues/1385 */ public static TermsOfUseAndAccess.License defaultLicense = TermsOfUseAndAccess.License.CC0; - + public static String CC0_URI = "https://creativecommons.org/publicdomain/zero/1.0/"; @Override public int hashCode() { int hash = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/ThemeWidgetFragment.java b/src/main/java/edu/harvard/iq/dataverse/ThemeWidgetFragment.java index 9f282b436db..e270d3842f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThemeWidgetFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThemeWidgetFragment.java @@ -29,7 +29,7 @@ import javax.inject.Inject; import javax.inject.Named; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.primefaces.PrimeFaces; //import org.primefaces.context.RequestContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/UserServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/UserServiceBean.java index 58b55c091da..9ec0527a318 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserServiceBean.java @@ -19,7 +19,7 @@ import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.Query; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.ocpsoft.common.util.Strings; @Stateless diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index b33422f5b7f..746a721e4e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -79,7 +79,7 @@ import javax.ws.rs.core.Response; import javax.ws.rs.core.Response.ResponseBuilder; import javax.ws.rs.core.Response.Status; -import static org.apache.commons.lang.StringUtils.isNumeric; +import static org.apache.commons.lang3.StringUtils.isNumeric; /** * Base class for API beans @@ -362,7 +362,17 @@ protected User findUserOrDie() throws WrappedResponse { return GuestUser.get(); } PrivateUrlUser privateUrlUser = privateUrlSvc.getPrivateUrlUserFromToken(requestApiKey); + // For privateUrlUsers restricted to anonymized access, all api calls are off-limits except for those used in the UI + // to download the file or image thumbs if (privateUrlUser != null) { + if (privateUrlUser.hasAnonymizedAccess()) { + String pathInfo = httpRequest.getPathInfo(); + String prefix= "/access/datafile/"; + if (!(pathInfo.startsWith(prefix) && !pathInfo.substring(prefix.length()).contains("/"))) { + logger.info("Anonymized access request for " + pathInfo); + throw new WrappedResponse(error(Status.UNAUTHORIZED, "API Access not allowed with this Key")); + } + } return privateUrlUser; } return findAuthenticatedUserOrDie(requestApiKey, requestWFKey); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index bcb6f3d6c61..9fd63a5fe04 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -748,13 +748,8 @@ public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbr } private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, 
ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { - long setLimit = systemConfig.getZipDownloadLimit(); - if (!(setLimit > 0L)) { - setLimit = DataFileZipper.DEFAULT_ZIPFILE_LIMIT; - } - - final long zipDownloadSizeLimit = setLimit; //to use via anon inner class - + final long zipDownloadSizeLimit = systemConfig.getZipDownloadLimit(); + logger.fine("setting zip download size limit to " + zipDownloadSizeLimit + " bytes."); if (rawFileIds == null || rawFileIds.equals("")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index f0a9f8cf780..1df51137969 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -379,9 +379,8 @@ private Response deleteAuthenticatedUser(AuthenticatedUser au) { authSvc.removeAuthentictedUserItems(au); authSvc.deleteAuthenticatedUser(au.getId()); - return ok("AuthenticatedUser " + au.getIdentifier() + " deleted. "); - - } + return ok("AuthenticatedUser " + au.getIdentifier() + " deleted."); + } @POST @Path("authenticatedUsers/{identifier}/deactivate") diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java index 646abf51b6c..a057068aa1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java @@ -34,7 +34,7 @@ import javax.ws.rs.core.Response; import edu.harvard.iq.dataverse.util.BundleUtil; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.asJsonArray; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -137,6 +137,7 @@ public Response getByName(@PathParam("name") String name) { String solrFieldSearchable = dsf.getSolrField().getNameSearchable(); String solrFieldFacetable = dsf.getSolrField().getNameFacetable(); String metadataBlock = dsf.getMetadataBlock().getName(); + String uri=dsf.getUri(); boolean hasParent = dsf.isHasParent(); boolean allowsMultiples = dsf.isAllowMultiples(); boolean isRequired = dsf.isRequired(); @@ -168,7 +169,8 @@ public Response getByName(@PathParam("name") String name) { .add("parentAllowsMultiples", parentAllowsMultiplesDisplay) .add("solrFieldSearchable", solrFieldSearchable) .add("solrFieldFacetable", solrFieldFacetable) - .add("isRequired", isRequired)); + .add("isRequired", isRequired) + .add("uri", uri)); } catch ( NoResultException nre ) { return notFound(name); @@ -356,7 +358,7 @@ public String getArrayIndexOutOfBoundMessage(HeaderType header, int wrongIndex) { List columns = getColumnsByHeader(header); - + String column = columns.get(wrongIndex - 1); List arguments = new ArrayList<>(); arguments.add(header.name()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d8eb6d7e6b5..f382320d8a2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -6,7 +6,6 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetFieldValue; import 
edu.harvard.iq.dataverse.DatasetLock; @@ -52,6 +51,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetLinkingDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand; import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand; +import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; @@ -78,7 +78,6 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.S3PackageImporter; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; @@ -103,16 +102,25 @@ import edu.harvard.iq.dataverse.util.EjbUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JSONLDUtil; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.util.json.JsonParseException; import edu.harvard.iq.dataverse.search.IndexServiceBean; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import edu.harvard.iq.dataverse.workflow.Workflow; +import edu.harvard.iq.dataverse.workflow.WorkflowContext; +import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; +import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; + import java.io.IOException; import java.io.InputStream; import java.io.StringReader; import java.sql.Timestamp; import java.text.MessageFormat; +import java.time.LocalDateTime; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -122,6 +130,8 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Map.Entry; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -135,11 +145,15 @@ import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import javax.json.JsonReader; +import javax.json.stream.JsonParsingException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.BadRequestException; import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; +import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; +import javax.ws.rs.NotAcceptableException; import javax.ws.rs.POST; import javax.ws.rs.PUT; import javax.ws.rs.Path; @@ -153,14 +167,14 @@ import javax.ws.rs.core.Response.Status; import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import javax.ws.rs.core.UriInfo; + +import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrServerException; import org.glassfish.jersey.media.multipart.FormDataBodyPart; import org.glassfish.jersey.media.multipart.FormDataContentDisposition; import org.glassfish.jersey.media.multipart.FormDataParam; import com.amazonaws.services.s3.model.PartETag; -import 
edu.harvard.iq.dataverse.FileMetadata; -import java.util.Map.Entry; @Path("datasets") public class Datasets extends AbstractApiBean { @@ -187,9 +201,6 @@ public class Datasets extends AbstractApiBean { @EJB DDIExportServiceBean ddiExportService; - @EJB - DatasetFieldServiceBean datasetfieldService; - @EJB MetadataBlockServiceBean metadataBlockService; @@ -223,6 +234,9 @@ public class Datasets extends AbstractApiBean { @Inject DataverseRequestServiceBean dvRequestService; + + @Inject + WorkflowServiceBean wfService; /** * Used to consolidate the way we parse and handle dataset versions. @@ -497,7 +511,7 @@ public Response getFileAccessFolderView(@PathParam("id") String datasetId, @Quer String indexFileName = folderName.equals("") ? ".index.html" : ".index-" + folderName.replace('/', '_') + ".html"; - response.setHeader("Content-disposition", "attachment; filename=\"" + indexFileName + "\""); + response.setHeader("Content-disposition", "filename=\"" + indexFileName + "\""); return Response.ok() @@ -597,6 +611,7 @@ public Response updateDatasetPIDMetadataAll() { @PUT @Path("{id}/versions/{versionId}") + @Consumes(MediaType.APPLICATION_JSON) public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, @PathParam("versionId") String versionId ){ if ( ! ":draft".equals(versionId) ) { @@ -649,6 +664,94 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, } } + + @GET + @Path("{id}/versions/{versionId}/metadata") + @Produces("application/ld+json, application/json-ld") + public Response getVersionJsonLDMetadata(@PathParam("id") String id, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + try { + DataverseRequest req = createDataverseRequest(findUserOrDie()); + DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(id), uriInfo, headers); + OREMap ore = new OREMap(dsv, + settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false)); + return ok(ore.getOREMapBuilder(true)); + + } catch (WrappedResponse ex) { + ex.printStackTrace(); + return ex.getResponse(); + } catch (Exception jpe) { + logger.log(Level.SEVERE, "Error getting jsonld metadata for dsv: ", jpe.getLocalizedMessage()); + jpe.printStackTrace(); + return error(Response.Status.INTERNAL_SERVER_ERROR, jpe.getLocalizedMessage()); + } + } + + @GET + @Path("{id}/metadata") + @Produces("application/ld+json, application/json-ld") + public Response getVersionJsonLDMetadata(@PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return getVersionJsonLDMetadata(id, ":draft", uriInfo, headers); + } + + @PUT + @Path("{id}/metadata") + @Consumes("application/ld+json, application/json-ld") + public Response updateVersionMetadata(String jsonLDBody, @PathParam("id") String id, @DefaultValue("false") @QueryParam("replace") boolean replaceTerms) { + + try { + Dataset ds = findDatasetOrDie(id); + DataverseRequest req = createDataverseRequest(findUserOrDie()); + DatasetVersion dsv = ds.getEditVersion(); + boolean updateDraft = ds.getLatestVersion().isDraft(); + dsv = JSONLDUtil.updateDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, datasetFieldSvc, !replaceTerms, false); + + DatasetVersion managedVersion; + if (updateDraft) { + Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); + managedVersion = managedDataset.getEditVersion(); + } else { + managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); + } + String info = 
updateDraft ? "Version Updated" : "Version Created"; + return ok(Json.createObjectBuilder().add(info, managedVersion.getVersionDate())); + + } catch (WrappedResponse ex) { + return ex.getResponse(); + } catch (JsonParsingException jpe) { + logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}", jsonLDBody); + return error(Status.BAD_REQUEST, "Error parsing Json: " + jpe.getMessage()); + } + } + + @PUT + @Path("{id}/metadata/delete") + @Consumes("application/ld+json, application/json-ld") + public Response deleteMetadata(String jsonLDBody, @PathParam("id") String id) { + try { + Dataset ds = findDatasetOrDie(id); + DataverseRequest req = createDataverseRequest(findUserOrDie()); + DatasetVersion dsv = ds.getEditVersion(); + boolean updateDraft = ds.getLatestVersion().isDraft(); + dsv = JSONLDUtil.deleteDatasetVersionMDFromJsonLD(dsv, jsonLDBody, metadataBlockService, datasetFieldSvc); + DatasetVersion managedVersion; + if (updateDraft) { + Dataset managedDataset = execCommand(new UpdateDatasetVersionCommand(ds, req)); + managedVersion = managedDataset.getEditVersion(); + } else { + managedVersion = execCommand(new CreateDatasetVersionCommand(req, ds, dsv)); + } + String info = updateDraft ? "Version Updated" : "Version Created"; + return ok(Json.createObjectBuilder().add(info, managedVersion.getVersionDate())); + + } catch (WrappedResponse ex) { + ex.printStackTrace(); + return ex.getResponse(); + } catch (JsonParsingException jpe) { + logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}", jsonLDBody); + jpe.printStackTrace(); + return error(Status.BAD_REQUEST, "Error parsing Json: " + jpe.getMessage()); + } + } @PUT @Path("{id}/deleteMetadata") @@ -1100,6 +1203,96 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S } } + @POST + @Path("{id}/actions/:releasemigrated") + @Consumes("application/ld+json, application/json-ld") + public Response publishMigratedDataset(String jsonldBody, @PathParam("id") String id, @DefaultValue("false") @QueryParam ("updatepidatprovider") boolean contactPIDProvider) { + try { + AuthenticatedUser user = findAuthenticatedUserOrDie(); + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Only superusers can release migrated datasets"); + } + + Dataset ds = findDatasetOrDie(id); + try { + JsonObject metadata = JSONLDUtil.decontextualizeJsonLD(jsonldBody); + String pubDate = metadata.getString(JsonLDTerm.schemaOrg("datePublished").getUrl()); + logger.fine("Submitted date: " + pubDate); + LocalDateTime dateTime = null; + if(!StringUtils.isEmpty(pubDate)) { + dateTime = JSONLDUtil.getDateTimeFrom(pubDate); + final Timestamp time = Timestamp.valueOf(dateTime); + //Set version release date + ds.getLatestVersion().setReleaseTime(new Date(time.getTime())); + } + // dataset.getPublicationDateFormattedYYYYMMDD()) + // Assign a version number if not set + if (ds.getLatestVersion().getVersionNumber() == null) { + + if (ds.getVersions().size() == 1) { + // First Release + ds.getLatestVersion().setVersionNumber(Long.valueOf(1)); + ds.getLatestVersion().setMinorVersionNumber(Long.valueOf(0)); + } else if (ds.getLatestVersion().isMinorUpdate()) { + ds.getLatestVersion().setVersionNumber(Long.valueOf(ds.getVersionNumber())); + ds.getLatestVersion().setMinorVersionNumber(Long.valueOf(ds.getMinorVersionNumber() + 1)); + } else { + // major, non-first release + ds.getLatestVersion().setVersionNumber(Long.valueOf(ds.getVersionNumber() + 1)); + ds.getLatestVersion().setMinorVersionNumber(Long.valueOf(0)); + } + } + 
if(ds.getLatestVersion().getVersionNumber()==1 && ds.getLatestVersion().getMinorVersionNumber()==0) { +                //Also set publication date if this is the first +                if(dateTime != null) { +                    ds.setPublicationDate(Timestamp.valueOf(dateTime)); +                } +                // Release User is only set in FinalizeDatasetPublicationCommand if the pub date +                // is null, so set it here. +                ds.setReleaseUser((AuthenticatedUser) user); +            } +        } catch (Exception e) { +            logger.fine(e.getMessage()); +            throw new BadRequestException("Unable to set publication date (" +                    + JsonLDTerm.schemaOrg("datePublished").getUrl() + "): " + e.getMessage()); +        } +        /* +         * Note: The code here mirrors that in the +         * edu.harvard.iq.dataverse.DatasetPage:updateCurrentVersion method. Any changes +         * to the core logic (i.e. beyond updating the messaging about results) should +         * be applied to the code there as well. +         */ +        String errorMsg = null; +        Optional<Workflow> prePubWf = wfService.getDefaultWorkflow(TriggerType.PrePublishDataset); + +        try { +            // ToDo - should this be in onSuccess()? May relate to todo above +            if (prePubWf.isPresent()) { +                // Start the workflow, the workflow will call FinalizeDatasetPublication later +                wfService.start(prePubWf.get(), +                        new WorkflowContext(createDataverseRequest(user), ds, TriggerType.PrePublishDataset, !contactPIDProvider), +                        false); +            } else { +                FinalizeDatasetPublicationCommand cmd = new FinalizeDatasetPublicationCommand(ds, +                        createDataverseRequest(user), !contactPIDProvider); +                ds = commandEngine.submit(cmd); +            } +        } catch (CommandException ex) { +            errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.failure") + " - " + ex.toString(); +            logger.severe(ex.getMessage()); +        } + +        if (errorMsg != null) { +            return error(Response.Status.INTERNAL_SERVER_ERROR, errorMsg); +        } else { +            return prePubWf.isPresent() ?
accepted(json(ds)) : ok(json(ds)); + } + + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + @POST @Path("{id}/move/{targetDataverseAlias}") public Response moveDataset(@PathParam("id") String id, @PathParam("targetDataverseAlias") String targetDataverseAlias, @QueryParam("forceMove") Boolean force) { @@ -1255,10 +1448,13 @@ public Response getPrivateUrlData(@PathParam("id") String idSupplied) { @POST @Path("{id}/privateUrl") - public Response createPrivateUrl(@PathParam("id") String idSupplied) { + public Response createPrivateUrl(@PathParam("id") String idSupplied,@DefaultValue("false") @QueryParam ("anonymizedAccess") boolean anonymizedAccess) { + if(anonymizedAccess && settingsSvc.getValueForKey(SettingsServiceBean.Key.AnonymizedFieldTypeNames)==null) { + throw new NotAcceptableException("Anonymized Access not enabled"); + } return response( req -> ok(json(execCommand( - new CreatePrivateUrlCommand(req, findDatasetOrDie(idSupplied)))))); + new CreatePrivateUrlCommand(req, findDatasetOrDie(idSupplied), anonymizedAccess))))); } @DELETE @@ -2439,4 +2635,73 @@ public Response getTimestamps(@PathParam("identifier") String id) { return wr.getResponse(); } } + + + /** + * Add multiple Files to an existing Dataset + * + * @param idSupplied + * @param jsonData + * @return + */ + @POST + @Path("{id}/addFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response addFilesToDataset(@PathParam("id") String idSupplied, + @FormDataParam("jsonData") String jsonData) { + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + User authUser; + try { + authUser = findUserOrDie(); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(idSupplied); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + + //------------------------------------ + // (2a) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, + BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + DataverseRequest dvRequest = createDataverseRequest(authUser); + + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dvRequest, + this.ingestService, + this.datasetService, + this.fileService, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + return addFileHelper.addFiles(jsonData, dataset, authUser); + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index 2c73c5ad36e..5725636d787 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -11,6 +11,8 @@ import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; +import edu.harvard.iq.dataverse.GuestbookServiceBean; import 
edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.RoleAssignment; import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; @@ -61,6 +63,8 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.StringUtil; import static edu.harvard.iq.dataverse.util.StringUtil.nonEmpty; + +import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; import java.io.StringReader; @@ -84,6 +88,7 @@ import javax.json.stream.JsonParsingException; import javax.validation.ConstraintViolation; import javax.validation.ConstraintViolationException; +import javax.ws.rs.BadRequestException; import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; import javax.ws.rs.GET; @@ -98,10 +103,16 @@ import javax.ws.rs.core.Response.Status; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import java.io.IOException; import java.text.MessageFormat; +import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; +import java.util.Map; import java.util.Optional; +import javax.servlet.ServletOutputStream; +import javax.servlet.http.HttpServletResponse; +import javax.ws.rs.core.Context; import javax.xml.stream.XMLStreamException; /** @@ -114,6 +125,7 @@ public class Dataverses extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Dataverses.class.getCanonicalName()); + private static final SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @EJB ExplicitGroupServiceBean explicitGroupSvc; @@ -123,6 +135,12 @@ public class Dataverses extends AbstractApiBean { @EJB SettingsServiceBean settingsService; + + @EJB + GuestbookResponseServiceBean guestbookResponseService; + + @EJB + GuestbookServiceBean guestbookService; @POST public Response addRoot(String body) { @@ -214,6 +232,7 @@ public Response addDataverse(String body, @PathParam("identifier") String parent @POST @Path("{identifier}/datasets") + @Consumes("application/json") public Response createDataset(String jsonBody, @PathParam("identifier") String parentIdtf) { try { User u = findUserOrDie(); @@ -251,6 +270,45 @@ public Response createDataset(String jsonBody, @PathParam("identifier") String p return ex.getResponse(); } } + + @POST + @Path("{identifier}/datasets") + @Consumes("application/ld+json, application/json-ld") + public Response createDatasetFromJsonLd(String jsonLDBody, @PathParam("identifier") String parentIdtf) { + try { + User u = findUserOrDie(); + Dataverse owner = findDataverseOrDie(parentIdtf); + Dataset ds = new Dataset(); + + ds.setOwner(owner); + ds = JSONLDUtil.updateDatasetMDFromJsonLD(ds, jsonLDBody, metadataBlockSvc, datasetFieldSvc, false, false); + + ds.setOwner(owner); + + + + // clean possible dataset/version metadata + DatasetVersion version = ds.getVersions().get(0); + version.setMinorVersionNumber(null); + version.setVersionNumber(null); + version.setVersionState(DatasetVersion.VersionState.DRAFT); + + ds.setAuthority(null); + ds.setIdentifier(null); + ds.setProtocol(null); + ds.setGlobalIdCreateTime(null); + + Dataset managedDs = execCommand(new CreateNewDatasetCommand(ds, createDataverseRequest(u))); + return created("/datasets/" + managedDs.getId(), + Json.createObjectBuilder() + .add("id", managedDs.getId()) + .add("persistentId", managedDs.getGlobalIdString()) + ); + + } catch (WrappedResponse ex) { 
+ return ex.getResponse(); + } + } @POST @Path("{identifier}/datasets/:import") @@ -393,6 +451,59 @@ public Response importDatasetDdi(String xml, @PathParam("identifier") String par } } + @POST + @Path("{identifier}/datasets/:startmigration") + @Consumes("application/ld+json, application/json-ld") + public Response recreateDataset(String jsonLDBody, @PathParam("identifier") String parentIdtf) { + try { + User u = findUserOrDie(); + if (!u.isSuperuser()) { + return error(Status.FORBIDDEN, "Not a superuser"); + } + Dataverse owner = findDataverseOrDie(parentIdtf); + + Dataset ds = new Dataset(); + + ds.setOwner(owner); + ds = JSONLDUtil.updateDatasetMDFromJsonLD(ds, jsonLDBody, metadataBlockSvc, datasetFieldSvc, false, true); + //ToDo - verify PID is one Dataverse can manage (protocol/authority/shoulder match) + if(! + (ds.getAuthority().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Authority))&& + ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol))&& + ds.getIdentifier().startsWith(settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder)))) { + throw new BadRequestException("Cannot recreate a dataset that has a PID that doesn't match the server's settings"); + } + if(!datasetSvc.isIdentifierLocallyUnique(ds)) { + throw new BadRequestException("Cannot recreate a dataset whose PID is already in use"); + } + + + + if (ds.getVersions().isEmpty()) { + return badRequest("Supplied json must contain a single dataset version."); + } + + DatasetVersion version = ds.getVersions().get(0); + if (!version.isPublished()) { + throw new BadRequestException("Cannot recreate a dataset that hasn't been published."); + } + //While the datasetversion whose metadata we're importing has been published, we consider it in draft until the API caller adds files and then completes the migration + version.setVersionState(DatasetVersion.VersionState.DRAFT); + + DataverseRequest request = createDataverseRequest(u); + + Dataset managedDs = execCommand(new ImportDatasetCommand(ds, request)); + JsonObjectBuilder responseBld = Json.createObjectBuilder() + .add("id", managedDs.getId()) + .add("persistentId", managedDs.getGlobalId().toString()); + + return created("/datasets/" + managedDs.getId(), responseBld); + + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + private Dataset parseDataset(String datasetJson) throws WrappedResponse { try (StringReader rdr = new StringReader(datasetJson)) { return jsonParser().parseDataset(Json.createReader(rdr).readObject()); @@ -832,7 +943,49 @@ public Response getGroupByOwnerAndAliasInOwner(@PathParam("identifier") String d req, grpAliasInOwner)))); } + + @GET + @Path("{identifier}/guestbookResponses/") + @Produces({"application/download"}) + public Response getGuestbookResponsesByDataverse(@PathParam("identifier") String dvIdtf, + @QueryParam("guestbookId") Long gbId, @Context HttpServletResponse response) { + + try { + Dataverse dv = findDataverseOrDie(dvIdtf); + User u = findUserOrDie(); + DataverseRequest req = createDataverseRequest(u); + if (permissionSvc.request(req) + .on(dv) + .has(Permission.EditDataverse)) { + } else { + return error(Status.FORBIDDEN, "Not authorized"); + } + + String fileTimestamp = dateFormatter.format(new Date()); + String filename = dv.getAlias() + "_GBResponses_" + fileTimestamp + ".csv"; + + response.setHeader("Content-Disposition", "attachment; filename=" + + filename); + ServletOutputStream outputStream = response.getOutputStream(); + + Map customQandAs = 
guestbookResponseService.mapCustomQuestionAnswersAsStrings(dv.getId(), gbId); + + List guestbookResults = guestbookResponseService.getGuestbookResults(dv.getId(), gbId); + outputStream.write("Guestbook, Dataset, Dataset PID, Date, Type, File Name, File Id, File PID, User Name, Email, Institution, Position, Custom Questions\n".getBytes()); + for (Object[] result : guestbookResults) { + StringBuilder sb = guestbookResponseService.convertGuestbookResponsesToCSV(customQandAs, result); + outputStream.write(sb.toString().getBytes()); + outputStream.flush(); + } + return Response.ok().build(); + } catch (IOException io) { + return error(Status.BAD_REQUEST, "Failed to produce response file. Exception: " + io.getMessage()); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + @PUT @Path("{identifier}/groups/{aliasInOwner}") public Response updateGroup(ExplicitGroupDTO groupDto, diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index 7f3b652af33..2621a5e0b09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -208,7 +208,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } if (redirect_uri != null) { // increment the download count, if necessary: - if (di.getGbr() != null) { + if (di.getGbr() != null && !(isThumbnailDownload(di) || isPreprocessedMetadataDownload(di))) { try { logger.fine("writing guestbook response, for an S3 download redirect."); Command cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner()); @@ -385,91 +385,91 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } - InputStream instream = storageIO.getInputStream(); - if (instream != null) { - // headers: - - String fileName = storageIO.getFileName(); - String mimeType = storageIO.getMimeType(); - - // Provide both the "Content-disposition" and "Content-Type" headers, - // to satisfy the widest selection of browsers out there. - // Encode the filename as UTF-8, then deal with spaces. "encode" changes - // a space to + so we change it back to a space (%20). - String finalFileName = URLEncoder.encode(fileName, "UTF-8").replaceAll("\\+", "%20"); - httpHeaders.add("Content-disposition", "attachment; filename=\"" + finalFileName + "\""); - httpHeaders.add("Content-Type", mimeType + "; name=\"" + finalFileName + "\""); - - long contentSize; - boolean useChunkedTransfer = false; - //if ((contentSize = getFileSize(di, storageIO.getVarHeader())) > 0) { - if ((contentSize = getContentSize(storageIO)) > 0) { - logger.fine("Content size (retrieved from the AccessObject): " + contentSize); - httpHeaders.add("Content-Length", contentSize); - } else { - //httpHeaders.add("Transfer-encoding", "chunked"); - //useChunkedTransfer = true; - } + try (InputStream instream = storageIO.getInputStream()) { + if (instream != null) { + // headers: + + String fileName = storageIO.getFileName(); + String mimeType = storageIO.getMimeType(); + + // Provide both the "Content-disposition" and "Content-Type" headers, + // to satisfy the widest selection of browsers out there. + // Encode the filename as UTF-8, then deal with spaces. "encode" changes + // a space to + so we change it back to a space (%20). 
+ String finalFileName = URLEncoder.encode(fileName, "UTF-8").replaceAll("\\+", "%20"); + httpHeaders.add("Content-disposition", "attachment; filename=\"" + finalFileName + "\""); + httpHeaders.add("Content-Type", mimeType + "; name=\"" + finalFileName + "\""); + + long contentSize; + boolean useChunkedTransfer = false; + //if ((contentSize = getFileSize(di, storageIO.getVarHeader())) > 0) { + if ((contentSize = getContentSize(storageIO)) > 0) { + logger.fine("Content size (retrieved from the AccessObject): " + contentSize); + httpHeaders.add("Content-Length", contentSize); + } else { + //httpHeaders.add("Transfer-encoding", "chunked"); + //useChunkedTransfer = true; + } + + // (the httpHeaders map must be modified *before* writing any + // data in the output stream!) + int bufsize; + byte[] bffr = new byte[4 * 8192]; + byte[] chunkClose = "\r\n".getBytes(); + + // before writing out any bytes from the input stream, flush + // any extra content, such as the variable header for the + // subsettable files: + if (storageIO.getVarHeader() != null) { + if (storageIO.getVarHeader().getBytes().length > 0) { + if (useChunkedTransfer) { + String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length); + outstream.write(chunkSizeLine.getBytes()); + } + outstream.write(storageIO.getVarHeader().getBytes()); + if (useChunkedTransfer) { + outstream.write(chunkClose); + } + } + } - // (the httpHeaders map must be modified *before* writing any - // data in the output stream!) - int bufsize; - byte[] bffr = new byte[4 * 8192]; - byte[] chunkClose = "\r\n".getBytes(); - - // before writing out any bytes from the input stream, flush - // any extra content, such as the variable header for the - // subsettable files: - if (storageIO.getVarHeader() != null) { - if (storageIO.getVarHeader().getBytes().length > 0) { + while ((bufsize = instream.read(bffr)) != -1) { if (useChunkedTransfer) { - String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length); + String chunkSizeLine = String.format("%x\r\n", bufsize); outstream.write(chunkSizeLine.getBytes()); } - outstream.write(storageIO.getVarHeader().getBytes()); + outstream.write(bffr, 0, bufsize); if (useChunkedTransfer) { outstream.write(chunkClose); } } - } - while ((bufsize = instream.read(bffr)) != -1) { - if (useChunkedTransfer) { - String chunkSizeLine = String.format("%x\r\n", bufsize); - outstream.write(chunkSizeLine.getBytes()); - } - outstream.write(bffr, 0, bufsize); if (useChunkedTransfer) { - outstream.write(chunkClose); + String chunkClosing = "0\r\n\r\n"; + outstream.write(chunkClosing.getBytes()); } - } - if (useChunkedTransfer) { - String chunkClosing = "0\r\n\r\n"; - outstream.write(chunkClosing.getBytes()); - } + logger.fine("di conversion param: " + di.getConversionParam() + ", value: " + di.getConversionParamValue()); - logger.fine("di conversion param: " + di.getConversionParam() + ", value: " + di.getConversionParamValue()); - - // Downloads of thumbnail images (scaled down, low-res versions of graphic image files) and - // "preprocessed metadata" records for tabular data files are NOT considered "real" downloads, - // so these should not produce guestbook entries: - if (di.getGbr() != null && !(isThumbnailDownload(di) || isPreprocessedMetadataDownload(di))) { - try { - logger.fine("writing guestbook response."); - Command cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner()); - 
di.getCommand().submit(cmd); - MakeDataCountEntry entry = new MakeDataCountEntry(di.getRequestUriInfo(), di.getRequestHttpHeaders(), di.getDataverseRequestService(), di.getGbr().getDataFile()); - mdcLogService.logEntry(entry); - } catch (CommandException e) { + // Downloads of thumbnail images (scaled down, low-res versions of graphic image files) and + // "preprocessed metadata" records for tabular data files are NOT considered "real" downloads, + // so these should not produce guestbook entries: + if (di.getGbr() != null && !(isThumbnailDownload(di) || isPreprocessedMetadataDownload(di))) { + try { + logger.fine("writing guestbook response."); + Command cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner()); + di.getCommand().submit(cmd); + MakeDataCountEntry entry = new MakeDataCountEntry(di.getRequestUriInfo(), di.getRequestHttpHeaders(), di.getDataverseRequestService(), di.getGbr().getDataFile()); + mdcLogService.logEntry(entry); + } catch (CommandException e) { + } + } else { + logger.fine("not writing guestbook response"); } - } else { - logger.fine("not writing guestbook response"); - } - instream.close(); - outstream.close(); - return; + outstream.close(); + return; + } } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Groups.java b/src/main/java/edu/harvard/iq/dataverse/api/Groups.java index f2a262bb7fb..5a587efadf3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Groups.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Groups.java @@ -29,7 +29,7 @@ import javax.ws.rs.POST; import javax.ws.rs.PUT; import javax.ws.rs.PathParam; -import static org.apache.commons.lang.StringUtils.isNumeric; +import static org.apache.commons.lang3.StringUtils.isNumeric; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingServer.java b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingServer.java index cb28d1fae49..b8950edc6a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/HarvestingServer.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/HarvestingServer.java @@ -46,7 +46,7 @@ import javax.ws.rs.PathParam; import javax.ws.rs.QueryParam; import javax.ws.rs.core.Response; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index 3a49385c0f6..93baa878e9d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -35,7 +35,7 @@ import javax.ws.rs.QueryParam; import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * User-facing documentation: diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java index ce226ea14b8..b1177531874 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java @@ -13,16 +13,30 @@ import edu.harvard.iq.dataverse.engine.command.impl.GetUserTracesCommand; import edu.harvard.iq.dataverse.engine.command.impl.MergeInAccountCommand; import edu.harvard.iq.dataverse.engine.command.impl.RevokeAllRolesCommand; +import edu.harvard.iq.dataverse.metrics.MetricsUtil; +import edu.harvard.iq.dataverse.util.FileUtil; + import static 
edu.harvard.iq.dataverse.util.json.JsonPrinter.json; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.logging.Logger; import javax.ejb.Stateless; +import javax.json.JsonArray; import javax.json.JsonObjectBuilder; +import javax.ws.rs.BadRequestException; import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Request; import javax.ws.rs.core.Response; +import javax.ws.rs.core.Variant; /** * @@ -214,11 +228,45 @@ public Response removeUserRoles(@PathParam("identifier") String identifier) { public Response getTraces(@PathParam("identifier") String identifier) { try { AuthenticatedUser userToQuery = authSvc.getAuthenticatedUser(identifier); -            JsonObjectBuilder jsonObj = execCommand(new GetUserTracesCommand(createDataverseRequest(findUserOrDie()), userToQuery)); +            JsonObjectBuilder jsonObj = execCommand(new GetUserTracesCommand(createDataverseRequest(findUserOrDie()), userToQuery, null)); return ok(jsonObj); } catch (WrappedResponse ex) { return ex.getResponse(); } } +    private List<String> elements = Arrays.asList("roleAssignments","dataverseCreator", "dataversePublisher","datasetCreator", "datasetPublisher","dataFileCreator","dataFilePublisher","datasetVersionUsers","explicitGroups","guestbookEntries", "savedSearches"); + +    @GET +    @Path("{identifier}/traces/{element}") +    @Produces("text/csv, application/json") +    public Response getTraces(@Context Request req, @PathParam("identifier") String identifier, @PathParam("element") String element) { +        try { +            AuthenticatedUser userToQuery = authSvc.getAuthenticatedUser(identifier); +            if(!elements.contains(element)) { +                throw new BadRequestException("Not a valid element"); +            } +            JsonObjectBuilder jsonObj = execCommand(new GetUserTracesCommand(createDataverseRequest(findUserOrDie()), userToQuery, element)); + +            List<Variant> vars = Variant +                    .mediaTypes(MediaType.valueOf(FileUtil.MIME_TYPE_CSV), MediaType.APPLICATION_JSON_TYPE) +                    .add() +                    .build(); +            MediaType requestedType = req.selectVariant(vars).getMediaType(); +            if ((requestedType != null) && (requestedType.equals(MediaType.APPLICATION_JSON_TYPE))) { +                return ok(jsonObj); + +            } +            JsonArray items=null; +            try { +                items = jsonObj.build().getJsonObject("traces").getJsonObject(element).getJsonArray("items"); +            } catch(Exception e) { +                return ok(jsonObj); +            } +            return ok(FileUtil.jsonArrayOfObjectsToCSV(items, items.getJsonObject(0).keySet().toArray(new String[0])), MediaType.valueOf(FileUtil.MIME_TYPE_CSV), element + ".csv"); +        } catch (WrappedResponse ex) { +            return ex.getResponse(); +        } +    } +  } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordServiceBean.java index 4daaad76978..e4171c19017 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/SwordServiceBean.java @@ -16,7 +16,7 @@ import javax.ejb.EJB; import javax.ejb.Stateless; import javax.inject.Named; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.swordapp.server.SwordEntry; import org.swordapp.server.SwordError; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/UrlManager.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/UrlManager.java
index ce1e7fb9051..e8621ef7a35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/UrlManager.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/UrlManager.java @@ -6,7 +6,7 @@ import java.util.Arrays; import java.util.List; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.swordapp.server.SwordError; import org.swordapp.server.UriRegistry; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index 52459dc3c31..49cd1a380c9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -31,7 +31,7 @@ import javax.xml.stream.XMLInputFactory; import edu.harvard.iq.dataverse.util.json.ControlledVocabularyException; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * @@ -103,6 +103,8 @@ public class ImportDDIServiceBean { @EJB DatasetFieldServiceBean datasetFieldService; + @EJB ImportGenericServiceBean importGenericService; + // TODO: stop passing the xml source as a string; (it could be huge!) -- L.A. 4.5 // TODO: what L.A. Said. @@ -281,7 +283,7 @@ private void processDocDscr(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws // this will set a StudyId if it has not yet been set; it will get overridden by a metadata // id in the StudyDscr section, if one exists if ( AGENCY_HANDLE.equals( xmlr.getAttributeValue(null, "agency") ) ) { - parseStudyIdHandle( parseText(xmlr), datasetDTO ); + importGenericService.reassignIdentifierAsGlobalId( parseText(xmlr), datasetDTO ); } // EMK TODO: we need to save this somewhere when we add harvesting infrastructure } /*else if ( xmlr.getLocalName().equals("holdings") && StringUtil.isEmpty(datasetDTO..getHarvestHoldings()) ) { @@ -1409,10 +1411,8 @@ private void processTitlStmt(XMLStreamReader xmlr, DatasetDTO datasetDTO) throws FieldDTO field = FieldDTO.createPrimitiveFieldDTO("alternativeTitle", parseText(xmlr)); citation.getFields().add(field); } else if (xmlr.getLocalName().equals("IDNo")) { - if ( AGENCY_HANDLE.equals( xmlr.getAttributeValue(null, "agency") ) ) { - parseStudyIdHandle( parseText(xmlr), datasetDTO ); - } else if ( AGENCY_DOI.equals( xmlr.getAttributeValue(null, "agency") ) ) { - parseStudyIdDOI( parseText(xmlr), datasetDTO ); + if ( AGENCY_HANDLE.equals( xmlr.getAttributeValue(null, "agency") ) || AGENCY_DOI.equals( xmlr.getAttributeValue(null, "agency") ) ) { + importGenericService.reassignIdentifierAsGlobalId(parseText(xmlr), datasetDTO); } else if ( AGENCY_DARA.equals( xmlr.getAttributeValue(null, "agency"))) { /* da|ra - "Registration agency for social and economic data" @@ -1689,43 +1689,6 @@ else if (xmlr.getLocalName().equals("notes")) { return returnValues; } - private void parseStudyIdHandle(String _id, DatasetDTO datasetDTO) { - - int index1 = _id.indexOf(':'); - int index2 = _id.indexOf('/'); - if (index1==-1) { - throw new EJBException("Error parsing (Handle) IdNo: "+_id+". ':' not found in string"); - } else { - datasetDTO.setProtocol(_id.substring(0,index1)); - } - if (index2 == -1) { - throw new EJBException("Error parsing (Handle) IdNo: "+_id+". 
'/' not found in string"); - - } else { - datasetDTO.setAuthority(_id.substring(index1+1, index2)); - } - datasetDTO.setProtocol("hdl"); - datasetDTO.setIdentifier(_id.substring(index2+1)); - } - - private void parseStudyIdDOI(String _id, DatasetDTO datasetDTO) throws ImportException{ - int index1 = _id.indexOf(':'); - int index2 = _id.indexOf('/'); - if (index1==-1) { - throw new EJBException("Error parsing (DOI) IdNo: "+_id+". ':' not found in string"); - } - - if (index2 == -1) { - throw new ImportException("Error parsing (DOI) IdNo: "+_id+". '/' not found in string"); - - } else { - datasetDTO.setAuthority(_id.substring(index1+1, index2)); - } - datasetDTO.setProtocol("doi"); - - datasetDTO.setIdentifier(_id.substring(index2+1)); - } - private void parseStudyIdDoiICPSRdara(String _id, DatasetDTO datasetDTO) throws ImportException{ /* dara/ICPSR DOIs are formatted without the hdl: prefix; for example - diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index 84195227b33..bd7975835e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -407,7 +407,12 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { return null; } - private String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO datasetDTO) { + /* This is a general parser that can take DOI and Handle Ids, in their local or + * URL forms (e.g. doi:... or https://doi.org/...) and parse them into + * protocol/authority/identifier parts that are assigned to the datasetDTO. + * The name reflects the original purpose but it is now used in ImportDDIServiceBean as well. 
+ */ + public String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO datasetDTO) { int index1 = identifierString.indexOf(':'); int index2 = identifierString.indexOf('/'); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index 9f7444f028c..6fc385af3ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -67,7 +67,7 @@ import javax.validation.Validator; import javax.validation.ValidatorFactory; import javax.xml.stream.XMLStreamException; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthTestDataServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthTestDataServiceBean.java index ec64800c92d..3715900733c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthTestDataServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthTestDataServiceBean.java @@ -16,7 +16,7 @@ import java.util.Map; import java.util.logging.Logger; import javax.ejb.Stateless; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; @Stateless public class AuthTestDataServiceBean { diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/DataverseRolePermissionHelper.java b/src/main/java/edu/harvard/iq/dataverse/authorization/DataverseRolePermissionHelper.java index 6d0df17e520..4e6b54a8d49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/DataverseRolePermissionHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/DataverseRolePermissionHelper.java @@ -19,7 +19,7 @@ import javax.ejb.EJB; import javax.ejb.Stateless; import javax.inject.Named; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /* * To change this license header, choose License Headers in Project Properties. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index 673839450d6..d050dbc0dbd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -62,7 +62,7 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import javax.inject.Named; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.hibernate.validator.constraints.NotBlank; import org.primefaces.event.TabChangeEvent; diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/PasswordEncryption.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/PasswordEncryption.java index 38cabb6ef25..4446f68228d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/PasswordEncryption.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/PasswordEncryption.java @@ -3,7 +3,7 @@ import java.io.UnsupportedEncodingException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang3.RandomStringUtils; import org.mindrot.jbcrypt.BCrypt; //import org.primefaces.util.Base64; import java.util.Base64; diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthProvider.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthProvider.java index d9a1baa9e3e..a9c44010950 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/oidc/OIDCAuthProvider.java @@ -1,7 +1,15 @@ package edu.harvard.iq.dataverse.authorization.providers.oauth2.oidc; import com.github.scribejava.core.builder.api.DefaultApi20; -import com.nimbusds.oauth2.sdk.*; +import com.nimbusds.oauth2.sdk.AuthorizationCode; +import com.nimbusds.oauth2.sdk.AuthorizationCodeGrant; +import com.nimbusds.oauth2.sdk.AuthorizationGrant; +import com.nimbusds.oauth2.sdk.ErrorObject; +import com.nimbusds.oauth2.sdk.ParseException; +import com.nimbusds.oauth2.sdk.ResponseType; +import com.nimbusds.oauth2.sdk.Scope; +import com.nimbusds.oauth2.sdk.TokenRequest; +import com.nimbusds.oauth2.sdk.TokenResponse; import com.nimbusds.oauth2.sdk.auth.ClientAuthentication; import com.nimbusds.oauth2.sdk.auth.ClientSecretBasic; import com.nimbusds.oauth2.sdk.auth.Secret; @@ -11,7 +19,12 @@ import com.nimbusds.oauth2.sdk.id.Issuer; import com.nimbusds.oauth2.sdk.id.State; import com.nimbusds.oauth2.sdk.token.BearerAccessToken; -import com.nimbusds.openid.connect.sdk.*; +import com.nimbusds.openid.connect.sdk.AuthenticationRequest; +import com.nimbusds.openid.connect.sdk.Nonce; +import com.nimbusds.openid.connect.sdk.OIDCTokenResponse; +import com.nimbusds.openid.connect.sdk.OIDCTokenResponseParser; +import com.nimbusds.openid.connect.sdk.UserInfoRequest; +import com.nimbusds.openid.connect.sdk.UserInfoResponse; import com.nimbusds.openid.connect.sdk.claims.UserInfo; import com.nimbusds.openid.connect.sdk.op.OIDCProviderConfigurationRequest; import com.nimbusds.openid.connect.sdk.op.OIDCProviderMetadata; @@ -19,7 +32,6 @@ import 
edu.harvard.iq.dataverse.authorization.exceptions.AuthorizationSetupException; import edu.harvard.iq.dataverse.authorization.providers.oauth2.AbstractOAuth2AuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2Exception; -import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2TokenData; import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2UserRecord; import edu.harvard.iq.dataverse.util.BundleUtil; diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java index 59c3240fdfa..f64b5c301e7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java @@ -20,15 +20,25 @@ public class PrivateUrlUser implements User { * is a DvObject. */ private final long datasetId; + private final boolean anonymizedAccess; public PrivateUrlUser(long datasetId) { + this(datasetId, false); + } + + public PrivateUrlUser(long datasetId, boolean anonymizedAccess) { this.datasetId = datasetId; + this.anonymizedAccess = anonymizedAccess; } public long getDatasetId() { return datasetId; } + public boolean hasAnonymizedAccess() { + return anonymizedAccess; + } + /** * By always returning false for isAuthenticated(), we prevent a * name from appearing in the corner as well as preventing an account page diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 0e2320401dd..a422a825259 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -27,7 +27,7 @@ import java.util.Properties; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * * @author Leonid Andreev diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataFileZipper.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataFileZipper.java index 15ff7b5ac99..68553f7f5c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataFileZipper.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataFileZipper.java @@ -39,7 +39,6 @@ * @author Leonid Andreev */ public class DataFileZipper { - public static long DEFAULT_ZIPFILE_LIMIT = 100 * 1024 * 1024; // 100MB private static final Logger logger = Logger.getLogger(DataFileZipper.class.getCanonicalName()); private static final String MANIFEST_FILE_NAME = "MANIFEST.TXT"; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index c97dc747505..0b6b37af9f0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -22,11 +22,21 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.datavariable.DataVariable; -import java.util.*; -import java.util.Scanner; -import java.util.logging.*; -import java.io.*; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileWriter; +import 
java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Scanner; +import java.util.Set; import java.math.BigDecimal; import java.math.MathContext; import java.math.RoundingMode; @@ -34,10 +44,11 @@ import java.nio.channels.FileChannel; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; +import java.util.logging.Logger; import java.util.regex.Matcher; -import org.apache.commons.lang.*; +import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 2aa04c86f5d..1fcc355ae6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -9,12 +9,15 @@ import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetLock; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.api.Util; +import edu.harvard.iq.dataverse.api.Files; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -31,6 +34,7 @@ import edu.harvard.iq.dataverse.util.json.JsonPrinter; import java.io.IOException; import java.io.InputStream; +import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -42,13 +46,25 @@ import java.util.logging.Level; import java.util.logging.Logger; import javax.ejb.EJBException; +import javax.json.Json; +import javax.json.JsonArrayBuilder; +import javax.json.JsonObject; +import javax.json.JsonArray; import javax.json.JsonObjectBuilder; +import javax.json.JsonReader; import javax.validation.ConstraintViolation; +import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; -import org.apache.commons.lang.StringUtils; + +import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.io.IOUtils; import org.ocpsoft.common.util.Strings; +import static edu.harvard.iq.dataverse.api.AbstractApiBean.STATUS_ERROR; +import static edu.harvard.iq.dataverse.api.AbstractApiBean.STATUS_OK; +import static javax.ws.rs.core.Response.Status.BAD_REQUEST; + /** * Methods to add or replace a single file. 
* @@ -100,7 +116,7 @@ public class AddReplaceFileHelper{ public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION"; public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION"; public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION"; - + public static String MULTIPLEFILES_ADD_OPERATION = "MULTIPLEFILES_ADD_OPERATION"; private String currentOperation; @@ -299,33 +315,49 @@ public AddReplaceFileHelper(DataverseRequest dvRequest, this.user = dvRequest.getUser(); } - + /** - * + * * @param chosenDataset * @param newFileName * @param newFileContentType * @param newFileInputStream * @param optionalFileParams - * @return + * @return */ - public boolean runAddFileByDataset(Dataset chosenDataset, - String newFileName, - String newFileContentType, - String newStorageIdentifier, - InputStream newFileInputStream, - OptionalFileParams optionalFileParams){ - + public boolean runAddFileByDataset(Dataset chosenDataset, + String newFileName, + String newFileContentType, + String newStorageIdentifier, + InputStream newFileInputStream, + OptionalFileParams optionalFileParams){ + return this.runAddFileByDataset(chosenDataset,newFileName,newFileContentType,newStorageIdentifier,newFileInputStream,optionalFileParams,false); + + } + + public boolean runAddFileByDataset(Dataset chosenDataset, + String newFileName, + String newFileContentType, + String newStorageIdentifier, + InputStream newFileInputStream, + OptionalFileParams optionalFileParams, + boolean multipleFiles) { + msgt(">> runAddFileByDatasetId"); initErrorHandling(); - - this.currentOperation = FILE_ADD_OPERATION; - + + if(multipleFiles) { + this.currentOperation = MULTIPLEFILES_ADD_OPERATION; + } + else { + this.currentOperation = FILE_ADD_OPERATION; + } + if (!this.step_001_loadDataset(chosenDataset)){ return false; } - + //return this.runAddFile(this.dataset, newFileName, newFileContentType, newFileInputStream, optionalFileParams); return this.runAddReplaceFile(dataset, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams); @@ -727,8 +759,10 @@ private boolean runAddReplacePhase2(){ }else{ msgt("step_070_run_update_dataset_command"); - if (!this.step_070_run_update_dataset_command()){ - return false; + if (!this.isMultipleFilesAddOperation()) { + if (!this.step_070_run_update_dataset_command()) { + return false; + } } } @@ -791,6 +825,16 @@ public boolean isFileAddOperation(){ return this.currentOperation.equals(FILE_ADD_OPERATION); } + /** + * Is this a multiple files add operation ? + * @return + */ + + public boolean isMultipleFilesAddOperation(){ + + return this.currentOperation.equals(MULTIPLEFILES_ADD_OPERATION); + } + /** * Initialize error handling vars */ @@ -1864,14 +1908,13 @@ private boolean step_100_startIngestJobs(){ //if (true){ //return true; //} - - msg("pre ingest start"); - // start the ingest! - // - - ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); - - msg("post ingest start"); + + if (!this.isMultipleFilesAddOperation()) { + msg("pre ingest start"); + // start the ingest! 
+ ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); + msg("post ingest start"); + } return true; } @@ -1959,6 +2002,160 @@ public String getDuplicateFileWarning() { public void setDuplicateFileWarning(String duplicateFileWarning) { this.duplicateFileWarning = duplicateFileWarning; } + + public Response addFiles(String jsonData, Dataset dataset, User authUser) { + msgt("(addFilesToDataset) jsonData: " + jsonData.toString()); + + JsonArrayBuilder jarr = Json.createArrayBuilder(); + + JsonArray filesJson = null; + + int totalNumberofFiles = 0; + int successNumberofFiles = 0; + // ----------------------------------------------------------- + // Read jsonData and Parse files information from jsondata : + // ----------------------------------------------------------- + try (StringReader rdr = new StringReader(jsonData)) { + JsonReader dbJsonReader = Json.createReader(rdr); + filesJson = dbJsonReader.readArray(); + dbJsonReader.close(); + + + if (filesJson != null) { + totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size(); + + for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) { + + OptionalFileParams optionalFileParams = null; + try { + optionalFileParams = new OptionalFileParams(fileJson.toString()); + + String newFilename = null; + String newFileContentType = null; + String newStorageIdentifier = null; + if (optionalFileParams.hasStorageIdentifier()) { + newStorageIdentifier = optionalFileParams.getStorageIdentifier(); + if (optionalFileParams.hasFileName()) { + newFilename = optionalFileParams.getFileName(); + if (optionalFileParams.hasMimetype()) { + newFileContentType = optionalFileParams.getMimeType(); + } + } + + msgt("ADD! = " + newFilename); + + runAddFileByDataset(dataset, + newFilename, + newFileContentType, + newStorageIdentifier, + null, + optionalFileParams, true); + + if (hasError()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("errorMessage", getHttpErrorCode().toString() +":"+ getErrorMessagesAsString("\n")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } else { + JsonObject successresult = getSuccessResultAsJsonObjectBuilder().build(); + String duplicateWarning = getDuplicateFileWarning(); + + if (duplicateWarning != null && !duplicateWarning.isEmpty()) { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("warningMessage", getDuplicateFileWarning()) + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("storageIdentifier", newStorageIdentifier) + .add("successMessage", "Added successfully to the dataset") + .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0)); + jarr.add(fileoutput); + } + } + successNumberofFiles = successNumberofFiles + 1; + } else { + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorMessage", "You must provide a storageidentifier, filename, and mimetype.") + .add("fileDetails", fileJson); + + jarr.add(fileoutput); + } + + } catch (DataFileTagException ex) { + Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", ex.getMessage()) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + + } + catch (NoFilesException ex) { + 
Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); + JsonObjectBuilder fileoutput = Json.createObjectBuilder() + .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode()) + .add("message", BundleUtil.getStringFromBundle("NoFileException! Serious Error! See administrator!")) + .add("fileDetails", fileJson); + jarr.add(fileoutput); + } + + }// End of adding files + + DatasetLock eipLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress); + if (eipLock == null) { + logger.log(Level.WARNING, "Dataset not locked for EditInProgress "); + } else { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + logger.log(Level.INFO, "Removed EditInProgress lock "); + } + + try { + Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest); + ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); + commandEngine.submit(cmd); + } catch (CommandException ex) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "CommandException updating DatasetVersion from addFiles job: " + ex.getMessage()); + } + + dataset = datasetService.find(dataset.getId()); + + List s = dataset.getFiles(); + for (DataFile dataFile : s) { + } + //ingest job + ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser); + + } + } + catch ( javax.json.stream.JsonParsingException ex) { + ex.printStackTrace(); + return error(BAD_REQUEST, "Json Parsing Exception :" + ex.getMessage()); + } + catch (Exception e) { + e.printStackTrace(); + return error(BAD_REQUEST, e.getMessage()); + } + + JsonObjectBuilder result = Json.createObjectBuilder() + .add("Total number of files", totalNumberofFiles) + .add("Number of files successfully added", successNumberofFiles); + + + return Response.ok().entity(Json.createObjectBuilder() + .add("status", STATUS_OK) + .add("data", Json.createObjectBuilder().add("Files", jarr).add("Result", result)).build() ).build(); + } + + protected static Response error(Response.Status sts, String msg ) { + return Response.status(sts) + .entity( NullSafeJsonBuilder.jsonObjectBuilder() + .add("status", STATUS_ERROR) + .add( "message", msg ).build() + ).type(MediaType.APPLICATION_JSON_TYPE).build(); + } } // end class /* diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java index 276f52a5802..b2af28befb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java @@ -4,6 +4,7 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.DvObject; @@ -11,6 +12,7 @@ import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.RoleAssignee; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -32,6 +34,7 @@ public class AssignRoleCommand extends AbstractCommand { private final RoleAssignee grantee; private final DvObject defPoint; private final String privateUrlToken; 
+ private boolean anonymizedAccess; /** * @param anAssignee The user being granted the role @@ -47,6 +50,12 @@ public AssignRoleCommand(RoleAssignee anAssignee, DataverseRole aRole, DvObject grantee = anAssignee; defPoint = assignmentPoint; this.privateUrlToken = privateUrlToken; + this.anonymizedAccess=false; + } + + public AssignRoleCommand(PrivateUrlUser privateUrlUser, DataverseRole memberRole, Dataset dataset, DataverseRequest request, String privateUrlToken, boolean anonymizedAccess) { + this(privateUrlUser, memberRole, dataset, request, privateUrlToken); + this.anonymizedAccess= anonymizedAccess; } @Override @@ -58,7 +67,7 @@ public RoleAssignment execute(CommandContext ctxt) throws CommandException { } } // TODO make sure the role is defined on the dataverse. - RoleAssignment roleAssignment = new RoleAssignment(role, grantee, defPoint, privateUrlToken); + RoleAssignment roleAssignment = new RoleAssignment(role, grantee, defPoint, privateUrlToken, anonymizedAccess); return ctxt.roles().save(roleAssignment); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreatePrivateUrlCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreatePrivateUrlCommand.java index cc1adbc984a..3f5a9b042f0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreatePrivateUrlCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreatePrivateUrlCommand.java @@ -13,6 +13,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.util.BundleUtil; + import java.util.UUID; import java.util.logging.Logger; @@ -22,45 +24,39 @@ public class CreatePrivateUrlCommand extends AbstractCommand { private static final Logger logger = Logger.getLogger(CreatePrivateUrlCommand.class.getCanonicalName()); final Dataset dataset; + final boolean anonymizedAccess; - public CreatePrivateUrlCommand(DataverseRequest dataverseRequest, Dataset theDataset) { + public CreatePrivateUrlCommand(DataverseRequest dataverseRequest, Dataset theDataset, boolean anonymizedAccess) { super(dataverseRequest, theDataset); dataset = theDataset; + this.anonymizedAccess = anonymizedAccess; } @Override public PrivateUrl execute(CommandContext ctxt) throws CommandException { logger.fine("Executing CreatePrivateUrlCommand..."); if (dataset == null) { - /** - * @todo Internationalize this. - */ - String message = "Can't create Private URL. Dataset is null."; - logger.info(message); - throw new IllegalCommandException(message, this); + logger.info("Can't create Private URL. Dataset is null."); + throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.privateurl.error.datasetnotfound"), this); } PrivateUrl existing = ctxt.privateUrl().getPrivateUrlFromDatasetId(dataset.getId()); if (existing != null) { - /** - * @todo Internationalize this. - */ - String message = "Private URL already exists for dataset id " + dataset.getId() + "."; - logger.info(message); - throw new IllegalCommandException(message, this); + logger.info("Private URL already exists for dataset id " + dataset.getId() + "."); + throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.privateurl.error.alreadyexists"), this); } DatasetVersion latestVersion = dataset.getLatestVersion(); if (!latestVersion.isDraft()) { - /** - * @todo Internationalize this. 
- */
-            String message = "Can't create Private URL because the latest version of dataset id " + dataset.getId() + " is not a draft.";
-            logger.info(message);
-            throw new IllegalCommandException(message, this);
+            logger.info("Can't create Private URL because the latest version of dataset id " + dataset.getId() + " is not a draft.");
+            throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.privateurl.error.notdraft"), this);
+        }
+        if (dataset.isReleased() && anonymizedAccess) {
+            logger.info("Can't create anonymized access Private URL because the dataset id " + dataset.getId() + " has a published version.");
+            throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.privateurl.anonymized.error.released"), this);
         }
-        PrivateUrlUser privateUrlUser = new PrivateUrlUser(dataset.getId());
+        PrivateUrlUser privateUrlUser = new PrivateUrlUser(dataset.getId(), anonymizedAccess);
         DataverseRole memberRole = ctxt.roles().findBuiltinRoleByAlias(DataverseRole.MEMBER);
         final String privateUrlToken = UUID.randomUUID().toString();
-        RoleAssignment roleAssignment = ctxt.engine().submit(new AssignRoleCommand(privateUrlUser, memberRole, dataset, getRequest(), privateUrlToken));
+        RoleAssignment roleAssignment = ctxt.engine().submit(new AssignRoleCommand(privateUrlUser, memberRole, dataset, getRequest(), privateUrlToken, anonymizedAccess));
         PrivateUrl privateUrl = new PrivateUrl(roleAssignment, dataset, ctxt.systemConfig().getDataverseSiteUrl());
         return privateUrl;
     }
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
index 4fa07dedede..3b445c2683f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
@@ -115,12 +115,16 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
         }
         // update metadata
-        theDataset.getLatestVersion().setReleaseTime(getTimestamp());
+        if (theDataset.getLatestVersion().getReleaseTime() == null) {
+            // Allow migrated versions to keep original release dates
+            theDataset.getLatestVersion().setReleaseTime(getTimestamp());
+        }
         theDataset.getLatestVersion().setLastUpdateTime(getTimestamp());
         theDataset.setModificationTime(getTimestamp());
         theDataset.setFileAccessRequest(theDataset.getLatestVersion().getTermsOfUseAndAccess().isFileAccessRequest());
-        updateFiles(getTimestamp(), ctxt);
+        // Use dataset pub date (which may not be the current date for migrated datasets)
+        updateFiles(new Timestamp(theDataset.getLatestVersion().getReleaseTime().getTime()), ctxt);
         //
         // TODO: Not sure if this .merge() is necessary here - ?
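For orientation, a minimal usage sketch of the new three-argument CreatePrivateUrlCommand constructor introduced above (illustrative only, not part of the patch). It assumes that commandEngine, dvRequest, dataset, and logger are in scope, as they are in the Datasets API bean earlier in this patch:

    try {
        PrivateUrl privateUrl = commandEngine.submit(
                new CreatePrivateUrlCommand(dvRequest, dataset, /* anonymizedAccess */ true));
        logger.fine("Anonymized Private URL created: " + privateUrl.getLink());
    } catch (CommandException ex) {
        // The command rejects a null dataset, a dataset that already has a Private URL,
        // a latest version that is not a draft, and (for anonymized access) a dataset
        // that already has a published version.
        logger.warning("Could not create Private URL: " + ex.getMessage());
    }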
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetUserTracesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetUserTracesCommand.java index 41a1708e4c5..f3324ba6f2e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetUserTracesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetUserTracesCommand.java @@ -20,6 +20,8 @@ import java.math.BigDecimal; import java.util.List; import java.util.Set; +import java.util.logging.Logger; + import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; @@ -28,13 +30,17 @@ @RequiredPermissions({}) public class GetUserTracesCommand extends AbstractCommand { + private static final Logger logger = Logger.getLogger(GetUserTracesCommand.class.getCanonicalName()); + private DataverseRequest request; private AuthenticatedUser user; + private String element; - public GetUserTracesCommand(DataverseRequest request, AuthenticatedUser user) { + public GetUserTracesCommand(DataverseRequest request, AuthenticatedUser user, String element) { super(request, (DvObject) null); this.request = request; this.user = user; + this.element = element; } @Override @@ -47,180 +53,206 @@ public JsonObjectBuilder execute(CommandContext ctxt) throws CommandException { } Long userId = user.getId(); JsonObjectBuilder traces = Json.createObjectBuilder(); -// List roleAssignments = ctxt.permissions().getDvObjectsUserHasRoleOn(user); - List roleAssignments = ctxt.roleAssignees().getAssignmentsFor(user.getIdentifier()); - if (roleAssignments != null && !roleAssignments.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (RoleAssignment roleAssignment : roleAssignments) { - jab.add(NullSafeJsonBuilder.jsonObjectBuilder() - .add("id", roleAssignment.getId()) - .add("definitionPointName", roleAssignment.getDefinitionPoint().getCurrentName()) - .add("definitionPointIdentifier", roleAssignment.getDefinitionPoint().getIdentifier()) - .add("definitionPointId", roleAssignment.getDefinitionPoint().getId()) - .add("roleAlias", roleAssignment.getRole().getAlias()) - .add("roleName", roleAssignment.getRole().getName()) - ); + if (element == null || element.equals("roleAssignments")) { + // List roleAssignments = + // ctxt.permissions().getDvObjectsUserHasRoleOn(user); + List roleAssignments = ctxt.roleAssignees().getAssignmentsFor(user.getIdentifier()); + if (roleAssignments != null && !roleAssignments.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (RoleAssignment roleAssignment : roleAssignments) { + jab.add(NullSafeJsonBuilder.jsonObjectBuilder() + .add("id", roleAssignment.getId()) + .add("definitionPointName", roleAssignment.getDefinitionPoint().getCurrentName()) + .add("definitionPointIdentifier", roleAssignment.getDefinitionPoint().getIdentifier()) + .add("definitionPointId", roleAssignment.getDefinitionPoint().getId()) + .add("roleAlias", roleAssignment.getRole().getAlias()) + .add("roleName", roleAssignment.getRole().getName())); + } + job.add("count", roleAssignments.size()); + job.add("items", jab); + traces.add("roleAssignments", job); } - job.add("count", roleAssignments.size()); - job.add("items", jab); - traces.add("roleAssignments", job); } - List dataversesCreated = ctxt.dataverses().findByCreatorId(userId); - if (dataversesCreated != null && !dataversesCreated.isEmpty()) { - JsonObjectBuilder job = 
Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (Dataverse dataverse : dataversesCreated) { - jab.add(Json.createObjectBuilder() - .add("id", dataverse.getId()) - .add("alias", dataverse.getAlias()) - ); + if (element == null || element.equals("dataverseCreator")) { + List dataversesCreated = ctxt.dataverses().findByCreatorId(userId); + if (dataversesCreated != null && !dataversesCreated.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (Dataverse dataverse : dataversesCreated) { + jab.add(Json.createObjectBuilder() + .add("id", dataverse.getId()) + .add("alias", dataverse.getAlias())); + } + job.add("count", dataversesCreated.size()); + job.add("items", jab); + traces.add("dataverseCreator", job); } - job.add("count", dataversesCreated.size()); - job.add("items", jab); - traces.add("dataverseCreator", job); } - List dataversesPublished = ctxt.dataverses().findByReleaseUserId(userId); - if (dataversesPublished != null && !dataversesPublished.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (Dataverse dataverse : dataversesPublished) { - jab.add(Json.createObjectBuilder() - .add("id", dataverse.getId()) - .add("alias", dataverse.getAlias()) - ); + if (element == null || element.equals("dataversePublisher")) { + List dataversesPublished = ctxt.dataverses().findByReleaseUserId(userId); + if (dataversesPublished != null && !dataversesPublished.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (Dataverse dataverse : dataversesPublished) { + jab.add(Json.createObjectBuilder() + .add("id", dataverse.getId()) + .add("alias", dataverse.getAlias())); + } + job.add("count", dataversesPublished.size()); + job.add("items", jab); + traces.add("dataversePublisher", job); } - job.add("count", dataversesPublished.size()); - job.add("items", jab); - traces.add("dataversePublisher", job); } - List datasetsCreated = ctxt.datasets().findByCreatorId(userId); - if (datasetsCreated != null && !datasetsCreated.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (Dataset dataset : datasetsCreated) { - jab.add(Json.createObjectBuilder() - .add("id", dataset.getId()) - .add("pid", dataset.getGlobalId().asString()) - ); + if (element == null || element.equals("datasetCreator")) { + List datasetsCreated = ctxt.datasets().findByCreatorId(userId); + if (datasetsCreated != null && !datasetsCreated.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (Dataset dataset : datasetsCreated) { + jab.add(Json.createObjectBuilder() + .add("id", dataset.getId()) + .add("pid", dataset.getGlobalId().asString())); + } + job.add("count", datasetsCreated.size()); + job.add("items", jab); + traces.add("datasetCreator", job); } - job.add("count", datasetsCreated.size()); - job.add("items", jab); - traces.add("datasetCreator", job); } - List datasetsPublished = ctxt.datasets().findByReleaseUserId(userId); - if (datasetsPublished != null && !datasetsPublished.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (Dataset dataset : datasetsPublished) { - jab.add(Json.createObjectBuilder() - .add("id", dataset.getId()) - .add("pid", dataset.getGlobalId().asString()) - ); 
+ if (element == null || element.equals("datasetPublisher")) { + List datasetsPublished = ctxt.datasets().findByReleaseUserId(userId); + if (datasetsPublished != null && !datasetsPublished.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (Dataset dataset : datasetsPublished) { + jab.add(Json.createObjectBuilder() + .add("id", dataset.getId()) + .add("pid", dataset.getGlobalId().asString())); + } + job.add("count", datasetsPublished.size()); + job.add("items", jab); + traces.add("datasetPublisher", job); } - job.add("count", datasetsPublished.size()); - job.add("items", jab); - traces.add("datasetPublisher", job); } - List dataFilesCreated = ctxt.files().findByCreatorId(userId); - if (dataFilesCreated != null && !dataFilesCreated.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (DataFile dataFile : dataFilesCreated) { - jab.add(Json.createObjectBuilder() - .add("id", dataFile.getId()) - .add("filename", dataFile.getCurrentName()) - .add("datasetPid", dataFile.getOwner().getGlobalId().asString()) - ); + if (element == null || element.equals("dataFileCreator")) { + List dataFilesCreated = ctxt.files().findByCreatorId(userId); + if (dataFilesCreated != null && !dataFilesCreated.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (DataFile dataFile : dataFilesCreated) { + jab.add(Json.createObjectBuilder() + .add("id", dataFile.getId()) + .add("filename", dataFile.getCurrentName()) + .add("datasetPid", dataFile.getOwner().getGlobalId().asString())); + } + job.add("count", dataFilesCreated.size()); + job.add("items", jab); + traces.add("dataFileCreator", job); } - job.add("count", dataFilesCreated.size()); - job.add("items", jab); - traces.add("dataFileCreator", job); } - // TODO: Consider removing this because we don't seem to populate releaseuser_id for files. - List dataFilesPublished = ctxt.files().findByReleaseUserId(userId); - if (dataFilesPublished != null && !dataFilesPublished.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (DataFile dataFile : dataFilesPublished) { - jab.add(Json.createObjectBuilder() - .add("id", dataFile.getId()) - .add("filename", dataFile.getCurrentName()) - .add("datasetPid", dataFile.getOwner().getGlobalId().asString()) - ); + if (element == null || element.equals("dataFilePublisher")) { + // TODO: Consider removing this because we don't seem to populate releaseuser_id + // for files. + List dataFilesPublished = ctxt.files().findByReleaseUserId(userId); + if (dataFilesPublished != null && !dataFilesPublished.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (DataFile dataFile : dataFilesPublished) { + jab.add(Json.createObjectBuilder() + .add("id", dataFile.getId()) + .add("filename", dataFile.getCurrentName()) + .add("datasetPid", dataFile.getOwner().getGlobalId().asString())); + } + job.add("count", dataFilesPublished.size()); + job.add("items", jab); + traces.add("dataFilePublisher", job); } - job.add("count", dataFilesPublished.size()); - job.add("items", jab); - traces.add("dataFileCreator", job); } - // These are the users who have published a version (or created a draft). 
- List datasetVersionUsers = ctxt.datasetVersion().getDatasetVersionUsersByAuthenticatedUser(user); - if (datasetVersionUsers != null && !datasetVersionUsers.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (DatasetVersionUser datasetVersionUser : datasetVersionUsers) { - jab.add(Json.createObjectBuilder() - .add("id", datasetVersionUser.getId()) - .add("dataset", datasetVersionUser.getDatasetVersion().getDataset().getGlobalId().asString()) - .add("version", datasetVersionUser.getDatasetVersion().getSemanticVersion()) - ); + if (element == null || element.equals("datasetVersionUsers")) { + // These are the users who have published a version (or created a draft). + List datasetVersionUsers = ctxt.datasetVersion().getDatasetVersionUsersByAuthenticatedUser(user); + if (datasetVersionUsers != null && !datasetVersionUsers.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (DatasetVersionUser datasetVersionUser : datasetVersionUsers) { + jab.add(Json.createObjectBuilder() + .add("id", datasetVersionUser.getId()) + .add("dataset", datasetVersionUser.getDatasetVersion().getDataset().getGlobalId().asString()) + .add("version", datasetVersionUser.getDatasetVersion().getSemanticVersion())); + } + job.add("count", datasetVersionUsers.size()); + job.add("items", jab); + traces.add("datasetVersionUsers", job); } - job.add("count", datasetVersionUsers.size()); - job.add("items", jab); - traces.add("datasetVersionUsers", job); } - Set explicitGroups = ctxt.explicitGroups().findDirectlyContainingGroups(user); - if (explicitGroups != null && !explicitGroups.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (ExplicitGroup explicitGroup : explicitGroups) { - jab.add(Json.createObjectBuilder() - .add("id", explicitGroup.getId()) - .add("name", explicitGroup.getDisplayName()) - ); + if (element == null || element.equals("explicitGroups")) { + Set explicitGroups = ctxt.explicitGroups().findDirectlyContainingGroups(user); + if (explicitGroups != null && !explicitGroups.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (ExplicitGroup explicitGroup : explicitGroups) { + jab.add(Json.createObjectBuilder() + .add("id", explicitGroup.getId()) + .add("name", explicitGroup.getDisplayName())); + } + job.add("count", explicitGroups.size()); + job.add("items", jab); + traces.add("explicitGroups", job); } - job.add("count", explicitGroups.size()); - job.add("items", jab); - traces.add("explicitGroups", job); } - List guestbookResponses = ctxt.responses().findByAuthenticatedUserId(user); - if (guestbookResponses != null && !guestbookResponses.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - // The feeling is that this is too much detail for now so we only show a count. 
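Illustrative only (not something this patch adds): with the new element parameter, a caller can ask GetUserTracesCommand for a single section of the traces report, or pass null to keep the original behavior of returning every section.

    // Hypothetical invocation; assumes a command engine and a DataverseRequest named req.
    JsonObjectBuilder roleTraces = engine.submit(new GetUserTracesCommand(req, user, "roleAssignments"));
    JsonObjectBuilder allTraces = engine.submit(new GetUserTracesCommand(req, user, null));
    // For a named element such as "guestbookEntries" the individual entries are listed;
    // with null, guestbook entries are reported only as a count.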
-// JsonArrayBuilder jab = Json.createArrayBuilder(); -// for (GuestbookResponse guestbookResponse : guestbookResponses) { -// jab.add(Json.createObjectBuilder() -// .add("id", guestbookResponse.getId()) -// .add("downloadType", guestbookResponse.getDownloadtype()) -// .add("filename", guestbookResponse.getDataFile().getCurrentName()) -// .add("date", guestbookResponse.getResponseDate()) -// .add("guestbookName", guestbookResponse.getGuestbook().getName()) -// .add("dataset", guestbookResponse.getDatasetVersion().getDataset().getGlobalId().asString()) -// .add("version", guestbookResponse.getDatasetVersion().getSemanticVersion()) -// ); -// } - job.add("count", guestbookResponses.size()); -// job.add("items", jab); - traces.add("guestbookEntries", job); + if (element == null || element.equals("guestbookEntries")) { + List guestbookResponses = ctxt.responses().findByAuthenticatedUserId(user); + if (guestbookResponses != null && !guestbookResponses.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + // The feeling is that this is too much detail for the call for all elements so + // we only show a count in that case. + if (element != null) { + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (GuestbookResponse guestbookResponse : guestbookResponses) { + try { + JsonObjectBuilder gbe = Json.createObjectBuilder() + .add("id", guestbookResponse.getId()) + .add("downloadType", guestbookResponse.getDownloadtype()) + .add("filename", guestbookResponse.getDataFile().getCurrentName()) + .add("date", guestbookResponse.getResponseDate()) + .add("guestbookName", guestbookResponse.getGuestbook().getName()); + if(guestbookResponse.getDataset().getGlobalId()!=null) { + gbe.add("dataset", guestbookResponse.getDataset().getGlobalId().asString()); + } + if (guestbookResponse.getDatasetVersion() != null) { + gbe.add("version", guestbookResponse.getDatasetVersion().getSemanticVersion()); + } + jab.add(gbe); + } catch (NullPointerException npe) { + //Legacy/bad db entries + logger.warning("Guestbook id:" + guestbookResponse.getId() + " does not have required info."); + } + } + job.add("items", jab); + } + job.add("count", guestbookResponses.size()); + // job.add("items", jab); + traces.add("guestbookEntries", job); + } } - List savedSearchs = ctxt.savedSearches().findByAuthenticatedUser(user); - if (savedSearchs != null && !savedSearchs.isEmpty()) { - JsonObjectBuilder job = Json.createObjectBuilder(); - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (SavedSearch savedSearch : savedSearchs) { - jab.add(Json.createObjectBuilder() - .add("id", savedSearch.getId()) - ); + if (element == null || element.equals("savedSearches")) { + List savedSearchs = ctxt.savedSearches().findByAuthenticatedUser(user); + if (savedSearchs != null && !savedSearchs.isEmpty()) { + JsonObjectBuilder job = Json.createObjectBuilder(); + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (SavedSearch savedSearch : savedSearchs) { + jab.add(Json.createObjectBuilder() + .add("id", savedSearch.getId())); + } + job.add("count", savedSearchs.size()); + job.add("items", jab); + traces.add("savedSearches", job); } - job.add("count", savedSearchs.size()); - job.add("items", jab); - traces.add("savedSearches", job); } JsonObjectBuilder result = Json.createObjectBuilder(); result.add("user", Json.createObjectBuilder() .add("identifier", user.getIdentifier()) - .add("name", user.getName()) - ); + .add("name", user.getName())); result.add("traces", traces); return result; } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java index daab9771b4e..a6875369493 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java @@ -13,7 +13,7 @@ import java.util.logging.Logger; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.methods.GetMethod; -import static org.apache.commons.lang.StringUtils.isEmpty; +import static org.apache.commons.lang3.StringUtils.isEmpty; /** * Imports a dataset from a different system. This command validates that the PID diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 0a1de25bed0..d87c3011c15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -84,6 +84,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } catch (Exception e) { logger.warning(e.getLocalizedMessage() + "here"); + e.printStackTrace(); } return WorkflowStepResult.OK; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java index 2a6d7216aa5..a29e7fdd59c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java @@ -51,7 +51,7 @@ public ScriptRequestResponse execute(CommandContext ctxt) throws CommandExceptio } String dcmBaseUrl = ctxt.settings().getValueForKey(DataCaptureModuleUrl); if (dcmBaseUrl == null) { - throw new RuntimeException(DataCaptureModuleUrl + " is null!"); + throw new CommandException("DataCaptureModuleUrl is null!", this); } User user = request.getUser(); if (!(user instanceof AuthenticatedUser)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 4fed5913263..9061c890f01 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -164,6 +164,16 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) DatasetVersionDTO version = datasetDto.getDatasetVersion(); String persistentProtocol = datasetDto.getProtocol(); String persistentAgency = persistentProtocol; + + String persistentAuthority = datasetDto.getAuthority(); + String persistentId = datasetDto.getIdentifier(); + + String pid = persistentProtocol + ":" + persistentAuthority + "/" + persistentId; + String pidUri = pid; + //Some tests don't send real PIDs - don't try to get their URL form + if(!pidUri.equals("null:null/null")) { + pidUri= new GlobalId(persistentProtocol + ":" + persistentAuthority + "/" + persistentId).toURL().toString(); + } // The "persistentAgency" tag is used for the "agency" attribute of the // ddi section; back in the DVN3 days we used "handle" and "DOI" // for the 2 supported protocols, respectively. 
For the sake of backward @@ -174,8 +184,6 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) persistentAgency = "DOI"; } - String persistentAuthority = datasetDto.getAuthority(); - String persistentId = datasetDto.getIdentifier(); //docDesc Block writeDocDescElement (xmlw, datasetDto); //stdyDesc Block @@ -189,7 +197,9 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) xmlw.writeStartElement("IDNo"); writeAttribute(xmlw, "agency", persistentAgency); - xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); + + + xmlw.writeCharacters(pid); xmlw.writeEndElement(); // IDNo writeOtherIdElement(xmlw, version); xmlw.writeEndElement(); // titlStmt @@ -225,7 +235,10 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) xmlw.writeEndElement(); // diststmt writeSeriesElement(xmlw, version); - + xmlw.writeStartElement("holdings"); + writeAttribute(xmlw, "URI", pidUri); + xmlw.writeEndElement(); //holdings + xmlw.writeEndElement(); // citation //End Citation Block @@ -1409,10 +1422,10 @@ public static void createDataDscr(XMLStreamWriter xmlw, DatasetVersion datasetVe * included for restricted files but that meant that summary * statistics were exposed. (To get at these statistics, API users * should instead use the "Data Variable Metadata Access" endpoint.) - * These days we return early to avoid this exposure. + * These days we skip restricted files to avoid this exposure. */ if (dataFile.isRestricted()) { - return; + continue; } if (dataFile != null && dataFile.isTabularData()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index fe0c15969ca..f972ae2a983 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -19,6 +19,7 @@ import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; @@ -1135,7 +1136,7 @@ public static void writeAccessRightsElement(XMLStreamWriter xmlw, DatasetVersion writeRightsHeader(xmlw, language); if (StringUtils.isNotBlank(datasetVersionDTO.getLicense())) { if (StringUtils.containsIgnoreCase(datasetVersionDTO.getLicense(), "cc0")) { - xmlw.writeAttribute("rightsURI", "https://creativecommons.org/publicdomain/zero/1.0/"); + xmlw.writeAttribute("rightsURI", TermsOfUseAndAccess.CC0_URI); if (StringUtils.isNotBlank(datasetVersionDTO.getTermsOfUse())) { xmlw.writeCharacters(datasetVersionDTO.getTermsOfUse()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index eb5cda8d72b..71cc23e242b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -31,8 +31,8 @@ //import javax.xml.bind.Unmarshaller; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; -import org.apache.commons.lang.mutable.MutableBoolean; -import org.apache.commons.lang.mutable.MutableLong; +import 
org.apache.commons.lang3.mutable.MutableBoolean; +import org.apache.commons.lang3.mutable.MutableLong; import org.xml.sax.SAXException; import com.lyncode.xoai.model.oaipmh.Header; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java index 38ebd83843b..d1aaea50793 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java @@ -24,7 +24,7 @@ import java.io.UnsupportedEncodingException; import javax.xml.parsers.ParserConfigurationException; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.xml.sax.SAXException; import javax.xml.transform.TransformerException; import java.net.URLEncoder; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 5d4c02a87e2..d8619c42dfa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -40,19 +40,15 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; import java.util.Date; import java.util.HashMap; import java.util.logging.Logger; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.GZIPOutputStream; import javax.ejb.EJB; import javax.mail.internet.InternetAddress; import javax.servlet.ServletConfig; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/XlistRecords.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/XlistRecords.java index e2366119f54..15bd005cacf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/XlistRecords.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/XlistRecords.java @@ -1,4 +1,3 @@ - package edu.harvard.iq.dataverse.harvest.server.xoai; import com.lyncode.xml.exceptions.XmlWriteException; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java index 538f0fe7219..7e115c78f06 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/Xrecord.java @@ -1,4 +1,3 @@ - package edu.harvard.iq.dataverse.harvest.server.xoai; import com.lyncode.xoai.model.oaipmh.Header; @@ -17,6 +16,7 @@ import java.io.OutputStream; import java.net.InetAddress; import java.net.UnknownHostException; +import org.apache.poi.util.ReplacingInputStream; /** * @@ -85,7 +85,11 @@ public void writeToStream(OutputStream outputStream) throws IOException { if (dataset != null && formatName != null) { InputStream inputStream = null; try { - inputStream = ExportService.getInstance().getExport(dataset, formatName); + inputStream = new ReplacingInputStream( + ExportService.getInstance().getExport(dataset, formatName), + "", + "" + ); } catch (ExportException ex) { inputStream = null; } @@ -101,7 +105,6 @@ public void 
writeToStream(OutputStream outputStream) throws IOException { } } outputStream.flush(); - } private String itemHeaderToString(Header header) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java index 13d4ed96815..9484a412913 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java @@ -57,7 +57,7 @@ public class IngestUtil { * * @param version the dataset version * @param newFiles the list of new data files to add to it - * @param fileToReplace + * @param fileToReplace */ public static void checkForDuplicateFileNamesFinal(DatasetVersion version, List newFiles, DataFile fileToReplace) { @@ -257,7 +257,7 @@ public static Set existingPathNamesAsSet(DatasetVersion version, FileMet // #6942 added proxy for existing files to a boolean set when dataset version copy is done for (Iterator fmIt = version.getFileMetadatas().iterator(); fmIt.hasNext();) { FileMetadata fm = fmIt.next(); - if((fm.isInPriorVersion() || fm.getId() != null) && (replacedFmd==null) || (!fm.getDataFile().equals(replacedFmd.getDataFile()))) { + if((fm.isInPriorVersion() || fm.getId() != null) && (replacedFmd==null || !fm.getDataFile().equals(replacedFmd.getDataFile()))) { String existingName = fm.getLabel(); String existingDir = fm.getDirectoryLabel(); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestableDataChecker.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestableDataChecker.java index 512c744c07a..5f771d2756b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestableDataChecker.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestableDataChecker.java @@ -29,7 +29,7 @@ import java.util.regex.*; import java.util.zip.*; import java.util.logging.Logger; -import org.apache.commons.lang.builder.*; +import org.apache.commons.lang3.builder.*; import org.apache.commons.io.IOUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/InvalidData.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/InvalidData.java index 4f584abc4cb..de8e2f7aa82 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/InvalidData.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/InvalidData.java @@ -20,8 +20,8 @@ package edu.harvard.iq.dataverse.ingest.tabulardata; import java.util.*; -import org.apache.commons.lang.builder.ToStringBuilder; -import org.apache.commons.lang.builder.ToStringStyle; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** * A class that stores information about a variables' invalid data. * Akio Sone's original DVN v.2.* implementation, virtually unchanged. 
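A note on the IngestUtil condition rewritten above (explanatory, not part of the patch): in Java, && binds more tightly than ||, so the old expression grouped as shown in the first comment below. When replacedFmd was null and the file metadata was new (neither in a prior version nor saved with an id), the final clause was still evaluated and dereferenced replacedFmd; when replacedFmd was non-null, new files could wrongly be counted as existing paths whenever their DataFile differed from the one being replaced. The added parentheses make the null check short-circuit the equals() call:

    // Old, effective grouping:
    //   ((fm.isInPriorVersion() || fm.getId() != null) && replacedFmd == null)
    //           || !fm.getDataFile().equals(replacedFmd.getDataFile())
    // New grouping:
    //   (fm.isInPriorVersion() || fm.getId() != null)
    //           && (replacedFmd == null || !fm.getDataFile().equals(replacedFmd.getDataFile()))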
diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java index d1e85c208ae..57f76df3802 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java @@ -50,7 +50,7 @@ import java.util.Set; import java.util.logging.Logger; import org.apache.commons.csv.CSVFormat; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVPrinter; import org.apache.commons.csv.CSVRecord; @@ -71,7 +71,7 @@ public class CSVFileReader extends TabularDataFileReader { private static final String FORMAT_IEEE754 = "%+#." + DIGITS_OF_PRECISION_DOUBLE + "e"; private MathContext doubleMathContext; private CSVFormat inFormat; - private final Set firstNumCharSet = new HashSet<>(); + //private final Set firstNumCharSet = new HashSet<>(); // DATE FORMATS private static SimpleDateFormat[] DATE_FORMATS = new SimpleDateFormat[]{ @@ -99,7 +99,7 @@ public CSVFileReader(TabularDataFileReaderSpi originator, char delim) { private void init() throws IOException { doubleMathContext = new MathContext(DIGITS_OF_PRECISION_DOUBLE, RoundingMode.HALF_EVEN); - firstNumCharSet.addAll(Arrays.asList(new Character[]{'+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'})); + //firstNumCharSet.addAll(Arrays.asList(new Character[]{'+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'})); } /** @@ -207,8 +207,9 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f && varString != null && (varString.isEmpty() || varString.equals("null") - || (firstNumCharSet.contains(varString.charAt(0)) - && StringUtils.isNumeric(varString.substring(1)))); + || (StringUtils.isNumeric(varString) + || (varString.substring(0,1).matches("[+-]") + && StringUtils.isNumeric(varString.substring(1))))); if (isNumericVariable[i]) { // If variable might be "numeric" test to see if this value is a parsable number: if (varString != null && !varString.isEmpty()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java index 48a6212ffd7..2dec701592e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java @@ -20,34 +20,46 @@ package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InvalidObjectException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; + +import 
java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; - -import java.io.*; -import java.nio.*; -import java.util.logging.*; - -import java.util.*; -import java.util.regex.*; -import java.text.*; - - -import org.apache.commons.lang.*; import org.apache.commons.codec.binary.Hex; -import javax.inject.Inject; -import javax.naming.Context; -import javax.naming.InitialContext; -import javax.naming.NamingException; import edu.harvard.iq.dataverse.DataTable; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.datavariable.VariableCategory; -//import edu.harvard.iq.dataverse.datavariable.VariableFormatType; -//import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; -import edu.harvard.iq.dataverse.ingest.plugin.spi.*; import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi; import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest; +import org.apache.commons.lang3.StringUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java index d523b3d4587..994b4901bee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java @@ -1,12 +1,30 @@ package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta; -import java.io.*; -import java.util.logging.*; - -import java.util.*; -import java.text.*; - -import org.apache.commons.lang.*; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import java.util.Set; +import java.util.TimeZone; +import java.util.logging.Logger; import edu.harvard.iq.dataverse.DataTable; import edu.harvard.iq.dataverse.datavariable.DataVariable; @@ -15,6 +33,7 @@ import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi; import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest; +import org.apache.commons.lang3.StringUtils; /** * ingest plugin for Stata 13-15 (117-119) DTA file format. 
A copy and paste from diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java index a39b044e1b3..c90b0ea6950 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java @@ -19,19 +19,45 @@ */ package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por; -import java.io.*; -import java.nio.*; -import java.util.logging.*; - -import java.util.*; -import java.util.regex.*; -import java.text.*; +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.Writer; +import java.nio.ByteBuffer; + +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import java.util.Set; +import java.util.TimeZone; +import java.util.logging.Logger; import java.math.BigDecimal; import java.math.MathContext; import java.math.RoundingMode; +import java.util.regex.Matcher; +import java.util.regex.Pattern; -import org.apache.commons.lang.*; import org.apache.commons.codec.binary.Hex; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; import edu.harvard.iq.dataverse.DataTable; import edu.harvard.iq.dataverse.datavariable.DataVariable; @@ -44,7 +70,6 @@ import edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SPSSConstants; - /** * ingest plugin for SPSS/POR ("portable") file format. * diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index b2da2f12ff6..c2899b29d1f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -52,8 +52,8 @@ import javax.naming.NamingException; -import org.apache.commons.lang.RandomStringUtils; -import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.ArrayUtils; /** * Dataverse 4.0 implementation of TabularDataFileReader for the * RData Binary Format. 
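The CSVFileReader change earlier in this patch replaces the firstNumCharSet lookup with StringUtils.isNumeric. A rough stand-alone equivalent of the new test, for illustration only (the helper name looksNumeric is invented here, not part of the codebase):

    // Uses org.apache.commons.lang3.StringUtils, as imported in CSVFileReader.
    static boolean looksNumeric(String s) {
        // all digits, or a single leading +/- sign followed by digits
        return StringUtils.isNumeric(s)
                || (s.length() > 1 && s.substring(0, 1).matches("[+-]")
                        && StringUtils.isNumeric(s.substring(1)));
    }
    // looksNumeric("123") -> true; looksNumeric("-42") -> true
    // looksNumeric("+") -> false; looksNumeric("4.2") -> false, matching the old
    // firstNumCharSet-based check (the full condition in the reader also accepts
    // empty strings and "null" as missing values).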
diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java index d5ca06a96f3..f60b7733463 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java @@ -24,7 +24,7 @@ import java.util.logging.*; import edu.harvard.iq.dataverse.util.BundleUtil; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import edu.harvard.iq.dataverse.DataTable; import edu.harvard.iq.dataverse.datavariable.DataVariable; diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java index 53441c0de65..682b8f1166c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java @@ -19,17 +19,42 @@ */ package edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav; -import java.io.*; -import java.nio.*; -import java.util.logging.*; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; -import java.util.*; -import java.util.regex.*; -import java.text.*; - -import org.apache.commons.lang.*; import org.apache.commons.codec.binary.Hex; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.StringUtils; import edu.harvard.iq.dataverse.DataTable; import edu.harvard.iq.dataverse.datavariable.DataVariable; @@ -40,8 +65,7 @@ import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.TabularDataFileReaderSpi; import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest; -import edu.harvard.iq.dataverse.ingest.tabulardata.InvalidData; - +import edu.harvard.iq.dataverse.ingest.tabulardata.InvalidData; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java index 5a7642126fe..914e8d56432 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java @@ -33,7 +33,7 @@ import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest; import 
edu.harvard.iq.dataverse.util.BundleUtil; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.usermodel.XSSFRichTextString; diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index 523d06bb8a1..ec3a1f357f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -39,7 +39,7 @@ import javax.ws.rs.QueryParam; import edu.harvard.iq.dataverse.util.BundleUtil; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java index a1e94e01815..0e99220005c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java @@ -20,7 +20,7 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java index c9faf797d57..eaea5ab2296 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java @@ -21,7 +21,7 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * Given a user and a set of filters (dvobject type, roles, publication status): diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/RoleTagRetriever.java b/src/main/java/edu/harvard/iq/dataverse/mydata/RoleTagRetriever.java index 9f03f8addb1..4556c92ff19 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/RoleTagRetriever.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/RoleTagRetriever.java @@ -25,7 +25,7 @@ import java.util.logging.Logger; import javax.json.Json; import javax.json.JsonArrayBuilder; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * Input: dvObject id, parent Id, and dvObject type (from Solr) diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/SolrQueryFormatter.java b/src/main/java/edu/harvard/iq/dataverse/mydata/SolrQueryFormatter.java index 9aefbee240c..1c3fca1537b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/SolrQueryFormatter.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/SolrQueryFormatter.java @@ -12,7 +12,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * Convenience methods for formatting long arrays of ids into solrQuery strings diff --git a/src/main/java/edu/harvard/iq/dataverse/passwordreset/PasswordResetPage.java b/src/main/java/edu/harvard/iq/dataverse/passwordreset/PasswordResetPage.java index aea910c496e..e0ab786b68b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/passwordreset/PasswordResetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/passwordreset/PasswordResetPage.java @@ -29,7 +29,7 @@ import 
java.util.List; import javax.faces.component.UIComponent; import javax.faces.component.UIInput; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.hibernate.validator.constraints.NotBlank; @ViewScoped diff --git a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrl.java b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrl.java index ff3a14ec72e..beb676f60d1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrl.java +++ b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrl.java @@ -51,4 +51,8 @@ public String getLink() { return link; } + public boolean isAnonymizedAccess() { + return roleAssignment.isAnonymizedAccess(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlUtil.java b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlUtil.java index 0a092f36851..c363139c912 100644 --- a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlUtil.java @@ -93,7 +93,7 @@ static public PrivateUrlUser getPrivateUrlUserFromRoleAssignment(RoleAssignment } Dataset dataset = getDatasetFromRoleAssignment(roleAssignment); if (dataset != null) { - PrivateUrlUser privateUrlUser = new PrivateUrlUser(dataset.getId()); + PrivateUrlUser privateUrlUser = new PrivateUrlUser(dataset.getId(), roleAssignment.isAnonymizedAccess()); return privateUrlUser; } return null; diff --git a/src/main/java/edu/harvard/iq/dataverse/privateurl/package-info.java b/src/main/java/edu/harvard/iq/dataverse/privateurl/package-info.java index 16ba6e5bf4c..6e939c1bb6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/privateurl/package-info.java +++ b/src/main/java/edu/harvard/iq/dataverse/privateurl/package-info.java @@ -6,6 +6,13 @@ * all files (regardless of if the files are restricted or not) of a draft * version of a dataset. *

+ * As of this note, a second option - to create a Private URL that provides an
+ * anonymized view of the dataset - has been added. This option works the same as
+ * the original except that it hides author names in the citation block, hides
+ * the values for an admin specified list of metadata fields, disables citation
+ * downloads, and disables API access (except for file and file thumbnail
+ * downloads which are used by the UI).
+ *

* The primary use case for a Private URL is for journal editors to send a link * to reviewers of a dataset before publication. In most cases, these journal * editors do not permit depositors to publish on their own, which is to say @@ -26,12 +33,20 @@ * assign roles to them directly, rather than using a Private URL which requires * no username or password. *

+ * As of this note, a second option aimed specifically at the review use case - + * to create a Private URL that provides an anonymized view of the dataset - has + * been added. This option works the same as the original except that it hides + * author names in the citation block, hides the values for an admin specified + * list of metadata fields, disables citation downloads, and disables API access + * (except for file and file thumbnail downloads which are used by the UI). + *

* The token associated with the Private URL role assignment that can be used - * either in the GUI or via the API to elevate privileges beyond what a "Guest" - * can see. The ability to use a Private URL token via API was added mostly to - * facilitate automated testing of the feature but the far more common case is - * expected to be use of the Private URL token in a link that is clicked to open - * a browser, similar to links shared via Dropbox, Google, etc. + * either in the GUI or, for the non-anonymized-access option, via the API to + * elevate privileges beyond what a "Guest" can see. The ability to use a + * Private URL token via API was added mostly to facilitate automated testing of + * the feature but the far more common case is expected to be use of the Private + * URL token in a link that is clicked to open a browser, similar to links + * shared via Dropbox, Google, etc. *

* When reviewers click a Private URL their browser sessions are set to the * "{@link edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser}" that @@ -65,16 +80,24 @@ * using the "PrivateUrlUser" that has the "Member" role only on the dataset in * question. This means that read-only operations such as downloads of the * dataset's files are permitted. The Search API does not respect the Private - * URL token but you can download unpublished metadata using the Native API and - * download files using the Access API. + * URL token but you can download files using the Access API, and, with the + * non-anonymized-access option, download unpublished metadata using the Native + * API. *
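As an example of the Access API case mentioned above, the following sketch downloads one draft file with java.net.http. It assumes, as in the previous sketch, that the Private URL token can be passed as the key query parameter; the host, file id, and output filename are placeholders.

    import java.io.IOException;
    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.nio.file.Path;

    public class AccessApiDownloadSketch {

        public static void main(String[] args) throws IOException, InterruptedException {
            String site = "https://dataverse.example.edu";          // placeholder installation
            long fileId = 42L;                                      // placeholder file id
            String token = "a56444bc-7956-4711-8886-345b135fa399";  // placeholder Private URL token

            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create(site + "/api/access/datafile/" + fileId + "?key=" + token))
                    .GET()
                    .build();

            // Stream the (still unpublished) file to disk; a 403 or 404 here would
            // indicate an invalid token or a Private URL that has been deleted.
            HttpResponse<Path> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofFile(Path.of("datafile-" + fileId + ".bin")));
            System.out.println("HTTP " + response.statusCode() + " -> " + response.body());
        }
    }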

* A Private URL cannot be created for a published version of a dataset. In the * GUI, you will be reminded of this fact with a popup. The API will explain * this as well. *

- * If a draft dataset containing a Private URL is published, the Private URL is - * deleted. This means that reviewers who click the link after publication will - * see a 404. + * An anonymized-access Private URL can't be created if any published dataset + * version exists. The primary reason for this is that, since datasets have + * DOIs, the full metadata about published versions is available directly from + * the DOI provider. (While the metadata for that version could be somewhat + * different, in practice it would probably provide a means of identifying + * some/all of the authors). + *
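Taken together, the two restrictions above amount to a small pair of checks at creation time. The sketch below restates them in plain Java as an illustration only; it is not the code of CreatePrivateUrlCommand, and the boolean parameters are stand-ins for the dataset state the real command inspects.

    public class PrivateUrlCreationRulesSketch {

        // draftExists: the dataset currently has a draft version to share
        // anyReleasedVersion: at least one version of the dataset has been published
        static void checkCanCreate(boolean draftExists, boolean anyReleasedVersion, boolean anonymizedAccess) {
            if (!draftExists) {
                throw new IllegalStateException("A Private URL can only be created for a draft version.");
            }
            if (anonymizedAccess && anyReleasedVersion) {
                // Published metadata is already public via the DOI provider, so an
                // anonymized view could not really hide the authors.
                throw new IllegalStateException("Anonymized access is not allowed once a version has been published.");
            }
        }

        public static void main(String[] args) {
            checkCanCreate(true, false, true);    // ok: nothing published yet, anonymized access allowed
            checkCanCreate(true, true, false);    // ok: post-publication draft, regular Private URL
            try {
                checkCanCreate(true, true, true); // rejected: anonymized access after publication
            } catch (IllegalStateException e) {
                System.out.println(e.getMessage());
            }
        }
    }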

+ * If a draft dataset containing a Private URL is + * published, the Private URL is deleted. This means that reviewers who click + * the link after publication will see a 404. *

* If a post-publication draft containing a Private URL is deleted, the Private * URL is deleted. This is to ensure that if a new draft is created in the @@ -106,8 +129,8 @@ *

  • {@link edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand}
  • *
  • {@link edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand}
  • * - * See also the Private URL To Unpublished Dataset BRD at - * + * See also the Private URL To Unpublished Dataset BRD at * https://docs.google.com/document/d/1FT47QkZKcmjSgRnePaJO2g1nzcotLyN3Yb2ORvBr6cs/edit?usp=sharing */ package edu.harvard.iq.dataverse.privateurl; diff --git a/src/main/java/edu/harvard/iq/dataverse/rserve/RJobRequest.java b/src/main/java/edu/harvard/iq/dataverse/rserve/RJobRequest.java index d59e8f1050f..9171b8313ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/rserve/RJobRequest.java +++ b/src/main/java/edu/harvard/iq/dataverse/rserve/RJobRequest.java @@ -25,9 +25,15 @@ * @author Leonid Andreev */ import edu.harvard.iq.dataverse.datavariable.DataVariable; -import java.util.*; -import java.util.logging.*; -import org.apache.commons.lang.*; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import org.apache.commons.lang3.StringUtils; public class RJobRequest { diff --git a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java index 503125eb45a..f13b6f11434 100644 --- a/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java +++ b/src/main/java/edu/harvard/iq/dataverse/rserve/RemoteDataFrameService.java @@ -23,16 +23,31 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest; -import java.io.*; -import java.util.*; -import java.util.logging.*; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; import org.rosuda.REngine.*; import org.rosuda.REngine.Rserve.*; -import org.apache.commons.lang.*; - /** * * @author Leonid Andreev diff --git a/src/main/java/edu/harvard/iq/dataverse/rserve/VariableNameCheckerForR.java b/src/main/java/edu/harvard/iq/dataverse/rserve/VariableNameCheckerForR.java index dea0c886cd1..65a64241364 100644 --- a/src/main/java/edu/harvard/iq/dataverse/rserve/VariableNameCheckerForR.java +++ b/src/main/java/edu/harvard/iq/dataverse/rserve/VariableNameCheckerForR.java @@ -20,10 +20,14 @@ package edu.harvard.iq.dataverse.rserve; -import org.apache.commons.lang.*; -import java.util.*; -import java.util.regex.*; +import org.apache.commons.lang3.StringUtils; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java index f3af7bf90c7..55516d68880 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/AdvancedSearchPage.java @@ -23,7 +23,7 @@ import javax.faces.view.ViewScoped; import javax.inject.Inject; import 
javax.inject.Named; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; @ViewScoped @Named("AdvancedSearchPage") diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index f61f879eee7..d72e2a7f642 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -61,7 +61,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.SortClause; @@ -256,15 +256,14 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) List dataverseSegments = findPathSegments(dataverse, dataversePathSegmentsAccumulator); List dataversePaths = getDataversePathsFromSegments(dataverseSegments); if (dataversePaths.size() > 0) { - // don't show yourself while indexing or in search results: - // https://redmine.hmdc.harvard.edu/issues/3613 - // logger.info(dataverse.getName() + " size " + dataversePaths.size()); + // removing the dataverse's own id from the paths dataversePaths.remove(dataversePaths.size() - 1); } //Add paths for my linking dataverses - List linkingDataversePaths = findLinkingDataversePaths(dataverse); - for (String dvPath : linkingDataversePaths) { + List linkingDataverses = findAllLinkingDataverses(dataverse); + List linkingDataversePaths = findLinkingDataversePaths(linkingDataverses); + for (String dvPath:linkingDataversePaths ){ dataversePaths.add(dvPath); } //only do this if we're indexing an individual dataverse ie not full re-index @@ -1316,34 +1315,56 @@ private boolean hasAnyLinkingDataverses(Dataverse dataverse) { return false; } - private List findLinkingDataversePaths(Dataverse dataverse) { - Dataverse rootDataverse = findRootDataverseCached(); - List pathListAccumulator = new ArrayList<>(); - List ancestorList = dataverse.getOwners(); - - ancestorList.add(dataverse); + private List findAllLinkingDataverses(DvObject dvObject){ + /* + here we find the linking dataverse of the input object + then any linked dvs in its owners list + */ + Dataset dataset = null; + Dataverse dv = null; + Dataverse rootDataverse = findRootDataverseCached(); + List linkingDataverses = new ArrayList(); + List ancestorList = new ArrayList(); + + try { + if(dvObject.isInstanceofDataset()){ + dataset = (Dataset) dvObject; + linkingDataverses = dsLinkingService.findLinkingDataverses(dataset.getId()); + ancestorList = dataset.getOwner().getOwners(); + } + if(dvObject.isInstanceofDataverse()){ + dv = (Dataverse) dvObject; + linkingDataverses = dvLinkingService.findLinkingDataverses(dv.getId()); + ancestorList = dv.getOwners(); + } + } catch (Exception ex) { + logger.info("failed to find Linking Dataverses for " + SearchFields.SUBTREE + ": " + ex); + } + + for (Dataverse owner : ancestorList) { + if (!owner.equals(rootDataverse)) { + linkingDataverses.addAll(dvLinkingService.findLinkingDataverses(owner.getId())); + } + } + + return linkingDataverses; + } + + private List findLinkingDataversePaths(List linkingDVs) { - for (Dataverse prior : ancestorList) { - if (!dataverse.equals(rootDataverse)) { - // important when creating root dataverse - List linkingDVs = 
dvLinkingService.findLinkingDataverses(prior.getId()); - for (Dataverse toAdd : linkingDVs) { - List linkingDataversePathSegmentsAccumulator = new ArrayList<>(); - //path starts with linking dataverse - linkingDataversePathSegmentsAccumulator.add(toAdd.getId().toString()); - //then add segments from the target dataverse up to the linked dataverse - List linkingdataverseSegments = findPathSegments(dataverse, linkingDataversePathSegmentsAccumulator, prior); - - List linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments); - for (String dvPath : linkingDataversePaths) { - pathListAccumulator.add(dvPath); - } + List pathListAccumulator = new ArrayList<>(); + for (Dataverse toAdd : linkingDVs) { + //get paths for each linking dataverse + List linkingDataversePathSegmentsAccumulator = findPathSegments(toAdd, new ArrayList<>()); + List linkingDataversePaths = getDataversePathsFromSegments(linkingDataversePathSegmentsAccumulator); + for (String dvPath : linkingDataversePaths) { + if (!pathListAccumulator.contains(dvPath)) { + pathListAccumulator.add(dvPath); } } } return pathListAccumulator; - } private List getDataversePathsFromSegments(List dataversePathSegments) { @@ -1430,7 +1451,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc List paths = object.isInstanceofDataset() ? retrieveDVOPaths(datasetService.find(object.getId())) : retrieveDVOPaths(dataverseService.find(object.getId())); - + sid.removeField(SearchFields.SUBTREE); sid.addField(SearchFields.SUBTREE, paths); UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); @@ -1472,35 +1493,12 @@ private List retrieveDVOPaths(DvObject dvo) { } } catch (Exception ex) { logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); - } + } List dataversePaths = getDataversePathsFromSegments(dataverseSegments); - // Add Paths for linking dataverses - List linkingDataverses = new ArrayList(); - if (dataset != null){ - linkingDataverses = dsLinkingService.findLinkingDataverses(dataset.getId()); - } else{ - linkingDataverses = dvLinkingService.findLinkingDataverses(dv.getId()); - } - for (Dataverse linkingDataverse : linkingDataverses) { - List linkingDataversePathSegmentsAccumulator = new ArrayList<>(); - List linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator); - List linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments); - for (String dvPath : linkingDataversePaths) { - dataversePaths.add(dvPath); - } - } - - //Add paths for my linking dataverses - List linkingDataversePaths = new ArrayList(); - if (dataset != null) { - linkingDataversePaths = findLinkingDataversePaths(dataset.getOwner()); - } else { - linkingDataversePaths = findLinkingDataversePaths(dv); - } - - for (String dvPath : linkingDataversePaths) { - dataversePaths.add(dvPath); - } + /* + add linking paths + */ + dataversePaths.addAll(findLinkingDataversePaths(findAllLinkingDataverses(dvo))); return dataversePaths; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index f7a3b874c24..09ab372c687 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -39,7 +39,7 @@ import javax.inject.Inject; import javax.inject.Named; import javax.servlet.http.HttpServletRequest; -import 
org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; //@ViewScoped @RequestScoped diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java index a8e013f3ca3..c226d77f885 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java @@ -5,7 +5,7 @@ import java.util.ArrayList; import java.util.List; import java.util.StringTokenizer; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.solr.common.SolrInputDocument; public class SearchUtil { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 02637bfa8df..b3685b5b951 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -356,6 +356,11 @@ Whether Harvesting (OAI) service is enabled */ IndependentHandleService, + /** + Handle to use for authentication if the default is not being used + */ + HandleAuthHandle, + /** * Archiving can be configured by providing an Archiver class name (class must extend AstractSubmitToArchiverCommand) * and a list of settings that should be passed to the Archiver. @@ -433,7 +438,15 @@ Whether Harvesting (OAI) service is enabled * Installation Brand Name is always included (default/false) or is not included * when the Distributor field (citation metadatablock) is set (true) */ - ExportInstallationAsDistributorOnlyWhenNotSet + ExportInstallationAsDistributorOnlyWhenNotSet, + /** + * A comma-separated list of field type names that should be 'withheld' when + * dataset access occurs via a Private Url with Anonymized Access (e.g. to + * support anonymized review). A suggested minimum includes author, + * datasetContact, and contributor, but additional fields such as depositor, grantNumber, and + * publication might also need to be included. + */ + AnonymizedFieldTypeNames ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java index 02ba331cdd5..e9ac5bfd230 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/DataSourceProducer.java @@ -38,7 +38,7 @@ //}) // // ... but at this time we don't think we need any. The full list -// of properties can be found at https://docs.payara.fish/community/docs/5.2020.6/documentation/payara-server/jdbc/advanced-connection-pool-properties.html#full-list-of-properties +// of properties can be found at https://docs.payara.fish/community/docs/5.2021.5/documentation/payara-server/jdbc/advanced-connection-pool-properties.html#full-list-of-properties // // All these properties cannot be configured via MPCONFIG as Payara doesn't support this (yet). To be enhanced. 
// See also https://github.com/payara/Payara/issues/5024 diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileSortFieldAndOrder.java b/src/main/java/edu/harvard/iq/dataverse/util/FileSortFieldAndOrder.java index ba66c404349..ef921ff185c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileSortFieldAndOrder.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileSortFieldAndOrder.java @@ -1,7 +1,7 @@ package edu.harvard.iq.dataverse.util; import edu.harvard.iq.dataverse.search.SortBy; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; public class FileSortFieldAndOrder { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index cb7362e02a7..e7f77bc4fb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -100,7 +100,7 @@ import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import java.util.Arrays; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * a 4.0 implementation of the DVN FileUtil; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java b/src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java index 7b096786351..3131afbf010 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MarkupChecker.java @@ -5,7 +5,7 @@ */ package edu.harvard.iq.dataverse.util; -import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.text.StringEscapeUtils; import org.jsoup.Jsoup; import org.jsoup.safety.Whitelist; import org.jsoup.parser.Parser; @@ -60,7 +60,7 @@ public static String stripAllTags(String unsafe) { } public static String escapeHtml(String unsafe) { - return StringEscapeUtils.escapeHtml(unsafe); + return StringEscapeUtils.escapeHtml4(unsafe); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SumStatCalculator.java b/src/main/java/edu/harvard/iq/dataverse/util/SumStatCalculator.java index ee37bd51206..545f2937f70 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SumStatCalculator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SumStatCalculator.java @@ -19,13 +19,13 @@ */ package edu.harvard.iq.dataverse.util; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.logging.Logger; -import org.apache.commons.lang.*; -import org.apache.commons.math.stat.*; -//import cern.colt.list.*; -//import cern.jet.stat.Descriptive; +import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.math.stat.StatUtils; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 71accdaa8c8..cecba030199 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -93,6 +93,7 @@ public class SystemConfig { * zip file upload. 
*/ private static final int defaultZipUploadFilesLimit = 1000; + private static final long defaultZipDownloadLimit = 104857600L; // 100MB private static final int defaultMultipleUploadFilesLimit = 1000; private static final int defaultLoginSessionTimeout = 480; // = 8 hours @@ -423,13 +424,12 @@ static int getIntLimitFromStringOrDefault(String limitSetting, Integer defaultVa /** * Download-as-zip size limit. - * returns 0 if not specified; - * (the file zipper will then use the default value) + * returns defaultZipDownloadLimit if not specified; * set to -1 to disable zip downloads. */ public long getZipDownloadLimit() { String zipLimitOption = settingsService.getValueForKey(SettingsServiceBean.Key.ZipDownloadLimit); - return getLongLimitFromStringOrDefault(zipLimitOption, 0L); + return getLongLimitFromStringOrDefault(zipLimitOption, defaultZipDownloadLimit); } public int getZipUploadFilesLimit() { @@ -1017,7 +1017,7 @@ public Integer getUploadMethodCount(){ public boolean isDataFilePIDSequentialDependent(){ String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - if (doiIdentifierType.equals("sequentialNumber") && doiDataFileFormat.equals("DEPENDENT")){ + if (doiIdentifierType.equals("storedProcGenerated") && doiDataFileFormat.equals("DEPENDENT")){ return true; } return false; @@ -1045,6 +1045,11 @@ public boolean isIndependentHandleService() { } + public String getHandleAuthHandle() { + String handleAuthHandle = settingsService.getValueForKey(SettingsServiceBean.Key.HandleAuthHandle, null); + return handleAuthHandle; + } + public String getMDCLogPath() { String mDCLogPath = settingsService.getValueForKey(SettingsServiceBean.Key.MDCLogPath, null); return mDCLogPath; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 7c3db485e47..0d079f4b172 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -177,7 +177,11 @@ public BagGenerator(OREMap oreMap, String dataciteXml) throws JsonSyntaxExceptio public void setIgnoreHashes(boolean val) { ignorehashes = val; } - + + public void setDefaultCheckSumType(ChecksumType type) { + hashtype=type; + } + public static void println(String s) { System.out.println(s); System.out.flush(); @@ -531,18 +535,22 @@ private void processContainer(JsonObject item, String currentPath) throws IOExce if (child.has(JsonLDTerm.checksum.getLabel())) { ChecksumType childHashType = ChecksumType.fromString( child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@type").getAsString()); - if (hashtype != null && !hashtype.equals(childHashType)) { - logger.warning("Multiple hash values in use - not supported"); - } - if (hashtype == null) + if (hashtype == null) { + //If one wasn't set as a default, pick up what the first child with one uses hashtype = childHashType; - childHash = child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@value").getAsString(); - if (checksumMap.containsValue(childHash)) { - // Something else has this hash - logger.warning("Duplicate/Collision: " + child.get("@id").getAsString() + " has SHA1 Hash: " - + childHash); - } - checksumMap.put(childPath, childHash); + } + if (hashtype != null && !hashtype.equals(childHashType)) { + logger.warning("Multiple 
hash values in use - will calculate " + hashtype.toString() + + " hashes for " + childTitle); + } else { + childHash = child.getAsJsonObject(JsonLDTerm.checksum.getLabel()).get("@value").getAsString(); + if (checksumMap.containsValue(childHash)) { + // Something else has this hash + logger.warning("Duplicate/Collision: " + child.get("@id").getAsString() + " has SHA1 Hash: " + + childHash); + } + checksumMap.put(childPath, childHash); + } } if ((hashtype == null) | ignorehashes) { // Pick sha512 when ignoring hashes or none exist @@ -816,7 +824,7 @@ private String generateInfoFile() { } else { info.append( // FixMe - handle description having subfields better - WordUtils.wrap(getSingleValue(aggregation.getAsJsonObject(descriptionTerm.getLabel()), + WordUtils.wrap(getSingleValue(aggregation.get(descriptionTerm.getLabel()), descriptionTextTerm.getLabel()), 78, CRLF + " ", true)); info.append(CRLF); @@ -862,22 +870,24 @@ private String generateInfoFile() { * - the key to find a value(s) for * @return - a single string */ - String getSingleValue(JsonObject jsonObject, String key) { + String getSingleValue(JsonElement jsonElement, String key) { String val = ""; - if (jsonObject.get(key).isJsonPrimitive()) { + if(jsonElement.isJsonObject()) { + JsonObject jsonObject=jsonElement.getAsJsonObject(); val = jsonObject.get(key).getAsString(); - } else if (jsonObject.get(key).isJsonArray()) { - Iterator iter = jsonObject.getAsJsonArray(key).iterator(); + } else if (jsonElement.isJsonArray()) { + + Iterator iter = jsonElement.getAsJsonArray().iterator(); ArrayList stringArray = new ArrayList(); while (iter.hasNext()) { - stringArray.add(iter.next().getAsString()); + stringArray.add(iter.next().getAsJsonObject().getAsJsonPrimitive(key).getAsString()); } if (stringArray.size() > 1) { - val = StringUtils.join((String[]) stringArray.toArray(), ","); + val = StringUtils.join(stringArray.toArray(), ","); } else { val = stringArray.get(0); } - logger.warning("Multiple values found for: " + key + ": " + val); + logger.fine("Multiple values found for: " + key + ": " + val); } return val; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index 38303eb1f41..b0abc3ce9a5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -39,7 +39,7 @@ public class OREMap { private Map localContext = new TreeMap(); private DatasetVersion version; private Boolean excludeEmail = null; - + public OREMap(DatasetVersion version) { this.version = version; } @@ -56,6 +56,14 @@ public void writeOREMap(OutputStream outputStream) throws Exception { } public JsonObject getOREMap() throws Exception { + return getOREMap(false); + } + + public JsonObject getOREMap(boolean aggregationOnly) throws Exception { + return getOREMapBuilder(aggregationOnly).build(); + } + + public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Exception { //Set this flag if it wasn't provided if(excludeEmail==null) { @@ -79,7 +87,7 @@ public JsonObject getOREMap() throws Exception { for (DatasetField field : fields) { if (!field.isEmpty()) { DatasetFieldType dfType = field.getDatasetFieldType(); - if(excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dfType.getFieldType())) { + if (excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dfType.getFieldType())) { continue; } JsonLDTerm fieldName = getTermFor(dfType); @@ -101,13 +109,13 @@ public JsonObject 
getOREMap() throws Exception { for (DatasetField dsf : dscv.getChildDatasetFields()) { DatasetFieldType dsft = dsf.getDatasetFieldType(); - if(excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dsft.getFieldType())) { + if (excludeEmail && DatasetFieldType.FieldType.EMAIL.equals(dsft.getFieldType())) { continue; } // which may have multiple values if (!dsf.isEmpty()) { - // Add context entry - //ToDo - also needs to recurse here? + // Add context entry + // ToDo - also needs to recurse here? JsonLDTerm subFieldName = getTermFor(dfType, dsft); if (subFieldName.inNamespace()) { localContext.putIfAbsent(subFieldName.getNamespace().getPrefix(), @@ -143,14 +151,14 @@ public JsonObject getOREMap() throws Exception { Json.createArrayBuilder().add(JsonLDTerm.ore("Aggregation").getLabel()) .add(JsonLDTerm.schemaOrg("Dataset").getLabel())) .add(JsonLDTerm.schemaOrg("version").getLabel(), version.getFriendlyVersionNumber()) - .add(JsonLDTerm.schemaOrg("datePublished").getLabel(), dataset.getPublicationDateFormattedYYYYMMDD()) .add(JsonLDTerm.schemaOrg("name").getLabel(), version.getTitle()) .add(JsonLDTerm.schemaOrg("dateModified").getLabel(), version.getLastUpdateTime().toString()); + addIfNotNull(aggBuilder, JsonLDTerm.schemaOrg("datePublished"), dataset.getPublicationDateFormattedYYYYMMDD()); TermsOfUseAndAccess terms = version.getTermsOfUseAndAccess(); if (terms.getLicense() == TermsOfUseAndAccess.License.CC0) { aggBuilder.add(JsonLDTerm.schemaOrg("license").getLabel(), - "https://creativecommons.org/publicdomain/zero/1.0/"); + TermsOfUseAndAccess.CC0_URI); } else { addIfNotNull(aggBuilder, JsonLDTerm.termsOfUse, terms.getTermsOfUse()); } @@ -183,96 +191,101 @@ public JsonObject getOREMap() throws Exception { // The aggregation aggregates aggregatedresources (Datafiles) which each have // their own entry and metadata JsonArrayBuilder aggResArrayBuilder = Json.createArrayBuilder(); + if (!aggregationOnly) { - for (FileMetadata fmd : version.getFileMetadatas()) { - DataFile df = fmd.getDataFile(); - JsonObjectBuilder aggRes = Json.createObjectBuilder(); + for (FileMetadata fmd : version.getFileMetadatas()) { + DataFile df = fmd.getDataFile(); + JsonObjectBuilder aggRes = Json.createObjectBuilder(); - if (fmd.getDescription() != null) { - aggRes.add(JsonLDTerm.schemaOrg("description").getLabel(), fmd.getDescription()); - } else { - addIfNotNull(aggRes, JsonLDTerm.schemaOrg("description"), df.getDescription()); - } - addIfNotNull(aggRes, JsonLDTerm.schemaOrg("name"), fmd.getLabel()); // "label" is the filename - addIfNotNull(aggRes, JsonLDTerm.restricted, fmd.isRestricted()); - addIfNotNull(aggRes, JsonLDTerm.directoryLabel, fmd.getDirectoryLabel()); - addIfNotNull(aggRes, JsonLDTerm.schemaOrg("version"), fmd.getVersion()); - addIfNotNull(aggRes, JsonLDTerm.datasetVersionId, fmd.getDatasetVersion().getId()); - JsonArray catArray = null; - if (fmd != null) { - List categories = fmd.getCategoriesByName(); - if (categories.size() > 0) { - JsonArrayBuilder jab = Json.createArrayBuilder(); - for (String s : categories) { - jab.add(s); + if (fmd.getDescription() != null) { + aggRes.add(JsonLDTerm.schemaOrg("description").getLabel(), fmd.getDescription()); + } else { + addIfNotNull(aggRes, JsonLDTerm.schemaOrg("description"), df.getDescription()); + } + addIfNotNull(aggRes, JsonLDTerm.schemaOrg("name"), fmd.getLabel()); // "label" is the filename + addIfNotNull(aggRes, JsonLDTerm.restricted, fmd.isRestricted()); + addIfNotNull(aggRes, JsonLDTerm.directoryLabel, fmd.getDirectoryLabel()); + 
addIfNotNull(aggRes, JsonLDTerm.schemaOrg("version"), fmd.getVersion()); + addIfNotNull(aggRes, JsonLDTerm.datasetVersionId, fmd.getDatasetVersion().getId()); + JsonArray catArray = null; + if (fmd != null) { + List categories = fmd.getCategoriesByName(); + if (categories.size() > 0) { + JsonArrayBuilder jab = Json.createArrayBuilder(); + for (String s : categories) { + jab.add(s); + } + catArray = jab.build(); } - catArray = jab.build(); } + addIfNotNull(aggRes, JsonLDTerm.categories, catArray); + // File DOI if it exists + String fileId = null; + String fileSameAs = null; + if (df.getGlobalId().asString().length() != 0) { + fileId = df.getGlobalId().asString(); + fileSameAs = SystemConfig.getDataverseSiteUrlStatic() + + "/api/access/datafile/:persistentId?persistentId=" + fileId; + } else { + fileId = SystemConfig.getDataverseSiteUrlStatic() + "/file.xhtml?fileId=" + df.getId(); + fileSameAs = SystemConfig.getDataverseSiteUrlStatic() + "/api/access/datafile/" + df.getId(); + } + aggRes.add("@id", fileId); + aggRes.add(JsonLDTerm.schemaOrg("sameAs").getLabel(), fileSameAs); + fileArray.add(fileId); + + aggRes.add("@type", JsonLDTerm.ore("AggregatedResource").getLabel()); + addIfNotNull(aggRes, JsonLDTerm.schemaOrg("fileFormat"), df.getContentType()); + addIfNotNull(aggRes, JsonLDTerm.filesize, df.getFilesize()); + addIfNotNull(aggRes, JsonLDTerm.storageIdentifier, df.getStorageIdentifier()); + addIfNotNull(aggRes, JsonLDTerm.originalFileFormat, df.getOriginalFileFormat()); + addIfNotNull(aggRes, JsonLDTerm.originalFormatLabel, df.getOriginalFormatLabel()); + addIfNotNull(aggRes, JsonLDTerm.UNF, df.getUnf()); + addIfNotNull(aggRes, JsonLDTerm.rootDataFileId, df.getRootDataFileId()); + addIfNotNull(aggRes, JsonLDTerm.previousDataFileId, df.getPreviousDataFileId()); + JsonObject checksum = null; + // Add checksum. 
RDA recommends SHA-512 + if (df.getChecksumType() != null && df.getChecksumValue() != null) { + checksum = Json.createObjectBuilder().add("@type", df.getChecksumType().toString()) + .add("@value", df.getChecksumValue()).build(); + aggRes.add(JsonLDTerm.checksum.getLabel(), checksum); + } + JsonArray tabTags = null; + JsonArrayBuilder jab = JsonPrinter.getTabularFileTags(df); + if (jab != null) { + tabTags = jab.build(); + } + addIfNotNull(aggRes, JsonLDTerm.tabularTags, tabTags); + // Add latest resource to the array + aggResArrayBuilder.add(aggRes.build()); } - addIfNotNull(aggRes, JsonLDTerm.categories, catArray); - // File DOI if it exists - String fileId = null; - String fileSameAs = null; - if (df.getGlobalId().asString().length() != 0) { - fileId = df.getGlobalId().asString(); - fileSameAs = SystemConfig.getDataverseSiteUrlStatic() - + "/api/access/datafile/:persistentId?persistentId=" + fileId; - } else { - fileId = SystemConfig.getDataverseSiteUrlStatic() + "/file.xhtml?fileId=" + df.getId(); - fileSameAs = SystemConfig.getDataverseSiteUrlStatic() + "/api/access/datafile/" + df.getId(); - } - aggRes.add("@id", fileId); - aggRes.add(JsonLDTerm.schemaOrg("sameAs").getLabel(), fileSameAs); - fileArray.add(fileId); - - aggRes.add("@type", JsonLDTerm.ore("AggregatedResource").getLabel()); - addIfNotNull(aggRes, JsonLDTerm.schemaOrg("fileFormat"), df.getContentType()); - addIfNotNull(aggRes, JsonLDTerm.filesize, df.getFilesize()); - addIfNotNull(aggRes, JsonLDTerm.storageIdentifier, df.getStorageIdentifier()); - addIfNotNull(aggRes, JsonLDTerm.originalFileFormat, df.getOriginalFileFormat()); - addIfNotNull(aggRes, JsonLDTerm.originalFormatLabel, df.getOriginalFormatLabel()); - addIfNotNull(aggRes, JsonLDTerm.UNF, df.getUnf()); - addIfNotNull(aggRes, JsonLDTerm.rootDataFileId, df.getRootDataFileId()); - addIfNotNull(aggRes, JsonLDTerm.previousDataFileId, df.getPreviousDataFileId()); - JsonObject checksum = null; - // Add checksum. RDA recommends SHA-512 - if (df.getChecksumType() != null && df.getChecksumValue() != null) { - checksum = Json.createObjectBuilder().add("@type", df.getChecksumType().toString()) - .add("@value", df.getChecksumValue()).build(); - aggRes.add(JsonLDTerm.checksum.getLabel(), checksum); - } - JsonArray tabTags = null; - JsonArrayBuilder jab = JsonPrinter.getTabularFileTags(df); - if (jab != null) { - tabTags = jab.build(); - } - addIfNotNull(aggRes, JsonLDTerm.tabularTags, tabTags); - //Add latest resource to the array - aggResArrayBuilder.add(aggRes.build()); } // Build the '@context' object for json-ld based on the localContext entries JsonObjectBuilder contextBuilder = Json.createObjectBuilder(); for (Entry e : localContext.entrySet()) { contextBuilder.add(e.getKey(), e.getValue()); } - // Now create the overall map object with it's metadata - JsonObject oremap = Json.createObjectBuilder() - .add(JsonLDTerm.dcTerms("modified").getLabel(), LocalDate.now().toString()) - .add(JsonLDTerm.dcTerms("creator").getLabel(), - BrandingUtil.getInstallationBrandName()) - .add("@type", JsonLDTerm.ore("ResourceMap").getLabel()) - // Define an id for the map itself (separate from the @id of the dataset being - // described - .add("@id", - SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/export?exporter=" - + OAI_OREExporter.NAME + "&persistentId=" + id) - // Add the aggregation (Dataset) itself to the map. 
- .add(JsonLDTerm.ore("describes").getLabel(), - aggBuilder.add(JsonLDTerm.ore("aggregates").getLabel(), aggResArrayBuilder.build()) - .add(JsonLDTerm.schemaOrg("hasPart").getLabel(), fileArray.build()).build()) - // and finally add the context - .add("@context", contextBuilder.build()).build(); - return oremap; + if (aggregationOnly) { + return aggBuilder.add("@context", contextBuilder.build()); + } else { + // Now create the overall map object with it's metadata + JsonObjectBuilder oremapBuilder = Json.createObjectBuilder() + .add(JsonLDTerm.dcTerms("modified").getLabel(), LocalDate.now().toString()) + .add(JsonLDTerm.dcTerms("creator").getLabel(), BrandingUtil.getInstallationBrandName()) + .add("@type", JsonLDTerm.ore("ResourceMap").getLabel()) + // Define an id for the map itself (separate from the @id of the dataset being + // described + .add("@id", + SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/export?exporter=" + + OAI_OREExporter.NAME + "&persistentId=" + id) + // Add the aggregation (Dataset) itself to the map. + .add(JsonLDTerm.ore("describes").getLabel(), + aggBuilder.add(JsonLDTerm.ore("aggregates").getLabel(), aggResArrayBuilder.build()) + .add(JsonLDTerm.schemaOrg("hasPart").getLabel(), fileArray.build()).build()) + // and finally add the context + .add("@context", contextBuilder.build()); + return oremapBuilder; + } } /* @@ -353,7 +366,7 @@ private JsonLDTerm getTermFor(DatasetFieldType dsft) { namespaceUri = SystemConfig.getDataverseSiteUrlStatic() + "/schema/" + dsft.getMetadataBlock().getName() + "#"; } - JsonLDNamespace blockNamespace = new JsonLDNamespace(dsft.getMetadataBlock().getName(), namespaceUri); + JsonLDNamespace blockNamespace = JsonLDNamespace.defineNamespace(dsft.getMetadataBlock().getName(), namespaceUri); return new JsonLDTerm(blockNamespace, dsft.getTitle()); } } @@ -369,7 +382,7 @@ private JsonLDTerm getTermFor(DatasetFieldType dfType, DatasetFieldType dsft) { + dfType.getMetadataBlock().getName() + "/"; } subFieldNamespaceUri = subFieldNamespaceUri + dfType.getName() + "#"; - JsonLDNamespace fieldNamespace = new JsonLDNamespace(dfType.getName(), subFieldNamespaceUri); + JsonLDNamespace fieldNamespace = JsonLDNamespace.defineNamespace(dfType.getName(), subFieldNamespaceUri); return new JsonLDTerm(fieldNamespace, dsft.getTitle()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java new file mode 100644 index 00000000000..1e868fa0fc7 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java @@ -0,0 +1,896 @@ +package edu.harvard.iq.dataverse.util.json; + +import java.io.StringReader; +import java.io.StringWriter; +import java.sql.Timestamp; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Map.Entry; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.logging.Logger; + +import javax.json.Json; +import javax.json.JsonArray; +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; +import javax.json.JsonString; +import javax.json.JsonValue; +import javax.json.JsonWriter; +import javax.json.JsonWriterFactory; +import javax.json.JsonValue.ValueType; +import javax.json.stream.JsonGenerator; +import 
javax.ws.rs.BadRequestException; + +import org.apache.commons.lang3.StringUtils; + +import com.apicatalog.jsonld.JsonLd; +import com.apicatalog.jsonld.api.JsonLdError; +import com.apicatalog.jsonld.document.JsonDocument; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldValue; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.MetadataBlock; +import edu.harvard.iq.dataverse.MetadataBlockServiceBean; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess.License; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; +import edu.harvard.iq.dataverse.util.bagit.OREMap; + +public class JSONLDUtil { + + private static final Logger logger = Logger.getLogger(JSONLDUtil.class.getCanonicalName()); + + /* + * private static Map populateContext(JsonValue json) { + * Map context = new TreeMap(); if (json + * instanceof JsonArray) { logger.warning("Array @context not yet supported"); } + * else { for (String key : ((JsonObject) json).keySet()) { + * context.putIfAbsent(key, ((JsonObject) json).getString(key)); } } return + * context; } + */ + + public static JsonObject getContext(Map contextMap) { + JsonObjectBuilder contextBuilder = Json.createObjectBuilder(); + for (Entry e : contextMap.entrySet()) { + contextBuilder.add(e.getKey(), e.getValue()); + } + return contextBuilder.build(); + } + + public static Dataset updateDatasetMDFromJsonLD(Dataset ds, String jsonLDBody, + MetadataBlockServiceBean metadataBlockSvc, DatasetFieldServiceBean datasetFieldSvc, boolean append, boolean migrating) { + + DatasetVersion dsv = new DatasetVersion(); + + JsonObject jsonld = decontextualizeJsonLD(jsonLDBody); + if (migrating) { + Optional maybePid = GlobalId.parse(jsonld.getString("@id")); + if (maybePid.isPresent()) { + ds.setGlobalId(maybePid.get()); + } else { + // unparsable PID passed. Terminate. + throw new BadRequestException("Cannot parse the @id '" + jsonld.getString("@id") + + "'. 
Make sure it is in valid form - see Dataverse Native API documentation."); + } + } + + dsv = updateDatasetVersionMDFromJsonLD(dsv, jsonld, metadataBlockSvc, datasetFieldSvc, append, migrating); + dsv.setDataset(ds); + + List versions = new ArrayList<>(1); + versions.add(dsv); + + ds.setVersions(versions); + if (migrating) { + if (jsonld.containsKey(JsonLDTerm.schemaOrg("dateModified").getUrl())) { + String dateString = jsonld.getString(JsonLDTerm.schemaOrg("dateModified").getUrl()); + LocalDateTime dateTime = getDateTimeFrom(dateString); + ds.setModificationTime(Timestamp.valueOf(dateTime)); + } + } + return ds; + } + + public static DatasetVersion updateDatasetVersionMDFromJsonLD(DatasetVersion dsv, String jsonLDBody, + MetadataBlockServiceBean metadataBlockSvc, DatasetFieldServiceBean datasetFieldSvc, boolean append, boolean migrating) { + JsonObject jsonld = decontextualizeJsonLD(jsonLDBody); + return updateDatasetVersionMDFromJsonLD(dsv, jsonld, metadataBlockSvc, datasetFieldSvc, append, migrating); + } + + /** + * + * @param dsv + * @param jsonld + * @param metadataBlockSvc + * @param datasetFieldSvc + * @param append - if append, will add new top level field values for + * multi-valued fields, if true and field type isn't + * multiple, will fail. if false will replace all + * value(s) for fields found in the json-ld. + * @return + */ + public static DatasetVersion updateDatasetVersionMDFromJsonLD(DatasetVersion dsv, JsonObject jsonld, + MetadataBlockServiceBean metadataBlockSvc, DatasetFieldServiceBean datasetFieldSvc, boolean append, boolean migrating) { + + //Assume draft to start + dsv.setVersionState(VersionState.DRAFT); + + populateFieldTypeMap(metadataBlockSvc); + + // get existing ones? + List dsfl = dsv.getDatasetFields(); + Map fieldByTypeMap = new HashMap(); + for (DatasetField dsf : dsfl) { + if (fieldByTypeMap.containsKey(dsf.getDatasetFieldType())) { + // May have multiple values per field, but not multiple fields of one type? + logger.warning("Multiple fields of type " + dsf.getDatasetFieldType().getName()); + } + fieldByTypeMap.put(dsf.getDatasetFieldType(), dsf); + } + + TermsOfUseAndAccess terms = (dsv.getTermsOfUseAndAccess()!=null) ? dsv.getTermsOfUseAndAccess().copyTermsOfUseAndAccess() : new TermsOfUseAndAccess(); + + for (String key : jsonld.keySet()) { + if (!key.equals("@context")) { + if (dsftMap.containsKey(key)) { + + DatasetFieldType dsft = dsftMap.get(key); + DatasetField dsf = null; + if (fieldByTypeMap.containsKey(dsft)) { + dsf = fieldByTypeMap.get(dsft); + // If there's an existing field, we use it with append and remove it for !append + // unless it's multiple + if (!append && !dsft.isAllowMultiples()) { + dsfl.remove(dsf); + dsf=null; + } + } + if (dsf == null) { + dsf = new DatasetField(); + dsfl.add(dsf); + dsf.setDatasetFieldType(dsft); + } + + // Todo - normalize object vs. 
array + JsonArray valArray = getValues(jsonld.get(key), dsft.isAllowMultiples(), dsft.getName()); + + addField(dsf, valArray, dsft, datasetFieldSvc, append); + + } else { + //When migrating, the publication date and version number can be set + if (key.equals(JsonLDTerm.schemaOrg("datePublished").getUrl())&& migrating && !append) { + dsv.setVersionState(VersionState.RELEASED); + } else if (key.equals(JsonLDTerm.schemaOrg("version").getUrl())&& migrating && !append) { + String friendlyVersion = jsonld.getString(JsonLDTerm.schemaOrg("version").getUrl()); + int index = friendlyVersion.indexOf("."); + if (index > 0) { + dsv.setVersionNumber(Long.parseLong(friendlyVersion.substring(0, index))); + dsv.setMinorVersionNumber(Long.parseLong(friendlyVersion.substring(index + 1))); + } + } else if (key.equals(JsonLDTerm.schemaOrg("license").getUrl())) { + //Special handling for license + if (!append || !isSet(terms, key)) { + // Mirror rules from SwordServiceBean + if (jsonld.containsKey(JsonLDTerm.termsOfUse.getUrl())) { + throw new BadRequestException( + "Cannot specify " + JsonLDTerm.schemaOrg("license").getUrl() + " and " + + JsonLDTerm.termsOfUse.getUrl()); + } + setSemTerm(terms, key, TermsOfUseAndAccess.defaultLicense); + } else { + throw new BadRequestException( + "Can't append to a single-value field that already has a value: " + + JsonLDTerm.schemaOrg("license").getUrl()); + } + + } else if (datasetTerms.contains(key)) { + // Other Dataset-level TermsOfUseAndAccess + if (!append || !isSet(terms, key)) { + setSemTerm(terms, key, jsonld.getString(key)); + } else { + throw new BadRequestException( + "Can't append to a single-value field that already has a value: " + key); + } + } else if (key.equals(JsonLDTerm.fileTermsOfAccess.getUrl())) { + // Other DataFile-level TermsOfUseAndAccess + JsonObject fAccessObject = jsonld.getJsonObject(JsonLDTerm.fileTermsOfAccess.getUrl()); + for (String fileKey : fAccessObject.keySet()) { + if (datafileTerms.contains(fileKey)) { + if (!append || !isSet(terms, fileKey)) { + if (fileKey.equals(JsonLDTerm.fileRequestAccess.getUrl())) { + setSemTerm(terms, fileKey, fAccessObject.getBoolean(fileKey)); + } else { + setSemTerm(terms, fileKey, fAccessObject.getString(fileKey)); + } + } else { + throw new BadRequestException( + "Can't append to a single-value field that already has a value: " + + fileKey); + } + } + } + } + dsv.setTermsOfUseAndAccess(terms); + // ToDo: support Dataverse location metadata? e.g. move to new dataverse? + // re: JsonLDTerm.schemaOrg("includedInDataCatalog") + } + } + } + + dsv.setDatasetFields(dsfl); + + return dsv; + } + /** + * + * @param dsv + * @param jsonLDBody + * @param metadataBlockService + * @param datasetFieldSvc + * @param b + * @param c + * @return + */ + public static DatasetVersion deleteDatasetVersionMDFromJsonLD(DatasetVersion dsv, String jsonLDBody, + MetadataBlockServiceBean metadataBlockSvc, DatasetFieldServiceBean datasetFieldSvc) { + logger.fine("deleteDatasetVersionMD"); + JsonObject jsonld = decontextualizeJsonLD(jsonLDBody); + //All terms are now URIs + //Setup dsftMap - URI to datasetFieldType map + populateFieldTypeMap(metadataBlockSvc); + + + //Another map - from datasetFieldType to an existing field in the dataset + List dsfl = dsv.getDatasetFields(); + Map fieldByTypeMap = new HashMap(); + for (DatasetField dsf : dsfl) { + if (fieldByTypeMap.containsKey(dsf.getDatasetFieldType())) { + // May have multiple values per field, but not multiple fields of one type? 
+ logger.warning("Multiple fields of type " + dsf.getDatasetFieldType().getName()); + } + fieldByTypeMap.put(dsf.getDatasetFieldType(), dsf); + } + + TermsOfUseAndAccess terms = dsv.getTermsOfUseAndAccess().copyTermsOfUseAndAccess(); + + //Iterate through input json + for (String key : jsonld.keySet()) { + //Skip context (shouldn't be present with decontextualize used above) + if (!key.equals("@context")) { + if (dsftMap.containsKey(key)) { + //THere's a field type with theis URI + DatasetFieldType dsft = dsftMap.get(key); + DatasetField dsf = null; + if (fieldByTypeMap.containsKey(dsft)) { + //There's a field of this type + dsf = fieldByTypeMap.get(dsft); + + // Todo - normalize object vs. array + JsonArray valArray = getValues(jsonld.get(key), dsft.isAllowMultiples(), dsft.getName()); + logger.fine("Deleting: " + key + " : " + valArray.toString()); + DatasetField dsf2 = getReplacementField(dsf, valArray); + if(dsf2 == null) { + //Exact match - remove the field + dsfl.remove(dsf); + } else { + //Partial match - some values of a multivalue field match, so keep the remaining values + dsfl.remove(dsf); + dsfl.add(dsf2); + } + } + } else { + // Internal/non-metadatablock terms + boolean found=false; + if (key.equals(JsonLDTerm.schemaOrg("license").getUrl())) { + if(jsonld.getString(key).equals(TermsOfUseAndAccess.CC0_URI)) { + setSemTerm(terms, key, TermsOfUseAndAccess.License.NONE); + } else { + throw new BadRequestException( + "Term: " + key + " with value: " + jsonld.getString(key) + " not found."); + } + found=true; + } else if (datasetTerms.contains(key)) { + if(!deleteIfSemTermMatches(terms, key, jsonld.get(key))) { + throw new BadRequestException( + "Term: " + key + " with value: " + jsonld.getString(key) + " not found."); + } + found=true; + } else if (key.equals(JsonLDTerm.fileTermsOfAccess.getUrl())) { + JsonObject fAccessObject = jsonld.getJsonObject(JsonLDTerm.fileTermsOfAccess.getUrl()); + for (String fileKey : fAccessObject.keySet()) { + if (datafileTerms.contains(fileKey)) { + if(!deleteIfSemTermMatches(terms, key, jsonld.get(key))) { + throw new BadRequestException( + "Term: " + key + " with value: " + jsonld.getString(key) + " not found."); + } + found=true; + } + } + } else if(!found) { + throw new BadRequestException( + "Term: " + key + " not found."); + } + + dsv.setTermsOfUseAndAccess(terms); + } + } + } + dsv.setDatasetFields(dsfl); + return dsv; + } + + /** + * + * @param dsf + * @param valArray + * @return null if exact match, otherwise return a field without the value to be deleted + */ + private static DatasetField getReplacementField(DatasetField dsf, JsonArray valArray) { + // TODO Auto-generated method stub + return null; + } + + private static void addField(DatasetField dsf, JsonArray valArray, DatasetFieldType dsft, + DatasetFieldServiceBean datasetFieldSvc, boolean append) { + + if (append && !dsft.isAllowMultiples()) { + if ((dsft.isCompound() && !dsf.getDatasetFieldCompoundValues().isEmpty()) + || (dsft.isAllowControlledVocabulary() && !dsf.getControlledVocabularyValues().isEmpty()) + || !dsf.getDatasetFieldValues().isEmpty()) { + throw new BadRequestException( + "Can't append to a single-value field that already has a value: " + dsft.getName()); + } + } + logger.fine("Name: " + dsft.getName()); + logger.fine("val: " + valArray.toString()); + logger.fine("Compound: " + dsft.isCompound()); + logger.fine("CV: " + dsft.isAllowControlledVocabulary()); + + if (dsft.isCompound()) { + /* + * List vals = parseCompoundValue(type, + * jsonld.get(key),testType); for 
(DatasetFieldCompoundValue dsfcv : vals) { + * dsfcv.setParentDatasetField(ret); } dsf.setDatasetFieldCompoundValues(vals); + */ + List cvList = dsf.getDatasetFieldCompoundValues(); + if (!cvList.isEmpty()) { + if (!append) { + cvList.clear(); + } else if (!dsft.isAllowMultiples() && cvList.size() == 1) { + // Trying to append but only a single value is allowed (and there already is + // one) + // (and we don't currently support appending new fields within a compound value) + throw new BadRequestException( + "Append with compound field with single value not yet supported: " + dsft.getDisplayName()); + } + } + + List vals = new LinkedList<>(); + for (JsonValue val : valArray) { + if (!(val instanceof JsonObject)) { + throw new BadRequestException( + "Compound field values must be JSON objects, field: " + dsft.getName()); + } + DatasetFieldCompoundValue cv = null; + + cv = new DatasetFieldCompoundValue(); + cv.setDisplayOrder(cvList.size()); + cvList.add(cv); + cv.setParentDatasetField(dsf); + + JsonObject obj = (JsonObject) val; + for (String childKey : obj.keySet()) { + if (dsftMap.containsKey(childKey)) { + DatasetFieldType childft = dsftMap.get(childKey); + if (!dsft.getChildDatasetFieldTypes().contains(childft)) { + throw new BadRequestException( + "Compound field " + dsft.getName() + "can't include term " + childKey); + } + DatasetField childDsf = new DatasetField(); + cv.getChildDatasetFields().add(childDsf); + childDsf.setDatasetFieldType(childft); + childDsf.setParentDatasetFieldCompoundValue(cv); + + JsonArray childValArray = getValues(obj.get(childKey), childft.isAllowMultiples(), + childft.getName()); + addField(childDsf, childValArray, childft, datasetFieldSvc, append); + } + } + } + + } else if (dsft.isControlledVocabulary()) { + + List vals = dsf.getControlledVocabularyValues(); + for (JsonString strVal : valArray.getValuesAs(JsonString.class)) { + String strValue = strVal.getString(); + ControlledVocabularyValue cvv = datasetFieldSvc + .findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, strValue, true); + if (cvv == null) { + throw new BadRequestException( + "Unknown value for Controlled Vocab Field: " + dsft.getName() + " : " + strValue); + } + // Only add value to the list if it is not a duplicate + if (strValue.equals("Other")) { + System.out.println("vals = " + vals + ", contains: " + vals.contains(cvv)); + } + if (!vals.contains(cvv)) { + if (vals.size() > 0) { + cvv.setDisplayOrder(vals.size()); + } + vals.add(cvv); + cvv.setDatasetFieldType(dsft); + } + } + dsf.setControlledVocabularyValues(vals); + + } else { + List vals = dsf.getDatasetFieldValues(); + + for (JsonString strVal : valArray.getValuesAs(JsonString.class)) { + String strValue = strVal.getString(); + DatasetFieldValue datasetFieldValue = new DatasetFieldValue(); + + datasetFieldValue.setDisplayOrder(vals.size()); + datasetFieldValue.setValue(strValue.trim()); + vals.add(datasetFieldValue); + datasetFieldValue.setDatasetField(dsf); + + } + dsf.setDatasetFieldValues(vals); + } + } + + private static JsonArray getValues(JsonValue val, boolean allowMultiples, String name) { + JsonArray valArray = null; + if (val instanceof JsonArray) { + if ((((JsonArray) val).size() > 1) && !allowMultiples) { + throw new BadRequestException("Array for single value notsupported: " + name); + } else { + valArray = (JsonArray) val; + } + } else { + valArray = Json.createArrayBuilder().add(val).build(); + } + return valArray; + } + + static Map localContext = new TreeMap(); + static Map dsftMap = new TreeMap(); + + 
private static void populateFieldTypeMap(MetadataBlockServiceBean metadataBlockSvc) { + if (dsftMap.isEmpty()) { + + List mdbList = metadataBlockSvc.listMetadataBlocks(); + + for (MetadataBlock mdb : mdbList) { + boolean blockHasUri = mdb.getNamespaceUri() != null; + for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) { + if (dsft.getUri() != null) { + dsftMap.put(dsft.getUri(), dsft); + } + if (blockHasUri) { + if (dsft.getParentDatasetFieldType() != null) { + // ToDo - why not getName for child type? Would have to fix in ORE generation + // code and handle legacy bags + dsftMap.put(mdb.getNamespaceUri() + dsft.getParentDatasetFieldType().getName() + "#" + + dsft.getTitle(), dsft); + } else { + dsftMap.put(mdb.getNamespaceUri() + dsft.getTitle(), dsft); + } + } + } + } + logger.fine("DSFT Map: " + String.join(", ", dsftMap.keySet())); + } + } + + public static void populateContext(MetadataBlockServiceBean metadataBlockSvc) { + if (localContext.isEmpty()) { + + List mdbList = metadataBlockSvc.listMetadataBlocks(); + + for (MetadataBlock mdb : mdbList) { + boolean blockHasUri = mdb.getNamespaceUri() != null; + if (blockHasUri) { + JsonLDNamespace.defineNamespace(mdb.getName(), mdb.getNamespaceUri()); + } + for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) { + if ((dsft.getUri() != null) && !JsonLDNamespace.isInNamespace(dsft.getUri())) { + //Add term if uri exists and it's not in one of the namespaces already defined + localContext.putIfAbsent(dsft.getName(), dsft.getUri()); + } + } + } + JsonLDNamespace.addNamespacesToContext(localContext); + logger.fine("LocalContext keys: " + String.join(", ", localContext.keySet())); + } + } + + public static JsonObject decontextualizeJsonLD(String jsonLDString) { + logger.fine(jsonLDString); + try (StringReader rdr = new StringReader(jsonLDString)) { + + // Use JsonLd to expand/compact to localContext + JsonObject jsonld = Json.createReader(rdr).readObject(); + JsonDocument doc = JsonDocument.of(jsonld); + JsonArray array = null; + try { + array = JsonLd.expand(doc).get(); + jsonld = JsonLd.compact(JsonDocument.of(array), JsonDocument.of(Json.createObjectBuilder().build())) + .get(); + // jsonld = array.getJsonObject(0); + logger.fine("Decontextualized object: " + jsonld); + return jsonld; + } catch (JsonLdError e) { + System.out.println(e.getMessage()); + return null; + } + } + } + + private static JsonObject recontextualizeJsonLD(JsonObject jsonldObj, MetadataBlockServiceBean metadataBlockSvc) { + + populateContext(metadataBlockSvc); + + // Use JsonLd to expand/compact to localContext + JsonDocument doc = JsonDocument.of(jsonldObj); + JsonArray array = null; + try { + array = JsonLd.expand(doc).get(); + + jsonldObj = JsonLd.compact(JsonDocument.of(array), JsonDocument.of(JSONLDUtil.getContext(localContext))) + .get(); + logger.fine("Compacted: " + jsonldObj.toString()); + return jsonldObj; + } catch (JsonLdError e) { + System.out.println(e.getMessage()); + return null; + } + } + + public static String prettyPrint(JsonValue val) { + StringWriter sw = new StringWriter(); + Map properties = new HashMap<>(1); + properties.put(JsonGenerator.PRETTY_PRINTING, true); + JsonWriterFactory writerFactory = Json.createWriterFactory(properties); + JsonWriter jsonWriter = writerFactory.createWriter(sw); + jsonWriter.write(val); + jsonWriter.close(); + return sw.toString(); + } + +//Modified from https://stackoverflow.com/questions/3389348/parse-any-date-in-java + + private static final Map DATE_FORMAT_REGEXPS = new HashMap() { + { + put("^\\d{8}$", 
"yyyyMMdd"); + put("^\\d{1,2}-\\d{1,2}-\\d{4}$", "dd-MM-yyyy"); + put("^\\d{4}-\\d{1,2}-\\d{1,2}$", "yyyy-MM-dd"); + put("^\\d{1,2}/\\d{1,2}/\\d{4}$", "MM/dd/yyyy"); + put("^\\d{4}/\\d{1,2}/\\d{1,2}$", "yyyy/MM/dd"); + put("^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$", "dd MMM yyyy"); + put("^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$", "dd MMMM yyyy"); + } + }; + + private static final Map DATETIME_FORMAT_REGEXPS = new HashMap() { + { + put("^\\d{12}$", "yyyyMMddHHmm"); + put("^\\d{8}\\s\\d{4}$", "yyyyMMdd HHmm"); + put("^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}$", "dd-MM-yyyy HH:mm"); + put("^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}$", "yyyy-MM-dd HH:mm"); + put("^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}$", "MM/dd/yyyy HH:mm"); + put("^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}$", "yyyy/MM/dd HH:mm"); + put("^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}$", "dd MMM yyyy HH:mm"); + put("^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}$", "dd MMMM yyyy HH:mm"); + put("^\\d{14}$", "yyyyMMddHHmmss"); + put("^\\d{8}\\s\\d{6}$", "yyyyMMdd HHmmss"); + put("^\\d{1,2}-\\d{1,2}-\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$", "dd-MM-yyyy HH:mm:ss"); + put("^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$", "yyyy-MM-dd HH:mm:ss"); + put("^\\d{1,2}/\\d{1,2}/\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$", "MM/dd/yyyy HH:mm:ss"); + put("^\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}$", "yyyy/MM/dd HH:mm:ss"); + put("^\\d{1,2}\\s[a-z]{3}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$", "dd MMM yyyy HH:mm:ss"); + put("^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}\\s\\d{1,2}:\\d{2}:\\d{2}$", "dd MMMM yyyy HH:mm:ss"); + put("^\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\.\\d{3}$", "yyyy-MM-dd HH:mm:ss.SSS"); + put("^[a-z,A-Z]{3}\\s[a-z,A-Z]{3}\\s\\d{1,2}\\s\\d{1,2}:\\d{2}:\\d{2}\\s[a-z,A-Z]{3}\\s\\d{4}$", + "EEE MMM dd HH:mm:ss zzz yyyy"); // Wed Sep 23 19:33:46 UTC 2020 + + } + }; + + /** + * Determine DateTimeFormatter pattern matching with the given date string. + * Returns null if format is unknown. You can simply extend DateUtil with more + * formats if needed. + * + * @param dateString The date string to determine the SimpleDateFormat pattern + * for. + * @return The matching SimpleDateFormat pattern, or null if format is unknown. + * @see SimpleDateFormat + */ + public static DateTimeFormatter determineDateTimeFormat(String dateString) { + for (String regexp : DATETIME_FORMAT_REGEXPS.keySet()) { + if (dateString.toLowerCase().matches(regexp)) { + return DateTimeFormatter.ofPattern(DATETIME_FORMAT_REGEXPS.get(regexp)); + } + } + logger.warning("Unknown datetime format: " + dateString); + return null; // Unknown format. + } + + public static DateTimeFormatter determineDateFormat(String dateString) { + for (String regexp : DATE_FORMAT_REGEXPS.keySet()) { + if (dateString.toLowerCase().matches(regexp)) { + return DateTimeFormatter.ofPattern(DATE_FORMAT_REGEXPS.get(regexp)); + } + } + logger.warning("Unknown date format: " + dateString); + return null; // Unknown format. 
+ } + + public static LocalDateTime getDateTimeFrom(String dateString) { + DateTimeFormatter dtf = determineDateTimeFormat(dateString); + if (dtf != null) { + return LocalDateTime.parse(dateString, dtf); + } else { + dtf = determineDateFormat(dateString); + if (dtf != null) { + return LocalDate.parse(dateString, dtf).atStartOfDay(); + } + } + + return null; + } + + // Convenience methods for TermsOfUseAndAccess + + public static final List datasetTerms = new ArrayList(Arrays.asList( + "https://dataverse.org/schema/core#termsOfUse", + "https://dataverse.org/schema/core#confidentialityDeclaration", + "https://dataverse.org/schema/core#specialPermissions", "https://dataverse.org/schema/core#restrictions", + "https://dataverse.org/schema/core#citationRequirements", + "https://dataverse.org/schema/core#depositorRequirements", "https://dataverse.org/schema/core#conditions", + "https://dataverse.org/schema/core#disclaimer")); + public static final List datafileTerms = new ArrayList(Arrays.asList( + "https://dataverse.org/schema/core#termsOfAccess", "https://dataverse.org/schema/core#fileRequestAccess", + "https://dataverse.org/schema/core#dataAccessPlace", "https://dataverse.org/schema/core#originalArchive", + "https://dataverse.org/schema/core#availabilityStatus", + "https://dataverse.org/schema/core#contactForAccess", "https://dataverse.org/schema/core#sizeOfCollection", + "https://dataverse.org/schema/core#studyCompletion")); + + public static boolean isSet(TermsOfUseAndAccess terms, String semterm) { + switch (semterm) { + case "http://schema.org/license": + return !terms.getLicense().equals(TermsOfUseAndAccess.License.NONE); + case "https://dataverse.org/schema/core#termsOfUse": + return !StringUtils.isBlank(terms.getTermsOfUse()); + case "https://dataverse.org/schema/core#confidentialityDeclaration": + return !StringUtils.isBlank(terms.getConfidentialityDeclaration()); + case "https://dataverse.org/schema/core#specialPermissions": + return !StringUtils.isBlank(terms.getSpecialPermissions()); + case "https://dataverse.org/schema/core#restrictions": + return !StringUtils.isBlank(terms.getRestrictions()); + case "https://dataverse.org/schema/core#citationRequirements": + return !StringUtils.isBlank(terms.getCitationRequirements()); + case "https://dataverse.org/schema/core#depositorRequirements": + return !StringUtils.isBlank(terms.getDepositorRequirements()); + case "https://dataverse.org/schema/core#conditions": + return !StringUtils.isBlank(terms.getConditions()); + case "https://dataverse.org/schema/core#disclaimer": + return !StringUtils.isBlank(terms.getDisclaimer()); + case "https://dataverse.org/schema/core#termsOfAccess": + return !StringUtils.isBlank(terms.getTermsOfAccess()); + case "https://dataverse.org/schema/core#fileRequestAccess": + return !terms.isFileAccessRequest(); + case "https://dataverse.org/schema/core#dataAccessPlace": + return !StringUtils.isBlank(terms.getDataAccessPlace()); + case "https://dataverse.org/schema/core#originalArchive": + return !StringUtils.isBlank(terms.getOriginalArchive()); + case "https://dataverse.org/schema/core#availabilityStatus": + return !StringUtils.isBlank(terms.getAvailabilityStatus()); + case "https://dataverse.org/schema/core#contactForAccess": + return !StringUtils.isBlank(terms.getContactForAccess()); + case "https://dataverse.org/schema/core#sizeOfCollection": + return !StringUtils.isBlank(terms.getSizeOfCollection()); + case "https://dataverse.org/schema/core#studyCompletion": + return 
!StringUtils.isBlank(terms.getStudyCompletion()); + default: + logger.warning("isSet called for " + semterm); + return false; + } + } + + public static void setSemTerm(TermsOfUseAndAccess terms, String semterm, Object value) { + switch (semterm) { + case "http://schema.org/license": + // Mirror rules from SwordServiceBean + if (((License) value).equals(TermsOfUseAndAccess.defaultLicense)) { + terms.setLicense(TermsOfUseAndAccess.defaultLicense); + } else { + throw new BadRequestException("The only allowed value for " + JsonLDTerm.schemaOrg("license").getUrl() + + " is " + TermsOfUseAndAccess.CC0_URI); + } + break; + case "https://dataverse.org/schema/core#termsOfUse": + terms.setTermsOfUse((String) value); + break; + case "https://dataverse.org/schema/core#confidentialityDeclaration": + terms.setConfidentialityDeclaration((String) value); + break; + case "https://dataverse.org/schema/core#specialPermissions": + terms.setSpecialPermissions((String) value); + break; + case "https://dataverse.org/schema/core#restrictions": + terms.setRestrictions((String) value); + break; + case "https://dataverse.org/schema/core#citationRequirements": + terms.setCitationRequirements((String) value); + break; + case "https://dataverse.org/schema/core#depositorRequirements": + terms.setDepositorRequirements((String) value); + break; + case "https://dataverse.org/schema/core#conditions": + terms.setConditions((String) value); + break; + case "https://dataverse.org/schema/core#disclaimer": + terms.setDisclaimer((String) value); + break; + case "https://dataverse.org/schema/core#termsOfAccess": + terms.setTermsOfAccess((String) value); + break; + case "https://dataverse.org/schema/core#fileRequestAccess": + terms.setFileAccessRequest((boolean) value); + break; + case "https://dataverse.org/schema/core#dataAccessPlace": + terms.setDataAccessPlace((String) value); + break; + case "https://dataverse.org/schema/core#originalArchive": + terms.setOriginalArchive((String) value); + break; + case "https://dataverse.org/schema/core#availabilityStatus": + terms.setAvailabilityStatus((String) value); + break; + case "https://dataverse.org/schema/core#contactForAccess": + terms.setContactForAccess((String) value); + break; + case "https://dataverse.org/schema/core#sizeOfCollection": + terms.setSizeOfCollection((String) value); + break; + case "https://dataverse.org/schema/core#studyCompletion": + terms.setStudyCompletion((String) value); + break; + default: + logger.warning("setSemTerm called for " + semterm); + break; + } + } + + private static boolean deleteIfSemTermMatches(TermsOfUseAndAccess terms, String semterm, JsonValue jsonValue) { + boolean foundTerm=false; + String val = null; + if(jsonValue.getValueType().equals(ValueType.STRING)) { + val = ((JsonString)jsonValue).getString(); + } + switch (semterm) { + + case "https://dataverse.org/schema/core#termsOfUse": + if(terms.getTermsOfUse().equals(val)) { + terms.setTermsOfUse(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#confidentialityDeclaration": + if(terms.getConfidentialityDeclaration().equals(val)) { + terms.setConfidentialityDeclaration(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#specialPermissions": + if(terms.getSpecialPermissions().equals(val)) { + terms.setSpecialPermissions(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#restrictions": + if(terms.getRestrictions().equals(val)) { + terms.setRestrictions(null); + foundTerm=true; + } + break; + case 
"https://dataverse.org/schema/core#citationRequirements": + if(terms.getCitationRequirements().equals(val)) { + terms.setCitationRequirements(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#depositorRequirements": + if(terms.getDepositorRequirements().equals(val)) { + terms.setDepositorRequirements(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#conditions": + if(terms.getConditions().equals(val)) { + terms.setConditions(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#disclaimer": + if(terms.getDisclaimer().equals(val)) { + terms.setDisclaimer(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#termsOfAccess": + if(terms.getTermsOfAccess().equals(val)) { + terms.setTermsOfAccess(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#fileRequestAccess": + if(terms.isFileAccessRequest() && (jsonValue.equals(JsonValue.TRUE))) { + terms.setFileAccessRequest(false); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#dataAccessPlace": + if(terms.getDataAccessPlace().equals(val)) { + terms.setDataAccessPlace(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#originalArchive": + if(terms.getOriginalArchive().equals(val)) { + terms.setOriginalArchive(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#availabilityStatus": + if(terms.getAvailabilityStatus().equals(val)) { + terms.setAvailabilityStatus(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#contactForAccess": + if(terms.getContactForAccess().equals(val)) { + terms.setContactForAccess(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#sizeOfCollection": + if(terms.getSizeOfCollection().equals(val)) { + terms.setSizeOfCollection(null); + foundTerm=true; + } + break; + case "https://dataverse.org/schema/core#studyCompletion": + if(terms.getStudyCompletion().equals(val)) { + terms.setStudyCompletion(null); + foundTerm=true; + } + break; + default: + logger.warning("deleteIfSemTermMatches called for " + semterm); + break; + } + return foundTerm; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDNamespace.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDNamespace.java index bda4a55d623..904419775c9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDNamespace.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDNamespace.java @@ -1,5 +1,13 @@ package edu.harvard.iq.dataverse.util.json; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import edu.harvard.iq.dataverse.DataFile; + public class JsonLDNamespace { String prefix; @@ -12,7 +20,40 @@ public class JsonLDNamespace { public static JsonLDNamespace ore = new JsonLDNamespace("ore","http://www.openarchives.org/ore/terms/"); public static JsonLDNamespace schema = new JsonLDNamespace("schema","http://schema.org/"); - public JsonLDNamespace(String prefix, String url) { + private static List namespaces = new ArrayList(Arrays.asList(dvcore, dcterms, ore, schema)); + + public static JsonLDNamespace defineNamespace(String prefix, String url) { + + JsonLDNamespace ns = new JsonLDNamespace(prefix, url); + int i = namespaces.indexOf(ns); + if(i>=0) { + return namespaces.get(i); + } else { + namespaces.add(ns); + return ns; + } + } + + public static void 
deleteNamespace(JsonLDNamespace ns) { + namespaces.remove(ns); + } + + public static boolean isInNamespace(String url) { + for(JsonLDNamespace ns: namespaces) { + if(url.startsWith(ns.getUrl())) { + return true; + } + } + return false; + } + + public static void addNamespacesToContext(Map context) { + for(JsonLDNamespace ns: namespaces) { + context.putIfAbsent(ns.getPrefix(), ns.getUrl()); + }; + } + + private JsonLDNamespace(String prefix, String url) { this.prefix = prefix; this.url = url; } @@ -24,5 +65,14 @@ public String getPrefix() { public String getUrl() { return url; } + + @Override + public boolean equals(Object object) { + if (!(object instanceof JsonLDNamespace)) { + return false; + } + JsonLDNamespace other = (JsonLDNamespace) object; + return (other.getPrefix().equals(getPrefix()) && other.getUrl().equals(getUrl())); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDTerm.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDTerm.java index 5acb0c437ae..20aeceda7de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDTerm.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonLDTerm.java @@ -1,8 +1,5 @@ package edu.harvard.iq.dataverse.util.json; -import java.util.HashMap; -import java.util.Map; - public class JsonLDTerm { JsonLDNamespace namespace = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/PasswordValidatorUtil.java b/src/main/java/edu/harvard/iq/dataverse/validation/PasswordValidatorUtil.java index 6d78ab44a5f..9a4f1610e62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/validation/PasswordValidatorUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/validation/PasswordValidatorUtil.java @@ -5,7 +5,7 @@ import java.util.Arrays; import java.util.List; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.passay.CharacterRule; import org.passay.EnglishCharacterData; diff --git a/src/main/java/edu/harvard/iq/dataverse/workflows/WorkflowUtil.java b/src/main/java/edu/harvard/iq/dataverse/workflows/WorkflowUtil.java index aeb8bcf6c87..e6e6bfd23c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflows/WorkflowUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflows/WorkflowUtil.java @@ -19,7 +19,7 @@ import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import static org.apache.commons.lang3.StringEscapeUtils.escapeHtml4; +import static org.apache.commons.text.StringEscapeUtils.escapeHtml4; public class WorkflowUtil { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 77248d76ea8..ca2299e711f 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1132,6 +1132,7 @@ dataset.message.manageTemplates.label=Manage Dataset Templates dataset.message.manageTemplates.message=Create a template prefilled with metadata fields standard values, such as Author Affiliation, or add instructions in the metadata fields to give depositors more information on what metadata is expected. # metadataFragment.xhtml +dataset.anonymized.withheld=withheld # template.xhtml dataset.template.name.tip=The name of the dataset template. 
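Stepping back to the JsonLDNamespace changes above: defineNamespace deduplicates against a static registry (returning the existing instance when prefix and URL match), isInNamespace does a startsWith match against registered base URLs, and addNamespacesToContext folds the registry into a JSON-LD @context map. A hedged usage sketch; the "customBlock" prefix and URI are invented for illustration and are not part of the patch:

    import java.util.Map;
    import java.util.TreeMap;

    import edu.harvard.iq.dataverse.util.json.JsonLDNamespace;

    public class JsonLDNamespaceSketch {
        public static void main(String[] args) {
            // Register a namespace for a hypothetical metadata block (constructor is now private,
            // so defineNamespace is the only way in).
            JsonLDNamespace.defineNamespace("customBlock", "https://example.org/schema/customBlock#");

            // Term URIs under any registered namespace are recognized by prefix matching...
            boolean known = JsonLDNamespace.isInNamespace("https://example.org/schema/customBlock#someTerm");
            System.out.println(known); // true

            // ...and the whole registry can be folded into a JSON-LD @context map,
            // as populateContext() does with localContext.
            Map<String, String> context = new TreeMap<>();
            JsonLDNamespace.addNamespacesToContext(context);
            System.out.println(context.keySet()); // includes customBlock, dcterms, dvcore, ore, schema
        }
    }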
@@ -1501,12 +1502,16 @@ dataset.privateurl.header=Unpublished Dataset Private URL dataset.privateurl.tip=Use a Private URL to allow those without Dataverse accounts to access your unpublished dataset. For more information about the Private URL feature, please refer to the User Guide. dataset.privateurl.absent=Private URL has not been created. dataset.privateurl.createPrivateUrl=Create Private URL +dataset.privateurl.createPrivateUrl.anonymized=Create URL for Anonymized Access +dataset.privateurl.createPrivateUrl.anonymized.unavailable=Anonymized Access is not available once a version of the dataset has been published dataset.privateurl.disablePrivateUrl=Disable Private URL dataset.privateurl.disablePrivateUrlConfirm=Yes, Disable Private URL dataset.privateurl.disableConfirmationText=Are you sure you want to disable the Private URL? If you have shared the Private URL with others they will no longer be able to use it to access your unpublished dataset. dataset.privateurl.cannotCreate=Private URL can only be used with unpublished versions of datasets. dataset.privateurl.roleassigeeTitle=Private URL Enabled dataset.privateurl.createdSuccess=Success! +dataset.privateurl.full=This Private URL provides full read access to the dataset +dataset.privateurl.anonymized=This Private URL provides access to the anonymized dataset dataset.privateurl.disabledSuccess=You have successfully disabled the Private URL for this unpublished dataset. dataset.privateurl.noPermToCreate=To create a Private URL you must have the following permissions: {0}. file.display.label=Change View @@ -1651,8 +1656,8 @@ file.downloadBtn.format.citation=Data File Citation file.download.filetype.unknown=Original File Format file.more.information.link=Link to more file information for file.requestAccess=Request Access -file.requestAccess.dialog.msg=You need to Log In to request access to this file. -file.requestAccess.dialog.msg.signup=You need to Sign Up or Log In to request access to this file. +file.requestAccess.dialog.msg=You need to Log In to request access. +file.requestAccess.dialog.msg.signup=You need to Sign Up or Log In to request access. file.accessRequested=Access Requested file.ingestInProgress=Ingest in progress... file.dataFilesTab.metadata.header=Metadata @@ -1736,6 +1741,7 @@ file.dataFilesTab.versions=Versions file.dataFilesTab.versions.headers.dataset=Dataset file.dataFilesTab.versions.headers.summary=Summary file.dataFilesTab.versions.headers.contributors=Contributors +file.dataFilesTab.versions.headers.contributors.withheld=Contributor name(s) withheld file.dataFilesTab.versions.headers.published=Published file.dataFilesTab.versions.viewDiffBtn=View Differences file.dataFilesTab.versions.citationMetadata=Citation Metadata: @@ -1770,12 +1776,12 @@ file.deaccessionDialog.tip=Once you deaccession this dataset it will no longer b file.deaccessionDialog.version=Version file.deaccessionDialog.reason.question1=Which version(s) do you want to deaccession? file.deaccessionDialog.reason.question2=What is the reason for deaccession? 
-file.deaccessionDialog.reason.selectItem.identifiable=There is identifiable data in one or more files -file.deaccessionDialog.reason.selectItem.beRetracted=The research article has been retracted -file.deaccessionDialog.reason.selectItem.beTransferred=The dataset has been transferred to another repository -file.deaccessionDialog.reason.selectItem.IRB=IRB request -file.deaccessionDialog.reason.selectItem.legalIssue=Legal issue or Data Usage Agreement -file.deaccessionDialog.reason.selectItem.notValid=Not a valid dataset +file.deaccessionDialog.reason.selectItem.identifiable=There is identifiable data in one or more files. +file.deaccessionDialog.reason.selectItem.beRetracted=The research article has been retracted. +file.deaccessionDialog.reason.selectItem.beTransferred=The dataset has been transferred to another repository. +file.deaccessionDialog.reason.selectItem.IRB=IRB request. +file.deaccessionDialog.reason.selectItem.legalIssue=Legal issue or Data Usage Agreement. +file.deaccessionDialog.reason.selectItem.notValid=Not a valid dataset. file.deaccessionDialog.reason.selectItem.other=Other (Please type reason in space provided below) file.deaccessionDialog.enterInfo=Please enter additional information about the reason for deaccession. file.deaccessionDialog.leaveURL=If applicable, please leave a URL where this dataset can be accessed after deaccessioning. @@ -1936,6 +1942,7 @@ file.versionDifferences.fileRestricted=Restricted file.versionDifferences.fileUnrestricted=Unrestricted file.versionDifferences.fileGroupTitle=File +file.anonymized.authorsWithheld=Author name(s) withheld # File Ingest ingest.csv.invalidHeader=Invalid header row. One of the cells is empty. ingest.csv.lineMismatch=Mismatch between line counts in first and final passes!, {0} found on first pass, but {1} found on second. @@ -2189,7 +2196,9 @@ dataset.file.exist.test={0, choice, 1#File |2#Files |} {1} {0, choice, 1#has |2# dataset.files.duplicate=Files {0} have the same content as {1} that have already been uploaded. dataset.file.duplicate=File {0} has the same content as {1} that has already been uploaded. dataset.file.inline.message= This file has the same content as {0}. -dataset.file.upload=Succesful {0} is uploaded. +dataset.file.upload=Successful {0} is uploaded. +dataset.file.upload.setUp.rsync.failed=Rsync upload setup failed! +dataset.file.upload.setUp.rsync.failed.detail=Unable to find appropriate storage driver. dataset.file.uploadFailure=upload failure dataset.file.uploadFailure.detailmsg=the file {0} failed to upload! dataset.file.uploadWarning=upload warning @@ -2371,6 +2380,11 @@ datasets.api.grant.role.cant.create.assignment.error=Cannot create assignment: { datasets.api.grant.role.assignee.not.found.error=Assignee not found datasets.api.revoke.role.not.found.error="Role assignment {0} not found" datasets.api.revoke.role.success=Role {0} revoked for assignee {1} in {2} +datasets.api.privateurl.error.datasetnotfound=Could not find dataset. +datasets.api.privateurl.error.alreadyexists=Private URL already exists for this dataset. +datasets.api.privateurl.error.notdraft=Can't create Private URL because the latest version of this dataset is not a draft. +datasets.api.privateurl.anonymized.error.released=Can't create a URL for anonymized access because this dataset has been published. + #Dataverses.java dataverses.api.update.default.contributor.role.failure.role.not.found=Role {0} not found. 
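The new datasets.api.privateurl.* entries above are plain resource-bundle keys. A minimal sketch of how one of them might be resolved into an API error, assuming the project's existing BundleUtil.getStringFromBundle helper; the surrounding guard method and its parameter are illustrative only, not code from this patch:

    import javax.ws.rs.BadRequestException;

    import edu.harvard.iq.dataverse.util.BundleUtil;

    public class PrivateUrlMessagesSketch {

        // Illustrative guard: refuse anonymized-access Private URLs once a version has been released,
        // surfacing the new bundle message to the API caller.
        static void checkAnonymizedAccessAllowed(boolean datasetHasReleasedVersion) {
            if (datasetHasReleasedVersion) {
                throw new BadRequestException(
                        BundleUtil.getStringFromBundle("datasets.api.privateurl.anonymized.error.released"));
            }
        }
    }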
diff --git a/src/main/resources/db/migration/V5.5.0.3__1724-anonymizedaccess.sql b/src/main/resources/db/migration/V5.5.0.3__1724-anonymizedaccess.sql new file mode 100644 index 00000000000..da016361596 --- /dev/null +++ b/src/main/resources/db/migration/V5.5.0.3__1724-anonymizedaccess.sql @@ -0,0 +1,2 @@ +ALTER TABLE roleassignment ADD COLUMN IF NOT EXISTS privateurlanonymizedaccess boolean; + diff --git a/src/main/resources/db/migration/V5.5.0.4__7451-unique-constraint-storageidentifier.sql b/src/main/resources/db/migration/V5.5.0.4__7451-unique-constraint-storageidentifier.sql new file mode 100644 index 00000000000..93104da27a6 --- /dev/null +++ b/src/main/resources/db/migration/V5.5.0.4__7451-unique-constraint-storageidentifier.sql @@ -0,0 +1,10 @@ +-- ALTER TABLE dvobject ADD CONSTRAINT unq_dvobject_storageidentifier UNIQUE(owner_id, storageidentifier); +-- Instead of a uniform constraint on all dvobjects (as above), force a +-- conditional unique constraint on datafiles only: +CREATE UNIQUE INDEX IF NOT EXISTS unq_dvobject_storageidentifier ON dvobject (owner_id, storageidentifier) WHERE dtype='DataFile'; +-- This is not going to have any effect on new databases (since v4.20+), +-- where the table was created with the full constraint; but for legacy +-- installations it would spare them having to clean up any dataset-level +-- storageidentifiers. We know that some old installations have datasets +-- with junk values in that column (like "file" - ??) that are meaningless, +-- but otherwise harmless. diff --git a/src/main/resources/db/migration/V5.5.0.5__7548-stored-procedure-update.sql b/src/main/resources/db/migration/V5.5.0.5__7548-stored-procedure-update.sql new file mode 100644 index 00000000000..6377f2ddec2 --- /dev/null +++ b/src/main/resources/db/migration/V5.5.0.5__7548-stored-procedure-update.sql @@ -0,0 +1,33 @@ +-- If the installation is using a stored procedure for generating +-- sequential numeric identifiers, create a wrapper function that +-- works with the new framework (the stored procedure now needs to +-- return a string) and update the database setting +DO $BODY$ +BEGIN + UPDATE setting SET content='storedProcGenerated' + WHERE name=':IdentifierGenerationStyle' + AND content='sequentialNumber'; + BEGIN + PERFORM generateIdentifierAsSequentialNumber(); + EXCEPTION + -- If the above function does not exist, we can stop executing this script + WHEN undefined_function THEN + RETURN; + END; + BEGIN + PERFORM generateIdentifierFromStoredProcedure(); + EXCEPTION + -- We only create this function if it doesn't already exist, + -- to avoid overwriting user modifications + WHEN undefined_function THEN + CREATE FUNCTION generateIdentifierFromStoredProcedure() + RETURNS varchar AS $$ + DECLARE + identifier varchar; + BEGIN + identifier := generateIdentifierAsSequentialNumber()::varchar; + RETURN identifier; + END; + $$ LANGUAGE plpgsql IMMUTABLE; + END; +END $BODY$; diff --git a/src/main/webapp/dataset-citation.xhtml b/src/main/webapp/dataset-citation.xhtml index 6c99cf568f9..16396110217 100644 --- a/src/main/webapp/dataset-citation.xhtml +++ b/src/main/webapp/dataset-citation.xhtml @@ -26,7 +26,7 @@ -
[Remaining webapp XHTML hunks: markup was lost in extraction; only the fragments and file headers below survive.]
@@ -819,6 +827,7 @@
@@ -832,7 +841,9 @@
@@ -989,7 +1000,8 @@ } else { selectText(this); }">
#{bundle['dataset.privateurl.createdSuccess']}
#{bundle['dataset.privateurl.anonymized']}
#{bundle['dataset.privateurl.full']}
#{privateUrlLink}
@@ -997,7 +1009,8 @@
diff --git a/src/main/webapp/dataverse_header.xhtml b/src/main/webapp/dataverse_header.xhtml
index 268f4912efc..9acf84bb617 100644
--- a/src/main/webapp/dataverse_header.xhtml
+++ b/src/main/webapp/dataverse_header.xhtml
@@ -212,7 +212,7 @@
diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml
index ac8f7c28c40..84ddb4b0005 100644
--- a/src/main/webapp/file-download-button-fragment.xhtml
+++ b/src/main/webapp/file-download-button-fragment.xhtml
@@ -197,7 +197,7 @@