diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..bb0342712 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +.bundle +.convox +.env +.env.* +.ruby-* +db/*.sqlite3 +db/*.sqlite3-journal +log/* +node_modules +tmp diff --git a/.env b/.env new file mode 100644 index 000000000..47ffa7d49 --- /dev/null +++ b/.env @@ -0,0 +1,37 @@ +APP_NAME=gbh-hyrax +PASSENGER_APP_ENV=development +REGISTRY_HOST=registry.gitlab.com +REGISTRY_URI=/notch8/gbh + +# SMTP Mailer variables +# To enable mailer: +# - Uncomment and edit SMTP vars +# - Uncomment SMTP Mailer section in respective config/environments file +# SMTP_USER_NAME=CHANGEME +# SMTP_PASSWORD=CHANGEME +# SMTP_ADDRESS=CHANGEME +# SMTP_DOMAIN=CHANGEME +# SMTP_PORT=CHANGEME +# SMTP_TYPE=CHANGEME +TAG=dev +MYSQL_DATABASE=gbh +MYSQL_USER=root +MYSQL_PASSWORD=DatabaseFTW +MYSQL_ROOT_PASSWORD=DatabaseFTW +MYSQL_HOST=db +DB_ADAPTER=mysql2 +TEST_DB=GBH_test +FCREPO_URL=http://fcrepo:8080/rest +SETTINGS__BULKRAX__ENABLED=true +SOLR_URL=http://admin:admin@solr:8983/solr/hydra-development +SOLR_ADMIN_PASSWORD=admin +SOLR_ADMIN_USER=admin +SOLR_COLLECTION_NAME=hydra-development +SOLR_CONFIGSET_NAME=hyku +SOLR_HOST=solr +SOLR_PORT=8983 +REDIS_SERVER=redis +REDIS_HOST=redis +CHROME_HOSTNAME=chrome +SIDEKIQ_CONCURRENCY=1 \ No newline at end of file diff --git a/.env.development.enc b/.env.development.enc new file mode 100644 index 000000000..cc7a28aef --- /dev/null +++ b/.env.development.enc @@ -0,0 +1,21 @@ +{ + "data": "ENC[AES256_GCM,data:/QV46n6aYq3DbVF9dIRE9rnjme03aRW0clulhlNfvLl9aqWFIzDgUA2ubT9Zu+SE9P/8z0pzlRHcLsHl9WrKy0nVBZeP5Gii0qMbBBNSiFLnREcEG+oAz6K465O3Is9QI6bMkXMKrE5hzoBaB9K/xUJaVUvw//k6rC4smsHy12AKzdY4HLocXg6JvC6BkqdFlyljbbRKbDxp3/JDVv8jFw==,iv:PImElXiALSCWqPSXBp3glD8ukQtxV4e1pRAjekiUgtM=,tag:0XS/X2E2gGEU/AneSsOzxQ==,type:str]", + "sops": { + "kms": null, + "gcp_kms": null, + "azure_kv": null, + "hc_vault": null, + "age": null, + "lastmodified": "2021-09-07T19:03:04Z", + "mac": 
"ENC[AES256_GCM,data:3Tg4mRiqyloOlOsgR+rxD6+9UNYZqMDyJ/GNnqsJCqVFfekA/jh0fkKYdYm3R2beCuwohPnjvBci5uVNrA3p26LOUoa/oIWtqbaAnNzfQllFDO2w1xZ722Jii4GidH6YHHiAO1DzkC5mgrMpAoiQQRZqa5rMaNxEnPxVe5/Ha3Y=,iv:WRqT9a9Q4wqxBCOZ6x5BOaP+e4Cvwry4MyERgYOEPNM=,tag:uJvNaXBzifyfEWRvG1dbCQ==,type:str]", + "pgp": [ + { + "created_at": "2021-09-07T19:03:03Z", + "enc": "-----BEGIN PGP MESSAGE-----\n\nhQIMAx1u4ocvSXxJARAAzXKg3pOsYJXSS5Vch2egHvAxenS+HcufS3NsEvbT5J7Q\ny2hYYtA089hj7jG2J+u2oU317W74xiGrtmsMtNmqoyuiZI/dW8arVePlIPwtSlR1\nomIQjIdRRQ6T7qa8UKQmjmIWtQ7UImyLElI8J6jPvfca2EW0/Du8SGo+89DkP0gl\n5X9+79zMJQkUj1vmpoY4FKKjGDDKzMmS07KNUh+BzYnnlruSKWyrpBe9ZA6jclQP\nLuvlTQsGdbFcommLwg/o7CbJAgXOGEpUCXK+ahdTWuup/YnFZcmamNAzQkNMWIkV\n8d2QMqN5DHrCtYOpnsqdtj4L8WvfZYd8KS8BM3CaOk3mJi/9WbbkpfEkOGfcGoV4\n1dQH2h2LRyh4L6bIs5UiXRC1PjRGM1PkqJSiaqcWOF61OqUyfjTzggHPetP+NtSx\nzBeFBYMjaiylUF+5iyPYTIwNqBr4QUAugu19VYa2ro/e5wuh49T18cJ/6XsAhD41\nMGbr1JSRqi0pcBIaNdgaq5U1Rx7TzR7hkvf7sf55MCGxoH9CDguQqt94QCOZ2SyI\n51MQox72uaD7JLHIN4TjK8KFv0xayaAXN3afUti+rMzT4uRGQHoW3RWAOFSL3IO3\ntrCFJw/rbcwkEp+bMLb51TCYS7FHoAbckcdJMJhnlPWsFaAPx5aj6z7r/xKAnGLS\nXgHsnyf/n10R9eTLkA/K2WUmVQn+ZiOTf9nvjXFB/ztWge5PuE+T3feMT/dYkJck\nHJhyvhchCRqGnvuaPm/l8KwvnplSJ1Yjs9EcQ+oOem9jpCRemNgd8VJmwDEzhm8=\n=sAaC\n-----END PGP MESSAGE-----\n", + "fp": "B6125B16B0DD59F34D6975FBF885927FDA9C48E2" + } + ], + "unencrypted_suffix": "_unencrypted", + "version": "3.7.1" + } +} \ No newline at end of file diff --git a/.env.enc b/.env.enc new file mode 100644 index 000000000..8b771c75d --- /dev/null +++ b/.env.enc @@ -0,0 +1,29 @@ +APP_NAME=ENC[AES256_GCM,data:362ZhN7vUMr7,iv:FuUFP9lWdOeBwQK2E8N+UgXnmfH0mVDQkQenFQ5IdUc=,tag:dtwGivM24zJDtHHcajUQ2A==,type:str] +PASSENGER_APP_ENV=ENC[AES256_GCM,data:RUmOJggK6S5H6T4=,iv:n4oSgwNlwjnkpzvnmT+wUUKxfYLLN2L6rOi73kfoT0c=,tag:LwwrPd1iOlrHoDR275NeOw==,type:str] +REGISTRY_HOST=ENC[AES256_GCM,data:Ee4l3tS1+6Xv8R9zP22NvI/GTg==,iv:cLWms6WoytHZ5DAirXfQq6LHrAoAnQgK6BOc7a79TNE=,tag:ZZF3SXRfCzJp3eiLwETu8g==,type:str] 
+REGISTRY_URI=ENC[AES256_GCM,data:1rPTdUEs6q84ZBg=,iv:/hWmn2NsXSnPBBD1Bsx184leK+NSzA1xpXRjctqjqA8=,tag:2WrHfeg0fGkxKN0UeqMRYg==,type:str] +#ENC[AES256_GCM,data:WjFwhy2zUexA1IlIRWzRsh9p/k+cng==,iv:Lef2FnXFpe7KM9SOKo0CIIbACN+RQiU10svcMVO9iWU=,tag:5lMQQxvtf9T4UP1EkERGuA==,type:comment] +#ENC[AES256_GCM,data:RgMnweZSw/nyd1bjywTYAJf1,iv:w2iUt+ouwb4ymDYX146gsmYlZmNyWlZuw+UiaJXeklM=,tag:UYVOsmyUz60OvLozZCWtaQ==,type:comment] +#ENC[AES256_GCM,data:WsyCbyMFJPoS9s3Z7mdJlPTKiCs6XcPbOUDJHJJC+g==,iv:+o1z8RSakj7x7GDZBNWX+H9A/jCcHDc8TQPZYmWMNC0=,tag:ez7mbrVTzq/05x337VSgCA==,type:comment] +#ENC[AES256_GCM,data:16JjRKHZkEA1Xh4bfT8y4QzIXgHj8omwi0f25KcMu6rkqsK9hFD+wUJgqNpzkiYhqghiWS7f9sqDmFy2l7RgvWya9WumOw4=,iv:CNZ+tr0qDQt6aiLay1kZ9d3YMx5IZ4yzl8jkOBKVXIg=,tag:rhG9628RMjUZrxh23tW07g==,type:comment] +#ENC[AES256_GCM,data:jVkqHqK/sx1auWrRIiCbEIn8ig0buLIZ,iv:HKUCwwxJauHhqjSRRJ/gqkcIyFPKn1rWg36JcLmNNFQ=,tag:TYqja9gpBNCtv4YMDbBd9w==,type:comment] +#ENC[AES256_GCM,data:lzeYQydWcQCCJorjEZuZcIcPT5Akeqk=,iv:EgCT1wEHAOe7mIxoQu5Rg80GUCmq7dsKvVSSnlKOzJk=,tag:LCUaTm/yuNj1lbNyYnXEqg==,type:comment] +#ENC[AES256_GCM,data:1i2/YgnWdm3gIPRFQklqkm/1S6Essw==,iv:gX+8haUM3Og/LpTndN7/s0wPTK6O2Dx5lXOCry5gc2c=,tag:9Tb50DP2bCRQQuFrR2lnNw==,type:comment] +#ENC[AES256_GCM,data:FDAt/3xZ2ArMWzNdPe4Ko5fYCaz9,iv:V7Va+53+SJ5mtVzh5fFec7G8UVBcidfQg7n6a9SaKgs=,tag:xh2GnouH+sQsTntiR0LYJw==,type:comment] +#ENC[AES256_GCM,data:f5L7qKnRmuZt6JDZ47jM0EGkcw==,iv:/IVuHXq3J6wk54IrOtJSW/4eGs3T9OmsVnb9Lpo9ms0=,tag:glUhR5J9CR5/2/OaXFOAqQ==,type:comment] +#ENC[AES256_GCM,data:TXBXs2Rgmj4stzc5Bo0vCHM+SlY=,iv:UIUpeQ1d48ZTavutJjP4PX7IK2i5O8jTno/8d1PuL9c=,tag:cfUVQrFA2LPcUqYTGonwIg==,type:comment] +TAG=ENC[AES256_GCM,data:jZyI,iv:eQ6tIU/IbTsaHYbAM34NscgtTos8MJhnT6MKAh9KYdc=,tag:hzjKcaL8DYE4v7IaeQCGgw==,type:str] +MYSQL_DATABASE=ENC[AES256_GCM,data:UhrC,iv:rLVjQayrVIhT1mdI4Yfu3KGnWEvNNntw1sLsX4Bcqas=,tag:6VzFa4nx3Ig0fh/MV9kWiA==,type:str] 
+MYSQL_USER=ENC[AES256_GCM,data:gteXqw==,iv:mK2zP/ViZz2c8pftvIx1NPDHWWhvJIyuzxLCxc5aGQs=,tag:AEa8eLQ4Usbm6UtWzL9ChA==,type:str] +MYSQL_PASSWORD=ENC[AES256_GCM,data:VnXTVU9YETOQ2VI=,iv:6fRDRdefxIjkJn+axDf+bSxwVfyvPqzj9h0xBORW9as=,tag:bWLuzAYRy+KKEJEu763pCA==,type:str] +MYSQL_ROOT_PASSWORD=ENC[AES256_GCM,data:9sRKIkzrWYLXtIg=,iv:M6Ub+lArlLnJOg4Ypn9utJMbSjLtDjtm0U3jhkLjp0c=,tag:tOtI9noInjRlKMES7zQ8bQ==,type:str] +MYSQL_HOST=ENC[AES256_GCM,data:f/g=,iv:l3RVhdsBMGTtL7UgNrRouTK0fKTZBJzEUCx+ZNmpCuY=,tag:GPHN3m0Vl7y7WfgUQANWzw==,type:str] +DB_ADAPTER=ENC[AES256_GCM,data:IoB1HNlA,iv:2ONmsqTqmbOoA7kY7xV7HRxwtoBL/eqZugZr3NGl07c=,tag:bE9/SgCUxsYfWxToQKFplg==,type:str] +TEST_DB=ENC[AES256_GCM,data:pnA4V/trO2o=,iv:NSWohoQLuAy+rTikGBCBOsD0rX6eUlefVVNEq8Ad4M0=,tag:GWVKZie1FjuLDU3gQI9Bmw==,type:str] +sops_pgp__list_0__map_fp=B6125B16B0DD59F34D6975FBF885927FDA9C48E2 +sops_unencrypted_suffix=_unencrypted +sops_pgp__list_0__map_created_at=2021-09-07T19:03:01Z +sops_pgp__list_0__map_enc=-----BEGIN PGP MESSAGE-----\n\nhQIMAx1u4ocvSXxJARAAogJrncDOvrnQjtNQw4aqKayykk4sYippOuZXVKaGyrYP\nAqZmrsC0/ra830O+GHI8DuWJCUTndy1busQmAxIWNL4EZ30wbPeWd3KPrWXGrZ6E\nnof6TVXoXFmHMcgJcrsVknKVbhZaAyOmBOk9vnExzjwVQJXhM/xN0W9n2c5x0F0k\ns7iea2QjLghLBOg7xZp/ardNDl9K56XpIXRKQnuA4skWoGUL626PTy48vE2Gh5+l\nYgc3RLagDHTEr1zK5V6VwIEfiu6ODPzdlInJSUaGp1PtaL8gUDxOao8ke3McBidC\nAuwDTXTnIb4oeoqp32i7o/+2XymGUe8qXP1b0PoZ4gd2c5b66OxWgLmSaTulOday\nYtHWR26N+XhzP19mHFfZKIbjUATB6A0vawPQIzn3SGqyo8Y5Pmlu/cu3I3iBHzdo\nm0TO5b+6LZ3fIFthKLcelbKwEFiAdKVGW7CtUaIelZ/aT9c5poDVq8ewvT3v5Y89\nnUU1QqULRIKoQ18EJ9OQen9WC3VQnfk5XSYDc/DBzt12UjSdwdKUXlNKYDrgjKdi\n901Aab10VsjY8Eb+02R3Nvp+4bewdxegsUaA2U2F07Ahqzkby7M8GfbGYX28rqSR\ngU4dlfG/TDrIB/oiWIRVOXQY+u9jbKoFelc4n378Zu0Sv5uXrahgIKsnpQSySkjS\nXAGhOBQAhNA9no554zSwLdHT+XUitfHMjUw4TJsEu8P4t9cY/EGuaEDofx6OIkf1\nbfQPIJ/kZkD0OhJ5iMxkMBWcZhJjMkTi/qO0yTf/XfIrVLyrTc/u6G35hLYH\n=dSrj\n-----END PGP MESSAGE-----\n +sops_version=3.7.1 +sops_lastmodified=2021-09-07T19:03:03Z 
+sops_mac=ENC[AES256_GCM,data:LutzEHOdycJ+RUgztgQ53b8J+CRnxtoQv19744sXO75t4ZpEHhL/qYOKv3qXoRb6cuk5xTiCTKZrTA8wT8fyzQnzXh0HR/8wdiQLFfBcn5nbIW8gsjA+uoNaFk0hdqF2eHozpam3bL0m9SaXm4jh4mEHpZkVV6OxSaOUi5mmM1s=,iv:0uPfG/7QXuSyUuITUEnsy9fawyLR+QwrLtt/+3gM1NM=,tag:CNi5HzEqA9UACWEaqQ7IRA==,type:str] diff --git a/.github/workflows/ams-ci.yml b/.github/workflows/ams-ci.yml new file mode 100644 index 000000000..5154297d5 --- /dev/null +++ b/.github/workflows/ams-ci.yml @@ -0,0 +1,50 @@ +name: CI RSpec Tests + +on: [push, pull_request] + +jobs: + tests: + name: CI + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Install Redis + run: sudo apt-get install -y redis-tools redis-server + + - name: Install libcurl4-openssl-dev for Curb Gem + run: sudo apt-get install libcurl4-openssl-dev + + - name: Setup Ruby and Install RubyGems + uses: ruby/setup-ruby@v1 + with: + ruby-version: 2.5.3 + bundler-cache: true + + - name: Install JDK + uses: actions/setup-java@v2 + with: + distribution: 'temurin' + java-version: '11' + + - name: Install Node + shell: bash -l -eo pipefail {0} + run: nvm install 12.9.0 + + - name: Install Chrome Browser + run: google-chrome-stable --headless --disable-gpu --no-sandbox --remote-debugging-port=9222 http://localhost & + + - name: Prepare Test Environment + run: | + cp config/travis/solr_wrapper_test.yml config/solr_wrapper_test.yml + cp config/travis/fcrepo_wrapper_test.yml config/fcrepo_wrapper_test.yml + export DISPLAY=:99.0 + RAILS_ENV=test bundle exec rake db:environment:set db:create db:migrate --trace + RAILS_ENV=test npm install yarn + RAILS_ENV=test yarn --ignore-engines install + RAILS_ENV=test bundle exec rake webpacker:compile + + - name: Run Rspec specs using CI config + run: bundle exec rake ci diff --git a/.github/workflows/deploy-staging.yaml b/.github/workflows/deploy-staging.yaml new file mode 100644 index 000000000..bee6a0689 --- /dev/null +++ b/.github/workflows/deploy-staging.yaml @@ -0,0 
+1,33 @@ +name: "Deploy Staging" +on: + workflow_dispatch: + +env: + REGISTRY: ghcr.io + +jobs: + deployment: + runs-on: 'ubuntu-latest' + container: dtzar/helm-kubectl:3.9.4 + env: + DEPLOY_IMAGE: ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }} + WORKER_IMAGE: ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}/worker + HELM_EXPERIMENTAL_OCI: 1 + HELM_RELEASE_NAME: gbh-staging + KUBE_NAMESPACE: gbh-staging + HELM_EXTRA_ARGS: > + --values ops/staging-deploy.yaml + KUBECONFIG_FILE: ${{ secrets.KUBECONFIG_FILE_STAGING }} + KUBECONFIG: ./kubeconfig.yml + steps: + - name: Set env + run: >- + echo "TAG=${GITHUB_SHA::8}" >> $GITHUB_ENV; + echo "DEPLOY_TAG=${GITHUB_SHA::8}" >> $GITHUB_ENV; + - name: Checkout code + uses: actions/checkout@v3 + - name: Do deploy + run: >- + echo $KUBECONFIG_FILE | base64 --decode > $KUBECONFIG + DOLLAR=$ envsubst < ops/staging-deploy.tmpl.yaml > ops/staging-deploy.yaml; + ./bin/helm_deploy gbh-staging gbh-staging diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 000000000..6c9428f44 --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,169 @@ +# TODO retag latest on merge to main +# TODO helm deployment + +name: "Ruby on Rails CI" +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + workflow_dispatch: + inputs: + debug_enabled: + type: boolean + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false + +env: + REGISTRY: ghcr.io + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Set env + run: echo "TAG=${GITHUB_SHA::8}" >> $GITHUB_ENV + - name: Checkout code + uses: actions/checkout@v3 + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Github Container Login + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + registry: ${{ env.REGISTRY }} + 
username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Pull from cache to speed up build + run: TAG=latest docker-compose pull web || true + - name: Build and push + uses: docker/build-push-action@v3 + with: + context: . + platforms: linux/amd64,linux/arm64 + target: hyku-base + build-args: | + SETTINGS__BULKRAX__ENABLED=true + EXTRA_APK_PACKAGES=less vim bash openjdk11-jre ffmpeg rsync yarn + cache-from: | + ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}:latest + ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}:${TAG} + push: true + tags: | + ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}:latest + ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}:${{ env.TAG }} + - name: Build and push worker + uses: docker/build-push-action@v3 + with: + context: . + platforms: linux/amd64,linux/arm64 + target: hyku-worker + build-args: | + SETTINGS__BULKRAX__ENABLED=true + EXTRA_APK_PACKAGES=less vim bash openjdk11-jre ffmpeg rsync yarn + cache-from: | + ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}:latest + ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}:${TAG} + push: true + tags: | + ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}/worker:${{ env.TAG }} + ${{ env.REGISTRY }}/${{ env.GITHUB_REPOSITORY }}:latest + + lint: + needs: build + runs-on: ubuntu-latest + steps: + - name: Set env + run: echo "TAG=${GITHUB_SHA::8}" >> $GITHUB_ENV + # run: echo "TAG=f9867d90" >> $GITHUB_ENV + - name: Checkout code + uses: actions/checkout@v3 + - name: Github Container Login + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Pull from cache to speed up build + run: >- + docker-compose pull web; + docker-compose pull worker + - name: Run Rubocop + run: docker-compose run web bundle exec rubocop --parallel --format progress --format junit --out rubocop.xml --display-only-failed + - name: Publish Test Report + uses: 
mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: 'rubocop*.xml' + + test: + needs: build + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + # Set N number of parallel jobs you want to run tests on. + # Use higher number if you have slow tests to split them on more parallel jobs. + # Remember to update ci_node_index below to 0..N-1 + ci_node_total: [3] + # set N-1 indexes for parallel jobs + # When you run 2 parallel jobs then first job will have index 0, the second job will have index 1 etc + ci_node_index: [0, 1, 2] + env: + ALLOW_ANONYMOUS_LOGIN: "yes" + CONFDIR: "/app/samvera/hyrax-webapp/solr/config" + DB_CLEANER_ALLOW_REMOTE_DB_URL: "true" + TB_RSPEC_FORMATTER: progress + TB_RSPEC_OPTIONS: --format RspecJunitFormatter --out rspec.xml + steps: + - name: Set env + run: echo "TAG=${GITHUB_SHA::8}" >> $GITHUB_ENV + # run: echo "TAG=f9867d90" >> $GITHUB_ENV + - name: Checkout code + uses: actions/checkout@v3 + - name: Github Container Login + uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Pull from cache to speed up build + run: >- + docker-compose pull web; + docker-compose pull worker + - name: Start containers + run: docker-compose up -d web + - name: Setup solr + run: >- + docker-compose exec -T web bash -c + "solrcloud-upload-configset.sh /app/samvera/hyrax-webapp/solr/config && + SOLR_COLLECTION_NAME=hydra-test solrcloud-assign-configset.sh && + solrcloud-assign-configset.sh" + - name: Setup db + run: >- + docker-compose exec -T web bash -c + "RAILS_ENV=test bundle exec rake db:schema:load db:migrate db:seed" + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} + with: + limit-access-to-actor: true + - name: Run Specs + env: + # Specifies how many 
jobs you would like to run in parallel, + # used for partitioning + CI_NODE_TOTAL: ${{ matrix.ci_node_total }} + # Use the index from matrix as an environment variable + CI_NODE_INDEX: ${{ matrix.ci_node_index }} + continue-on-error: true + run: >- + docker-compose exec -T web bash -c + "gem install semaphore_test_boosters + rspec_booster --job $CI_NODE_INDEX/$CI_NODE_TOTAL" + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: 'rspec*.xml' diff --git a/.gitignore b/.gitignore index bf61a9173..fce02f96f 100644 --- a/.gitignore +++ b/.gitignore @@ -31,12 +31,14 @@ # Redis dumps dump.rdb +.env .env.development .env.test .env.production /public/packs /public/packs-test /node_modules +/ops/*-deploy.yaml yarn-debug.log* .yarn-integrity fits.log @@ -47,3 +49,10 @@ demo* # This file is generated on deployment public/deployment.html + +# Ignore vendor files +/vendor/* +.dory.yml +solr_db_initialized +docker-compose.override.yml +*~undo-tree~ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 000000000..bdf38381c --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,171 @@ +stages: + - build + - go + +variables: + REGISTRY_HOST: registry.gitlab.com + REGISTRY_URI: /notch8/gbh + +before_script: + - export TAG=${CI_COMMIT_SHORT_SHA} + - export BRANCH=${CI_COMMIT_REF_NAME} + - export REGISTRY_HOST=${CI_REGISTRY} + - export REGISTRY_URI="/${CI_PROJECT_PATH}" + +build: + stage: build + script: + - docker login -u "gitlab-ci-token" -p "$CI_JOB_TOKEN" $CI_REGISTRY + - sc build + - docker login -u "gitlab-ci-token" -p "$CI_JOB_TOKEN" $CI_REGISTRY + - sc push + - sc build -s sidekiq + - docker login -u "gitlab-ci-token" -p "$CI_JOB_TOKEN" $CI_REGISTRY + - sc push -s sidekiq + - docker tag $CI_REGISTRY_IMAGE:$TAG $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME + - docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_NAME + tags: + - local + +lint: + stage: go + image: 
$CI_REGISTRY_IMAGE:$CI_COMMIT_SHORT_SHA + variables: + PRONTO_GITLAB_API_ENDPOINT: "https://gitlab.com/api/v4" + PRONTO_GITLAB_API_PRIVATE_TOKEN: ${GITLAB_API_TOKEN} + script: + - bundle install + - bundle exec pronto run -f gitlab -c origin/n8-staging + tags: + - docker + allow_failure: true + +test: + stage: go + image: $CI_REGISTRY_IMAGE:$CI_COMMIT_SHORT_SHA + services: + - name: mysql:5.7 + alias: db + - name: bitnami/zookeeper:3 + alias: zk + - name: bitnami/solr:8 + alias: solr + - name: redis:5-alpine + alias: redis + - name: samvera/fcrepo4:4.7.5 + alias: fcrepo + - name: selenium/standalone-chrome:3.141 + alias: chrome + + variables: + ALLOW_ANONYMOUS_LOGIN: "yes" + CONFDIR: "/app/samvera/hyrax-webapp/solr/config" + CHROME_HOSTNAME: chrome + DB_ADAPTER: mysql2 + FCREPO_URL: http://fcrepo:8080/rest + FF_NETWORK_PER_BUILD: 1 + MYSQL_DATABASE: gbh_test + MYSQL_HOST: db + MYSQL_PASSWORD: DumbDatabase123 + MYSQL_ROOT_PASSWORD: DumbDatabase123 + MYSQL_USER: gbh + REDIS_HOST: redis + REDIS_SERVER: redis + SETTINGS__BULKRAX__ENABLED: 'true' + SIDEKIQ_CONCURRENCY: 5 + SOLR_ADMIN_PASSWORD: admin + SOLR_ADMIN_USER: admin + SOLR_ADMIN_USERNAME: admin + SOLR_CLOUD_BOOTSTRAP: "yes" + SOLR_COLLECTION_NAME: hydra-test + SOLR_CONFIGSET_NAME: hyrax + SOLR_ENABLE_AUTHENTICATION: "yes" + SOLR_ENABLE_CLOUD_MODE: "yes" + SOLR_HOST: solr + SOLR_PORT: 8983 + SOLR_URL: http://admin:admin@solr:8983/solr/hydra-test + SOLR_ZK_HOSTS: zk:2181 + TB_RSPEC_FORMATTER: progress + TB_RSPEC_OPTIONS: --format RspecJunitFormatter --out rspec.xml + TEST_DB: gbh_test + ZOO_HEAP_SIZE: 128 + ZOO_PORT_NUMBER: 2181 + before_script: + - cd /app/samvera/hyrax-webapp + - solrcloud-upload-configset.sh /app/samvera/hyrax-webapp/solr/config + - solrcloud-assign-configset.sh + - SOLR_COLLECTION_NAME=hydra-test solrcloud-assign-configset.sh + - RAILS_ENV=test bundle exec rake db:schema:load db:migrate db:seed + + script: + - rm -f /app/samvera/hyrax-webapp/tmp/capybara/* + - rm -rf $CI_PROJECT_DIR/capybara + 
- gem install semaphore_test_boosters + - cd /app/samvera/hyrax-webapp + # debugging tip: un-comment the sleep below in order to use k8's or docker to log into the actual running container + # - sleep 30m + - rspec_booster --job $CI_NODE_INDEX/$CI_NODE_TOTAL + # - rspec --format progress --tag ~speed:slow --format RspecJunitFormatter --out rspec.xml + after_script: + - test -e /app/samvera/hyrax-webapp/tmp/capybara && cp -r /app/samvera/hyrax-webapp/tmp/capybara $CI_PROJECT_DIR/capybara + - test -e /app/samvera/hyrax-webapp/rspec.xml && cp /app/samvera/hyrax-webapp/rspec.xml $CI_PROJECT_DIR/rspec.xml + artifacts: + when: always + paths: + - capybara/*.html + - rspec.xml + reports: + junit: rspec.xml + tags: + - docker + allow_failure: false + parallel: 3 + +gbh.staging: + stage: go + extends: + - .deploy + environment: + name: gbh.staging + url: http://gbh-staging.$KUBE_INGRESS_BASE_DOMAIN + on_stop: gbh.staging.stop + only: + refs: + - n8-staging + variables: + DEPLOY_IMAGE: $CI_REGISTRY_IMAGE + DEPLOY_TAG: $CI_COMMIT_SHORT_SHA + WORKER_IMAGE: $CI_REGISTRY_IMAGE/worker + HELM_EXPERIMENTAL_OCI: 1 + HELM_RELEASE_NAME: gbh-staging + KUBE_NAMESPACE: gbh-staging + HELM_EXTRA_ARGS: > + --values ops/staging-deploy.yaml + script: + - export KUBECONFIG=$KUBECONFIG_R2 + - envsubst < ops/staging-deploy.tmpl.yaml > ops/staging-deploy.yaml + - ./bin/helm_deploy gbh-staging gbh-staging + tags: + - local + +gbh.staging.stop: + stage: go + extends: + - .deploy + environment: + name: gbh.staging + url: http://gbh-staging.$KUBE_INGRESS_BASE_DOMAIN + action: stop + when: manual + only: + refs: + - n8-staging + allow_failure: true + script: + - export KUBECONFIG=$KUBECONFIG_GBH + - ./bin/helm_delete gbh-staging gbh-staging + tags: + - local + +.deploy: + image: dtzar/helm-kubectl:3.5.3 diff --git a/.gitlab/issue_templates/Issue.md b/.gitlab/issue_templates/Issue.md new file mode 100644 index 000000000..18942e4a8 --- /dev/null +++ b/.gitlab/issue_templates/Issue.md @@ -0,0 +1,13 @@ +# 
Summary + +# Acceptance Criteria + +- [ ] + +# Screenshots or Video + +# Testing Instructions + +# Notes + +Confused how to fill this out? Check out the [playbook](https://playbook-staging.notch8.com/en/notch8/process/issues) diff --git a/.gitlab/merge_request_templates/Issue.md b/.gitlab/merge_request_templates/Issue.md new file mode 100644 index 000000000..2d85d983b --- /dev/null +++ b/.gitlab/merge_request_templates/Issue.md @@ -0,0 +1,9 @@ +# Summary + +# Screenshots / Video + +# Expected Behavior + +# Notes + +If you need further information on how to fill this out, read the [playbook](https://playbook-staging.notch8.com/en/notch8/process/merge-requests) diff --git a/.rubocop.yml b/.rubocop.yml index e0e45d90f..5738a1239 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,2 +1,124 @@ inherit_gem: - bixby: bixby_default.yml \ No newline at end of file + bixby: bixby_default.yml + +# Turn on RSpec cops +require: rubocop-rspec + +# With the rubocop 0.47.0 and rubocop-rspec 1.8.0 the following stderr message was sent: +# An error occurred while RSpec/DescribedClass cop was inspecting path/to/file +RSpec/DescribedClass: + Enabled: false + +AllCops: + DisplayCopNames: true + TargetRubyVersion: 2.5 + Exclude: + - 'bin/**/*' + - 'db/**/*' + - 'config/**/*' + - 'vendor/**/*' + - '.internal_test_app/**/*' + - 'spec/fixtures/**/*' + - 'spec/internal/**/*' + - 'spec/test_app_templates/**/*' + - 'Rakefile' + - 'lib/tasks/rubocop.rake' + # disabling collections controller as most of the rubocop errors are in hyrax + - 'app/controllers/hyrax/dashboard/collections_controller.rb' + +Rails: + Enabled: true + +Rails/DynamicFindBy: + Whitelist: + - find_by_user_key + Exclude: + - 'lib/importer/factory/object_factory.rb' + +Metrics/LineLength: + Max: 120 + Exclude: + - 'app/controllers/catalog_controller.rb' + - 'spec/controllers/curation_concerns/generic_works_controller_spec.rb' + - 'spec/services/iiif_collection_thumbnail_path_service_spec.rb' + - 
'spec/services/iiif_work_thumbnail_path_service_spec.rb' + - 'spec/routing/proprietor/accounts_routing_spec.rb' + +Layout/IndentationConsistency: + EnforcedStyle: indented_internal_methods + +Layout/DotPosition: + EnforcedStyle: leading + +Style/Documentation: + Enabled: false + +Style/StringLiterals: + Enabled: false + +Style/WordArray: + Enabled: false + +Metrics/ClassLength: + Exclude: + - 'app/controllers/catalog_controller.rb' + +Metrics/ModuleLength: + Max: 200 + +Rails/HasAndBelongsToMany: + Exclude: + - 'app/models/role.rb' + +RSpec/AnyInstance: + Enabled: false + +RSpec/InstanceVariable: + Exclude: + - 'spec/controllers/hyku/registrations_controller_spec.rb' + +RSpec/NamedSubject: + Enabled: false + +RSpec/DescribeClass: + Exclude: + - 'spec/requests/**/*' + - 'spec/features/**/*' + - 'spec/views/**/*' + - 'spec/routing/**/*' + - 'spec/tasks/**/*' + +Rails/FilePath: + Exclude: + - 'spec/routing/**/*' + +RSpec/ExpectActual: + Exclude: + - 'spec/routing/**/*' + +RSpec/VerifiedDoubles: + Enabled: false + +RSpec/MessageExpectation: + Enabled: false + +# By default RSpec/MessageSpies has the following: +# Prefer have_received for setting message expectations. Setup form as a spy using allow or instance_spy. 
+RSpec/MessageSpies: + Enabled: true + EnforcedStyle: receive + +RSpec/ExampleLength: + Max: 16 + +RSpec/NestedGroups: + Max: 4 + +RSpec/MultipleExpectations: + Enabled: false + +Metrics/BlockLength: + Exclude: + - 'spec/**/*.rb' + - 'lib/tasks/*.rake' + - 'app/controllers/catalog_controller.rb' diff --git a/.sops.yaml b/.sops.yaml new file mode 100644 index 000000000..31c502a70 --- /dev/null +++ b/.sops.yaml @@ -0,0 +1,3 @@ +--- +creation_rules: + - pgp: "B6125B16B0DD59F34D6975FBF885927FDA9C48E2" diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b531be476..000000000 --- a/.travis.yml +++ /dev/null @@ -1,59 +0,0 @@ -dist: xenial -language: ruby -services: -- redis-server -- xvfb -jdk: -- openjdk11 -rvm: -- 2.5.3 -addons: - chrome: stable -cache: - bundler: true - directories: - - dep_cache -before_install: -- google-chrome-stable --headless --disable-gpu --no-sandbox --remote-debugging-port=9222 - http://localhost & -- mkdir -p dep_cache -- ls -l dep_cache -- cp config/travis/solr_wrapper_test.yml config/solr_wrapper_test.yml -- cp config/travis/fcrepo_wrapper_test.yml config/fcrepo_wrapper_test.yml -- gem update bundler -- gem update --system 3.0.6 -- nvm install 12.9.0 -before_script: -- export DISPLAY=:99.0 -- RAILS_ENV=test bundle exec rake db:environment:set db:create db:migrate --trace -- RAILS_ENV=test npm install yarn -- RAILS_ENV=test yarn --ignore-engines install -- RAILS_ENV=test bundle exec rake webpacker:compile -script: -- bundle exec rake ci -deploy: -- provider: codedeploy - access_key_id: AKIAR3SRUQECDKWELTGU - secret_access_key: - secure: 
"vAmLUYYon/glBfFog8+xyChUMidy+5GciyNSfuNpkF+dWH+r5FYmMpA9hbfcTZhNrM9XSZxjf+heVltkz/nTNwb+U7Q35e1k1KR4NO/juf8+VNk+jwK9oQESoSeEU+Pplkl7sUCCZikqPO07aYtIPzCJy8pt2hUsA9EzxPny6vWPSZiGxghxcCqZIHmuiJJFg39Pnl8P4R8MM3EqGr3qExCEtapO4ca4s+JVr8dvhiJCUac7e0rWHSTTzGh1qTyOk1d4T6tU4edJZXc8BFzsZ4O/GETzGfA8bYiaHfizAQNcwpcQ9cP66k+XQkQ+CYRUlHlNbq6JGItkJTUGkY/MHhs3jIy58iWKVEg1dyPsOsjWs6aL4UA52Uo/uV3TPsB/GHbC52gVonzz7pqNeNPoS46RHO2ekqlerkeE488uuM0fGwamQ3JhfDwKBklZcGjzo/sxFD8IKEAieGikcuO3fYIhijhuCBUuOyGt/bCMfMh3rStKGeDRVfzCFHi+WNQY1FtSqqh6P+PNfrR0Y5j2cwKF/6cUFN0iFMZ+LWeEmduTtRDaG1tCyZ1UKRetG69zpWgVn4tmMrVdDOTyyHoE2BrD8D4qft1+690Lvji+6y5g3vqM9sl1aPZO4t21BxKSwwcNLmS/fZo114YpU9Eu9xA3ly7h36OYaei5gh1Z4jc=" - revision_type: github - application: ams-production-restored - deployment_group: ams-production-restored-DG - region: us-east-1 - on: - branch: main - ruby: 2.5.3 -- provider: codedeploy - access_key_id: AKIAR3SRUQECDKWELTGU - secret_access_key: - secure: "vAmLUYYon/glBfFog8+xyChUMidy+5GciyNSfuNpkF+dWH+r5FYmMpA9hbfcTZhNrM9XSZxjf+heVltkz/nTNwb+U7Q35e1k1KR4NO/juf8+VNk+jwK9oQESoSeEU+Pplkl7sUCCZikqPO07aYtIPzCJy8pt2hUsA9EzxPny6vWPSZiGxghxcCqZIHmuiJJFg39Pnl8P4R8MM3EqGr3qExCEtapO4ca4s+JVr8dvhiJCUac7e0rWHSTTzGh1qTyOk1d4T6tU4edJZXc8BFzsZ4O/GETzGfA8bYiaHfizAQNcwpcQ9cP66k+XQkQ+CYRUlHlNbq6JGItkJTUGkY/MHhs3jIy58iWKVEg1dyPsOsjWs6aL4UA52Uo/uV3TPsB/GHbC52gVonzz7pqNeNPoS46RHO2ekqlerkeE488uuM0fGwamQ3JhfDwKBklZcGjzo/sxFD8IKEAieGikcuO3fYIhijhuCBUuOyGt/bCMfMh3rStKGeDRVfzCFHi+WNQY1FtSqqh6P+PNfrR0Y5j2cwKF/6cUFN0iFMZ+LWeEmduTtRDaG1tCyZ1UKRetG69zpWgVn4tmMrVdDOTyyHoE2BrD8D4qft1+690Lvji+6y5g3vqM9sl1aPZO4t21BxKSwwcNLmS/fZo114YpU9Eu9xA3ly7h36OYaei5gh1Z4jc=" - revision_type: github - application: ams-demo-restore1 - deployment_group: ams-demo-restore1-DG - region: us-east-1 - on: - branch: develop - ruby: 2.5.3 -env: - matrix: - secure: 
bhFHxEHJJKvXc1rXvhx6ip9anTD9vEZSUO+rkXDN3M2HOV3wco2Dt8HH+7gy1fS3A8l/5+VB1LQ0vwRzykQlGARuGIFFd9y9VaPsdAdjqJbTeD6Neb4SHFu7pOEbhfCfdkU/wOLTn1HQ46bl0u33E3fFeVLRyN1vyIuvYW3o9ZHpfhni8enGC9UbQt65DHUVSUCgynutKIWK/lIiiIzxrOhySjQN3u05/W38o1nwsQLi3pWjj20SLD7U42VPK72TzIqkfs4LPcOSb9we/EMdWhIcrfqRZrC/bbVXB/56Un4ZUF/83y0dQJoglcHB7S+rRCGSx48b2ZtojG6B2vdJ96fNuDePf1YhTkolt9VxDL70AZdIiszADSPYJY4OgI4bUInl2BQvxueXQqoZjLkXSxLdHTD5ImZwfYioV3qgmdWXKdmxc6+MRlOznKXE1oHJqCtnwFC47BN4gq7VZoQHiQdpx4BMOWF13b6qGtO8pJK59bGDQPSO+eskBpZfghad3aMJ9c8+FESkDN9la8HXxlwyZDVNpysVLFFZqFcQlWQ/NG3r/e/NhmAfs3uyqi+bC4dsgg4MInfNQnzErshpfbvTXFJ29cKYnWCY9cWD0zLau2VehXHUxdaBoPaBONM8K73i3aqv/2YMZwSYAy/1egDslliOdyDb9ACKkKt/g9w= diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..e2923bb06 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,44 @@ +ARG HYRAX_IMAGE_VERSION=3.1.0 +FROM ghcr.io/samvera/hyku/hyku-base:$HYRAX_IMAGE_VERSION as hyku-base + +USER root + +ARG EXTRA_APK_PACKAGES="openjdk11-jre ffmpeg pkg-config yarn" +RUN apk --no-cache upgrade && \ + apk --no-cache add \ + curl \ + curl-dev \ + libcurl \ + libxml2-dev \ + mariadb-dev \ + mediainfo \ + openssh \ + perl \ + cmake \ + $EXTRA_APK_PACKAGES && \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh && \ + source "$HOME/.cargo/env" && \ + cargo install rbspy && \ + echo "******** Packages Installed *********" + +USER app + +RUN mkdir -p /app/fits && \ + cd /app/fits && \ + wget https://github.com/harvard-lts/fits/releases/download/1.5.0/fits-1.5.0.zip -O fits.zip && \ + unzip fits.zip && \ + rm fits.zip && \ + chmod a+x /app/fits/fits.sh +ENV PATH="${PATH}:/app/fits" + +COPY --chown=1001:101 $APP_PATH/Gemfile* /app/samvera/hyrax-webapp/ +RUN bundle install --jobs "$(nproc)" + +COPY --chown=1001:101 $APP_PATH /app/samvera/hyrax-webapp + +ARG SETTINGS__BULKRAX__ENABLED="false" +RUN RAILS_ENV=production SECRET_KEY_BASE=`bin/rake secret` DB_ADAPTER=nulldb bundle exec rake assets:precompile + +FROM hyku-base as hyku-worker +ENV 
MALLOC_ARENA_MAX=2 +CMD bundle exec sidekiq diff --git a/Gemfile b/Gemfile index adf7e76d5..93a9a0681 100644 --- a/Gemfile +++ b/Gemfile @@ -1,12 +1,17 @@ source 'https://rubygems.org' -ruby '2.5.3' - git_source(:github) do |repo_name| repo_name = "#{repo_name}/#{repo_name}" unless repo_name.include?("/") "https://github.com/#{repo_name}.git" end +# Bulkrax +group :bulkrax do + # our custom changes require us to lock in the version of bulkrax + gem 'bulkrax', git: 'https://github.com/samvera-labs/bulkrax.git', ref: '23efea3fd9d8d98746b73e570e0dc214ff764271' + gem 'willow_sword', git: 'https://github.com/notch8/willow_sword.git' +end + gem 'dotenv-rails' # Bundle edge Rails instead: gem 'rails', github: 'rails/rails' @@ -35,13 +40,9 @@ gem 'hydra-role-management', '~> 1.0' # Use Capistrano for deployment # gem 'capistrano-rails', group: :development -group :production do - gem 'mysql2', '~> 0.4.10' -end - group :development, :test do # Use sqlite3 as the database for Active Record - gem 'sqlite3', '1.3.13' + # gem 'sqlite3', '1.3.13' gem 'capybara-screenshot' gem 'rspec', "~> 3.7" gem 'rspec-rails', "~> 3.7" @@ -57,6 +58,7 @@ group :development, :test do gem 'solr_wrapper', '~> 2.1' gem 'webmock', '~> 3.7' gem 'rails-controller-testing' + gem 'rspec_junit_formatter' end group :development do @@ -84,6 +86,7 @@ gem 'simple_form', '5.0.0' gem 'aws-sdk-s3' gem 'aws-sdk-codedeploy' gem 'carrierwave', '~> 1.3' +gem 'mysql2', '~> 0.5.3' gem 'nokogiri' gem 'bootstrap-multiselect-rails' gem 'hyrax-batch_ingest', git: 'https://github.com/samvera-labs/hyrax-batch_ingest' @@ -95,7 +98,6 @@ gem 'sony_ci_api', github: 'WGBH-MLA/sony_ci_api_rewrite', branch: 'v0.1' # gem 'hyrax-iiif_av', github: 'samvera-labs/hyrax-iiif_av', branch: 'hyrax_master' gem 'webpacker' gem 'react-rails' -gem 'faker' gem 'database_cleaner' gem 'redlock', '~> 1.0' gem 'httparty', '~> 0.18' @@ -103,3 +105,11 @@ gem 'httparty', '~> 0.18' # Adding pry to all environments, because it's very useful for 
debugging # production environments on demo instances. gem 'pry-byebug', platforms: [:mri, :mingw, :x64_mingw] +gem 'activerecord-nulldb-adapter' +gem 'pronto' +gem 'pronto-brakeman', require: false +gem 'pronto-flay', require: false +gem 'pronto-rails_best_practices', require: false +gem 'pronto-rails_schema', require: false +gem 'pronto-rubocop', require: false +gem "sentry-raven" diff --git a/Gemfile.lock b/Gemfile.lock index a4b0529dc..2333f74c8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,30 @@ +GIT + remote: https://github.com/notch8/willow_sword.git + revision: 0a669d78617c6003e4aa1a46a10447be92be27d5 + specs: + willow_sword (0.2.0) + bagit (~> 0.4.1) + rails (>= 5.1.6) + rubyzip (>= 1.0.0) + +GIT + remote: https://github.com/samvera-labs/bulkrax.git + revision: 23efea3fd9d8d98746b73e570e0dc214ff764271 + ref: 23efea3fd9d8d98746b73e570e0dc214ff764271 + specs: + bulkrax (1.0.2) + bagit (~> 0.4) + coderay + iso8601 (~> 0.9.0) + kaminari + language_list (~> 1.2, >= 1.2.1) + libxml-ruby (~> 3.1.0) + loofah (>= 2.2.3) + oai (>= 0.4, < 2.x) + rack (>= 2.0.6) + rails (>= 5.1.6) + rdf (>= 2.0.2, < 4.0) + simple_form GIT remote: https://github.com/WGBH-MLA/sony_ci_api_rewrite.git revision: f98576c7060e11cc50da0f67cf4642d2b4372517 @@ -77,6 +104,8 @@ GEM arel (~> 8.0) activerecord-import (1.2.0) activerecord (>= 3.2) + activerecord-nulldb-adapter (0.4.0) + activerecord (>= 2.0.0) activesupport (5.1.7) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 0.7, < 2) @@ -89,33 +118,36 @@ GEM amazing_print (1.3.0) arel (8.0.0) ast (2.4.2) - autoprefixer-rails (10.3.1.0) + autoprefixer-rails (10.3.3.0) execjs (~> 2) awesome_nested_set (3.4.0) activerecord (>= 4.0.0, < 7.0) - aws-eventstream (1.1.1) - aws-partitions (1.488.0) - aws-sdk-codedeploy (1.42.0) - aws-sdk-core (~> 3, >= 3.119.0) + aws-eventstream (1.2.0) + aws-partitions (1.501.0) + aws-sdk-codedeploy (1.43.0) + aws-sdk-core (~> 3, >= 3.120.0) aws-sigv4 (~> 1.1) - aws-sdk-core (3.119.0) + aws-sdk-core (3.121.0) 
aws-eventstream (~> 1, >= 1.0.2) aws-partitions (~> 1, >= 1.239.0) aws-sigv4 (~> 1.1) jmespath (~> 1.0) - aws-sdk-kms (1.46.0) - aws-sdk-core (~> 3, >= 3.119.0) + aws-sdk-kms (1.48.0) + aws-sdk-core (~> 3, >= 3.120.0) aws-sigv4 (~> 1.1) - aws-sdk-s3 (1.99.0) - aws-sdk-core (~> 3, >= 3.119.0) + aws-sdk-s3 (1.102.0) + aws-sdk-core (~> 3, >= 3.120.0) aws-sdk-kms (~> 1) - aws-sigv4 (~> 1.1) - aws-sigv4 (1.2.4) + aws-sigv4 (~> 1.4) + aws-sigv4 (1.4.0) aws-eventstream (~> 1, >= 1.0.2) babel-source (5.8.35) babel-transpiler (0.7.0) babel-source (>= 4.0, < 6) execjs (~> 2.0) + bagit (0.4.4) + docopt (~> 0.5.0) + validatable (~> 1.6) bcp47 (0.3.3) i18n bcrypt (3.1.16) @@ -156,6 +188,7 @@ GEM bootstrap_form (4.4.0) actionpack (>= 5.0) activemodel (>= 5.0) + brakeman (5.1.1) breadcrumbs_on_rails (3.0.1) browse-everything (1.1.2) addressable (~> 2.5) @@ -189,6 +222,8 @@ GEM ssrf_filter (~> 1.0) childprocess (3.0.0) clipboard-rails (1.7.1) + code_analyzer (0.5.2) + sexp_processor coderay (1.1.3) coffee-rails (4.2.2) coffee-script (>= 2.2.0) @@ -221,6 +256,7 @@ GEM devise-guests (0.7.0) devise diff-lcs (1.4.4) + docopt (0.5.0) dotenv (2.7.6) dotenv-rails (2.7.6) dotenv (= 2.7.6) @@ -235,32 +271,31 @@ GEM dropbox_api (0.1.18) faraday (<= 1.0) oauth2 (~> 1.1) - dry-configurable (0.12.1) + dry-configurable (0.13.0) concurrent-ruby (~> 1.0) - dry-core (~> 0.5, >= 0.5.0) - dry-container (0.7.2) + dry-core (~> 0.6) + dry-container (0.9.0) concurrent-ruby (~> 1.0) - dry-configurable (~> 0.1, >= 0.1.3) - dry-core (0.6.0) + dry-configurable (~> 0.13, >= 0.13.0) + dry-core (0.7.1) concurrent-ruby (~> 1.0) dry-equalizer (0.3.0) dry-events (0.3.0) concurrent-ruby (~> 1.0) dry-core (~> 0.5, >= 0.5) - dry-inflector (0.2.0) + dry-inflector (0.2.1) dry-initializer (3.0.4) dry-logic (1.2.0) concurrent-ruby (~> 1.0) dry-core (~> 0.5, >= 0.5) dry-matcher (0.9.0) dry-core (~> 0.4, >= 0.4.8) - dry-monads (1.3.5) + dry-monads (1.4.0) concurrent-ruby (~> 1.0) - dry-core (~> 0.4, >= 0.4.4) - 
dry-equalizer - dry-schema (1.6.2) + dry-core (~> 0.7) + dry-schema (1.8.0) concurrent-ruby (~> 1.0) - dry-configurable (~> 0.8, >= 0.8.3) + dry-configurable (~> 0.13, >= 0.13.0) dry-core (~> 0.5, >= 0.5) dry-initializer (~> 3.0) dry-logic (~> 1.0) @@ -269,7 +304,7 @@ GEM dry-core (~> 0.5, >= 0.5) dry-types (~> 1.5) ice_nine (~> 0.11) - dry-transaction (0.13.2) + dry-transaction (0.13.3) dry-container (>= 0.2.8) dry-events (>= 0.1.0) dry-matcher (>= 0.7.0) @@ -280,14 +315,13 @@ GEM dry-core (~> 0.5, >= 0.5) dry-inflector (~> 0.1, >= 0.1.2) dry-logic (~> 1.0, >= 1.0.2) - dry-validation (1.6.0) + dry-validation (1.7.0) concurrent-ruby (~> 1.0) dry-container (~> 0.7, >= 0.7.1) - dry-core (~> 0.4) - dry-equalizer (~> 0.2) + dry-core (~> 0.5, >= 0.5) dry-initializer (~> 3.0) - dry-schema (~> 1.5, >= 1.5.2) - ebnf (2.1.3) + dry-schema (~> 1.8, >= 1.8.0) + ebnf (2.2.1) amazing_print (~> 1.2) htmlentities (~> 4.3) rdf (~> 3.1) @@ -297,6 +331,7 @@ GEM equivalent-xml (0.6.0) nokogiri (>= 1.4.3) erubi (1.10.0) + erubis (2.7.0) ethon (0.14.0) ffi (>= 1.15.0) execjs (2.8.1) @@ -315,7 +350,12 @@ GEM faraday (>= 0.7.4, < 1.0) fcrepo_wrapper (0.9.0) ruby-progressbar - ffi (1.15.3) + ffi (1.15.4) + flay (2.12.1) + erubis (~> 2.7.0) + path_expander (~> 1.0) + ruby_parser (~> 3.0) + sexp_processor (~> 4.0) flipflop (2.6.0) activesupport (>= 4.0) flot-rails (0.0.7) @@ -323,6 +363,9 @@ GEM font-awesome-rails (4.7.0.7) railties (>= 3.2, < 7) gems (1.2.0) + gitlab (4.17.0) + httparty (~> 0.18) + terminal-table (~> 1.5, >= 1.5.1) globalid (0.5.2) activesupport (>= 5.0) google-api-client (0.53.0) @@ -354,13 +397,13 @@ GEM google-apis-sheets_v4 (>= 0.4.0, < 1.0.0) googleauth (>= 0.5.0, < 1.0.0) nokogiri (>= 1.5.3, < 2.0.0) - googleauth (0.17.0) + googleauth (0.17.1) faraday (>= 0.17.3, < 2.0) jwt (>= 1.4, < 3.0) memoist (~> 0.16) multi_json (~> 1.11) os (>= 0.9, < 2.0) - signet (~> 0.14) + signet (~> 0.15) haml (5.2.2) temple (>= 0.8.0) tilt @@ -370,7 +413,7 @@ GEM hiredis (0.6.3) 
htmlentities (4.3.4) http_logger (0.6.0) - httparty (0.18.1) + httparty (0.19.0) mime-types (~> 3.0) multi_xml (>= 0.5.2) httpclient (2.8.3) @@ -477,6 +520,7 @@ GEM ice_nine (0.11.2) iiif_manifest (0.5.0) activesupport (>= 4) + iso8601 (0.9.1) jbuilder (2.11.2) activesupport (>= 5.0.0) jmespath (1.4.0) @@ -520,6 +564,7 @@ GEM kaminari-core (1.2.1) kaminari_route_prefix (0.1.1) kaminari (~> 1.0) + language_list (1.2.1) launchy (2.5.0) addressable (~> 2.7) ld-patch (3.1.3) @@ -542,6 +587,7 @@ GEM multi_json letter_opener (1.7.0) launchy (~> 2.2) + libxml-ruby (3.1.0) link_header (0.0.8) linkeddata (3.1.1) equivalent-xml (~> 0.6) @@ -573,6 +619,7 @@ GEM rb-fsevent (~> 0.9, >= 0.9.4) rb-inotify (~> 0.9, >= 0.9.7) ruby_dep (~> 1.2) + logger (1.4.3) loofah (2.12.0) crass (~> 1.0.2) nokogiri (>= 1.5.9) @@ -585,15 +632,15 @@ GEM method_source (1.0.0) mime-types (3.3.1) mime-types-data (~> 3.2015) - mime-types-data (3.2021.0704) + mime-types-data (3.2021.0901) mini_magick (4.11.0) - mini_mime (1.1.0) + mini_mime (1.1.1) mini_portile2 (2.6.1) minitest (5.14.4) multi_json (1.15.0) multi_xml (0.6.0) multipart-post (2.1.1) - mysql2 (0.4.10) + mysql2 (0.5.3) nest (3.2.0) redic net-http-persistent (4.0.1) @@ -603,11 +650,15 @@ GEM noid-rails (3.0.3) actionpack (>= 5.0.0, < 7) noid (~> 0.9) - nokogiri (1.12.3) + nokogiri (1.12.4) mini_portile2 (~> 2.6.1) racc (~> 1.4) nokogumbo (2.0.5) nokogiri (~> 1.8, >= 1.8.4) + oai (1.1.0) + builder (>= 3.1.0) + faraday + faraday_middleware oauth (0.5.6) oauth2 (1.4.7) faraday (>= 0.8, < 2.0) @@ -615,14 +666,18 @@ GEM multi_json (~> 1.3) multi_xml (~> 0.5) rack (>= 1.2, < 3) + octokit (4.21.0) + faraday (>= 0.9) + sawyer (~> 0.8.0, >= 0.5.3) openseadragon (0.6.0) rails (> 3.2.0) orm_adapter (0.5.0) os (1.1.1) - parallel (1.20.1) + parallel (1.21.0) parser (3.0.2.0) ast (~> 2.4.1) parslet (2.0.0) + path_expander (1.1.0) pbcore (0.3.2) factory_bot (~> 4.11) faker (~> 1.9) @@ -630,6 +685,27 @@ GEM sax-machine (~> 1.3) posix-spawn (0.3.15) 
power_converter (0.1.2) + pronto (0.10.0) + gitlab (~> 4.0, >= 4.0.0) + httparty (>= 0.13.7) + octokit (~> 4.7, >= 4.7.0) + rainbow (>= 2.2, < 4.0) + rugged (~> 0.24, >= 0.23.0) + thor (~> 0.20.0) + pronto-brakeman (0.10.0) + brakeman (>= 3.2.0) + pronto (~> 0.10.0) + pronto-flay (0.10.0) + flay (~> 2.8) + pronto (~> 0.10.0) + pronto-rails_best_practices (0.10.0) + pronto (~> 0.10.0) + rails_best_practices (~> 1.16, >= 1.15.0) + pronto-rails_schema (0.10.0) + pronto (~> 0.10.0) + pronto-rubocop (0.10.0) + pronto (~> 0.10.0) + rubocop (~> 0.50, >= 0.49.1) pry (0.13.1) coderay (~> 1.1) method_source (~> 1.0) @@ -671,10 +747,18 @@ GEM rails-dom-testing (2.0.3) activesupport (>= 4.2.0) nokogiri (>= 1.6) - rails-html-sanitizer (1.3.0) + rails-html-sanitizer (1.4.2) loofah (~> 2.3) rails_autolink (1.1.6) rails (> 3.1) + rails_best_practices (1.21.0) + activesupport + code_analyzer (>= 0.5.2) + erubis + i18n + json + require_all (~> 3.0) + ruby-progressbar railties (5.1.7) actionpack (= 5.1.7) activesupport (= 5.1.7) @@ -763,6 +847,7 @@ GEM uber (< 0.2.0) request_store (1.5.0) rack (>= 1.4) + require_all (3.0.0) responders (3.0.1) actionpack (>= 5.0) railties (>= 5.0) @@ -802,6 +887,8 @@ GEM rspec-mocks (~> 3.9.0) rspec-support (~> 3.9.0) rspec-support (3.9.4) + rspec_junit_formatter (0.4.1) + rspec-core (>= 2, < 4, != 2.12.0) rubocop (0.85.1) parallel (~> 1.10) parser (>= 2.7.0.1) @@ -829,7 +916,10 @@ GEM ruby-progressbar (1.11.0) ruby2_keywords (0.0.5) ruby_dep (1.5.0) + ruby_parser (3.17.0) + sexp_processor (~> 4.15, >= 4.15.1) rubyzip (1.3.0) + rugged (0.99.0) samvera-nesting_indexer (2.0.0) dry-equalizer sass (3.7.4) @@ -845,14 +935,21 @@ GEM tilt (>= 1.1, < 3) sassc (2.4.0) ffi (~> 1.9) + sawyer (0.8.2) + addressable (>= 2.3.5) + faraday (> 0.8, < 2.0) sax-machine (1.3.2) scanf (1.0.0) select2-rails (3.5.11) selenium-webdriver (3.142.7) childprocess (>= 0.5, < 4.0) rubyzip (>= 1.2.2) - shex (0.6.1) - ebnf (~> 2.0) + sentry-raven (2.13.0) + faraday (>= 0.7.6, < 1.0) 
+ sexp_processor (4.15.3) + shex (0.6.3) + ebnf (~> 2.1, >= 2.2) + htmlentities (~> 4.3) json-ld (~> 3.1) json-ld-preloaded (~> 3.1) rdf (~> 3.1) @@ -861,12 +958,12 @@ GEM sxp (~> 1.1) shoulda-matchers (4.5.1) activesupport (>= 4.2.0) - sidekiq (6.2.1) + sidekiq (6.2.2) connection_pool (>= 2.2.2) rack (~> 2.0) redis (>= 4.2.0) - signet (0.15.0) - addressable (~> 2.3) + signet (0.16.0) + addressable (~> 2.8) faraday (>= 0.17.3, < 2.0) jwt (>= 1.5, < 3.0) multi_json (~> 1.10) @@ -883,9 +980,11 @@ GEM activesupport nokogiri xml-simple - sparql (3.1.7) + sony_ci_api (0.2.1) + sparql (3.1.8) builder (~> 3.2) ebnf (~> 2.1) + logger (~> 1.4) rdf (~> 3.1, >= 3.1.14) rdf-aggregate-repo (~> 3.1) rdf-xsd (~> 3.1) @@ -905,11 +1004,12 @@ GEM actionpack (>= 4.0) activesupport (>= 4.0) sprockets (>= 3.0.0) - sqlite3 (1.3.13) ssrf_filter (1.0.7) sxp (1.1.0) rdf (~> 3.1) temple (0.8.2) + terminal-table (1.8.0) + unicode-display_width (~> 1.1, >= 1.1.1) thor (1.1.0) thread_safe (0.3.6) tilt (2.0.10) @@ -930,8 +1030,9 @@ GEM uber (0.1.0) uglifier (4.2.0) execjs (>= 0.3.0, < 3) - unicode-display_width (1.7.0) - unicode-types (1.6.0) + unicode-display_width (1.8.0) + unicode-types (1.7.0) + validatable (1.6.7) warden (1.2.9) rack (>= 2.0.9) web-console (3.7.0) @@ -939,7 +1040,7 @@ GEM activemodel (>= 5.0) bindex (>= 0.4.0) railties (>= 5.0) - webdrivers (4.6.0) + webdrivers (4.6.1) nokogiri (~> 1.6) rubyzip (>= 1.3.0) selenium-webdriver (>= 3.0, < 4.0) @@ -955,7 +1056,8 @@ GEM websocket-driver (0.6.5) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) - xml-simple (1.1.8) + xml-simple (1.1.9) + rexml xpath (3.2.0) nokogiri (~> 1.8) xray-rails (0.3.2) @@ -965,11 +1067,13 @@ PLATFORMS ruby DEPENDENCIES + activerecord-nulldb-adapter aws-sdk-codedeploy aws-sdk-s3 bixby blacklight_advanced_search (~> 6.4.0) bootstrap-multiselect-rails + bulkrax! 
capybara (~> 3.0) capybara-screenshot carrierwave (~> 1.3) @@ -990,9 +1094,15 @@ DEPENDENCIES jquery-rails letter_opener listen (>= 3.0.5, < 3.2) - mysql2 (~> 0.4.10) + mysql2 (~> 0.5.3) nokogiri pbcore (~> 0.3.0) + pronto + pronto-brakeman + pronto-flay + pronto-rails_best_practices + pronto-rails_schema + pronto-rubocop pry-byebug puma (~> 3.12) rails (~> 5.1.5) @@ -1005,8 +1115,10 @@ DEPENDENCIES rspec-activemodel-mocks rspec-its rspec-rails (~> 3.7) + rspec_junit_formatter sass-rails (~> 5.0) selenium-webdriver + sentry-raven shoulda-matchers sidekiq simple_form (= 5.0.0) @@ -1019,10 +1131,8 @@ DEPENDENCIES webdrivers (~> 4.0) webmock (~> 3.7) webpacker + willow_sword! xray-rails -RUBY VERSION - ruby 2.5.3p105 - BUNDLED WITH - 2.0.2 + 2.2.16 diff --git a/README.md b/README.md index e6c35d5fe..d195b7ca8 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,102 @@ +[Docker development setup](#docker-development-setup) + +[Bash into the container](#bash-into-the-container) + +[Handling Secrets with SOPS](#handling-secrets-with-sops) + +[Deploy a new release](#deploy-a-new-release) + +[Run import from admin page](#run-import-from-admin-page) + +# Docker development setup + +We recommend committing .env to your repo with good defaults. .env.development, .env.production etc can be used for local overrides and should not be in the repo. See [Handling Secrets with SOPS](#handling-secrets-with-sops) for how to manage secrets. 
+ +1) Install Docker.app + +2) Install stack car + ``` bash + gem install stack_car + ``` + +3) Sign in with dory + ``` bash + dory up + +4) Start the server + ``` bash + sc up + ``` + +5) Load and seed the database + ``` bash + sc be rake db:migrate + sc be rake ams:reset_data + sc be rake db:seed + ``` + +6) The app should be visible at in the browser at `hyku.test` + +### While in the container you can do the following +- Run rspec + ``` bash + bundle exec rspec + ``` +- Access the rails console + ``` bash + bundle exec rails c + ``` + +### Handling Secrets with SOPS + +[**SOPS**](https://github.com/mozilla/sops) is used to handle this project's secrets. + +The secrets in this repository include: +- `.env*` files +- `*-values.yaml` files + +Scripts (`bin/decrypt-secrets` and `bin/encrypt-secrets`) are included in this project to help with managing secrets. + +**To decrypt secrets**: + +You will need to do this if you are new to the project or there have been changes to any secrets files that are required for development. + +In terminal: +```bash +bin/decrypt-secrets +``` + +This will find and decrypt files with the `.enc` extension. + +**To encrypt secrets**: + +You will need to do this when you have edited secrets and are ready to commit them. + +In terminal: +```bash +bin/encrypt-secrets +``` + +This will find and output an encrypted version of secret files with an `.enc` extension. + +Release and Deployment are handled by the gitlab ci by default. See ops/deploy-app to deploy from locally, but note all Rancher install pull the currently tagged registry image + +## Staging Deploys: N8 Architecture + +Staging builds and deploys to Notch8 infrastructure are handled by Gitlab CI. + +**Setup your `gitlab` git remote** + +You'll only need to do this once. You need to set this remote to push, build and deploy your work. + +- Run `git remote add gitlab git@gitlab.com:notch8/GBH.git` +- Run `git remote`. 
You've successfully added the **gitlab** remote if your output lists it. It will look like: +``` +> git remote # Run git remote +gitlab # New gitlab remote +origin +``` + # ams Archival Management System to support the American Archive of Public Broadcasting @@ -5,10 +104,19 @@ Archival Management System to support the American Archive of Public Broadcastin `master`: [![Build Status](https://travis-ci.org/WGBH-MLA/ams.svg?branch=master)](https://travis-ci.org/WGBH-MLA/ams) `develop`: [![Build Status](https://travis-ci.org/WGBH-MLA/ams.svg?branch=develop)](https://travis-ci.org/WGBH-MLA/ams) -The Archival Managment System is an application using the [Hyrax gem](https://github.com/samvera/hyrax) to provide a repository for [PBCore](http://pbcore.org/) data about externally hosted AV content. It includes models, controllers, actors, and presenters for PBCore-based worktypes of Assets, Contributions, Physical Instantiations, Digital Instantiations, and Essence Tracks. +The Archival Managment System is an application using the [Hyrax gem](https://github.com/samvera/hyrax) to provide a repository for [PBCore](http://pbcore.org/) data about externally hosted AV content. It includes models, controllers, actors, and presenters for PBCore-based worktypes of Assets, Contributions, Physical Instantiations, Digital Instantiations, and Essence Tracks. AMS also adds the ability to export records in several user-friendly CSV reports and PBCore XML files, as well as using [hyrax-batch_ingest gem](https://github.com/samvera-labs/hyrax-batch_ingest) to implement batch ingest of PBCore XML and spreadsheets, and batch metadata updates via spreadsheets. +### Enable Bulkrax: + +- Add SETTINGS__BULKRAX__ENABLED=true to [.env](.env) files +- Add ` require bulkrax/application` to app/assets/javascripts/application.js and app/assets/stylesheets/application.css files. 
+ +(in a `docker-compose exec web bash` if you're doing docker otherwise in your terminal) +```bash +bundle exec rails db:migrate +``` ### Dependencies diff --git a/app/actors/hyrax/actors/asset_actor.rb b/app/actors/hyrax/actors/asset_actor.rb index cb570a5ad..f9713ba70 100644 --- a/app/actors/hyrax/actors/asset_actor.rb +++ b/app/actors/hyrax/actors/asset_actor.rb @@ -13,6 +13,8 @@ def create(env) add_description_types(env) add_date_types(env) + # queue indexing if we are importing + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing save_aapb_admin_data(env) && super && create_or_update_contributions(env, contributions) end @@ -21,6 +23,8 @@ def update(env) add_title_types(env) add_description_types(env) add_date_types(env) + # queue indexing if we are importing + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing save_aapb_admin_data(env) && super && create_or_update_contributions(env, contributions) end @@ -55,7 +59,9 @@ def set_admin_data_attributes(admin_data, env) end def should_empty_admin_data_value?(key, admin_data, env) - admin_data.send(key).present? && !env.attributes[key].present? + key != :bulkrax_importer_id && + admin_data.send(key).present? && + !env.attributes[key].present? end def delete_removed_annotations(admin_data, env) @@ -111,7 +117,12 @@ def annotation_attributes end def find_or_create_admin_data(env) - admin_data = ::AdminData.create unless env.curation_concern.admin_data_gid.present? + admin_data = if env.attributes['bulkrax_identifier'].present? + AdminData.find_by_gid(env.attributes['admin_data_gid']) + else + ::AdminData.create unless env.curation_concern.admin_data_gid.present? 
+ end + if admin_data Rails.logger.debug "Create AdminData at #{admin_data.gid}" return admin_data diff --git a/app/actors/hyrax/actors/contribution_actor.rb b/app/actors/hyrax/actors/contribution_actor.rb index 3be66594f..2fff4a0a6 100644 --- a/app/actors/hyrax/actors/contribution_actor.rb +++ b/app/actors/hyrax/actors/contribution_actor.rb @@ -3,6 +3,16 @@ module Hyrax module Actors class ContributionActor < Hyrax::Actors::BaseActor + def create(env) + # queue indexing if we are importing + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing + super + end + + def update(env) + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing + super + end end end end diff --git a/app/actors/hyrax/actors/digital_instantiation_actor.rb b/app/actors/hyrax/actors/digital_instantiation_actor.rb index d328063de..b2449fabd 100644 --- a/app/actors/hyrax/actors/digital_instantiation_actor.rb +++ b/app/actors/hyrax/actors/digital_instantiation_actor.rb @@ -4,25 +4,33 @@ module Hyrax module Actors class DigitalInstantiationActor < Hyrax::Actors::BaseActor def create(env) - if file_uploaded?(env) - xml_file = uploaded_xml(env) - else - xml_file = env.attributes.delete(:pbcore_xml) - end + xml_file = file_uploaded?(env) ? uploaded_xml(env) : env.attributes.delete(:pbcore_xml) pbcore_doc = PBCore::InstantiationDocument.parse(xml_file) set_env_attributes_from_pbcore(env, pbcore_doc) - save_instantiation_aapb_admin_data(env) && super && parse_pbcore_essense_track(env,pbcore_doc) + + # queue indexing if we are importing + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing + + if env.attributes['bulkrax_identifier'].present? + save_instantiation_aapb_admin_data(env) && super + else + save_instantiation_aapb_admin_data(env) && super && parse_pbcore_essense_track(env,pbcore_doc) + end end def update(env) - if file_uploaded?(env) - xml_file = uploaded_xml(env) + xml_file = file_uploaded?(env) ? 
uploaded_xml(env) : env.attributes.delete(:pbcore_xml) + + # queue indexing if we are importing + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing + + if env.curation_concern&.bulkrax_identifier + save_instantiation_aapb_admin_data(env) && super else - xml_file = env.attributes.delete(:pbcore_xml) + pbcore_doc = PBCore::InstantiationDocument.parse(xml_file) + env = parse_pbcore_instantiation(env,pbcore_doc) + save_instantiation_aapb_admin_data(env) && super && destroy_child_objects(env) && parse_pbcore_essense_track(env,pbcore_doc) end - pbcore_doc = PBCore::InstantiationDocument.parse(xml_file) - env = parse_pbcore_instantiation(env,pbcore_doc) - save_instantiation_aapb_admin_data(env) && super && destroy_child_objects(env) && parse_pbcore_essense_track(env,pbcore_doc) end def destroy(env) @@ -146,11 +154,13 @@ def instantiation_admin_data_attributes end def find_or_create_instantiation_admin_data(env) - instantiation_admin_data = if env.curation_concern.instantiation_admin_data_gid.blank? 
- InstantiationAdminData.create - else - InstantiationAdminData.find_by_gid!(env.curation_concern.instantiation_admin_data_gid) - end + instantiation_admin_data_gid = env.curation_concern.instantiation_admin_data_gid || env.attributes['instantiation_admin_data_gid'] + instantiation_admin_data = if instantiation_admin_data_gid + InstantiationAdminData.find_by_gid!(instantiation_admin_data_gid) + else + InstantiationAdminData.create + end + instantiation_admin_data end diff --git a/app/actors/hyrax/actors/environment.rb b/app/actors/hyrax/actors/environment.rb new file mode 100644 index 000000000..814895f87 --- /dev/null +++ b/app/actors/hyrax/actors/environment.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +# OVERRIDE Hyrax 2.9 to add in import flag +module Hyrax + module Actors + class Environment + # @param [ActiveFedora::Base] curation_concern work to operate on + # @param [Ability] current_ability the authorizations of the acting user + # @param [ActionController::Parameters] attributes user provided form attributes + def initialize(curation_concern, current_ability, attributes, importing = false) + @curation_concern = curation_concern + @current_ability = current_ability + @attributes = attributes.to_h.with_indifferent_access + @importing = importing + end + + attr_reader :curation_concern, :current_ability, :attributes, :importing + + # @return [User] the user from the current_ability + def user + current_ability.current_user + end + end + end +end diff --git a/app/actors/hyrax/actors/essence_track_actor.rb b/app/actors/hyrax/actors/essence_track_actor.rb index 2eaba9f3a..57cd0168c 100644 --- a/app/actors/hyrax/actors/essence_track_actor.rb +++ b/app/actors/hyrax/actors/essence_track_actor.rb @@ -3,6 +3,17 @@ module Hyrax module Actors class EssenceTrackActor < Hyrax::Actors::BaseActor + + def create(env) + # queue indexing if we are importing + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing + super + end + + def 
update(env) + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing + super + end end end end diff --git a/app/actors/hyrax/actors/interpret_visibility_actor_decorator.rb b/app/actors/hyrax/actors/interpret_visibility_actor_decorator.rb new file mode 100644 index 000000000..68797d4a2 --- /dev/null +++ b/app/actors/hyrax/actors/interpret_visibility_actor_decorator.rb @@ -0,0 +1,30 @@ +# deal with fact that this class creates a brand new environment and does not pass +# any added arguments down to the new version. For importer flag compatibility + +module Hyrax + module Actors + module InterpretVisibilityActorDecorator + # @param [Hyrax::Actors::Environment] env + # @return [Boolean] true if create was successful + def create(env) + intention = Hyrax::Actors::InterpretVisibilityActor::Intention.new(env.attributes) + attributes = intention.sanitize_params + new_env = Hyrax::Actors::Environment.new(env.curation_concern, env.current_ability, attributes, env.importing) + validate(env, intention, attributes) && apply_visibility(new_env, intention) && + next_actor.create(new_env) + end + + # @param [Hyrax::Actors::Environment] env + # @return [Boolean] true if update was successful + def update(env) + intention = Hyrax::Actors::InterpretVisibilityActor::Intention.new(env.attributes) + attributes = intention.sanitize_params + new_env = Hyrax::Actors::Environment.new(env.curation_concern, env.current_ability, attributes, env.importing) + validate(env, intention, attributes) && apply_visibility(new_env, intention) && + next_actor.update(new_env) + end + end + end +end + +::Hyrax::Actors::InterpretVisibilityActor.prepend(Hyrax::Actors::InterpretVisibilityActorDecorator) diff --git a/app/actors/hyrax/actors/physical_instantiation_actor.rb b/app/actors/hyrax/actors/physical_instantiation_actor.rb index edfe852a6..a8a01f79f 100644 --- a/app/actors/hyrax/actors/physical_instantiation_actor.rb +++ b/app/actors/hyrax/actors/physical_instantiation_actor.rb 
@@ -4,10 +4,15 @@ module Hyrax module Actors class PhysicalInstantiationActor < Hyrax::Actors::BaseActor def create(env) + # queue indexing if we are importing + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing + save_instantiation_aapb_admin_data(env) && super end def update(env) + # queue indexing if we are importing + env.curation_concern.reindex_extent = "queue#{env.importing.id}" if env.importing save_instantiation_aapb_admin_data(env) && super end diff --git a/app/assets/images/bulkrax/removed.png b/app/assets/images/bulkrax/removed.png new file mode 100644 index 000000000..f9b0b388f Binary files /dev/null and b/app/assets/images/bulkrax/removed.png differ diff --git a/app/assets/javascripts/application.js b/app/assets/javascripts/application.js index 019c7c2f8..30e0244cf 100644 --- a/app/assets/javascripts/application.js +++ b/app/assets/javascripts/application.js @@ -24,6 +24,7 @@ //= require blacklight/blacklight //= require video +//= require bulkrax/application //= require_tree ./global //= require hyrax //= require bootstrap-multiselect diff --git a/app/assets/stylesheets/application.css b/app/assets/stylesheets/application.css index c8045a1ff..f5595fb21 100644 --- a/app/assets/stylesheets/application.css +++ b/app/assets/stylesheets/application.css @@ -12,6 +12,7 @@ * *= require_tree ./global *= require dataTables/bootstrap/3/jquery.dataTables.bootstrap + *= require 'bulkrax/application' *= require_self *= require 'blacklight_advanced_search' diff --git a/app/controllers/api/assets_controller.rb b/app/controllers/api/assets_controller.rb new file mode 100644 index 000000000..ae394eb9b --- /dev/null +++ b/app/controllers/api/assets_controller.rb @@ -0,0 +1,38 @@ +module API + class AssetsController < APIController + # Authenticate user before all actions. + # NOTE: For Basic HTTP auth to work: + # * the `http_authenticatable` config option for Devise must be set to true + # (see config/initializers/devise.rb). 
+ # * The Authorization request header must be set to "Basic {cred}" where + # {cred} is the base64 encoded username:password. + # TODO: Move authn into base APIController class and make modifications so + # that the SonyCi::APIController will work with authn, which needs to be + # done. + before_action do + authenticate_user! + end + + + def show + respond_to do |format| + format.json { render json: pbcore_json } + format.xml { render xml: pbcore_xml } + end + end + + private + + def pbcore_json + @pbcore_json ||= Hash.from_xml(pbcore_xml).to_json + end + + def pbcore_xml + @pbcore_xml ||= solr_doc.export_as_pbcore + end + + def solr_doc + @solr_doc ||= SolrDocument.find(params[:id]) + end + end +end diff --git a/app/controllers/api_controller.rb b/app/controllers/api_controller.rb index 5908a91df..748eba396 100644 --- a/app/controllers/api_controller.rb +++ b/app/controllers/api_controller.rb @@ -1,3 +1,15 @@ class APIController < ActionController::API + # Gives us respond_to in controller actions which we use to respond with + # JSON or PBCore XML. + include ActionController::MimeResponds + # Common API features here, e.g. auth. + rescue_from ActiveFedora::ObjectNotFoundError, with: :not_found + + private + + def not_found(error) + # TODO: render errors in the proper format: xml or json. + render text: "Not Found", status: 404 + end end diff --git a/app/controllers/hyrax/batch_ingest/batches_controller.rb b/app/controllers/hyrax/batch_ingest/batches_controller.rb new file mode 100644 index 000000000..469c65264 --- /dev/null +++ b/app/controllers/hyrax/batch_ingest/batches_controller.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +# OVERRIDE HYRAX-Batch_Ingest revision: dc9d38039728eab581ab7b1cb55cf9ff33984b13 +# disable /batches endpoint for new creation. 
Redirect to bulkrax's importer paths + +require_dependency Hyrax::BatchIngest::Engine.root.join('app', 'controllers', 'hyrax', 'batch_ingest', 'batches_controller').to_s + +Hyrax::BatchIngest::BatchesController.class_eval do + def new + # OVERRIDE HYRAX-Batch_Ingest revision: dc9d38039728eab581ab7b1cb55cf9ff33984b13 + redirect_to '/importers/new' + end + + def create + # OVERRIDE HYRAX-Batch_Ingest revision: dc9d38039728eab581ab7b1cb55cf9ff33984b13 + redirect_to '/importers/new' + end + + def index + # OVERRIDE HYRAX-Batch_Ingest revision: dc9d38039728eab581ab7b1cb55cf9ff33984b13 + redirect_to '/importers' + end +end diff --git a/app/controllers/hyrax/my_controller.rb b/app/controllers/hyrax/my_controller.rb index cf1aa4d8f..352833970 100644 --- a/app/controllers/hyrax/my_controller.rb +++ b/app/controllers/hyrax/my_controller.rb @@ -19,6 +19,7 @@ def self.configure_facets config.add_facet_field IndexesWorkflow.suppressed_field, helper_method: :suppressed_to_status config.add_facet_field solr_name("resource_type", :facetable), limit: 5 config.add_facet_field solr_name("hyrax_batch_ingest_batch_id", :stored_searchable) + config.add_facet_field solr_name("bulkrax_importer_id", :stored_searchable) end end diff --git a/app/controllers/sony_ci/api_controller.rb b/app/controllers/sony_ci/api_controller.rb index 9df9e8596..1b043622d 100644 --- a/app/controllers/sony_ci/api_controller.rb +++ b/app/controllers/sony_ci/api_controller.rb @@ -2,6 +2,9 @@ module SonyCi class APIController < ::APIController + + respond_to :json + # Specify error handlers for different kinds of errors. NOTE: for *all* # endpoints, we *always* want to respond with JSON and an appropriate HTTP # error, regardless of success or error. 
We *never* want to accidentally diff --git a/app/controllers/sony_ci/webhook_logs_controller.rb b/app/controllers/sony_ci/webhook_logs_controller.rb index 11150f06c..91feb296b 100644 --- a/app/controllers/sony_ci/webhook_logs_controller.rb +++ b/app/controllers/sony_ci/webhook_logs_controller.rb @@ -1,25 +1,75 @@ class SonyCi::WebhookLogsController < ApplicationController - before_action :set_sony_ci_webhook_log, only: [:show ] + + before_action(only: :index) do + @pagination = Pagination.new( + total: SonyCi::WebhookLog.count, + page: params.fetch('page', 1), + per_page: params.fetch('per_page', 50) + ) + end # GET /sony_ci/webhook_logs # GET /sony_ci/webhook_logs.json def index - @sony_ci_webhook_logs = SonyCi::WebhookLog.all + @presenters = sony_ci_webhook_logs.map do |sony_ci_webhook_log| + SonyCi::WebhookLogPresenter.new(sony_ci_webhook_log) + end end # GET /sony_ci/webhook_logs/1 # GET /sony_ci/webhook_logs/1.json def show + respond_to do |format| + format.html do + @presenter = SonyCi::WebhookLogPresenter.new(sony_ci_webhook_log) + end + format.json + end end private - # Use callbacks to share common setup or constraints between actions. - def set_sony_ci_webhook_log - @sony_ci_webhook_log = SonyCi::WebhookLog.find(params[:id]) + def sony_ci_webhook_log + @sony_ci_webhook_log ||= SonyCi::WebhookLog.find(params[:id]) + end + + def sony_ci_webhook_logs + @sony_ci_webhook_logs ||= SonyCi::WebhookLog.all.order(sort_order).limit(per_page).offset(offset) + end + + def sort_order + { created_at: :desc } + end + + def per_page + params.fetch(:per_page, 50).to_i + end + + def offset + [0, page.to_i - 1].max * per_page + end + + def page + params.fetch(:page, 1) end - # Only allow a list of trusted parameters through. 
- def sony_ci_webhook_log_params - params.fetch(:sony_ci_webhook_log, {}) + class Pagination + attr_reader :total, :page, :per_page + def initialize(total:, page: 1, per_page: 50) + @total, @page, @per_page = total.to_i, page.to_i, per_page.to_i + end + + def showing + "#{lower_bound} - #{upper_bound}" + end + + private + + def lower_bound + ((page - 1) * per_page) + 1 + end + + def upper_bound + [ ( page * per_page ), total ].min + end end end diff --git a/app/controllers/sony_ci/webhooks_controller.rb b/app/controllers/sony_ci/webhooks_controller.rb index b80067a32..20f526757 100644 --- a/app/controllers/sony_ci/webhooks_controller.rb +++ b/app/controllers/sony_ci/webhooks_controller.rb @@ -1,5 +1,5 @@ module SonyCi - class WebhooksController < APIController + class WebhooksController < ::APIController after_action :create_webhook_log rescue_from StandardError do |error| @@ -57,9 +57,12 @@ def sony_ci_id def create_webhook_log(error: nil) webhook_log.response_headers = response.headers.to_h webhook_log.response_body = response_json + webhook_log.response_status = response.status if error webhook_log.error = error.class webhook_log.error_message = error.message + else + webhook_log.guids = [ guid_from_sony_ci_filename ] end webhook_log.save! 
end diff --git a/app/factories/bulkrax/object_factory.rb b/app/factories/bulkrax/object_factory.rb new file mode 100644 index 000000000..b250966dd --- /dev/null +++ b/app/factories/bulkrax/object_factory.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require_dependency Bulkrax::Engine.root.join('app', 'factories', 'bulkrax', 'object_factory') + +Bulkrax::ObjectFactory.class_eval do # rubocop:disable Metrics/ParameterLists + # rubocop:disable Metrics/ParameterLists + def initialize(attributes:, source_identifier_value:, work_identifier:, collection_field_mapping:, replace_files: false, user: nil, klass: nil, update_files: false, importer: nil) + @attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes) + @replace_files = replace_files + @update_files = update_files + @user = user || User.batch_user + @work_identifier = work_identifier + @collection_field_mapping = collection_field_mapping + @source_identifier_value = source_identifier_value + @klass = klass || Bulkrax.default_work_type.constantize + @importer = importer + end + + # Regardless of what the Parser gives us, these are the properties we are prepared to accept. 
+ def permitted_attributes + klass.properties.keys.map(&:to_sym) + %i[id edit_users edit_groups read_groups visibility work_members_attributes admin_set_id member_of_collections_attributes pbcore_xml skip_file_upload_validation bulkrax_importer_id] + end + + def environment(attrs) + Hyrax::Actors::Environment.new(object, Ability.new(@user), attrs, @importer) + end +end diff --git a/app/forms/hyrax/asset_form.rb b/app/forms/hyrax/asset_form.rb index 5cf327578..247712bf1 100644 --- a/app/forms/hyrax/asset_form.rb +++ b/app/forms/hyrax/asset_form.rb @@ -33,7 +33,7 @@ class AssetForm < Hyrax::Forms::WorkForm annotations: [:child_annotations] } - self.hidden_fields += [ :hyrax_batch_ingest_batch_id, :last_pushed, :last_updated, :needs_update ] + self.hidden_fields += [ :hyrax_batch_ingest_batch_id, :last_pushed, :last_updated, :needs_update, :bulkrax_importer_id ] self.terms += (self.required_fields + field_groups.values.map(&:to_a).flatten).uniq @@ -172,6 +172,14 @@ def annotations end end + def bulkrax_importer_id + if model.admin_data + model.admin_data.bulkrax_importer_id + else + "" + end + end + def hyrax_batch_ingest_batch_id if model.admin_data model.admin_data.hyrax_batch_ingest_batch_id diff --git a/app/indexers/asset_indexer.rb b/app/indexers/asset_indexer.rb index 8a0c0fc95..41f022717 100644 --- a/app/indexers/asset_indexer.rb +++ b/app/indexers/asset_indexer.rb @@ -17,6 +17,7 @@ def generate_solr_document solr_doc['broadcast_date_drsim'] = object.broadcast_date if object.broadcast_date solr_doc['created_date_drsim'] = object.created_date if object.created_date solr_doc['copyright_date_drsim'] = object.copyright_date if object.copyright_date + solr_doc[Solrizer.solr_name('bulkrax_identifier', :facetable)] = object.bulkrax_identifier if object.admin_data # Index the admin_data_gid @@ -32,10 +33,12 @@ def generate_solr_document #Indexing for search by batch_id solr_doc['hyrax_batch_ingest_batch_id_tesim'] = object.admin_data.hyrax_batch_ingest_batch_id if 
!object.admin_data.hyrax_batch_ingest_batch_id.blank? + solr_doc['bulkrax_importer_id_tesim'] = object.admin_data.bulkrax_importer_id if !object.admin_data.bulkrax_importer_id.blank? solr_doc['last_pushed'] = object.admin_data.last_pushed if !object.admin_data.last_pushed.blank? solr_doc['last_updated'] = object.admin_data.last_updated if !object.admin_data.last_updated.blank? solr_doc['needs_update'] = object.admin_data.needs_update if !object.admin_data.needs_update.blank? + end end end diff --git a/app/indexers/digital_instantiation_indexer.rb b/app/indexers/digital_instantiation_indexer.rb index faaf4f310..fbe81538a 100644 --- a/app/indexers/digital_instantiation_indexer.rb +++ b/app/indexers/digital_instantiation_indexer.rb @@ -13,6 +13,7 @@ class DigitalInstantiationIndexer < AMS::WorkIndexer def generate_solr_document super.tap do |solr_doc| + solr_doc[Solrizer.solr_name('bulkrax_identifier', :facetable)] = object.bulkrax_identifier if object.instantiation_admin_data #Indexing as english text so we can use it on asset show page solr_doc['instantiation_admin_data_tesim'] = object.instantiation_admin_data.gid if !object.instantiation_admin_data.gid.blank? 
diff --git a/app/indexers/essence_track_indexer.rb b/app/indexers/essence_track_indexer.rb index 87dccbe51..3425c9ad1 100644 --- a/app/indexers/essence_track_indexer.rb +++ b/app/indexers/essence_track_indexer.rb @@ -11,9 +11,9 @@ class EssenceTrackIndexer < AMS::WorkIndexer self.thumbnail_path_service = AAPB::WorkThumbnailPathService # Uncomment this block if you want to add custom indexing behavior: - # def generate_solr_document - # super.tap do |solr_doc| - # solr_doc['my_custom_field_ssim'] = object.my_custom_property - # end - # end + def generate_solr_document + super.tap do |solr_doc| + solr_doc[Solrizer.solr_name('bulkrax_identifier', :facetable)] = object.bulkrax_identifier + end + end end diff --git a/app/indexers/physical_instantiation_indexer.rb b/app/indexers/physical_instantiation_indexer.rb index 9dcd9379b..0a551d444 100644 --- a/app/indexers/physical_instantiation_indexer.rb +++ b/app/indexers/physical_instantiation_indexer.rb @@ -13,6 +13,7 @@ class PhysicalInstantiationIndexer < AMS::WorkIndexer def generate_solr_document super.tap do |solr_doc| + solr_doc[Solrizer.solr_name('bulkrax_identifier', :facetable)] = object.bulkrax_identifier if object.instantiation_admin_data #Indexing as english text so we can use it on asset show page solr_doc['instantiation_admin_data_tesim'] = object.instantiation_admin_data.gid if !object.instantiation_admin_data.gid.blank? 
diff --git a/app/indexers/queued_nesting_indexer.rb b/app/indexers/queued_nesting_indexer.rb new file mode 100644 index 000000000..e63de0b42 --- /dev/null +++ b/app/indexers/queued_nesting_indexer.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +class QueuedNestingIndexer + extend Samvera::NestingIndexer + def self.reindex_relationships(id:, maximum_nesting_depth: nil, extent:) + if extent.match("queue") + Rails.logger.info("nested indexing queued") + Redis.current.zadd("nested:index:#{extent.delete("queue")}", 0, id.to_s) + else + ::Samvera::NestingIndexer.reindex_relationships(id: id, extent: 'full') + end + true + end +end diff --git a/app/jobs/bulkrax/child_relationships_job_decorator.rb b/app/jobs/bulkrax/child_relationships_job_decorator.rb new file mode 100644 index 000000000..5519dd15d --- /dev/null +++ b/app/jobs/bulkrax/child_relationships_job_decorator.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +# OVERRIDE bulkrax v.1.0.0 to add a limit to the job rescheduling +# while forming relationships to child works that were found + +module Bulkrax + module ChildRelationshipsJobDecorator + def perform(*args) + @args = args + + if entry.factory_class == Collection + collection_membership + else + work_membership + end + # Not all of the Works/Collections exist yet; reschedule + rescue Bulkrax::ChildWorksError + # OVERRIDE bulkrax v.1.0.0 + # In case the work hasn't been created, don't endlessly reschedule the job + attempts = (args[3] || 0) + 1 + child_ids = @missing_entry_ids.presence || args[1] + + reschedule(args[0], child_ids, args[2], attempts) unless attempts > 5 + end + + def work_membership + members_works = [] + # reject any Collections, they can't be children of Works + child_works_hash.each { |k, v| members_works << k if v[:class_name] != 'Collection' } + if members_works.length < child_entries.length # rubocop:disable Style/IfUnlessModifier + Rails.logger.warn("Cannot add collections as children of works: #{(@child_entries.length - 
members_works.length)} collections were discarded for parent entry #{@entry.id} (of #{@child_entries.length})") + end + work_parent_work_child(members_works) if members_works.present? + # OVERRIDE bulkrax v.1.0.0 + # reschedule the job only with works that don't exist yet + raise ChildWorksError if @missing_entry_ids.present? + end + + # OVERRIDE bulkrax v.1.0.0 + # don't stop all child relationships from being formed just because some child works don't exist + def child_works_hash + @missing_entry_ids = [] + + @child_works_hash ||= child_entries.each_with_object({}) do |child_entry, hash| + work = child_entry.factory.find + + if work.blank? + @missing_entry_ids << child_entry.id + next + end + + hash[work.id] = { class_name: work.class.to_s, entry.parser.source_identifier => child_entry.identifier } + end + end + + private + + # OVERRIDE bulkrax v.1.0.0 + # passing 4 args now + def reschedule(entry_id, child_entry_ids, importer_run_id, attempts) + ChildRelationshipsJob.set(wait: 10.minutes).perform_later(entry_id, child_entry_ids, importer_run_id, attempts) + end + end +end + +::Bulkrax::ChildRelationshipsJob.prepend(Bulkrax::ChildRelationshipsJobDecorator) diff --git a/app/jobs/bulkrax/delete_work_job.rb b/app/jobs/bulkrax/delete_work_job.rb new file mode 100644 index 000000000..2db068655 --- /dev/null +++ b/app/jobs/bulkrax/delete_work_job.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +require_dependency Bulkrax::Engine.root.join('app', 'jobs', 'bulkrax', 'delete_work_job') + +Bulkrax::DeleteWorkJob.class_eval do + # rubocop:disable Rails/SkipsModelValidations + def perform(entry, importer_run) + work = entry.factory.find + if work.is_a? 
Asset + asset_destroyer = AMS::AssetDestroyer.new + asset_destroyer.destroy([work.id]) + end + importer_run.increment!(:deleted_records) + importer_run.decrement!(:enqueued_records) + end + # rubocop:enable Rails/SkipsModelValidations +end diff --git a/app/jobs/bulkrax/importer_job.rb b/app/jobs/bulkrax/importer_job.rb new file mode 100644 index 000000000..cb6b70aa9 --- /dev/null +++ b/app/jobs/bulkrax/importer_job.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true +# OVERRIDE Bulkrax 1.0.2 to rescue errors + +require_dependency Bulkrax::Engine.root.join('app', 'jobs', 'bulkrax', 'importer_job').to_s + + Bulkrax::ImporterJob.class_eval do + def perform(importer_id, only_updates_since_last_import = false) + importer = Bulkrax::Importer.find(importer_id) + + importer.current_run + unzip_imported_file(importer.parser) + import(importer, only_updates_since_last_import) + update_current_run_counters(importer) + schedule(importer) if importer.schedulable? + Bulkrax::IndexAfterJob.set(wait: 1.minute).perform_later(importer) + rescue RuntimeError => e + # Quits job when xml format is invalid + Rails.logger.error "#{e.class}: #{e.message}\n\nBacktrace:\n#{e.backtrace.join("\n")}" + nil + end + end diff --git a/app/jobs/bulkrax/index_after_job.rb b/app/jobs/bulkrax/index_after_job.rb new file mode 100644 index 000000000..7932c0fb8 --- /dev/null +++ b/app/jobs/bulkrax/index_after_job.rb @@ -0,0 +1,30 @@ +module Bulkrax + class IndexAfterJob < ApplicationJob + queue_as :import + + def perform(importer) + # check if importer is done, otherwise reschedule + pending_num = importer.entries.left_outer_joins(:latest_status) + .where('bulkrax_statuses.status_message IS NULL ').count + return reschedule(importer.id) unless pending_num.zero? + + # read queue and index objects + set = Redis.current.zpopmax("nested:index:#{importer.id}", 100) + logger.error(set.to_s) + return if set.blank? 
+ loop do + set.each do |key, score| + Hyrax.config.nested_relationship_reindexer.call(id: key, extent: 'full') + end + set = Redis.current.zpopmax("nested:index:#{importer.id}", 100) + logger.error(set.to_s) + break if set.blank? + end + end + + def reschedule(importer_id) + Bulkrax::IndexAfterJob.set(wait: 1.minutes).perform_later(importer_id: importer_id) + false + end + end +end diff --git a/app/models/admin_data.rb b/app/models/admin_data.rb index eb6febcd0..d72d88142 100644 --- a/app/models/admin_data.rb +++ b/app/models/admin_data.rb @@ -2,6 +2,7 @@ class AdminData < ApplicationRecord attr_reader :asset_error belongs_to :hyrax_batch_ingest_batch, optional: true + belongs_to :bulkrax_importer, optional: true, class_name: 'Bulkrax::Importer' has_many :annotations, dependent: :destroy self.table_name = "admin_data" diff --git a/app/models/asset.rb b/app/models/asset.rb index 8c9de7538..78175b8bc 100644 --- a/app/models/asset.rb +++ b/app/models/asset.rb @@ -60,6 +60,9 @@ def annotations # TODO: Use RDF::Vocab for applicable terms. # See https://github.com/ruby-rdf/rdf-vocab/tree/develop/lib/rdf/vocab + property :bulkrax_identifier, predicate: ::RDF::URI("http://ams2.wgbh-mla.org/resource#bulkraxIdentifier"), multiple: false do |index| + index.as :stored_searchable, :facetable + end property :asset_types, predicate: ::RDF::URI.new("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#hasType"), multiple: true do |index| index.as :stored_searchable, :facetable diff --git a/app/models/bulkrax/csv_entry.rb b/app/models/bulkrax/csv_entry.rb new file mode 100644 index 000000000..daffaec16 --- /dev/null +++ b/app/models/bulkrax/csv_entry.rb @@ -0,0 +1,162 @@ +# frozen_string_literal: true +# OVERRIDE Bulkrax 1.0.2 + +require_dependency Bulkrax::Engine.root.join('app', 'models', 'bulkrax', 'csv_entry') + +Bulkrax::CsvEntry.class_eval do + def self.read_data(path) + raise StandardError, 'CSV path empty' if path.blank? 
+ + CSV.read(path, + headers: true, + encoding: 'utf-8') + end + + def self.data_for_entry(data, _source_id = nil) + # If a multi-line CSV data is passed, grab the first row + data = data.first if data.is_a?(CSV::Table) + # model has to be separated so that it doesn't get mistranslated by to_h + raw_data = data.to_h + raw_data[:model] = data[:model] if data[:model].present? + # If the collection field mapping is not 'collection', add 'collection' - the parser needs it + raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection' + # If the children field mapping is not 'children', add 'children' - the parser needs it + raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children' + return raw_data + end + + def build_metadata + raise StandardError, 'Record not found' if record.nil? + raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys)) + + self.parsed_metadata = {} + add_identifier + add_metadata_for_model + self.parsed_metadata['bulkrax_importer_id'] = importer.id if self.raw_metadata['model'] == 'Asset' + add_visibility + add_ingested_metadata + add_rights_statement + add_collections + add_local + + self.parsed_metadata + end + + def build_export_metadata + # make_round_trippable + self.parsed_metadata = {} + build_mapping_metadata + + self.parsed_metadata = flatten_hash(self.parsed_metadata) + + # TODO: fix the "send" parameter in the conditional below + # currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #" + if mapping['collection']&.[]('join') + self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ') + # self.parsed_metadata['collection'] = 
hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';') + else + hyrax_record.member_of_collections.each_with_index do |collection, i| + self.parsed_metadata["collection_#{i + 1}"] = collection.id + # self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first + end + end + + build_files unless hyrax_record.is_a?(Collection) + self.parsed_metadata + end + + def build_mapping_metadata + # OVERRIDE Bulkrax 1.0.2 + mapping.each do |key, value| + next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key) + next if ['access_control_id', 'admin_set_id', 'model'].include?(key) + next if value['excluded'] + + object_key = key if value.key?('object') + models = valid_attribute(hyrax_record, key) + + next unless models.present? || object_key.present? + + if object_key.present? + # this will need to be updated if objects are ever used in the bulkrax mapping + # build_object(value) + else + # always start a new model at index 1 so they align on the csv + last_model = '' + index = 1 + + models.each do |model| + next unless model + index = last_model != model.class.to_s ? 1 : index + 1 + last_model = model.class.to_s + + build_value(model, key, value, index) + end + end + end + end + + def valid_attribute(hyrax_record, key) + # we only want the models that have our current attribute. plus we need to retain the order + # of the models so they are mapped properly in the build_value method + models = [] + + models << (model_responds_to(hyrax_record, key) ? hyrax_record : nil) + + hyrax_record.child_works&.each do |child_work| + models << (model_responds_to(child_work, key) ? child_work : nil) + + child_work.child_works&.each do |grandchild_work| + models << (model_responds_to(grandchild_work, key) ? grandchild_work : nil) + end + end + + models.sort_by! 
{ |item| item.class.to_s } + end + + def model_responds_to(model, key) + # 'id' is not an attribute on a model, but we need to account for it + key == 'id' || model.respond_to?(key.to_s) && model[key].present? + end + + def build_value(current_record, key, value, index) + # OVERRIDE Bulkrax 1.0.2 + model = current_record.class + data = current_record.send(key.to_s) + parsed_metadata["#{model}_#{index}"] ||= {} + + if data.is_a?(ActiveTriples::Relation) + if value['join'] + self.parsed_metadata["#{model}_#{index}"][key_for_export(key, model)] = data.map { |d| prepare_export_data(d) }.join('| ') + end + elsif data + self.parsed_metadata["#{model}_#{index}"][key_for_export(key, model)] = prepare_export_data(data) + end + end + + def key_for_export(key, model) + # OVERRIDE Bulkrax 1.0.2 + clean_key = key_without_numbers(key) + "#{model}.#{clean_key}" + end + + def flatten_hash(data, initializer = {}, index = '') + data.each_with_object(initializer) do |(key, value), hash| + curation_concerns = ['Asset', 'Contribution', 'DigitalInstantiation', 'EssenceTrack', 'PhysicalInstantiation'] + model = key_without_numbers(key) + + if curation_concerns.include?(model) + index = key.split('_').last + hash[key] ||= '' + end + + if value.is_a? Hash + flatten_hash(value, hash, index) + else + index.present? ? hash["#{key}_#{index}"] = value : hash[key] = value + end + + hash + end + end +end diff --git a/app/models/bulkrax/pbcore_xml_entry.rb b/app/models/bulkrax/pbcore_xml_entry.rb new file mode 100644 index 000000000..9a9966585 --- /dev/null +++ b/app/models/bulkrax/pbcore_xml_entry.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +require 'nokogiri' + +module Bulkrax + class PbcoreXmlEntry < XmlEntry + def self.read_data(path) + if MIME::Types.type_for(path).include?('text/csv') + CSV.read(path, + headers: true, + encoding: 'utf-8') + else + # This doesn't cope with BOM sequences: + Nokogiri::XML(open(path), &:strict).remove_namespaces! 
+ end + end + + def self.data_for_entry(data, source_id) + collections = [] + children = [] + xpath_for_source_id = ".//*[name()='#{source_id}']" + { + source_id => data.xpath(xpath_for_source_id).first.text.gsub('cpb-aacip/', 'cpb-aacip-'), + delete: data.xpath(".//*[name()='delete']").first&.text, + data: + data.to_xml( + encoding: 'UTF-8', + save_with: + Nokogiri::XML::Node::SaveOptions::NO_DECLARATION | Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS + ).delete("\n").delete("\t").squeeze(' '), # Remove newlines, tabs, and extra whitespace + collection: collections, + children: children + } + end + + def build_metadata + raise StandardError, 'Record not found' if record.nil? + + self.parsed_metadata = {} + self.parsed_metadata[work_identifier] = self.raw_metadata[source_identifier] + self.parsed_metadata['model'] = self.raw_metadata['model'] + self.parsed_metadata['pbcore_xml'] = self.raw_metadata['pbcore_xml'] if self.raw_metadata['pbcore_xml'].present? + self.parsed_metadata['format'] = self.raw_metadata['format'] if self.raw_metadata['model'] == 'DigitalInstantiation' + + if self.raw_metadata['skip_file_upload_validation'] == true + self.parsed_metadata['skip_file_upload_validation'] = self.raw_metadata['skip_file_upload_validation'] + end + + self.raw_metadata.each do |key, value| + add_metadata(key_without_numbers(key), value) + end + + if self.raw_metadata['model'] == 'Asset' + bulkrax_importer_id = importer.id + admin_data_gid = update_or_create_admin_data_gid(bulkrax_importer_id) + + self.parsed_metadata['bulkrax_importer_id'] = bulkrax_importer_id + self.parsed_metadata['admin_data_gid'] = admin_data_gid + build_annotations(self.raw_metadata['annotations'], admin_data_gid) if self.raw_metadata['annotations'].present? 
+ end + + add_visibility + add_rights_statement + add_admin_set_id + add_collections + self.parsed_metadata['file'] = self.raw_metadata['file'] + add_local + + self.parsed_metadata + end + + def update_or_create_admin_data_gid(bulkrax_importer_id) + manifest_asset_id = self.raw_metadata['Asset.id'].strip if self.raw_metadata.keys.include?('Asset.id') + xml_asset_id = self.raw_metadata['id'] + work = Asset.where(id: manifest_asset_id || xml_asset_id).first if manifest_asset_id || xml_asset_id + + admin_data_gid = if work.present? + work.admin_data.update!(bulkrax_importer_id: bulkrax_importer_id) + work.admin_data_gid + else + AdminData.create(bulkrax_importer_id: bulkrax_importer_id).gid + end + + admin_data_gid + end + + def build_annotations(annotations, admin_data_gid) + annotations.each do |annotation| + if annotation['annotation_type'].nil? + raise "annotation_type not registered with the AnnotationTypesService: #{annotation['annotation_type']}." + end + + admin_data = AdminData.find_by_gid(admin_data_gid) + Annotation.find_or_create_by( + annotation_type: annotation['annotation_type'], + source: annotation['source'], + value: annotation['value'], + annotation: annotation['annotation'], + version: annotation['version'], + admin_data_id: admin_data.id + ) + end + end + end +end diff --git a/app/models/concerns/bulkrax/has_local_processing.rb b/app/models/concerns/bulkrax/has_local_processing.rb new file mode 100644 index 000000000..7423b69b5 --- /dev/null +++ b/app/models/concerns/bulkrax/has_local_processing.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +module Bulkrax::HasLocalProcessing + # This method is called during build_metadata + # add any special processing here, for example to reset a metadata property + # to add a custom property from outside of the import data + def add_local; end +end diff --git a/app/models/concerns/bulkrax/has_matchers.rb b/app/models/concerns/bulkrax/has_matchers.rb new file mode 100644 index 000000000..d13f742ff --- 
/dev/null +++ b/app/models/concerns/bulkrax/has_matchers.rb @@ -0,0 +1,58 @@ +# OVERRIDE BULKRAX 1.0.2 to avoid ActiveTriples::Relation::ValueError +require_dependency Bulkrax::Engine.root.join('app', 'models', 'concerns', 'bulkrax', 'has_matchers') + +Bulkrax::HasMatchers.class_eval do # rubocop:disable Metrics/ParameterLists + def add_metadata(node_name, node_content, index = nil) + field_to(node_name).each do |name| + matcher = self.class.matcher(name, mapping[name].symbolize_keys) if mapping[name] # the field matched to a pre parsed value in application_matcher.rb + object_name = get_object_name(name) || false # the "key" of an object property. e.g. { object_name: { alpha: 'beta' } } + multiple = multiple?(name) # the property has multiple values. e.g. 'letters': ['a', 'b', 'c'] + object_multiple = object_name && multiple?(object_name) # the property's value is an array of object(s) + + next unless field_supported?(name) || (object_name && field_supported?(object_name)) + + if object_name + Rails.logger.info("Bulkrax Column automatically matched object #{node_name}, #{node_content}") + parsed_metadata[object_name] ||= object_multiple ? 
[{}] : {} + end + + value = if matcher + result = matcher.result(self, node_content) + matched_metadata(multiple, name, result, object_multiple) + elsif multiple + Rails.logger.info("Bulkrax Column automatically matched #{node_name}, #{node_content}") + # OVERRIDE BULKRAX 1.0.2 to avoid ActiveTriples::Relation::ValueError + multiple_metadata(node_content, node_name) + else + Rails.logger.info("Bulkrax Column automatically matched #{node_name}, #{node_content}") + single_metadata(node_content) + end + + set_parsed_data(object_multiple, object_name, name, index, value) + end + end + + def multiple_metadata(content, name = nil) + return unless content + + case content + when Nokogiri::XML::NodeSet + content&.content + when Array + # OVERRIDE BULKRAX 1.0.2 to avoid ActiveTriples::Relation::ValueError + if name == 'head' || name == 'tail' + content.map do |obj| + obj.delete("id") + end + else + content + end + when Hash + Array.wrap(content) + when String + Array.wrap(content.strip) + else + Array.wrap(content) + end + end +end \ No newline at end of file diff --git a/app/models/concerns/bulkrax/import_behavior.rb b/app/models/concerns/bulkrax/import_behavior.rb new file mode 100644 index 000000000..737680e2b --- /dev/null +++ b/app/models/concerns/bulkrax/import_behavior.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +# adds importer to allow index queueing + +require_dependency Bulkrax::Engine.root.join('app', 'models', 'concerns', 'bulkrax', 'import_behavior') + +Bulkrax::ImportBehavior.class_eval do + def factory + @factory ||= Bulkrax::ObjectFactory.new(attributes: self.parsed_metadata || self.raw_metadata, + source_identifier_value: identifier, + work_identifier: parser.work_identifier, + collection_field_mapping: parser.collection_field_mapping, + replace_files: replace_files, + user: user, + klass: factory_class, + update_files: update_files, + importer: self.importer + ) + end +end diff --git a/app/models/concerns/bulkrax/pbcore_parser_behavior.rb 
b/app/models/concerns/bulkrax/pbcore_parser_behavior.rb new file mode 100644 index 000000000..450b0d4c5 --- /dev/null +++ b/app/models/concerns/bulkrax/pbcore_parser_behavior.rb @@ -0,0 +1,64 @@ +module Bulkrax + module PbcoreParserBehavior + def entry_class + Bulkrax::PbcoreXmlEntry + end + # Return all files in the import directory and sub-directories + def file_paths + @file_paths ||= + # Relative to the file + if file? && zip? + Dir.glob("#{importer_unzip_path}/**/*").reject { |f| File.file?(f) == false } + elsif file? + Dir.glob("#{File.dirname(parser_fields['import_file_path'])}/**/*").reject { |f| File.file?(f) == false } + # In the supplied directory + else + Dir.glob("#{parser_fields['import_file_path']}/**/*").reject { |f| File.file?(f) == false } + end + end + + private + + # these methods are shared between the xml and manifest parsers; they don't pass in the same amount of arguments + def parse_rows(rows, type, index, related_identifier = nil, parent_asset = nil) + rows.each do |current_object| + set_model(type, index, current_object, parent_asset) + add_object(current_object.symbolize_keys, type, related_identifier) + end + end + + def set_model(type, index, current_object, parent_asset) + key_count = objects.select { |obj| obj[:model] == type }.size + 1 + bulkrax_identifier = current_object[:bulkrax_identifier] || Bulkrax.fill_in_blank_source_identifiers.call(self, "#{type}-#{index}-#{key_count}") + + if current_object && current_object[:model].blank? 
+ current_object.merge!({ + model: type, + work_identifier => bulkrax_identifier, + title: create_title(parent_asset) + }) + else + # always return a bulkrax_identifier + current_object&.merge!({ work_identifier => bulkrax_identifier }) || + { work_identifier => bulkrax_identifier } + end + end + + def create_title(parent_asset) + # the xml parser doesn't pass an asset but the manifest parser does + asset = parent_asset || objects.first + return unless asset + + asset[:series_title] || asset[:title] + end + + def raise_format_errors(invalid_files) + return unless invalid_files.present? + + error_msg = invalid_files.map do |failure| + "#{failure[:message]}, in file: #{failure[:filepath]}" + end + raise "#{ error_msg.count == 1 ? error_msg.first : error_msg.join(" ****** ")}" + end + end +end \ No newline at end of file diff --git a/app/models/contribution.rb b/app/models/contribution.rb index 80036578e..5fc3db3bc 100644 --- a/app/models/contribution.rb +++ b/app/models/contribution.rb @@ -8,7 +8,10 @@ class Contribution < ActiveFedora::Base # Change this to restrict which works can be added as a child. 
# self.valid_child_concerns = [] - + property :bulkrax_identifier, predicate: ::RDF::URI("http://ams2.wgbh-mla.org/resource#bulkraxIdentifier"), multiple: false do |index| + index.as :stored_searchable, :facetable + end + property :contributor, predicate: ::RDF::URI.new("http://www.w3.org/2006/vcard/ns#hasName"), multiple: false do |index| index.as :stored_searchable end diff --git a/app/models/digital_instantiation.rb b/app/models/digital_instantiation.rb index 2a13f7990..99d558d6b 100644 --- a/app/models/digital_instantiation.rb +++ b/app/models/digital_instantiation.rb @@ -43,6 +43,10 @@ def pbcore_validate_instantiation_xsd end end + property :bulkrax_identifier, predicate: ::RDF::URI("http://ams2.wgbh-mla.org/resource#bulkraxIdentifier"), multiple: false do |index| + index.as :stored_searchable, :facetable + end + property :date, predicate: ::RDF::URI.new("http://purl.org/dc/terms/date"), multiple: true, index_to_parent: true do |index| index.as :stored_searchable, :facetable end diff --git a/app/models/essence_track.rb b/app/models/essence_track.rb index 8ad98cebb..ddef737fd 100644 --- a/app/models/essence_track.rb +++ b/app/models/essence_track.rb @@ -18,6 +18,10 @@ class EssenceTrack < ActiveFedora::Base end end + property :bulkrax_identifier, predicate: ::RDF::URI("http://ams2.wgbh-mla.org/resource#bulkraxIdentifier"), multiple: false do |index| + index.as :stored_searchable, :facetable + end + property :track_type, predicate: ::RDF::URI.new("http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#trackType"), multiple: false do |index| index.as :stored_searchable end diff --git a/app/models/physical_instantiation.rb b/app/models/physical_instantiation.rb index d9477a810..1be5e005a 100644 --- a/app/models/physical_instantiation.rb +++ b/app/models/physical_instantiation.rb @@ -24,6 +24,10 @@ class PhysicalInstantiation < ActiveFedora::Base end end + property :bulkrax_identifier, predicate: ::RDF::URI("http://ams2.wgbh-mla.org/resource#bulkraxIdentifier"), 
multiple: false do |index| + index.as :stored_searchable, :facetable + end + property :date, predicate: ::RDF::URI.new("http://purl.org/dc/terms/date"), multiple: true, index_to_parent: true do |index| index.as :stored_searchable, :facetable end diff --git a/app/models/solr_document.rb b/app/models/solr_document.rb index 509959514..79d44701b 100644 --- a/app/models/solr_document.rb +++ b/app/models/solr_document.rb @@ -87,6 +87,10 @@ def asset_types self[Solrizer.solr_name('asset_types')] end + def bulkrax_identifier + self[Solrizer.solr_name('bulkrax_identifier')] + end + def genre self[Solrizer.solr_name('genre')] end @@ -447,6 +451,10 @@ def aapb_preservation_disk self[Solrizer.solr_name('aapb_preservation_disk')] end + def bulkrax_importer_id + self[Solrizer.solr_name('bulkrax_importer_id')] + end + def hyrax_batch_ingest_batch_id self[Solrizer.solr_name('hyrax_batch_ingest_batch_id')] end diff --git a/app/models/sony_ci/webhook_log.rb b/app/models/sony_ci/webhook_log.rb index 8a401d8df..dcc8f48dc 100644 --- a/app/models/sony_ci/webhook_log.rb +++ b/app/models/sony_ci/webhook_log.rb @@ -1,6 +1,11 @@ class SonyCi::WebhookLog < ApplicationRecord - serialize :request_header, JSON + serialize :request_headers, JSON serialize :request_body, JSON - serialize :response_header, JSON + serialize :response_headers, JSON serialize :response_body, JSON + serialize :guids, Array + + validates :url, presence: true + validates :action, presence: true + validates :response_status, inclusion: { in: 200..599 } end diff --git a/app/parsers/csv_parser.rb b/app/parsers/csv_parser.rb new file mode 100644 index 000000000..1c0769cc7 --- /dev/null +++ b/app/parsers/csv_parser.rb @@ -0,0 +1,293 @@ +# frozen_string_literal: true +# OVERRIDE Bulkrax 1.0.2 + +class CsvParser < Bulkrax::CsvParser + attr_accessor :objects, :record_objects + def records(_opts = {}) + # OVERRIDE Bulkrax 1.0.2 + file_for_import = only_updates ? 
parser_fields['partial_import_file_path'] : import_file_path + # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read. + csv_data = entry_class.read_data(file_for_import) + csv_headers = csv_data.headers.map { |header| key_without_numbers(header) } + invalid_headers = validate_csv_headers(csv_headers, file_for_import) + raise_format_errors(invalid_headers) if invalid_headers.present? + importer.parser_fields['total'] = csv_data.count + importer.save + @records ||= csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil) } + end + + def create_works + # OVERRIDE Bulkrax 1.0.2 + self.record_objects = [] + records.each_with_index do |full_row, index| + + set_objects(full_row, index).each do |record| + break if limit_reached?(limit, index) + + seen[record[work_identifier]] = true + new_entry = find_or_create_entry(entry_class, record[work_identifier], 'Bulkrax::Importer', record.to_h.compact) + if record[:delete].present? + Bulkrax::DeleteWorkJob.send(perform_method, new_entry, current_run) + else + Bulkrax::ImportWorkJob.send(perform_method, new_entry.id, current_run.id) + end + end + increment_counters(index) + end + importer.record_status + rescue StandardError => e + status_info(e) + end + + def missing_elements(keys) + # OVERRIDE Bulkrax 1.0.2 + required_elements.map(&:to_s) - keys.map(&:to_s) - ['title'] + end + + def setup_parents + # OVERRIDE Bulkrax 1.0.2 + pts = [] + record_objects.each do |record| + r = if record.respond_to?(:to_h) + record.to_h + else + record + end + next unless r.is_a?(Hash) + children = if r[:children].is_a?(String) + r[:children].split(/\s*[:;|]\s*/) + else + r[:children] + end + next if children.blank? + pts << { + r[source_identifier] => children + } + end + pts.blank? ? 
pts : pts.inject(:merge) + end + + def collections + # retrieve a list of unique collections + records.map do |r| + collections = [] + r[collection_field_mapping].split(/\s*[;|]\s*/).each { |title| collections << { title: title } } if r[collection_field_mapping].present? + model_field_mappings.each do |model_mapping| + collections << r if r[model_mapping.to_sym]&.downcase == 'collection' + end + collections + end.flatten.compact.uniq + rescue RuntimeError => e + nil + end + + def collections_total + collections.present? ? collections.size : 0 + rescue RuntimeError => e + nil + end + + def works_total + works.present? ? works.size : 0 + rescue RuntimeError => e + nil + end + + def works + records - collections + rescue RuntimeError => e + nil + end + + def create_new_entries + current_work_ids.each_with_index do |wid, index| + break if limit_reached?(limit, index) + new_entry = find_or_create_entry(entry_class, wid, 'Bulkrax::Exporter') + entry = Bulkrax::ExportWorkJob.perform_now(new_entry.id, current_run.id) + + self.headers |= entry.parsed_metadata.keys if entry + end + end + + # All possible column names + def export_headers + # OVERRIDE Bulkrax 1.0.2 + headers = sort_headers(self.headers) + headers.delete('file') if headers.include?('file') + + headers.uniq + end + + def sort_headers(headers) + headers.sort_by! do |item| + klass = '' + attribute = '' + index = '' + + if item.include? '.' + klass, remainder = item.split('.') + parts = remainder.split('_') + index = parts.pop + attribute = parts.join('_') + elsif item.include? 
'_' + klass, index = item.split('_') + end + + order = if klass == 'Asset' + 1 + elsif klass == 'PhysicalInstantiation' + 2 + elsif klass == 'Contribution' + 3 + elsif klass == 'DigitalInstantiation' + 4 + elsif klass == 'EssenceTrack' + 5 + end + + "#{order}_#{index}_#{attribute}" + end + end + + private + + def validate_csv_headers(headers, file_for_import) + csv_headers = headers - ['annotation', 'children', 'id', 'model', 'ref', 'source', 'version'] + unknown_headers = [] + + csv_headers.sort.each do |key| + unknown_headers << { message: "Unknown header: #{key}", filepath: "#{file_for_import}" } unless valid_header_key?(key.strip) + end + unknown_headers + end + + def valid_header_key?(key) + klass, value = key.split('.') + object_class = klass if Hyrax.config.curation_concerns.include?(klass.constantize) + extra_attr = if object_class == "Asset" + (AdminData.attribute_names.dup - ['created_at', 'updated_at'] + + Annotation.ingestable_attributes).uniq + elsif object_class.include?("Instantiation") + (InstantiationAdminData.attribute_names.dup - ['created_at', 'updated_at']) + end + fedora_attr = object_class.constantize.properties.collect { |p| p.first.dup }.push('id'.dup) + attr = extra_attr.nil? ? fedora_attr : fedora_attr.concat(extra_attr.deep_dup) + attr.collect { |a| a.prepend(object_class + ".") } + [[object_class] + attr].flatten.include?(key) + end + + def raise_format_errors(invalid_headers) + return unless invalid_headers.present? + + error_msg = invalid_headers.map do |failure| + "#{failure[:message]}, in file: #{failure[:filepath]}" + end + raise "#{ error_msg.count == 1 ? error_msg.first : error_msg.join(" ****** ")}" + end + + def set_objects(full_row, index) + self.objects = [] + current_object = {} + full_row = full_row.select { |k, v| !k.nil? } + full_row_to_hash = full_row.to_hash + asset_id = full_row_to_hash['Asset.id'].strip if full_row_to_hash.keys.include?('Asset.id') + asset = Asset.find(asset_id) if asset_id.present? 
+ + full_row_to_hash.keys.each do |key| + standarized_key = key_without_numbers(key) + # if the key is a Class, but not a property (e.g. "Asset", not "Asset.id") + unless key.match(/\./) + add_object(current_object.symbolize_keys) + key_count = objects.select { |obj| obj[:model] == standarized_key }.count + 1 + bulkrax_identifier = full_row_to_hash["#{standarized_key}.bulkrax_identifier_#{key_count}"] || Bulkrax.fill_in_blank_source_identifiers.call(self, "#{standarized_key}-#{index}-#{key_count}") + asset = Asset.where(bulkrax_identifier: [bulkrax_identifier]).first if asset.nil? + admin_data_gid = if standarized_key == 'Asset' + if asset.present? + asset.admin_data.update!(bulkrax_importer_id: importer.id) + asset.admin_data_gid + else + AdminData.create( + bulkrax_importer_id: importer.id + ).gid + end + end + + current_object = { + 'model' => standarized_key, + work_identifier.to_s => bulkrax_identifier, + 'title' => create_title(asset) + } + current_object.merge!({'admin_data_gid' => admin_data_gid}) if admin_data_gid + next + end + + klass, value = standarized_key.split('.') + admin_data = AdminData.find_by_gid!(current_object['admin_data_gid']) if current_object['admin_data_gid'].present? + annotation_type_values = AnnotationTypesService.new.select_all_options.to_h.transform_keys(&:downcase).values + is_valid_annotation_type = annotation_type_values.include?(value) + + if is_valid_annotation_type + set_annotations(admin_data, full_row_to_hash, standarized_key, value) + elsif value == 'sonyci_id' + set_sonyci_id(admin_data, full_row_to_hash[key]) + else + raise "class key column is missing on row #{index}: #{full_row_to_hash}" unless klass == current_object['model'] + current_object[value] ||= full_row_to_hash[key] + end + end + + add_object(current_object.symbolize_keys) + end + + def set_admin_data_bulkrax_importer_id(admin_data) + return unless admin_data.present? 
+ + admin_data.update(bulkrax_importer_id: importer.id) + end + + def set_annotations(admin_data, full_row_to_hash, key, value) + annotation = Annotation.find_by(annotation_type: value, admin_data: admin_data.id) + + if annotation.present? + annotation.update( + annotation: full_row_to_hash["Asset.annotation"] || nil, + ref: full_row_to_hash["Asset.ref"] || nil, + source: full_row_to_hash["Asset.source"] || nil, + value: full_row_to_hash[key], + version: full_row_to_hash["Asset.version"] || nil + ) + else + Annotation.create( + admin_data_id: admin_data.id, + annotation: full_row_to_hash["Asset.annotation"] || nil, + annotation_type: value, + ref: full_row_to_hash["Asset.ref"] || nil, + source: full_row_to_hash["Asset.source"] || nil, + value: full_row_to_hash[key], + version: full_row_to_hash["Asset.version"] || nil + ) + end + end + + def set_sonyci_id(admin_data, key) + admin_data.update(sonyci_id: [key]) + end + + def create_title(work = nil) + asset = objects.first + return unless asset + + work.present? ? "#{work.series_title.first}; #{work.episode_title.first}" : "#{asset[:series_title]}; #{asset[:episode_title]}" + end + + def add_object(current_object) + if current_object.present? 
+ if objects.first + objects.first[:children] ||= [] + objects.first[:children] << current_object[work_identifier] + end + record_objects << current_object + objects << current_object + end + end +end diff --git a/app/parsers/pbcore_manifest_parser.rb b/app/parsers/pbcore_manifest_parser.rb new file mode 100644 index 000000000..e6696d65f --- /dev/null +++ b/app/parsers/pbcore_manifest_parser.rb @@ -0,0 +1,212 @@ +class PbcoreManifestParser < Bulkrax::XmlParser + include Bulkrax::PbcoreParserBehavior + attr_accessor :objects, :record_objects, :manifest_hash + + def create_works + self.record_objects = [] + set_objects.each_with_index do |record, index| + break if limit_reached?(limit, index) + + seen[record[work_identifier]] = true + new_entry = find_or_create_entry(entry_class, record[work_identifier], 'Bulkrax::Importer', record.compact) + + if record[:delete].present? + Bulkrax::DeleteWorkJob.send(perform_method, new_entry, current_run) + else + Bulkrax::ImportWorkJob.send(perform_method, new_entry.id, current_run.id) + end + + increment_counters(index) + end + importer.record_status + rescue StandardError => e + status_info(e) + end + + # In either case there may be multiple metadata files returned by metadata_paths + def records(_opts = {}) + invalid_files = [] + @records ||= + if parser_fields['import_type'] == 'multiple' + r = [] + metadata_paths.map do |md| + # Retrieve all records + elements = entry_class.read_data(md).xpath("//#{record_element}") + r += elements.map { |el| entry_class.data_for_entry(el, source_identifier) } + end + # Flatten because we may have multiple records per array + r.compact.flatten + elsif parser_fields['import_type'] == 'single' + records = metadata_paths.map do |md| + if MIME::Types.type_for(md).include?('text/csv') + csv_data = Bulkrax::CsvEntry.read_data(md) + @manifest_hash = {} + csv_data.each do |row| + @manifest_hash[row["DigitalInstantiation.filename"]] = row.to_h + end + next + else + begin + schema = 
Nokogiri::XML::Schema(File.read(Rails.root.join('spec', 'fixtures', 'pbcore-2.1.xsd'))) + data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record + schema_errors = schema.validate(md) + raise Nokogiri::XML::SyntaxError, schema_errors if schema_errors.present? + + entry_class.data_for_entry(data, source_identifier).merge!({ filename: File.basename(md) }) + rescue Nokogiri::XML::SyntaxError => e + invalid_files << { message: e, filepath: md } + end + end + end.compact # No need to flatten because we take only the first record + raise_format_errors(invalid_files) if invalid_files.present? + + records + end + end + + # If the import_file_path is an xml file, return that + # Otherwise return all xml files in the given folder + def metadata_paths + @metadata_paths ||= + if file? && MIME::Types.type_for(import_file_path).include?('application/xml') + [import_file_path] + else + file_paths.select do |f| + MIME::Types.type_for(f).include?('application/xml') && MIME::Types.type_for(f).include?('application/csv') + f.include?("import_#{importerexporter.id}") + end + end + end + + def setup_parents + prnts = [] + record_objects.each do |record| + rec = record.respond_to?(:to_h) ? record.to_h : record + next unless rec.is_a?(Hash) + + parents = rec[:parent].is_a?(String) ? rec[:parent].split(/\s*[:;|]\s*/) : rec[:parent] + next if parents.blank? + + prnts << { rec[work_identifier] => parents } + end + + prnts.blank? ? prnts : prnts.inject(:merge) + end + + def create_parent_child_relationships + parents.each do |key, value| + child = entry_class.where( + identifier: key, + importerexporter_id: importerexporter.id, + importerexporter_type: 'Bulkrax::Importer' + ).first + + parent = entry_class.where( + identifier: value.first, + importerexporter_id: importerexporter.id, + importerexporter_type: 'Bulkrax::Importer' + ).first + + if child.blank? 
+ Rails.logger.error("Expected a child entry for #{work_identifier}: #{key}.") + elsif parent.blank? + Rails.logger.error("Expected a parent for child entry #{child.id}.") + end + + Bulkrax::ChildRelationshipsJob.perform_later(parent.id, [child.id], current_run.id) + end + rescue StandardError => e + status_info(e) + end + + private + + def set_objects + self.objects = [] + asset_bulkrax_identifier = '' + + records.sort_by! do |record| + csv_row = manifest_hash[record[:filename]] + asset_id = csv_row['Asset.id'].strip if csv_row.keys.include?('Asset.id') + + asset_id + end + + records.each_with_index do |file, index| + prev_index = (index - 1).positive? ? index - 1 : 0 + prev_csv_row = manifest_hash[records[prev_index][:filename]] + prev_asset_id = prev_csv_row['Asset.id'].strip + csv_row = manifest_hash[file[:filename]] + asset_id = csv_row['Asset.id'].strip if csv_row.keys.include?('Asset.id') + asset = Asset.find(asset_id) + manifest_filename = get_manifest_filename(csv_row) + digital_instantiation = DigitalInstantiation.where(local_instantiation_identifier: manifest_filename).first + pbcore = PBCore::Instantiation.parse(file[:data]) + tracks = pbcore.essence_tracks + + asset_bulkrax_identifier = if asset.bulkrax_identifier + asset.bulkrax_identifier + else + Bulkrax.fill_in_blank_source_identifiers.call(self, "Asset-#{index}-1") + end + asset.update(bulkrax_identifier: asset_bulkrax_identifier) if asset.bulkrax_identifier.nil? 
+ add_object(asset.attributes.symbolize_keys, 'Asset', nil) if index == 0 || prev_asset_id != asset_id + + di_bulkrax_identifier = build_digital_instantiations(file, csv_row, digital_instantiation, index, asset) + # essence tracks don't have a unique identifier so importing the same one repeatedly, will create multiple identical models + build_essence_tracks(tracks, index, di_bulkrax_identifier, asset) + end + + self.objects + end + + def add_object(current_object, type, related_identifier) + unless type == 'Asset' + current_object[:parent] ||= [] + current_object[:parent] << related_identifier + end + + record_objects << current_object + objects << current_object + end + + def get_manifest_filename(csv_row) + # the filename in the manifest has extra info we don't need + csv_row["DigitalInstantiation.filename"].split('.')[0..1].join('.') + end + + def build_digital_instantiations(file, csv_row, digital_instantiation, index, asset) + current_object = [AAPB::BatchIngest::PBCoreXMLMapper.new(file[:data]).digital_instantiation_attributes.merge!( + { + filename: file[:filename], + pbcore_xml: file[:data], + skip_file_upload_validation: true, + instantiation_admin_data_gid: get_instantiation_admin_data_gid(csv_row, digital_instantiation), + } + )].first + # unable to call the conditional inside the merged object + current_object = current_object.merge!({ bulkrax_identifier: digital_instantiation.bulkrax_identifier }) if digital_instantiation.present? 
+ type = 'DigitalInstantiation' + + obj = set_model(type, index, current_object, asset) + add_object(current_object.symbolize_keys, type, asset.bulkrax_identifier) + + obj[:bulkrax_identifier] + end + + def build_essence_tracks(tracks, index, di_bulkrax_identifier, asset) + parse_rows(tracks.map { |track| AAPB::BatchIngest::PBCoreXMLMapper.new(track.to_xml).essence_track_attributes }, 'EssenceTrack', index, di_bulkrax_identifier, asset) + end + + def get_instantiation_admin_data_gid(csv_row, digital_instantiation = nil) + if digital_instantiation.present? + digital_instantiation.instantiation_admin_data_gid + else + InstantiationAdminData.create( + aapb_preservation_lto: csv_row["DigitalInstantiation.aapb_preservation_lto"], + aapb_preservation_disk: csv_row["DigitalInstantiation.aapb_preservation_disk"], + md5: csv_row["DigitalInstantiation.md5"] + ).gid + end + end +end diff --git a/app/parsers/pbcore_xml_parser.rb b/app/parsers/pbcore_xml_parser.rb new file mode 100644 index 000000000..2b02431c5 --- /dev/null +++ b/app/parsers/pbcore_xml_parser.rb @@ -0,0 +1,200 @@ +class PbcoreXmlParser < Bulkrax::XmlParser + include Bulkrax::PbcoreParserBehavior + attr_accessor :objects, :record_objects + + # OVERRIDE BULKRAX 1.0.2 to capture format errors + # For multiple, we expect to find metadata for multiple works in the given metadata file(s) + # For single, we expect to find metadata for a single work in the given metadata file(s) + # if the file contains more than one record, we take only the first + # In either case there may be multiple metadata files returned by metadata_paths + def records(_opts = {}) + invalid_files = [] + @records ||= + if parser_fields['import_type'] == 'multiple' + r = [] + metadata_paths.map do |md| + # Retrieve all records + elements = entry_class.read_data(md).xpath("//#{record_element}") + r += elements.map { |el| entry_class.data_for_entry(el, source_identifier) } + end + # Flatten because we may have multiple records per array + 
r.compact.flatten + elsif parser_fields['import_type'] == 'single' + records = metadata_paths.map do |md| + begin + data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record + entry_class.data_for_entry(data, source_identifier) + rescue Nokogiri::XML::SyntaxError => e + invalid_files << { message: e, filepath: md } + end + end.compact # No need to flatten because we take only the first record + # OVERRIDE BULKRAX 1.0.2 to capture format errors + raise_format_errors(invalid_files) if invalid_files.present? + records + end + end + + def create_works + self.record_objects = [] + records.each_with_index do |file, index| + set_objects(file, index).each do |record| + break if limit_reached?(limit, index) + + # both instantiations can have an essence track child + if record[:model] == 'DigitalInstantiation' || record[:model] == 'PhysicalInstantiation' + record = set_instantiation_children(record) + end + + seen[record[work_identifier]] = true + new_entry = find_or_create_entry(entry_class, record[work_identifier], 'Bulkrax::Importer', record.compact) + if record[:delete].present? + Bulkrax::DeleteWorkJob.send(perform_method, new_entry, current_run) + else + Bulkrax::ImportWorkJob.send(perform_method, new_entry.id, current_run.id) + end + end + increment_counters(index) + end + importer.record_status + rescue StandardError => e + status_info(e) + end + + def total + records.size + rescue RuntimeError => e + nil + end + + def setup_parents + prnts = [] + record_objects.each do |record| + rec = record.respond_to?(:to_h) ? record.to_h : record + next unless rec.is_a?(Hash) + + children = rec[:children].is_a?(String) ? rec[:children].split(/\s*[:;|]\s*/) : rec[:children] + next if children.blank? + + prnts << { rec[work_identifier] => children } + end + + prnts.blank? ? 
prnts : prnts.inject(:merge) + end + + # Will be skipped unless the #record is a Hash + def create_parent_child_relationships + parents.each do |key, value| + parent = entry_class.where( + identifier: key, + importerexporter_id: importerexporter.id, + importerexporter_type: 'Bulkrax::Importer' + ).first + + # not finding the entries here indicates that the given identifiers are incorrect + # in that case we should log that + children = value.map do |child| + entry_class.where( + identifier: child, + importerexporter_id: importerexporter.id, + importerexporter_type: 'Bulkrax::Importer' + ).first + end.compact.uniq + + if parent.present? && (children.length != value.length) + # Increment the failures for the number we couldn't find + # Because all of our entries have been created by now, if we can't find them, the data is wrong + Rails.logger.error("Expected #{value.length} children for parent entry #{parent.id}, found #{children.length}") + break if children.empty? + Rails.logger.warn("Adding #{children.length} children to parent entry #{parent.id} (expected #{value.length})") + end + + Bulkrax::ChildRelationshipsJob.perform_later(parent.id, children.map(&:id), current_run.id) + end + rescue StandardError => e + status_info(e) + end + + private + + def set_objects(file, index) + self.objects = [] + current_object = {} + new_rows = [] + instantiations = PBCore::DescriptionDocument.parse(file[:data]).instantiations + pbcore_physical_instantiations = instantiations.select { |inst| inst.physical } + pbcore_digital_instantiations = instantiations.select { |inst| inst.digital } + tracks = instantiations.map(&:essence_tracks).flatten # processed in the digitial inst. actor. if we comment this out it will not + # show up in the bulkrax importer, but the records still get processed in the actor. 
+ # people/contributor is processed as part of the asset_attributes method + + # we are checking to see if these models already exist so that we update them instead of creating duplicates + xml_asset = AAPB::BatchIngest::PBCoreXMLMapper.new(file[:data]).asset_attributes.merge!({ delete: file[:delete] }) + asset = Asset.where(id: xml_asset[:id]).first&.attributes&.symbolize_keys + xml_asset = asset.merge!(xml_asset) if asset + new_rows += parse_rows([xml_asset], 'Asset', index) + + pi_rows = pbcore_physical_instantiations.map do |inst| + xml_pi = AAPB::BatchIngest::PBCoreXMLMapper.new(inst.to_xml).physical_instantiation_attributes + physical_instantiation = PhysicalInstantiation.where(local_instantiation_identifier: xml_pi[:local_instantiation_identifier]).first&.attributes&.symbolize_keys + xml_pi = physical_instantiation.merge!(xml_pi) if physical_instantiation + + xml_pi + end + new_rows += parse_rows(pi_rows, 'PhysicalInstantiation', index) + + di_rows = pbcore_digital_instantiations.map do |inst| + xml_di = AAPB::BatchIngest::PBCoreXMLMapper.new(inst.to_xml).digital_instantiation_attributes.merge!({ pbcore_xml: inst.to_xml, skip_file_upload_validation: true }) + digital_instantiation = DigitalInstantiation.where(local_instantiation_identifier: xml_di[:local_instantiation_identifier]).first&.attributes&.symbolize_keys + xml_di = digital_instantiation.merge!(xml_di) if digital_instantiation + + xml_di + end + new_rows += parse_rows(di_rows, 'DigitalInstantiation', index) + + # essence tracks don't have a unique identifier, so importing the same one repeatedly will create multiple identical models + et_rows = tracks.map { |track| AAPB::BatchIngest::PBCoreXMLMapper.new(track.to_xml).essence_track_attributes } + new_rows += parse_rows(et_rows, 'EssenceTrack', index) + + new_rows + end + + def add_object(current_object, type, related_identifier) + if current_object.present? 
+ # each xml file only has one asset, so it will be the first object + if objects.first + objects.first[:children] ||= [] + objects.first[:children] << current_object[work_identifier] + end + + record_objects << current_object + objects << current_object + end + end + + def set_instantiation_children(record) + initial_importer_id = record[work_identifier].first + initial_child_identifier = record[work_identifier].gsub(record[:model], 'EssenceTrack') + + current_importer_id = importerexporter.id.to_s + current_child_identifier = initial_child_identifier.sub(initial_importer_id, current_importer_id) + + if objects.first[:children].include?(initial_child_identifier) + # we are importing this file for the first time + # or re-running the importer where this file was first imported + record[:children] ||= [] + record[:children] << initial_child_identifier + objects.first[:children].delete(initial_child_identifier) + end + + if objects.first[:children].include?(current_child_identifier) + # we are importing the same file we imported in a different importer + # essence tracks aren't legitimate children of an asset though so we remove it + # and do not create a duplicate relation with the parent DI or PI + objects.first[:children].delete(current_child_identifier) + end + + record_objects.find { |r| r[work_identifier] == record[work_identifier] }.merge!({ children: [initial_child_identifier] }) + # return the record so that we retain the parent/child relationships + record + end +end diff --git a/app/presenters/hyrax/asset_presenter.rb b/app/presenters/hyrax/asset_presenter.rb index 23048ba99..84ccd9afb 100644 --- a/app/presenters/hyrax/asset_presenter.rb +++ b/app/presenters/hyrax/asset_presenter.rb @@ -11,7 +11,7 @@ class AssetPresenter < Hyrax::WorkShowPresenter :promo_description, :clip_description, :copyright_date, :level_of_user_access, :outside_url, :special_collections, :transcript_status, :organization, :sonyci_id, :licensing_info, :producing_organization, 
:series_title, :series_description, - :playlist_group, :playlist_order, :hyrax_batch_ingest_batch_id, :last_pushed, :last_update, :needs_update, :special_collection_category, :canonical_meta_tag, :cataloging_status, + :playlist_group, :playlist_order, :hyrax_batch_ingest_batch_id, :bulkrax_importer_id, :last_pushed, :last_update, :needs_update, :special_collection_category, :canonical_meta_tag, :cataloging_status, to: :solr_document def batch @@ -31,6 +31,23 @@ def batch_ingest_date @batch_ingest_date ||= Date.parse(batch.created_at.to_s) end + def bulkrax_import + raise 'No Bulkrax Import ID associated with this Asset' unless bulkrax_importer_id.present? + @bulkrax_import ||= Bulkrax::Importer.find(bulkrax_importer_id.first) + end + + def bulkrax_import_url + @bulkrax_import_url ||= "/importers/#{bulkrax_import.id}" + end + + def bulkrax_import_label + @bulkrax_import_ingest_label ||= bulkrax_import.parser_klass + end + + def bulkrax_import_date + @bulkrax_import_ingest_date ||= Date.parse(bulkrax_import.updated_at.to_s) + end + def annotations @annotations ||= Asset.find(solr_document['id']).annotations end @@ -68,6 +85,7 @@ def list_of_contribution_ids_to_display def display_aapb_admin_data? ! ( sonyci_id.blank? && + bulkrax_importer_id.blank? && hyrax_batch_ingest_batch_id.blank? && last_updated.blank? && last_pushed.blank? && diff --git a/app/presenters/sony_ci/webhook_log_presenter.rb b/app/presenters/sony_ci/webhook_log_presenter.rb new file mode 100644 index 000000000..72e2e8f3d --- /dev/null +++ b/app/presenters/sony_ci/webhook_log_presenter.rb @@ -0,0 +1,69 @@ +module SonyCi + class WebhookLogPresenter + + DATETIME_FORMAT = '%m/%d/%Y %I:%M:%S %P' + + attr_reader :webhook_log + + delegate :id, :url, :error, :error_message, :status, :guids, + to: :webhook_log + + def initialize(webhook_log) + raise ArgumentError, "expected first parameter to be a " \ + "SonyCi::WebhookLog but #{webhook_log.class} was " \ + "given" unless webhook_log.is_a? 
SonyCi::WebhookLog + @webhook_log = webhook_log + end + + def status + webhook_log.error ? "Fail" : "Success" + end + + def created_at + webhook_log.created_at.strftime(DATETIME_FORMAT) + end + + def action + WebhookLogPresenter.actions[webhook_log.action] || "None" + end + + def request_headers + return "None" unless webhook_log.request_headers + http_headers(webhook_log.request_headers) + end + + def request_body + return "None" unless webhook_log.request_body + JSON.pretty_generate(webhook_log.request_body) + end + + def response_headers + return "None" unless webhook_log.response_headers + http_headers(webhook_log.response_headers) + end + + def response_body + return "None" unless webhook_log.response_body + JSON.pretty_generate(webhook_log.response_body) + end + + private + + def http_headers(headers_hash) + headers_hash.map { |header, val| + "#{header}: #{val}" + }.join("\n") + end + + + class << self + # Returns a mapping of recognized actions from SonyCi::WebhookController + # to display text. + def actions + { + 'save_sony_ci_id' => "Link Asset to Sony Ci Media" + } + end + end + end +end diff --git a/app/services/aapb/batch_ingest/pbcore_xml_mapper.rb b/app/services/aapb/batch_ingest/pbcore_xml_mapper.rb index f0b43e850..65b6b6ad4 100644 --- a/app/services/aapb/batch_ingest/pbcore_xml_mapper.rb +++ b/app/services/aapb/batch_ingest/pbcore_xml_mapper.rb @@ -44,8 +44,16 @@ def prepare_annotations(annotations) def find_annotation_type_id(type) type_id = Annotation.find_annotation_type_id(type) - return type_id if type_id.present? - raise "annotation_type not registered with the AnnotationTypesService: #{type}." + + if ENV['SETTINGS__BULKRAX__ENABLED'] == 'true' + type_id + else + if type_id.present? + type_id + else + raise "annotation_type not registered with the AnnotationTypesService: #{type}." 
+ end + end end def asset_attributes diff --git a/app/views/bulkrax/entries/show.html.erb b/app/views/bulkrax/entries/show.html.erb new file mode 100644 index 000000000..b6b786799 --- /dev/null +++ b/app/views/bulkrax/entries/show.html.erb @@ -0,0 +1,68 @@ +<%# OVERRIDE BULKRAX 1.0.2 to display ID instead of record class name %> +

<%= notice %>

+
+
+

+ Identifier: + <%= @entry.identifier %> +

+ +

+ Entry ID: + <%= @entry.id %> +

+ +

+ Type: + <%= @entry.factory_class || 'Unknown' %> +

+ <%= render partial: 'raw_metadata'%> + + <%= render partial: 'parsed_metadata'%> + + <%= render partial: 'bulkrax/shared/bulkrax_errors', locals: {item: @entry} %> + +

+ <% if @importer.present? %> + Importer: + <%= link_to @importer.name, importer_path(@importer) %> + <% elsif @exporter.present? %> + Exporter: + <%= link_to @exporter.name, exporter_path(@exporter) %> + <% end %> +

+ +

+ <% if @importer.present? %> + <% factory_record = @entry.factory.find %> + <% if factory_record.present? && @entry.factory_class %> + <%= @entry.factory_class.to_s %> Link: + <% if @entry.factory_class.to_s == 'Collection' %> + <%# OVERRIDE BULKRAX 1.0.2 to display ID instead of record class name %> + <%= link_to factory_record.id, hyrax.polymorphic_path(factory_record) %> + <% else %> + <%# OVERRIDE BULKRAX 1.0.2 to display ID instead of record class name %> + <%= link_to factory_record.id, main_app.polymorphic_path(factory_record) %> + <% end %> + <% else %> + Item Link: Item has not yet been imported successfully + <% end %> + <% else %> + <% record = @entry&.hyrax_record %> + <% if record.present? && @entry.factory_class %> + <%= record.class.to_s %> Link: + <% if record.is_a?(Collection) %> + <%# OVERRIDE BULKRAX 1.0.2 to display ID instead of record class name %> + <%= link_to record.id, hyrax.polymorphic_path(record) %> + <% else %> + <%# OVERRIDE BULKRAX 1.0.2 to display ID instead of record class name %> + <%= link_to record.id, main_app.polymorphic_path(record) %> + <% end %> + <% else %> + Item Link: No item associated with this entry or class unknown + <% end %> + <% end %> +

+ +
+
diff --git a/app/views/bulkrax/importers/_pbcore_manifest_xml_fields_override.html.erb b/app/views/bulkrax/importers/_pbcore_manifest_xml_fields_override.html.erb new file mode 100644 index 000000000..c2c01b7de --- /dev/null +++ b/app/views/bulkrax/importers/_pbcore_manifest_xml_fields_override.html.erb @@ -0,0 +1,57 @@ +<%# OVERRIDE from Bulkrax to remove multiple works selection from the dropdown menu %> +
+ + <%# @todo improve on this implementation. + As it stands, it's a hostage to namespaces, + eg. dc:title + if namespaces aren't in the xml, we would have to specify dc:title + but if the namespaces ARE present, we remove them so we would need title + %> + <%= fi.hidden_field :record_element, value: 'pbcoreInstantiationDocument' %> + + <%= fi.input :import_type, + collection: [ + ['Single Work per Metadata File', 'single'] + ], + selected: importer.parser_fields['import_type'], + input_html: { class: 'form-control' } + %> + +

Visibility

+ + <%= fi.input :visibility, + collection: [ + ['Public', 'open'], + ['Private', 'restricted'] + ], + selected: importer.parser_fields['visibility'] || 'open', + input_html: { class: 'form-control' } + %> + + <% rights_statements = Hyrax.config.rights_statement_service_class.new %> + <%= fi.input :rights_statement, + collection: rights_statements.select_active_options, + selected: importer.parser_fields['rights_statement'], + include_blank: true, + item_helper: rights_statements.method(:include_current_value), + input_html: { class: 'form-control' }, + required: false %> + <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statment. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %> + +

XML and files to Import:

+

File upload and Cloud File upload MUST be either a single XML file (for metadata only import) OR a Zip file containing the XML files and data files, each in a separate folder.

+

The Server Path can point to a folder containing XML files and data files to import, or direct to the XML file itself.

+ + <%= fi.input :file_style, collection: ['Upload a File', 'Specify a Path on the Server', 'Add Cloud File'], as: :radio_buttons, label: false %> +
+ <%= fi.input 'file', as: :file, input_html: {accept: ['application/zip', 'application/xml']} %>
+
+
+ <%= fi.input :import_file_path, as: :string, input_html: { value: importer.parser_fields['import_file_path'] } %> +
+
+ <% if Hyrax.config.browse_everything? %> + <%= render 'browse_everything', form: form %> + <% end %> +
+
diff --git a/app/views/bulkrax/importers/_pbcore_xml_fields_override.html.erb b/app/views/bulkrax/importers/_pbcore_xml_fields_override.html.erb new file mode 100644 index 000000000..b92aa9dc7 --- /dev/null +++ b/app/views/bulkrax/importers/_pbcore_xml_fields_override.html.erb @@ -0,0 +1,57 @@ +<%# OVERRIDE from Bulkrax to remove multiple works selection from the dropdown menu %> +
+ + <%# @todo improve on this implementation. + As it stands, it's a hostage to namespaces, + eg. dc:title + if namespaces aren't in the xml, we would have to specify dc:title + but if the namespaces ARE present, we remove them so we would need title + %> + <%= fi.hidden_field :record_element, value: 'pbcoreDescriptionDocument' %> + + <%= fi.input :import_type, + collection: [ + ['Single Work per Metadata File', 'single'] + ], + selected: importer.parser_fields['import_type'], + input_html: { class: 'form-control' } + %> + +

Visibility

+ + <%= fi.input :visibility, + collection: [ + ['Public', 'open'], + ['Private', 'restricted'] + ], + selected: importer.parser_fields['visibility'] || 'open', + input_html: { class: 'form-control' } + %> + + <% rights_statements = Hyrax.config.rights_statement_service_class.new %> + <%= fi.input :rights_statement, + collection: rights_statements.select_active_options, + selected: importer.parser_fields['rights_statement'], + include_blank: true, + item_helper: rights_statements.method(:include_current_value), + input_html: { class: 'form-control' }, + required: false %> + <%= fi.input :override_rights_statement, as: :boolean, hint: 'If checked, always use the selected rights statment. If unchecked, use rights or rights_statement from the record and only use the provided value if dc:rights is blank.', input_html: { checked: (importer.parser_fields['override_rights_statement'] == "1") } %> + +

XML and files to Import:

+

File upload and Cloud File upload MUST be either a single XML file (for metadata only import) OR a Zip file containing the XML files and data files, each in a separate folder.

+

The Server Path can point to a folder containing XML files and data files to import, or direct to the XML file itself.

+ + <%= fi.input :file_style, collection: ['Upload a File', 'Specify a Path on the Server', 'Add Cloud File'], as: :radio_buttons, label: false %> +
+ <%= fi.input 'file', as: :file, input_html: {accept: ['application/zip', 'application/xml']} %>
+
+
+ <%= fi.input :import_file_path, as: :string, input_html: { value: importer.parser_fields['import_file_path'] } %> +
+
+ <% if Hyrax.config.browse_everything? %> + <%= render 'browse_everything', form: form %> + <% end %> +
+
diff --git a/app/views/bulkrax/importers/index.html.erb b/app/views/bulkrax/importers/index.html.erb new file mode 100644 index 000000000..ec870e471 --- /dev/null +++ b/app/views/bulkrax/importers/index.html.erb @@ -0,0 +1,77 @@ +<%# OVERRIDE Bulkrax 1.0.2 to hide ability to delete per previous batch ingest behavior (inability to delete importers due to foreign key ref) %> +<% provide :page_header do %> +

Importers

+
+ <%= link_to new_importer_path, class: 'btn btn-primary' do %> + <%= t(:'helpers.action.importer.new') %> + <% end %> +
+<% end %> + +
+
+ <% if @importers.present? %> +
+ + + + + + + + + + + + + + + + + + + + <% @importers.each do |importer| %> + + + + + + + + + + + + <%# OVERRIDE Bulkrax 1.0.2 to hide ability to delete per previous batch ingest behavior (inability to delete importers due to foreign key ref) %> + + + + + <% end %> + +
NameStatusLast RunNext RunEntries EnqueuedEntries ProcessedEntries FailedEntries Deleted UpstreamTotal Collection EntriesTotal Work Entries
<%= link_to importer.name, importer_path(importer) %><%= importer.status %><%= importer.last_imported_at.strftime("%b %d, %Y") if importer.last_imported_at %><%= importer.next_import_at.strftime("%b %d, %Y") if importer.next_import_at %><%= importer.importer_runs.last&.enqueued_records %><%= (importer.importer_runs.last&.processed_collections || 0) + (importer.importer_runs.last&.processed_records || 0) %><%= (importer.importer_runs.last&.failed_collections || 0) + (importer.importer_runs.last&.failed_records || 0) %><%= importer.importer_runs.last&.deleted_records %><%= importer.importer_runs.last&.total_collection_entries %><%= importer.importer_runs.last&.total_work_entries %><%#= link_to raw(''), importer, method: :delete, data: { confirm: 'Are you sure?' } %><%= link_to raw(''), importer_path(importer) %><%= link_to raw(''), edit_importer_path(importer) %>
+
+ <% else %> +

No importers have been created.

+ <% end %> +
+
+ + diff --git a/app/views/bulkrax/shared/_bulkrax_errors.html.erb b/app/views/bulkrax/shared/_bulkrax_errors.html.erb new file mode 100644 index 000000000..901245e57 --- /dev/null +++ b/app/views/bulkrax/shared/_bulkrax_errors.html.erb @@ -0,0 +1,63 @@ +<%# OVERRIDE BULKRAX 1.0.2 to handle multiple error messages %> +<% if item.failed? %> +
+ +
+
+
+ +
+ + +
+ +
+
+ Errored at: <%= item.status_at %>

+ <%# OVERRIDE BULKRAX 1.0.2 to handle multiple error messages %> + <% error_messages = item.current_status.error_message&.split(' ****** ') %> + <% if error_messages && error_messages.count > 1 %> + Errors:

+ <% error_messages.each do |msg| %> + <%= coderay(msg, { wrap: :page, css: :class, tab_width: 200, break_lines: true }) %> +
+ <% end %> + <% else %> + Error: <%= item.current_status.error_class %> - <%= item.current_status.error_message %>

+ <% end %> + Error Trace:

+ <% item.current_status.error_backtrace.each do |v| %> + <%= coderay(v, { wrap: :page, css: :class, tab_width: 200, break_lines: true }) %> +
+ <% end %> +
+
+ Errored at: <%= item.status_at %>

+ Error: <%= item.current_status.error_class %> - <%= item.current_status.error_message %>

+ Error Trace:

+ <% item.current_status.error_backtrace.each do |v| %> + <%= coderay(v, { css: :class, tab_width: 0, break_lines: false }) %> +
+ <% end %> +
+
+
+
+
+
+<% elsif item.succeeded? %> +

+ Succeeded At: <%= item.status_at %> +

+<% else %> +

+ Succeeded At: Item has not yet been <%= @importer.present? ? 'imported' : 'exported' %> successfully +

+<% end %> diff --git a/app/views/catalog/_export_search_results.html.erb b/app/views/catalog/_export_search_results.html.erb index 02f05eae4..6f55bb1ae 100644 --- a/app/views/catalog/_export_search_results.html.erb +++ b/app/views/catalog/_export_search_results.html.erb @@ -8,6 +8,7 @@
  • <%= link_to("Digital Instantiation Report",catalog_export_path({:format=>"csv", :object_type=>"digital_instantiation"}.merge(search_state.to_h))) %>
  • <%= link_to("Physical Instantiation Report",catalog_export_path({:format=>"csv", :object_type=>"physical_instantiation"}.merge(search_state.to_h))) %>
  • <%= link_to("PBCore XML",catalog_export_path({:format=>"pbcore", :object_type=>"pbcore_zip"}.merge(search_state.to_h))) %>
  • +
  • <%= link_to("For Re-Ingest", bulkrax.exporters_path) %>
  • diff --git a/app/views/hyrax/assets/_batch.html.erb b/app/views/hyrax/assets/_batch.html.erb index 4cbd4f01b..9bc5f3f4b 100644 --- a/app/views/hyrax/assets/_batch.html.erb +++ b/app/views/hyrax/assets/_batch.html.erb @@ -1,4 +1,18 @@ -<% if presenter.hyrax_batch_ingest_batch_id.present? %> +<% if presenter.bulkrax_importer_id.present? %> +
    Importer
    +
    + +<% elsif presenter.hyrax_batch_ingest_batch_id.present? %>
    Batch