From 9d3aa4fce7e6c1e3faa1fbc51ee50bca55056844 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 10 May 2020 22:35:48 -0400 Subject: [PATCH 01/23] WIP Bring EXIFTOOL and FIDO into place Tested and works. Good. Still requires this to pass via the webform since we have not enabled the service to kick in as an independent process after webform generation (we should... i have an issue open). But it does generate the needed data and flooods the JSON with normally not very useful info --- src/StrawberryfieldFilePersisterService.php | 164 +++++++++++++++++++- strawberryfield.services.yml | 2 +- 2 files changed, 163 insertions(+), 3 deletions(-) diff --git a/src/StrawberryfieldFilePersisterService.php b/src/StrawberryfieldFilePersisterService.php index dd8437a..d37fbb4 100644 --- a/src/StrawberryfieldFilePersisterService.php +++ b/src/StrawberryfieldFilePersisterService.php @@ -26,6 +26,9 @@ use Drupal\Component\Plugin\Exception\PluginNotFoundException; use Drupal\Core\Entity\ContentEntityInterface; use Drupal\strawberryfield\Event\StrawberryfieldJsonProcessEvent; +use Drupal\Core\StreamWrapper\StreamWrapperInterface; +use Drupal\Core\Logger\LoggerChannelFactoryInterface; +use Drupal\Core\Config\ImmutableConfig; use Drupal\strawberryfield\StrawberryfieldEventType; use Symfony\Component\Process\Exception\ProcessFailedException; use Symfony\Component\Process\Process; @@ -103,6 +106,20 @@ class StrawberryfieldFilePersisterService { */ protected $transliteration; + /** + * The SBF configuration settings. + * + * @var \Drupal\Core\Config\ImmutableConfig + */ + protected $config; + + /** + * The logger factory. + * @var \Drupal\Core\Logger\LoggerChannelFactoryInterface + */ + protected $loggerFactory; + + /** * StrawberryfieldFilePersisterService constructor. 
* @@ -127,7 +144,8 @@ public function __construct( AccountInterface $current_user, LanguageManagerInterface $language_manager, TransliterationInterface $transliteration, - ModuleHandlerInterface $module_handler + ModuleHandlerInterface $module_handler, + LoggerChannelFactoryInterface $logger_factory ) { $this->fileSystem = $file_system; $this->fileUsage = $file_usage; @@ -138,9 +156,11 @@ public function __construct( $this->destinationScheme = $config_factory->get( 'strawberryfield.storage_settings' )->get('file_scheme'); + $this->config = $config_factory->get('strawberryfield.settings'); $this->languageManager = $language_manager; $this->transliteration = $transliteration; $this->moduleHandler = $module_handler; + $this->loggerFactory = $logger_factory; } @@ -358,6 +378,7 @@ public function generateAsFileStructure( // @TODO Fills up the md5 for all files and updates a single node at a time // @TODO evaluate Node locking while this happens. $md5 = md5_file($uri); + $filemetadata = $this->getBaseFileMetadata($file); $relativefolder = substr($md5, 0, 3); $uuid = $file->uuid(); // again, i know! @@ -381,9 +402,11 @@ public function generateAsFileStructure( 'tags' => [], ]; + // Add Metadata from exif/fido + $fileinfo = array_merge($fileinfo, $filemetadata); // Dispatch event with just the $fileinfo for a single file as JSON // This is used allow other functions to do things based on the JSON. - // IN this case we want 'someone' to count the number of pages e.g + // IN this case we want 'someone' to count txhe number of pages e.g // If the file is a PDF. // @TODO inject event dispatcher and move this to its own method. $event_type = StrawberryfieldEventType::JSONPROCESS; @@ -812,4 +835,141 @@ public function sortByFileName($a, $b) { return strnatcmp($a['name'],$b['name']); } + + /** + * Gets basic metadata from a File to be put back into a SBF + * + * Also deals with the fact that it can be local v/s remote. 
+ * + * @param \Drupal\file\FileInterface $file + * + * @return array + * Metadata extracted for the image in array format if any + */ + public function getBaseFileMetadata(FileInterface $file) { + + // These are the 2 basic binaries we want eventually be able to run + // For each referenced Files + // With certain conditions of course + // Like: + // - How many files? Like 1 is cool, 2000 not cool + // - Size? Like moving realtime 'Sync' 2TB back to TEMP to MD5 it not cool + $metadata = []; + + $exif_exec_path = $this->config->get( + 'exif_exec_path' + ) ?: '/usr/bin/exiftool'; + $fido_exec_path = $this->config->get('fido_exec_path') ?: '/usr/bin/fido'; + $file_size = $file->getSize(); + $uri = $file->getFileUri(); + + /** @var \Drupal\Core\File\FileSystem $file_system */ + $scheme = $this->streamWrapperManager->getScheme($uri); + $templocation = NULL; + + // If the file isn't stored locally make a temporary copy. + if (!isset( + $this->streamWrapperManager->getWrappers( + StreamWrapperInterface::LOCAL + )[$scheme] + )) { + // Local stream. + $cache_key = md5($uri); + $templocation = $this->fileSystem->copy( + $uri, + 'temporary://sbr_' . $cache_key . '_' . basename($uri), + FileSystemInterface::EXISTS_REPLACE + ); + $templocation = \Drupal::service('file_system')->realpath( + $templocation + ); + } + else { + $templocation = \Drupal::service('file_system')->realpath( + $file->getFileUri() + ); + } + + if (!$templocation) { + $this->loggerFactory->get('strawberryfield')->warning( + 'Could not adquire a local accesible location for metadata extraction for file with URL @fileurl', + [ + '@fileurl' => $file->getFileUri(), + ] + ); + return $metadata; + } + + + if ($templocation) { + // @TODO MOVE CHECKSUM here + $output_exif = ''; + $output_fido = ''; + $result_exif = exec( + $exif_exec_path . ' -json -q ' . escapeshellcmd($templocation), + $output_exif, + $status_exif + ); + + $result_fido = exec( + $fido_exec_path . ' ' . 
escapeshellcmd($templocation), + $output_fido, + $status_fido + ); + + // First EXIF + if ($status_exif != 0) { + // Means exiftool did not work + $this->loggerFactory->get('strawberryfield')->warning( + 'Could not process EXIF on @temlocation for @fileurl', + [ + '@fileurl' => $file->getFileUri(), + '@templocation' => $templocation, + ] + ); + } + else { + // JSON-ify EXIF data + // remove RW Properties? + $output_exif = implode('', $output_exif); + $exif_full = json_decode($output_exif, TRUE); + $json_error = json_last_error(); + if ($json_error == JSON_ERROR_NONE && isset($exif_full[0])) { + $exif = $exif_full[0]; + unset($exif['FileName']); + unset($exif['SourceFile']); + unset($exif['Directory']); + unset($exif['FilePermissions']); + unset($exif['ThumbnailImage']); + $metadata['flv:exif'] = $exif; + } + } + // Second FIDO + if ($status_fido != 0) { + // Means Fido did not work + $this->loggerFactory->get('strawberryfield')->warning( + 'Could not process FIDO on @temlocation for @fileurl', + [ + '@fileurl' => $file->getFileUri(), + '@templocation' => $templocation, + ] + ); + } + else { + // JSON-ify EXIF data + // remove RW Properties? + $output_fido = explode(',', str_replace('"', '', $result_fido)); + if (count($output_fido) && $output_fido[0] == 'OK') { + // Means FIDO could do its JOB + $pronom['pronom_id'] = isset($output_fido[2]) ? 'info:pronom/' . 
$output_fido[2] : NULL; + $pronom['label'] = $output_fido[3] ?: NULL; + $pronom['mimetype'] = $output_fido[7] ?: NULL; + $pronom['detection_type'] = $output_fido[8] ?: NULL; + $metadata['flv:pronom'] = $pronom; + } + } + } + return $metadata; + } + } diff --git a/strawberryfield.services.yml b/strawberryfield.services.yml index d52e625..abab347 100644 --- a/strawberryfield.services.yml +++ b/strawberryfield.services.yml @@ -5,7 +5,7 @@ services: strawberryfield.file_persister: class: Drupal\strawberryfield\StrawberryfieldFilePersisterService - arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory','@current_user','@language_manager','@transliteration','@module_handler'] + arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory','@current_user','@language_manager','@transliteration','@module_handler', '@logger.factory'] tags: - { name: backend_overridable } strawberryfield.keyname_manager: From 5528cb1dfae552d44ce715a22b64424d741a7af2 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 10 May 2020 22:39:34 -0400 Subject: [PATCH 02/23] First pass on securing the default formatter This formatter is quite verbose, basically a full dump of the JSON. So, why not simply make it invisible for users that have no EDIT capabilities? This can be better, and will be. 
First pass --- .../StrawberryDefaultFormatter.php | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/Plugin/Field/FieldFormatter/StrawberryDefaultFormatter.php b/src/Plugin/Field/FieldFormatter/StrawberryDefaultFormatter.php index 59d6173..cf00ed2 100644 --- a/src/Plugin/Field/FieldFormatter/StrawberryDefaultFormatter.php +++ b/src/Plugin/Field/FieldFormatter/StrawberryDefaultFormatter.php @@ -37,27 +37,45 @@ public function settingsSummary() { return $summary; } + /** + * {@inheritdoc} + */ + public static function defaultSettings() { + return [ + 'limit_access' => 'edit', + ]; + } + /** * {@inheritdoc} */ public function viewElements(FieldItemListInterface $items, $langcode) { $element = []; + $entity = $items->getEntity(); + $access = $entity + ->access('edit', NULL, TRUE)->isAllowed(); - foreach ($items as $delta => $item) { - // Render each element as markup. - $element[$delta] = [ - '#type' => 'details', - '#title' => t('Raw Metadata (JSON)'), - '#open' => FALSE, - 'json' => [ - '#markup' => json_encode(json_decode($item->value, true), JSON_PRETTY_PRINT), - '#prefix' => '
',
-          '#suffix' => '
', - ] - ]; + if ($access) { + foreach ($items as $delta => $item) { + // Render each element as markup. + $element[$delta] = [ + '#type' => 'details', + '#title' => t('Raw Metadata (JSON)'), + '#open' => FALSE, + 'json' => [ + '#markup' => json_encode( + json_decode($item->value, TRUE), + JSON_PRETTY_PRINT + ), + '#prefix' => '
',
+            '#suffix' => '
', + ] + ]; + } } - return $element; } + + } \ No newline at end of file From b446376788a8b6f33099341e0e3a1a322c5528cb Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 10 May 2020 22:51:42 -0400 Subject: [PATCH 03/23] Wrong signature? Since when Gosh. Seems like i changed this. Fixing. --- src/Event/StrawberryfieldJsonProcessEvent.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Event/StrawberryfieldJsonProcessEvent.php b/src/Event/StrawberryfieldJsonProcessEvent.php index 20e3bbd..e4349a1 100644 --- a/src/Event/StrawberryfieldJsonProcessEvent.php +++ b/src/Event/StrawberryfieldJsonProcessEvent.php @@ -48,8 +48,10 @@ class StrawberryfieldJsonProcessEvent extends Event { * * @param string $event_type * The event type. - * @param \Drupal\Core\Entity\EntityInterface $entity - * The entity which caused the event. + * @param array $originalJson + * The original JSON + * @param array $processedJson + * The processed JSON since these are arrays and not Objects by reference. 
*/ public function __construct($event_type, array $originalJson, array $processedJson) { $this->eventType = $event_type; From d3241474d2e5724cc19847f25ac379650647fde9 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 10 May 2020 22:52:02 -0400 Subject: [PATCH 04/23] Let's add mime type we don't depend on Drupal's file entity access/db --- src/StrawberryfieldFilePersisterService.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/StrawberryfieldFilePersisterService.php b/src/StrawberryfieldFilePersisterService.php index d37fbb4..f12b308 100644 --- a/src/StrawberryfieldFilePersisterService.php +++ b/src/StrawberryfieldFilePersisterService.php @@ -398,6 +398,7 @@ public function generateAsFileStructure( 'dr:for' => $file_source_key, 'dr:fid' => (int) $file->id(), 'dr:uuid' => $uuid, + 'dr:mimetype' => $mime, 'name' => $file->getFilename(), 'tags' => [], ]; From d3b900b9de74bf27c5d357101d455efcfa32198a Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Thu, 28 May 2020 17:17:37 -0400 Subject: [PATCH 05/23] Pass the $messenger via Dependency Injection We have a trait, that is cool. This is better in the long term. 
--- .../StrawberryfieldKeyNameProviderBase.php | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Plugin/StrawberryfieldKeyNameProviderBase.php b/src/Plugin/StrawberryfieldKeyNameProviderBase.php index 03bab78..b40cf19 100644 --- a/src/Plugin/StrawberryfieldKeyNameProviderBase.php +++ b/src/Plugin/StrawberryfieldKeyNameProviderBase.php @@ -22,6 +22,7 @@ use GuzzleHttp\Client; use Symfony\Component\DependencyInjection\ContainerInterface; use Drupal\Core\Plugin\PluginWithFormsTrait; +use Drupal\Core\Messenger\MessengerInterface; abstract class StrawberryfieldKeyNameProviderBase extends PluginBase implements KeyNameProviderPluginInterface, ContainerFactoryPluginInterface { @@ -51,6 +52,14 @@ abstract class StrawberryfieldKeyNameProviderBase extends PluginBase implements */ protected $entityTypeBundleInfo; + /** + * The messenger. + * + * @var \Drupal\Core\Messenger\MessengerInterface + */ + protected $messenger; + + public function __construct( array $configuration, string $plugin_id, @@ -59,7 +68,9 @@ public function __construct( EntityTypeBundleInfoInterface $entityTypeBundleInfo, FieldTypePluginManager $fieldTypePluginManager, EntityFieldManagerInterface $entityFieldManager, - Client $httpClient + Client $httpClient, + MessengerInterface $messenger + ) { parent::__construct($configuration, $plugin_id, $plugin_definition); $this->entityTypeBundleInfo = $entityTypeBundleInfo; @@ -68,6 +79,8 @@ public function __construct( $this->entityFieldManager = $entityFieldManager; $this->setConfiguration($configuration); $this->httpClient = $httpClient; + $this->messenger = $messenger; + } public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) { @@ -80,7 +93,9 @@ public static function create(ContainerInterface $container, array $configuratio $container->get('entity_type.bundle.info'), $container->get('plugin.manager.field.field_type'), $container->get('entity_field.manager'), - 
$container->get('http_client') + $container->get('http_client'), + $container->get('messenger') + ); } From c00ae2bdd479b129e8d79febd8d9a3c32f378f76 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 3 Jun 2020 09:47:18 -0400 Subject: [PATCH 06/23] WIP of a JSON API Drush command wrapper This commit adds: A wrapper drush command (example call) drush archipelago:jsonapi-ingest /var/www/html/d8content/ados/ado-photograph.json --bundle=digital_object --uri=http://esmero-web --files=/var/www/html/d8content/ados/ado-photograph-bin --user=jsonapi --password=jsonapi --moderation_state=published Where the main argument is a pure SBF JSON (for now because it can/should also deal with a FULL ready to be posted JSON API data). It takes a --files argument with a folder of files or a single file (need to check if this is working, this is all quite precarious still), rest are the bundle, the user and pass for jsonapi and a moderation state if people want that to be set. Right now this actually works and ingests a few files, then the node and connects the dots. But! Since as:image etc are still generated by the webform and not as an eventSubscriber, after the creation you still need to go the node, edit via we webform as such. Which is terrible. So next commit will fix that finally! All this requires also docker exec -ti esmero-php bash -c "php -dmemory_limit=-1 /usr/bin/composer require drupal/jsonapi_earlyrendering_workaround; drush en jsonapi_earlyrendering_workaround;" For some reason (documented in many places like https://www.drupal.org/project/drupal/issues/3072076 there seems to be a render context missing and cache rendering stuff (long expl) is leaking giving us bad error message at the end, even when the ingest actually works! So i added a check, this is a soft dependency since i don't want to depend on that contrib module, but drupal forces me into this situations. Also @giancarlobi i need help with this script. 
Will share demo assets for testing after my morning teaching sessions. This drush command is SOO helpful! --- composer.json | 9 +- drush.services.yml | 5 + src/Commands/JsonApiDrushCommands.php | 521 ++++++++++++++++++ .../StrawberryFieldFileComputedItem.php | 12 +- src/StrawberryfieldUtilityService.php | 14 +- 5 files changed, 555 insertions(+), 6 deletions(-) create mode 100644 drush.services.yml create mode 100644 src/Commands/JsonApiDrushCommands.php diff --git a/composer.json b/composer.json index 8c4a0f2..5ac5d16 100644 --- a/composer.json +++ b/composer.json @@ -20,5 +20,12 @@ "drupal/search_api": "~1.14" }, "minimum-stability": "dev", - "prefer-stable": true + "prefer-stable": true, + "extra": { + "drush": { + "services": { + "drush.services.yml": "^10" + } + } + } } diff --git a/drush.services.yml b/drush.services.yml new file mode 100644 index 0000000..3f8e70a --- /dev/null +++ b/drush.services.yml @@ -0,0 +1,5 @@ +services: + strawberryfield.commands: + class: Drupal\strawberryfield\Commands\JsonApiDrushCommands + tags: + - { name: drush.command } \ No newline at end of file diff --git a/src/Commands/JsonApiDrushCommands.php b/src/Commands/JsonApiDrushCommands.php new file mode 100644 index 0000000..0a0e33d --- /dev/null +++ b/src/Commands/JsonApiDrushCommands.php @@ -0,0 +1,521 @@ + '', 'user' => NULL, 'password' => NULL, 'bundle' => 'digital_object', 'fieldname' => 'field_descriptive_metadata', 'uuid' => NULL, 'moderation_state' => NULL]) { + + // If you want to help please read https://weitzman.github.io/blog/port-to-drush9 + if (!\Drupal::moduleHandler()->moduleExists('jsonapi')) { + throw new \Exception( + dt( + 'The JSON API Module needs to be enabled to be able to ingest Archipelago Digital Objects' + ) + ); + } + //@see https://www.drupal.org/project/drupal/issues/3072076 + if (!\Drupal::moduleHandler()->moduleExists('jsonapi_earlyrendering_workaround')) { + throw new \Exception( + dt( + 'This module needs the jsonapi_earlyrendering_workaround 
module installed while https://www.drupal.org/project/drupal/issues/3072076 gets merged. Please run php -dmemory_limit=-1 /usr/bin/composer require drupal/jsonapi_earlyrendering_workaround; drush en jsonapi_earlyrendering_workaround; ' + ) + ); + } + + if (!ExecTrait::programExists('curl')) { + throw new \Exception( + dt( + 'curl binary needs to exist to be able to ingest Archipelago Digital Objects using this command' + ) + ); + } + + if (strlen($options['bundle']) == 0) { + $bundle = 'digital_object'; + } + else { + $bundle = $options['bundle']; + } + // Build the POST URI for the request + $base_url = $this->input()->getOption('uri'); + //$filename = basename(); + $this->output()->writeln('BASE URL is ' . $base_url . '!'); + $fileurlpost = $base_url . '/jsonapi/node/' . $bundle . '/field_file_drop'; + $nodeurlpost = $base_url . '/jsonapi/node/' . $bundle; + + // Check if files is passed and if file or folder + if ($options['files']) { + if (is_dir($options['files'])) { + // @TODO should we allow a pattern to be passed as argument? + $files = \Drupal::service('file_system')->scanDirectory( + $options['files'], + '/\.*$/', + [ + 'callback' => 0, + 'recurse' => FALSE, + 'key' => 'uri', + 'min_depth' => 0, + ] + ); + foreach ($files as $file) { + //@TODO list files here? + error_log(var_export($file, TRUE)); + } + + } + else { + + + } + } + + // @see https://www.drupal.org/docs/8/core/modules/jsonapi-module/creating-new-resources-post + // @see https://www.drupal.org/node/3024331 + + // We could use Guzzle and stuff, but to be honest we just need to call CURL. + + // This will also allow us to do some crazy stuff like allowing partial JSONS + // SBF only pushes and also check for validity + // In the end all this will serve as wrap around for the AMI UI processing + // Module. 
+ + $json_data = @file_get_contents($jsonfilepath); + + // Only process if json_data is present + if ($json_data && StrawberryfieldJsonHelper::isJsonString($json_data)) { + $schema = JsonSchema::import( + json_decode($this::acceptedjsonschemapost) + ); + // We want to create the Digital Object first and then attach the files? + // Probably not. + // Check if JSON is a full JSON API data payload or just our SBF. + // If just SBF, we need to have the machine name of the field to push data + $data = json_decode($json_data, TRUE); + if (isset($data['data']['type']) && $data['data']['type'] == 'node--' . $bundle) { + try { + // @see https://github.com/swaggest/php-json-schema + $schema->in((Object) $data); + } catch (JsonSchemaException $exception) { + throw new \Exception( + dt('The provided JSON is not a valid JSON API payload') + ); + } + } + else { + // Means we need to create our own body + if (!$options['uuid']) { + $options['uuid'] = \Drupal::service('uuid')->generate(); + error_log($options['uuid']); + } + $field_name = NULL; + $sbf_fields = array_values( + \Drupal::service('strawberryfield.utility') + ->getStrawberryfieldMachineForBundle($bundle) + ); + // If there are more than 1 then we need someone to tell us which one! + // but if only one we are all good + if (count($sbf_fields) == 1) { + $field_name = reset($sbf_fields); + } + elseif ($options['fieldname'] && in_array( + $options['fieldname'], + $sbf_fields + )) { + $field_name = $options['fieldname']; + } + + } + + } + + + if ($field_name) { + foreach ($files as $file) { + // Each file has the following structure + /*(object) array( + 'uri' => '/var/www/html/d8content/metadatadisplay_entity_03.json', + 'filename' => 'metadatadisplay_entity_03.json', + 'name' => 'metadatadisplay_entity_03', + ); */ + + $args = [ + 'curl', + '-L', + '--connect-timeout 30', + '-H "Accept: application/vnd.api+json;"', + '-H "Content-Type: application/octet-stream;"', + '-H "Content-Disposition: attachment; filename=\"' . 
urlencode( + $file->filename + ) . '\""', + '--data-binary @' . $file->uri + ]; + if ($options['user'] && $options['password']) { + $args = array_merge( + $args, + [ + '--user', + $options['user'] . ':' . $options['password'], + $fileurlpost + ] + ); + $this->output()->writeln(implode(' ', $args)); + + $this->output()->writeln($args); + $process = Drush::process(implode(' ', $args)); + $process->mustRun(); + error_log($process->getExitCode()); + if ($process->getExitCode() == 0) { + error_log(var_export($process->getOutput(), TRUE)); + $response = json_decode($process->getOutput(),true); + if (isset($response['data']['attributes']['drupal_internal__fid'])) { + $mime_type = $response['data']['attributes']['filemime']; + // Calculate the destination json key + $as_file_type = explode('/', $mime_type); + $as_file_type = count( + $as_file_type + ) == 2 ? $as_file_type[0] : 'document'; + $as_file_type = ($as_file_type != 'application') ? $as_file_type : 'document'; + $as_specific = $as_file_type[1]; + // WE need to check if $data a.k.a JSON contains some mappings that can help + /* + "ap:entitymapping": { + "entity:file": [ + "images", + "documents", + "audios", + "videos", + "models", + "vtts" + ] + + },*/ + // let's be naive and + // @TODO how to map this better, pass a webform id and use as an API + // Things that qualify + // Second part of the mime in plural. Needs to be empty/shallow array + // first part of the mime. Needs to be empty/shallow array + // Should check if all values are integer? + if (isset($data[$as_specific . 's']) && ($data[$as_specific . 's'] == NULL || is_array( + $data[$as_specific . 's'] + ))) { + $data[$as_specific . 's'][] = $response['data']['attributes']['drupal_internal__fid']; + } + else { + $data[$as_file_type . 
's'][] = $response['data']['attributes']['drupal_internal__fid']; + } + } + } + else { + $this->output()->writeln($process->getExitCodeText()); + throw new \Exception(dt('We failed to upload the file')); + } + } + } + + // Now ingest the actual OBJECT + $data_body = []; + $data_body['data'] = [ + 'id' => $options['uuid'], + 'type' => 'node--' . $bundle, + 'attributes' => [ + $field_name => json_encode($data), + 'title' => isset($data['label']) ? $data['label'] : 'Unnamed Digital Object', + ] + ]; + + if ($options['moderation_state']) { + // @TODO Should we validate possible moderation states? + $data_body['data']['attributes']['moderation_state'] = $options['moderation_state']; + } + + $curl_body = json_encode($data_body); + + $args_node = [ + 'curl', + '-L', + '--connect-timeout 30', + '-H "Accept: application/vnd.api+json;"', + '-H "Content-type: application/vnd.api+json"', + '-XPOST', + "--data '" . $curl_body . "'" + ]; + if ($options['user'] && $options['password']) { + $args_node = array_merge( + $args_node, + [ + '--user', + $options['user'] . ':' . 
$options['password'], + $nodeurlpost + ] + ); + $this->output()->writeln(implode(' ', $args_node)); + + $this->output()->writeln($args_node); + $process_node = Drush::process(implode(' ', $args_node)); + $process_node->mustRun(); + error_log($process_node->getExitCode()); + if ($process_node->getExitCode() == 0) { + error_log(var_export($process_node->getOutput(), TRUE)); + } + } + } + } +} \ No newline at end of file diff --git a/src/Plugin/Field/FieldType/StrawberryFieldFileComputedItem.php b/src/Plugin/Field/FieldType/StrawberryFieldFileComputedItem.php index 823f5ff..3f542a3 100644 --- a/src/Plugin/Field/FieldType/StrawberryFieldFileComputedItem.php +++ b/src/Plugin/Field/FieldType/StrawberryFieldFileComputedItem.php @@ -11,6 +11,8 @@ use Drupal\Core\Form\FormStateInterface; use Drupal\Core\StreamWrapper\StreamWrapperInterface; use Drupal\Core\TypedData\DataDefinition; +use Drupal\Core\Render\BubbleableMetadata; +use Drupal\Core\File\FileSystemInterface; /** * Plugin implementation of a virtual 'file' field type. @@ -143,7 +145,9 @@ protected static function doGetUploadLocation(array $settings, $data = []) { // Replace tokens. As the tokens might contain HTML we convert it to plain // text. - $destination = PlainTextOutput::renderFromHtml(\Drupal::token()->replace($destination, $data)); + $metadata = new BubbleableMetadata(); + // Just in case we hit the ugly leaked cacheable render metadata problem. + $destination = PlainTextOutput::renderFromHtml(\Drupal::token()->replace($destination, $data, $metadata)); return $settings['uri_scheme'] . '://' . $destination; } @@ -159,7 +163,7 @@ public function getUploadValidators() { $settings = $this->getSettings(); // Cap the upload size according to the PHP limit. 
- $max_filesize = Bytes::toInt(file_upload_max_size()); + $max_filesize = Bytes::toInt(\Drupal\Component\Utility\Environment::getUploadMaxSize()); if (!empty($settings['max_filesize'])) { $max_filesize = min($max_filesize, Bytes::toInt($settings['max_filesize'])); } @@ -184,12 +188,12 @@ public static function generateSampleValue(FieldDefinitionInterface $field_defin // Prepare destination. $dirname = static::doGetUploadLocation($settings); - file_prepare_directory($dirname, FILE_CREATE_DIRECTORY); + \Drupal::service('file_system')->prepareDirectory($dirname, FileSystemInterface::CREATE_DIRECTORY); // Generate a file entity. $destination = $dirname . '/' . $random->name(10, TRUE) . '.txt'; $data = $random->paragraphs(3); - $file = file_save_data($data, $destination, FILE_EXISTS_ERROR); + $file = file_save_data($data, $destination, FileSystemInterface::EXISTS_ERROR); $values = [ 'target_id' => $file->id(), 'display' => (int) $settings['display_default'], diff --git a/src/StrawberryfieldUtilityService.php b/src/StrawberryfieldUtilityService.php index 3ba1444..6d5db10 100644 --- a/src/StrawberryfieldUtilityService.php +++ b/src/StrawberryfieldUtilityService.php @@ -126,7 +126,7 @@ public function getStrawberryfieldMachineNames() { return $this->strawberryfieldMachineNames; } $node_field_definitions = $this->entityFieldManager->getFieldStorageDefinitions('node'); - $sbf_field_names = array(); + $sbf_field_names = []; foreach ($node_field_definitions as $field_definition) { if ($field_definition->getType() === "strawberryfield_field") { $sbf_field_names[] = $field_definition->getName(); @@ -136,6 +136,18 @@ public function getStrawberryfieldMachineNames() { return $sbf_field_names; } + /** + * Given a Bundle returns the SBF field machine names + * + * @return array + * Returns array of SBF names + */ + public function getStrawberryfieldMachineForBundle($bundle = 'digital_object') { + $all_bundled_fields = $this->entityFieldManager->getFieldDefinitions('node', $bundle); 
+ $all_sbf_fields = $this->getStrawberryfieldMachineNames(); + return array_intersect(array_keys($all_bundled_fields), $all_sbf_fields ); + } + /** * Returns the Solr Fields in a Solr Index are from SBFs ** From 23f22315353ced634065bcbd7dff87e559e4f1a5 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 5 Jun 2020 16:10:59 -0400 Subject: [PATCH 07/23] Adds verify executable to StrawberryfieldUtilityService Simple but effective verify - Checks if it's there - Checks if webserver/current user can run Returns a boolean. Works so far! --- src/StrawberryfieldUtilityService.php | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/StrawberryfieldUtilityService.php b/src/StrawberryfieldUtilityService.php index 6d5db10..d485547 100644 --- a/src/StrawberryfieldUtilityService.php +++ b/src/StrawberryfieldUtilityService.php @@ -181,4 +181,23 @@ public function getStrawberryfieldSolrFields(Index $index_entity) { return $sbf_solr_fields; } + + /** + * Checks if a given command exists and is executable. + * + * @param $command + * + * @return bool + */ + public function verifyCommand($execpath) :bool { + $iswindows = strpos(PHP_OS, 'WIN') === 0; + $canexecute = FALSE; + $execpath = trim(escapeshellcmd($execpath)); + $test = $iswindows ? 'where' : 'command -v'; + $output = shell_exec("$test $execpath"); + if ($output) { + $canexecute = is_executable($execpath); + } + return $canexecute; + } } From a8d3f452de38b25513c2ce4b290615fa8fda5896 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 5 Jun 2020 16:11:33 -0400 Subject: [PATCH 08/23] adds exif path, fido and a toggle to the schema A toggle (extractmetadata) to rule them all! 
--- config/schema/strawberryfield.schema.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/config/schema/strawberryfield.schema.yml b/config/schema/strawberryfield.schema.yml index 8bf2661..43d4593 100644 --- a/config/schema/strawberryfield.schema.yml +++ b/config/schema/strawberryfield.schema.yml @@ -134,3 +134,17 @@ field.value.strawberryfield_field: value: type: string label: 'Strawberryfield Metadata' + +strawberryfield.filepersister_service_settings: + type: config_object + label: 'Archipelago IIIF Server configurations' + mapping: + extractmetadata: + type: boolean + label: 'Whether to run (TRUE) or to skip (FALSE) file identification directly on file persistence.' + exif_exec_path: + type: string + label: 'Exifinfo binary full executable path' + fido_exec_path: + type: string + label: 'FIDO binary full executable path' \ No newline at end of file From ac515e6c6cdac2b6e5be93d69d40a43b0a4a0aaa Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 5 Jun 2020 16:13:13 -0400 Subject: [PATCH 09/23] New form that brings storage and exif and fido into a single place Better fewer config forms than too many This form joins two needs into a single one and also adds some nice and nifty ajax driven executable path validation for both, exif and fido. Also saves. And also only validates if the global master switch is selected. Good! Old form had to go. Better so (drush cr folks!) 
--- src/Form/FilePersisterServiceSettingsForm.php | 208 ++++++++++++++++++ src/Form/StorageSettingsForm.php | 76 ------- strawberryfield.links.menu.yml | 11 +- strawberryfield.routing.yml | 16 +- strawberryfield.services.yml | 3 +- 5 files changed, 222 insertions(+), 92 deletions(-) create mode 100644 src/Form/FilePersisterServiceSettingsForm.php delete mode 100644 src/Form/StorageSettingsForm.php diff --git a/src/Form/FilePersisterServiceSettingsForm.php b/src/Form/FilePersisterServiceSettingsForm.php new file mode 100644 index 0000000..d37d050 --- /dev/null +++ b/src/Form/FilePersisterServiceSettingsForm.php @@ -0,0 +1,208 @@ +config('strawberryfield.filepersister_service_settings'); + $config_storage = $this->config('strawberryfield.storage_settings'); + $scheme_options = OcflHelper::getVisibleStreamWrappers(); + $form['file_scheme'] = [ + '#type' => 'radios', + '#title' => $this->t('Storage Scheme for Persisting Files'), + '#description' => $this->t('Please provide your prefered Storage Scheme for Persisting Strawberryfield managed Files'), + '#default_value' => $config_storage ->get('file_scheme'), + '#options' => $scheme_options, + '#required' => TRUE + + ]; + + $form['object_file_scheme'] = [ + '#type' => 'radios', + '#title' => $this->t('Storage Scheme for Persisting Digital Objects'), + '#description' => $this->t('Please provide your prefered Storage Scheme for Persisting Digital Objects as JSON Files'), + '#default_value' => $config_storage ->get('object_file_scheme'), + '#options' => $scheme_options, + '#required' => TRUE + ]; + + $form['extractmetadata'] = [ + '#type' => 'checkbox', + '#title' => $this->t('Should File level metadata extraction be processed?'), + '#description' => $this->t('If enabled, exiftool and FIDO will run on every file.'), + '#default_value' => !empty($config->get('extractmetadata')) ? 
$config->get('extractmetadata'): FALSE, + '#return_value' => TRUE, + ]; + $form['exif_exec_path'] = [ + '#type' => 'textfield', + '#title' => $this->t('Absolute path to the exiftool inside your server'), + '#description' => $this->t('exiftool will run on every file associated to an Archipelago Digital Object and resulting metadata will be appended to the strawberryfield JSON'), + '#default_value' => !empty($config->get('exif_exec_path')) ? $config->get('exif_exec_path'): '/usr/bin/exiftool', + '#prefix' => '', + '#states' => [ + 'visible' => [ + ':input[name="extractmetadata"]' => ['checked' => TRUE], + ], + ], + '#ajax' => [ + 'callback' => [$this, 'validateExif'], + 'effect' => 'fade', + 'wrapper' => 'exif-exec-path-validation', + 'method' => 'replace', + 'event' => 'change' + ] + ]; + $form['fido_exec_path'] = [ + '#type' => 'textfield', + '#title' => $this->t('Absolute path to the FIDO tool binary inside your server'), + '#description' => $this->t('FIDO will run against any file associated to an Archipelago Digital Object and resulting PRONOM ID will be appended to the strawberryfield JSON'), + '#default_value' => !empty($config->get('fido_exec_path')) ? 
$config->get('fido_exec_path'): '/usr/bin/fido', + '#states' => [ + 'visible' => [ + ':input[name="extractmetadata"]' => ['checked' => TRUE], + ], + ], + '#prefix' => '', + '#ajax' => [ + 'callback' => [$this, 'validateFido'], + 'effect' => 'fade', + 'wrapper' => 'fido-exec-path-validation', + 'method' => 'replace', + 'event' => 'change' + ] + ]; + + return parent::buildForm($form, $form_state); + } + + /** + * Validate exiftool Exec Path + * @param array $form + * @param \Drupal\Core\Form\FormStateInterface $form_state + * + * @return \Drupal\Core\Ajax\AjaxResponse + */ + public function validateExif(array $form, FormStateInterface $form_state) { + $response = new AjaxResponse(); + $canrun = \Drupal::service('strawberryfield.utility')->verifyCommand($form_state->getValue('exif_exec_path')); + if (!$canrun) { + $response->addCommand(new InvokeCommand('#edit-exif-exec-path', 'addClass', ['error'])); + $response->addCommand(new InvokeCommand('#edit-exif-exec-path', 'removeClass', ['ok'])); + $response->addCommand(new MessageCommand('exiftool path is not valid.', NULL, ['type' => 'error', 'announce' => 'exiftool path is not valid.'])); + + } else { + $response->addCommand(new InvokeCommand('#edit-exif-exec-path', 'removeClass', ['error'])); + $response->addCommand(new InvokeCommand('#edit-exif-exec-path', 'addClass', ['ok'])); + $response->addCommand(new MessageCommand('exiftool path is valid!', NULL, ['type' => 'status', 'announce' => 'exiftool path is valid!'])); + + } + return $response; + } + + /** + * Validate fido Exec Path + * @param array $form + * @param \Drupal\Core\Form\FormStateInterface $form_state + * + * @return \Drupal\Core\Ajax\AjaxResponse + */ + public function validateFido(array $form, FormStateInterface $form_state) { + $response = new AjaxResponse(); + $canrun = \Drupal::service('strawberryfield.utility')->verifyCommand($form_state->getValue('fido_exec_path')); + if (!$canrun) { + $response->addCommand(new InvokeCommand('#edit-fido-exec-path', 
'addClass', ['error'])); + $response->addCommand(new InvokeCommand('#edit-fido-exec-path', 'removeClass', ['ok'])); + $response->addCommand(new MessageCommand('fido path is not valid.', NULL, ['type' => 'error', 'announce' => 'fido path is not valid.'])); + + } else { + $response->addCommand(new InvokeCommand('#edit-fido-exec-path', 'removeClass', ['error'])); + $response->addCommand(new InvokeCommand('#edit-fido-exec-path', 'addClass', ['ok'])); + $response->addCommand(new MessageCommand('fido path is valid!', NULL, ['type' => 'status', 'announce' => 'fido path is valid!'])); + + } + return $response; + } + + /** + * @param array $form + * @param \Drupal\Core\Form\FormStateInterface $form_state + */ + public function validateForm(array &$form, FormStateInterface $form_state) { + + if ((bool) $form_state->getValue('extractmetadata')) { + // Don't validate if not enabled. + $canrun_exif = \Drupal::service('strawberryfield.utility')->verifyCommand( + $form_state->getValue('exif_exec_path') + ); + if (!$canrun_exif) { + $form_state->setErrorByName( + 'exif_exec_path', + $this->t('Please correct. exiftool path is not valid.') + ); + } + $canrun_fido = \Drupal::service('strawberryfield.utility')->verifyCommand( + $form_state->getValue('fido_exec_path') + ); + if (!$canrun_fido) { + $form_state->setErrorByName( + 'fido_exec_path', + $this->t('Please correct. 
fido path is not valid.') + ); + } + } + + parent::validateForm( + $form, + $form_state + ); // TODO: Change the autogenerated stub + + } + + /** + * {@inheritdoc} + */ + public function submitForm(array &$form, FormStateInterface $form_state) { + $this->config('strawberryfield.filepersister_service_settings') + ->set('extractmetadata', (bool) $form_state->getValue('extractmetadata')) + ->set('exif_exec_path', trim($form_state->getValue('exif_exec_path'))) + ->set('fido_exec_path', trim($form_state->getValue('fido_exec_path'))) + ->save(); + $this->config('strawberryfield.storage_settings') + ->set('file_scheme', $form_state->getValue('file_scheme')) + ->set('object_file_scheme', $form_state->getValue('object_file_scheme')) + ->save(); + + parent::submitForm($form, $form_state); + } +} \ No newline at end of file diff --git a/src/Form/StorageSettingsForm.php b/src/Form/StorageSettingsForm.php deleted file mode 100644 index ddbacaf..0000000 --- a/src/Form/StorageSettingsForm.php +++ /dev/null @@ -1,76 +0,0 @@ -config('strawberryfield.storage_settings'); - $scheme_options = OcflHelper::getVisibleStreamWrappers(); - $form['file_scheme'] = [ - '#type' => 'radios', - '#title' => $this->t('Storage Scheme for Persisting Files'), - '#description' => $this->t('Please provide your prefered Storage Scheme for Persisting Strawberryfield managed Files'), - '#default_value' => $config->get('file_scheme'), - '#options' => $scheme_options, - '#required' => TRUE - - ]; - - $form['object_file_scheme'] = [ - '#type' => 'radios', - '#title' => $this->t('Storage Scheme for Persisting Digital Objects'), - '#description' => $this->t('Please provide your prefered Storage Scheme for Persisting Digital Objects as JSON Files'), - '#default_value' => $config->get('object_file_scheme'), - '#options' => $scheme_options, - '#required' => TRUE - ]; - - return parent::buildForm($form, $form_state); - } - - public function validateForm(array &$form, FormStateInterface $form_state) { - 
parent::validateForm( - $form, - $form_state - ); // TODO: Change the autogenerated stub - } - - /** - * {@inheritdoc} - */ - public function submitForm(array &$form, FormStateInterface $form_state) { - $this->config('strawberryfield.storage_settings') - ->set('file_scheme', $form_state->getValue('file_scheme')) - ->set('object_file_scheme', $form_state->getValue('object_file_scheme')) - ->save(); - - parent::submitForm($form, $form_state); - } -} \ No newline at end of file diff --git a/strawberryfield.links.menu.yml b/strawberryfield.links.menu.yml index 912657f..2d3f0e6 100644 --- a/strawberryfield.links.menu.yml +++ b/strawberryfield.links.menu.yml @@ -14,12 +14,12 @@ strawberryfield.group.admin: description: 'Archipelago Configuration' weight: -999 -strawberryfield.storage_settings_form: - title: 'Storage Configuration Form' - route_name: strawberryfield.storage_settings_form - description: 'Configure strawberry field file persistence settings' +strawberryfield.file_persister_settings_form: + title: 'File Persister Service Configuration Form' + route_name: strawberryfield.file_persister_settings_form + description: 'Configure strawberry field file persistence service and storage settings' parent: strawberryfield.group.admin - weight: 99 + weight: 98 strawberryfield.important_solr_settings_form: title: 'Important Solr Settings Form' @@ -27,4 +27,3 @@ strawberryfield.important_solr_settings_form: description: 'Configure which Solr field is used in ADO Type Mapping' parent: strawberryfield.group.admin weight: 99 - diff --git a/strawberryfield.routing.yml b/strawberryfield.routing.yml index 5faec47..46d9a98 100644 --- a/strawberryfield.routing.yml +++ b/strawberryfield.routing.yml @@ -8,21 +8,21 @@ system.admin_config_strawberryfield: options: _admin_route: TRUE -strawberryfield.storage_settings_form: - path: '/admin/config/archipelago/storage' +strawberryfield.important_solr_settings_form: + path: '/admin/config/archipelago/important-solr-settings' defaults: - 
_form: '\Drupal\strawberryfield\Form\StorageSettingsForm' - _title: 'Storage Settings' + _form: '\Drupal\strawberryfield\Form\ImportantSolrSettingsForm' + _title: 'Important Solr Settings' requirements: _permission: 'access administration pages' options: _admin_route: TRUE -strawberryfield.important_solr_settings_form: - path: '/admin/config/archipelago/important-solr-settings' +strawberryfield.file_persister_settings_form: + path: '/admin/config/archipelago/filepersisting' defaults: - _form: '\Drupal\strawberryfield\Form\ImportantSolrSettingsForm' - _title: 'Important Solr Settings' + _form: '\Drupal\strawberryfield\Form\FilePersisterServiceSettingsForm' + _title: 'File Persister Service Settings' requirements: _permission: 'access administration pages' options: diff --git a/strawberryfield.services.yml b/strawberryfield.services.yml index abab347..9123be4 100644 --- a/strawberryfield.services.yml +++ b/strawberryfield.services.yml @@ -2,10 +2,9 @@ services: strawberryfield.utility: class: Drupal\strawberryfield\StrawberryfieldUtilityService arguments: [ '@file_system', '@entity_type.manager', '@config.factory','@module_handler', '@entity_field.manager'] - strawberryfield.file_persister: class: Drupal\strawberryfield\StrawberryfieldFilePersisterService - arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory','@current_user','@language_manager','@transliteration','@module_handler', '@logger.factory'] + arguments: [ '@file_system', '@file.usage', '@entity_type.manager', '@stream_wrapper_manager', '@plugin.manager.archiver', '@config.factory','@current_user','@language_manager','@transliteration','@module_handler', '@logger.factory','@strawberryfield.utility'] tags: - { name: backend_overridable } strawberryfield.keyname_manager: From 61cea54c234f2f3de0695884aac96e7a1c2a5e4c Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 5 Jun 2020 16:17:07 -0400 Subject: [PATCH 10/23] 
WIP still but better. I need to test this but there are some changes. Since service containers are singletons (means initialized once per request only) So we can check, if needed here if fido and exif are well formed. I guess i will end removing this and maybe this can go into a STATUS CHECK? in global Drupal, like when you need a security patch. Ok, for now this is a mix of safe and performant. If both are wrong AND toggle is enabled it will log it also. - Also, it checks now if the file is already in temporary storage before trying to move it. Imagine some consecutive operations because someone is just testing, no need to move 2TBs of video back and forth right? - Also a message fix. - I think there is still a bug when the uploaded file's basename is not a URL encoded... --- src/StrawberryfieldFilePersisterService.php | 96 +++++++++++++++++---- 1 file changed, 77 insertions(+), 19 deletions(-) diff --git a/src/StrawberryfieldFilePersisterService.php b/src/StrawberryfieldFilePersisterService.php index f12b308..2377636 100644 --- a/src/StrawberryfieldFilePersisterService.php +++ b/src/StrawberryfieldFilePersisterService.php @@ -28,6 +28,7 @@ use Drupal\strawberryfield\Event\StrawberryfieldJsonProcessEvent; use Drupal\Core\StreamWrapper\StreamWrapperInterface; use Drupal\Core\Logger\LoggerChannelFactoryInterface; +use Drupal\strawberryfield\StrawberryfieldUtilityService; use Drupal\Core\Config\ImmutableConfig; use Drupal\strawberryfield\StrawberryfieldEventType; use Symfony\Component\Process\Exception\ProcessFailedException; @@ -115,10 +116,24 @@ class StrawberryfieldFilePersisterService { /** * The logger factory. + * * @var \Drupal\Core\Logger\LoggerChannelFactoryInterface */ protected $loggerFactory; + /** + * The Strawberry Field Utility Service. 
+ * + * @var \Drupal\strawberryfield\StrawberryfieldUtilityService + */ + protected $strawberryfieldUtility; + + /** + * If getBaseFileMetadata should be processed + * + * @var bool + */ + protected $extractFileMetadata = FALSE; /** * StrawberryfieldFilePersisterService constructor. @@ -133,6 +148,7 @@ class StrawberryfieldFilePersisterService { * @param \Drupal\Core\Language\LanguageManagerInterface $language_manager * @param \Drupal\Component\Transliteration\TransliterationInterface $transliteration * @param \Drupal\Core\Extension\ModuleHandlerInterface $module_handler + * @param StrawberryfieldUtilityService $strawberryfield_utility_service, */ public function __construct( FileSystemInterface $file_system, @@ -145,7 +161,8 @@ public function __construct( LanguageManagerInterface $language_manager, TransliterationInterface $transliteration, ModuleHandlerInterface $module_handler, - LoggerChannelFactoryInterface $logger_factory + LoggerChannelFactoryInterface $logger_factory, + StrawberryfieldUtilityService $strawberryfield_utility_service ) { $this->fileSystem = $file_system; $this->fileUsage = $file_usage; @@ -156,11 +173,31 @@ public function __construct( $this->destinationScheme = $config_factory->get( 'strawberryfield.storage_settings' )->get('file_scheme'); - $this->config = $config_factory->get('strawberryfield.settings'); + $this->config = $config_factory->get('strawberryfield.filepersister_service_settings'); $this->languageManager = $language_manager; $this->transliteration = $transliteration; $this->moduleHandler = $module_handler; $this->loggerFactory = $logger_factory; + $this->strawberryfieldUtility = $strawberryfield_utility_service; + // This will verify once per injection of the service, not every time + if ((boolean) $this->config->get('extractmetadata')) { + $canrun_exif = $this->strawberryfieldUtility->verifyCommand( + $this->config->get('exif_exec_path') + ); + $canrun_fido = $this->strawberryfieldUtility->verifyCommand( + 
$this->config->get('fido_exec_path') + ); + if ($canrun_exif || $canrun_fido) { + $this->extractFileMetadata = TRUE; + } + else { + // This will be moved to runners anyway so won't work it too + // much more. + $this->loggerFactory->get('strawberryfield')->warning( + 'File Metadata Extraction is enabled on ingest via Strawberryfield but neither EXIF or FIDO paths are correct executables. Please correct of disable.' + ); + } + } } @@ -854,14 +891,28 @@ public function getBaseFileMetadata(FileInterface $file) { // With certain conditions of course // Like: // - How many files? Like 1 is cool, 2000 not cool - // - Size? Like moving realtime 'Sync' 2TB back to TEMP to MD5 it not cool + // - Size? Like moving realtime 'Sync' 2TB back to TEMP to MD5-it not cool $metadata = []; + // Check if we should even run the file id service + // Reasons why we can not are: + // - Wrong path settings. + // - Disabled. + // Should we notify the user if processing is enabled and binaries can not + // be found? and or can not run? + + if (!$this->extractFileMetadata) { + // early return if not allowed. + return $metadata; + } + // I'm assuming binaries exists and are there. + // Should we check everytime? + // Or just when saving via the form? + + $exif_exec_path = trim($this->config->get( + 'exif_exec_path')); + $fido_exec_path = trim($this->config->get('fido_exec_path')); + - $exif_exec_path = $this->config->get( - 'exif_exec_path' - ) ?: '/usr/bin/exiftool'; - $fido_exec_path = $this->config->get('fido_exec_path') ?: '/usr/bin/fido'; - $file_size = $file->getSize(); $uri = $file->getFileUri(); /** @var \Drupal\Core\File\FileSystem $file_system */ @@ -876,17 +927,24 @@ public function getBaseFileMetadata(FileInterface $file) { )) { // Local stream. $cache_key = md5($uri); - $templocation = $this->fileSystem->copy( - $uri, - 'temporary://sbr_' . $cache_key . '_' . 
basename($uri), - FileSystemInterface::EXISTS_REPLACE - ); - $templocation = \Drupal::service('file_system')->realpath( - $templocation - ); + // Check first if the file is already around in temp? + // @TODO can be sure its the same one? Ideas? + if (is_readable($this->fileSystem->realpath('temporary://sbr_' . $cache_key . '_' . basename($uri)))) { + $templocation = $this->fileSystem->realpath('temporary://sbr_' . $cache_key . '_' . basename($uri)); + } + else { + $templocation = $this->fileSystem->copy( + $uri, + 'temporary://sbr_' . $cache_key . '_' . basename($uri), + FileSystemInterface::EXISTS_REPLACE + ); + $templocation = $this->fileSystem->realpath( + $templocation + ); + } } else { - $templocation = \Drupal::service('file_system')->realpath( + $templocation = $this->fileSystem->realpath( $file->getFileUri() ); } @@ -922,7 +980,7 @@ public function getBaseFileMetadata(FileInterface $file) { if ($status_exif != 0) { // Means exiftool did not work $this->loggerFactory->get('strawberryfield')->warning( - 'Could not process EXIF on @temlocation for @fileurl', + 'Could not process EXIF on @templocation for @fileurl', [ '@fileurl' => $file->getFileUri(), '@templocation' => $templocation, @@ -949,7 +1007,7 @@ public function getBaseFileMetadata(FileInterface $file) { if ($status_fido != 0) { // Means Fido did not work $this->loggerFactory->get('strawberryfield')->warning( - 'Could not process FIDO on @temlocation for @fileurl', + 'Could not process FIDO on @templocation for @fileurl', [ '@fileurl' => $file->getFileUri(), '@templocation' => $templocation, From 43633e10cb4e7b77f3db2d917f0fa78849db5bb6 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 22 Jun 2020 11:31:32 -0400 Subject: [PATCH 11/23] Create an as: structure event subscriber Until now, as:images,etc has been generated directly via the webform handler (not great). 
This brings into place an event subscriber which basically does the same (actually calls the same service and methods) but in a maybe smarter wrapping way. With this, ingests via JSON API get correctly classified. --- ...saveSubscriberAsFileStructureGenerator.php | 301 ++++++++++++++++++ src/StrawberryfieldEventType.php | 4 +- src/StrawberryfieldFilePersisterService.php | 147 +++++---- strawberryfield.services.yml | 5 + 4 files changed, 396 insertions(+), 61 deletions(-) create mode 100644 src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php diff --git a/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php b/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php new file mode 100644 index 0000000..aaa6449 --- /dev/null +++ b/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php @@ -0,0 +1,301 @@ +stringTranslation = $string_translation; + $this->messenger = $messenger; + $this->loggerFactory = $logger_factory; + $this->strawberryfilepersister = $strawberry_filepersister; + } + + + /** + * @param \Drupal\strawberryfield\Event\StrawberryfieldCrudEvent $event + * + * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException + * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException + */ + public function onEntityPresave(StrawberryfieldCrudEvent $event) { + + /* @var $entity \Drupal\Core\Entity\ContentEntityInterface */ + $entity = $event->getEntity(); + $sbf_fields = $event->getFields(); + $newlysavedcount = 0; + foreach ($sbf_fields as $field_name) { + /* @var $field \Drupal\Core\Field\FieldItemInterface */ + $field = $entity->get($field_name); + /* @var \Drupal\strawberryfield\Field\StrawberryFieldItemList $field */ + + $newlyprocessed = 0; + if (!$field->isEmpty()) { + $entity = $field->getEntity(); + $entity_type_id = $entity->getEntityTypeId(); + /** @var $field \Drupal\Core\Field\FieldItemList */ + foreach 
($field->getIterator() as $delta => $itemfield) { + $allprocessedAsValues = []; + /** @var $itemfield \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem */ + $fullvalues = $itemfield->provideDecoded(TRUE); + // SBF needs to have the entity mapping key + // helper structure to keep elements that map to entities around + $fullvalues = $this->cleanUpEntityMappingStructure($fullvalues); + // 'ap:entitymapping' will always exists of ::cleanUpEntityMappingStructure + $entity_mapping_structure = $fullvalues['ap:entitymapping']; + $allprocessedAsValues = []; + if (isset($entity_mapping_structure['entity:file'])) { + foreach ($entity_mapping_structure['entity:file'] as $jsonkey_with_filenumids) { + // Here each $jsonkey_with_filenumids is a json key that holds file ids + // Also $fullvalues[$jsonkeys_with_filenumids] will be there because + // ::cleanUpEntityMappingStructure. Still, double check please? + $processedAsValuesForKey = []; + $fullvalues[$jsonkey_with_filenumids] = isset($fullvalues[$jsonkey_with_filenumids]) ? $fullvalues[$jsonkey_with_filenumids] : []; + // make even single files an array + $fids = []; + $fids = (is_array($fullvalues[$jsonkey_with_filenumids])) ? $fullvalues[$jsonkey_with_filenumids] : [$fullvalues[$jsonkey_with_filenumids]]; + // Only keep ids that can be actually entity ids or uuids + $fids = array_filter( + $fids, + [$this, 'isEntityId'] + ); + if (is_array($fids) && !empty($fids)) { + $fids = array_unique($fids); + // @TODO. If UUID the loader needs to be different. + $processedAsValuesForKey = $this->strawberryfilepersister + ->generateAsFileStructure( + $fids, + $jsonkey_with_filenumids, + (array) $fullvalues + ); + $allprocessedAsValues = array_merge_recursive( + $allprocessedAsValues, + $processedAsValuesForKey + ); + } + } + // WE should be able to load also UUIDs here. 
+ // Now assign back al as:structures + // Distribute all processed AS values for each field into its final JSON + // Structure, e.g as:image, as:application, as:documents, etc.\ + + foreach ($allprocessedAsValues as $askey => $info) { + // @TODO ensure non managed files inside structure are preserved. + // Could come from another URL only field or added manually by some + // Advanced user. + $newlyprocessed = $newlyprocessed + count($info); + $fullvalues[$askey] = $info; + } + if (!$itemfield->setMainValueFromArray((array) $fullvalues)) { + $this->messenger->addError($this->t('We could not persist file classification. Please contact the site admin.')); + }; + } + } + } + + } + if ($newlyprocessed > 0) { + $this->messenger->addStatus( + $this->stringTranslation->formatPlural( + $newlyprocessed, + 'Very good. New file metadata structured created.', + 'Great! @count new files metadata structured created.' + ) + ); + } + $current_class = get_called_class(); + $event->setProcessedBy($current_class, TRUE); + } + + + /** + * This function normalizes an entity mapping array. + * + * @param array FULL JSON + * A full array that contains somewhere, hopefully + * something like + * "ap:entitymapping": { + * "entity:file": [ + * "images", + * "documents", + * "audios", + * "videos", + * "models" + * ], + * "entity:node": [ + * "ismemberof" + * } + * + * @return array + * The cleaned/up $entityMapping + */ + private function cleanUpEntityMappingStructure(array $fullvalues) { + + if (isset($fullvalues['ap:entitymapping']) && is_array( + $fullvalues['ap:entitymapping'] + )) { + $entityMapping = array_filter( + $fullvalues['ap:entitymapping'], + [$this,'prefixedEntity'], + ARRAY_FILTER_USE_KEY + ); + // We can not have an array of arrays. 
+ + foreach ($entityMapping as $entity_type_key => &$jsonkeys_with_fileids) { + $jsonkeys_with_fileids = array_filter( + $jsonkeys_with_fileids, + [$this,'isNotArray'] + ); + if (is_array($jsonkeys_with_fileids)) { + foreach ($jsonkeys_with_fileids as $json_key) { + // If not present simply create + if (!isset($fullvalues[$json_key])) { + $fullvalues[$json_key] = []; + } + } + } + // We are not checking here if the entity part is an actual entity + // Or if each key contains or not the actual ids + // nor if they are valid. IF we have 2000 entities + // doing this here is an overkill. Just do when needed + // Also: i really want to allow relationships to exist even before + // the referenced entites are present. + // We really care here only for the entity:file part + // but will do our best to clean all. + } + $fullvalues['ap:entitymapping'] = $entityMapping; + } + else { + // If not here or not an array create the structure. We want it . + $fullvalues['ap:entitymapping'] = [ + "entity:file" => [], + ]; + } + return $fullvalues; + } + + /** + * Checks if value is integer or an UUID. + * + * @param mixed $val + * + * @return bool + */ + private function isEntityId($val) { + return (is_int($val) && $val > 0) || \Drupal\Component\Uuid\Uuid::isValid( + $val + ); + } + /** + * Array value callback. True if value is not an array. + * + * @param mixed $val + * + * @return bool + */ + private function isNotArray($val) { + return !is_array($val); + } + + /** + * Array value callback. 
True if $key starts with Entity + * + * @param mixed $val + * + * @return bool + */ + private function prefixedEntity($key) { + return (strpos($key, 'entity:', 0) !== FALSE); + } +} \ No newline at end of file diff --git a/src/StrawberryfieldEventType.php b/src/StrawberryfieldEventType.php index 6276235..98485cf 100644 --- a/src/StrawberryfieldEventType.php +++ b/src/StrawberryfieldEventType.php @@ -52,7 +52,9 @@ final class StrawberryfieldEventType { * Name of the event fired when updating a node with a SBF attached. * * This event allows modules to perform an action whenever a node - * with a SBF(Strawberry Field) is updated. + * with a SBF(Strawberry Field) is updated. This is after storage + * was updated (SQL INSERT or UPDATE) so no JSON can be modified via this + * one. * The event listener method receives a * \Drupal\strawberryfield\Event\StrawberryfieldCrudEvent instance. * diff --git a/src/StrawberryfieldFilePersisterService.php b/src/StrawberryfieldFilePersisterService.php index 2377636..c269d62 100644 --- a/src/StrawberryfieldFilePersisterService.php +++ b/src/StrawberryfieldFilePersisterService.php @@ -214,66 +214,72 @@ public function getDestinationUri( string $relativefolder ) { - // Default $relativefolder is a 3 char hash generated by a checksum algorith. - $current_uri = $file->getFileUri(); - $uuid = $file->uuid(); - - $file_parts['destination_folder'] = $relativefolder; - $file_parts['destination_filename'] = pathinfo( - $current_uri, - PATHINFO_FILENAME - ); - $file_parts['destination_extension'] = pathinfo( - $current_uri, - PATHINFO_EXTENSION - ); - $file_parts['destination_scheme'] = $this->fileSystem->uriScheme( - $file->getFileUri() - ); - list($file_parts['destination_filetype'],) = explode( - '/', - $file->getMimeType() - ); + if ($file && $file->isTemporary()) { + // Default $relativefolder is a 3 char hash generated by a checksum algorith. 
+ $current_uri = $file->getFileUri(); + $uuid = $file->uuid(); + + $file_parts['destination_folder'] = $relativefolder; + $file_parts['destination_filename'] = pathinfo( + $current_uri, + PATHINFO_FILENAME + ); + $file_parts['destination_extension'] = pathinfo( + $current_uri, + PATHINFO_EXTENSION + ); + $file_parts['destination_scheme'] = $this->fileSystem->uriScheme( + $file->getFileUri() + ); + list($file_parts['destination_filetype'],) = explode( + '/', + $file->getMimeType() + ); - // Allow other modules to alter the parts used to create final persistent destination. - // @TODO add the .api file and an example for this. - $this->moduleHandler->alter( - 'strawberryfield_file_destination', - $file_parts, - $file - ); + // Allow other modules to alter the parts used to create final persistent destination. + // @TODO add the .api file and an example for this. + $this->moduleHandler->alter( + 'strawberryfield_file_destination', + $file_parts, + $file + ); - $destination_extension = mb_strtolower( - $file_parts['destination_extension'] - ); - //https://api.drupal.org/api/drupal/core%21includes%21file.inc/function/file_uri_scheme/8.7.x - // If no destination scheme was setup on our global config use the original file scheme. - $desired_scheme = !empty($this->destinationScheme) ? $this->destinationScheme : $file_parts['destination_scheme']; - - // First part of Mime type becomes prefix. Performant for filtering in S3. - $destination_basename = $file_parts['destination_filetype'] . '-' . $file_parts['destination_filename']; - - // Sanitize the whole thing. - $destination_basename = $this->sanitizeFileName($destination_basename); - - // Edge case, should only happen if all goes wrong. - // RFC 2046: Since unknown mime-types always default to - // application/octet-stream and we use first part of the string - // we default to 'application' here. 
- if (empty($destination_basename)) { - $destination_basename = 'application-unnamed'; - } else { - // Object name limit for AWS S3 is 512 chars. Minio does not impose any. - // UUID adds 36 characters, plus 1 for the dash + 4 for extension. - // So we shamelessly cut at 471. Someone needs to act! - $destination_basename = substr($destination_basename, 0, 471); - } + $destination_extension = mb_strtolower( + $file_parts['destination_extension'] + ); + //https://api.drupal.org/api/drupal/core%21includes%21file.inc/function/file_uri_scheme/8.7.x + // If no destination scheme was setup on our global config use the original file scheme. + $desired_scheme = !empty($this->destinationScheme) ? $this->destinationScheme : $file_parts['destination_scheme']; + + // First part of Mime type becomes prefix. Performant for filtering in S3. + $destination_basename = $file_parts['destination_filetype'] . '-' . $file_parts['destination_filename']; + + // Sanitize the whole thing. + $destination_basename = $this->sanitizeFileName($destination_basename); + + // Edge case, should only happen if all goes wrong. + // RFC 2046: Since unknown mime-types always default to + // application/octet-stream and we use first part of the string + // we default to 'application' here. + if (empty($destination_basename)) { + $destination_basename = 'application-unnamed'; + } + else { + // Object name limit for AWS S3 is 512 chars. Minio does not impose any. + // UUID adds 36 characters, plus 1 for the dash + 4 for extension. + // So we shamelessly cut at 471. Someone needs to act! + $destination_basename = substr($destination_basename, 0, 471); + } - // WE add the unique UUID at the end. That gives us best protection against - // name collisions but still keeping human semantically aware file naming. + // WE add the unique UUID at the end. That gives us best protection against + // name collisions but still keeping human semantically aware file naming. - $destination_filename = $destination_basename . 
'-' . $uuid . '.' . $destination_extension; - return $desired_scheme . '://' . $file_parts['destination_folder'] . '/' . $destination_filename; + $destination_filename = $destination_basename . '-' . $uuid . '.' . $destination_extension; + return $desired_scheme . '://' . $file_parts['destination_folder'] . '/' . $destination_filename; + } + else { + return $file->getFileUri(); + } } /** @@ -303,11 +309,18 @@ public function sanitizeFileName(string $basename) { /** * Generates the full AS metadata structure to keep track of SBF files. * + * This method processes a single JSON Key with Entity IDs every time + * * @param array $file_id_list * @param $file_source_key + * The top level JSON key/property that contains the file entity id(s). * @param array $cleanjson - * + * A previously existing JSON/SBF full content. Used to extract existing, + * already processed as:structures. This means the only real requirement + * are the as:structures, if an * @return array + * An array containing only as:structures with every file classified and + * their metadata. * @throws \Drupal\Core\Entity\EntityStorageException */ public function generateAsFileStructure( @@ -319,7 +332,6 @@ public function generateAsFileStructure( /* @see https://www.drupal.org/project/drupal/issues/2577417 for a * a future solution for many many files */ - // @TODO This function is processing heavy // In a worst scenario we iterate over every existing file 3 times // Given the fact that a book could have 2000 pages, @@ -345,6 +357,10 @@ public function generateAsFileStructure( ); return []; } + // @TODO: should we alert the user in case the list of ids does not yield in + // the same amount of files loaded? 
+ + // Will contain all as:something and its members based on referenced file ids $fileinfo_bytype_many = []; // Will contain temporary classification @@ -355,7 +371,7 @@ public function generateAsFileStructure( // @TODO if count($files) is different than $file_id_list means we lost // a file from storage. Could have been temporary and it was never accounted // Notify the user of that. Not a good thing - // Give the user the change to restore the file from some other place. + // Give the user the chance to restore the file from some other place. // Iterate and clasify by as: type foreach ($files as $file) { @@ -409,6 +425,7 @@ public function generateAsFileStructure( foreach ($to_process as $askey => $files) { foreach ($files as $file) { $uri = $file->getFileUri(); + error_log('processing' . $uri); // This can get heavy. // @TODO make md5 a queue worker task. // @TODO build two queues. Top one that calls all subqueues and then @@ -511,6 +528,7 @@ protected function retrieve_filestructure_from_metadata( $found['urn:uuid:' . $info['dr:uuid']] = $info; } } + dpm(count($found)); return $found; } @@ -538,6 +556,15 @@ public function persistFilesInJsonToDisks(StrawberryFieldItemList $field) { if (is_numeric($fid)) { $file = $this->entityTypeManager->getStorage('file')->load($fid); /** @var $file \Drupal\file\FileInterface|NULL */ + + //@TODO. We used to allow this service to act on any file + // Allowing users to renamed/move files + // Now only if it is temporary + // Because all not temporaries are already persisted. + // This this clashes with the fact that the file structure + // Naming service will always try to name things in a certain + // way. So either we allow both to act everytime or we + // have a other 'move your files' service? if ($file && $file->isTemporary()) { // This is tricky. We will allow non temporary to be moved if // The only usage is the current node! 
@@ -965,7 +992,7 @@ public function getBaseFileMetadata(FileInterface $file) { $output_exif = ''; $output_fido = ''; $result_exif = exec( - $exif_exec_path . ' -json -q ' . escapeshellcmd($templocation), + $exif_exec_path . ' -json -q -a -gps:all -Common "-gps*" -xmp:all -ImageWidth -ImageHeight -Canon -Nikon-AllDates -pdf:all -ee -MIMEType ' . escapeshellcmd($templocation), $output_exif, $status_exif ); diff --git a/strawberryfield.services.yml b/strawberryfield.services.yml index 9123be4..049f595 100644 --- a/strawberryfield.services.yml +++ b/strawberryfield.services.yml @@ -20,6 +20,11 @@ services: tags: - {name: event_subscriber} arguments: ['@string_translation', '@messenger'] + strawberryfield.presave_as_filestructure_subscriber: + class: Drupal\strawberryfield\EventSubscriber\StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator + tags: + - {name: event_subscriber} + arguments: ['@string_translation', '@messenger','@logger.factory','@strawberryfield.file_persister'] strawberryfield.presavefilepersister_subscriber: class: Drupal\strawberryfield\EventSubscriber\StrawberryfieldEventPresaveSubscriberFilePersister tags: From 0cf14972e4aa6e1767e87112711ce955451d01d4 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 22 Jun 2020 11:33:32 -0400 Subject: [PATCH 12/23] This adds new methods into the field item class Common functions needed. 
It also adds a JSON array to string setter, that also clears the cache (JMESPATH search results) to make sure when setting new values, all the cached elements can be recalculated --- .../Field/FieldType/StrawberryFieldItem.php | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/Plugin/Field/FieldType/StrawberryFieldItem.php b/src/Plugin/Field/FieldType/StrawberryFieldItem.php index 42128b8..836a146 100644 --- a/src/Plugin/Field/FieldType/StrawberryFieldItem.php +++ b/src/Plugin/Field/FieldType/StrawberryFieldItem.php @@ -185,6 +185,61 @@ public function isEmpty() { } + /** + * Decodes main value JSON string into an array + * + * We don't keep this around because this will be mainly used to be + * modified and re encoded afterwards. + * + * @param bool $assoc + * If return is array or a stdclass Object. + * + * @return array|\stdClass + */ + public function provideDecoded($assoc = TRUE) { + if ($this->isEmpty()) { + $this->flattenjson = []; + $jsonArray = []; + } + elseif ($this->validate()->count() == 0) { + $mainproperty = $this->mainPropertyName(); + $jsonArray = json_decode($this->{$mainproperty}, $assoc, 10); + + } + return $jsonArray; + } + + + /** + * Encodes and sets main value from array + * + * This method also clears flattenjson and jsonjmesresult caches. + * + * @param array $jsonarray + * Array of data we want to save in the main property + * + * @return string|boolean + * Returns either the correctly encoded string or boolean FALSE + * Make sure you compare using === FALSE!. + * + * @throws \InvalidArgumentException + * If what is passed to ::setValue() is not an array.
+ */ + public function setMainValueFromArray(array $jsonarray) { + + $jsonstring = json_encode($jsonarray, JSON_PRETTY_PRINT, 10); + + if ($jsonstring) { + $this->setValue([$this->mainPropertyName() => $jsonstring], TRUE); + // Clear this caches just in case + $this->flattenjson = []; + $this->jsonjmesresults = []; + } + + return $jsonstring; + } + + /** * Calculates / keeps around a flatten common keys array for the main value. * From 7a717df09afc0ef1894ce6d01be98cb6856f46a6 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 22 Jun 2020 23:52:53 -0400 Subject: [PATCH 13/23] Self Benchmark yourself Mr. Strawberryfield @giancarlobi first steps into self benchmarking and also what we can/will use in the future to measure how much/can/should archipelago do in real time v/s background. I started with some simple data. strawberryfield.general.yml Needs to be imported as a config file to get this running. Form exposure will follow. I want to keep track also of the other events dispatched so we can accumulate stats. For 3 larger files, on my super slow local dev docker environment, PHP used 2Mbytes Peak but only 3.46ms in all processing, including running exif, fido and persisting. Need to measure now how long it takes to do the same for DB insert. 
--- config/install/strawberryfield.general.yml | 1 + config/schema/strawberryfield.schema.yml | 10 ++++++- ...saveSubscriberAsFileStructureGenerator.php | 4 +-- src/StrawberryfieldFilePersisterService.php | 1 - src/StrawberryfieldUtilityService.php | 19 ++++++++++++++ strawberryfield.module | 26 +++++++++++++++++++ 6 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 config/install/strawberryfield.general.yml diff --git a/config/install/strawberryfield.general.yml b/config/install/strawberryfield.general.yml new file mode 100644 index 0000000..fde1bf6 --- /dev/null +++ b/config/install/strawberryfield.general.yml @@ -0,0 +1 @@ +benchmark: TRUE diff --git a/config/schema/strawberryfield.schema.yml b/config/schema/strawberryfield.schema.yml index 43d4593..4ff9b20 100644 --- a/config/schema/strawberryfield.schema.yml +++ b/config/schema/strawberryfield.schema.yml @@ -147,4 +147,12 @@ strawberryfield.filepersister_service_settings: label: 'Exifinfo binary full executable path' fido_exec_path: type: string - label: 'FIDO binary full executable path' \ No newline at end of file + label: 'FIDO binary full executable path' + +strawberryfield.general: + type: config_object + label: General SBF Module settings + mapping: + benchmark: + type: boolean + label: Benchmark time and memory usage of Event Subscribers diff --git a/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php b/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php index aaa6449..8966c90 100644 --- a/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php +++ b/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php @@ -189,8 +189,8 @@ public function onEntityPresave(StrawberryfieldCrudEvent $event) { $this->messenger->addStatus( $this->stringTranslation->formatPlural( $newlyprocessed, - 'Very good. New file metadata structured created.', - 'Great! 
@count new files metadata structured created.' + 'Very good. New file metadata structure created.', + 'Great! @count new files metadata structures created.' ) ); } diff --git a/src/StrawberryfieldFilePersisterService.php b/src/StrawberryfieldFilePersisterService.php index c269d62..1d08417 100644 --- a/src/StrawberryfieldFilePersisterService.php +++ b/src/StrawberryfieldFilePersisterService.php @@ -528,7 +528,6 @@ protected function retrieve_filestructure_from_metadata( $found['urn:uuid:' . $info['dr:uuid']] = $info; } } - dpm(count($found)); return $found; } diff --git a/src/StrawberryfieldUtilityService.php b/src/StrawberryfieldUtilityService.php index d485547..a852e23 100644 --- a/src/StrawberryfieldUtilityService.php +++ b/src/StrawberryfieldUtilityService.php @@ -200,4 +200,23 @@ public function verifyCommand($execpath) :bool { } return $canexecute; } + + /** + * Format a quantity of bytes. + * + * @param int $size + * @param int $precision + * + * @return string + */ + public function formatBytes($size, $precision = 2) + { + if ($size === 0) { + return 0; + } + $base = log($size, 1024); + $suffixes = array('', 'k', 'M', 'G', 'T'); + return round(pow(1024, $base - floor($base)), $precision) . $suffixes[floor($base)]; + } + } diff --git a/strawberryfield.module b/strawberryfield.module index 5e2fa43..adbafd5 100644 --- a/strawberryfield.module +++ b/strawberryfield.module @@ -25,12 +25,38 @@ use Drupal\Core\Field\FieldItemListInterface; */ function strawberryfield_node_presave(ContentEntityInterface $entity) { + if ($sbf_fields = \Drupal::service('strawberryfield.utility')->bearsStrawberryfield($entity)) { + $config = \Drupal::config('strawberryfield.general'); + $bench = FALSE; + // When benchmark is enabled a simple but effective report will be found in the reports/logs + if ($config->get('benchmark')) { + $bench = TRUE; + } + // Introducing our newest development, the processing time stats! 
+ $start_time = microtime(true); + $event_type = StrawberryfieldEventType::PRESAVE; $event = new StrawberryfieldCrudEvent($event_type, $entity, $sbf_fields); /** @var \Symfony\Component\EventDispatcher\EventDispatcher $dispatcher */ $dispatcher = \Drupal::service('event_dispatcher'); $dispatcher->dispatch($event_type, $event); + + if ($bench) { + $end_time = microtime(TRUE); + $time = bcsub($end_time, $start_time, 4); + $max_memory = memory_get_peak_usage(TRUE); + \Drupal::logger('strawberryfield')->notice( + 'ADO with UUID @uuid spend @time ms on all presave event subscriber processing and max memory usage was @maxmem. Event Subscribers that run where the following
@events', + [ + '@uuid' => $entity->uuid(), + '@time' => $time, + '@maxmem' => \Drupal::service('strawberryfield.utility')->formatBytes($max_memory, 2), + '@events' => print_r($event->getProcessedBy(), TRUE), + ] + ); + } + } } From fcbd14b2c1f40bb5583bf9925a9d6c0e0bfe4e7e Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 23 Jun 2020 08:30:38 -0400 Subject: [PATCH 14/23] Make JsonldKeyNameProvider.php safer in case of failed remote URL This is something i had upstream. Fixed now --- .../JsonldKeyNameProvider.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Plugin/StrawberryfieldKeyNameProvider/JsonldKeyNameProvider.php b/src/Plugin/StrawberryfieldKeyNameProvider/JsonldKeyNameProvider.php index e752099..7244052 100644 --- a/src/Plugin/StrawberryfieldKeyNameProvider/JsonldKeyNameProvider.php +++ b/src/Plugin/StrawberryfieldKeyNameProvider/JsonldKeyNameProvider.php @@ -219,7 +219,8 @@ public function processFromSource() { $filterURL = $this->getConfiguration()['filterurl']; // We won't filter things out there, ::extractKeys will deal with that. - $filterData = $this->getRemoteJsonData($filterURL); + $filderdatapre = $this->getRemoteJsonData($filterURL); + $filterData = $filderdatapre ? $filderdatapre : []; $keys = $this->extractKeys($maindata, $filterData); @@ -261,7 +262,7 @@ protected function getRemoteJsonData($remoteUrl) { $jsondata = json_decode($filecache, TRUE); $json_error = json_last_error(); if ($json_error == JSON_ERROR_NONE) { - return $jsondata; + return is_array($jsondata) ? $jsondata: [$jsondata]; } else { // Basically whatever that we have is not JSON, lets go for it again. $filecache = FALSE; @@ -317,7 +318,7 @@ protected function getRemoteJsonData($remoteUrl) { } } } - return $jsondata; + return is_array($jsondata) ? $jsondata: [$jsondata]; } // This means we had an error on the JSON decode. 
$this->messenger->addError( From 234d3ce3cf7501c6abc3b1edd98c1222162f89af Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 23 Jun 2020 16:02:38 -0400 Subject: [PATCH 15/23] Adds permissions to View RAW JSON metadata Now that we extract EXIF and there can be a lot of fun and private stuff in EXIF data we can / should allow a permission on the RAW display This adds 2 new permissions that only apply to the StrawberryDefaultFormatter View any raw JSON stored in a Strawberryfield View own raw JSON in a Strawberryfield This is how things work here: - if the entity (node, file, etc) has an owner the 'Own' can be actually evaluated - every user can have the 'View any' of course - Every user that can edit an entity can see the RAW metadata (I see no benefit in hiding data from users with editing capabilities to be honest) Users with 'bypass node access' can see everything as always. TODO: of course a lot, we can maybe make this a more general method. I can also inject the user here instead of using the drupal:: service. But then, I don't think it's crucial here right now. --- .../StrawberryDefaultFormatter.php | 48 +++++++++++++++++-- strawberryfield.permissions.yml | 9 +++- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/Plugin/Field/FieldFormatter/StrawberryDefaultFormatter.php b/src/Plugin/Field/FieldFormatter/StrawberryDefaultFormatter.php index cf00ed2..8a18999 100644 --- a/src/Plugin/Field/FieldFormatter/StrawberryDefaultFormatter.php +++ b/src/Plugin/Field/FieldFormatter/StrawberryDefaultFormatter.php @@ -10,6 +10,9 @@ use Drupal\Core\Field\FormatterBase; use Drupal\Core\Field\FieldItemListInterface; +use Drupal\Core\Session\AccountInterface; +use Drupal\user\EntityOwnerInterface; +use Drupal\Core\Entity\ContentEntityBase; /** * Simplistic Strawberry Field formatter.
@@ -51,11 +54,32 @@ public static function defaultSettings() { */ public function viewElements(FieldItemListInterface $items, $langcode) { $element = []; + $account = $this->prepareUser(); + + // Only enforce owner permissions here if the entity has an owner $entity = $items->getEntity(); - $access = $entity - ->access('edit', NULL, TRUE)->isAllowed(); + $access = FALSE; + + //@TODO make this its own Method and reuse in every Formatter. + if ($entity instanceof EntityOwnerInterface) { + /* @var ContentEntityBase $entity */ + // Check if the entity can have an owner? - if ($access) { + if (($account->id() == $entity->getOwner()->id()) && + $account->hasPermission('view own Raw Strawberryfield')) { + $access = TRUE; + } + elseif ($account->hasPermission('view any Raw Strawberryfield')) { + $access = TRUE; + } + } + else { + // Users with Edit access can always see this? + $access = $entity + ->access('edit', NULL, TRUE)->isAllowed(); + } + + if ($access || $account->hasPermission('bypass node access')) { foreach ($items as $delta => $item) { // Render each element as markup. $element[$delta] = [ @@ -69,13 +93,29 @@ public function viewElements(FieldItemListInterface $items, $langcode) { ), '#prefix' => '
',
             '#suffix' => '
', - ] + ], ]; } } return $element; } + /** + * Loads the current account object, if it does not exist yet. + * + * @param \Drupal\Core\Session\AccountInterface $account + * The account interface instance. + * + * @return \Drupal\Core\Session\AccountInterface + * Returns the current account object. + */ + protected function prepareUser(AccountInterface $account = NULL) { + if (!$account) { + $account = \Drupal::currentUser(); + } + return $account; + } + } \ No newline at end of file diff --git a/strawberryfield.permissions.yml b/strawberryfield.permissions.yml index b2e1e87..a754669 100644 --- a/strawberryfield.permissions.yml +++ b/strawberryfield.permissions.yml @@ -1,2 +1,9 @@ 'upload to Digital Object file dropbox field': - title: Upload to Digital Object file dropbox field \ No newline at end of file + title: Upload to Digital Object file dropbox field +'view any Raw Strawberryfield': + title: 'View any raw JSON stored in a Strawberryfield when showed through the provided Formatter' + description: 'Allows viewing all Strawberryfield RAW JSON when a "Strawberry Default Formatter" is rendered.' + warning: 'Warning: This permission affects every Strawberryfield JSON in the repository and can expose, e.g. private EXIF data. Note: To allow users per Formatter instance, e.g when using multiple Display Modes you can also use the ''Access'' option on the Formatter Configuration Form' +'view own Raw Strawberryfield': + title: 'View own raw JSON in a Strawberryfield when showed through the provided Formatter' + description: 'Allows viewing own Strawberryfield RAW JSON when a "Strawberry Default Formatter" is rendered.' 
From 483a2c7ee3aefc673897fb409999716a672a8475 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Wed, 24 Jun 2020 23:46:40 -0400 Subject: [PATCH 16/23] Remove message about structure being created People have enough messages in their lives --- ...eldEventPresaveSubscriberAsFileStructureGenerator.php | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php b/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php index 8966c90..4593763 100644 --- a/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php +++ b/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php @@ -185,15 +185,6 @@ public function onEntityPresave(StrawberryfieldCrudEvent $event) { } } - if ($newlyprocessed > 0) { - $this->messenger->addStatus( - $this->stringTranslation->formatPlural( - $newlyprocessed, - 'Very good. New file metadata structure created.', - 'Great! @count new files metadata structures created.' - ) - ); - } $current_class = get_called_class(); $event->setProcessedBy($current_class, TRUE); } From e5f1fa55cc238a414f1d6a7c985c08c5ce7495ea Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 29 Jun 2020 15:55:37 -0400 Subject: [PATCH 17/23] Deal with no moderation state? Simple check on/if base field is there or not. --- src/Commands/JsonApiDrushCommands.php | 30 ++++++++++++++++++--------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/Commands/JsonApiDrushCommands.php b/src/Commands/JsonApiDrushCommands.php index 0a0e33d..a439b81 100644 --- a/src/Commands/JsonApiDrushCommands.php +++ b/src/Commands/JsonApiDrushCommands.php @@ -297,7 +297,7 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, // Build the POST URI for the request $base_url = $this->input()->getOption('uri'); //$filename = basename(); - $this->output()->writeln('BASE URL is ' .
$base_url . '!'); + $this->output()->writeln('BASE URL is ' . $base_url . '!\n'); $fileurlpost = $base_url . '/jsonapi/node/' . $bundle . '/field_file_drop'; $nodeurlpost = $base_url . '/jsonapi/node/' . $bundle; @@ -315,14 +315,17 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, 'min_depth' => 0, ] ); + if (count($files) ) { + $this->output()->writeln(dt('Files in provided location')); + } foreach ($files as $file) { //@TODO list files here? - error_log(var_export($file, TRUE)); + $this->output()->writeln($file->filename); } } else { - + $this->output()->writeln(dt('No files provided')); } } @@ -332,10 +335,10 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, // We could use Guzzle and stuff, but to be honest we just need to call CURL. - // This will also allow us to do some crazy stuff like allowing partial JSONS + // This will also allow us to do some crazy stuff like allowing partial JSONs // SBF only pushes and also check for validity // In the end all this will serve as wrap around for the AMI UI processing - // Module. + // module. $json_data = @file_get_contents($jsonfilepath); @@ -355,7 +358,7 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, $schema->in((Object) $data); } catch (JsonSchemaException $exception) { throw new \Exception( - dt('The provided JSON is not a valid JSON API payload') + dt('The provided JSON is not a valid JSON API payload. 
Suspending the ingest') ); } } @@ -421,7 +424,6 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, $this->output()->writeln($args); $process = Drush::process(implode(' ', $args)); $process->mustRun(); - error_log($process->getExitCode()); if ($process->getExitCode() == 0) { error_log(var_export($process->getOutput(), TRUE)); $response = json_decode($process->getOutput(),true); @@ -465,7 +467,7 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, } else { $this->output()->writeln($process->getExitCodeText()); - throw new \Exception(dt('We failed to upload the file')); + throw new \Exception(dt('We failed to upload the file. Suspending the ingest')); } } } @@ -482,8 +484,16 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, ]; if ($options['moderation_state']) { - // @TODO Should we validate possible moderation states? - $data_body['data']['attributes']['moderation_state'] = $options['moderation_state']; + // Check if the bundle has actually the field. + $all_bundle_fields = \Drupal::service('entity_field.manager')->getFieldDefinitions('node', $bundle); + if (isset($all_bundle_fields['moderation_state'])) { + $data_body['data']['attributes']['moderation_state'] = $options['moderation_state']; + } + else { + $this->output()->writeln(dt('Bundle @bundle is not moderated so skipping moderation state', [ + '@bundle' => $bundle + ])); + } } $curl_body = json_encode($data_body); From f9771d38233da35b8feccb18888fc0ef95abc773 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Mon, 29 Jun 2020 15:56:11 -0400 Subject: [PATCH 18/23] Doc Comment Like nothing really, but C'mon, i can not have wrongly written JSON, not even in the comments! 
--- ...berryfieldEventPresaveSubscriberAsFileStructureGenerator.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php b/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php index 4593763..50f2aeb 100644 --- a/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php +++ b/src/EventSubscriber/StrawberryfieldEventPresaveSubscriberAsFileStructureGenerator.php @@ -204,7 +204,7 @@ public function onEntityPresave(StrawberryfieldCrudEvent $event) { * "videos", * "models" * ], - * "entity:node": [ + * "entity:node": {[ * "ismemberof" * } * From de73775c3ca700949d555f2aa888df4e48fbc2da Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 30 Jun 2020 14:28:40 -0400 Subject: [PATCH 19/23] Update JsonApiDrushCommands.php Drush command is a little less verbose, makes more sense now @giancarlobi I will merge now with ISSUE-86. Then you can test that branch and all should be easier. This will of course need more work. But as always, it's a step forward. Some NEXT steps: I learned today that drush can return its output in a generic format, that can then be cast via an argument (general drush one, not controlled by us) into YAML, JSON, CSV, etc. So... the docs are here https://github.com/consolidation/output-formatters. We should try it.
--- src/Commands/JsonApiDrushCommands.php | 125 +++++++++++++++++++------- 1 file changed, 94 insertions(+), 31 deletions(-) diff --git a/src/Commands/JsonApiDrushCommands.php b/src/Commands/JsonApiDrushCommands.php index a439b81..dd1dc59 100644 --- a/src/Commands/JsonApiDrushCommands.php +++ b/src/Commands/JsonApiDrushCommands.php @@ -28,9 +28,11 @@ class JsonApiDrushCommands extends DrushCommands { use ExecTrait; + protected $user; protected $password; + /** * JSON SCHEMA Draft 7.0 for a JSON API NODE via POST */ @@ -250,18 +252,34 @@ class JsonApiDrushCommands extends DrushCommands { * Wraps a JSON API node post call back with added files. * * @param string $jsonfilepath - * A file containing either a full JSON API data payload or just SBF JSON data. + * A file containing either a full JSON API data payload or just SBF JSON + * data. + * * @command archipelago:jsonapi-ingest * @aliases ap-jsonapi-ingest * @options user JSON API capable user * @options password JSON API capable user's password - * @options files file or folder containing things to be uploaded and attached to json + * @options files file or folder containing things to be uploaded and + * attached to json * @options bundle Machine name of the bundle. * @options uuid target uuid for new digital object. 
* - * @usage archipelago:jsonapi-ingest digital_object.json --user=jsonapi --password=yourpassword --files=/home/www/someplace --bundle=digital_object --moderation_state=published + * @usage archipelago:jsonapi-ingest digital_object.json --user=jsonapi + * --password=yourpassword --files=/home/www/someplace + * --bundle=digital_object --moderation_state=published */ - public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, 'password' => NULL, 'bundle' => 'digital_object', 'fieldname' => 'field_descriptive_metadata', 'uuid' => NULL, 'moderation_state' => NULL]) { + public function ingest( + $jsonfilepath, + $options = [ + 'files' => '', + 'user' => NULL, + 'password' => NULL, + 'bundle' => 'digital_object', + 'fieldname' => 'field_descriptive_metadata', + 'uuid' => NULL, + 'moderation_state' => NULL, + ] + ) { // If you want to help please read https://weitzman.github.io/blog/port-to-drush9 if (!\Drupal::moduleHandler()->moduleExists('jsonapi')) { @@ -272,7 +290,9 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, ); } //@see https://www.drupal.org/project/drupal/issues/3072076 - if (!\Drupal::moduleHandler()->moduleExists('jsonapi_earlyrendering_workaround')) { + if (!\Drupal::moduleHandler()->moduleExists( + 'jsonapi_earlyrendering_workaround' + )) { throw new \Exception( dt( 'This module needs the jsonapi_earlyrendering_workaround module installed while https://www.drupal.org/project/drupal/issues/3072076 gets merged. Please run php -dmemory_limit=-1 /usr/bin/composer require drupal/jsonapi_earlyrendering_workaround; drush en jsonapi_earlyrendering_workaround; ' @@ -296,8 +316,6 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, } // Build the POST URI for the request $base_url = $this->input()->getOption('uri'); - //$filename = basename(); - $this->output()->writeln('BASE URL is ' . $base_url . '!\n'); $fileurlpost = $base_url . '/jsonapi/node/' . $bundle . 
'/field_file_drop'; $nodeurlpost = $base_url . '/jsonapi/node/' . $bundle; @@ -315,12 +333,12 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, 'min_depth' => 0, ] ); - if (count($files) ) { - $this->output()->writeln(dt('Files in provided location')); + if (count($files)) { + $this->output()->writeln(dt('Files in provided location:')); } foreach ($files as $file) { //@TODO list files here? - $this->output()->writeln($file->filename); + $this->output()->writeln(' - '.$file->filename); } } @@ -358,7 +376,9 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, $schema->in((Object) $data); } catch (JsonSchemaException $exception) { throw new \Exception( - dt('The provided JSON is not a valid JSON API payload. Suspending the ingest') + dt( + 'The provided JSON is not a valid JSON API payload. Suspending the ingest' + ) ); } } @@ -366,7 +386,14 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, // Means we need to create our own body if (!$options['uuid']) { $options['uuid'] = \Drupal::service('uuid')->generate(); - error_log($options['uuid']); + $this->output()->writeln( + dt( + 'Using the following @uuid for your new ADO', + [ + '@uuid' => $options['uuid'], + ] + ) + ); } $field_name = NULL; $sbf_fields = array_values( @@ -408,7 +435,7 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, '-H "Content-Disposition: attachment; filename=\"' . urlencode( $file->filename ) . '\""', - '--data-binary @' . $file->uri + '--data-binary @' . $file->uri, ]; if ($options['user'] && $options['password']) { $args = array_merge( @@ -416,18 +443,25 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, [ '--user', $options['user'] . ':' . 
$options['password'], - $fileurlpost + $fileurlpost, ] ); - $this->output()->writeln(implode(' ', $args)); - $this->output()->writeln($args); $process = Drush::process(implode(' ', $args)); $process->mustRun(); if ($process->getExitCode() == 0) { - error_log(var_export($process->getOutput(), TRUE)); - $response = json_decode($process->getOutput(),true); + + $response = json_decode($process->getOutput(), TRUE); if (isset($response['data']['attributes']['drupal_internal__fid'])) { + $this->output()->writeln( + dt( + 'File @file sucessfully uploaded with file ID @fileid ', + [ + '@file' => $file->filename, + '@fileod' => $response['data']['attributes']['drupal_internal__fid'], + ] + ) + ); $mime_type = $response['data']['attributes']['filemime']; // Calculate the destination json key $as_file_type = explode('/', $mime_type); @@ -467,32 +501,41 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, } else { $this->output()->writeln($process->getExitCodeText()); - throw new \Exception(dt('We failed to upload the file. Suspending the ingest')); + throw new \Exception( + dt('We failed to upload the file. Suspending the ingest') + ); } } } // Now ingest the actual OBJECT $data_body = []; + $ado_title = isset($data['label']) ? $data['label'] : 'Unnamed Digital Object'; $data_body['data'] = [ 'id' => $options['uuid'], 'type' => 'node--' . $bundle, 'attributes' => [ $field_name => json_encode($data), - 'title' => isset($data['label']) ? $data['label'] : 'Unnamed Digital Object', - ] + 'title' => $ado_title, + ], ]; if ($options['moderation_state']) { // Check if the bundle has actually the field. 
- $all_bundle_fields = \Drupal::service('entity_field.manager')->getFieldDefinitions('node', $bundle); + $all_bundle_fields = \Drupal::service('entity_field.manager') + ->getFieldDefinitions('node', $bundle); if (isset($all_bundle_fields['moderation_state'])) { $data_body['data']['attributes']['moderation_state'] = $options['moderation_state']; } else { - $this->output()->writeln(dt('Bundle @bundle is not moderated so skipping moderation state', [ - '@bundle' => $bundle - ])); + $this->output()->writeln( + dt( + 'Bundle @bundle is not moderated so skipping moderation state', + [ + '@bundle' => $bundle, + ] + ) + ); } } @@ -505,7 +548,7 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, '-H "Accept: application/vnd.api+json;"', '-H "Content-type: application/vnd.api+json"', '-XPOST', - "--data '" . $curl_body . "'" + "--data '" . $curl_body . "'", ]; if ($options['user'] && $options['password']) { $args_node = array_merge( @@ -513,17 +556,37 @@ public function ingest($jsonfilepath, $options = ['files' => '', 'user' => NULL, [ '--user', $options['user'] . ':' . $options['password'], - $nodeurlpost + $nodeurlpost, ] ); - $this->output()->writeln(implode(' ', $args_node)); - $this->output()->writeln($args_node); $process_node = Drush::process(implode(' ', $args_node)); $process_node->mustRun(); - error_log($process_node->getExitCode()); + if ($process_node->getExitCode() == 0) { - error_log(var_export($process_node->getOutput(), TRUE)); + $response = json_decode($process_node->getOutput(), TRUE); + if (isset($response['data']['id'])) { + $this->output()->writeln(dt("New Object '@title' with UUID @id successfully ingested. Thanks!",[ + '@title' => $ado_title, + '@id' => $response['data']['id'] + ])); + } + else { + throw new \Exception( + dt('We failed to Ingest the ADO. Sorry this is the output: @errorcode. 
Suspending the ingest.', [ + '@errorcode' => $process_node->getOutput() + ]) + ); + } + } + else { + + throw new \Exception( + dt('We failed to Ingest the ADO with error: @errorcode. Suspending the ingest.', [ + '@errorcode' => $this->output()->writeln($process->getExitCodeText()) + ]) + ); + // Should i roll back the files? } } } From f17c08f52d2fd59d00c66d58efc20e9246cfad32 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Tue, 30 Jun 2020 18:19:05 -0400 Subject: [PATCH 20/23] Tiny update on automatic ordering This code here now preserves manual order (whatever/however that was generated, which includes also untouched automatic ordering, we can not know how ordering happened, just that it happened). But if there is ordering in place and we add new files, it will simply push new files (ordered by whatever order automatically is already happening) to after the current ones. I feel this is fair. I will then, once this is proven to be right program a flag: basically a 'command' passed via webform element (key and value) or via just json: that can affect this. Command can be 'force reordering' or .. wait for it, force EXIF, etc. I wanted to do this for long time. Now its the (for 1.0.0.) @giancarlobi once i merge this into ISSUE-86, can you test? Thanks!! --- src/StrawberryfieldFilePersisterService.php | 43 ++++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/src/StrawberryfieldFilePersisterService.php b/src/StrawberryfieldFilePersisterService.php index 1d08417..eeda7b1 100644 --- a/src/StrawberryfieldFilePersisterService.php +++ b/src/StrawberryfieldFilePersisterService.php @@ -360,7 +360,7 @@ public function generateAsFileStructure( // @TODO: should we alert the user in case the list of ids does not yield in // the same amount of files loaded?
- + // Will contain all as:something and its members based on referenced file ids $fileinfo_bytype_many = []; // Will contain temporary classification @@ -423,9 +423,10 @@ public function generateAsFileStructure( // Final iteration // Only do this if file was not previously processed and stored. foreach ($to_process as $askey => $files) { + $newforsorting = FALSE; foreach ($files as $file) { $uri = $file->getFileUri(); - error_log('processing' . $uri); + // This can get heavy. // @TODO make md5 a queue worker task. // @TODO build two queues. Top one that calls all subqueues and then @@ -479,6 +480,7 @@ public function generateAsFileStructure( //The node save hook will deal with moving data. // We don't need the key here but makes cleaning easier $fileinfo_bytype_many['as:' . $askey]['urn:uuid:' . $uuid] = $fileinfo; + $newforsorting = TRUE; // Side effect of this is that if the same file id is referenced twice // by different fields, as:something will contain it once only. // Not bad, just saying. @@ -487,15 +489,46 @@ public function generateAsFileStructure( } // Natural Order Sort. // @TODO how should we deal with manually ordered files? - // This will always reorder everything based on filenames. + // This will always reorder everything based on filenames only if the original order is still that one + // So, here is how things go: + // We sort anyway, faster than dividing and thingking too much. + // But, we assign new sequence only to newer ones. So never (for now) to existing ones + // with one exception. If the sequence matches the new order, which basically means + // we are good. + uasort($fileinfo_bytype_many['as:' . $askey], array($this,'sortByFileName')); + $max_sequence = 0; + // Let's get the max sequence first. + $max_sequence = array_reduce($fileinfo_bytype_many['as:' . 
$askey], function($a, $b) { + if (isset($b['sequence'])) { + return max($a, (int) $b['sequence']); + } else { + return $a; + } + }, 1); + // For each always wins over array_walk $i=0; + $j=0; foreach ($fileinfo_bytype_many['as:' . $askey] as &$item) { $i++; //Order is already given by uasort but not trustable in JSON - //So we set sequence number - $item['sequence'] = $i; + //So we set sequence number but let's check first what we got + if (isset($item['sequence'])) { + if ($item['sequence'] != $i) { + // means this was ordered manually. Preserve this. + // @TODO program some exception? + } else { + // Means new order matches expected order + // @TODO means we can simply avoid the offset totally + } + } else { + // Why $j and no $i? Because i want to only count ones without a sequence + $j++; + // Why -1? Because we want to offset new sequence elements + $item['sequence'] = $j + ($max_sequence); + } + } From 8a95ca523fafbc98a74474c9ad2fc2948453ccb5 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 3 Jul 2020 22:13:57 -0400 Subject: [PATCH 21/23] hook_entity_bundle_field_info seems to do the trick New way of making our base field dynamically bundled only to SBF bearing Bundles, which allows us still to deliver the promise of -add a SBF, welcome you have a digital object. Basically we take our field_file_drop base field (computed) and if the bundle bears a SBF (simple entity query) we set the bundled property and push the initiated base field as a dynamicly created bundled one, keeping, hear me, the same machine name. Result. Permissions per bundle now apply to data pushed via JSONAPI and we have no more 403. 
--- strawberryfield.module | 77 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/strawberryfield.module b/strawberryfield.module index adbafd5..356f2ad 100644 --- a/strawberryfield.module +++ b/strawberryfield.module @@ -34,7 +34,11 @@ function strawberryfield_node_presave(ContentEntityInterface $entity) { $bench = TRUE; } // Introducing our newest development, the processing time stats! + // Starting on PHP 7.3 we should use hrtime for docker and VMS. + // https://www.php.net/manual/en/function.microtime.php $start_time = microtime(true); + //@TODO make bench simply an Event Method! That way we can measure every + //Event by calling it and for new ones. Etc. $event_type = StrawberryfieldEventType::PRESAVE; $event = new StrawberryfieldCrudEvent($event_type, $entity, $sbf_fields); @@ -44,21 +48,21 @@ function strawberryfield_node_presave(ContentEntityInterface $entity) { if ($bench) { $end_time = microtime(TRUE); - $time = bcsub($end_time, $start_time, 4); + // Removed bsuc + $time = round($end_time, $start_time, 4); $max_memory = memory_get_peak_usage(TRUE); + $events = implode("\n\r",$event->getProcessedBy()); \Drupal::logger('strawberryfield')->notice( - 'ADO with UUID @uuid spend @time ms on all presave event subscriber processing and max memory usage was @maxmem. Event Subscribers that run where the following
@events', + 'ADO with UUID @uuid spend @time in seconds on all presave event subscriber processing and max memory usage was @maxmem. Event Subscribers that run where the following
@events', [ '@uuid' => $entity->uuid(), '@time' => $time, '@maxmem' => \Drupal::service('strawberryfield.utility')->formatBytes($max_memory, 2), - '@events' => print_r($event->getProcessedBy(), TRUE), + '@events' => $events, ] ); } - } - } /** @@ -184,9 +188,7 @@ function strawberryfield_invalidate_fieldefinition_caches(ContentEntityInterface * * @return array */ -function strawberryfield_entity_base_field_info(EntityTypeInterface $entity_type -) { - +function strawberryfield_entity_base_field_info(EntityTypeInterface $entity_type) { if ($entity_type->id() == 'node') { $scheme_options = \Drupal::service('stream_wrapper_manager')->getNames( StreamWrapperInterface::WRITE_VISIBLE @@ -203,9 +205,18 @@ function strawberryfield_entity_base_field_info(EntityTypeInterface $entity_type $fields = []; // Add a field that serves as a drop box for any entities that bear a SBF - // @see https://www.drupal.org/project/drupal/issues/2346347 to + // @see https://www.drupal.org/project/drupal/issues/2346347 + // (still WIP as June 2020) // know why we can't use \Drupal\Core\Field\BaseFieldDefinition - // If we try to make this Bundle specific. + // @TODO If we try to make this Bundle specific? 
+ // @Update 2020: https://www.previousnext.com.au/blog/how-create-and-expose-computed-properties-rest-api-drupal-8 + // Issue with that approach is we need to have a hook update for every bundle + // Which makes adding new bundles and attaching automagically super complex + + // @TODO future work on exposing other JSON properties as other REAL field types + // Dynamically can be achieved by creating a new Class extending BaseFieldDefinition + // That manages without hiccups the 'Bundle' option + // \Drupal\Core\Field\BaseFieldDefinition::setTargetBundle $fields['field_file_drop'] = BaseFieldDefinition::create('entity_reference') ->setName('field_file_drop') @@ -230,6 +241,46 @@ function strawberryfield_entity_base_field_info(EntityTypeInterface $entity_type } } +/** + * Implements hook_entity_bundle_field_info(). + * + * @param \Drupal\Core\Entity\EntityTypeInterface $entity_type + * The entity type definition. + * @param string $bundle + * The bundle. + * @param \Drupal\Core\Field\FieldDefinitionInterface[] $base_field_definitions + * The list of base field definitions for the entity type. + * + * @return \Drupal\Core\Field\FieldDefinitionInterface[] + * An array of bundle field definitions, keyed by field name.
+ */ +function strawberryfield_entity_bundle_field_info( + EntityTypeInterface $entity_type, + $bundle, + array $base_field_definitions +) { + // This is a good workaround for ISSUE-86 + // Will basically attach the base field as a bundled one and by doing so + // Allow our bundle based permissions to be evaluated + + if (($entity_type->id() == 'node') && isset($base_field_definitions['field_file_drop'])) { + if (\Drupal::service('strawberryfield.utility') + ->bundleHasStrawberryfield($bundle)) { + + // Add the target bundle to the field_file_drop base field + // only if it carries a Strawberryfield + // In practice this will allow Bundle specific create access permissions + // to work and force anything not Strawberryfield to either have the node + // access override or simply not work. + + $base_field_definitions['field_file_drop']->setTargetBundle($bundle); + return [ + 'field_file_drop' => $base_field_definitions['field_file_drop'], + ]; + } + } +} + /** * Implements hook_entity_field_access(); * @@ -243,12 +294,11 @@ function strawberryfield_entity_base_field_info(EntityTypeInterface $entity_type function strawberryfield_entity_field_access($operation, FieldDefinitionInterface $field_definition, AccountInterface $account, FieldItemListInterface $items = NULL) { if ($field_definition ->getName() == 'field_file_drop') { - return AccessResult::allowedIfHasPermission($account, 'upload to Digital Object file dropbox field'); + return AccessResult::allowedIfHasPermission($account, 'upload to Digital Object file dropbox field'); } return AccessResult::neutral(); } - function strawberryfield_file_mimetype_mapping_alter(&$mapping) { // Add relevant Repository Mimetypes missing from D8 @@ -258,8 +308,6 @@ function strawberryfield_file_mimetype_mapping_alter(&$mapping) { $mapping['extensions']['jsonld'] = 'jsonld_mimetype'; $mapping['mimetypes']['jpeg2000_mimetype'] = 'image/jp2'; $mapping['extensions']['jp2'] = 'jpeg2000_mimetype'; - 
$mapping['mimetypes']['jpeg2000_mimetype'] = 'image/jp2'; - $mapping['extensions']['jp2'] = 'jpeg2000_mimetype'; $mapping['mimetypes']['fits_image_mimetype'] = 'image/fits'; // @see https://en.wikipedia.org/wiki/FITS $mapping['extensions']['fits'] = 'fits_image_mimetype'; @@ -268,7 +316,6 @@ function strawberryfield_file_mimetype_mapping_alter(&$mapping) { $mapping['extensions']['stl'] = 'stl_model_mimetype'; // @see https://www.iana.org/assignments/media-types/media-types.xhtml $mapping['mimetypes']['stl_model_mimetype'] = 'model/stl'; - } function strawberryfield_s3fs_url_settings_alter(array &$url_settings, $s3_file_path) { From b2a13ed5cc0bd14f6cc6dd20dbd650069503f783 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Fri, 3 Jul 2020 22:14:55 -0400 Subject: [PATCH 22/23] A class i will eventually use. Not now This class could allow me to attach base field as a bundled field. Not needed now but does not hurt to have --- src/Field/BundleAdoFieldDefinition.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 src/Field/BundleAdoFieldDefinition.php diff --git a/src/Field/BundleAdoFieldDefinition.php b/src/Field/BundleAdoFieldDefinition.php new file mode 100644 index 0000000..03a0f6a --- /dev/null +++ b/src/Field/BundleAdoFieldDefinition.php @@ -0,0 +1,16 @@ + Date: Fri, 3 Jul 2020 22:15:29 -0400 Subject: [PATCH 23/23] New utility methods To deal with "who is an ADO, or if you are this Bundle are you also an ADO?" 
--- src/StrawberryfieldUtilityService.php | 56 ++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/StrawberryfieldUtilityService.php b/src/StrawberryfieldUtilityService.php index a852e23..f10ebcf 100644 --- a/src/StrawberryfieldUtilityService.php +++ b/src/StrawberryfieldUtilityService.php @@ -136,16 +136,70 @@ public function getStrawberryfieldMachineNames() { return $sbf_field_names; } + /** + * Given a Bundle returns yes if it contains a SBF defined via a field config + * + * @param string $bundle + * + * @return bool + * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException + * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException + */ + public function bundleHasStrawberryfield($bundle = 'digital_object') { + + $field = $this->entityTypeManager->getStorage('field_config'); + $field_ids = $this->entityTypeManager->getStorage('field_config')->getQuery() + ->condition('entity_type', 'node') + ->condition('bundle', $bundle) + ->condition('field_type' , 'strawberryfield_field') + ->execute(); + $fields = $this->entityTypeManager->getStorage('field_config')->loadMultiple($field_ids); + + return count($field_ids)? 
TRUE : FALSE; + } + + /** + * Given a Bundle returns SBF's field config Object + * @param string $bundle + * + * @return \Drupal\Core\Entity\EntityInterface[] + * @throws \Drupal\Component\Plugin\Exception\InvalidPluginDefinitionException + * @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException + */ + public function getStrawberryfieldConfigFromStorage($bundle = 'digital_object') { + + $field = $this->entityTypeManager->getStorage('field_config'); + $field_ids = $this->entityTypeManager->getStorage('field_config')->getQuery() + ->condition('entity_type', 'node') + ->condition('bundle', $bundle) + ->condition('field_type' , 'strawberryfield_field') + ->execute(); + $fields = $this->entityTypeManager->getStorage('field_config')->loadMultiple($field_ids); + + return $fields; + } + /** * Given a Bundle returns the SBF field machine names * + * This includes code-generated fields, overrides, etc. For just the ones created directly via the UI + * (which are FieldConfig instances) use: + * \Drupal\strawberryfield\StrawberryfieldUtilityService::getStrawberryfieldConfigFromStorage + * + * @param $bundle + * A Node Bundle + * * @return array * Returns array of SBF names */ public function getStrawberryfieldMachineForBundle($bundle = 'digital_object') { + // @WARNING Never call this function inside any field based hook + // Chances are the hook will be invoked inside ::getFieldDefinitions + // and you will find yourself inside a SUPER ETERNAL LOOP. You are advised. + $all_bundled_fields = $this->entityFieldManager->getFieldDefinitions('node', $bundle); $all_sbf_fields = $this->getStrawberryfieldMachineNames(); - return array_intersect(array_keys($all_bundled_fields), $all_sbf_fields ); + return array_intersect(array_keys($all_bundled_fields), $all_sbf_fields); } /**