diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index c0defccfdef..1e42b27f05b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -36,6 +36,8 @@ import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.util.FileUtil; +import opennlp.tools.util.StringUtil; + import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; @@ -85,10 +87,15 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { this.setIsLocalFile(false); try { - bucketName=getBucketName(driverId); - minPartSize = getMinPartSize(driverId); + bucketName=getBucketName(driverId); + minPartSize = getMinPartSize(driverId); s3=getClient(driverId); tm=getTransferManager(driverId); + endpoint = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", ""); + proxy = System.getProperty("dataverse.files." + driverId + ".proxy-url", ""); + if(!StringUtil.isEmpty(proxy)&&StringUtil.isEmpty(endpoint)) { + logger.severe(driverId + " config error: Must specify a custom-endpoint-url if proxy-url is specified"); + } //Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened. try { if (bucketName == null || !s3.doesBucketExistV2(bucketName)) { @@ -104,18 +111,18 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { } } - public S3AccessIO(String storageLocation, String driverId) { - this(null, null, driverId); + public S3AccessIO(String storageLocation, String driverId) { + this(null, null, driverId); // TODO: validate the storage location supplied bucketName = storageLocation.substring(0,storageLocation.indexOf('/')); minPartSize = getMinPartSize(driverId); key = storageLocation.substring(storageLocation.indexOf('/')+1); } - //Used for tests only + //Used for tests only public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client, String driverId) { super(dvObject, req, driverId); - bucketName = getBucketName(driverId); + bucketName = getBucketName(driverId); this.setIsLocalFile(false); this.s3 = s3client; } @@ -125,6 +132,8 @@ public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client, private String bucketName = null; private String key = null; private long minPartSize; + private String endpoint = null; + private String proxy= null; @Override public void open(DataAccessOption... options) throws IOException { @@ -160,28 +169,28 @@ public void open(DataAccessOption... options) throws IOException { // So we fix it up here. ToDo: refactor so that storageidentifier is generated by the appropriate StorageIO class and is final from the start. String newStorageIdentifier = null; if (storageIdentifier.startsWith(this.driverId + "://")) { - if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { - //Driver id but no bucket - if(bucketName!=null) { - newStorageIdentifier=this.driverId + "://" + bucketName + ":" + storageIdentifier.substring((this.driverId + "://").length()); - } else { - throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") is not associated with a bucket."); - } - } // else we're OK (assumes bucket name in storageidentifier matches the driver's bucketname) + if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { + //Driver id but no bucket + if(bucketName!=null) { + newStorageIdentifier=this.driverId + "://" + bucketName + ":" + storageIdentifier.substring((this.driverId + "://").length()); + } else { + throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") is not associated with a bucket."); + } + } // else we're OK (assumes bucket name in storageidentifier matches the driver's bucketname) } else { - if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { - //No driver id or bucket - newStorageIdentifier= this.driverId + "://" + bucketName + ":" + storageIdentifier; - } else { - //Just the bucketname - newStorageIdentifier= this.driverId + "://" + storageIdentifier; - } + if(!storageIdentifier.substring((this.driverId + "://").length()).contains(":")) { + //No driver id or bucket + newStorageIdentifier= this.driverId + "://" + bucketName + ":" + storageIdentifier; + } else { + //Just the bucketname + newStorageIdentifier= this.driverId + "://" + storageIdentifier; + } } if(newStorageIdentifier != null) { - //Fixup needed: - storageIdentifier = newStorageIdentifier; - dvObject.setStorageIdentifier(newStorageIdentifier); - } + //Fixup needed: + storageIdentifier = newStorageIdentifier; + dvObject.setStorageIdentifier(newStorageIdentifier); + } if (isReadAccess) { @@ -224,36 +233,36 @@ public void open(DataAccessOption... options) throws IOException { } else if (dvObject instanceof Dataverse) { throw new IOException("Data Access: Storage driver does not support dvObject type Dataverse yet"); } else { - // Direct access, e.g. for external upload - no associated DVobject yet, but we want to be able to get the size - // With small files, it looks like we may call before S3 says it exists, so try some retries before failing - if(key!=null) { - ObjectMetadata objectMetadata = null; - int retries = 20; - while(retries > 0) { - try { - objectMetadata = s3.getObjectMetadata(bucketName, key); - if(retries != 20) { - logger.warning("Success for key: " + key + " after " + ((20-retries)*3) + " seconds"); - } - retries = 0; - } catch (SdkClientException sce) { - if(retries > 1) { - retries--; - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - logger.warning("Retrying after: " + sce.getMessage()); - } else { - throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); - } - } - } + // Direct access, e.g. for external upload - no associated DVobject yet, but we want to be able to get the size + // With small files, it looks like we may call before S3 says it exists, so try some retries before failing + if(key!=null) { + ObjectMetadata objectMetadata = null; + int retries = 20; + while(retries > 0) { + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + if(retries != 20) { + logger.warning("Success for key: " + key + " after " + ((20-retries)*3) + " seconds"); + } + retries = 0; + } catch (SdkClientException sce) { + if(retries > 1) { + retries--; + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + logger.warning("Retrying after: " + sce.getMessage()); + } else { + throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); + } + } + } this.setSize(objectMetadata.getContentLength()); - }else { + }else { throw new IOException("Data Access: Invalid DvObject type"); - } + } } } @@ -578,7 +587,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) thr //Helper method for supporting saving streams with unknown length to S3 //We save those streams to a file and then upload the file - private File createTempFile(Path path, InputStream inputStream) throws IOException { + private File createTempFile(Path path, InputStream inputStream) throws IOException { File targetFile = new File(path.toUri()); // File needs a name try (OutputStream outStream = new FileOutputStream(targetFile);) { @@ -722,8 +731,8 @@ public boolean exists() { if (dvObject instanceof DataFile) { destinationKey = key; } else if((dvObject==null) && (key !=null)) { - //direct access - destinationKey = key; + //direct access + destinationKey = key; } else { logger.warning("Trying to check if a path exists is only supported for a data file."); } @@ -784,42 +793,42 @@ String getDestinationKey(String auxItemTag) throws IOException { */ String getMainFileKey() throws IOException { if (key == null) { - DataFile df = this.getDataFile(); - // TODO: (?) - should we worry here about the datafile having null for the owner here? - key = getMainFileKey(df.getOwner(), df.getStorageIdentifier(), driverId); + DataFile df = this.getDataFile(); + // TODO: (?) - should we worry here about the datafile having null for the owner here? + key = getMainFileKey(df.getOwner(), df.getStorageIdentifier(), driverId); } return key; } static String getMainFileKey(Dataset owner, String storageIdentifier, String driverId) throws IOException { - - // or about the owner dataset having null for the authority and/or identifier? - // we should probably check for that and throw an exception. (unless we are - // super positive that this condition would have been intercepted by now) - String baseKey = owner.getAuthorityForFileStorage() + "/" + owner.getIdentifierForFileStorage(); - return getMainFileKey(baseKey, storageIdentifier, driverId); + + // or about the owner dataset having null for the authority and/or identifier? + // we should probably check for that and throw an exception. (unless we are + // super positive that this condition would have been intercepted by now) + String baseKey = owner.getAuthorityForFileStorage() + "/" + owner.getIdentifierForFileStorage(); + return getMainFileKey(baseKey, storageIdentifier, driverId); } private static String getMainFileKey(String baseKey, String storageIdentifier, String driverId) throws IOException { - String key = null; - if (storageIdentifier == null || "".equals(storageIdentifier)) { - throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); - } - - if (storageIdentifier.indexOf(driverId + "://")>=0) { - //String driverId = storageIdentifier.substring(0, storageIdentifier.indexOf("://")+3); - //As currently implemented (v4.20), the bucket is part of the identifier and we could extract it and compare it with getBucketName() as a check - - //Only one bucket per driver is supported (though things might work if the profile creds work with multiple buckets, then again it's not clear when logic is reading from the driver property or from the DataFile). - //String bucketName = storageIdentifier.substring(driverId.length() + 3, storageIdentifier.lastIndexOf(":")); - key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); - } else { - throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object associated with driver: " + driverId); - } - return key; + String key = null; + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + if (storageIdentifier.indexOf(driverId + "://")>=0) { + //String driverId = storageIdentifier.substring(0, storageIdentifier.indexOf("://")+3); + //As currently implemented (v4.20), the bucket is part of the identifier and we could extract it and compare it with getBucketName() as a check - + //Only one bucket per driver is supported (though things might work if the profile creds work with multiple buckets, then again it's not clear when logic is reading from the driver property or from the DataFile). + //String bucketName = storageIdentifier.substring(driverId.length() + 3, storageIdentifier.lastIndexOf(":")); + key = baseKey + "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1); + } else { + throw new IOException("S3AccessIO: DataFile (storage identifier " + storageIdentifier + ") does not appear to be an S3 object associated with driver: " + driverId); + } + return key; } public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); if ("true".equalsIgnoreCase(optionValue)) { return true; } @@ -873,7 +882,11 @@ public String generateTemporaryS3Url() throws IOException { } if (s != null) { - return s.toString(); + if(!StringUtil.isEmpty(proxy)) { + return s.toString().replace(endpoint, proxy); + } else { + return s.toString(); + } } //throw new IOException("Failed to generate temporary S3 url for "+key); @@ -889,7 +902,7 @@ public String generateTemporaryS3Url() throws IOException { @Deprecated public String generateTemporaryS3UploadUrl() throws IOException { - + key = getMainFileKey(); Date expiration = new Date(); long msec = expiration.getTime(); @@ -899,86 +912,82 @@ public String generateTemporaryS3UploadUrl() throws IOException { return generateTemporaryS3UploadUrl(key, expiration); } - private String generateTemporaryS3UploadUrl(String key, Date expiration) throws IOException { + private String generateTemporaryS3UploadUrl(String key, Date expiration) throws IOException { GeneratePresignedUrlRequest generatePresignedUrlRequest = - new GeneratePresignedUrlRequest(bucketName, key).withMethod(HttpMethod.PUT).withExpiration(expiration); + new GeneratePresignedUrlRequest(bucketName, key).withMethod(HttpMethod.PUT).withExpiration(expiration); //Require user to add this header to indicate a temporary file generatePresignedUrlRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp"); URL presignedUrl; try { - presignedUrl = s3.generatePresignedUrl(generatePresignedUrlRequest); + presignedUrl = s3.generatePresignedUrl(generatePresignedUrlRequest); } catch (SdkClientException sce) { - logger.warning("SdkClientException generating temporary S3 url for "+key+" ("+sce.getMessage()+")"); - presignedUrl = null; + logger.warning("SdkClientException generating temporary S3 url for "+key+" ("+sce.getMessage()+")"); + presignedUrl = null; } String urlString = null; if (presignedUrl != null) { - String endpoint = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url"); - String proxy = System.getProperty("dataverse.files." + driverId + ".proxy-url"); - if(proxy!=null) { - urlString = presignedUrl.toString().replace(endpoint, proxy); - } else { - urlString = presignedUrl.toString(); - } + if(!StringUtil.isEmpty(proxy)) { + urlString = presignedUrl.toString().replace(endpoint, proxy); + } else { + urlString = presignedUrl.toString(); + } } return urlString; } - public JsonObjectBuilder generateTemporaryS3UploadUrls(String globalId, String storageIdentifier, long fileSize) throws IOException { - - JsonObjectBuilder response = Json.createObjectBuilder(); - key = getMainFileKey(); - java.util.Date expiration = new java.util.Date(); - long msec = expiration.getTime(); - msec += 60 * 1000 * getUrlExpirationMinutes(); - expiration.setTime(msec); - - if (fileSize <= minPartSize) { - response.add("url", generateTemporaryS3UploadUrl(key, expiration)); - } else { - JsonObjectBuilder urls = Json.createObjectBuilder(); - InitiateMultipartUploadRequest initiationRequest = new InitiateMultipartUploadRequest(bucketName, key); - initiationRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp"); - InitiateMultipartUploadResult initiationResponse = s3.initiateMultipartUpload(initiationRequest); - String uploadId = initiationResponse.getUploadId(); - for (int i = 1; i <= (fileSize / minPartSize) + (fileSize % minPartSize > 0 ? 1 : 0); i++) { - GeneratePresignedUrlRequest uploadPartUrlRequest = new GeneratePresignedUrlRequest(bucketName, key) - .withMethod(HttpMethod.PUT).withExpiration(expiration); - uploadPartUrlRequest.addRequestParameter("uploadId", uploadId); - uploadPartUrlRequest.addRequestParameter("partNumber", Integer.toString(i)); - URL presignedUrl; - try { - presignedUrl = s3.generatePresignedUrl(uploadPartUrlRequest); - } catch (SdkClientException sce) { - logger.warning("SdkClientException generating temporary S3 url for " + key + " (" + sce.getMessage() - + ")"); - presignedUrl = null; - } - String urlString = null; - if (presignedUrl != null) { - String endpoint = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url"); - String proxy = System.getProperty("dataverse.files." + driverId + ".proxy-url"); - if (proxy != null) { - urlString = presignedUrl.toString().replace(endpoint, proxy); - } else { - urlString = presignedUrl.toString(); - } - } - urls.add(Integer.toString(i), urlString); - } - response.add("urls", urls); - response.add("abort", "/api/datasets/mpupload?globalid=" + globalId + "&uploadid=" + uploadId - + "&storageidentifier=" + storageIdentifier); - response.add("complete", "/api/datasets/mpupload?globalid=" + globalId + "&uploadid=" + uploadId - + "&storageidentifier=" + storageIdentifier); - - } - response.add("partSize", minPartSize); - - return response; - } + public JsonObjectBuilder generateTemporaryS3UploadUrls(String globalId, String storageIdentifier, long fileSize) throws IOException { + + JsonObjectBuilder response = Json.createObjectBuilder(); + key = getMainFileKey(); + java.util.Date expiration = new java.util.Date(); + long msec = expiration.getTime(); + msec += 60 * 1000 * getUrlExpirationMinutes(); + expiration.setTime(msec); + + if (fileSize <= minPartSize) { + response.add("url", generateTemporaryS3UploadUrl(key, expiration)); + } else { + JsonObjectBuilder urls = Json.createObjectBuilder(); + InitiateMultipartUploadRequest initiationRequest = new InitiateMultipartUploadRequest(bucketName, key); + initiationRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp"); + InitiateMultipartUploadResult initiationResponse = s3.initiateMultipartUpload(initiationRequest); + String uploadId = initiationResponse.getUploadId(); + for (int i = 1; i <= (fileSize / minPartSize) + (fileSize % minPartSize > 0 ? 1 : 0); i++) { + GeneratePresignedUrlRequest uploadPartUrlRequest = new GeneratePresignedUrlRequest(bucketName, key) + .withMethod(HttpMethod.PUT).withExpiration(expiration); + uploadPartUrlRequest.addRequestParameter("uploadId", uploadId); + uploadPartUrlRequest.addRequestParameter("partNumber", Integer.toString(i)); + URL presignedUrl; + try { + presignedUrl = s3.generatePresignedUrl(uploadPartUrlRequest); + } catch (SdkClientException sce) { + logger.warning("SdkClientException generating temporary S3 url for " + key + " (" + sce.getMessage() + + ")"); + presignedUrl = null; + } + String urlString = null; + if (presignedUrl != null) { + if(!StringUtil.isEmpty(proxy)) { + urlString = presignedUrl.toString().replace(endpoint, proxy); + } else { + urlString = presignedUrl.toString(); + } + } + urls.add(Integer.toString(i), urlString); + } + response.add("urls", urls); + response.add("abort", "/api/datasets/mpupload?globalid=" + globalId + "&uploadid=" + uploadId + + "&storageidentifier=" + storageIdentifier); + response.add("complete", "/api/datasets/mpupload?globalid=" + globalId + "&uploadid=" + uploadId + + "&storageidentifier=" + storageIdentifier); + + } + response.add("partSize", minPartSize); + + return response; + } int getUrlExpirationMinutes() { String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); @@ -997,174 +1006,174 @@ int getUrlExpirationMinutes() { } private static String getBucketName(String driverId) { - return System.getProperty("dataverse.files." + driverId + ".bucket-name"); + return System.getProperty("dataverse.files." + driverId + ".bucket-name"); } - private static long getMinPartSize(String driverId) { - // as a default, pick 1 GB minimum part size for AWS S3 - // (minimum allowed is 5*1024**2 but it probably isn't worth the complexity starting at ~5MB. Also - confirmed that they use base 2 definitions) - long min = 5 * 1024 * 1024l; - - String partLength = System.getProperty("dataverse.files." + driverId + ".min-part-size"); - try { - if (partLength != null) { - long val = Long.parseLong(partLength); - if(val>=min) { - min=val; - } else { - logger.warning(min + " is the minimum part size allowed for jvm option dataverse.files." + driverId + ".min-part-size" ); - } - } else { - min = 1024 * 1024 * 1024l; - } - } catch (NumberFormatException nfe) { - logger.warning("Unable to parse dataverse.files." + driverId + ".min-part-size as long: " + partLength); - } - return min; - } + private static long getMinPartSize(String driverId) { + // as a default, pick 1 GB minimum part size for AWS S3 + // (minimum allowed is 5*1024**2 but it probably isn't worth the complexity starting at ~5MB. Also - confirmed that they use base 2 definitions) + long min = 5 * 1024 * 1024l; + + String partLength = System.getProperty("dataverse.files." + driverId + ".min-part-size"); + try { + if (partLength != null) { + long val = Long.parseLong(partLength); + if(val>=min) { + min=val; + } else { + logger.warning(min + " is the minimum part size allowed for jvm option dataverse.files." + driverId + ".min-part-size" ); + } + } else { + min = 1024 * 1024 * 1024l; + } + } catch (NumberFormatException nfe) { + logger.warning("Unable to parse dataverse.files." + driverId + ".min-part-size as long: " + partLength); + } + return min; + } private static TransferManager getTransferManager(String driverId) { - if(driverTMMap.containsKey(driverId)) { - return driverTMMap.get(driverId); - } else { - // building a TransferManager instance to support multipart uploading for files over 4gb. - TransferManager manager = TransferManagerBuilder.standard() - .withS3Client(getClient(driverId)) - .build(); - driverTMMap.put(driverId, manager); - return manager; - } + if(driverTMMap.containsKey(driverId)) { + return driverTMMap.get(driverId); + } else { + // building a TransferManager instance to support multipart uploading for files over 4gb. + TransferManager manager = TransferManagerBuilder.standard() + .withS3Client(getClient(driverId)) + .build(); + driverTMMap.put(driverId, manager); + return manager; + } } private static AmazonS3 getClient(String driverId) { - if(driverClientMap.containsKey(driverId)) { - return driverClientMap.get(driverId); - } else { - // get a standard client, using the standard way of configuration the credentials, etc. - AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); - - ClientConfiguration cc = new ClientConfiguration(); - Integer poolSize = Integer.getInteger("dataverse.files." + driverId + ".connection-pool-size", 256); - cc.setMaxConnections(poolSize); - s3CB.setClientConfiguration(cc); - - /** - * Pass in a URL pointing to your S3 compatible storage. - * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html - */ - String s3CEUrl = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", ""); - /** - * Pass in a region to use for SigV4 signing of requests. - * Defaults to "dataverse" as it is not relevant for custom S3 implementations. - */ - String s3CERegion = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-region", "dataverse"); - - // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. - if (!s3CEUrl.isEmpty()) { - s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); - } - /** - * Pass in a boolean value if path style access should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - Boolean s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".path-style-access", "false")); - // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false - s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); - - /** - * Pass in a boolean value if payload signing should be used within the S3 client. - * Anything but case-insensitive "true" will lead to value of false, which is default value, too. - */ - Boolean s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".payload-signing","false")); - /** - * Pass in a boolean value if chunked encoding should not be used within the S3 client. - * Anything but case-insensitive "false" will lead to value of true, which is default value, too. - */ - Boolean s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".chunked-encoding","true")); - // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false - s3CB.setPayloadSigningEnabled(s3payloadSigning); - // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true - // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled - s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. - */ - String s3profile = System.getProperty("dataverse.files." + driverId + ".profile","default"); - - s3CB.setCredentials(new ProfileCredentialsProvider(s3profile)); - // let's build the client :-) - AmazonS3 client = s3CB.build(); - driverClientMap.put(driverId, client); - return client; - } + if(driverClientMap.containsKey(driverId)) { + return driverClientMap.get(driverId); + } else { + // get a standard client, using the standard way of configuration the credentials, etc. + AmazonS3ClientBuilder s3CB = AmazonS3ClientBuilder.standard(); + + ClientConfiguration cc = new ClientConfiguration(); + Integer poolSize = Integer.getInteger("dataverse.files." + driverId + ".connection-pool-size", 256); + cc.setMaxConnections(poolSize); + s3CB.setClientConfiguration(cc); + + /** + * Pass in a URL pointing to your S3 compatible storage. + * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html + */ + String s3CEUrl = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", ""); + /** + * Pass in a region to use for SigV4 signing of requests. + * Defaults to "dataverse" as it is not relevant for custom S3 implementations. + */ + String s3CERegion = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-region", "dataverse"); + + // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. + if (!s3CEUrl.isEmpty()) { + s3CB.setEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(s3CEUrl, s3CERegion)); + } + /** + * Pass in a boolean value if path style access should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".path-style-access", "false")); + // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false + s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); + + /** + * Pass in a boolean value if payload signing should be used within the S3 client. + * Anything but case-insensitive "true" will lead to value of false, which is default value, too. + */ + Boolean s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".payload-signing","false")); + /** + * Pass in a boolean value if chunked encoding should not be used within the S3 client. + * Anything but case-insensitive "false" will lead to value of true, which is default value, too. + */ + Boolean s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".chunked-encoding","true")); + // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false + s3CB.setPayloadSigningEnabled(s3payloadSigning); + // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true + // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled + s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); + + /** + * Pass in a string value if this storage driver should use a non-default AWS S3 profile. + * The default is "default" which should work when only one profile exists. + */ + String s3profile = System.getProperty("dataverse.files." + driverId + ".profile","default"); + + s3CB.setCredentials(new ProfileCredentialsProvider(s3profile)); + // let's build the client :-) + AmazonS3 client = s3CB.build(); + driverClientMap.put(driverId, client); + return client; + } } - public void removeTempTag() throws IOException { - if (!(dvObject instanceof DataFile)) { - logger.warning("Attempt to remove tag from non-file DVObject id: " + dvObject.getId()); - throw new IOException("Attempt to remove temp tag from non-file S3 Object"); - } - try { - - key = getMainFileKey(); - DeleteObjectTaggingRequest deleteObjectTaggingRequest = new DeleteObjectTaggingRequest(bucketName, key); - //NOte - currently we only use one tag so delete is the fastest and cheapest way to get rid of that one tag - //Otherwise you have to get tags, remove the one you don't want and post new tags and get charged for the operations + public void removeTempTag() throws IOException { + if (!(dvObject instanceof DataFile)) { + logger.warning("Attempt to remove tag from non-file DVObject id: " + dvObject.getId()); + throw new IOException("Attempt to remove temp tag from non-file S3 Object"); + } + try { + + key = getMainFileKey(); + DeleteObjectTaggingRequest deleteObjectTaggingRequest = new DeleteObjectTaggingRequest(bucketName, key); + //NOte - currently we only use one tag so delete is the fastest and cheapest way to get rid of that one tag + //Otherwise you have to get tags, remove the one you don't want and post new tags and get charged for the operations s3.deleteObjectTagging(deleteObjectTaggingRequest); } catch (SdkClientException sce) { - if(sce.getMessage().contains("Status Code: 501")) { - // In this case, it's likely that tags are not implemented at all (e.g. by Minio) so no tag was set either and it's just something to be aware of - logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); - } else { - // In this case, the assumption is that adding tags has worked, so not removing it is a problem that should be looked into. - logger.severe("Unable to remove temp tag from : " + bucketName + " : " + key); - } + if(sce.getMessage().contains("Status Code: 501")) { + // In this case, it's likely that tags are not implemented at all (e.g. by Minio) so no tag was set either and it's just something to be aware of + logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); + } else { + // In this case, the assumption is that adding tags has worked, so not removing it is a problem that should be looked into. + logger.severe("Unable to remove temp tag from : " + bucketName + " : " + key); + } } catch (IOException e) { - logger.warning("Could not create key for S3 object." ); - e.printStackTrace(); - } - - } - - public static void abortMultipartUpload(String globalId, String storageIdentifier, String uploadId) - throws IOException { - String baseKey = null; - int index = globalId.indexOf(":"); - if (index >= 0) { - baseKey = globalId.substring(index + 1); - } else { - throw new IOException("Invalid Global ID (expected form with ':' prefix)"); - } - String[] info = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); - String driverId = info[0]; - AmazonS3 s3Client = getClient(driverId); - String bucketName = getBucketName(driverId); - String key = getMainFileKey(baseKey, storageIdentifier, driverId); - AbortMultipartUploadRequest req = new AbortMultipartUploadRequest(bucketName, key, uploadId); - s3Client.abortMultipartUpload(req); - } - - public static void completeMultipartUpload(String globalId, String storageIdentifier, String uploadId, - List etags) throws IOException { - String baseKey = null; - int index = globalId.indexOf(":"); - if (index >= 0) { - baseKey = globalId.substring(index + 1); - } else { - throw new IOException("Invalid Global ID (expected form with ':' prefix)"); - } - - String[] info = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); - String driverId = info[0]; - AmazonS3 s3Client = getClient(driverId); - String bucketName = getBucketName(driverId); - String key = getMainFileKey(baseKey, storageIdentifier, driverId); - CompleteMultipartUploadRequest req = new CompleteMultipartUploadRequest(bucketName, key, uploadId, etags); - s3Client.completeMultipartUpload(req); - } + logger.warning("Could not create key for S3 object." ); + e.printStackTrace(); + } + + } + + public static void abortMultipartUpload(String globalId, String storageIdentifier, String uploadId) + throws IOException { + String baseKey = null; + int index = globalId.indexOf(":"); + if (index >= 0) { + baseKey = globalId.substring(index + 1); + } else { + throw new IOException("Invalid Global ID (expected form with ':' prefix)"); + } + String[] info = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String driverId = info[0]; + AmazonS3 s3Client = getClient(driverId); + String bucketName = getBucketName(driverId); + String key = getMainFileKey(baseKey, storageIdentifier, driverId); + AbortMultipartUploadRequest req = new AbortMultipartUploadRequest(bucketName, key, uploadId); + s3Client.abortMultipartUpload(req); + } + + public static void completeMultipartUpload(String globalId, String storageIdentifier, String uploadId, + List etags) throws IOException { + String baseKey = null; + int index = globalId.indexOf(":"); + if (index >= 0) { + baseKey = globalId.substring(index + 1); + } else { + throw new IOException("Invalid Global ID (expected form with ':' prefix)"); + } + + String[] info = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String driverId = info[0]; + AmazonS3 s3Client = getClient(driverId); + String bucketName = getBucketName(driverId); + String key = getMainFileKey(baseKey, storageIdentifier, driverId); + CompleteMultipartUploadRequest req = new CompleteMultipartUploadRequest(bucketName, key, uploadId, etags); + s3Client.completeMultipartUpload(req); + } }