Skip to content

Commit

Permalink
Merge pull request #2 from ing-bank/support-fqp-search
Browse files Browse the repository at this point in the history
Support fully qualified bucket paths
  • Loading branch information
nielsdenissen authored Dec 7, 2018
2 parents 1d6cbc6 + 4c117e7 commit 3ba62df
Show file tree
Hide file tree
Showing 11 changed files with 176 additions and 90 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ before_script:
- docker-compose up -d
- echo "Wait for containers to be up and running"
- bash waitForContainerSetup.sh
- bash setupS3Env.sh

after_success:
- bash <(curl -s https://codecov.io/bash)
Expand Down
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,37 @@ http://{RANGER_HOST}:{RANGER_PORT}/service/public/v2/api/servicedef
* Proper lookups
* No ceph-user name required
* AWS S3 support

# Ranger admin site configuration

The Ranger S3 plugin uses the AWS SDK to connect to the backend and list buckets and folders. If lookups
time out, check the Ranger setting below and adjust it accordingly. The default of 1000 ms may be too low in some cases.
The configuration file is `ranger-1.1.0-admin/ews/webapp/WEB-INF/classes/conf/ranger-admin-default-site.xml`.

```
<property>
<name>ranger.resource.lookup.timeout.value.in.ms</name>
<value>10000</value>
<description />
</property>
```

Additionally, the plugin can be configured with a different AWS region. To change the region, add the
following section to `ranger-1.1.0-admin/ews/webapp/WEB-INF/classes/conf/ranger-admin-default-site.xml`:

```
<property>
<name>airlock.s3.aws.region</name>
<value>region_name</value>
</property>
```


# Plugin usage with [Airlock](https://github.com/ing-bank/airlock)

If you run the plugin via Airlock, make sure that:

- the user used in the S3 service setup is an NPA user in [Airlock STS](https://github.com/ing-bank/airlock-sts).
See the "NPA S3 users" section of the STS readme
- the user is added to the "all - path" Ranger policy - in other words, the ceph user used for the connection must be
allowed to read all bucket paths
22 changes: 13 additions & 9 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

<groupId>com.ing.ranger</groupId>
<artifactId>ranger-s3-plugin</artifactId>
<version>0.1</version>
<version>0.2</version>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Expand All @@ -31,11 +31,16 @@
<junit.version>4.12</junit.version>
<assertj.version>1.7.0</assertj.version>
<ranger.version>1.1.0</ranger.version>
<aws.version>1.11.285</aws.version>
<radosgw.version>1.0.2</radosgw.version>
<aws.version>1.11.461</aws.version>
</properties>

<dependencies>
<dependency>
<artifactId>guava</artifactId>
<groupId>com.google.guava</groupId>
<version>11.0.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.ranger</groupId>
<artifactId>ranger-plugins-common</artifactId>
Expand All @@ -55,7 +60,7 @@
</exclusion>
</exclusions>
</dependency>
<!--<dependency>
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
<version>${aws.version}</version>
Expand All @@ -65,13 +70,12 @@
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>-->
</dependency>
<dependency>
<groupId>io.github.twonote</groupId>
<artifactId>radosgw-admin4j</artifactId>
<version>${radosgw.version}</version>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.9</version>
</dependency>

<!-- test dependencies -->
<dependency>
<groupId>junit</groupId>
Expand Down
5 changes: 5 additions & 0 deletions setupS3Env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

# Upload /etc/issue into demobucket under the subdir1/ and subdir2/ prefixes,
# running s3cmd inside the "ceph" docker-compose service.
for subdir in subdir1 subdir2; do
    docker-compose exec ceph s3cmd put /etc/issue "s3://demobucket/${subdir}/"
done

8 changes: 0 additions & 8 deletions src/main/java/com/ing/ranger/s3/RangerServiceS3.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
import com.ing.ranger.s3.client.S3ResourceManager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.ranger.plugin.model.RangerService;
import org.apache.ranger.plugin.model.RangerServiceDef;
import org.apache.ranger.plugin.service.RangerBaseService;
import org.apache.ranger.plugin.service.ResourceLookupContext;

Expand All @@ -36,12 +34,6 @@ public class RangerServiceS3 extends RangerBaseService {

private static final Log LOG = LogFactory.getLog(RangerServiceS3.class);

// check if required
@Override
public void init(RangerServiceDef serviceDef, RangerService service) {
super.init(serviceDef, service);
}

@Override
public Map<String, Object> validateConfig() throws Exception {
Map<String, Object> ret = new HashMap<String, Object>();
Expand Down
159 changes: 113 additions & 46 deletions src/main/java/com/ing/ranger/s3/client/S3Client.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,32 @@

package com.ing.ranger.s3.client;

import org.apache.commons.io.FilenameUtils;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.Bucket;
import com.amazonaws.services.s3.model.ObjectListing;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.ranger.authorization.hadoop.config.RangerConfiguration;
import org.apache.ranger.plugin.client.BaseClient;
import org.twonote.rgwadmin4j.RgwAdmin;
import org.twonote.rgwadmin4j.RgwAdminBuilder;
import org.twonote.rgwadmin4j.model.User;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class S3Client {
private String endpoint;
private String accesskey;
private String secretkey;
private String uid; // ceph user uid, used to verify connection to S3 backend
private String accessKey;
private String secretKey;
private String awsRegion;

private static final Log LOG = LogFactory.getLog(S3Client.class);

Expand All @@ -47,56 +54,43 @@ private static void logError(String errorMessage) throws Exception {
}

public S3Client(Map<String, String> configs) throws Exception {
this.endpoint = configs.get("endpoint");
this.accesskey = configs.get("accesskey");
this.secretkey = configs.get("secretkey");
this.uid = configs.get("uid");
this.endpoint = configs.get("endpoint");
this.accessKey = configs.get("accesskey");
this.secretKey = configs.get("secretkey");
this.awsRegion = RangerConfiguration.getInstance().get("airlock.s3.aws.region", "us-east-1");

if (this.endpoint == null || this.endpoint.isEmpty() || !this.endpoint.startsWith("http") || !this.endpoint.endsWith("admin")) {
logError("Incorrect value found for configuration `endpoint`. Please provide url in format http://host:port/admin");
if (this.endpoint == null || this.endpoint.isEmpty() || !this.endpoint.startsWith("http")) {
logError("Incorrect value found for configuration `endpoint`. Please provide url in format http://host:port");
}
if (this.accesskey == null || this.secretkey == null || this.uid == null) {
logError("Required value not found. Please provide accesskey, secretkey and user uid");
if (this.accessKey == null || this.secretKey == null) {
logError("Required value not found. Please provide accessKey, secretKey and user uid");
}
}

private RgwAdmin getRgwAdmin() {
return new RgwAdminBuilder()
.accessKey(this.accesskey)
.secretKey(this.secretkey)
.endpoint(this.endpoint)
.build();
}

public List<String> getBuckets(final String userInput) {
final String needle;
RgwAdmin rgwAdmin = getRgwAdmin();
private AmazonS3 getAWSClient() {
AWSCredentials credentials = new BasicAWSCredentials(this.accessKey, this.secretKey);
// signer type override is only required until akka http allows a raw User-Agent header
// airlock changes the User-Agent and causes a signature mismatch
ClientConfiguration conf = new ClientConfiguration();
conf.setSignerOverride("S3SignerType");

if (userInput != null) {
needle = userInput;
} else {
needle = new String();
}

// Empty string ensures returning all buckets
List<String> bucketNames = rgwAdmin.listBucket("")
.stream()
.filter(s -> FilenameUtils.wildcardMatch(s, needle + "*"))
.collect(Collectors.toList());
AmazonS3ClientBuilder client = AmazonS3ClientBuilder
.standard()
.withCredentials(new AWSStaticCredentialsProvider(credentials))
.withClientConfiguration(conf)
.withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, awsRegion));

if (LOG.isDebugEnabled()) {
LOG.debug(String.format("Buckets returned for input=%s buckets=%s", needle, bucketNames));
}
return bucketNames;
client.setPathStyleAccessEnabled(true);
return client.build();
}

public Map<String, Object> connectionTest() {
Map<String, Object> responseData = new HashMap<String, Object>();
RgwAdmin rgwAdmin = getRgwAdmin();
Optional<User> user = rgwAdmin.getUserInfo(this.uid);

if (!user.isPresent()) {
final String errMessage = "Failed, cannot retrieve UserInfo for: " + user;
List<Bucket> buckets = getAWSClient().listBuckets();

if (buckets.get(0).getName().isEmpty()) {
final String errMessage = "Failed, cannot retrieve Buckets list from S3";
BaseClient.generateResponseDataMap(false, errMessage, errMessage,
null, null, responseData);
} else {
Expand All @@ -106,4 +100,77 @@ public Map<String, Object> connectionTest() {
}
return responseData;
}

/**
 * Strips a single leading "/" from the given path, if present.
 *
 * @param userInput path text to normalise; must not be {@code null}
 * @return the input without its first character when it starts with "/",
 *         otherwise the input unchanged
 */
private String removeLeadingSlash(final String userInput) {
    return userInput.startsWith("/") ? userInput.substring(1) : userInput;
}

/**
 * Resolves the lookup text into at most 50 distinct, sorted bucket paths.
 *
 * <p>The input is read as {@code [/]bucketPrefix[/keyPrefix]}. Without a key prefix and
 * without a trailing "/", the result is {@code /bucket} for every bucket whose name starts
 * with {@code bucketPrefix}. Otherwise the pseudo-directories of each matching bucket are
 * returned, filtered by {@code keyPrefix}.
 *
 * @param userInput text typed by the user, with or without a leading "/"; {@code null} is
 *                  treated as an empty filter
 * @return distinct paths in natural order, limited to 50 entries
 */
public List<String> getBucketPaths(final String userInput) {
    // A missing input behaves like an empty one so that every bucket is offered.
    final String input = userInput == null ? "" : userInput;
    final String path = removeLeadingSlash(input);

    // Split on the first "/" of the slash-stripped path. Working on one string for both the
    // index and the substring keeps the offsets consistent whether or not the input had a
    // leading "/", and avoids String.split returning an empty array for input such as "//".
    final int separator = path.indexOf('/');
    final String bucketFilter = separator < 0 ? path : path.substring(0, separator);
    final String subdirFilter = separator < 0 ? "" : path.substring(separator + 1);
    final boolean listPseudoDirs = !subdirFilter.isEmpty() || input.endsWith("/");

    // The bucket list is fetched once; each matching bucket maps either to its own path or
    // to the pseudo-directories found inside it.
    return getAWSClient().listBuckets()
        .stream()
        .filter(b -> b.getName().startsWith(bucketFilter))
        .flatMap(b -> {
            if (listPseudoDirs) {
                return getBucketsPseudoDirs(b.getName(), subdirFilter).stream();
            } else {
                return Stream.of("/" + b.getName());
            }
        })
        .distinct()
        .sorted()
        .limit(50)
        .collect(Collectors.toList());
}

/**
 * Lists the pseudo-directory paths of a bucket whose object keys start with a prefix.
 *
 * <p>Each listed object is mapped to one path: a zero-size object yields
 * {@code /bucket/key}; any other object yields the path of its containing
 * pseudo-directory ({@code /bucket/dir/}), or {@code /bucket/} when the key has no "/"
 * after its first character. Results are not de-duplicated.
 *
 * @param bucket       name of the bucket to list
 * @param subdirFilter key prefix to match; {@code null} or empty matches every key
 * @return one path per matching object, in listing order
 */
public List<String> getBucketsPseudoDirs(final String bucket, final String subdirFilter) {
    final String prefix = subdirFilter == null ? "" : subdirFilter;

    // Pass the filter as the listing prefix so the backend returns only matching keys. A
    // listing is a single, size-limited page, so filtering only on the client can miss
    // matches in large buckets.
    final ObjectListing bucketObjects = prefix.isEmpty()
        ? getAWSClient().listObjects(bucket)
        : getAWSClient().listObjects(bucket, prefix);

    return bucketObjects
        .getObjectSummaries()
        .stream()
        // Kept as a safeguard in case the backend does not apply the prefix.
        .filter(summary -> summary.getKey().startsWith(prefix))
        .map(summary -> {
            final String key = summary.getKey();
            if (summary.getSize() == 0) {
                return String.format("/%s/%s", bucket, key);
            }
            final int lastSlash = key.lastIndexOf('/');
            if (lastSlash > 0) {
                return String.format("/%s/%s/", bucket, key.substring(0, lastSlash));
            }
            return String.format("/%s/", bucket);
        })
        .collect(Collectors.toList());
}
}
4 changes: 2 additions & 2 deletions src/main/java/com/ing/ranger/s3/client/S3ResourceManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

public class S3ResourceManager {

private static S3Client getS3Client(Map<String, String> configs) throws Exception {
public static S3Client getS3Client(Map<String, String> configs) throws Exception {
if (configs != null) {
return new S3Client(configs);
} else {
Expand All @@ -54,7 +54,7 @@ public static List<String> getBuckets(Map<String, String> configs, ResourceLooku

if (client != null) {
synchronized (client) {
buckets = client.getBuckets(userInput);
buckets = client.getBucketPaths(userInput);
}
}
return buckets;
Expand Down
15 changes: 2 additions & 13 deletions src/main/resources/s3-ranger.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,6 @@
[
{
"itemId": 1,
"name": "uid",
"type": "string",
"subType": "",
"mandatory": true,
"validationRegEx": "",
"validationMessage": "",
"uiHint":"",
"label": "Admin user id"
},
{
"itemId": 2,
"name": "endpoint",
"type": "string",
"subType": "",
Expand All @@ -80,7 +69,7 @@
"label": "S3 Endpoint"
},
{
"itemId": 3,
"itemId": 2,
"name": "accesskey",
"type": "string",
"subType": "",
Expand All @@ -91,7 +80,7 @@
"label": "Access key"
},
{
"itemId": 4,
"itemId": 3,
"name": "secretkey",
"type": "password",
"subType": "",
Expand Down
Loading

0 comments on commit 3ba62df

Please sign in to comment.