From 0fe7fea5a2273a67496ae5dd3aff0c176a1d0506 Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Tue, 21 Apr 2020 23:00:17 +0100 Subject: [PATCH] Deal with buckets being collected from multiple regions Two issues here. First, buckets are a global service, so every regional crawl lists all of them and we need to filter down to the buckets whose location matches the client's region. Second, if we are crawling us-east-1 and we look up a bucket that lives in another region, the S3 call can fail with AuthorizationHeaderMalformed; we now treat that error as "not in this region" and skip the bucket. See https://github.com/aws/aws-sdk-java/issues/1338 --- app/collectors/bucket.scala | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/app/collectors/bucket.scala b/app/collectors/bucket.scala index ecf86e63..bc7d9ca6 100644 --- a/app/collectors/bucket.scala +++ b/app/collectors/bucket.scala @@ -30,9 +30,10 @@ case class AWSBucketCollector(origin: AmazonOrigin, resource: ResourceType) exte def crawl: Iterable[Bucket] = { val request = new ListBucketsRequest() - client.listBuckets(request).asScala.flatMap { - Bucket.fromApiData(_, client) - } + client.listBuckets(request).asScala + .flatMap { + Bucket.fromApiData(_, client) + } } } @@ -43,14 +44,20 @@ object Bucket { def fromApiData(bucket: AWSBucket, client: AmazonS3): Option[Bucket] = { val bucketName = bucket.getName try { - Some(Bucket( - arn = arn(bucketName), - name = bucketName, - region = client.getBucketLocation(bucket.getName), - createdTime = Try(new DateTime(bucket.getCreationDate)).toOption - )) + val bucketRegion = client.getBucketLocation(bucket.getName) + if (bucketRegion == client.getRegionName) { + Some(Bucket( + arn = arn(bucketName), + name = bucketName, + region = bucketRegion, + createdTime = Try(new DateTime(bucket.getCreationDate)).toOption + )) + } else { + None + } } catch { case e:AmazonS3Exception if e.getErrorCode == "NoSuchBucket" => None + case e:AmazonS3Exception if e.getErrorCode == "AuthorizationHeaderMalformed" => None case NonFatal(t) => throw new IllegalStateException(s"Failed when building info for bucket $bucketName", t) }