Skip to content

Commit

Permalink
#102 | [WIP] Add TODOs before putting on hold
Browse files Browse the repository at this point in the history
  • Loading branch information
himeshr committed Jul 17, 2024
1 parent f2d8de1 commit 6a03e37
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 17 deletions.
14 changes: 2 additions & 12 deletions src/main/java/org/avniproject/etl/config/AmazonClientService.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,24 +74,14 @@ private Date getExpireDate(long expireDuration) {
return expiration;
}

public ArrayList<String> listObjectsInBucket(String s3PathPrefix, String negativeFilterPatternString) {
Boolean isNegativePatternFilteringRequired = StringUtils.hasText(negativeFilterPatternString);
Pattern negativeFilterPattern = Pattern.compile(negativeFilterPatternString);
Predicate<String> negativeFilterPatternPredicate = negativeFilterPattern.asPredicate();
Pattern uuidRegexPattern = Pattern.compile("^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}");
Predicate<String> uuidRegexPatternPredicate = uuidRegexPattern.asPredicate();

public ArrayList<String> listObjectsInBucket(String s3PathPrefix) {
ArrayList<String> listOfMediaUrls = new ArrayList<>();
ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucketName).withPrefix(s3PathPrefix).withMaxKeys(MAX_KEYS);
ListObjectsV2Result result;
do {
result = s3Client.listObjectsV2(req);
for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
// System.out.printf(" - %s (size: %d)\n", objectSummary.getKey(), objectSummary.getSize());
if (uuidRegexPatternPredicate.test(objectSummary.getKey().substring(objectSummary.getKey().lastIndexOf("/") + 1))
&& !(isNegativePatternFilteringRequired && negativeFilterPatternPredicate.test(objectSummary.getKey()))) {
listOfMediaUrls.add(objectSummary.getKey());
}
listOfMediaUrls.add(objectSummary.getKey());
}
// If there are more than maxKeys keys in the bucket, get a continuation token
// and list the next objects.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ public ResponseEntity createJob(@RequestBody JobScheduleRequest jobScheduleReque

EtlJobSummary latestJobRun = scheduledJobService.getLatestJobRun(jobScheduleRequest.getEntityUUID(), jobScheduleRequest.getJobGroup());
if (latestJobRun != null) return ResponseEntity.badRequest().body("Job already present");
//TODO For requests where !jobScheduleRequest.getJobGroup().equals(JobGroup.Sync), add validation that a JobGroup.Sync job is already scheduled

JobDetailImpl jobDetail = getJobDetail(jobScheduleRequest, organisationIdentity, organisationIdentitiesInGroup);
scheduler.addJob(jobDetail, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import org.avniproject.etl.domain.Organisation;
import org.avniproject.etl.domain.OrganisationIdentity;
import org.avniproject.etl.repository.OrganisationRepository;
import org.glassfish.jaxb.core.v2.TODO;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import javax.swing.text.html.HTML;
import java.util.ArrayList;
import java.util.List;

Expand Down Expand Up @@ -51,11 +53,18 @@ public void runFor(OrganisationIdentity organisationIdentity) {
log.info(String.format("Running Media Analysis for %s", organisationIdentity.toString()));
OrgIdentityContextHolder.setContext(organisationIdentity, etlServiceConfig);
Organisation organisation = organisationFactory.create(organisationIdentity);
// TODO
ArrayList<String> listOfAllMediaUrls = amazonClientService.listObjectsInBucket(getMediaDirectory(organisation), "thumbnails");
ArrayList<String> listOfAllMediaUrlsIncludingThumbnails = amazonClientService.listObjectsInBucket(getMediaDirectory(organisation), "");
ArrayList<String> listOfAllThumbnailsUrls = amazonClientService.listObjectsInBucket(getThumbnailsDirectory(organisation), "");
log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsIncludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsIncludingThumbnails.size(), listOfAllThumbnailsUrls.size()));
ArrayList<String> listOfAllMediaUrls = amazonClientService.listObjectsInBucket(getMediaDirectory(organisation));
//TODO Fix test issues causing build break
//TODO Use listOfAllMediaUrls to derive the required subsets of URLs (e.g. thumbnails, media), after validating the UUID and excluding Mobile and Adhoc entries
//TODO Log entries that get filtered out for dev purposes
ArrayList<String> listOfAllMediaUrlsExcludingThumbnails = new ArrayList<>();
ArrayList<String> listOfAllThumbnailsUrls = new ArrayList<>();
// TODO: 17/07/24 Fetch list of MediaUrls from media table
// SELECT REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '') as image_url_in_media_table
// FROM goonj.media
// ORDER BY REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '');
// TODO: 17/07/24 Invoke Analysis method to perform various metrics computations for each entry in media table of the org
log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size()));
log.info(String.format("Completed Media Analysis for schema %s with dbUser %s and schemaUser %s", organisationIdentity.getSchemaName(), organisationIdentity.getDbUser(), organisationIdentity.getSchemaUser()));
OrgIdentityContextHolder.setContext(organisationIdentity, etlServiceConfig);
}
Expand Down

0 comments on commit 6a03e37

Please sign in to comment.