Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
Merge pull request #8 from nhsuk/feature/prune-old-files
Browse files Browse the repository at this point in the history
add function to prune old files
  • Loading branch information
st3v3nhunt committed May 29, 2018
2 parents ffd8463 + e6cf778 commit 272784f
Show file tree
Hide file tree
Showing 14 changed files with 350 additions and 75 deletions.
66 changes: 56 additions & 10 deletions AzureDataService.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,11 @@ const requireEnv = require('require-environment-variables');
requireEnv(['AZURE_STORAGE_CONNECTION_STRING']);

const azureService = require('./lib/azureService');
const createFilter = require('./lib/createFileVersionFilter');
const filters = require('./lib/filters');
const fsHelper = require('./lib/fsHelper');
const getDateFromFilename = require('./lib/getDateFromFilename');
const sortDateDesc = require('./lib/sortByFilenameDateDesc');
const validateConfig = require('./lib/validateConfig');

function getSuffix(startMoment) {
return `-${startMoment.format('YYYYMMDD')}.json`;
}
const libConfig = require('./lib/config');

class AzureDataService {
constructor(config) {
Expand All @@ -25,11 +21,16 @@ class AzureDataService {
this.seedIdFile = `${config.outputFile}-seed-ids`;
this.localSeedIdFile = `${this.outputDir}/${this.seedIdFile}.json`;
this.version = config.version;
this.dateFormat = libConfig.dateFormat;
validateConfig(this);
}

getSuffix(startMoment) {
return `-${startMoment.format(this.dateFormat)}.json`;
}

getSuffixWithVersion(startMoment) {
return `-${startMoment.format('YYYYMMDD')}-${this.version}.json`;
return `-${startMoment.format(this.dateFormat)}-${this.version}.json`;
}

async downloadLatest(blobName, filename) {
Expand All @@ -51,8 +52,8 @@ class AzureDataService {
}

async getLatestData() {
const filter = createFilter(this.outputFile, this.version);
const lastScan = await azureService.getLatestBlob(this.containerName, filter, sortDateDesc);
const filter = filters.createFileVersionFilter(this.outputFile, this.version);
const lastScan = await azureService.getLatestBlob(this.containerName, filter);
if (lastScan) {
return this.downloadLatest(lastScan.name, this.localFile);
}
Expand All @@ -69,13 +70,58 @@ class AzureDataService {

async uploadIds(startMoment) {
this.log.info(`Saving date stamped version of '${this.seedIdFile}' in Azure`);
await azureService.uploadToAzure(this.containerName, this.localSeedIdFile, `${this.seedIdFile}${getSuffix(startMoment)}`);
await azureService.uploadToAzure(this.containerName, this.localSeedIdFile, `${this.seedIdFile}${this.getSuffix(startMoment)}`);
}

async uploadSummary(startMoment) {
this.log.info('Saving summary file in Azure');
await azureService.uploadToAzure(this.containerName, this.localSummaryFile, `${this.outputFile}-${this.summaryFile}${this.getSuffixWithVersion(startMoment)}`);
}

async pruneDataFiles(oldestMoment, files) {
const filter = filters.createExpiredDataFilter(this.outputFile, this.version, oldestMoment);
const fileVersionFilter = filters.createFileVersionFilter(this.outputFile, this.version);
await this.pruneExpiredFiles(files, filter, fileVersionFilter);
}

async pruneIdListFiles(oldestMoment, files) {
const filter = filters.createExpiredIdListFilter(this.outputFile, this.version, oldestMoment);
const latestFilter = filters.createIdListFilter(this.seedIdFile);
await this.pruneExpiredFiles(files, filter, latestFilter);
}

async pruneSummaryFiles(oldestMoment, files) {
const filter = filters.createExpiredSummaryFilter(
this.outputFile,
this.summaryFile,
this.version, oldestMoment
);
const latestFilter = filters.createSummaryFileFilter(this.outputFile, this.summaryFile);
await this.pruneExpiredFiles(files, filter, latestFilter);
}

async pruneExpiredFiles(files, filter, latestFilter) {
const expiredFiles = files.filter(filter);

const latest = await azureService.getLatestBlob(this.containerName, latestFilter);
// eslint-disable-next-line no-restricted-syntax
for (const file of expiredFiles) {
// safeguard to stop deleting latest data
if (!latest || file.name !== latest.name) {
// eslint-disable-next-line no-await-in-loop
await azureService.deleteFromAzure(this.containerName, file.name);
}
}
}

async pruneFilesOlderThan(oldestMoment) {
const files = await azureService.listBlobs(this.containerName);
if (files) {
await this.pruneDataFiles(oldestMoment, files);
await this.pruneIdListFiles(oldestMoment, files);
await this.pruneSummaryFiles(oldestMoment, files);
}
}
}

module.exports = AzureDataService;
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
0.4.0 / 2018-05-29
==================
- Provide function to purge old files

0.3.0 / 2018-04-25
==================
- Update npm dependencies
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,5 @@ All upload functions take a `startMoment` parameter used to datestamp the file.
`uploadIds` uploads the ID file from `outputDir` to `seedIdFile` in Azure Storage with datestamped suffix.

`uploadSummary` uploads the summary file from `outputDir` to Azure Storage, with an `outputFile` prefix, a datestamp, and a version suffix.

`pruneFilesOlderThan` removes all files dated before the provided moment. The latest data, ID list and summary files are always kept, even if they are older than that moment, to prevent data loss.
4 changes: 3 additions & 1 deletion lib/azureService.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ function deleteFromAzure(containerName, blobName) {
if (!error) {
resolve(result);
} else {
reject(error);
// treat not found as successful delete
// eslint-disable-next-line no-unused-expressions
error.code === 'BlobNotFound' ? resolve(error) : reject(error);
}
});
});
Expand Down
1 change: 1 addition & 0 deletions lib/config.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// Shared library configuration.
module.exports = {
// Taken from the AZURE_TIMEOUT_MINUTES environment variable when set (environment
// values are strings), otherwise the number 5. Presumably a timeout in minutes - confirm at the call site.
azureTimeoutMinutes: process.env.AZURE_TIMEOUT_MINUTES || 5,
// moment format string used to write and parse the date stamp in file names
dateFormat: 'YYYYMMDD',
};
6 changes: 0 additions & 6 deletions lib/createFileVersionFilter.js

This file was deleted.

48 changes: 48 additions & 0 deletions lib/filters.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
const moment = require('moment');
const libConfig = require('./config');

/**
 * Tests whether the date captured from a file name falls before `oldestDate`.
 * @param {RegExp} regex - pattern whose first capture group holds the date text.
 * @param {{name: string}} file - file whose name is inspected.
 * @param {*} oldestDate - value passed to moment's `isBefore`.
 * @returns {boolean} `false` when the name does not match or the capture is empty,
 *   otherwise the result of the `isBefore` comparison.
 */
function dateExpired(regex, file, oldestDate) {
  const found = regex.exec(file.name);
  const dateText = found && found[1];
  if (!dateText) {
    return false;
  }
  return moment(dateText, libConfig.dateFormat).isBefore(oldestDate);
}

/**
 * Creates a predicate selecting data files named
 * `<outputFile>-<8 digit date>-<version>.json` whose date is before `oldestDate`.
 *
 * `outputFile` and `version` are matched literally: regex metacharacters in them
 * (such as the dot in version `0.1`) are escaped so that files belonging to a
 * different version are never selected for pruning.
 *
 * @param {string} outputFile - file name prefix.
 * @param {string} version - version segment of the file name.
 * @param {*} oldestDate - cut-off handed to `dateExpired`.
 * @returns {Function} predicate taking a `{ name }` object and returning a boolean.
 */
function createExpiredDataFilter(outputFile, version, oldestDate) {
  const escapeRegExp = text => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`^${escapeRegExp(outputFile)}-(\\d{8})-${escapeRegExp(version)}\\.json`);
  return file => dateExpired(regex, file, oldestDate);
}

/**
 * Creates a predicate selecting summary files named
 * `<outputFile>-<summaryFile>-<8 digit date>-<version>.json` whose date is
 * before `oldestDate`.
 *
 * The name parts are matched literally: regex metacharacters in them (such as
 * the dot in version `0.1`) are escaped so unrelated files are never selected
 * for pruning.
 *
 * @param {string} outputFile - file name prefix.
 * @param {string} summaryFile - summary segment of the file name.
 * @param {string} version - version segment of the file name.
 * @param {*} oldestDate - cut-off handed to `dateExpired`.
 * @returns {Function} predicate taking a `{ name }` object and returning a boolean.
 */
function createExpiredSummaryFilter(outputFile, summaryFile, version, oldestDate) {
  const escapeRegExp = text => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const prefix = `${escapeRegExp(outputFile)}-${escapeRegExp(summaryFile)}`;
  const regex = new RegExp(`^${prefix}-(\\d{8})-${escapeRegExp(version)}\\.json`);
  return file => dateExpired(regex, file, oldestDate);
}

/**
 * Creates a predicate selecting ID list files named
 * `<outputFile>-seed-ids-<8 digit date>.json` whose date is before `oldestDate`.
 *
 * `outputFile` is matched literally: regex metacharacters in it are escaped so
 * unrelated files are never selected for pruning.
 *
 * @param {string} outputFile - file name prefix.
 * @param {string} version - not used; ID list file names carry no version.
 *   Kept so the signature stays unchanged for existing callers.
 * @param {*} oldestDate - cut-off handed to `dateExpired`.
 * @returns {Function} predicate taking a `{ name }` object and returning a boolean.
 */
function createExpiredIdListFilter(outputFile, version, oldestDate) {
  const escapeRegExp = text => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`^${escapeRegExp(outputFile)}-seed-ids-(\\d{8})\\.json`);
  return file => dateExpired(regex, file, oldestDate);
}

/**
 * Creates a predicate matching files named
 * `<outputFile>-<8 digit date><anything>-<version>.json`.
 *
 * `outputFile` and `version` are matched literally: regex metacharacters in
 * them (such as the dot in version `0.1`) are escaped so files from another
 * version do not match.
 *
 * @param {string} outputFile - file name prefix.
 * @param {string} version - version segment of the file name.
 * @returns {Function} predicate taking a `{ name }` object; returns the match
 *   array (truthy) on a match and `null` otherwise.
 */
function createFileVersionFilter(outputFile, version) {
  const escapeRegExp = text => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`^${escapeRegExp(outputFile)}-\\d{8}.*-${escapeRegExp(version)}\\.json`);
  return file => file.name.match(regex);
}

/**
 * Creates a predicate matching files whose name begins with `<seedIdFile>-`.
 * @param {string} seedIdFile - ID list file name prefix.
 * @returns {Function} predicate taking a `{ name }` object and returning a boolean.
 */
function createIdListFilter(seedIdFile) {
  const prefix = `${seedIdFile}-`;
  return function isIdListFile(file) {
    return file.name.startsWith(prefix);
  };
}

/**
 * Creates a predicate matching files whose name begins with
 * `<outputFile>-<summaryFile>`.
 * @param {string} outputFile - file name prefix.
 * @param {string} summaryFile - summary segment of the file name.
 * @returns {Function} predicate taking a `{ name }` object and returning a boolean.
 */
function createSummaryFileFilter(outputFile, summaryFile) {
  const prefix = `${outputFile}-${summaryFile}`;
  return function isSummaryFile(file) {
    return file.name.startsWith(prefix);
  };
}

// Public API: factories returning predicates over `{ name }` file objects.
// The `createExpired*` factories select dated files older than a cut-off;
// the remaining factories select files by name pattern or prefix.
module.exports = {
createExpiredDataFilter,
createExpiredIdListFilter,
createExpiredSummaryFilter,
createFileVersionFilter,
createIdListFilter,
createSummaryFileFilter,
};
3 changes: 2 additions & 1 deletion lib/getDateFromFilename.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
const moment = require('moment');
const libConfig = require('./config');

const regex = /.*-(\d{8}).*/;
const minDate = moment(0);

function getMoment(dateString) {
const date = moment(dateString, 'YYYYMMDD');
const date = moment(dateString, libConfig.dateFormat);
return date.isValid() ? date : minDate;
}

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "azure-data-service",
"version": "0.3.0",
"version": "0.4.0",
"description": "service to upload and retrieve latest files from azure storage",
"main": "AzureDataService.js",
"repository": "https://github.com/nhsuk/azure-data-service.git",
Expand Down
Loading

0 comments on commit 272784f

Please sign in to comment.