diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..f2d48ba
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,106 @@
+name: main
+
+on:
+  push:
+
+jobs:
+  put-to-cache:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - run: |
+          mkdir -p a/b/c
+          echo "1234" >> a/b/c/file.txt
+          echo "000" >> file.txt
+
+      - uses: ./
+        with:
+          path: |
+            a/b/c/file.txt
+            file.txt
+          key: cache-${{ github.run_id }}-${{ github.sha }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_ACCESS_SECRET }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          aws-cache-bucket: ${{ secrets.AWS_CACHE_BUCKET }}
+
+  read-cache:
+    needs: [put-to-cache]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./
+        with:
+          path: |
+            a/b/c/file.txt
+            file.txt
+          key: cache-${{ github.run_id }}-${{ github.sha }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_ACCESS_SECRET }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          aws-cache-bucket: ${{ secrets.AWS_CACHE_BUCKET }}
+
+      - run: |
+          test -f a/b/c/file.txt
+          test -f file.txt
+
+  read-partial-cache:
+    needs: [put-to-cache]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./
+        with:
+          path: |
+            a/b/c/file.txt
+          key: cache-${{ github.run_id }}-${{ github.sha }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_ACCESS_SECRET }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          aws-cache-bucket: ${{ secrets.AWS_CACHE_BUCKET }}
+
+      - run: |
+          test -f a/b/c/file.txt
+
+  read-cache-and-overwrite:
+    needs: [put-to-cache]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - run: |
+          mkdir -p a/b/c
+          echo "-" >> a/b/c/file.txt
+          echo "-" >> a/b/c/file2.txt
+
+      - uses: ./
+        with:
+          path: |
+            a/b/c/file.txt
+          key: cache-${{ github.run_id }}-${{ github.sha }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_ACCESS_SECRET }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          aws-cache-bucket: ${{ secrets.AWS_CACHE_BUCKET }}
+
+      - run: |
+          test -f a/b/c/file.txt
+          test -f a/b/c/file2.txt
+
+  lookup-cache:
+    needs: [put-to-cache]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: ./restore
+        id: restore
+        with:
+          lookup-only: true
+          key: cache-${{ github.run_id }}-${{ github.sha }}
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_ACCESS_SECRET }}
+          aws-region: ${{ secrets.AWS_REGION }}
+          aws-cache-bucket: ${{ secrets.AWS_CACHE_BUCKET }}
+
+      - run: echo ${{ steps.restore.outputs.cache-hit }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2eea525
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.env
\ No newline at end of file
diff --git a/README.md b/README.md
index c29328f..06f547e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,25 @@
 # s3-cache
+
+> Drop-in replacement for [actions/cache](https://github.com/actions/cache) that stores cache artifacts on S3
+
+# Usage
+
+```yaml
+steps:
+  - uses: rayonapp/s3-cache
+    with:
+      path: |
+        path/to/file/a
+        another/file
+      key: cache-${{ hashFiles('**/**.rs') }}
+      aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      aws-secret-access-key: ${{ secrets.AWS_ACCESS_SECRET }}
+      aws-region: ${{ secrets.AWS_REGION }}
+      aws-cache-bucket: ${{ secrets.AWS_CACHE_BUCKET }}
+```
+
+# Motivation
+
+Handle cache expiration ourselves.
+
+No restrictions based on branches, unlike the built-in cache ([restrictions for accessing a cache](https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache)).
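"Handle cache expiration ourselves" here means expiring objects on the bucket side rather than relying on GitHub's eviction. As a sketch of what that can look like (not part of this diff: the bucket name, key prefix, and retention period are placeholder assumptions, and the action itself never touches lifecycle configuration), a one-off script using the official `@aws-sdk/client-s3` package could set a bucket-wide expiry rule:

```js
// Illustrative only: expire cache objects after N days so the bucket
// does not grow without bound. Bucket, Prefix, and Days are placeholders.
const {
  S3Client,
  PutBucketLifecycleConfigurationCommand,
} = require("@aws-sdk/client-s3");

const client = new S3Client({ region: "us-east-1" });

client
  .send(
    new PutBucketLifecycleConfigurationCommand({
      Bucket: "my-cache-bucket",
      LifecycleConfiguration: {
        Rules: [
          {
            ID: "expire-cache",
            Status: "Enabled",
            Filter: { Prefix: "cache-" },
            Expiration: { Days: 7 },
          },
        ],
      },
    })
  )
  .then(() => console.log("lifecycle rule applied"));
```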
diff --git a/action.yml b/action.yml
new file mode 100644
index 0000000..29d8fe6
--- /dev/null
+++ b/action.yml
@@ -0,0 +1,30 @@
+name: s3-cache
+description: |
+  Cache using s3
+runs:
+  using: node20
+  main: main.js
+  post: post.js
+  post-if: success()
+inputs:
+  key:
+    description: cache key
+    required: true
+  path:
+    description: path to files to cache, use multiline for multiple files
+
+  aws-access-key-id:
+    description: aws-access-key-id
+    required: true
+  aws-secret-access-key:
+    description: aws-secret-access-key
+    required: true
+  aws-region:
+    description: aws-region
+    required: true
+  aws-cache-bucket:
+    description: aws-cache-bucket
+    required: true
+outputs:
+  cache-hit:
+    description: true if a cache is found for this key
diff --git a/cache.js b/cache.js
new file mode 100644
index 0000000..596dac7
--- /dev/null
+++ b/cache.js
@@ -0,0 +1,92 @@
+const s3 = require("./s3");
+const { join: path_join } = require("path");
+const fs = require("fs");
+const { execSync, execFileSync } = require("child_process");
+
+/**
+ * set the cache-hit output
+ * cache-hit=true if a cache is found for this key
+ */
+const lookUp = async (key) => {
+  console.log("look up", { key });
+
+  const cacheHit = await s3.exist(key);
+
+  console.log("cacheHit=", cacheHit.toString());
+
+  execSync(`echo "cache-hit=${cacheHit.toString()}" >> $GITHUB_OUTPUT`);
+};
+
+/**
+ * push files to the cache
+ *
+ * for each path, create a zip file in a tmp dir,
+ * then zip all of those together,
+ * and push the resulting file to s3 with the cache key as name
+ */
+const put = async (key, paths) => {
+  const a = Date.now();
+  console.log("put", { key, paths });
+
+  const tmpDir = fs.mkdtempSync("s3-cache");
+
+  try {
+    for (const path of paths) {
+      if (!fs.existsSync(path)) throw new Error(`file doesn't exist: ${path}`);
+
+      const pathKey = path.replace(/\//g, "_") + ".zip";
+
+      execFileSync("zip", [path_join(tmpDir, pathKey), "-r", path]);
+    }
+
+    execFileSync("zip", ["__payload.zip", "-r", "."], { cwd: tmpDir });
+
+    const payload = fs.readFileSync(path_join(tmpDir, "__payload.zip"));
+
+    await s3.put(key, payload);
+
+    console.log("uploaded in", Date.now() - a, "ms");
+  } finally {
+    fs.rmSync(tmpDir, { recursive: true });
+  }
+};
+
+/**
+ * get files from the cache
+ */
+const get = async (key, paths) => {
+  const a = Date.now();
+  console.log("get", { key, paths });
+
+  const payload = await s3.get(key);
+
+  if (payload) {
+    const tmpDir = fs.mkdtempSync("s3-cache");
+
+    try {
+      fs.writeFileSync(
+        path_join(tmpDir, "__payload.zip"),
+        Buffer.from(payload)
+      );
+
+      execFileSync("unzip", ["__payload.zip"], { cwd: tmpDir });
+
+      for (const filename of paths) {
+        const pathKey = filename.replace(/\//g, "_") + ".zip";
+
+        if (!fs.existsSync(path_join(tmpDir, pathKey)))
+          throw new Error(`file doesn't exist in the cache: ${filename}`);
+
+        execFileSync("unzip", ["-o", path_join(tmpDir, pathKey)]);
+      }
+
+      execSync(`echo "cache-hit=true" >> $GITHUB_OUTPUT`);
+
+      console.log("downloaded in", Date.now() - a, "ms");
+    } finally {
+      fs.rmSync(tmpDir, { recursive: true });
+    }
+  }
+};
+
+module.exports = { get, put, lookUp };
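The archive naming in `cache.js` flattens each cached path into a zip filename by replacing `/` with `_`. A minimal standalone sketch of that mapping (the paths are hypothetical), which also shows that two distinct paths can collide after flattening:

```js
// How `put` names the per-path archives: every "/" becomes "_".
const archiveName = (p) => p.replace(/\//g, "_") + ".zip";

console.log(archiveName("a/b/c/file.txt")); // a_b_c_file.txt.zip
console.log(archiveName("file.txt")); //       file.txt.zip

// Caveat: distinct paths can map to the same archive name.
console.log(archiveName("a/b.txt")); // a_b.txt.zip
console.log(archiveName("a_b.txt")); // a_b.txt.zip
```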
strict"; + +/** + * lifted from https://github.com/mhart/aws4fetch + */ + +Object.defineProperty(exports, "__esModule", { value: true }); + +/** + * @license MIT + * @copyright Michael Hart 2022 + */ +const encoder = new TextEncoder(); +const HOST_SERVICES = { + appstream2: "appstream", + cloudhsmv2: "cloudhsm", + email: "ses", + marketplace: "aws-marketplace", + mobile: "AWSMobileHubService", + pinpoint: "mobiletargeting", + queue: "sqs", + "git-codecommit": "codecommit", + "mturk-requester-sandbox": "mturk-requester", + "personalize-runtime": "personalize", +}; +const UNSIGNABLE_HEADERS = new Set([ + "authorization", + "content-type", + "content-length", + "user-agent", + "presigned-expires", + "expect", + "x-amzn-trace-id", + "range", + "connection", +]); +class AwsClient { + constructor({ + accessKeyId, + secretAccessKey, + sessionToken, + service, + region, + cache, + retries, + initRetryMs, + }) { + if (accessKeyId == null) + throw new TypeError("accessKeyId is a required option"); + if (secretAccessKey == null) + throw new TypeError("secretAccessKey is a required option"); + this.accessKeyId = accessKeyId; + this.secretAccessKey = secretAccessKey; + this.sessionToken = sessionToken; + this.service = service; + this.region = region; + this.cache = cache || new Map(); + this.retries = retries != null ? retries : 10; + this.initRetryMs = initRetryMs || 50; + } + async sign(input, init) { + if (input instanceof Request) { + const { method, url, headers, body } = input; + init = Object.assign({ method, url, headers }, init); + if (init.body == null && headers.has("Content-Type")) { + init.body = + body != null && headers.has("X-Amz-Content-Sha256") + ? body + : await input.clone().arrayBuffer(); + } + input = url; + } + const signer = new AwsV4Signer( + Object.assign({ url: input }, init, this, init && init.aws) + ); + const signed = Object.assign({}, init, await signer.sign()); + delete signed.aws; + try { + return new Request(signed.url.toString(), signed); + } catch (e) { + if (e instanceof TypeError) { + return new Request( + signed.url.toString(), + Object.assign({ duplex: "half" }, signed) + ); + } + throw e; + } + } + async fetch(input, init) { + for (let i = 0; i <= this.retries; i++) { + const fetched = fetch(await this.sign(input, init)); + if (i === this.retries) { + return fetched; + } + const res = await fetched; + if (res.status < 500 && res.status !== 429) { + return res; + } + await new Promise((resolve) => + setTimeout(resolve, Math.random() * this.initRetryMs * Math.pow(2, i)) + ); + } + throw new Error( + "An unknown error occurred, ensure retries is not negative" + ); + } +} +class AwsV4Signer { + constructor({ + method, + url, + headers, + body, + accessKeyId, + secretAccessKey, + sessionToken, + service, + region, + cache, + datetime, + signQuery, + appendSessionToken, + allHeaders, + singleEncode, + }) { + if (url == null) throw new TypeError("url is a required option"); + if (accessKeyId == null) + throw new TypeError("accessKeyId is a required option"); + if (secretAccessKey == null) + throw new TypeError("secretAccessKey is a required option"); + this.method = method || (body ? 
"POST" : "GET"); + this.url = new URL(url); + this.headers = new Headers(headers || {}); + this.body = body; + this.accessKeyId = accessKeyId; + this.secretAccessKey = secretAccessKey; + this.sessionToken = sessionToken; + let guessedService, guessedRegion; + if (!service || !region) { + [guessedService, guessedRegion] = guessServiceRegion( + this.url, + this.headers + ); + } + this.service = service || guessedService || ""; + this.region = region || guessedRegion || "us-east-1"; + this.cache = cache || new Map(); + this.datetime = + datetime || new Date().toISOString().replace(/[:-]|\.\d{3}/g, ""); + this.signQuery = signQuery; + this.appendSessionToken = + appendSessionToken || this.service === "iotdevicegateway"; + this.headers.delete("Host"); + if ( + this.service === "s3" && + !this.signQuery && + !this.headers.has("X-Amz-Content-Sha256") + ) { + this.headers.set("X-Amz-Content-Sha256", "UNSIGNED-PAYLOAD"); + } + const params = this.signQuery ? this.url.searchParams : this.headers; + params.set("X-Amz-Date", this.datetime); + if (this.sessionToken && !this.appendSessionToken) { + params.set("X-Amz-Security-Token", this.sessionToken); + } + this.signableHeaders = ["host", ...this.headers.keys()] + .filter((header) => allHeaders || !UNSIGNABLE_HEADERS.has(header)) + .sort(); + this.signedHeaders = this.signableHeaders.join(";"); + this.canonicalHeaders = this.signableHeaders + .map( + (header) => + header + + ":" + + (header === "host" + ? this.url.host + : (this.headers.get(header) || "").replace(/\s+/g, " ")) + ) + .join("\n"); + this.credentialString = [ + this.datetime.slice(0, 8), + this.region, + this.service, + "aws4_request", + ].join("/"); + if (this.signQuery) { + if (this.service === "s3" && !params.has("X-Amz-Expires")) { + params.set("X-Amz-Expires", "86400"); + } + params.set("X-Amz-Algorithm", "AWS4-HMAC-SHA256"); + params.set( + "X-Amz-Credential", + this.accessKeyId + "/" + this.credentialString + ); + params.set("X-Amz-SignedHeaders", this.signedHeaders); + } + if (this.service === "s3") { + try { + this.encodedPath = decodeURIComponent( + this.url.pathname.replace(/\+/g, " ") + ); + } catch (e) { + this.encodedPath = this.url.pathname; + } + } else { + this.encodedPath = this.url.pathname.replace(/\/+/g, "/"); + } + if (!singleEncode) { + this.encodedPath = encodeURIComponent(this.encodedPath).replace( + /%2F/g, + "/" + ); + } + this.encodedPath = encodeRfc3986(this.encodedPath); + const seenKeys = new Set(); + this.encodedSearch = [...this.url.searchParams] + .filter(([k]) => { + if (!k) return false; + if (this.service === "s3") { + if (seenKeys.has(k)) return false; + seenKeys.add(k); + } + return true; + }) + .map((pair) => pair.map((p) => encodeRfc3986(encodeURIComponent(p)))) + .sort(([k1, v1], [k2, v2]) => + k1 < k2 ? -1 : k1 > k2 ? 1 : v1 < v2 ? -1 : v1 > v2 ? 
diff --git a/restore/action.yml b/restore/action.yml
new file mode 100644
index 0000000..518adc5
--- /dev/null
+++ b/restore/action.yml
@@ -0,0 +1,31 @@
+name: s3-cache-restore
+description: |
+  Cache using s3
+runs:
+  using: node20
+  main: main.js
+
+inputs:
+  key:
+    description: cache key
+    required: true
+  path:
+    description: path to files to cache, use multiline for multiple files
+  lookup-only:
+    description: if true, set outputs.cache-hit but don't download the cache
+
+  aws-access-key-id:
+    description: aws-access-key-id
+    required: true
+  aws-secret-access-key:
+    description: aws-secret-access-key
+    required: true
+  aws-region:
+    description: aws-region
+    required: true
+  aws-cache-bucket:
+    description: aws-cache-bucket
+    required: true
+outputs:
+  cache-hit:
+    description: true if a cache is found for this key
diff --git a/restore/main.js b/restore/main.js
new file mode 100644
index 0000000..7388571
--- /dev/null
+++ b/restore/main.js
@@ -0,0 +1,8 @@
+const { get, lookUp } = require("../cache");
+
+const key = process.env.INPUT_KEY;
+const lookupOnly = process.env["INPUT_LOOKUP-ONLY"];
+const paths = process.env.INPUT_PATH?.split("\n").filter((f) => f.trim());
+
+if (lookupOnly) lookUp(key);
+else get(key, paths);
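One caveat worth flagging in `restore/main.js`: action inputs arrive as strings, so a workflow that passes `lookup-only: false` yields the string `"false"`, which is truthy in JavaScript and would still take the lookup-only branch. An illustrative sketch of the pitfall and a stricter check (not part of this diff):

```js
// Inputs are strings: "false" is truthy, so this branch is taken anyway.
const lookupOnly = "false"; // what the runner passes for `lookup-only: false`
if (lookupOnly) console.log("lookUp branch taken despite lookup-only: false");

// A stricter comparison avoids the surprise:
const isLookupOnly = process.env["INPUT_LOOKUP-ONLY"] === "true";
```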
["s3", match[1]] : ["", ""]; + } + const match = hostname + .replace("dualstack.", "") + .match(/([^.]+)\.(?:([^.]*)\.)?amazonaws\.com(?:\.cn)?$/); + let [service, region] = (match || ["", ""]).slice(1, 3); + if (region === "us-gov") { + region = "us-gov-west-1"; + } else if (region === "s3" || region === "s3-accelerate") { + region = "us-east-1"; + service = "s3"; + } else if (service === "iot") { + if (hostname.startsWith("iot.")) { + service = "execute-api"; + } else if (hostname.startsWith("data.jobs.iot.")) { + service = "iot-jobs-data"; + } else { + service = pathname === "/mqtt" ? "iotdevicegateway" : "iotdata"; + } + } else if (service === "autoscaling") { + const targetPrefix = (headers.get("X-Amz-Target") || "").split(".")[0]; + if (targetPrefix === "AnyScaleFrontendService") { + service = "application-autoscaling"; + } else if (targetPrefix === "AnyScaleScalingPlannerFrontendService") { + service = "autoscaling-plans"; + } + } else if (region == null && service.startsWith("s3-")) { + region = service.slice(3).replace(/^fips-|^external-1/, ""); + service = "s3"; + } else if (service.endsWith("-fips")) { + service = service.slice(0, -5); + } else if (region && /-\d$/.test(service) && !/-\d$/.test(region)) { + [service, region] = [region, service]; + } + return [HOST_SERVICES[service] || service, region]; +} + +exports.AwsClient = AwsClient; +exports.AwsV4Signer = AwsV4Signer; diff --git a/main.js b/main.js new file mode 100644 index 0000000..1fdf5ea --- /dev/null +++ b/main.js @@ -0,0 +1,6 @@ +const { get } = require("./cache"); + +const key = process.env.INPUT_KEY; +const paths = process.env.INPUT_PATH?.split("\n").filter((f) => f.trim()); + +get(key, paths); diff --git a/post.js b/post.js new file mode 100644 index 0000000..99f6871 --- /dev/null +++ b/post.js @@ -0,0 +1,13 @@ +/** + * this script is executed after the run finishes + */ + +const { put } = require("./cache"); +const { exist } = require("./s3"); + +const key = process.env.INPUT_KEY; +const paths = process.env.INPUT_PATH?.split("\n").filter((f) => f.trim()); + +exist(key).then((cacheHit) => { + if (!cacheHit) put(key, paths); +}); diff --git a/restore/action.yml b/restore/action.yml new file mode 100644 index 0000000..518adc5 --- /dev/null +++ b/restore/action.yml @@ -0,0 +1,31 @@ +name: s3-cache-restore +description: | + Cache using s3 +runs: + using: node20 + main: main.js + +inputs: + key: + description: cache key + required: true + path: + description: path to files to cache, use multiline for multiple files + lookup-only: + description: if true set outputs.cache.hit but don't download the cache + + aws-access-key-id: + description: aws-access-key-id + required: true + aws-secret-access-key: + description: aws-secret-access-key + required: true + aws-region: + description: aws-region + required: true + aws-cache-bucket: + description: aws-cache-bucket + required: true +outputs: + cache-hit: + description: true if a cache is found for this key diff --git a/restore/main.js b/restore/main.js new file mode 100644 index 0000000..7388571 --- /dev/null +++ b/restore/main.js @@ -0,0 +1,8 @@ +const { get, lookUp } = require("../cache"); + +const key = process.env.INPUT_KEY; +const lookupOnly = process.env["INPUT_LOOKUP-ONLY"]; +const paths = process.env.INPUT_PATH?.split("\n").filter((f) => f.trim()); + +if (lookupOnly) lookUp(key); +else get(key, paths); diff --git a/s3.js b/s3.js new file mode 100644 index 0000000..addb69e --- /dev/null +++ b/s3.js @@ -0,0 +1,52 @@ +const { AwsClient } = 
require("./dependencies/aws4fetch/aws4fetch"); + +const AWS_ACCESS_KEY_ID = process.env["INPUT_AWS-ACCESS-KEY-ID"]; +const AWS_SECRET_ACCESS_KEY = process.env["INPUT_AWS-SECRET-ACCESS-KEY"]; +const AWS_REGION = process.env["INPUT_AWS-REGION"]; +const AWS_CACHE_BUCKET = process.env["INPUT_AWS-CACHE-BUCKET"]; + +const client = new AwsClient({ + accessKeyId: AWS_ACCESS_KEY_ID, + secretAccessKey: AWS_SECRET_ACCESS_KEY, + region: AWS_REGION, + service: "s3", +}); + +const put = async (key, body) => { + const res = await client.fetch( + `https://${AWS_CACHE_BUCKET}.s3.${AWS_REGION}.amazonaws.com/${key}`, + { + method: "PUT", + body, + } + ); + + if (!res.ok) throw new Error(res.statusText); +}; + +const exist = async (key) => { + const res = await client.fetch( + `https://${AWS_CACHE_BUCKET}.s3.${AWS_REGION}.amazonaws.com/${key}`, + { method: "HEAD" } + ); + + if (res.status === 404) return false; + + if (!res.ok) throw new Error(res.statusText); + + return true; +}; + +const get = async (key) => { + const res = await client.fetch( + `https://${AWS_CACHE_BUCKET}.s3.${AWS_REGION}.amazonaws.com/${key}` + ); + + if (res.status === 404) return null; + + if (!res.ok) throw new Error(res.statusText); + + return await res.arrayBuffer(); +}; + +module.exports = { get, put, exist }; diff --git a/save/action.yml b/save/action.yml new file mode 100644 index 0000000..3cb6466 --- /dev/null +++ b/save/action.yml @@ -0,0 +1,26 @@ +name: s3-cache-save +description: | + Cache using s3 +runs: + using: node20 + main: main.js + +inputs: + key: + description: cache key + required: true + path: + description: path to files to cache,use multiline for multiple files + + aws-access-key-id: + description: aws-access-key-id + required: true + aws-secret-access-key: + description: aws-secret-access-key + required: true + aws-region: + description: aws-region + required: true + aws-cache-bucket: + description: aws-cache-bucket + required: true diff --git a/save/main.js b/save/main.js new file mode 100644 index 0000000..cc7b484 --- /dev/null +++ b/save/main.js @@ -0,0 +1,6 @@ +const { put } = require("../cache"); + +const key = process.env.INPUT_KEY; +const paths = process.env.INPUT_PATH?.split("\n").filter((f) => f.trim()); + +put(key, paths);