Skip to content

Commit

Permalink
Initial publish
Browse files Browse the repository at this point in the history
  • Loading branch information
huntharo committed Jun 22, 2023
0 parents commit 131d47a
Show file tree
Hide file tree
Showing 21 changed files with 12,495 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
node_modules
dist
32 changes: 32 additions & 0 deletions .eslintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"root": true,
"parser": "@typescript-eslint/parser",
"parserOptions": {
"project": "./tsconfig.json"
},
"plugins": ["@typescript-eslint", "prettier"],
"extends": [
"eslint:recommended",
"plugin:@typescript-eslint/eslint-recommended",
"plugin:@typescript-eslint/recommended",
"prettier"
],
"env": {
"jest": true
},
"rules": {
"no-console": 1, // Means warning
"prettier/prettier": 2, // Means error
"@typescript-eslint/ban-ts-comment": "off",
"@typescript-eslint/no-floating-promises": ["error"],
"@typescript-eslint/no-misused-promises": ["error"],
"@typescript-eslint/promise-function-async": ["error"],
"@typescript-eslint/require-await": ["error"],
// note you must disable the base rule as it can report incorrect errors
"no-return-await": "off",
"@typescript-eslint/return-await": ["error"],
// Don't allow awaiting non-Promises
"@typescript-eslint/await-thenable": "error"
},
"ignorePatterns": ["dist", "cdk.out"]
}
36 changes: 36 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
name: Build - CI

on:
push:
branches:
- main
pull_request:
branches:
- main

jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Use Node.js 16
uses: actions/setup-node@v3
with:
node-version: 16

- name: Install Modules
run: npm ci

- name: Build
run: npm run build

- name: Build Docs
run: npm run build:docs

- name: Lint
run: npm run lint

- name: Test
run: npm run test
31 changes: 31 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: Publish Docs

on:
release:
types: [published]

workflow_dispatch:

jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Use Node.js 16
uses: actions/setup-node@v3
with:
node-version: 16

- name: Install Modules
run: npm ci

- name: Build Docs
run: npm run build:docs

- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs
40 changes: 40 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
name: Package and Publish

on:
release:
types: [published]

jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Use Node.js 16
uses: actions/setup-node@v3
with:
node-version: 16

- name: Use the Release Tag Version
run: |
npm version from-git --allow-same-version --no-git-tag-version
- name: Install Modules
run: npm ci

- name: Build
run: npm run build

- name: Lint
run: npm run lint

- name: Test
run: npm run test

- name: NPM registry authentication
run: npm set //registry.npmjs.org/:_authToken ${{ secrets.NPMJSORG_PUBLISH_TOKEN }}

- name: Publish with CLI to Code Artifact
run: |
npm publish --access public --ignore-scripts --dry-run
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
.idea/
node_modules
dist/
*.tsbuildinfo
*.log
.nyc_output/
coverage/
.DS_Store
docs/
1 change: 1 addition & 0 deletions .nvmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
v16.17.1
6 changes: 6 additions & 0 deletions .prettierrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"semi": true,
"trailingComma": "all",
"singleQuote": true,
"printWidth": 100
}
44 changes: 44 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"editor.defaultFormatter": "esbenp.prettier-vscode",
"editor.formatOnSave": true,
"editor.insertSpaces": true,
"editor.tabSize": 2,
"files.associations": {
"Dockerfile*": "dockerfile"
},
"[typescript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"yaml.customTags": [
"!And",
"!And sequence",
"!If",
"!If sequence",
"!Not",
"!Not sequence",
"!Equals",
"!Equals sequence",
"!Or",
"!Or sequence",
"!FindInMap",
"!FindInMap sequence",
"!Base64",
"!Join",
"!Join sequence",
"!Cidr",
"!Ref",
"!Sub",
"!Sub sequence",
"!GetAtt",
"!GetAZs",
"!ImportValue",
"!ImportValue sequence",
"!Select",
"!Select sequence",
"!Split",
"!Split sequence"
],
"[xml]": {
"editor.defaultFormatter": "DotJoshJohnson.xml"
}
}
5 changes: 5 additions & 0 deletions CODE_OF_CONDUCT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Contributor Code of Conduct

The Shutterstock team is committed to fostering a welcoming community.

This project is governed by Shutterstock’s [Code of Conduct](https://github.com/shutterstock/code-of-conduct). All contributors and participants agree to abide by its terms.
8 changes: 8 additions & 0 deletions ISSUE_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
### Expected behavior

### Actual behavior

### Steps to reproduce the behavior

### Additional specs (e.g. browser, version, etc.)

21 changes: 21 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)
=====================
Copyright (c) `2022` `Shutterstock, Inc.`

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
55 changes: 55 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
[![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) [![API Docs](https://img.shields.io/badge/API%20Docs-View%20Here-blue)](https://tech.shutterstock.com/chunker/) [![Build - CI](https://github.com/shutterstock/chunker/actions/workflows/ci.yml/badge.svg)](https://github.com/shutterstock/chunker/actions/workflows/ci.yml) [![Package and Publish](https://github.com/shutterstock/chunker/actions/workflows/publish.yml/badge.svg)](https://github.com/shutterstock/chunker/actions/workflows/publish.yml) [![Publish Docs](https://github.com/shutterstock/chunker/actions/workflows/docs.yml/badge.svg)](https://github.com/shutterstock/chunker/actions/workflows/docs.yml)

# Overview

`@shutterstock/chunker` calls a blocking async callback _before_ adding an item that would exceed a user-defined size limit OR when the count of items limit is reached.

A common use case for `@shutterstock/chunker` is as a "batch accumulator" that gathers up items to be processed in a batch where the batch has specific count and size constraints that must be followed. For example, sending batches to an AWS Kinesis Data Stream requires that there be 500 or less records totalling 500 MB or less in size. The record count part is easy, but the record size check and handling both is more difficult.

# Getting Started

## Installation

The package is available on npm as [@shutterstock/chunker](https://www.npmjs.com/package/@shutterstock/chunker)

`npm i @shutterstock/chunker`

## Importing

```typescript
import { Chunker } from '@shutterstock/chunker';
```

## API Documentation

After installing the package, you might want to look at our [API Documentation](https://tech.shutterstock.com/chunker/) to learn about all the features available.

# `Chunker`

`Chunker` has a `BlockingQueue` that it uses to store items until the size or count limits are reached. When the limits are reached, the `Chunker` calls the user-provided callback with the items in the queue. The callback is expected to return a `Promise` that resolves when the items have been processed. The `Chunker` will wait for the `Promise` to resolve before continuing.

See below for an example of using `Chunker` to write batches of records to an AWS Kinesis Data Stream.

# Contributing

## Setting up Build Environment

- `nvm use`
- `npm i`
- `npm run build`
- `npm run lint`
- `npm run test`

## Running Examples

### aws-kinesis-writer

1. Create Kinesis Data Stream using AWS Console or any other method
1. Default name is `chunker-test-stream`
2. 1 shard is sufficient
3. 1 day retention is sufficient
4. No encryption is sufficient
5. On-demand throughput is sufficient
2. `npm run example:aws-kinesis-writer`
1. If the stream name was changed: `KINESIS_STREAM_NAME=my-stream-name npm run example:aws-kinesis-writer`
3. Observe in the log output that the `enqueue` method intermittently blocks when the count or size constraints would be breached. During the block the records are written to the Kinesis Data Stream, after which the block is released and the new item is added to the next batch.
92 changes: 92 additions & 0 deletions examples/aws-kinesis-writer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/* eslint-disable no-console */
import * as kinesis from '@aws-sdk/client-kinesis';
import { Chunker } from '@shutterstock/chunker';

const kinesisClient = new kinesis.KinesisClient({});
const { KINESIS_STREAM_NAME = 'chunker-test-stream', RECORDS_TO_WRITE = '1500' } = process.env;
const RECORDS_TO_WRITE_NUM = parseInt(RECORDS_TO_WRITE, 10);

async function main() {
// AWS Kinesis payloads must have 500 or less items
// and must be 5 MB or less in total size.
// Chunker will call the callback when the metrics are
// reached so we can flush the batch to Kinesis without
// ever exceeding the limits.
const chunker = new Chunker({
countLimit: 500,
// Only go up to 95% of the limit of the Kinesis payload size
sizeLimit: 5 * 1024 * 1024 * 0.95,
/**
* Compute the item size
* @param record The record to compute the size of
* @returns
*/
sizer: (record: { item: kinesis.PutRecordsRequestEntry; index: number }): number => {
const { item, index } = record;
const itemJSON = JSON.stringify(item);
// Return the real size of the record if below 500 items
if (index < 500) {
return itemJSON.length;
}

// Lie about the record size above 500 items
// to force flushes based on size
// Records will be between 500 KB and 1024 KB if count is 1000
return index * 1024;
},
/**
* Write the items to Kinesis when called
* @param records The records to write
* @returns The result of the Kinesis PutRecordsCommand
*/
writer: async (
records: { index: number; item: kinesis.PutRecordsRequestEntry }[],
): Promise<void> => {
console.log(
`Writing to Kinesis - Start - Records: ${records.length}, First Index: ${
records[0].index
}, Last Index: ${records[records.length - 1].index}`,
);
// Send the records in a batch since we know we will be under the batch limits
await kinesisClient.send(
new kinesis.PutRecordsCommand({
StreamName: KINESIS_STREAM_NAME,
Records: records.map((record) => record.item),
}),
);
console.log(
`Writing to Kinesis - Done - Records: ${records.length}, First Index: ${
records[0].index
}, Last Index: ${records[records.length - 1].index}`,
);
},
});

try {
for (let i = 0; i < RECORDS_TO_WRITE_NUM; i++) {
const niceNumberStr = `${i.toString().padStart(5, '0')}`;
const partitionKey = `${(i % 100).toString().padStart(5, '0')}`;
const record: kinesis.PutRecordsRequestEntry = {
Data: Buffer.from(niceNumberStr, 'utf-8'),
PartitionKey: partitionKey,
};

console.log(
`Writing to Chunker - Start - Record: ${i}, Data: ${niceNumberStr}, PartitionKey: ${partitionKey}`,
);
const start = Date.now();
await chunker.enqueue({ item: record, index: i });
console.log(
`Writing to Chunker - Done - Record: ${i}, Data: ${niceNumberStr}, PartitionKey: ${partitionKey}, Duration: ${
Date.now() - start
} ms`,
);
}
} finally {
console.log('Waiting for Chunker to finish - Start');
await chunker.onIdle();
console.log('Waiting for Chunker to finish - Done');
}
}

void main();
Loading

0 comments on commit 131d47a

Please sign in to comment.