From 94a888f41610a870e66f057e86ed275d316b6d7e Mon Sep 17 00:00:00 2001 From: Jay Miller Date: Wed, 26 Mar 2025 11:31:54 -0700 Subject: [PATCH] first cut at some debugging support --- .gitignore | 1 + dist/index.d.ts | 21 +++- dist/index.js | 276 ++++++++++++++++++++++++++++-------------------- src/index.ts | 243 ++++++++++++++++++++++++++---------------- 4 files changed, 331 insertions(+), 210 deletions(-) diff --git a/.gitignore b/.gitignore index 2a6399f..4c237a3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ spec.json readme-sync/f_project.sublime-workspace *.sublime-workspace readme-sync/f_project.sublime-workspace +node_modules/ \ No newline at end of file diff --git a/dist/index.d.ts b/dist/index.d.ts index 0d13016..d80fba9 100644 --- a/dist/index.d.ts +++ b/dist/index.d.ts @@ -1,11 +1,24 @@ -/// import type { ClassificationResult, ExtractionResult, Webhook } from "./types"; +export interface SensibleSDKOptions { + debug?: true; + pollingInterval?: number; + region?: "us-west-2" | "eu-west-2" | "ca-central-1"; +} export declare class SensibleSDK { - apiKey: string; - constructor(apiKey: string); + private readonly options; + static DEFAULT_WAIT_TIME: number; + static MAX_WAIT_TIME: number; + static DEFAULT_POLLING_INTERVAL: number; + static MAX_POLLING_INTERVAL: number; + private requestCount; + private readonly backend; + private readonly s3; + constructor(apiKey: string, options?: SensibleSDKOptions); extract(params: ExtractParams): Promise; classify(params: ClassificationParams): Promise; - waitFor(request: ClassificationRequest | ExtractionRequest): Promise; + private extractionWaitLoop; + private classificationWaitLoop; + waitFor(request: ClassificationRequest | ExtractionRequest, timeout?: number): Promise; generateExcel(requests: ExtractionRequest | ExtractionRequest[]): Promise<{ url: string; }>; diff --git a/dist/index.js b/dist/index.js index 6f6a644..328df8f 100644 --- a/dist/index.js +++ b/dist/index.js @@ -52,91 +52,119 @@ var got_1 = require("got"); var querystring = require("node:querystring"); var fs_1 = require("fs"); var util_1 = require("util"); -var baseUrl = "https://api.sensible.so/v0"; var SensibleSDK = /** @class */ (function () { - function SensibleSDK(apiKey) { - this.apiKey = apiKey; + function SensibleSDK(apiKey, options) { + if (options === void 0) { options = {}; } + var _this = this; + var _a; + this.options = options; + this.requestCount = 1; + var region = (_a = options.region) !== null && _a !== void 0 ? _a : "us-west-2"; + var subdomain = region === "us-west-2" ? "api" : "api-".concat(region); + var prefixUrl = "https://".concat(subdomain, ".sensible.so/v0"); + var hooks = { + beforeRequest: [ + function (options) { + var requestId = _this.requestCount++; + if (_this.options.debug) { + options.context.requestId = requestId; + console.info("Request ".concat(requestId, ": ").concat(options.method, " ").concat(options.url)); + } + } + ], + afterResponse: [ + function (response) { + if (_this.options.debug) { + var amzRequestid = response.headers["x-amzn-requestid"]; + var requestId = response.request.options.context.requestId; + console.info("Response ".concat(requestId, ": ").concat(response.statusCode).concat(amzRequestid ? " (amz request id: ".concat(amzRequestid, ")") : "")); + } + return response; + } + ], + }; + this.backend = got_1.default.extend({ + prefixUrl: prefixUrl, + headers: { + authorization: "Bearer ".concat(apiKey), + }, + throwHttpErrors: false, + hooks: hooks, + }); + this.s3 = got_1.default.extend({ + headers: { + "content-type": undefined, + }, + throwHttpErrors: false, + hooks: hooks, + }); } SensibleSDK.prototype.extract = function (params) { return __awaiter(this, void 0, void 0, function () { - var webhook, documentName, environment, url, body, headers, response, e_1, id, upload_url, file, _a, putResponse, e_2; + var webhook, documentName, environment, url, json, response, id, upload_url, file, _a, putResponse; return __generator(this, function (_b) { switch (_b.label) { case 0: // This can be called from JS, so we cannot trust the type engine validateExtractParams(params); webhook = params.webhook, documentName = params.documentName, environment = params.environment; - url = baseUrl + - ("url" in params ? "/extract_from_url" : "/generate_upload_url") + + url = ("url" in params ? "extract_from_url" : "generate_upload_url") + ("documentType" in params ? "/".concat(params.documentType) + ("configuration" in params ? "/".concat(params.configurationName) : "") : "") + "?".concat(querystring.stringify(__assign(__assign({}, (environment ? { environment: environment } : {})), (documentName ? { document_name: documentName } : {})))); - body = __assign(__assign(__assign({}, ("url" in params ? { document_url: params.url } : {})), (webhook ? { webhook: webhook } : {})), ("documentTypes" in params ? { types: params.documentTypes } : {})); - headers = { authorization: "Bearer ".concat(this.apiKey) }; - _b.label = 1; - case 1: - _b.trys.push([1, 3, , 4]); - return [4 /*yield*/, got_1.default + json = __assign(__assign(__assign({}, ("url" in params ? { document_url: params.url } : {})), (webhook ? { webhook: webhook } : {})), ("documentTypes" in params ? { types: params.documentTypes } : {})); + return [4 /*yield*/, this.backend .post(url, { - json: body, - headers: headers, - }) - .json()]; - case 2: + json: json, + })]; + case 1: response = _b.sent(); - return [3 /*break*/, 4]; - case 3: - e_1 = _b.sent(); - throwError(e_1); - return [3 /*break*/, 4]; - case 4: - if (!("url" in params)) return [3 /*break*/, 5]; + if (response.statusCode !== 200) { + throw responseError(response); + } + if (!("url" in params)) return [3 /*break*/, 2]; if (!isExtractionResponse(response)) { throw "Got invalid response from generate_upload_url: ".concat(JSON.stringify(response)); } return [2 /*return*/, { type: "extraction", id: response.id }]; - case 5: + case 2: if (!isExtractionUrlResponse(response)) { throw "Got invalid response from extract_from_url: ".concat(JSON.stringify(response)); } id = response.id, upload_url = response.upload_url; - if (!("file" in params)) return [3 /*break*/, 6]; + if (!("file" in params)) return [3 /*break*/, 3]; _a = params.file; - return [3 /*break*/, 8]; - case 6: return [4 /*yield*/, fs_1.promises.readFile(params.path)]; - case 7: + return [3 /*break*/, 5]; + case 3: return [4 /*yield*/, fs_1.promises.readFile(params.path)]; + case 4: _a = _b.sent(); - _b.label = 8; - case 8: + _b.label = 5; + case 5: file = _a; - _b.label = 9; - case 9: - _b.trys.push([9, 11, , 12]); - return [4 /*yield*/, got_1.default.put(upload_url, { + return [4 /*yield*/, this.s3.put(upload_url, { method: "PUT", body: file, })]; - case 10: + case 6: putResponse = _b.sent(); - return [3 /*break*/, 12]; - case 11: - e_2 = _b.sent(); - throw "Error ".concat(e_2.response.statusCode, " uploading file to S3: ").concat(e_2.response.body); - case 12: return [2 /*return*/, { type: "extraction", id: id }]; + if (putResponse.statusCode !== 200) { + throw responseError(putResponse); + } + return [2 /*return*/, { type: "extraction", id: id }]; } }); }); }; SensibleSDK.prototype.classify = function (params) { return __awaiter(this, void 0, void 0, function () { - var url, file, _a, response, e_3; + var url, file, _a, contentType, response; return __generator(this, function (_b) { switch (_b.label) { case 0: validateClassificationParams(params); - url = "".concat(baseUrl, "/classify/async"); + url = "classify/async"; if (!("file" in params)) return [3 /*break*/, 1]; _a = params.file; return [3 /*break*/, 3]; @@ -146,26 +174,19 @@ var SensibleSDK = /** @class */ (function () { _b.label = 3; case 3: file = _a; - _b.label = 4; - case 4: - _b.trys.push([4, 6, , 7]); - return [4 /*yield*/, got_1.default + contentType = "application/pdf"; + return [4 /*yield*/, this.backend .post(url, { body: file, headers: { - authorization: "Bearer ".concat(this.apiKey), - "content-type": "application/pdf", // HACK + "content-type": contentType, }, - }) - .json()]; - case 5: + })]; + case 4: response = _b.sent(); - return [3 /*break*/, 7]; - case 6: - e_3 = _b.sent(); - throwError(e_3); - return [3 /*break*/, 7]; - case 7: + if (response.statusCode !== 200) { + throw responseError(response); + } if (!isClassificationResponse(response)) { throw "Got invalid response from extract_from_url: ".concat(JSON.stringify(response)); } @@ -178,48 +199,77 @@ var SensibleSDK = /** @class */ (function () { }); }); }; - SensibleSDK.prototype.waitFor = function (request) { + SensibleSDK.prototype.extractionWaitLoop = function (id, interval) { return __awaiter(this, void 0, void 0, function () { - var response, response, e_4; + var response, result; return __generator(this, function (_a) { switch (_a.label) { - case 0: - if (!true) return [3 /*break*/, 8]; - if (!(request.type === "extraction")) return [3 /*break*/, 2]; - return [4 /*yield*/, got_1.default - .get("".concat(baseUrl, "/documents/").concat(request.id), { - headers: { authorization: "Bearer ".concat(this.apiKey) }, - }) - .json()]; + case 0: return [4 /*yield*/, this.backend.get("documents/".concat(id))]; case 1: response = _a.sent(); - if (response && - typeof response === "object" && - "status" in response && - response.status !== "WAITING") { - return [2 /*return*/, response]; + if (response.statusCode !== 200) { + throw responseError(response); + } + result = response.body; + if (result.status == "COMPLETE" || result.status == "FAILED") { + return [2 /*return*/, result]; } - return [3 /*break*/, 6]; + return [4 /*yield*/, sleep(interval)]; case 2: - response = void 0; + _a.sent(); _a.label = 3; case 3: - _a.trys.push([3, 5, , 6]); - return [4 /*yield*/, got_1.default.get(request.downloadLink).json()]; - case 4: + if (true) return [3 /*break*/, 0]; + _a.label = 4; + case 4: return [2 /*return*/]; + } + }); + }); + }; + SensibleSDK.prototype.classificationWaitLoop = function (downloadUrl, interval) { + return __awaiter(this, void 0, void 0, function () { + var response; + return __generator(this, function (_a) { + switch (_a.label) { + case 0: return [4 /*yield*/, this.s3.get(downloadUrl)]; + case 1: response = _a.sent(); - return [2 /*return*/, response]; - case 5: - e_4 = _a.sent(); - // 404 is expected while classifying is being done - if (!(e_4 instanceof got_1.HTTPError && e_4.response.statusCode === 404)) - throwError(e_4); - return [3 /*break*/, 6]; - case 6: return [4 /*yield*/, sleep(5000)]; - case 7: - _a.sent(); // TODO: parameterize polling interval - return [3 /*break*/, 0]; - case 8: return [2 /*return*/]; + if (!(response.statusCode === 404)) return [3 /*break*/, 3]; + return [4 /*yield*/, sleep(interval)]; + case 2: + _a.sent(); + return [3 /*break*/, 4]; + case 3: + if (response.statusCode !== 200) { + throw responseError(response); + } + return [2 /*return*/, response.body]; + case 4: + if (true) return [3 /*break*/, 0]; + _a.label = 5; + case 5: return [2 /*return*/]; + } + }); + }); + }; + SensibleSDK.prototype.waitFor = function (request_1) { + return __awaiter(this, arguments, void 0, function (request, timeout) { + var interval, loopPromise; + var _a; + if (timeout === void 0) { timeout = SensibleSDK.DEFAULT_WAIT_TIME; } + return __generator(this, function (_b) { + switch (_b.label) { + case 0: + timeout = Math.min(timeout, SensibleSDK.MAX_WAIT_TIME); + interval = Math.min((_a = this.options.pollingInterval) !== null && _a !== void 0 ? _a : SensibleSDK.DEFAULT_POLLING_INTERVAL, SensibleSDK.MAX_POLLING_INTERVAL); + loopPromise = request.type === "extraction" + ? this.extractionWaitLoop(request.id, interval) + : this.classificationWaitLoop(request.downloadLink, interval); + return [4 /*yield*/, Promise.race([ + loopPromise, + sleep(timeout).then(function () { throw "Timed out waiting for ".concat(timeout, "ms"); }) + ])]; + case 1: return [2 /*return*/, _b.sent()]; } }); }); @@ -227,33 +277,28 @@ var SensibleSDK = /** @class */ (function () { // requested extractions must be completed SensibleSDK.prototype.generateExcel = function (requests) { return __awaiter(this, void 0, void 0, function () { - var extractions, url, e_5; + var extractions, url, response; return __generator(this, function (_a) { switch (_a.label) { case 0: extractions = Array.isArray(requests) ? requests : [requests]; - url = baseUrl + - "/generate_excel/" + + url = "generate_excel/" + extractions.map(function (extraction) { return extraction.id; }).join(","); - _a.label = 1; + return [4 /*yield*/, this.backend.get(url)]; case 1: - _a.trys.push([1, 3, , 4]); - return [4 /*yield*/, got_1.default - .get(url, { - headers: { authorization: "Bearer ".concat(this.apiKey) }, - }) - .json()]; - case 2: return [2 /*return*/, _a.sent()]; - case 3: - e_5 = _a.sent(); - throwError(e_5); - // HACK: keep TS happy - throw null; - case 4: return [2 /*return*/]; + response = _a.sent(); + if (response.statusCode !== 200) { + throw responseError(response); + } + return [2 /*return*/, response.body]; } }); }); }; + SensibleSDK.DEFAULT_WAIT_TIME = 60000 * 5; // five minutes + SensibleSDK.MAX_WAIT_TIME = 60000 * 15; // fifteen minutes + SensibleSDK.DEFAULT_POLLING_INTERVAL = 5000; // five seconds + SensibleSDK.MAX_POLLING_INTERVAL = 60000; // one minute return SensibleSDK; }()); exports.SensibleSDK = SensibleSDK; @@ -303,20 +348,23 @@ function throwError(e) { if (!(e instanceof got_1.HTTPError)) { throw "Unknown error ".concat(e); } - switch (e.response.statusCode) { + throw responseError(e.response); +} +function responseError(response) { + switch (response.statusCode) { case 400: - throw "Bad Request (400): ".concat(e.response.body); + return "Bad Request (400): ".concat(response.body); case 401: - throw "Unauthorized (401), please check your API key"; + return "Unauthorized (401), please check your API key"; case 415: - throw "Unsupported Media Type (415), please check your document format"; + return "Unsupported Media Type (415), please check your document format"; case 429: // automatic retry? - throw "Too Many Requests (429)"; + return "Too Many Requests (429)"; case 500: - throw "Internal Server Error (500): ".concat(e.response.body); + return "Internal Server Error (500): ".concat(response.body); default: - throw "Got unknown HTTP status code ".concat(e.response.statusCode, ": ").concat(e.response.body); + return "Unexpected HTTP status code ".concat(response.statusCode, ": ").concat(response.body); } } function isClassificationResponse(response) { diff --git a/src/index.ts b/src/index.ts index 5cfba7d..0869517 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,16 +1,67 @@ -import got, { HTTPError } from "got"; +import got, { Got, Hooks, HTTPError, Response } from "got"; import * as querystring from "node:querystring"; import { promises as fs } from "fs"; import { promisify } from "util"; import type { ClassificationResult, ExtractionResult, Webhook } from "./types"; -const baseUrl = "https://api.sensible.so/v0"; -export class SensibleSDK { - apiKey: string; +export interface SensibleSDKOptions { + debug?: true; + pollingInterval?: number; + region?: "us-west-2" | "eu-west-2" | "ca-central-1"; +} - constructor(apiKey: string) { - this.apiKey = apiKey; +export class SensibleSDK { + public static DEFAULT_WAIT_TIME = 60_000 * 5; // five minutes + public static MAX_WAIT_TIME = 60_000 * 15; // fifteen minutes + public static DEFAULT_POLLING_INTERVAL = 5_000; // five seconds + public static MAX_POLLING_INTERVAL = 60_000; // one minute + + + private requestCount = 1; + private readonly backend: Got; + private readonly s3: Got; + + constructor(apiKey: string, private readonly options: SensibleSDKOptions = {}) { + const region = options.region ?? "us-west-2"; + const subdomain = region === "us-west-2" ? "api" : `api-${region}`; + const prefixUrl = `https://${subdomain}.sensible.so/v0`; + const hooks: Hooks = { + beforeRequest: [ + (options) => { + const requestId = this.requestCount++; + if (this.options.debug) { + options.context.requestId = requestId; + console.info(`Request ${requestId}: ${options.method} ${options.url}`); + } + } + ], + afterResponse: [ + (response) => { + if (this.options.debug) { + const amzRequestid = response.headers["x-amzn-requestid"]; + const requestId = response.request.options.context.requestId; + console.info(`Response ${requestId}: ${response.statusCode}${amzRequestid ? ` (amz request id: ${amzRequestid})` : ""}`); + } + return response; + } + ], + }; + this.backend = got.extend({ + prefixUrl, + headers: { + authorization: `Bearer ${apiKey}`, + }, + throwHttpErrors: false, + hooks, + }); + this.s3 = got.extend({ + headers: { + "content-type": undefined, + }, + throwHttpErrors: false, + hooks, + }); } async extract(params: ExtractParams): Promise { @@ -20,8 +71,7 @@ export class SensibleSDK { const { webhook, documentName, environment } = params; const url = - baseUrl + - ("url" in params ? "/extract_from_url" : "/generate_upload_url") + + ("url" in params ? "extract_from_url" : "generate_upload_url") + ("documentType" in params ? `/${params.documentType}` + ("configuration" in params ? `/${params.configurationName}` : "") @@ -31,23 +81,19 @@ export class SensibleSDK { ...(documentName ? { document_name: documentName } : {}), })}`; - const body = { + const json = { ...("url" in params ? { document_url: params.url } : {}), ...(webhook ? { webhook } : {}), ...("documentTypes" in params ? { types: params.documentTypes } : {}), }; - const headers = { authorization: `Bearer ${this.apiKey}` }; - - let response; - try { - response = await got - .post(url, { - json: body, - headers, - }) - .json(); - } catch (e: unknown) { - throwError(e); + + const response = await this.backend + .post(url, { + json, + }); + + if (response.statusCode !== 200) { + throw responseError(response); } if ("url" in params) { @@ -69,13 +115,14 @@ export class SensibleSDK { const file = "file" in params ? params.file : await fs.readFile(params.path); - try { - const putResponse = await got.put(upload_url, { - method: "PUT", - body: file, - }); - } catch (e: any) { - throw `Error ${e.response.statusCode} uploading file to S3: ${e.response.body}`; + + const putResponse = await this.s3.put(upload_url, { + method: "PUT", + body: file, + }); + + if (putResponse.statusCode !== 200) { + throw responseError(putResponse); } return { type: "extraction", id }; @@ -85,25 +132,24 @@ export class SensibleSDK { async classify(params: ClassificationParams): Promise { validateClassificationParams(params); - const url = `${baseUrl}/classify/async`; + const url = `classify/async`; const file = "file" in params ? params.file : await fs.readFile(params.path); - - let response; - try { - response = await got - .post(url, { - body: file, - headers: { - authorization: `Bearer ${this.apiKey}`, - "content-type": "application/pdf", // HACK - }, - }) - .json(); - } catch (e: unknown) { - throwError(e); + const contentType = "application/pdf"; + + const response = await this.backend + .post(url, { + body: file, + headers: { + "content-type": contentType, + }, + }); + + if (response.statusCode !== 200) { + throw responseError(response); } + if (!isClassificationResponse(response)) { throw `Got invalid response from extract_from_url: ${JSON.stringify( response @@ -116,36 +162,53 @@ export class SensibleSDK { }; } - async waitFor(request: ClassificationRequest | ExtractionRequest) { - // TODO: timeout? - while (true) { - if (request.type === "extraction") { - const response = await got - .get(`${baseUrl}/documents/${request.id}`, { - headers: { authorization: `Bearer ${this.apiKey}` }, - }) - .json(); - if ( - response && - typeof response === "object" && - "status" in response && - (response.status == "COMPLETE" || response.status == "FAILED") - ) { - return response as ExtractionResult; - } - } else { - let response; - try { - response = await got.get(request.downloadLink).json(); - return response as ClassificationResult; - } catch (e: unknown) { - // 404 is expected while classifying is being done - if (!(e instanceof HTTPError && e.response.statusCode === 404)) - throwError(e); - } + private async extractionWaitLoop(id: string, interval: number): Promise { + do { + const response = + await this.backend.get(`documents/${id}`); + if (response.statusCode !== 200) { + throw responseError(response); } - await sleep(5_000); // TODO: parameterize polling interval - } + const result = response.body; + if (result.status == "COMPLETE" || result.status == "FAILED") { + return result; + } + await sleep(interval); + } while (true); + } + + private async classificationWaitLoop(downloadUrl: string, interval: number): Promise { + do { + const response = + await this.s3.get(downloadUrl); + if (response.statusCode === 404) { + await sleep(interval); + continue; + } + if (response.statusCode !== 200) { + throw responseError(response); + } + return response.body; + } while (true); + } + + async waitFor( + request: ClassificationRequest | ExtractionRequest, + timeout = SensibleSDK.DEFAULT_WAIT_TIME + ): Promise { + timeout = Math.min(timeout, SensibleSDK.MAX_WAIT_TIME); + const interval = Math.min( + this.options.pollingInterval ?? SensibleSDK.DEFAULT_POLLING_INTERVAL, + SensibleSDK.MAX_POLLING_INTERVAL + ); + const loopPromise = request.type === "extraction" + ? this.extractionWaitLoop(request.id, interval) + : this.classificationWaitLoop(request.downloadLink, interval); + + return await Promise.race([ + loopPromise, + sleep(timeout).then(() => { throw `Timed out waiting for ${timeout}ms` }) + ]); } // requested extractions must be completed @@ -155,21 +218,13 @@ export class SensibleSDK { const extractions = Array.isArray(requests) ? requests : [requests]; const url = - baseUrl + - "/generate_excel/" + + "generate_excel/" + extractions.map((extraction) => extraction.id).join(","); - - try { - return await got - .get(url, { - headers: { authorization: `Bearer ${this.apiKey}` }, - }) - .json(); - } catch (e) { - throwError(e); - // HACK: keep TS happy - throw null; + const response = await this.backend.get<{ url: string; }>(url); + if (response.statusCode !== 200) { + throw responseError(response); } + return response.body; } } @@ -280,20 +335,24 @@ function throwError(e: unknown) { throw `Unknown error ${e}`; } - switch (e.response.statusCode) { + throw responseError(e.response); +} + +function responseError(response: Response): string { + switch (response.statusCode) { case 400: - throw `Bad Request (400): ${e.response.body}`; + return `Bad Request (400): ${response.body}`; case 401: - throw "Unauthorized (401), please check your API key"; + return "Unauthorized (401), please check your API key"; case 415: - throw "Unsupported Media Type (415), please check your document format"; + return "Unsupported Media Type (415), please check your document format"; case 429: // automatic retry? - throw "Too Many Requests (429)"; + return "Too Many Requests (429)"; case 500: - throw `Internal Server Error (500): ${e.response.body}`; + return `Internal Server Error (500): ${response.body}`; default: - throw `Got unknown HTTP status code ${e.response.statusCode}: ${e.response.body}`; + return `Unexpected HTTP status code ${response.statusCode}: ${response.body}`; } }