-
Notifications
You must be signed in to change notification settings - Fork 54
feat: respect input schema defaults in Actor.getInput()
#409
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,6 +47,10 @@ import { addTimeoutToPromise } from '@apify/timeout'; | |
import type { ChargeOptions, ChargeResult } from './charging.js'; | ||
import { ChargingManager } from './charging.js'; | ||
import { Configuration } from './configuration.js'; | ||
import { | ||
getDefaultsFromInputSchema, | ||
readInputSchema, | ||
} from './input-schemas.js'; | ||
import { KeyValueStore } from './key_value_store.js'; | ||
import { PlatformEventManager } from './platform_event_manager.js'; | ||
import type { ProxyConfigurationOptions } from './proxy_configuration.js'; | ||
|
@@ -1235,18 +1239,27 @@ export class Actor<Data extends Dictionary = Dictionary> { | |
const inputSecretsPrivateKeyPassphrase = this.config.get( | ||
'inputSecretsPrivateKeyPassphrase', | ||
); | ||
const input = await this.getValue<T>(this.config.get('inputKey')); | ||
const rawInput = await this.getValue<T>(this.config.get('inputKey')); | ||
|
||
let input = rawInput as T; | ||
|
||
if ( | ||
ow.isValid(input, ow.object.nonEmpty) && | ||
ow.isValid(rawInput, ow.object.nonEmpty) && | ||
inputSecretsPrivateKeyFile && | ||
inputSecretsPrivateKeyPassphrase | ||
) { | ||
const privateKey = createPrivateKey({ | ||
key: Buffer.from(inputSecretsPrivateKeyFile, 'base64'), | ||
passphrase: inputSecretsPrivateKeyPassphrase, | ||
}); | ||
return decryptInputSecrets<T>({ input, privateKey }); | ||
|
||
input = decryptInputSecrets({ input: rawInput, privateKey }); | ||
} | ||
|
||
if (ow.isValid(input, ow.object.nonEmpty) && !Buffer.isBuffer(input)) { | ||
input = await this.insertDefaultsFromInputSchema(input); | ||
} | ||
|
||
return input; | ||
} | ||
|
||
|
@@ -2291,4 +2304,33 @@ export class Actor<Data extends Dictionary = Dictionary> { | |
].join('\n'), | ||
); | ||
} | ||
|
||
private async insertDefaultsFromInputSchema<T extends Dictionary>( | ||
input: T, | ||
): Promise<T> { | ||
// TODO: v0, move all this logic from here and apify-cli to input_schema module | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
|
||
const env = this.getEnv(); | ||
let inputSchema: Dictionary | undefined | null; | ||
|
||
// On platform, we can get the input schema from the build data | ||
if (this.isAtHome() && env.actorBuildId) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we even need this? this change was only supposed to be dealing with the local development, on platform you always get the whole input from the API |
||
const buildData = await this.apifyClient | ||
.build(env.actorBuildId) | ||
.get(); | ||
|
||
inputSchema = buildData?.actorDefinition?.input; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would at least log an error if this is not present in the response. |
||
} else { | ||
// On local, we can get the input schema from the local config | ||
inputSchema = readInputSchema(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It'd also be nice to log something if no input schema is found. |
||
} | ||
|
||
if (!inputSchema) { | ||
return input; | ||
} | ||
|
||
const defaults = getDefaultsFromInputSchema(inputSchema); | ||
|
||
return { ...defaults, ...input }; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
// TODO: v0, move all this logic from here and apify-cli to input_schema module | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd rather see an issue instead of a TODO in code, TODOs always rot in there. |
||
|
||
import { existsSync, readFileSync } from 'node:fs'; | ||
import { join } from 'node:path'; | ||
import process from 'node:process'; | ||
|
||
import type { Dictionary } from '@crawlee/utils'; | ||
|
||
const DEFAULT_INPUT_SCHEMA_PATHS = [ | ||
['.actor', 'INPUT_SCHEMA.json'], | ||
['INPUT_SCHEMA.json'], | ||
['.actor', 'input_schema.json'], | ||
['input_schema.json'], | ||
]; | ||
Comment on lines
+9
to
+14
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the name of the input schema file is configurable via |
||
|
||
const ACTOR_SPECIFICATION_FOLDER = '.actor'; | ||
|
||
const LOCAL_CONFIG_NAME = 'actor.json'; | ||
|
||
function readJSONIfExists(path: string): Dictionary | null { | ||
if (existsSync(path)) { | ||
const content = readFileSync(path, 'utf8'); | ||
return JSON.parse(content); | ||
} | ||
|
||
return null; | ||
} | ||
|
||
export const readInputSchema = (): Dictionary | null => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: can we make all of them ( |
||
const localConfig = readJSONIfExists( | ||
join(process.cwd(), ACTOR_SPECIFICATION_FOLDER, LOCAL_CONFIG_NAME), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
But I'm not sure how else we would refer to these files 🤔 I guess it's okay to live with this, especially since on Platform, we load this via API. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I only gave it a quick look, but it looks like we don't copy the But if we add a nice error log when there's not input schema to be found, yeah, we can live with that. |
||
); | ||
|
||
if (typeof localConfig?.input === 'object') { | ||
return localConfig.input; | ||
} | ||
|
||
if (typeof localConfig?.input === 'string') { | ||
const fullPath = join( | ||
process.cwd(), | ||
ACTOR_SPECIFICATION_FOLDER, | ||
localConfig.input, | ||
); | ||
|
||
return readJSONIfExists(fullPath); | ||
} | ||
|
||
for (const path of DEFAULT_INPUT_SCHEMA_PATHS) { | ||
const fullPath = join(process.cwd(), ...path); | ||
if (existsSync(fullPath)) { | ||
return readJSONIfExists(fullPath); | ||
} | ||
} | ||
|
||
return null; | ||
}; | ||
|
||
export const getDefaultsFromInputSchema = (inputSchema: any) => { | ||
const defaults: Record<string, unknown> = {}; | ||
|
||
for (const [key, fieldSchema] of Object.entries<any>( | ||
inputSchema.properties, | ||
)) { | ||
if (fieldSchema.default !== undefined) { | ||
defaults[key] = fieldSchema.default; | ||
} | ||
} | ||
|
||
return defaults; | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
FROM node:22 AS builder | ||
|
||
COPY /package*.json ./ | ||
RUN npm --quiet set progress=false \ | ||
&& npm install --only=prod --no-optional --no-audit \ | ||
&& npm update | ||
|
||
COPY /apify.tgz /apify.tgz | ||
RUN npm --quiet install /apify.tgz | ||
|
||
FROM apify/actor-node:22 | ||
|
||
RUN rm -r node_modules | ||
COPY --from=builder /node_modules ./node_modules | ||
COPY --from=builder /package*.json ./ | ||
COPY /.actor ./.actor | ||
COPY /src ./src | ||
|
||
RUN echo "Installed NPM packages:" \ | ||
&& (npm list --only=prod --no-optional --all || true) \ | ||
&& echo "Node.js version:" \ | ||
&& node --version \ | ||
&& echo "NPM version:" \ | ||
&& npm --version | ||
|
||
CMD npm start --silent |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"actorSpecification": 1, | ||
"name": "apify-sdk-js-test-input", | ||
"version": "0.0", | ||
"input": { | ||
"title": "Actor Input", | ||
"description": "Test input", | ||
"type": "object", | ||
"schemaVersion": 1, | ||
"properties": { | ||
"foo": { | ||
"title": "Foo", | ||
"type": "string", | ||
"description": "Foo", | ||
"default": "bar" | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"name": "apify-sdk-js-test-harness", | ||
"version": "0.0.1", | ||
"type": "module", | ||
"description": "This is an example of an Apify actor.", | ||
"engines": { | ||
"node": ">=22.0.0" | ||
}, | ||
"dependencies": { | ||
"apify": "*" | ||
}, | ||
"scripts": { | ||
"start": "node ./src/main.mjs" | ||
}, | ||
"author": "It's not you it's me", | ||
"license": "ISC" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import { Actor, log } from 'apify'; | ||
|
||
await Actor.init(); | ||
|
||
const input = await Actor.getInput(); | ||
|
||
log.info(`Input: ${JSON.stringify(input)}`); | ||
|
||
await Actor.setValue('RECEIVED_INPUT', input); | ||
|
||
await Actor.exit(); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import assert from 'node:assert/strict'; | ||
import test from 'node:test'; | ||
|
||
import { ApifyClient, KeyValueStore } from 'apify'; | ||
import { sleep } from 'crawlee'; | ||
|
||
const client = new ApifyClient({ | ||
token: process.env.APIFY_TOKEN, | ||
}); | ||
|
||
const actor = client.actor(process.argv[2]); | ||
|
||
const runActor = async (input, options) => { | ||
const { id: runId } = await actor.call(input, options); | ||
await client.run(runId).waitForFinish(); | ||
await sleep(6000); // wait for updates to propagate to MongoDB | ||
return await client.run(runId).get(); | ||
}; | ||
|
||
test('defaults work', async () => { | ||
const run = await runActor({}, {}); | ||
|
||
assert.strictEqual(run.status, 'SUCCEEDED'); | ||
|
||
const store = await KeyValueStore.open(run.defaultKeyValueStoreId, { | ||
storageClient: client, | ||
}); | ||
|
||
const receivedInput = await store.getValue('RECEIVED_INPUT'); | ||
assert.deepEqual(receivedInput, { foo: 'bar' }); | ||
}); | ||
|
||
test('input is passed through', async () => { | ||
const run = await runActor({ foo: 'baz' }, {}); | ||
|
||
assert.strictEqual(run.status, 'SUCCEEDED'); | ||
|
||
const store = await KeyValueStore.open(run.defaultKeyValueStoreId, { | ||
storageClient: client, | ||
}); | ||
|
||
const receivedInput = await store.getValue('RECEIVED_INPUT'); | ||
assert.deepEqual(receivedInput, { foo: 'baz' }); | ||
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
infer
seems like a better word here