feat: support /2.0/ai/extract_structured endpoint (#3596)
* feat: support /2.0/ai/extract_structured endpoint

* fix: rm comments

* Update src/api/Intelligence.js

Co-authored-by: greg-in-a-box <103291617+greg-in-a-box@users.noreply.github.com>

* fix: requested

---------

Co-authored-by: greg-in-a-box <103291617+greg-in-a-box@users.noreply.github.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
3 people authored Aug 6, 2024
1 parent 54b4c83 commit dee4eee
Showing 12 changed files with 366 additions and 1 deletion.
33 changes: 32 additions & 1 deletion src/api/Intelligence.js
@@ -4,8 +4,13 @@
* @author Box
*/

import getProp from 'lodash/get';
import type { BoxItem } from '../common/types/core';
import { ERROR_CODE_EXTRACT_STRUCTURED } from '../constants';
import { isUserCorrectableError } from '../utils/error';
import Base from './Base';
import { AiExtractResponse } from './schemas/AiExtractResponse';
import { AiExtractStructured } from './schemas/AiExtractStructured';

class Intelligence extends Base {
/**
@@ -42,6 +47,32 @@ class Intelligence extends Base {
},
});
}

/**
* Sends an AI request to supported LLMs and returns the extracted key-value pairs.
*
* @param {AiExtractStructured} request - AI Extract Structured Request
* @return {Promise<AiExtractResponse>} A successful response including the answer from the LLM
*/
async extractStructured(request: AiExtractStructured): Promise<AiExtractResponse> {
this.errorCode = ERROR_CODE_EXTRACT_STRUCTURED;

const url = `${this.getBaseApiUrl()}/ai/extract_structured`;

let suggestionsResponse = {};
try {
suggestionsResponse = await this.xhr.post({
url,
data: request,
});
} catch (e) {
const { status } = e;
if (isUserCorrectableError(status)) {
throw e;
}
}
return getProp(suggestionsResponse, 'data', {});
}
}

export default Intelligence;
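
For orientation, a minimal usage sketch of the new method (illustrative only, not part of this commit; the token, file ID, and field definitions are placeholders):

import Intelligence from './src/api/Intelligence';

// Hypothetical options; Base subclasses in this repo take an options object
// that carries auth configuration such as a token.
const intelligence = new Intelligence({ token: 'DEVELOPER_TOKEN' });

const request = {
    items: [{ id: '1234567890', type: 'file' }], // placeholder file ID
    fields: [
        { key: 'vendor_name', type: 'string', prompt: 'The vendor name printed at the top of the invoice' },
        { key: 'invoice_total', type: 'float' },
    ],
};

intelligence
    .extractStructured(request)
    .then(suggestions => {
        // Resolves with the extracted key-value pairs, or `{}` when the request
        // failed with an error the user cannot correct (see the catch block above).
        console.log(suggestions);
    })
    .catch(e => {
        // User-correctable errors (the tests below exercise a 401) are rethrown.
        console.error(e.status);
    });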
66 changes: 66 additions & 0 deletions src/api/__tests__/Intelligence.test.js
@@ -1,3 +1,4 @@
import { ERROR_CODE_EXTRACT_STRUCTURED } from '../../constants';
import Intelligence from '../Intelligence';

describe('api/Intelligence', () => {
@@ -78,4 +79,69 @@ describe('api/Intelligence', () => {
expect(e.message).toEqual('Missing items!');
}
});

describe('extractStructured()', () => {
const request = {
metadata_template: {
type: 'metadata_template',
scope: 'global',
template_key: 'myTestTemplate',
},
};

test('should return a successful response including the answer from the LLM', async () => {
const suggestionsFromServer = {
stringFieldKey: 'fieldVal1',
floatFieldKey: 124.0,
enumFieldKey: 'EnumOptionKey',
multiSelectFieldKey: ['multiSelectOption1', 'multiSelectOption5'],
};
intelligence.xhr.post = jest.fn().mockReturnValueOnce({
data: suggestionsFromServer,
});

const suggestions = await intelligence.extractStructured(request);
expect(suggestions).toEqual(suggestionsFromServer);
expect(intelligence.xhr.post).toHaveBeenCalledWith({
url: `${intelligence.getBaseApiUrl()}/ai/extract_structured`,
data: request,
});
});

test('should return an empty suggestions object when the error is 400', async () => {
const error = new Error();
error.status = 400;
intelligence.xhr.post = jest.fn().mockReturnValueOnce(Promise.reject(error));
let suggestions;
try {
suggestions = await intelligence.extractStructured(request);
} catch (e) {
expect(e.status).toEqual(400);
}
expect(intelligence.errorCode).toBe(ERROR_CODE_EXTRACT_STRUCTURED);
expect(suggestions).toEqual({});
expect(intelligence.xhr.post).toHaveBeenCalledWith({
url: `${intelligence.getBaseApiUrl()}/ai/extract_structured`,
data: request,
});
});

test('should throw error when error is not 400', async () => {
const error = new Error();
error.status = 401;
intelligence.xhr.post = jest.fn().mockReturnValueOnce(Promise.reject(error));
let suggestions;
try {
suggestions = await intelligence.extractStructured(request);
} catch (e) {
expect(e.status).toEqual(401);
}
expect(intelligence.errorCode).toBe(ERROR_CODE_EXTRACT_STRUCTURED);
expect(suggestions).toBeUndefined();
expect(intelligence.xhr.post).toHaveBeenCalledWith({
url: `${intelligence.getBaseApiUrl()}/ai/extract_structured`,
data: request,
});
});
});
});
30 changes: 30 additions & 0 deletions src/api/schemas/AiAgentBasicTextTool.js
@@ -0,0 +1,30 @@
/**
* @flow
* @author Box
*/

import type { AiLlmEndpointParamsGoogleOrAiLlmEndpointParamsOpenAi } from './AiLlmEndpointParamsGoogleOrAiLlmEndpointParamsOpenAi';

export interface AiAgentBasicTextTool {
/**
* The model to be used for the AI Agent for basic text.
*/
+model?: string;
/**
* System messages try to help the LLM "understand" its role and what it is supposed to do.
*/
+system_message?: string;
/**
* The prompt template carries the contextual information of the request, followed by the user prompt.
It may include placeholders for `{current_date}`, `{user_question}`, and `{content}`, depending on the use.
*/
+prompt_template?: string;
/**
* The number of tokens for completion.
*/
+num_tokens_for_completion?: number;
/**
* The parameters for the LLM endpoint, specific to the model provider (OpenAI or Google).
*/
+llm_endpoint_params?: AiLlmEndpointParamsGoogleOrAiLlmEndpointParamsOpenAi;
}
18 changes: 18 additions & 0 deletions src/api/schemas/AiAgentExtractStructured.js
@@ -0,0 +1,18 @@
/**
* @flow
* @author Box
*/

import { AiAgentBasicTextTool } from './AiAgentBasicTextTool';
import type { AiAgentLongTextTool } from './AiAgentLongTextTool';

export type AiAgentExtractStructuredTypeField = 'ai_agent_extract_structured';

export interface AiAgentExtractStructured {
/**
* The type of AI agent to be used for extraction.
*/
+type: AiAgentExtractStructuredTypeField;
+long_text?: AiAgentLongTextTool;
+basic_text?: AiAgentBasicTextTool;
}
29 changes: 29 additions & 0 deletions src/api/schemas/AiAgentLongTextTool.js
@@ -0,0 +1,29 @@
/**
* @flow
* @author Box
*/

import { AiAgentBasicTextTool } from './AiAgentBasicTextTool';

export interface AiAgentLongTextToolEmbeddingsStrategyField {
/**
* The strategy to be used for the AI Agent for calculating embeddings.
*/
+id?: string;
/**
* The number of tokens per chunk.
*/
+num_tokens_per_chunk?: number;
}

export interface AiAgentLongTextToolEmbeddingsField {
/**
* The model to be used for the AI Agent for calculating embeddings.
*/
+model?: string;
+strategy?: AiAgentLongTextToolEmbeddingsStrategyField;
}

export type AiAgentLongTextTool = AiAgentBasicTextTool & {
+embeddings?: AiAgentLongTextToolEmbeddingsField,
};
6 changes: 6 additions & 0 deletions src/api/schemas/AiExtractResponse.js
@@ -0,0 +1,6 @@
/**
* @flow
* @author Box
*/

export interface AiExtractResponse {}
84 changes: 84 additions & 0 deletions src/api/schemas/AiExtractStructured.js
@@ -0,0 +1,84 @@
/**
* @flow
* @author Box
*/

import { AiAgentExtractStructured } from './AiAgentExtractStructured';
import { AiItemBase } from './AiItemBase';

export type AiExtractStructuredMetadataTemplateTypeField = 'metadata_template';

export interface AiExtractStructuredMetadataTemplateField {
/**
* The name of the metadata template.
*/
+template_key?: string;
/**
* Value is always `metadata_template`.
*/
+type?: AiExtractStructuredMetadataTemplateTypeField;
/**
* The scope of the metadata template can either be global or
enterprise_*. The global scope is used for templates that are
available to any Box enterprise. The enterprise_* scope represents
templates that have been created within a specific enterprise,
where * will be the ID of that enterprise.
*/
+scope?: string;
}

export interface AiExtractStructuredFieldsOptionsField {
/**
* A unique identifier for the option.
*/
+key: string;
}

export interface AiExtractStructuredFieldsField {
/**
* A unique identifier for the field.
*/
+key: string;
/**
* A description of the field.
*/
+description?: string;
/**
* The display name of the field.
*/
+display_name?: string;
/**
* Context about the key that may include how to find and how to format it.
*/
+prompt?: string;
/**
* The type of the field. Can include but is not limited to string, float, date, enum, and multiSelect.
*/
+type?: string;
/**
* A list of options for this field. This is most often used in combination with the enum and multiSelect field types.
*/
+options?: $ReadOnlyArray<AiExtractStructuredFieldsOptionsField>;
}

export interface AiExtractStructured {
/**
* The items to be processed by the LLM, often files.
*/
+items: $ReadOnlyArray<AiItemBase>;
/**
* The metadata template containing the fields to extract. Cannot be used
in combination with `fields`.
*/
+metadata_template?: AiExtractStructuredMetadataTemplateField;
/**
* The fields to be extracted from the items. Cannot be used in combination
with `metadata_template`.
*/
+fields?: $ReadOnlyArray<AiExtractStructuredFieldsField>;
/**
* The JSON blob that contains overrides for the agent config.
*/
+agent_config?: string;
+ai_agent?: AiAgentExtractStructured;
}
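
For reference, a hypothetical request that targets an existing metadata template instead of ad-hoc fields; per the comments above, `metadata_template` and `fields` are mutually exclusive, and the scope and template key below are placeholders:

import type { AiExtractStructured } from './AiExtractStructured';

// Placeholder scope and template key; a template request carries no `fields`.
const request: AiExtractStructured = {
    items: [{ id: '1234567890', type: 'file' }],
    metadata_template: {
        type: 'metadata_template',
        scope: 'enterprise_987654', // or 'global' for templates available to any enterprise
        template_key: 'invoiceTemplate',
    },
};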
21 changes: 21 additions & 0 deletions src/api/schemas/AiItemBase.js
@@ -0,0 +1,21 @@
/**
* @flow
* @author Box
*/

export type AiItemBaseTypeField = 'file';

export interface AiItemBase {
/**
* The id of the item.
*/
+id: string;
/**
* The type of the item.
*/
+type: AiItemBaseTypeField;
/**
* The content of the item, often the text representation.
*/
+content?: string;
}
29 changes: 29 additions & 0 deletions src/api/schemas/AiLlmEndpointParamsGoogle.js
@@ -0,0 +1,29 @@
/**
* @flow
* @author Box
*/

export type AiLlmEndpointParamsGoogleTypeField = 'google_params';

export interface AiLlmEndpointParamsGoogle {
/**
* The type of the AI LLM endpoint params object for Google.
*/
+type: AiLlmEndpointParamsGoogleTypeField;
/**
* The temperature is used for sampling during response generation, which occurs when `top-P` and `top-K` are applied.
Temperature controls the degree of randomness in token selection.
*/
+temperature?: number;
/**
* Top-P changes how the model selects tokens for output. Tokens are selected from the most (see `top-K`) to least probable
until the sum of their probabilities equals the `top-P` value.
*/
+top_p?: number;
/**
* Top-K changes how the model selects tokens for output. A top-K of 1 means the next selected token is the
most probable among all tokens in the model's vocabulary (also called greedy decoding),
while a top-K of 3 means that the next token is selected from among the three most probable tokens by using temperature.
*/
+top_k?: number;
}
11 changes: 11 additions & 0 deletions src/api/schemas/AiLlmEndpointParamsGoogleOrAiLlmEndpointParamsOpenAi.js
@@ -0,0 +1,11 @@
/**
* @flow
* @author Box
*/

import { AiLlmEndpointParamsGoogle } from './AiLlmEndpointParamsGoogle';
import { AiLlmEndpointParamsOpenAi } from './AiLlmEndpointParamsOpenAi';

export type AiLlmEndpointParamsGoogleOrAiLlmEndpointParamsOpenAi =
| AiLlmEndpointParamsGoogle
| AiLlmEndpointParamsOpenAi;
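
A small sketch of how Flow narrows this union on the literal `type` tag (the sampling values are placeholders):

import type { AiLlmEndpointParamsGoogleOrAiLlmEndpointParamsOpenAi } from './AiLlmEndpointParamsGoogleOrAiLlmEndpointParamsOpenAi';

// 'google_params' selects AiLlmEndpointParamsGoogle; 'openai_params' would
// select AiLlmEndpointParamsOpenAi instead.
const endpointParams: AiLlmEndpointParamsGoogleOrAiLlmEndpointParamsOpenAi = {
    type: 'google_params',
    temperature: 0.2,
    top_p: 0.95,
    top_k: 3,
};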
39 changes: 39 additions & 0 deletions src/api/schemas/AiLlmEndpointParamsOpenAi.js
@@ -0,0 +1,39 @@
/**
* @flow
* @author Box
*/

export type AiLlmEndpointParamsOpenAiTypeField = 'openai_params';

export interface AiLlmEndpointParamsOpenAi {
/**
* The type of the AI LLM endpoint params object for OpenAI.
*/
+type: AiLlmEndpointParamsOpenAiTypeField;
/**
* What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random,
while lower values like 0.2 will make it more focused and deterministic.
We generally recommend altering this or `top_p` but not both.
*/
+temperature?: number;
/**
* An alternative to sampling with temperature, called nucleus sampling, where the model considers the results
of the tokens with `top_p` probability mass. So 0.1 means only the tokens comprising the top 10% probability
mass are considered. We generally recommend altering this or temperature but not both.
*/
+top_p?: number;
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the
text so far, decreasing the model's likelihood to repeat the same line verbatim.
*/
+frequency_penalty?: number;
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,
increasing the model's likelihood to talk about new topics.
*/
+presence_penalty?: number;
/**
* Up to 4 sequences where the API will stop generating further tokens.
*/
+stop?: string;
}
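
Finally, a hedged sketch of an `ai_agent` override assembled from these schemas; the model identifiers and parameter values are illustrative placeholders, not recommended defaults:

import type { AiAgentExtractStructured } from './AiAgentExtractStructured';

const aiAgent: AiAgentExtractStructured = {
    type: 'ai_agent_extract_structured',
    basic_text: {
        model: 'placeholder-model', // hypothetical model identifier
        llm_endpoint_params: {
            type: 'openai_params',
            temperature: 0.2,
            top_p: 1.0,
        },
    },
    long_text: {
        model: 'placeholder-model',
        embeddings: {
            model: 'placeholder-embeddings-model', // hypothetical
            strategy: { id: 'basic', num_tokens_per_chunk: 64 },
        },
    },
};

// The override rides on the request body, e.g. { items, fields, ai_agent: aiAgent }.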