From 8d39bb7520963a1afaa17f880185c0b57e0db3be Mon Sep 17 00:00:00 2001 From: Changkun Ou Date: Mon, 22 Dec 2025 15:16:36 +0100 Subject: [PATCH] feat: add parsing options in get parse results response --- README.md | 1 - parse_get.go | 15 ++++++++++++--- types.go | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1543d30..ea45953 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,6 @@ go get github.com/sixt/tensorlake-go import "github.com/sixt/tensorlake-go" c := tensorlake.NewClient( - tensorlake.WithRegion(tensorlake.RegionOnPrem), tensorlake.WithBaseURL("https://api.your-domain.com"), tensorlake.WithAPIKey("your-api-key"), ) diff --git a/parse_get.go b/parse_get.go index 3257fb6..8910a3e 100644 --- a/parse_get.go +++ b/parse_get.go @@ -30,6 +30,8 @@ import ( type ParseResultUpdateFunc func(name ParseEventName, result *ParseResult) type GetParseResultOptions struct { + withOptions bool + // UseSSE enables Server-Sent Events (SSE) for streaming updates. // See also: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events useSSE bool @@ -41,6 +43,12 @@ type GetParseResultOptions struct { // GetParseResultOption is a function that configures the GetParseResultOptions. type GetParseResultOption func(*GetParseResultOptions) +func WithOptions(enable bool) GetParseResultOption { + return func(opts *GetParseResultOptions) { + opts.withOptions = enable + } +} + // WithSSE enables Server-Sent Events (SSE) for streaming updates. 
func WithSSE(enable bool) GetParseResultOption { return func(opts *GetParseResultOptions) { @@ -70,14 +78,15 @@ func WithOnUpdate(onUpdate ParseResultUpdateFunc) GetParseResultOption { // [Get Parse Result API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/get func (c *Client) GetParseResult(ctx context.Context, parseId string, opts ...GetParseResultOption) (*ParseResult, error) { o := &GetParseResultOptions{ - useSSE: false, - onUpdate: nil, + withOptions: false, + useSSE: false, + onUpdate: nil, } for _, opt := range opts { opt(o) } - reqURL := fmt.Sprintf("%s/parse/%s", c.baseURL, parseId) + reqURL := fmt.Sprintf("%s/parse/%s?with_options=%t", c.baseURL, parseId, o.withOptions) req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) if err != nil { diff --git a/types.go b/types.go index 646b95c..f21fd1e 100644 --- a/types.go +++ b/types.go @@ -270,6 +270,54 @@ type ParseResult struct { // None - one structured data object for the entire document. // Page - one structured data object for each page. StructuredData []StructuredData `json:"structured_data,omitempty"` + + // Options contains the options used for the parse job. + // Note that this field is not populated unless requested; presumably this requires with_options=true (see WithOptions) - confirm against the API. + Options *ParseResultOptions `json:"options,omitempty"` + + // Resource usage associated with the parse job. + // + // This includes details such as number of pages parsed, tokens used for + // OCR and extraction, etc. + // + // Usage is only populated for successful jobs. + // + // Billing is based on the resource usage. + Usage Usage `json:"usage"` +} + +// ParseResultOptions contains the options used for the parse job. +// It includes the configuration options used for the parse job, +// including the file ID, file URL, raw text, mime type, +// and structured extraction options, etc. 
+type ParseResultOptions struct { + FileSource + FileName string `json:"file_name"` + FileLabels map[string]string `json:"file_labels"` + MimeType MimeType `json:"mime_type"` + TraceId string `json:"trace_id"` + PageRange string `json:"page_range"` + JobType JobType `json:"job_type"` + Configuration *ParsingOptions `json:"configuration"` + Usage *Usage `json:"usage,omitempty"` + MessageUpdate string `json:"message_update,omitempty"` +} + +// Usage contains resource usage associated with the parse job. +// This includes details such as number of pages parsed, tokens used for +// OCR and extraction, etc. +// Usage is only populated for successful jobs. +// Billing is based on the resource usage. +type Usage struct { + PagesParsed int `json:"pages_parsed"` + SignatureDetectedPages int `json:"signature_detected_pages"` + StrikethroughDetectedPages int `json:"strikethrough_detected_pages"` + OCRInputTokenUsed int `json:"ocr_input_token_used"` + OCROutputTokenUsed int `json:"ocr_output_token_used"` + ExtractionInputTokenUsed int `json:"extraction_input_token_used"` + ExtractionOutputTokenUsed int `json:"extraction_output_token_used"` + SummarizationInputTokenUsed int `json:"summarization_input_token_used"` + SummarizationOutputTokenUsed int `json:"summarization_output_token_used"` } // StructuredExtractionOptions holds configuration for structured data extraction.