Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ go get github.com/sixt/tensorlake-go
import "github.com/sixt/tensorlake-go"

c := tensorlake.NewClient(
tensorlake.WithRegion(tensorlake.RegionOnPrem),
tensorlake.WithBaseURL("https://api.your-domain.com"),
tensorlake.WithAPIKey("your-api-key"),
)
Expand Down
15 changes: 12 additions & 3 deletions parse_get.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ import (
type ParseResultUpdateFunc func(name ParseEventName, result *ParseResult)

type GetParseResultOptions struct {
withOptions bool

// UseSSE enables Server-Sent Events (SSE) for streaming updates.
// See also: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events
useSSE bool
Expand All @@ -41,6 +43,12 @@ type GetParseResultOptions struct {
// GetParseResultOption is a function that configures the GetParseResultOptions.
//
// Options are passed variadically to Client.GetParseResult, which applies
// them in argument order on top of its defaults before building the request.
type GetParseResultOption func(*GetParseResultOptions)

// WithOptions sets the with_options flag used by GetParseResult.
//
// GetParseResult forwards the flag to the API as the with_options query
// parameter. When enabled, the response is presumably expected to carry the
// job's options in ParseResult.Options (confirm against the API reference).
func WithOptions(enable bool) GetParseResultOption {
	return func(o *GetParseResultOptions) { o.withOptions = enable }
}

// WithSSE enables Server-Sent Events (SSE) for streaming updates.
func WithSSE(enable bool) GetParseResultOption {
return func(opts *GetParseResultOptions) {
Expand Down Expand Up @@ -70,14 +78,15 @@ func WithOnUpdate(onUpdate ParseResultUpdateFunc) GetParseResultOption {
// [Get Parse Result API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/get
func (c *Client) GetParseResult(ctx context.Context, parseId string, opts ...GetParseResultOption) (*ParseResult, error) {
o := &GetParseResultOptions{
useSSE: false,
onUpdate: nil,
withOptions: false,
useSSE: false,
onUpdate: nil,
}
for _, opt := range opts {
opt(o)
}

reqURL := fmt.Sprintf("%s/parse/%s", c.baseURL, parseId)
reqURL := fmt.Sprintf("%s/parse/%s?with_options=%t", c.baseURL, parseId, o.withOptions)

req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
if err != nil {
Expand Down
48 changes: 48 additions & 0 deletions types.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,54 @@ type ParseResult struct {
// None - one structured data object for the entire document.
// Page - one structured data object for each page.
StructuredData []StructuredData `json:"structured_data,omitempty"`

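// Options contains the options used for the parse job.
// Note that this field is nil when the response carries no "options"
// object; GetParseResult asks for it via the with_options query parameter.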
Options *ParseResultOptions `json:"options,omitempty"`

// Resource usage associated with the parse job.
//
// This includes details such as number of pages parsed, tokens used for
// OCR and extraction, etc.
//
// Usage is only populated for successful jobs.
//
// Billing is based on the resource usage.
Usage Usage `json:"usage"`
}

// ParseResultOptions contains the options used for the parse job.
// It includes the configuration options used for the parse job,
// including the file ID, file URL, raw text, mime type,
// and structured extraction options, etc.
//
// It is exposed to callers through ParseResult.Options.
type ParseResultOptions struct {
// FileSource is embedded without a JSON tag, so (assuming it is a struct)
// encoding/json reads and writes its fields at the top level of this object.
// Presumably it carries the file ID / file URL / raw text mentioned above;
// confirm against its declaration.
FileSource
// FileName is decoded from the "file_name" key.
FileName string `json:"file_name"`
// FileLabels is decoded from the "file_labels" key; it is a nil map when
// the key is absent or null.
FileLabels map[string]string `json:"file_labels"`
// MimeType is decoded from the "mime_type" key.
MimeType MimeType `json:"mime_type"`
// TraceId is decoded from the "trace_id" key.
TraceId string `json:"trace_id"`
// PageRange is decoded from the "page_range" key.
// NOTE(review): the accepted range syntax is not visible here.
PageRange string `json:"page_range"`
// JobType is decoded from the "job_type" key.
JobType JobType `json:"job_type"`
// Configuration is decoded from the "configuration" key; it is nil when the
// key is absent or null.
Configuration *ParsingOptions `json:"configuration"`
// Usage is optional: nil when absent, and omitted on marshal when nil.
Usage *Usage `json:"usage,omitempty"`
// MessageUpdate is optional: omitted on marshal when empty.
MessageUpdate string `json:"message_update,omitempty"`
}

// Usage contains resource usage associated with the parse job.
// This includes details such as number of pages parsed, tokens used for
// OCR and extraction, etc.
// Usage is only populated for successful jobs.
// Billing is based on the resource usage.
//
// Every field is a plain int, so a counter missing from the JSON payload
// decodes to 0 and cannot be told apart from a reported zero.
type Usage struct {
// PagesParsed is the number of pages parsed.
PagesParsed int `json:"pages_parsed"`
// SignatureDetectedPages and StrikethroughDetectedPages are page counts;
// presumably pages on which the respective feature was detected. Confirm
// against the API reference.
SignatureDetectedPages int `json:"signature_detected_pages"`
StrikethroughDetectedPages int `json:"strikethrough_detected_pages"`
// Token counters for OCR, split into input and output.
OCRInputTokenUsed int `json:"ocr_input_token_used"`
OCROutputTokenUsed int `json:"ocr_output_token_used"`
// Token counters for extraction, split into input and output.
ExtractionInputTokenUsed int `json:"extraction_input_token_used"`
ExtractionOutputTokenUsed int `json:"extraction_output_token_used"`
// Token counters for summarization, split into input and output.
SummarizationInputTokenUsed int `json:"summarization_input_token_used"`
SummarizationOutputTokenUsed int `json:"summarization_output_token_used"`
}

// StructuredExtractionOptions holds configuration for structured data extraction.
Expand Down