diff --git a/README.md b/README.md index c83a7c3..d0dad80 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,284 @@ [![Go Reference](https://pkg.go.dev/badge/github.com/sixt/tensorlake-go.svg)](https://pkg.go.dev/github.com/sixt/tensorlake-go) +A comprehensive Go SDK for the [Tensorlake API](https://docs.tensorlake.ai/api-reference/v2/introduction), enabling intelligent document processing with parsing, structured data extraction, and page classification capabilities. + +## Features + +- **Document Parsing**: Convert PDFs, DOCX, images, and more to structured markdown +- **Data Extraction**: Extract structured data using JSON schemas +- **Page Classification**: Classify pages by content type +- **File Management**: Upload and manage documents +- **Datasets**: Reusable parsing configurations for consistent processing +- **SSE Support**: Real-time progress updates via Server-Sent Events +- **Iterator Pattern**: Easy pagination through results + +## Installation + ```bash go get github.com/sixt/tensorlake-go ``` -This repository contains an implementation of the [Tensorlake API Reference](https://docs.tensorlake.ai/api-reference/v2/introduction), enabling document parsing, structured data extraction, and page classification, etc. +**Requirements:** Go 1.25 or later + +## Quick Start + +### 1. Initialize the Client + +```go +import "github.com/sixt/tensorlake-go" + +c := tensorlake.NewClient( + tensorlake.WithRegion(tensorlake.RegionOnPrem), + tensorlake.WithBaseURL("https://api.your-domain.com"), + tensorlake.WithAPIKey("your-api-key"), +) +``` + +### 2. Upload a File + +```go +file, _ := os.Open("document.pdf") +defer file.Close() + +uploadResp, _ := c.UploadFile(context.Background(), &tensorlake.UploadFileRequest{ + FileBytes: file, + FileName: "document.pdf", + Labels: map[string]string{"category": "invoice"}, +}) + +fmt.Printf("File uploaded: %s\n", uploadResp.FileId) +``` + +### 3. 
Parse the Document + +```go +parseJob, _ := c.ParseDocument(context.Background(), &tensorlake.ParseDocumentRequest{ + FileSource: tensorlake.FileSource{ + FileId: uploadResp.FileId, + }, +}) + +// Get results with real-time updates +result, _ := c.GetParseResult( + context.Background(), + parseJob.ParseId, + tensorlake.WithSSE(true), + tensorlake.WithOnUpdate(func(name tensorlake.ParseEventName, r *tensorlake.ParseResult) { + fmt.Printf("Status: %s - %d/%d pages\n", name, r.ParsedPagesCount, r.TotalPages) + }), +) + +// Access parsed content +for _, page := range result.Pages { + fmt.Printf("Page %d:\n", page.PageNumber) + // Process page content... +} +``` + +## Documentation + +### Core APIs + +- **[File Management APIs](./docs/file-apis.md)** - Upload, list, retrieve metadata, and delete files +- **[Parse APIs](./docs/parse-apis.md)** - Parse documents, extract data, and classify pages +- **[Dataset APIs](./docs/dataset-apis.md)** - Create reusable parsing configurations + +### Comprehensive Examples + +#### Extract Structured Data + +```go +import "github.com/google/jsonschema-go/jsonschema" + +// Define extraction schema +type InvoiceData struct { + InvoiceNumber string `json:"invoice_number"` + VendorName string `json:"vendor_name"` + TotalAmount float64 `json:"total_amount"` + LineItems []LineItem `json:"line_items"` +} + +type LineItem struct { + Description string `json:"description"` + Amount float64 `json:"amount"` +} + +schema, _ := jsonschema.For[InvoiceData](nil) + +// Parse with extraction +parseJob, _ := c.ParseDocument(context.Background(), &tensorlake.ParseDocumentRequest{ + FileSource: tensorlake.FileSource{FileId: fileId}, + StructuredExtractionOptions: []tensorlake.StructuredExtractionOptions{ + { + SchemaName: "invoice_data", + JSONSchema: schema, + PartitionStrategy: tensorlake.PartitionStrategyNone, + ProvideCitations: true, + }, + }, +}) + +// Retrieve and unmarshal extracted data +result, _ := c.GetParseResult(context.Background(), 
parseJob.ParseId) +for _, data := range result.StructuredData { + var extracted map[string]interface{} + json.Unmarshal(data.Data, &extracted) + fmt.Printf("Extracted: %+v\n", extracted) +} +``` + +#### Classify Pages + +```go +parseJob, _ := c.ClassifyDocument(context.Background(), &tensorlake.ClassifyDocumentRequest{ + FileSource: tensorlake.FileSource{FileId: fileId}, + PageClassifications: []tensorlake.PageClassConfig{ + { + Name: "signature_page", + Description: "Pages containing signatures or signature blocks", + }, + { + Name: "terms_and_conditions", + Description: "Pages with legal terms and conditions", + }, + }, +}) + +result, _ := c.GetParseResult(context.Background(), parseJob.ParseId) +for _, pageClass := range result.PageClasses { + fmt.Printf("Class '%s' found on pages: %v\n", pageClass.PageClass, pageClass.PageNumbers) +} +``` + +#### Use Datasets for Batch Processing + +```go +// Create a reusable dataset +dataset, _ := c.CreateDataset(context.Background(), &tensorlake.CreateDatasetRequest{ + Name: "invoice-processing", + Description: "Standard invoice parsing configuration", + ParsingOptions: &tensorlake.ParsingOptions{ + TableOutputMode: tensorlake.TableOutputModeMarkdown, + }, + StructuredExtractionOptions: []tensorlake.StructuredExtractionOptions{ + { + SchemaName: "invoice", + JSONSchema: schema, + }, + }, +}) + +// Process multiple files with the same configuration +fileIds := []string{"file_001", "file_002", "file_003"} +for _, fileId := range fileIds { + parseJob, _ := c.ParseDataset(context.Background(), &tensorlake.ParseDatasetRequest{ + DatasetId: dataset.DatasetId, + FileSource: tensorlake.FileSource{FileId: fileId}, + }) + // Process results... 
+} +``` + +## Advanced Features + +### Server-Sent Events (SSE) + +Get real-time progress updates for long-running parse jobs: + +```go +result, err := c.GetParseResult( + ctx, + parseId, + tensorlake.WithSSE(true), + tensorlake.WithOnUpdate(func(name tensorlake.ParseEventName, r *tensorlake.ParseResult) { + switch name { + case tensorlake.sseEventParseQueued: + fmt.Println("Job queued") + case tensorlake.sseEventParseUpdate: + fmt.Printf("Progress: %d/%d pages\n", r.ParsedPagesCount, r.TotalPages) + case tensorlake.sseEventParseDone: + fmt.Println("Complete!") + } + }), +) +``` + +### Iterator Pattern + +Easily iterate through paginated results: + +```go +// Iterate all files +for file, err := range c.IterFiles(ctx, 50, tensorlake.PaginationDirectionNext) { + if err != nil { + panic(err) + } + fmt.Printf("File: %s\n", file.FileName) +} + +// Iterate all parse jobs +for job, err := range c.IterParseJobs(ctx, 50, tensorlake.PaginationDirectionNext) { + if err != nil { + panic(err) + } + fmt.Printf("Job %s: Status: %s\n", job.ParseId, job.Status) +} + +// Iterate all datasets +for dataset, err := range c.IterDatasets(ctx, 50, tensorlake.PaginationDirectionNext) { + if err != nil { + panic(err) + } + fmt.Printf("Dataset %s: Name: %s, Status: %s\n", dataset.DatasetId, dataset.Name, dataset.Status) +} +``` + +## Supported File Types + +- **Documents**: PDF, DOCX +- **Spreadsheets**: XLS, XLSX, XLSM, CSV +- **Presentations**: PPTX, Apple Keynote +- **Images**: PNG, JPG, JPEG +- **Text**: Plain text, HTML + +Maximum file size: 1 GB + +## Error Handling + +All API methods return structured errors: + +```go +result, err := c.ParseDocument(ctx, request) +if err != nil { + var apiErr *tensorlake.ErrorResponse + if errors.As(err, &apiErr) { + fmt.Printf("API Error: %s (Code: %s)\n", apiErr.Message, apiErr.ErrorCode) + // Handle specific error codes + } else { + fmt.Printf("Network/Client Error: %v\n", err) + } +} +``` + +## Best Practices + +1. 
**Reuse Datasets** - Create datasets for frequently processed document types +2. **Use SSE** - Enable SSE for large documents to track progress +3. **Batch Processing** - Process similar documents with the same dataset configuration +4. **Error Handling** - Always check error responses and handle retries appropriately +5. **Labels** - Use labels to organize and filter files and parse jobs +6. **Iterators** - Use iterator methods for efficient pagination through large result sets + +## Contributing + +Contributions are welcome! Please feel free to submit issues or pull requests. + +## Related Resources + +- [Tensorlake API Documentation](https://docs.tensorlake.ai/) +- [API Reference](https://docs.tensorlake.ai/api-reference/v2/introduction) +- [Go Package Documentation](https://pkg.go.dev/github.com/sixt/tensorlake-go) ## License diff --git a/dataset_create.go b/dataset_create.go index 2dcdfc6..3ee058e 100644 --- a/dataset_create.go +++ b/dataset_create.go @@ -90,12 +90,16 @@ type CreateDatasetResponse struct { } // CreateDataset creates a new dataset. +// +// See also: [Create Dataset API Reference] +// +// [Create Dataset API Reference]: https://docs.tensorlake.ai/api-reference/v2/datasets/create func (c *Client) CreateDataset(ctx context.Context, in *CreateDatasetRequest) (*CreateDatasetResponse, error) { b, err := json.Marshal(in) if err != nil { return nil, fmt.Errorf("failed to marshal request: %w", err) } - + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/datasets", bytes.NewReader(b)) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) diff --git a/dataset_delete.go b/dataset_delete.go index 38585b0..d747f7e 100644 --- a/dataset_delete.go +++ b/dataset_delete.go @@ -21,6 +21,10 @@ import ( ) // DeleteDataset deletes a dataset from Tensorlake. 
+// +// See also: [Delete Dataset API Reference] +// +// [Delete Dataset API Reference]: https://docs.tensorlake.ai/api-reference/v2/datasets/delete func (c *Client) DeleteDataset(ctx context.Context, datasetId string) error { reqURL := fmt.Sprintf("%s/datasets/%s", c.baseURL, datasetId) diff --git a/dataset_get.go b/dataset_get.go index 8410aff..dc97d0e 100644 --- a/dataset_get.go +++ b/dataset_get.go @@ -23,6 +23,10 @@ import ( ) // GetDataset retrieves details for a specific dataset. +// +// See also: [Get Dataset API Reference] +// +// [Get Dataset API Reference]: https://docs.tensorlake.ai/api-reference/v2/datasets/get func (c *Client) GetDataset(ctx context.Context, datasetId string) (*Dataset, error) { reqURL := fmt.Sprintf("%s/datasets/%s", c.baseURL, datasetId) diff --git a/dataset_list.go b/dataset_list.go index b0d0e2e..c32507d 100644 --- a/dataset_list.go +++ b/dataset_list.go @@ -88,6 +88,10 @@ type ListDatasetsRequest struct { } // ListDatasets lists all datasets in the organization. +// +// See also: [List Datasets API Reference] +// +// [List Datasets API Reference]: https://docs.tensorlake.ai/api-reference/v2/datasets/list func (c *Client) ListDatasets(ctx context.Context, in *ListDatasetsRequest) (*PaginationResult[Dataset], error) { reqURL := c.baseURL + "/datasets" @@ -148,6 +152,10 @@ type ListDatasetDataRequest struct { // ListDatasetData lists all the parse jobs associated with a specific dataset. // This endpoint allows you to retrieve the status and metadata of each parse job // that has been submitted under the specified dataset. 
+// +// See also: [List Dataset Data API Reference] +// +// [List Dataset Data API Reference]: https://docs.tensorlake.ai/api-reference/v2/datasets/data func (c *Client) ListDatasetData(ctx context.Context, in *ListDatasetDataRequest) (*PaginationResult[ParseResult], error) { reqURL := fmt.Sprintf("%s/datasets/%s/data", c.baseURL, in.DatasetId) params := url.Values{} diff --git a/dataset_parse.go b/dataset_parse.go index 0436e74..0135074 100644 --- a/dataset_parse.go +++ b/dataset_parse.go @@ -34,6 +34,10 @@ type ParseDatasetRequest struct { } // ParseDataset parses a document using a dataset's configuration. +// +// See also: [Parse Dataset API Reference] +// +// [Parse Dataset API Reference]: https://docs.tensorlake.ai/api-reference/v2/datasets/parse func (c *Client) ParseDataset(ctx context.Context, in *ParseDatasetRequest) (*ParseJob, error) { if !in.SourceProvided() { return nil, fmt.Errorf("exactly one of file_id, file_url, or raw_text must be provided") diff --git a/dataset_test.go b/dataset_test.go index fc9753b..154ae3e 100644 --- a/dataset_test.go +++ b/dataset_test.go @@ -89,8 +89,8 @@ func TestDataset(t *testing.T) { t.Logf("dataset parse job: %+v", p) // Get parse job results. - r, err := c.GetParseResult(t.Context(), p.ParseId, WithSSE(true), WithOnUpdate(func(eventName string, _ *ParseResult) { - t.Logf("parse status: %s", eventName) + r, err := c.GetParseResult(t.Context(), p.ParseId, WithSSE(true), WithOnUpdate(func(name ParseEventName, _ *ParseResult) { + t.Logf("parse status: %s", name) })) if err != nil { t.Fatalf("failed to get parse job result: %v", err) diff --git a/dataset_update.go b/dataset_update.go index cbfc5e9..fdaf02f 100644 --- a/dataset_update.go +++ b/dataset_update.go @@ -34,6 +34,10 @@ type UpdateDatasetRequest struct { } // UpdateDataset updates a dataset's settings. 
+// +// See also: [Update Dataset API Reference] +// +// [Update Dataset API Reference]: https://docs.tensorlake.ai/api-reference/v2/datasets/update func (c *Client) UpdateDataset(ctx context.Context, in *UpdateDatasetRequest) (*Dataset, error) { reqBody, err := json.Marshal(in) if err != nil { diff --git a/doc.go b/doc.go index 1ca06b1..b88a160 100644 --- a/doc.go +++ b/doc.go @@ -46,4 +46,35 @@ // if err != nil { // log.Fatal(err) // } +// +// # Parsing a Document +// +// Parse an uploaded file and retrieve the results: +// +// // Start parsing using the file ID from upload +// parseJob, err := c.ParseDocument(context.Background(), &tensorlake.ParseDocumentRequest{ +// FileSource: tensorlake.FileSource{ +// FileId: r.FileId, +// }, +// Labels: map[string]string{"type": "invoice"}, +// }) +// if err != nil { +// log.Fatal(err) +// } +// +// // Retrieve parse results with streaming updates +// result, err := c.GetParseResult(context.Background(), parseJob.ParseId, +// tensorlake.WithSSE(true), +// tensorlake.WithOnUpdate(func(name tensorlake.ParseEventName, r *tensorlake.ParseResult) { +// log.Printf("Parse status: %s", name) +// }), +// ) +// if err != nil { +// log.Fatal(err) +// } +// +// // Access the parsed content +// for _, page := range result.Pages { +// log.Printf("Page %d: %s", page.PageNumber, page.Markdown) +// } package tensorlake diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..d03407d --- /dev/null +++ b/docs/README.md @@ -0,0 +1,174 @@ +# Tensorlake Go SDK Documentation + +Welcome to the Tensorlake Go SDK documentation! This guide covers all APIs for intelligent document processing. + +## Documentation Index + +### Core API Guides + +1. **[File Management APIs](./file-apis.md)** + - Upload files to Tensorlake + - List and search files + - Retrieve file metadata + - Delete files + - Supported file types and limits + +2. 
**[Parse APIs](./parse-apis.md)** + - Parse documents to markdown + - Extract structured data with JSON schemas + - Classify pages by content + - Get parse results with SSE streaming + - List and manage parse jobs + - Advanced parsing configurations + +3. **[Dataset APIs](./dataset-apis.md)** + - Create reusable parsing configurations + - Update dataset settings + - List and manage datasets + - Batch process documents with datasets + - Use cases and best practices + +## Quick Navigation + +### Common Tasks + +- **Getting Started**: See [Main README](../README.md#quick-start) +- **Upload a file**: [File APIs - Upload](./file-apis.md#upload-file) +- **Parse a document**: [Parse APIs - Parse Document](./parse-apis.md#parse-document) +- **Extract data**: [Parse APIs - Extract Document](./parse-apis.md#extract-document) +- **Create a dataset**: [Dataset APIs - Create Dataset](./dataset-apis.md#create-dataset) +- **Stream results with SSE**: [Parse APIs - Get Parse Result](./parse-apis.md#get-parse-result) + +### By Use Case + +#### Simple Document Parsing +1. [Upload file](./file-apis.md#upload-file) +2. [Parse document](./parse-apis.md#parse-document) +3. [Get results](./parse-apis.md#get-parse-result) + +#### Structured Data Extraction +1. [Define JSON schema](./parse-apis.md#extract-document) +2. [Extract with schema](./parse-apis.md#extract-document) +3. [Access extracted data](./parse-apis.md#example-access-structured-data) + +#### Page Classification +1. [Define page classes](./parse-apis.md#classify-document) +2. [Classify document](./parse-apis.md#classify-document) +3. [Access classifications](./parse-apis.md#classify-document) + +#### Batch Processing +1. [Create dataset](./dataset-apis.md#create-dataset) +2. [Parse with dataset](./dataset-apis.md#parse-with-dataset) +3. 
[Process multiple files](./dataset-apis.md#example-parse-multiple-files-with-same-configuration) + +## Key Concepts + +### File Sources +Documents can be provided in three ways: +- **File ID**: Upload first, then reference by ID (recommended) +- **File URL**: Publicly accessible internet URL +- **Raw Text**: Plain text content + +See [Parse APIs](./parse-apis.md#parse-document) for details. + +### Pagination +List operations support cursor-based pagination: +- Use `Cursor`, `Limit`, and `Direction` parameters +- Or use convenient iterator methods (`IterFiles`, `IterParseJobs`, `IterDatasets`) + +See [File APIs - List Files](./file-apis.md#iterate-all-files) for examples. + +### Server-Sent Events (SSE) +Get real-time updates for long-running parse jobs: +- Enable with `WithSSE(true)` +- Receive callbacks with `WithOnUpdate()` + +See [Parse APIs - SSE Streaming](./parse-apis.md#example-sse-streaming) for examples. + +## API Reference by Category + +### File Management +| Operation | Method | Documentation | +|-----------|--------|---------------| +| Upload file | `UploadFile()` | [Link](./file-apis.md#upload-file) | +| List files | `ListFiles()` / `IterFiles()` | [Link](./file-apis.md#list-files) | +| Get metadata | `GetFileMetadata()` | [Link](./file-apis.md#get-file-metadata) | +| Delete file | `DeleteFile()` | [Link](./file-apis.md#delete-file) | + +### Parse Operations +| Operation | Method | Documentation | +|-----------|--------|---------------| +| Parse document | `ParseDocument()` | [Link](./parse-apis.md#parse-document) | +| Read document | `ReadDocument()` | [Link](./parse-apis.md#read-document) | +| Extract data | `ExtractDocument()` | [Link](./parse-apis.md#extract-document) | +| Classify pages | `ClassifyDocument()` | [Link](./parse-apis.md#classify-document) | +| Get results | `GetParseResult()` | [Link](./parse-apis.md#get-parse-result) | +| List jobs | `ListParseJobs()` / `IterParseJobs()` | [Link](./parse-apis.md#list-parse-jobs) | +| Delete job | 
`DeleteParseJob()` | [Link](./parse-apis.md#delete-parse-job) | + +### Dataset Management +| Operation | Method | Documentation | +|-----------|--------|---------------| +| Create dataset | `CreateDataset()` | [Link](./dataset-apis.md#create-dataset) | +| Get dataset | `GetDataset()` | [Link](./dataset-apis.md#get-dataset) | +| Update dataset | `UpdateDataset()` | [Link](./dataset-apis.md#update-dataset) | +| List datasets | `ListDatasets()` / `IterDatasets()` | [Link](./dataset-apis.md#list-datasets) | +| Delete dataset | `DeleteDataset()` | [Link](./dataset-apis.md#delete-dataset) | +| Parse with dataset | `ParseDataset()` | [Link](./dataset-apis.md#parse-with-dataset) | + +## Configuration Reference + +### Parsing Options +Customize document parsing behavior: +- Chunking strategies +- OCR models +- Table output formats +- Image inclusion +- Detection features (signatures, barcodes, etc.) + +See [Parse APIs - Parsing Options](./parse-apis.md#parsingoptions) + +### Enrichment Options +Add AI-powered enhancements: +- Table summarization +- Figure summarization +- Custom prompts + +See [Parse APIs - Enrichment Options](./parse-apis.md#enrichmentoptions) + +### Extraction Options +Configure structured data extraction: +- JSON schema definition +- Partition strategies +- Model providers +- Page class filters +- Citation support + +See [Parse APIs - Structured Extraction](./parse-apis.md#extract-document) + +## Examples + +Each API guide includes comprehensive examples: +- Basic usage +- Advanced configurations +- Error handling +- Real-world use cases + +Start with the [Main README](../README.md) for an overview, then dive into specific API guides as needed. + +## External Resources + +- [Tensorlake API Documentation](https://docs.tensorlake.ai/) +- [API Reference v2](https://docs.tensorlake.ai/api-reference/v2/introduction) +- [Go Package Documentation](https://pkg.go.dev/github.com/sixt/tensorlake-go) + +## Need Help? 
+ +- Check the relevant API guide for detailed examples +- Review [best practices](./parse-apis.md#best-practices) in each guide +- Consult the [error handling](./file-apis.md#error-handling) section + +--- + +[← Back to Main README](../README.md) + diff --git a/docs/dataset-apis.md b/docs/dataset-apis.md new file mode 100644 index 0000000..acadd71 --- /dev/null +++ b/docs/dataset-apis.md @@ -0,0 +1,598 @@ +# Dataset APIs + +Datasets in Tensorlake allow you to define reusable parsing configurations that can be applied to multiple documents. This is useful when you have a specific document type (e.g., invoices, contracts) that requires consistent processing settings. + +## Table of Contents + +- [Overview](#overview) +- [Create Dataset](#create-dataset) +- [Get Dataset](#get-dataset) +- [Update Dataset](#update-dataset) +- [List Datasets](#list-datasets) +- [Delete Dataset](#delete-dataset) +- [Parse with Dataset](#parse-with-dataset) + +## Overview + +A dataset encapsulates: +- **Parsing options**: How to parse documents +- **Structured extraction schemas**: What data to extract +- **Page classifications**: How to classify pages +- **Enrichment settings**: Additional AI-powered enhancements + +Once created, you can parse multiple documents using the same dataset configuration, ensuring consistency across similar document types. + +--- + +## Create Dataset + +Create a new dataset with specific parsing configuration. 
+ +### Method + +```go +func (c *Client) CreateDataset(ctx context.Context, in *CreateDatasetRequest) (*CreateDatasetResponse, error) +``` + +### Request Parameters + +```go +type CreateDatasetRequest struct { + // Name of the dataset (required, must be unique) + // Can only contain alphanumeric characters, hyphens, and underscores + Name string + + // Optional description + Description string + + // Parsing configuration (optional) + ParsingOptions *ParsingOptions + StructuredExtractionOptions []StructuredExtractionOptions + PageClassifications []PageClassConfig + EnrichmentOptions *EnrichmentOptions +} +``` + +### Response + +```go +type CreateDatasetResponse struct { + Name string // Name of the dataset + DatasetId string // Unique dataset identifier + CreatedAt string // Creation timestamp (RFC 3339) +} +``` + +### Example + +```go +// Create a dataset for invoice processing +dataset, err := client.CreateDataset(context.Background(), &tensorlake.CreateDatasetRequest{ + Name: "invoice-processing", + Description: "Standard invoice parsing with data extraction", + ParsingOptions: &tensorlake.ParsingOptions{ + IncludeImages: false, + TableOutputMode: tensorlake.TableOutputModeMarkdown, + SignatureDetection: true, + }, + StructuredExtractionOptions: []tensorlake.StructuredExtractionOptions{ + { + SchemaName: "invoice_data", + JSONSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "invoice_number": {Type: "string"}, + "vendor_name": {Type: "string"}, + "total_amount": {Type: "number"}, + "invoice_date": {Type: "string"}, + "due_date": {Type: "string"}, + "line_items": { + Type: "array", + Items: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "description": {Type: "string"}, + "quantity": {Type: "number"}, + "unit_price": {Type: "number"}, + "total": {Type: "number"}, + }, + }, + }, + }, + Required: []string{"invoice_number", "vendor_name", "total_amount"}, + }, + PartitionStrategy: 
tensorlake.PartitionStrategyNone, + ProvideCitations: true, + }, + }, + PageClassifications: []tensorlake.PageClassConfig{ + { + Name: "invoice_page", + Description: "Pages containing invoice information with line items", + }, + { + Name: "payment_terms", + Description: "Pages with payment terms and conditions", + }, + }, +}) +if err != nil { + log.Fatal(err) +} + +fmt.Printf("Dataset created: %s (ID: %s)\n", dataset.Name, dataset.DatasetId) +``` + +--- + +## Get Dataset + +Retrieve details for a specific dataset. + +### Method + +```go +func (c *Client) GetDataset(ctx context.Context, datasetId string) (*Dataset, error) +``` + +### Parameters + +- `datasetId`: The unique identifier of the dataset + +### Response + +```go +type Dataset struct { + Name string // Dataset name + DatasetId string // Unique identifier + Description string // Dataset description + Status DatasetStatus // Current status + CreatedAt string // Creation timestamp + UpdatedAt string // Last update timestamp +} +``` + +### Example + +```go +dataset, err := client.GetDataset(context.Background(), "dataset_abc123") +if err != nil { + log.Fatal(err) +} + +fmt.Printf("Dataset: %s\n", dataset.Name) +fmt.Printf("Status: %s\n", dataset.Status) +fmt.Printf("Description: %s\n", dataset.Description) +fmt.Printf("Created: %s\n", dataset.CreatedAt) +fmt.Printf("Updated: %s\n", dataset.UpdatedAt) +``` + +--- + +## Update Dataset + +Update an existing dataset's configuration. 
+ +### Method + +```go +func (c *Client) UpdateDataset(ctx context.Context, in *UpdateDatasetRequest) (*Dataset, error) +``` + +### Request Parameters + +```go +type UpdateDatasetRequest struct { + DatasetId string // Required: which dataset to update + + // All fields are optional - only provide what you want to update + Description string + ParsingOptions *ParsingOptions + StructuredExtractionOptions []StructuredExtractionOptions + PageClassifications []PageClassConfig + EnrichmentOptions *EnrichmentOptions +} +``` + +### Response + +Returns the updated `Dataset` object. + +### Example + +```go +// Update dataset to include table summarization +updatedDataset, err := client.UpdateDataset(context.Background(), &tensorlake.UpdateDatasetRequest{ + DatasetId: "dataset_abc123", + Description: "Invoice processing with enhanced table summaries", + EnrichmentOptions: &tensorlake.EnrichmentOptions{ + TableSummarization: true, + TableSummarizationPrompt: "Summarize line items with totals", + }, +}) +if err != nil { + log.Fatal(err) +} + +fmt.Printf("Dataset updated: %s\n", updatedDataset.Name) +``` + +--- + +## List Datasets + +List all datasets in your project with cursor-based pagination. 
+ +### Method + +```go +func (c *Client) ListDatasets(ctx context.Context, in *ListDatasetsRequest) (*PaginationResult[Dataset], error) +``` + +### Request Parameters + +```go +type ListDatasetsRequest struct { + Cursor string // Pagination cursor + Direction PaginationDirection // "next" or "prev" + Limit int // Maximum results per page + Name string // Filter by name (substring match) +} +``` + +### Response + +```go +type PaginationResult[Dataset] struct { + Items []Dataset // Array of datasets + HasMore bool // More results available + NextCursor string // Cursor for next page + PrevCursor string // Cursor for previous page +} +``` + +### Example + +```go +// List first 10 datasets +response, err := client.ListDatasets(context.Background(), &tensorlake.ListDatasetsRequest{ + Limit: 10, + Direction: tensorlake.PaginationDirectionNext, +}) +if err != nil { + log.Fatal(err) +} + +fmt.Printf("Found %d datasets:\n", len(response.Items)) +for _, dataset := range response.Items { + fmt.Printf(" - %s (ID: %s) - %s\n", + dataset.Name, dataset.DatasetId, dataset.Status) +} + +// Paginate through results +if response.HasMore { + nextPage, err := client.ListDatasets(context.Background(), &tensorlake.ListDatasetsRequest{ + Cursor: response.NextCursor, + Limit: 10, + Direction: tensorlake.PaginationDirectionNext, + }) + // ... process next page +} +``` + +### Iterate All Datasets + +For convenience, use the `IterDatasets` method: + +```go +func (c *Client) IterDatasets(ctx context.Context, limit int, direction PaginationDirection) iter.Seq2[Dataset, error] +``` + +**Example:** + +```go +for dataset, err := range client.IterDatasets(context.Background(), 50, tensorlake.PaginationDirectionNext) { + if err != nil { + log.Fatal(err) + } + fmt.Printf("Dataset: %s - %s\n", dataset.Name, dataset.Description) +} +``` + +--- + +## Delete Dataset + +Delete a dataset from Tensorlake. 
+ +### Method + +```go +func (c *Client) DeleteDataset(ctx context.Context, datasetId string) error +``` + +### Parameters + +- `datasetId`: The unique identifier of the dataset to delete + +### Example + +```go +err := client.DeleteDataset(context.Background(), "dataset_abc123") +if err != nil { + log.Fatal(err) +} + +fmt.Println("Dataset deleted successfully") +``` + +### Notes + +- Deleting a dataset is permanent +- Does not delete files or parse jobs created using the dataset +- You cannot undo this operation + +--- + +## Parse with Dataset + +Parse a document using a dataset's predefined configuration. + +### Method + +```go +func (c *Client) ParseDataset(ctx context.Context, in *ParseDatasetRequest) (*ParseJob, error) +``` + +### Request Parameters + +```go +type ParseDatasetRequest struct { + DatasetId string // Required: which dataset to use + FileSource // One of: FileId, FileURL, or RawText (required) + + // Optional overrides + PageRange string + FileName string // Only used with FileId + MimeType MimeType + Labels map[string]string +} +``` + +### Response + +```go +type ParseJob struct { + ParseId string // Unique identifier for tracking + CreatedAt string // RFC 3339 timestamp +} +``` + +### Example: Parse Multiple Files with Same Configuration + +```go +// Create a dataset for contracts +dataset, err := client.CreateDataset(context.Background(), &tensorlake.CreateDatasetRequest{ + Name: "legal-contracts", + Description: "Parse legal contracts and extract key terms", + ParsingOptions: &tensorlake.ParsingOptions{ + SignatureDetection: true, + CrossPageHeaderDetection: true, + }, + StructuredExtractionOptions: []tensorlake.StructuredExtractionOptions{ + { + SchemaName: "contract_terms", + JSONSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "parties": {Type: "array", Items: &jsonschema.Schema{Type: "string"}}, + "effective_date": {Type: "string"}, + "term_length": {Type: "string"}, + "payment_terms": {Type: 
"string"}, + }, + }, + }, + }, + PageClassifications: []tensorlake.PageClassConfig{ + { + Name: "signature_page", + Description: "Pages with signature blocks", + }, + }, +}) +if err != nil { + log.Fatal(err) +} + +// Parse multiple contracts using the same dataset +fileIds := []string{"file_contract1", "file_contract2", "file_contract3"} + +for _, fileId := range fileIds { + parseJob, err := client.ParseDataset(context.Background(), &tensorlake.ParseDatasetRequest{ + DatasetId: dataset.DatasetId, + FileSource: tensorlake.FileSource{ + FileId: fileId, + }, + Labels: map[string]string{ + "document_type": "contract", + "batch": "2024-Q1", + }, + }) + if err != nil { + log.Printf("Error parsing %s: %v", fileId, err) + continue + } + + fmt.Printf("Started parse job: %s for file %s\n", parseJob.ParseId, fileId) +} +``` + +### Example: Parse from URL with Dataset + +```go +parseJob, err := client.ParseDataset(context.Background(), &tensorlake.ParseDatasetRequest{ + DatasetId: "dataset_abc123", + FileSource: tensorlake.FileSource{ + FileURL: "https://example.com/invoice.pdf", + }, + Labels: map[string]string{ + "source": "vendor_portal", + }, +}) +if err != nil { + log.Fatal(err) +} + +// Retrieve results +result, err := client.GetParseResult(context.Background(), parseJob.ParseId) +if err != nil { + log.Fatal(err) +} + +fmt.Printf("Parse completed with dataset: %s\n", result.DatasetId) +``` + +--- + +## Common Types + +### DatasetStatus + +```go +type DatasetStatus string + +const ( + DatasetStatusActive DatasetStatus = "active" + DatasetStatusInactive DatasetStatus = "inactive" +) +``` + +### Dataset Naming Conventions + +When creating datasets, follow these guidelines: + +- Use descriptive, lowercase names with hyphens: `invoice-processing`, `legal-contracts` +- Keep names concise but meaningful: `quarterly-reports` not `q-rpts` +- Group related datasets with prefixes: `hr-employee-docs`, `hr-payroll-docs` + +--- + +## Use Cases + +### 1. 
Batch Processing Similar Documents + +```go +// Create dataset for expense reports +dataset, _ := client.CreateDataset(ctx, &tensorlake.CreateDatasetRequest{ + Name: "expense-reports", + StructuredExtractionOptions: []tensorlake.StructuredExtractionOptions{ + { + SchemaName: "expenses", + JSONSchema: expenseSchema, + }, + }, +}) + +// Process all expense PDFs +for _, fileId := range expenseFileIds { + client.ParseDataset(ctx, &tensorlake.ParseDatasetRequest{ + DatasetId: dataset.DatasetId, + FileSource: tensorlake.FileSource{FileId: fileId}, + }) +} +``` + +### 2. Multi-Tenant Processing + +```go +// Create dataset per customer/tenant +customerDataset, _ := client.CreateDataset(ctx, &tensorlake.CreateDatasetRequest{ + Name: fmt.Sprintf("customer-%s-invoices", customerId), + Description: fmt.Sprintf("Invoice processing for customer %s", customerId), + // ... customer-specific configuration +}) + +// Parse customer documents +client.ParseDataset(ctx, &tensorlake.ParseDatasetRequest{ + DatasetId: customerDataset.DatasetId, + FileSource: tensorlake.FileSource{FileId: fileId}, + Labels: map[string]string{"customer_id": customerId}, +}) +``` + +### 3. Document Type Classification Pipeline + +```go +// Create datasets for different document types +contractDataset, _ := client.CreateDataset(ctx, &tensorlake.CreateDatasetRequest{ + Name: "contracts", + // Contract-specific schemas +}) + +invoiceDataset, _ := client.CreateDataset(ctx, &tensorlake.CreateDatasetRequest{ + Name: "invoices", + // Invoice-specific schemas +}) + +// Route documents to appropriate dataset +func processDocument(fileId, docType string) { + var datasetId string + switch docType { + case "contract": + datasetId = contractDataset.DatasetId + case "invoice": + datasetId = invoiceDataset.DatasetId + } + + client.ParseDataset(ctx, &tensorlake.ParseDatasetRequest{ + DatasetId: datasetId, + FileSource: tensorlake.FileSource{FileId: fileId}, + }) +} +``` + +--- + +## Best Practices + +1. 
**Reusability**: Create datasets for document types you process frequently +2. **Naming**: Use clear, descriptive names that indicate the document type and purpose +3. **Versioning**: Create new datasets instead of updating existing ones for major config changes +4. **Testing**: Test dataset configurations with sample documents before batch processing +5. **Organization**: Use consistent naming patterns for related datasets +6. **Documentation**: Keep dataset descriptions up-to-date with their purpose and configuration +7. **Monitoring**: Track parse job results by dataset to identify configuration issues + +--- + +## Error Handling + +Common error scenarios: + +```go +dataset, err := client.CreateDataset(ctx, request) +if err != nil { + var apiErr *tensorlake.ErrorResponse + if errors.As(err, &apiErr) { + switch apiErr.ErrorCode { + case "dataset_already_exists": + fmt.Println("Dataset name must be unique") + case "invalid_schema": + fmt.Println("JSON schema validation failed") + case "quota_exceeded": + fmt.Println("Dataset limit reached") + default: + fmt.Printf("API Error: %s\n", apiErr.Message) + } + } + return +} +``` + +--- + +For more information, see: +- [File Management APIs](./file-apis.md) +- [Parse APIs](./parse-apis.md) +- [Main Documentation](../README.md) + diff --git a/docs/file-apis.md b/docs/file-apis.md new file mode 100644 index 0000000..aad3351 --- /dev/null +++ b/docs/file-apis.md @@ -0,0 +1,333 @@ +# File Management APIs + +The File Management APIs allow you to upload, list, retrieve metadata, and delete files in your Tensorlake project. + +## Table of Contents + +- [Upload File](#upload-file) +- [List Files](#list-files) +- [Get File Metadata](#get-file-metadata) +- [Delete File](#delete-file) + +## Upload File + +Upload a file to Tensorlake Cloud. The file will be associated with the project specified by the API key used in the request. 
+
+### Supported File Types
+
+- **PDF** documents
+- **Word** (DOCX)
+- **Spreadsheets** (XLS, XLSX, XLSM, CSV)
+- **Presentations** (PPTX, Apple Keynote)
+- **Images** (PNG, JPG, JPEG)
+- **Raw text** (plain text, HTML)
+
+### Method
+
+```go
+func (c *Client) UploadFile(ctx context.Context, in *UploadFileRequest) (*FileUploadResponse, error)
+```
+
+### Request Parameters
+
+```go
+type UploadFileRequest struct {
+	// FileBytes is the reader for the file to upload (required)
+	FileBytes io.Reader
+
+	// FileName is the name of the file to upload (required)
+	FileName string
+
+	// Labels are optional key-value pairs to categorize the file
+	Labels map[string]string
+}
+```
+
+### Response
+
+```go
+type FileUploadResponse struct {
+	// FileId is the ID of the created file
+	// Use this ID to reference the file in parse and other operations
+	FileId string
+
+	// CreatedAt is the creation date and time in RFC 3339 format
+	CreatedAt time.Time
+}
+```
+
+### Example
+
+```go
+file, err := os.Open("path/to/document.pdf")
+if err != nil {
+	log.Fatal(err)
+}
+defer file.Close()
+
+response, err := client.UploadFile(context.Background(), &tensorlake.UploadFileRequest{
+	FileBytes: file,
+	FileName:  "document.pdf",
+	Labels: map[string]string{
+		"category": "invoice",
+		"year":     "2024",
+	},
+})
+if err != nil {
+	log.Fatal(err)
+}
+
+fmt.Printf("File uploaded with ID: %s\n", response.FileId)
+```
+
+### Notes
+
+- Upload limit: 1 GB per file
+- Files are deduplicated - uploading the same file multiple times returns the same `file_id`
+- File type is automatically detected based on Content-Type header or file extension
+- Labels help categorize and filter files for better organization
+
+---
+
+## List Files
+
+List all files in your Tensorlake project with cursor-based pagination. 
+ +### Method + +```go +func (c *Client) ListFiles(ctx context.Context, in *ListFilesRequest) (*PaginationResult[FileInfo], error) +``` + +### Request Parameters + +```go +type ListFilesRequest struct { + // Cursor for pagination (base64-encoded timestamp) + Cursor string + + // Direction of pagination: "next" or "prev" + Direction PaginationDirection + + // Limit the number of results (x >= 0) + Limit int + + // Filter by file name (case-sensitive substring match) + FileName string + + // Filter by creation date (RFC 3339 format) + CreatedAfter string + CreatedBefore string +} +``` + +### Response + +```go +type PaginationResult[FileInfo] struct { + Items []FileInfo // Array of file metadata + HasMore bool // Whether more results are available + NextCursor string // Cursor for next page + PrevCursor string // Cursor for previous page +} + +type FileInfo struct { + FileId string + FileName string + MimeType MimeType + FileSize int64 + ChecksumSHA256 string + CreatedAt string + Labels map[string]string +} +``` + +### Example + +```go +// List first 10 files +response, err := client.ListFiles(context.Background(), &tensorlake.ListFilesRequest{ + Limit: 10, + Direction: tensorlake.PaginationDirectionNext, +}) +if err != nil { + log.Fatal(err) +} + +for _, file := range response.Items { + fmt.Printf("File: %s (ID: %s, Size: %d bytes)\n", + file.FileName, file.FileId, file.FileSize) +} + +// Get next page if available +if response.HasMore { + nextPage, err := client.ListFiles(context.Background(), &tensorlake.ListFilesRequest{ + Cursor: response.NextCursor, + Limit: 10, + Direction: tensorlake.PaginationDirectionNext, + }) + // ... 
process next page +} +``` + +### Iterate All Files + +For convenience, use the `IterFiles` method to iterate through all files: + +```go +func (c *Client) IterFiles(ctx context.Context, limit int, direction PaginationDirection) iter.Seq2[FileInfo, error] +``` + +**Example:** + +```go +for file, err := range client.IterFiles(context.Background(), 50, tensorlake.PaginationDirectionNext) { + if err != nil { + log.Fatal(err) + } + fmt.Printf("Processing file: %s\n", file.FileName) +} +``` + +--- + +## Get File Metadata + +Retrieve metadata for a specific file by its ID. + +### Method + +```go +func (c *Client) GetFileMetadata(ctx context.Context, fileId string) (*FileInfo, error) +``` + +### Parameters + +- `fileId`: The unique identifier of the file + +### Response + +```go +type FileInfo struct { + FileId string // Unique file identifier + FileName string // Original file name + MimeType MimeType // MIME type of the file + FileSize int64 // File size in bytes + ChecksumSHA256 string // SHA-256 checksum + CreatedAt string // Creation timestamp (RFC 3339) + Labels map[string]string // Associated labels +} +``` + +### Example + +```go +metadata, err := client.GetFileMetadata(context.Background(), "file_abc123xyz") +if err != nil { + log.Fatal(err) +} + +fmt.Printf("File: %s\n", metadata.FileName) +fmt.Printf("Size: %d bytes\n", metadata.FileSize) +fmt.Printf("Type: %s\n", metadata.MimeType) +fmt.Printf("Uploaded: %s\n", metadata.CreatedAt) +``` + +--- + +## Delete File + +Delete a file from Tensorlake Cloud. 
+ +### Method + +```go +func (c *Client) DeleteFile(ctx context.Context, fileId string) error +``` + +### Parameters + +- `fileId`: The unique identifier of the file to delete + +### Example + +```go +err := client.DeleteFile(context.Background(), "file_abc123xyz") +if err != nil { + log.Fatal(err) +} + +fmt.Println("File deleted successfully") +``` + +### Notes + +- Deleting a file is permanent and cannot be undone +- Deleting a file does not automatically delete parse jobs created from it +- You must have appropriate permissions to delete files in the project + +--- + +## Common Types + +### PaginationDirection + +```go +type PaginationDirection string + +const ( + PaginationDirectionNext PaginationDirection = "next" + PaginationDirectionPrev PaginationDirection = "prev" +) +``` + +### MimeType + +Common MIME types for uploaded files: + +```go +type MimeType string + +const ( + MimeTypePDF MimeType = "application/pdf" + MimeTypeDOCX MimeType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + MimeTypePNG MimeType = "image/png" + MimeTypeJPEG MimeType = "image/jpeg" + // ... and more +) +``` + +--- + +## Error Handling + +All methods return an error as the last return value. 
Common error scenarios: + +- **Network errors**: Connection issues, timeouts +- **Authentication errors**: Invalid or expired API key +- **Validation errors**: Invalid parameters, missing required fields +- **Not found errors**: File ID doesn't exist +- **Quota errors**: Upload limits exceeded + +**Example error handling:** + +```go +response, err := client.UploadFile(ctx, request) +if err != nil { + var apiErr *tensorlake.ErrorResponse + if errors.As(err, &apiErr) { + fmt.Printf("API Error: %s (Code: %s)\n", apiErr.Message, apiErr.ErrorCode) + } else { + fmt.Printf("Error: %v\n", err) + } + return +} +``` + +--- + +For more information, see: +- [Parse APIs](./parse-apis.md) +- [Dataset APIs](./dataset-apis.md) +- [Main Documentation](../README.md) + diff --git a/docs/parse-apis.md b/docs/parse-apis.md new file mode 100644 index 0000000..4f7fb07 --- /dev/null +++ b/docs/parse-apis.md @@ -0,0 +1,620 @@ +# Parse APIs + +The Parse APIs provide comprehensive document processing capabilities including parsing, structured data extraction, and page classification. + +## Table of Contents + +- [Overview](#overview) +- [Parse Document](#parse-document) +- [Read Document](#read-document) +- [Extract Document](#extract-document) +- [Classify Document](#classify-document) +- [Get Parse Result](#get-parse-result) +- [List Parse Jobs](#list-parse-jobs) +- [Delete Parse Job](#delete-parse-job) +- [Configuration Options](#configuration-options) + +## Overview + +Tensorlake offers multiple parsing operations: + +- **Parse Document**: Comprehensive parsing with optional extraction and classification +- **Read Document**: Basic document parsing to markdown +- **Extract Document**: Structured data extraction using JSON schemas +- **Classify Document**: Page classification based on content + +All parsing operations follow the same pattern: +1. Submit a parse request → receive a `ParseJob` with `parse_id` +2. Use the `parse_id` to query results via `GetParseResult` +3. 
Optionally use SSE (Server-Sent Events) for real-time progress updates + +--- + +## Parse Document + +Submit a document for comprehensive parsing, including reading, extraction, and classification in a single operation. + +### Method + +```go +func (c *Client) ParseDocument(ctx context.Context, in *ParseDocumentRequest) (*ParseJob, error) +``` + +### Request Parameters + +```go +type ParseDocumentRequest struct { + FileSource // One of: FileId, FileURL, or RawText (required) + + // Optional configuration + ParsingOptions *ParsingOptions + EnrichmentOptions *EnrichmentOptions + StructuredExtractionOptions []StructuredExtractionOptions + PageClassificationOptions []PageClassConfig + + // Page range to parse (e.g., "1-5,8,10") + PageRange string + + // Additional metadata + Labels map[string]string + MimeType MimeType +} + +// FileSource - exactly one must be provided +type FileSource struct { + FileId string // File ID from UploadFile + FileURL string // Internet-reachable URL + RawText string // Plain text content +} +``` + +### Response + +```go +type ParseJob struct { + ParseId string // Unique identifier for tracking + CreatedAt string // RFC 3339 timestamp +} +``` + +### Example + +```go +// Parse an uploaded file with extraction +parseJob, err := client.ParseDocument(context.Background(), &tensorlake.ParseDocumentRequest{ + FileSource: tensorlake.FileSource{ + FileId: "file_abc123", + }, + StructuredExtractionOptions: []tensorlake.StructuredExtractionOptions{ + { + SchemaName: "invoice_data", + JSONSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "invoice_number": {Type: "string"}, + "total_amount": {Type: "number"}, + "date": {Type: "string"}, + }, + }, + }, + }, + Labels: map[string]string{"type": "invoice"}, +}) +if err != nil { + log.Fatal(err) +} + +fmt.Printf("Parse job created: %s\n", parseJob.ParseId) +``` + +--- + +## Read Document + +Submit a document for basic parsing to markdown format. 
+ +### Method + +```go +func (c *Client) ReadDocument(ctx context.Context, in *ReadDocumentRequest) (*ParseJob, error) +``` + +### Request Parameters + +```go +type ReadDocumentRequest struct { + FileSource // One of: FileId, FileURL, or RawText + + ParsingOptions *ParsingOptions + EnrichmentOptions *EnrichmentOptions + PageRange string + Labels map[string]string + FileName string // Only used with FileId + MimeType MimeType +} +``` + +### Example + +```go +// Read a PDF from URL +parseJob, err := client.ReadDocument(context.Background(), &tensorlake.ReadDocumentRequest{ + FileSource: tensorlake.FileSource{ + FileURL: "https://example.com/document.pdf", + }, + ParsingOptions: &tensorlake.ParsingOptions{ + IncludeImages: true, + TableOutputMode: tensorlake.TableOutputModeMarkdown, + }, +}) +if err != nil { + log.Fatal(err) +} +``` + +--- + +## Extract Document + +Submit a document for structured data extraction using JSON schemas. + +### Method + +```go +func (c *Client) ExtractDocument(ctx context.Context, in *ExtractDocumentRequest) (*ParseJob, error) +``` + +### Request Parameters + +```go +type ExtractDocumentRequest struct { + FileSource // One of: FileId, FileURL, or RawText + + // At least one extraction schema required + StructuredExtractionOptions []StructuredExtractionOptions + + PageRange string + MimeType string + Labels map[string]string +} + +type StructuredExtractionOptions struct { + SchemaName string // Name for this schema + JSONSchema *jsonschema.Schema // JSON schema definition + PartitionStrategy PartitionStrategy // How to partition document + ModelProvider ModelProvider // LLM provider to use + PageClasses []string // Filter by page classes + Prompt string // Custom extraction prompt + ProvideCitations bool // Include bounding boxes + SkipOCR bool // Skip OCR processing +} +``` + +### Example + +```go +// Extract invoice data +parseJob, err := client.ExtractDocument(context.Background(), &tensorlake.ExtractDocumentRequest{ + FileSource: 
tensorlake.FileSource{ + FileId: "file_abc123", + }, + StructuredExtractionOptions: []tensorlake.StructuredExtractionOptions{ + { + SchemaName: "invoice", + JSONSchema: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "vendor_name": {Type: "string"}, + "invoice_number": {Type: "string"}, + "total_amount": {Type: "number"}, + "line_items": { + Type: "array", + Items: &jsonschema.Schema{ + Type: "object", + Properties: map[string]*jsonschema.Schema{ + "description": {Type: "string"}, + "amount": {Type: "number"}, + }, + }, + }, + }, + }, + PartitionStrategy: tensorlake.PartitionStrategyNone, + ProvideCitations: true, + }, + }, +}) +if err != nil { + log.Fatal(err) +} +``` + +--- + +## Classify Document + +Submit a document for page classification. + +### Method + +```go +func (c *Client) ClassifyDocument(ctx context.Context, in *ClassifyDocumentRequest) (*ParseJob, error) +``` + +### Request Parameters + +```go +type ClassifyDocumentRequest struct { + FileSource // One of: FileId, FileURL, or RawText + + // At least one classification config required + PageClassifications []PageClassConfig + + PageRange string + MimeType string + Labels map[string]string +} + +type PageClassConfig struct { + Name string // Class name + Description string // What to look for in pages +} +``` + +### Example + +```go +// Classify pages in a legal document +parseJob, err := client.ClassifyDocument(context.Background(), &tensorlake.ClassifyDocumentRequest{ + FileSource: tensorlake.FileSource{ + FileId: "file_abc123", + }, + PageClassifications: []tensorlake.PageClassConfig{ + { + Name: "signature_page", + Description: "Pages containing signatures or signature blocks", + }, + { + Name: "terms_and_conditions", + Description: "Pages with legal terms and conditions text", + }, + { + Name: "exhibits", + Description: "Appendix or exhibit pages", + }, + }, +}) +if err != nil { + log.Fatal(err) +} +``` + +--- + +## Get Parse Result + +Retrieve the result of a 
parse job, with optional SSE streaming for real-time updates.
+
+### Method
+
+```go
+func (c *Client) GetParseResult(ctx context.Context, parseId string, opts ...GetParseResultOption) (*ParseResult, error)
+```
+
+### Options
+
+```go
+// Enable Server-Sent Events for streaming updates
+func WithSSE(enable bool) GetParseResultOption
+
+// Callback for intermediate updates during SSE
+func WithOnUpdate(onUpdate ParseResultUpdateFunc) GetParseResultOption
+
+type ParseResultUpdateFunc func(name ParseEventName, result *ParseResult)
+```
+
+### Response
+
+```go
+type ParseResult struct {
+	// Job metadata
+	ParseId          string
+	Status           ParseStatus
+	ParsedPagesCount int
+	TotalPages       int
+	CreatedAt        string
+	FinishedAt       string
+	Error            string
+	Labels           map[string]string
+	DatasetId        string
+
+	// Parsed content
+	Pages          []Page           // Page-by-page content
+	Chunks         []Chunk          // Text chunks (if chunking enabled)
+	PageClasses    []PageClass      // Classification results
+	StructuredData []StructuredData // Extracted structured data
+}
+```
+
+### Example: Basic Retrieval
+
+```go
+result, err := client.GetParseResult(context.Background(), parseJob.ParseId)
+if err != nil {
+	log.Fatal(err)
+}
+
+if result.Status == tensorlake.ParseStatusCompleted {
+	fmt.Printf("Parsed %d pages\n", result.ParsedPagesCount)
+
+	// Access page content
+	for _, page := range result.Pages {
+		fmt.Printf("Page %d:\n", page.PageNumber)
+		for _, fragment := range page.PageFragments {
+			fmt.Printf("  Type: %s\n", fragment.FragmentType)
+		}
+	}
+}
+```
+
+### Example: SSE Streaming
+
+```go
+result, err := client.GetParseResult(
+	context.Background(),
+	parseJob.ParseId,
+	tensorlake.WithSSE(true),
+	tensorlake.WithOnUpdate(func(name tensorlake.ParseEventName, r *tensorlake.ParseResult) {
+		switch name {
+		case "parse_queued":
+			fmt.Println("Parse job queued")
+		case "parse_update":
+			fmt.Printf("Processing... %d/%d pages\n", r.ParsedPagesCount, r.TotalPages)
+		case "parse_done":
+			fmt.Println("Parse complete!")
+		case "parse_failed":
+			fmt.Printf("Parse failed: %s\n", r.Error)
+		}
+	}),
+)
+if err != nil {
+	log.Fatal(err)
+}
+
+// Process final result
+fmt.Printf("Final status: %s\n", result.Status)
+```
+
+### Example: Access Structured Data
+
+```go
+result, err := client.GetParseResult(context.Background(), parseJob.ParseId)
+if err != nil {
+	log.Fatal(err)
+}
+
+for _, data := range result.StructuredData {
+	fmt.Printf("Schema: %s\n", data.SchemaName)
+	fmt.Printf("Pages: %v\n", data.PageNumbers)
+
+	// Unmarshal the extracted data
+	var extracted map[string]interface{}
+	if err := json.Unmarshal(data.Data, &extracted); err != nil {
+		log.Fatal(err)
+	}
+
+	fmt.Printf("Extracted data: %+v\n", extracted)
+}
+```
+
+---
+
+## List Parse Jobs
+
+List all parse jobs in your project with pagination.
+
+### Method
+
+```go
+func (c *Client) ListParseJobs(ctx context.Context, in *ListParseJobsRequest) (*PaginationResult[ParseResult], error)
+```
+
+### Request Parameters
+
+```go
+type ListParseJobsRequest struct {
+	Cursor        string
+	Direction     PaginationDirection
+	Limit         int
+	FileId        string // Filter by file ID
+	CreatedAfter  string // RFC 3339 timestamp
+	CreatedBefore string // RFC 3339 timestamp
+}
+```
+
+### Example
+
+```go
+// List recent parse jobs
+response, err := client.ListParseJobs(context.Background(), &tensorlake.ListParseJobsRequest{
+	Limit:     20,
+	Direction: tensorlake.PaginationDirectionNext,
+})
+if err != nil {
+	log.Fatal(err)
+}
+
+for _, job := range response.Items {
+	fmt.Printf("Parse ID: %s, Status: %s, Pages: %d\n",
+		job.ParseId, job.Status, job.ParsedPagesCount)
+}
+```
+
+### Iterate All Parse Jobs
+
+```go
+func (c *Client) IterParseJobs(ctx context.Context, limit int, direction PaginationDirection) iter.Seq2[ParseResult, error]
+```
+
+**Example:**
+
+```go
+for job, err := range 
client.IterParseJobs(context.Background(), 50, tensorlake.PaginationDirectionNext) { + if err != nil { + log.Fatal(err) + } + if job.Status == tensorlake.ParseStatusFailed { + fmt.Printf("Failed job: %s - %s\n", job.ParseId, job.Error) + } +} +``` + +--- + +## Delete Parse Job + +Delete a previously submitted parse job. + +### Method + +```go +func (c *Client) DeleteParseJob(ctx context.Context, parseId string) error +``` + +### Example + +```go +err := client.DeleteParseJob(context.Background(), "parse_abc123") +if err != nil { + log.Fatal(err) +} + +fmt.Println("Parse job deleted successfully") +``` + +### Notes + +- Deleting a parse job removes the job and its settings +- Does not delete the original file used for parsing +- Does not affect other parse jobs from the same file + +--- + +## Configuration Options + +### ParsingOptions + +Configure how documents are parsed: + +```go +type ParsingOptions struct { + ChunkingStrategy ChunkingStrategy // How to chunk document + CrossPageHeaderDetection bool // Detect headers across pages + DisableLayoutDetection bool // Skip layout detection for speed + OCRModel OCRPipelineProvider // OCR model to use + RemoveStrikethroughLines bool // Remove strikethrough text + SignatureDetection bool // Detect signatures + SkewDetection bool // Correct skewed/rotated pages + TableOutputMode TableOutputMode // HTML or Markdown + TableParsingFormat TableParsingFormat // Table extraction method + IgnoreSections []PageFragmentType // Skip certain content types + IncludeImages bool // Include images in markdown + BarcodeDetection bool // Detect barcodes +} +``` + +**Example:** + +```go +parsingOpts := &tensorlake.ParsingOptions{ + IncludeImages: true, + TableOutputMode: tensorlake.TableOutputModeMarkdown, + SignatureDetection: true, + CrossPageHeaderDetection: true, +} +``` + +### EnrichmentOptions + +Enhance parsed content with AI-generated summaries: + +```go +type EnrichmentOptions struct { + FigureSummarization bool // Summarize 
figures + FigureSummarizationPrompt string // Custom prompt for figures + TableSummarization bool // Summarize tables + TableSummarizationPrompt string // Custom prompt for tables + IncludeFullPageImage bool // Include full page for context +} +``` + +**Example:** + +```go +enrichmentOpts := &tensorlake.EnrichmentOptions{ + TableSummarization: true, + TableSummarizationPrompt: "Summarize this financial table in 2-3 sentences", + IncludeFullPageImage: true, +} +``` + +### Common Enums + +#### ParseStatus + +```go +const ( + ParseStatusPending ParseStatus = "pending" + ParseStatusQueued ParseStatus = "queued" + ParseStatusProcessing ParseStatus = "processing" + ParseStatusCompleted ParseStatus = "completed" + ParseStatusFailed ParseStatus = "failed" +) +``` + +#### ChunkingStrategy + +```go +const ( + ChunkingStrategyNone ChunkingStrategy = "none" + ChunkingStrategyPage ChunkingStrategy = "page" + ChunkingStrategySemantic ChunkingStrategy = "semantic" +) +``` + +#### TableOutputMode + +```go +const ( + TableOutputModeHTML TableOutputMode = "html" + TableOutputModeMarkdown TableOutputMode = "markdown" +) +``` + +#### PartitionStrategy + +```go +const ( + PartitionStrategyNone PartitionStrategy = "none" // One result for entire doc + PartitionStrategyPage PartitionStrategy = "page" // One result per page +) +``` + +--- + +## Best Practices + +1. **Use SSE for Long Documents**: Enable SSE streaming to get progress updates for large documents +2. **Optimize with Parsing Options**: Disable unnecessary features (e.g., `DisableLayoutDetection`) for faster processing +3. **Structured Extraction**: Use specific, well-defined JSON schemas for better extraction accuracy +4. **Page Classification**: Provide detailed descriptions in `PageClassConfig` to improve classification accuracy +5. **Error Handling**: Always check the `Status` field and handle failed parse jobs appropriately +6. 
**Pagination**: Use `IterParseJobs` for easier iteration through all jobs + +--- + +For more information, see: +- [File Management APIs](./file-apis.md) +- [Dataset APIs](./dataset-apis.md) +- [Main Documentation](../README.md) + diff --git a/file_delete.go b/file_delete.go index d37905d..861d082 100644 --- a/file_delete.go +++ b/file_delete.go @@ -22,6 +22,10 @@ import ( ) // DeleteFile deletes a file from Tensorlake Cloud. +// +// See also: [Delete File API Reference] +// +// [Delete File API Reference]: https://docs.tensorlake.ai/api-reference/v2/files/delete func (c *Client) DeleteFile(ctx context.Context, fileId string) error { reqURL := fmt.Sprintf("%s/files/%s", c.baseURL, url.PathEscape(fileId)) diff --git a/file_list.go b/file_list.go index f5e378c..70bce75 100644 --- a/file_list.go +++ b/file_list.go @@ -101,6 +101,10 @@ type ListFilesRequest struct { // - HasMore: A boolean indicating whether there are more files available beyond the current page. // - NextCursor: A base64-encoded cursor for the next page of results. If HasMore is false, this field will be null. // - PrevCursor: A base64-encoded cursor for the previous page of results. If this is the first page, this field will be null. +// +// See also: [List Files API Reference] +// +// [List Files API Reference]: https://docs.tensorlake.ai/api-reference/v2/files/list func (c *Client) ListFiles(ctx context.Context, in *ListFilesRequest) (*PaginationResult[FileInfo], error) { reqURL := fmt.Sprintf("%s/files", c.baseURL) params := url.Values{} diff --git a/file_metadata.go b/file_metadata.go index 60b1600..0684ba4 100644 --- a/file_metadata.go +++ b/file_metadata.go @@ -35,6 +35,10 @@ type FileInfo struct { } // GetFileMetadata retrieves metadata for a specific file. 
+// +// See also: [Get File Metadata API Reference] +// +// [Get File Metadata API Reference]: https://docs.tensorlake.ai/api-reference/v2/files/get-metadata func (c *Client) GetFileMetadata(ctx context.Context, fileId string) (*FileInfo, error) { reqURL := fmt.Sprintf("%s/files/%s/metadata", c.baseURL, url.PathEscape(fileId)) diff --git a/file_upload.go b/file_upload.go index 442b52e..79a6c23 100644 --- a/file_upload.go +++ b/file_upload.go @@ -85,6 +85,10 @@ type FileUploadResponse struct { // # Limits // // There is an upload limit of 1 GB per file. +// +// See also: [Upload File API Reference] +// +// [Upload File API Reference]: https://docs.tensorlake.ai/api-reference/v2/files/upload func (c *Client) UploadFile(ctx context.Context, in *UploadFileRequest) (*FileUploadResponse, error) { if in.FileName == "" { return nil, errors.New("file name is empty") diff --git a/parse_classify.go b/parse_classify.go index a51bfd9..fe96b25 100644 --- a/parse_classify.go +++ b/parse_classify.go @@ -33,6 +33,10 @@ type ClassifyDocumentRequest struct { } // ClassifyDocument submits a document for page classification. +// +// See also: [Classify Document API Reference] +// +// [Classify Document API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/classify func (c *Client) ClassifyDocument(ctx context.Context, in *ClassifyDocumentRequest) (*ParseJob, error) { if !in.SourceProvided() { return nil, fmt.Errorf("exactly one of file_id, file_url, or raw_text must be provided") diff --git a/parse_classify_test.go b/parse_classify_test.go index cef25a9..5ca2687 100644 --- a/parse_classify_test.go +++ b/parse_classify_test.go @@ -85,8 +85,8 @@ func TestClassifyDocument(t *testing.T) { t.Logf("classify document done, parse ID: %s", r.ParseId) // Get parse result. 
- result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(eventName string, _ *ParseResult) { - t.Logf("parse status: %s", eventName) + result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(name ParseEventName, _ *ParseResult) { + t.Logf("parse status: %s", name) })) if err != nil { t.Fatalf("failed to get parse result: %v", err) diff --git a/parse_delete.go b/parse_delete.go index 887ff2a..2b29eaf 100644 --- a/parse_delete.go +++ b/parse_delete.go @@ -23,7 +23,11 @@ import ( // DeleteParseJob deletes a previously submitted parse job. This will // remove the parse job and its associated settings from the system. // Deleting a parse job does not delete the original file used for parsing, -// nor does it affect any other parse jobs that may have been created from the same file. +// nor does it affect any other parse jobs that may have been created from the same file +// +// See also: [Delete Parse Job API Reference] +// +// [Delete Parse Job API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/delete func (c *Client) DeleteParseJob(ctx context.Context, parseId string) error { reqURL := fmt.Sprintf("%s/parse/%s", c.baseURL, parseId) req, err := http.NewRequestWithContext(ctx, http.MethodDelete, reqURL, nil) diff --git a/parse_extract.go b/parse_extract.go index 47d66a8..a53315c 100644 --- a/parse_extract.go +++ b/parse_extract.go @@ -34,6 +34,10 @@ type ExtractDocumentRequest struct { } // ExtractDocument submits a document for structured data extraction. 
+// +// See also: [Extract Document API Reference] +// +// [Extract Document API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/extract func (c *Client) ExtractDocument(ctx context.Context, in *ExtractDocumentRequest) (*ParseJob, error) { // Validate that exactly one source is provided if !in.SourceProvided() { diff --git a/parse_extract_test.go b/parse_extract_test.go index bdc6812..0f7ec00 100644 --- a/parse_extract_test.go +++ b/parse_extract_test.go @@ -91,8 +91,8 @@ func TestExtractDocument(t *testing.T) { t.Logf("extract document done, parse ID: %s", r.ParseId) // Get parse result. - result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(eventName string, _ *ParseResult) { - t.Logf("parse status: %s", eventName) + result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(name ParseEventName, _ *ParseResult) { + t.Logf("parse status: %s", name) })) if err != nil { t.Fatalf("failed to get parse result: %v", err) diff --git a/parse_get.go b/parse_get.go index d146b88..5b55931 100644 --- a/parse_get.go +++ b/parse_get.go @@ -27,7 +27,7 @@ import ( // ParseResultUpdateFunc is a callback function that receives intermediate parse result updates // during SSE streaming. It will be called for each SSE event received. -type ParseResultUpdateFunc func(eventName string, result *ParseResult) +type ParseResultUpdateFunc func(name ParseEventName, result *ParseResult) type GetParseResultOptions struct { // UseSSE enables Server-Sent Events (SSE) for streaming updates. @@ -64,6 +64,10 @@ func WithOnUpdate(onUpdate ParseResultUpdateFunc) GetParseResultOption { // When the job finishes successfully, the response will contain pages // (chunks of the page) chunks (text chunks extracted from the document), // structured data (every schema_name provided in the parse request as a key). 
+// +// See also: [Get Parse Result API Reference] +// +// [Get Parse Result API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/get func (c *Client) GetParseResult(ctx context.Context, parseId string, opts ...GetParseResultOption) (*ParseResult, error) { o := &GetParseResultOptions{ useSSE: false, @@ -93,13 +97,16 @@ func (c *Client) GetParseResult(ctx context.Context, parseId string, opts ...Get }) } +// ParseEventName is the name of the SSE event. +type ParseEventName string + // The possible SSE events. // See also: https://github.com/tensorlakeai/tensorlake/blob/main/src/tensorlake/documentai/_parse.py#L499 const ( - sseEventParseQueued = "parse_queued" - sseEventParseUpdate = "parse_update" - sseEventParseDone = "parse_done" - sseEventParseFailed = "parse_failed" + sseEventParseQueued ParseEventName = "parse_queued" + sseEventParseUpdate ParseEventName = "parse_update" + sseEventParseDone ParseEventName = "parse_done" + sseEventParseFailed ParseEventName = "parse_failed" ) func (c *Client) handleSSEResponse(req *http.Request, onUpdate ParseResultUpdateFunc) (*ParseResult, error) { @@ -144,21 +151,21 @@ func (c *Client) handleSSEResponse(req *http.Request, onUpdate ParseResultUpdate } switch ev.Name() { - case sseEventParseQueued, sseEventParseUpdate: + case string(sseEventParseQueued), string(sseEventParseUpdate): if onUpdate != nil { - onUpdate(ev.Name(), &result) + onUpdate(ParseEventName(ev.Name()), &result) } continue - case sseEventParseDone: + case string(sseEventParseDone): if onUpdate != nil { - onUpdate(ev.Name(), &result) + onUpdate(ParseEventName(ev.Name()), &result) } return &result, nil - case sseEventParseFailed: + case string(sseEventParseFailed): if onUpdate != nil { - onUpdate(ev.Name(), &result) + onUpdate(ParseEventName(ev.Name()), &result) } return nil, fmt.Errorf("failed to parse result: %s", result.Error) default: diff --git a/parse_get_test.go b/parse_get_test.go index 56b0aa7..df61b57 100644 --- a/parse_get_test.go +++ 
b/parse_get_test.go @@ -54,8 +54,8 @@ func TestGetParseResultSSE(t *testing.T) { t.Logf("read document done, parse ID: %s", r.ParseId) // Read document status. - result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(eventName string, _ *ParseResult) { - t.Logf("parse status: %s", eventName) + result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(name ParseEventName, _ *ParseResult) { + t.Logf("parse status: %s", name) })) if err != nil { t.Fatalf("failed to get parse result: %v", err) diff --git a/parse_list.go b/parse_list.go index 4c8a38f..df67f94 100644 --- a/parse_list.go +++ b/parse_list.go @@ -65,6 +65,10 @@ type ListParseJobsRequest struct { } // ListParseJobs lists parse jobs in the Tensorlake project. +// +// See also: [List Parse Jobs API Reference] +// +// [List Parse Jobs API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/list func (c *Client) ListParseJobs(ctx context.Context, in *ListParseJobsRequest) (*PaginationResult[ParseResult], error) { reqURL := c.baseURL + "/parse" params := url.Values{} diff --git a/parse_parse.go b/parse_parse.go index 3fae52e..bd7c5a2 100644 --- a/parse_parse.go +++ b/parse_parse.go @@ -78,6 +78,10 @@ type ParseDocumentRequest struct { } // ParseDocument submits a document for comprehensive parsing (read, extract, and classify). +// +// See also: [Parse Document API Reference] +// +// [Parse Document API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/parse func (c *Client) ParseDocument(ctx context.Context, in *ParseDocumentRequest) (*ParseJob, error) { if !in.SourceProvided() { return nil, fmt.Errorf("exactly one of file_id, file_url, or raw_text must be provided") diff --git a/parse_parse_test.go b/parse_parse_test.go index aceae96..5cc3e5e 100644 --- a/parse_parse_test.go +++ b/parse_parse_test.go @@ -59,8 +59,8 @@ func TestParseDocumentRemote(t *testing.T) { // Read document status. 
- result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(eventName string, _ *ParseResult) { - t.Logf("parse status: %s", eventName) + result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(name ParseEventName, _ *ParseResult) { + t.Logf("parse status: %s", name) })) if err != nil { t.Fatalf("failed to get parse result: %v", err) @@ -195,8 +195,8 @@ func TestParseDocumentStructuredExtraction(t *testing.T) { t.Logf("parse document done, parse ID: %s", r.ParseId) // Get parse result. - result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(eventName string, _ *ParseResult) { - t.Logf("parse status: %s", eventName) + result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(name ParseEventName, _ *ParseResult) { + t.Logf("parse status: %s", name) })) if err != nil { t.Fatalf("failed to get parse result: %v", err) diff --git a/parse_read.go b/parse_read.go index 6b5d621..211b883 100644 --- a/parse_read.go +++ b/parse_read.go @@ -70,6 +70,10 @@ type ReadDocumentRequest struct { // provide document layout information. Once submitted, the API will // return a parse response with a parse_id field. You can query the status // and results of the parse operation with the Get Parse Result endpoint. +// +// See also: [Read Document API Reference] +// +// [Read Document API Reference]: https://docs.tensorlake.ai/api-reference/v2/parse/read func (c *Client) ReadDocument(ctx context.Context, in *ReadDocumentRequest) (*ParseJob, error) { if !in.SourceProvided() { return nil, fmt.Errorf("exactly one of file_id, file_url, or raw_text must be provided") diff --git a/parse_read_test.go b/parse_read_test.go index d937f09..124168e 100644 --- a/parse_read_test.go +++ b/parse_read_test.go @@ -53,8 +53,8 @@ func TestReadDocument(t *testing.T) { t.Logf("read document done, parse ID: %s", r.ParseId) // Read document status. 
- result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(eventName string, _ *ParseResult) { - t.Logf("parse status: %s", eventName) + result, err := c.GetParseResult(t.Context(), r.ParseId, WithSSE(true), WithOnUpdate(func(name ParseEventName, _ *ParseResult) { + t.Logf("parse status: %s", name) })) if err != nil { t.Fatalf("failed to get parse result: %v", err)