Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
module github.com/sixt/tensorlake-mcp

go 1.25
go 1.25.1

require (
github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689
github.com/google/jsonschema-go v0.4.2
github.com/google/uuid v1.6.0
github.com/modelcontextprotocol/go-sdk v1.2.0-pre.2
github.com/sixt/tensorlake-go v0.1.0
github.com/sixt/tensorlake-go v0.1.1
)

require (
Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689 h1:0psnKZ+N2IP43/SZC8SKx6OpFJwLmQb9m9QyV9BC2f8=
github.com/go4org/hashtriemap v0.0.0-20251130024219-545ba229f689/go.mod h1:OGmRfY/9QEK2P5zCRtmqfbCF283xPkU2dvVA4MvbvpI=
github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
Expand All @@ -8,8 +10,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/modelcontextprotocol/go-sdk v1.2.0-pre.2 h1:U2JzHO7jPPKyo1Uu34xuR0QOgeLrWmsIG6orPJE//Bc=
github.com/modelcontextprotocol/go-sdk v1.2.0-pre.2/go.mod h1:6fM3LCm3yV7pAs8isnKLn07oKtB0MP9LHd3DfAcKw10=
github.com/sixt/tensorlake-go v0.1.0 h1:k0Xo45Qy789KvUEHSD8vKRwytgfL4zvmAsizkW47ZF8=
github.com/sixt/tensorlake-go v0.1.0/go.mod h1:qPF3a5Z2gAPdz4SoKE4NRnb72+c5D3/c9F21+BKvbFc=
github.com/sixt/tensorlake-go v0.1.1 h1:LA6UzOk5Iv2fKYqqzgAXOK0zHBwXjxTTkFIbXjmBxLw=
github.com/sixt/tensorlake-go v0.1.1/go.mod h1:qPF3a5Z2gAPdz4SoKE4NRnb72+c5D3/c9F21+BKvbFc=
github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
Expand Down
23 changes: 21 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ func init() {
logLevel = cmp.Or(logLevel, "debug") // default to debug

// Set up the default logger to be a JSON logger.
slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
//
// Note that MCP requires stdout to be used exclusively for JSON-RPC messages.
// All logging must go to stderr.
slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
Level: func() slog.Level {
switch logLevel {
case "debug":
Expand Down Expand Up @@ -75,11 +78,19 @@ func main() {
},
})

ctx := context.Background()
s := newServer()
go s.initializeDocumentResources(ctx)
defer s.CleanupSession(ctx) // Cleanup session on exit.

// Note: We name the tools using "document" instead of "file" to avoid confusion with the file tools that
// are already widespread in LLM host applications. For instance, Claude and Cursor both have their own file tool.

mcp.AddTool(impl, &mcp.Tool{
Name: "list_documents",
Description: "List all documents in the session.",
}, s.ListDocuments)

mcp.AddTool(impl, &mcp.Tool{
Name: "upload_document",
Description: "Upload a document from a URL, local path, or data URI to Tensorlake and obtain a document_id to be used later in other processing/parsing steps.",
Expand Down Expand Up @@ -118,7 +129,7 @@ func main() {
Properties: map[string]*jsonschema.Schema{
"document_id": {
Type: "string",
Description: "The document Id to start parsing. Example: 'file_1234567890'. This is the document_id returned by the upload_document tool.",
Description: "The document Id to start parsing. Example: 'file_1234567890'. This is the document_id returned by the upload_document tool. A document ID must be provided.",
},
"parse_id": {
Type: "string",
Expand All @@ -130,9 +141,17 @@ func main() {
},
// TODO: extend parsing options.
},
Required: []string{"document_id"},
},
}, s.ParseDocument)

impl.AddResource(&mcp.Resource{
Name: "documents",
Description: "Access all documents and their metadata",
URI: "tensorlake://documents",
MIMEType: "application/json",
}, s.DocumentResources)

if err := impl.Run(context.Background(), &mcp.StdioTransport{}); err != nil {
slog.Error("failed to run tensorlake-mcp", "error", err)
}
Expand Down
126 changes: 126 additions & 0 deletions resources.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Copyright 2025 SIXT SE
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"context"
"encoding/json"
"fmt"
"log/slog"
"net/url"

"github.com/go4org/hashtriemap"
"github.com/modelcontextprotocol/go-sdk/mcp"
"github.com/sixt/tensorlake-go"
)

// FileInfo is the JSON-serializable record kept for one document. It combines
// the file's metadata with the parse results that reference the file, and is
// the element type of the array returned by the "tensorlake://documents"
// resource.
type FileInfo struct {
// FileId is the key under which this record is stored in the files map.
FileId string `json:"file_id"`
FileName string `json:"file_name"`
MimeType tensorlake.MimeType `json:"mime_type"`
FileSize int64 `json:"file_size"`
ChecksumSHA256 string `json:"checksum_sha256,omitempty"`
// CreatedAt is copied verbatim from the file metadata as a string.
// NOTE(review): the timestamp format is defined by the Tensorlake API — confirm.
CreatedAt string `json:"created_at,omitempty"`
Labels map[string]string `json:"labels,omitempty"`
// ParseJobs lists the parse results whose options reference this file.
ParseJobs []*tensorlake.ParseResult `json:"parse_jobs,omitempty"`
}

var (
// files maps a file ID to its FileInfo record. It is filled by
// initializeDocumentResources and enumerated by DocumentResources.
// NOTE(review): relies on hashtriemap.HashTrieMap being safe for concurrent
// use, since the initializer runs in its own goroutine — confirm.
files hashtriemap.HashTrieMap[string, *FileInfo]
)

// initializeDocumentResources populates the package-level files map from the
// parse jobs known to Tensorlake. For every parse job it fetches the parse
// result (including its options), reads the file ID from those options, and
// records the result under that file.
//
// It is started in its own goroutine from main, so resource reads may run
// while it is still filling the map. Records that are already published in the
// map are therefore never mutated in place; an updated copy is stored instead.
//
// Failures on a single parse job are logged and skipped. A failure of the
// parse-job iterator itself is logged and ends the scan.
func (s *server) initializeDocumentResources(ctx context.Context) {
	// Iterate all parse jobs. This way we get all parsed results and their files if any.
	for parseJob, err := range s.tl.IterParseJobs(ctx, 100) {
		if err != nil {
			slog.Error("failed to iterate parse jobs", "error", err)
			break
		}

		// Correlate parse jobs and their documents.
		r, err := s.tl.GetParseResult(ctx, parseJob.ParseId, tensorlake.WithOptions(true))
		if err != nil {
			slog.Warn("skipping parse job: failed to get parse result", "parse_id", parseJob.ParseId, "error", err)
			continue
		}
		if r.Options == nil {
			continue
		}

		fileID := r.Options.FileId
		if fileID == "" {
			continue
		}

		// Known file: attach the parse result without re-fetching metadata,
		// which would be discarded anyway. Store a copy with a freshly
		// allocated slice so that a concurrent reader holding the previous
		// *FileInfo never observes its ParseJobs being modified.
		if info, ok := files.Load(fileID); ok {
			jobs := make([]*tensorlake.ParseResult, 0, len(info.ParseJobs)+1)
			jobs = append(jobs, info.ParseJobs...)
			jobs = append(jobs, r)

			updated := *info
			updated.ParseJobs = jobs
			files.Store(fileID, &updated)
			continue
		}

		m, err := s.tl.GetFileMetadata(ctx, fileID)
		if err != nil {
			slog.Warn("skipping parse job: failed to get file metadata", "file_id", fileID, "parse_id", parseJob.ParseId, "error", err)
			continue
		}

		files.Store(fileID, &FileInfo{
			FileId:         fileID,
			FileName:       m.FileName,
			MimeType:       m.MimeType,
			FileSize:       m.FileSize,
			ChecksumSHA256: m.ChecksumSHA256,
			CreatedAt:      m.CreatedAt,
			Labels:         m.Labels,
			ParseJobs:      []*tensorlake.ParseResult{r},
		})
	}
}

// DocumentResources handles resource requests for document metadata and parse results.
// The URI is of the form "tensorlake://documents".
//
// It validates the scheme and host of the requested URI, then returns every
// record currently in the files map as one indented JSON array with MIME type
// "application/json". An error is returned when the URI cannot be parsed, when
// its scheme or host does not match, or when marshalling fails.
func (s *server) DocumentResources(ctx context.Context, req *mcp.ReadResourceRequest) (*mcp.ReadResourceResult, error) {
	u, err := url.Parse(req.Params.URI)
	if err != nil {
		// Keep the parser's reason in the chain so callers can see why the URI was rejected.
		return nil, fmt.Errorf("invalid tensorlake resource URI %q: %w", req.Params.URI, err)
	}
	if u.Scheme != "tensorlake" {
		return nil, fmt.Errorf("invalid tensorlake resource URI scheme: %s", u.Scheme)
	}
	if u.Host != "documents" {
		return nil, fmt.Errorf("invalid tensorlake resource URI host: %s", u.Host)
	}

	// List all documents. The slice is created non-nil so that an empty map
	// encodes as "[]" rather than "null".
	// NOTE(review): the order is whatever files.Range yields; sort by file ID
	// here if clients need a stable order.
	docs := make([]*FileInfo, 0)
	files.Range(func(_ string, info *FileInfo) bool {
		docs = append(docs, info)
		return true
	})

	data, err := json.MarshalIndent(docs, "", " ")
	if err != nil {
		return nil, fmt.Errorf("failed to marshal documents: %w", err)
	}
	return &mcp.ReadResourceResult{
		Contents: []*mcp.ResourceContents{
			{
				URI:      req.Params.URI,
				MIMEType: "application/json",
				Text:     string(data),
			},
		},
	}, nil
}
Loading