Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ A terminal AI assistant with local knowledge management. Supports Gemini, Ollama
- **HTTP API** — JSON REST API with SSE streaming for building frontends
- **MCP Server** — Use Koopa's tools from Claude Desktop or Cursor
- **RAG** — Semantic search over your conversations and documents (pgvector)
- **User Memory** — Automatically learns user preferences, identity, and project context across sessions (pgvector + LLM extraction, two-threshold dedup, time-decay)
- **Built-in tools** — File I/O, shell commands, web search, web scraping
- **MCP client** — Plug in external MCP servers for additional tools
- **Sessions** — Persistent conversation history in PostgreSQL
Expand Down Expand Up @@ -58,15 +59,7 @@ export HMAC_SECRET=$(openssl rand -base64 32)
./koopa serve
```

| Endpoint | Method | Description |
|---------------------------------|--------|--------------------------|
| `/api/chat` | POST | Send message (SSE stream)|
| `/api/sessions` | GET | List sessions |
| `/api/sessions` | POST | Create session |
| `/api/sessions/{id}` | GET | Get session |
| `/api/sessions/{id}` | DELETE | Delete session |
| `/api/sessions/{id}/messages` | GET | Get session messages |
| `/health` | GET | Health check |
See [API Integration Guide](docs/api-integration-guide.md) for endpoint details, SSE streaming protocol, and frontend integration examples.

### MCP Server

Expand Down
26 changes: 3 additions & 23 deletions db/migrations/000001_init_schema.down.sql
Original file line number Diff line number Diff line change
@@ -1,31 +1,11 @@
-- Koopa Database Schema - Down Migration
-- Drops all objects created by 000001_init_schema.up.sql in reverse order

-- ============================================================================
-- Drop Messages Table
-- ============================================================================

DROP TABLE IF EXISTS memories;
DROP TABLE IF EXISTS messages;

-- ============================================================================
-- Drop Sessions Table (including indexes)
-- ============================================================================

DROP INDEX IF EXISTS idx_sessions_owner_id;
DROP INDEX IF EXISTS idx_sessions_updated_at;
DROP TABLE IF EXISTS sessions;

-- ============================================================================
-- Drop Documents Table (including indexes)
-- ============================================================================

DROP INDEX IF EXISTS idx_documents_owner;
DROP INDEX IF EXISTS idx_documents_metadata_gin;
DROP INDEX IF EXISTS idx_documents_source_type;
DROP INDEX IF EXISTS idx_documents_embedding;
DROP TABLE IF EXISTS documents;

-- ============================================================================
-- Drop Extensions
-- Note: Only drop if no other schemas depend on it
-- ============================================================================

DROP EXTENSION IF EXISTS vector;
Comment on lines +1 to 11

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indexes are dropped after their associated tables (e.g., indexes on 'sessions' and 'documents' are dropped after the tables themselves). This can cause errors if the table no longer exists when attempting to drop the index. Recommendation: Drop all indexes before dropping their associated tables to avoid dependency errors.

Comment on lines +1 to 11

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The migration script does not wrap the DROP statements in a transaction. If an error occurs partway through, the database could be left in an inconsistent state. Recommendation: Wrap all statements in a transaction block (e.g., BEGIN; ... COMMIT;) to ensure atomicity.

78 changes: 65 additions & 13 deletions db/migrations/000001_init_schema.up.sql
Original file line number Diff line number Diff line change
@@ -1,47 +1,46 @@
-- Koopa Database Schema
-- Consolidated migration for sessions, messages, and documents
-- NOTE: All CREATE statements use IF NOT EXISTS for idempotent execution
-- Koopa Database Schema (consolidated)
-- All tables: sessions, messages, documents, memories

-- Enable pgvector extension (required for vector search)
CREATE EXTENSION IF NOT EXISTS vector;

-- ============================================================================
-- Documents Table (for RAG / Knowledge Store)
-- Used by Genkit PostgreSQL Plugin with custom column names
-- Documents Table (RAG / Knowledge Store)
-- ============================================================================

CREATE TABLE IF NOT EXISTS documents (
id TEXT PRIMARY KEY,
content TEXT NOT NULL,
embedding vector(768) NOT NULL, -- gemini-embedding-001 truncated via OutputDimensionality
source_type TEXT, -- Metadata column for filtering
metadata JSONB -- Additional metadata in JSON format
embedding vector(768) NOT NULL,
source_type TEXT,
metadata JSONB,
owner_id TEXT
);

-- HNSW index for fast vector similarity search
CREATE INDEX IF NOT EXISTS idx_documents_embedding ON documents
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);

-- Index for filtering by source_type
CREATE INDEX IF NOT EXISTS idx_documents_source_type ON documents(source_type);

-- Enables fast queries like: WHERE metadata @> '{"key": "value"}'
CREATE INDEX IF NOT EXISTS idx_documents_metadata_gin
ON documents USING GIN (metadata jsonb_path_ops);

CREATE INDEX IF NOT EXISTS idx_documents_owner ON documents(owner_id);

-- ============================================================================
-- Sessions Table
-- ============================================================================

CREATE TABLE IF NOT EXISTS sessions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
title TEXT,
owner_id TEXT NOT NULL DEFAULT '',

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential Data Integrity and Security Issue:

The owner_id column in the sessions table is defined as TEXT NOT NULL DEFAULT ''. This allows sessions to be created with an empty string as the owner, which may lead to orphaned sessions and complicate access control. It is recommended to:

  • Remove the default value and require explicit assignment of a valid owner.
  • Alternatively, use NULL to indicate no owner and enforce ownership at the application level.

Example:

owner_id TEXT NOT NULL

Or, if nullable:

owner_id TEXT

created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_sessions_updated_at ON sessions(updated_at DESC);
CREATE INDEX IF NOT EXISTS idx_sessions_owner_id ON sessions(owner_id, updated_at DESC);

-- ============================================================================
-- Messages Table
Expand All @@ -55,7 +54,60 @@ CREATE TABLE IF NOT EXISTS messages (
sequence_number INTEGER NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),

-- UNIQUE constraint automatically creates index on (session_id, sequence_number)
CONSTRAINT unique_message_sequence UNIQUE (session_id, sequence_number),
CONSTRAINT message_role_check CHECK (role IN ('user', 'assistant', 'system', 'tool'))
);

-- ============================================================================
-- Memories Table (user memory with vector search, decay, dedup)
-- ============================================================================

-- One row per stored memory entry, scoped to an owner via owner_id.
CREATE TABLE IF NOT EXISTS memories (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
owner_id TEXT NOT NULL,
content TEXT NOT NULL,
-- 768-dimensional pgvector column (same width as documents.embedding).
-- NOTE(review): presumably the embedding of content; confirm against the writer.
embedding vector(768) NOT NULL,
-- Restricted to the four values listed in the CHECK; defaults to 'contextual'.
category TEXT NOT NULL DEFAULT 'contextual'
CHECK (category IN ('identity', 'preference', 'project', 'contextual')),
-- Optional reference to a sessions row; reset to NULL when that session is deleted.
source_session_id UUID REFERENCES sessions(id) ON DELETE SET NULL,
-- Defaults to true; several partial indexes on this table filter on active = true.
active BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-- Integer in the range 1..10, default 5.
importance SMALLINT NOT NULL DEFAULT 5
CHECK (importance BETWEEN 1 AND 10),
-- NOTE(review): access_count / last_accessed_at are presumably maintained by
-- the application on retrieval; nothing in this schema updates them.
access_count INTEGER NOT NULL DEFAULT 0,
last_accessed_at TIMESTAMPTZ,
-- Constrained to [0.0, 1.0], default 1.0.
decay_score REAL NOT NULL DEFAULT 1.0
CHECK (decay_score BETWEEN 0.0 AND 1.0),
-- Self-reference to another memories row; reset to NULL if that row is deleted.
superseded_by UUID REFERENCES memories(id) ON DELETE SET NULL,
-- A row can never point at itself as its own replacement.
CONSTRAINT memories_no_self_supersede
CHECK (superseded_by IS NULL OR superseded_by != id),
-- Nullable. NOTE(review): presumably an optional expiry time; confirm semantics.
expires_at TIMESTAMPTZ,
-- Stored generated column: tsvector of content using the 'english' configuration.
search_text tsvector
GENERATED ALWAYS AS (to_tsvector('english', content)) STORED
);

-- All memories indexes use IF NOT EXISTS, matching the documents/sessions
-- indexes in this migration. The table itself is CREATE TABLE IF NOT EXISTS,
-- so without the guard a re-run would skip the table and then fail here with
-- "relation already exists".

-- HNSW index for cosine-distance similarity search over embedding.
CREATE INDEX IF NOT EXISTS idx_memories_embedding ON memories
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

-- Lookup by owner.
CREATE INDEX IF NOT EXISTS idx_memories_owner ON memories(owner_id);

-- Composite index for filtering by owner, active flag, and category.
CREATE INDEX IF NOT EXISTS idx_memories_owner_active_category
    ON memories(owner_id, active, category);

-- At most one active row per (owner, md5 of content); inactive rows are
-- excluded from the uniqueness check by the partial-index predicate.
CREATE UNIQUE INDEX IF NOT EXISTS idx_memories_owner_content_unique
    ON memories(owner_id, md5(content)) WHERE active = true;
Comment on lines +99 to +100

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deduplication Vulnerability via MD5 Hash Collisions:

The unique index on md5(content) for active memories is exposed to hash collisions: two different contents that happen to share an MD5 digest would be rejected as duplicates for the same owner. Additionally, deduplication does not apply to inactive memories, which could result in inconsistent data. Consider using a stronger hash function (e.g., SHA256) or a direct unique constraint on the content column if performance and storage allow.

Example (if using SHA256):

ON memories(owner_id, encode(digest(content, 'sha256'), 'hex')) WHERE active = true;


-- IF NOT EXISTS keeps these statements re-runnable, consistent with the
-- CREATE TABLE IF NOT EXISTS above and with the other indexes in this migration.

-- GIN index over the generated search_text tsvector column.
CREATE INDEX IF NOT EXISTS idx_memories_search_text ON memories USING gin (search_text);

-- Partial index over active rows that have not been superseded, keyed by
-- (owner_id, updated_at). NOTE(review): the name suggests it serves the decay
-- pass; confirm against the query that uses it.
CREATE INDEX IF NOT EXISTS idx_memories_decay_candidates
    ON memories (owner_id, updated_at)
    WHERE active = true AND superseded_by IS NULL;

-- Partial index covering only rows that point at a superseding memory.
CREATE INDEX IF NOT EXISTS idx_memories_superseded_by ON memories (superseded_by)
    WHERE superseded_by IS NOT NULL;

-- Partial index over active rows that carry an expires_at value.
CREATE INDEX IF NOT EXISTS idx_memories_expires_at
    ON memories (expires_at)
    WHERE expires_at IS NOT NULL AND active = true;
2 changes: 0 additions & 2 deletions db/migrations/000002_add_owner_id.down.sql

This file was deleted.

6 changes: 0 additions & 6 deletions db/migrations/000002_add_owner_id.up.sql

This file was deleted.

2 changes: 0 additions & 2 deletions db/migrations/000003_add_document_owner.down.sql

This file was deleted.

6 changes: 0 additions & 6 deletions db/migrations/000003_add_document_owner.up.sql

This file was deleted.

36 changes: 24 additions & 12 deletions internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package app

import (
"context"
"fmt"
"log/slog"
"sync"
Expand All @@ -16,6 +17,7 @@ import (

"github.com/koopa0/koopa/internal/chat"
"github.com/koopa0/koopa/internal/config"
"github.com/koopa0/koopa/internal/memory"
"github.com/koopa0/koopa/internal/security"
"github.com/koopa0/koopa/internal/session"
"github.com/koopa0/koopa/internal/tools"
Expand All @@ -32,6 +34,7 @@ type App struct {
DocStore *postgresql.DocStore
Retriever ai.Retriever
SessionStore *session.Store
MemoryStore *memory.Store
PathValidator *security.Path
Tools []ai.Tool // Pre-registered Genkit tools (for chat agent)

Expand All @@ -41,8 +44,10 @@ type App struct {
Network *tools.Network
Knowledge *tools.Knowledge // nil if retriever unavailable

// Lifecycle management (unexported)
// Lifecycle management (all unexported; bgCtx is passed to agents by CreateAgent)
bgCtx context.Context // Outlives individual requests; canceled by Close().
cancel func()
wg sync.WaitGroup // tracks background goroutines (scheduler, memory extraction)
dbCleanup func()
otelCleanup func()
closeOnce sync.Once
Expand All @@ -53,8 +58,9 @@ type App struct {
//
// Shutdown order:
// 1. Cancel context (signals background tasks to stop)
// 2. Close DB pool
// 3. Flush OTel spans
// 2. Wait for background goroutines (scheduler, memory extraction) to exit
// 3. Close DB pool
// 4. Flush OTel spans
func (a *App) Close() error {
a.closeOnce.Do(func() {
slog.Info("shutting down application")
Comment on lines 58 to 66

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resource cleanup error handling is missing in Close().

The Close method does not capture or propagate errors from dbCleanup or otelCleanup. If these cleanup functions fail, the error will be silently ignored, potentially masking critical shutdown failures. Consider aggregating errors from each cleanup step and returning a combined error:

var err error
if a.dbCleanup != nil {
    if cerr := a.dbCleanup(); cerr != nil {
        err = errors.Join(err, cerr)
    }
}
// ... same for otelCleanup
return err

Expand All @@ -64,12 +70,15 @@ func (a *App) Close() error {
a.cancel()
}

// 2. Close DB pool
// 2. Wait for background goroutines to finish
a.wg.Wait()

// 3. Close DB pool
if a.dbCleanup != nil {
a.dbCleanup()
}

// 3. Flush OTel spans
// 4. Flush OTel spans
if a.otelCleanup != nil {
a.otelCleanup()
}
Expand All @@ -82,13 +91,16 @@ func (a *App) Close() error {
// Setup guarantees all dependencies are non-nil.
func (a *App) CreateAgent() (*chat.Agent, error) {
agent, err := chat.New(chat.Config{
Genkit: a.Genkit,
SessionStore: a.SessionStore,
Logger: slog.Default(),
Tools: a.Tools,
ModelName: a.Config.FullModelName(),
MaxTurns: a.Config.MaxTurns,
Language: a.Config.Language,
Genkit: a.Genkit,
SessionStore: a.SessionStore,
MemoryStore: a.MemoryStore,
Logger: slog.Default(),
Tools: a.Tools,
ModelName: a.Config.FullModelName(),
MaxTurns: a.Config.MaxTurns,
Language: a.Config.Language,
BackgroundCtx: a.bgCtx,
WG: &a.wg,
})
if err != nil {
return nil, fmt.Errorf("creating chat agent: %w", err)
Comment on lines 91 to 106

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dependency guarantees in CreateAgent() rely on Setup correctness.

The comment claims Setup guarantees all dependencies are non-nil, but this is not enforced in code. If Setup fails, chat.New may panic or behave unpredictably. Consider adding explicit nil checks for critical dependencies before agent creation, or ensure Setup returns an error if any dependency is missing.

Expand Down
31 changes: 29 additions & 2 deletions internal/app/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/koopa0/koopa/db"
"github.com/koopa0/koopa/internal/config"
"github.com/koopa0/koopa/internal/memory"
"github.com/koopa0/koopa/internal/rag"
"github.com/koopa0/koopa/internal/security"
"github.com/koopa0/koopa/internal/session"
Expand Down Expand Up @@ -78,6 +79,12 @@ func Setup(ctx context.Context, cfg *config.Config) (_ *App, retErr error) {

a.SessionStore = provideSessionStore(pool)

memStore, err := provideMemoryStore(pool, embedder)
if err != nil {
return nil, err
}
a.MemoryStore = memStore

path, err := providePathValidator()
if err != nil {
return nil, err
Expand All @@ -88,10 +95,21 @@ func Setup(ctx context.Context, cfg *config.Config) (_ *App, retErr error) {
return nil, err
}

// Set up lifecycle management
_, cancel := context.WithCancel(ctx)
// Set up lifecycle management.
bgCtx, cancel := context.WithCancel(ctx)
a.bgCtx = bgCtx
a.cancel = cancel

// Start memory decay scheduler if memory store is available.
if memStore != nil {
scheduler := memory.NewScheduler(memStore, slog.Default())
a.wg.Add(1)
go func() {
defer a.wg.Done()
scheduler.Run(bgCtx)
}()
}

return a, nil
}

Expand Down Expand Up @@ -299,6 +317,15 @@ func provideSessionStore(pool *pgxpool.Pool) *session.Store {
return session.New(sqlc.New(pool), pool, nil)
}

// provideMemoryStore constructs the pgvector-backed memory store from the
// given pgx pool and embedder, using the process-wide default slog logger.
// A constructor failure is returned wrapped with "creating memory store".
func provideMemoryStore(pool *pgxpool.Pool, embedder ai.Embedder) (*memory.Store, error) {
	logger := slog.Default()

	memStore, err := memory.NewStore(pool, embedder, logger)
	if err != nil {
		return nil, fmt.Errorf("creating memory store: %w", err)
	}

	return memStore, nil
}

// providePathValidator creates a path validator instance.
// Denies access to prompts/ to protect system prompt files from tool-based access.
func providePathValidator() (*security.Path, error) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Security/Configurability Issue:

The providePathValidator function hardcodes the denial of access to the prompts directory:

return security.NewPath([]string{"."}, []string{"prompts"})

If the directory structure changes or additional sensitive directories are introduced, this approach will not scale. Consider making the denial list configurable via application settings or environment variables to enhance security and maintainability.

Recommended Solution:

  • Allow the denied paths to be specified in the configuration, e.g., cfg.DeniedPaths, and pass them to security.NewPath.
  • Example:
    func providePathValidator(cfg *config.Config) (*security.Path, error) {
        return security.NewPath([]string{"."}, cfg.DeniedPaths)
    }

Expand Down
Loading
Loading