-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add memory API, search, session export, and normalize API #68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| .PHONY: build vet lint test test-race test-integration test-fuzz verify clean | ||
|
|
||
| # Build binary | ||
| build: | ||
| go build -o koopa ./ | ||
|
|
||
| # Static analysis | ||
| vet: | ||
| go vet ./... | ||
|
|
||
| # Lint (matches CI: golangci-lint v2.7.1) | ||
| lint: | ||
| golangci-lint run ./... | ||
|
|
||
| # Unit tests (fast, no database required) | ||
| test: | ||
| go test -short ./... | ||
|
|
||
| # Unit tests with race detector (matches CI) | ||
| test-race: | ||
| go test -short -race ./... | ||
|
|
||
| # Integration tests (requires PostgreSQL with pgvector) | ||
| test-integration: | ||
| go test -tags=integration -race -timeout 15m ./... | ||
|
|
||
| # Run security fuzz targets for 30s each | ||
| test-fuzz: | ||
| go test -fuzz=FuzzPathValidation -fuzztime=30s ./internal/security/ | ||
| go test -fuzz=FuzzCommandValidation -fuzztime=30s ./internal/security/ | ||
| go test -fuzz=FuzzURLValidation -fuzztime=30s ./internal/security/ | ||
| go test -fuzz=FuzzSafeDialContext -fuzztime=30s ./internal/security/ | ||
|
|
||
| # Full verification chain (matches /verify skill) | ||
| # Stop at first failure. | ||
| verify: build vet lint test-race | ||
|
|
||
| # Remove build artifacts | ||
| clean: | ||
| rm -f koopa | ||
| go clean -testcache |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,7 +55,7 @@ CREATE TABLE IF NOT EXISTS messages ( | |
| created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), | ||
|
|
||
| CONSTRAINT unique_message_sequence UNIQUE (session_id, sequence_number), | ||
| CONSTRAINT message_role_check CHECK (role IN ('user', 'assistant', 'system', 'tool')) | ||
| CONSTRAINT message_role_check CHECK (role IN ('user', 'assistant', 'system', 'tool', 'model')) | ||
| ); | ||
|
|
||
| -- ============================================================================ | ||
|
|
@@ -87,27 +87,27 @@ CREATE TABLE IF NOT EXISTS memories ( | |
| GENERATED ALWAYS AS (to_tsvector('english', content)) STORED | ||
| ); | ||
|
|
||
| CREATE INDEX idx_memories_embedding ON memories | ||
| CREATE INDEX IF NOT EXISTS idx_memories_embedding ON memories | ||
| USING hnsw (embedding vector_cosine_ops) | ||
| WITH (m = 16, ef_construction = 64); | ||
|
|
||
| CREATE INDEX idx_memories_owner ON memories(owner_id); | ||
| CREATE INDEX IF NOT EXISTS idx_memories_owner ON memories(owner_id); | ||
|
|
||
| CREATE INDEX idx_memories_owner_active_category | ||
| CREATE INDEX IF NOT EXISTS idx_memories_owner_active_category | ||
| ON memories(owner_id, active, category); | ||
|
|
||
| CREATE UNIQUE INDEX idx_memories_owner_content_unique | ||
| CREATE UNIQUE INDEX IF NOT EXISTS idx_memories_owner_content_unique | ||
| ON memories(owner_id, md5(content)) WHERE active = true; | ||
|
Comment on lines
+99
to
100
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Potential collision risk with md5(content) for uniqueness: The unique index on `(owner_id, md5(content))` depends on MD5 to detect duplicate content. Consider using a stronger hash such as SHA-256 (note that `digest()` is provided by the pgcrypto extension, which must be installed):
CREATE UNIQUE INDEX IF NOT EXISTS idx_memories_owner_content_unique
ON memories(owner_id, encode(digest(content, 'sha256'), 'hex')) WHERE active = true;
This reduces the risk of accidental duplicates due to hash collisions. |
||
|
|
||
| CREATE INDEX idx_memories_search_text ON memories USING gin (search_text); | ||
| CREATE INDEX IF NOT EXISTS idx_memories_search_text ON memories USING gin (search_text); | ||
|
|
||
| CREATE INDEX idx_memories_decay_candidates | ||
| CREATE INDEX IF NOT EXISTS idx_memories_decay_candidates | ||
| ON memories (owner_id, updated_at) | ||
| WHERE active = true AND superseded_by IS NULL; | ||
|
|
||
| CREATE INDEX idx_memories_superseded_by ON memories (superseded_by) | ||
| CREATE INDEX IF NOT EXISTS idx_memories_superseded_by ON memories (superseded_by) | ||
| WHERE superseded_by IS NOT NULL; | ||
|
|
||
| CREATE INDEX idx_memories_expires_at | ||
| CREATE INDEX IF NOT EXISTS idx_memories_expires_at | ||
| ON memories (expires_at) | ||
| WHERE expires_at IS NOT NULL AND active = true; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| DROP INDEX IF EXISTS idx_messages_search_text; | ||
| ALTER TABLE messages DROP COLUMN IF EXISTS search_text; | ||
| ALTER TABLE messages DROP COLUMN IF EXISTS text_content; | ||
|
Comment on lines
+2
to
+3
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Irreversible Data Loss Risk: Dropping the `search_text` and `text_content` columns permanently deletes the data stored in them. Recommended Solution: Add a warning or backup step before running this migration, or wrap the migration in a transaction to ensure atomicity.
Comment on lines
+1
to
+3
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Potential Dependency Conflicts: Dropping columns and indexes without checking for dependencies (such as triggers, views, or application code) may cause runtime errors or inconsistencies. Recommended Solution: Verify that no other database objects or application logic depend on these columns or index before executing the migration. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| -- Add text_content for full-text search on messages. | ||
| -- content is JSONB ([]*ai.Part), not directly searchable. | ||
| -- text_content is application-maintained, populated in AddMessage. | ||
| ALTER TABLE messages ADD COLUMN IF NOT EXISTS text_content TEXT; | ||
|
|
||
| -- Generated tsvector for FTS. | ||
| -- to_tsvector handles NULL natively (returns empty tsvector), no COALESCE needed. | ||
| ALTER TABLE messages ADD COLUMN IF NOT EXISTS search_text tsvector | ||
| GENERATED ALWAYS AS (to_tsvector('english', text_content)) STORED; | ||
|
|
||
| -- GIN index for fast full-text search. | ||
| CREATE INDEX IF NOT EXISTS idx_messages_search_text ON messages USING gin(search_text); | ||
|
|
||
| -- Backfill existing messages: extract text from JSONB parts. | ||
| UPDATE messages SET text_content = ( | ||
| SELECT string_agg(elem->>'text', ' ') | ||
| FROM jsonb_array_elements(content) AS elem | ||
| WHERE elem->>'text' IS NOT NULL AND elem->>'text' != '' | ||
| ) WHERE text_content IS NULL; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| DROP INDEX IF EXISTS idx_messages_session_id; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| -- Add index on messages.session_id for JOIN performance. | ||
| -- PostgreSQL does NOT auto-create indexes on FK referencing columns. | ||
| -- SearchMessages and CountMessages both JOIN on m.session_id = s.id. | ||
| CREATE INDEX IF NOT EXISTS idx_messages_session_id ON messages(session_id); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| DROP INDEX IF EXISTS idx_memories_content_trgm; | ||
| DROP INDEX IF EXISTS idx_messages_text_content_trgm; | ||
| -- Do not drop pg_trgm extension; other schemas may use it. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| -- Enable pg_trgm extension for trigram-based text search (CJK support). | ||
| CREATE EXTENSION IF NOT EXISTS pg_trgm; | ||
|
|
||
| -- CONCURRENTLY avoids locking the table during index creation. | ||
| -- NOTE: CONCURRENTLY cannot run inside a transaction block. | ||
| -- golang-migrate runs each file in a transaction by default; | ||
| -- the operator must run this migration manually with: | ||
| -- psql -f 000004_trigram_search.up.sql | ||
| -- or disable transactions in the migration tool. | ||
|
Comment on lines
+5
to
+9
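There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Migration Execution Risk: `CREATE INDEX CONCURRENTLY` cannot run inside a transaction block, but the migration tool runs each file in a transaction by default, so this migration depends on an operator running it manually. Recommendation: Run this migration outside a transaction (for example with `psql -f 000004_trigram_search.up.sql`), or disable transactions for it in the migration tool. |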
||
|
|
||
| -- GIN trigram index on messages.text_content for ILIKE fallback search. | ||
| CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_messages_text_content_trgm | ||
| ON messages USING gin (text_content gin_trgm_ops); | ||
|
|
||
| -- GIN trigram index on memories.content for similarity() scoring. | ||
| CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_memories_content_trgm | ||
| ON memories USING gin (content gin_trgm_ops); | ||
|
Comment on lines
+12
to
+17
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing Existence Checks: Recommendation: |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
| -- name: CreateSession :one | ||
| INSERT INTO sessions (title, owner_id) | ||
| VALUES ($1, sqlc.arg(owner_id)) | ||
|
Comment on lines
5
to
6
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Inconsistent parameter usage in CreateSession query: The query mixes a positional parameter (`$1`) with a named parameter (`sqlc.arg(owner_id)`). Use named parameters for both values:
INSERT INTO sessions (title, owner_id)
VALUES (sqlc.arg(title), sqlc.arg(owner_id))
RETURNING id, title, owner_id, created_at, updated_at;
Or, if using positional parameters, ensure all are positional. |
||
| RETURNING *; | ||
| RETURNING id, title, owner_id, created_at, updated_at; | ||
|
|
||
| -- name: Session :one | ||
| SELECT id, title, owner_id, created_at, updated_at | ||
|
|
@@ -26,7 +26,7 @@ SELECT id, title, owner_id, created_at, updated_at | |
| FROM sessions | ||
| WHERE id = sqlc.arg(session_id) AND owner_id = sqlc.arg(owner_id); | ||
|
|
||
| -- name: UpdateSessionUpdatedAt :exec | ||
| -- name: UpdateSessionUpdatedAt :execrows | ||
| UPDATE sessions | ||
| SET updated_at = NOW() | ||
| WHERE id = sqlc.arg(session_id); | ||
|
|
@@ -44,8 +44,8 @@ WHERE id = $1; | |
|
|
||
| -- name: AddMessage :exec | ||
| -- Add a message to a session | ||
| INSERT INTO messages (session_id, role, content, sequence_number) | ||
| VALUES ($1, $2, $3, $4); | ||
| INSERT INTO messages (session_id, role, content, sequence_number, text_content) | ||
| VALUES ($1, $2, $3, $4, $5); | ||
|
|
||
| -- name: Messages :many | ||
| -- Get all messages for a session ordered by sequence | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Security: CSRF Secret Handling
The CSRF secret is set as `[]byte(cfg.HMACSecret)`. If `cfg.HMACSecret` is not validated for sufficient entropy and length, this could compromise CSRF protection. Ensure that `cfg.HMACSecret` is at least 32 bytes of cryptographically secure random data. If not, add validation logic in the configuration loading or validation phase to enforce this requirement.
Recommended solution: