diff --git a/.gitignore b/.gitignore index e386d6d..8d46349 100644 --- a/.gitignore +++ b/.gitignore @@ -36,4 +36,5 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json .turbo -apps/demo \ No newline at end of file +apps/demo +apps/examples/test \ No newline at end of file diff --git a/apps/examples/support-ticket-search/lib/unrag/core/ingest.ts b/apps/examples/support-ticket-search/lib/unrag/core/ingest.ts index 8c7870a..c8ba472 100644 --- a/apps/examples/support-ticket-search/lib/unrag/core/ingest.ts +++ b/apps/examples/support-ticket-search/lib/unrag/core/ingest.ts @@ -88,9 +88,13 @@ export const ingest = async ( const chunkingOptions = { ...config.defaults, - ...input.chunking + ...input.chunking, + sourceId: input.sourceId, + metadata: input.metadata ?? {} } + const chunker = input.chunker ?? config.chunker + const metadata = input.metadata ?? {} const documentId = config.idGenerator() const assets: AssetInput[] = Array.isArray(input.assets) ? input.assets : [] @@ -126,7 +130,7 @@ export const ingest = async ( const prepared: PreparedChunk[] = [] const warnings: IngestWarning[] = [] - const baseTextChunks = config.chunker(input.content, chunkingOptions) + const baseTextChunks = chunker(input.content, chunkingOptions) for (const c of baseTextChunks) { prepared.push({ chunk: { @@ -215,7 +219,7 @@ export const ingest = async ( .filter((t) => t.content.trim().length > 0) for (const item of nonEmptyItems) { - const chunks = config.chunker(item.content, chunkingOptions) + const chunks = chunker(item.content, chunkingOptions) for (const c of chunks) { outSpecs.push({ documentId, @@ -405,7 +409,7 @@ export const ingest = async ( storedTokenCount: storedCaptionTokenCount }) } else if (caption) { - const captionChunks = config.chunker(caption, chunkingOptions) + const captionChunks = chunker(caption, chunkingOptions) for (const c of captionChunks) { specs.push({ documentId, diff --git a/apps/examples/support-ticket-search/lib/unrag/core/types.ts b/apps/examples/support-ticket-search/lib/unrag/core/types.ts index 44768a6..2d50998 100644 --- a/apps/examples/support-ticket-search/lib/unrag/core/types.ts +++ b/apps/examples/support-ticket-search/lib/unrag/core/types.ts @@ -702,6 +702,13 @@ export type IngestInput = { sourceId: string content: string metadata?: Metadata + /** + * Per-ingest chunker override. + * + * Use this to switch chunking algorithms for a single ingest call without + * changing the engine's configured chunker. + */ + chunker?: Chunker chunking?: Partial /** Optional rich media attached to the document. */ assets?: AssetInput[] diff --git a/apps/web/app/api/_lib/registry-manifest.ts b/apps/web/app/api/_lib/registry-manifest.ts index 792fb08..31ceec1 100644 --- a/apps/web/app/api/_lib/registry-manifest.ts +++ b/apps/web/app/api/_lib/registry-manifest.ts @@ -5,6 +5,13 @@ export type RegistryManifest = { version: number extractors: Array<{id: string; status?: 'available' | 'coming-soon'}> connectors: Array<{id: string; status?: 'available' | 'coming-soon'}> + chunkers?: Array<{ + id: string + label?: string + description?: string + status?: 'available' | 'coming-soon' + docsPath?: string | null + }> batteries?: Array<{ id: string status?: 'available' | 'coming-soon' diff --git a/apps/web/app/api/presets/route.ts b/apps/web/app/api/presets/route.ts index 9e5fbfd..6bc5f40 100644 --- a/apps/web/app/api/presets/route.ts +++ b/apps/web/app/api/presets/route.ts @@ -30,6 +30,13 @@ type WizardStateV1 = { extractors: string[] connectors: string[] batteries?: string[] + chunkers?: string[] + } + chunking?: { + method?: string + minChunkSize?: number + model?: string + language?: string } defaults: { chunkSize: number @@ -63,8 +70,17 @@ type PresetPayloadV1 = { extractors: string[] connectors: string[] batteries: string[] + chunkers: string[] } config: { + chunking?: { + method?: string + options?: { + minChunkSize?: number + model?: string + language?: string + } + } defaults: { chunking: {chunkSize: number; chunkOverlap: number} retrieval: {topK: number} @@ -110,6 +126,15 @@ function isWizardStateV1(x: unknown): x is WizardStateV1 { ) { return false } + if ( + o.modules && + typeof o.modules === 'object' && + 'chunkers' in o.modules && + (o.modules as Record).chunkers != null && + !Array.isArray((o.modules as Record).chunkers) + ) { + return false + } return true } @@ -127,6 +152,36 @@ function normalizeWizardState(input: WizardStateV1): WizardStateV1 { const batteries = Array.isArray(input.modules.batteries) ? input.modules.batteries.map(String).filter(Boolean) : [] + const chunkers = Array.isArray(input.modules.chunkers) + ? input.modules.chunkers.map(String).filter(Boolean) + : [] + + const chunkingMethod = String(input.chunking?.method ?? 'recursive') + .trim() + .toLowerCase() + const chunkingMinChunkSize = Number(input.chunking?.minChunkSize) || 24 + const chunkingModelRaw = String(input.chunking?.model ?? '').trim() + const chunkingLanguageRaw = String(input.chunking?.language ?? '').trim() + + // These sentinels are used by the /install wizard UI. They should not be + // emitted into presets/config as literal values. + const CHUNKER_MODEL_DEFAULT_VALUE = '__default__' + const AUTO_LANGUAGE_VALUE = '__auto__' + + const chunkingModel = + chunkingModelRaw === CHUNKER_MODEL_DEFAULT_VALUE ? '' : chunkingModelRaw + const chunkingLanguage = + chunkingLanguageRaw === AUTO_LANGUAGE_VALUE ? '' : chunkingLanguageRaw + + const isBuiltInMethod = + chunkingMethod === 'recursive' || + chunkingMethod === 'token' || + chunkingMethod === 'custom' + const ensuredChunkers = isBuiltInMethod + ? chunkers + : chunkers.includes(chunkingMethod) + ? chunkers + : [...chunkers, chunkingMethod].sort() const chunkSize = Number(input.defaults.chunkSize) || 200 const chunkOverlap = Number(input.defaults.chunkOverlap) || 40 @@ -160,7 +215,13 @@ function normalizeWizardState(input: WizardStateV1): WizardStateV1 { return { v: 1, install: {installDir, storeAdapter, aliasBase}, - modules: {extractors, connectors, batteries}, + modules: {extractors, connectors, batteries, chunkers: ensuredChunkers}, + chunking: { + method: chunkingMethod, + minChunkSize: chunkingMinChunkSize, + ...(chunkingModel ? {model: chunkingModel} : {}), + ...(chunkingLanguage ? {language: chunkingLanguage} : {}) + }, defaults: {chunkSize, chunkOverlap, topK}, embedding: { type: embeddingType, @@ -174,6 +235,8 @@ function normalizeWizardState(input: WizardStateV1): WizardStateV1 { function makePresetFromWizard(state: WizardStateV1): PresetPayloadV1 { const assetProcessing = state.engine?.assetProcessing + const CHUNKER_MODEL_DEFAULT_VALUE = '__default__' + const AUTO_LANGUAGE_VALUE = '__auto__' return { version: 1, createdAt: new Date().toISOString(), @@ -187,9 +250,35 @@ function makePresetFromWizard(state: WizardStateV1): PresetPayloadV1 { connectors: state.modules.connectors, batteries: (state.modules.batteries ?? []) .map(String) - .filter(Boolean) + .filter(Boolean), + chunkers: (state.modules.chunkers ?? []).map(String).filter(Boolean) }, config: { + ...(state.chunking?.method + ? { + chunking: { + method: state.chunking.method, + options: { + ...(typeof state.chunking.minChunkSize === + 'number' + ? { + minChunkSize: + state.chunking.minChunkSize + } + : {}), + ...(state.chunking.model && + state.chunking.model !== + CHUNKER_MODEL_DEFAULT_VALUE + ? {model: state.chunking.model} + : {}), + ...(state.chunking.language && + state.chunking.language !== AUTO_LANGUAGE_VALUE + ? {language: state.chunking.language} + : {}) + } + } + } + : {}), defaults: { chunking: { chunkSize: state.defaults.chunkSize, @@ -281,6 +370,11 @@ export async function POST(req: NextRequest) { .filter((b) => b.status === 'available') .map((b) => b.id) ) + const allowedChunkers = new Set( + (manifest.chunkers ?? []) + .filter((c) => c.status === 'available') + .map((c) => c.id) + ) const unknownExtractors = state.modules.extractors.filter( (x) => !allowedExtractors.has(x) @@ -313,6 +407,17 @@ export async function POST(req: NextRequest) { ) } + const chunkerIds = (state.modules.chunkers ?? []) + .map(String) + .filter(Boolean) + const unknownChunkers = chunkerIds.filter((x) => !allowedChunkers.has(x)) + if (unknownChunkers.length > 0) { + return NextResponse.json( + {error: 'Unknown or unavailable chunkers', unknownChunkers}, + {status: 400} + ) + } + const preset = makePresetFromWizard(state) const id = newPresetId() const key = `unrag:preset:${id}` diff --git a/apps/web/app/install/install-wizard-client.tsx b/apps/web/app/install/install-wizard-client.tsx index b3a98ea..e39556c 100644 --- a/apps/web/app/install/install-wizard-client.tsx +++ b/apps/web/app/install/install-wizard-client.tsx @@ -110,6 +110,22 @@ type WizardStateV1 = { extractors: string[] connectors: string[] batteries: string[] + chunkers: string[] + } + chunking: { + /** + * Chunking method to use. + * - Built-in: "recursive" (default), "token" + * - Plugins: e.g. "markdown", "semantic", "code" + * - "custom" is supported in unrag.config.ts, but not scaffolded by this wizard. + */ + method: string + /** Minimum chunk size in tokens (small chunks are merged). */ + minChunkSize: number + /** Model hint for LLM-driven chunkers (semantic/agentic). */ + model?: string + /** Language hint for the code chunker. */ + language?: string } defaults: { chunkSize: number @@ -148,6 +164,13 @@ type RegistryManifest = { types?: string[] docsPath?: string | null }> + chunkers?: Array<{ + id: string + label?: string + description?: string + status?: 'available' | 'coming-soon' + docsPath?: string | null + }> batteries?: Array<{ id: string displayName?: string @@ -241,7 +264,14 @@ const DEFAULT_STATE: WizardStateV1 = { modules: { extractors: [], connectors: [], - batteries: [] + batteries: [], + chunkers: [] + }, + chunking: { + method: 'recursive', + minChunkSize: 24, + model: 'openai/gpt-5-mini', + language: 'typescript' }, defaults: { chunkSize: 200, @@ -632,6 +662,9 @@ const EMBEDDING_MODELS_BY_PROVIDER: Partial< } const CUSTOM_MODEL_VALUE = '__custom__' +const AUTO_LANGUAGE_VALUE = '__auto__' +const CHUNKER_MODEL_DEFAULT_VALUE = '__default__' +const CHUNKER_MODEL_CUSTOM_VALUE = '__custom_chunker__' const RECOMMENDED_DEFAULTS = { chunkSize: 200, @@ -643,6 +676,12 @@ const CHUNK_SIZE_OPTIONS = [ 64, 96, 128, 160, 200, 240, 300, 400, 512, 768, 1024, 1536, 2048 ] const CHUNK_OVERLAP_OPTIONS = [0, 10, 20, 30, 40, 60, 80, 100, 150, 200, 256] +const MIN_CHUNK_SIZE_OPTIONS = [8, 12, 16, 24, 32, 48, 64, 96] +const SEMANTIC_CHUNKER_MODELS = [ + 'openai/gpt-5-mini', + 'openai/gpt-4o-mini', + 'openai/gpt-4o' +] const TOP_K_OPTIONS = [3, 5, 8, 10, 12, 15, 20, 30, 50, 100] const EXTRACTOR_ICONS: Record = { @@ -732,6 +771,41 @@ function normalizeState(s: WizardStateV1): WizardStateV1 { const batteries = Array.isArray(s.modules?.batteries) ? s.modules.batteries.map(String) : [] + const chunkers = Array.isArray(s.modules?.chunkers) + ? s.modules.chunkers.map(String) + : [] + const chunkingMethodRaw = String( + (s as unknown as {chunking?: {method?: unknown}})?.chunking?.method ?? + DEFAULT_STATE.chunking.method + ) + const chunkingMethod = chunkingMethodRaw.trim().toLowerCase() || 'recursive' + const minChunkSize = + Number( + (s as unknown as {chunking?: {minChunkSize?: unknown}})?.chunking + ?.minChunkSize ?? DEFAULT_STATE.chunking.minChunkSize + ) || DEFAULT_STATE.chunking.minChunkSize + const chunkingModel = String( + (s as unknown as {chunking?: {model?: unknown}})?.chunking?.model ?? + DEFAULT_STATE.chunking.model ?? + '' + ).trim() + const chunkingLanguage = String( + (s as unknown as {chunking?: {language?: unknown}})?.chunking + ?.language ?? + DEFAULT_STATE.chunking.language ?? + '' + ).trim() + + // If a plugin chunker method is selected, ensure it is installed as a module. + const isBuiltInMethod = + chunkingMethod === 'recursive' || + chunkingMethod === 'token' || + chunkingMethod === 'custom' + const ensuredChunkers = isBuiltInMethod + ? chunkers + : chunkers.includes(chunkingMethod) + ? chunkers + : [...chunkers, chunkingMethod].sort() const chunkSize = Number(s.defaults?.chunkSize ?? DEFAULT_STATE.defaults.chunkSize) || DEFAULT_STATE.defaults.chunkSize @@ -779,7 +853,22 @@ function normalizeState(s: WizardStateV1): WizardStateV1 { return { v: 1, install: {installDir, storeAdapter, aliasBase}, - modules: {extractors, connectors, batteries}, + modules: {extractors, connectors, batteries, chunkers: ensuredChunkers}, + chunking: { + method: chunkingMethod, + minChunkSize, + // Keep defaults around even when not used, so switching methods is easy. + model: + chunkingModel || + (chunkingMethod === 'semantic' || chunkingMethod === 'agentic' + ? DEFAULT_STATE.chunking.model + : ''), + language: + chunkingLanguage || + (chunkingMethod === 'code' + ? DEFAULT_STATE.chunking.language + : '') + }, defaults: {chunkSize, chunkOverlap, topK}, embedding: { type: embeddingType, @@ -1130,6 +1219,95 @@ function ConnectorCard({ ) } +function ChunkerCard({ + id, + label, + description, + status, + docsHref, + selected, + onToggle +}: { + id: string + label?: string + description?: string + status?: 'available' | 'coming-soon' + docsHref?: string | null + selected: boolean + onToggle: () => void +}) { + const isAvailable = status !== 'coming-soon' + + return ( + +
+
+ +
+
+
+ + {label || id} + + + {isAvailable ? 'available' : 'coming soon'} + + {isAvailable && docsHref ? ( +
+ +
+ ) : null} +
+ {description && ( +

+ {description} +

+ )} +
+
+ {selected && } +
+
+
+ ) +} + function BatteryCard({ id, displayName, @@ -1376,6 +1554,12 @@ export default function InstallWizardClient() { .sort((a, b) => String(a.id).localeCompare(String(b.id))) }, [manifest]) + const availableChunkers = useMemo(() => { + return (manifest?.chunkers ?? []) + .slice() + .sort((a, b) => String(a.id).localeCompare(String(b.id))) + }, [manifest]) + const availableBatteries = useMemo(() => { return (manifest?.batteries ?? []) .slice() @@ -1398,6 +1582,14 @@ export default function InstallWizardClient() { return m }, [manifest]) + const chunkerDocsById = useMemo(() => { + const m = new Map() + for (const c of manifest?.chunkers ?? []) { + m.set(String(c.id), c.docsPath ?? null) + } + return m + }, [manifest]) + const embeddingModelOptionsAll = useMemo(() => { return EMBEDDING_MODELS_BY_PROVIDER[state.embedding.provider] ?? [] }, [state.embedding.provider]) @@ -1420,6 +1612,44 @@ export default function InstallWizardClient() { return m }, [embeddingModelOptionsAll]) + const semanticChunkerModelValue = useMemo(() => { + if (state.chunking.method !== 'semantic') { + return CHUNKER_MODEL_DEFAULT_VALUE + } + if ( + !state.chunking.model || + state.chunking.model === CHUNKER_MODEL_DEFAULT_VALUE + ) { + return CHUNKER_MODEL_DEFAULT_VALUE + } + return SEMANTIC_CHUNKER_MODELS.includes(state.chunking.model) + ? state.chunking.model + : CHUNKER_MODEL_CUSTOM_VALUE + }, [state.chunking.method, state.chunking.model]) + + const isCustomSemanticChunkerModel = + state.chunking.method === 'semantic' && + semanticChunkerModelValue === CHUNKER_MODEL_CUSTOM_VALUE + + const agenticChunkerModelValue = useMemo(() => { + if (state.chunking.method !== 'agentic') { + return CHUNKER_MODEL_DEFAULT_VALUE + } + if ( + !state.chunking.model || + state.chunking.model === CHUNKER_MODEL_DEFAULT_VALUE + ) { + return CHUNKER_MODEL_DEFAULT_VALUE + } + return SEMANTIC_CHUNKER_MODELS.includes(state.chunking.model) + ? state.chunking.model + : CHUNKER_MODEL_CUSTOM_VALUE + }, [state.chunking.method, state.chunking.model]) + + const isCustomAgenticChunkerModel = + state.chunking.method === 'agentic' && + agenticChunkerModelValue === CHUNKER_MODEL_CUSTOM_VALUE + const selectedEmbeddingModelOption = embeddingModelOptionById.get( state.embedding.model ) @@ -1504,7 +1734,9 @@ export default function InstallWizardClient() { )?.name ?? state.embedding.provider, extractorCount: state.modules.extractors.length, connectorCount: state.modules.connectors.length, - batteryCount: state.modules.batteries.length + batteryCount: state.modules.batteries.length, + chunkerCount: state.modules.chunkers.length, + chunkingMethod: state.chunking.method } }, [state]) @@ -2238,204 +2470,218 @@ export default function InstallWizardClient() { ) : null} - -
- ({ - ...prev, - embedding: { - ...prev.embedding, - model: v + onValueChange={(v) => { + if ( + v === + CUSTOM_MODEL_VALUE + ) { + setForceCustomEmbeddingModel( + true + ) + return } - })) - }} - > - - -
-
- {(() => { - return ( - - ) - })()} -
- - { - embeddingTriggerLabel - } - -
-
-
- - {embeddingModelOptions.map( - (opt) => { - const Icon = - opt.icon - return ( - - - + setForceCustomEmbeddingModel( + false + ) + setState((prev) => ({ + ...prev, + embedding: { + ...prev.embedding, + model: v + } + })) + }} + > + + +
+
+ {(() => { + return ( + ) + })()} +
+ + { + embeddingTriggerLabel + } + +
+
+
+ + {embeddingModelOptions.map( + (opt) => { + const Icon = + opt.icon + return ( + + + + + + + { + opt.label } - className="text-olive-950/85 dark:text-white/85" - aria-label={ + + + { opt.providerLabel } - /> - - - { - opt.label - } - - - { - opt.providerLabel - } + -
-
- ) - } - )} - - - - - - - - Custom model… + + ) + } + )} + + + + + + + + Custom + model… + - - -
- + + + - {isCustomEmbeddingModel ? ( -
- { - setForceCustomEmbeddingModel( - true - ) - setState( - (prev) => ({ - ...prev, - embedding: { - ...prev.embedding, - model: e - .target - .value - } - }) - ) - }} - placeholder={ - MODEL_PLACEHOLDER_BY_PROVIDER[ + {isCustomEmbeddingModel ? ( +
+ -
- {state.embedding - .type === - 'multimodal' - ? 'Make sure this model supports image embeddings.' - : state.embedding - .provider === - 'ai' - ? 'Tip: for AI Gateway, use the AI SDK model id (e.g. openai/text-embedding-3-small).' - : 'Tip: use the provider-native model id (see provider docs).'} + .model + } + onChange={(e) => { + setForceCustomEmbeddingModel( + true + ) + setState( + (prev) => ({ + ...prev, + embedding: + { + ...prev.embedding, + model: e + .target + .value + } + }) + ) + }} + placeholder={ + MODEL_PLACEHOLDER_BY_PROVIDER[ + state + .embedding + .provider + ] ?? 'model-id' + } + className="bg-white border-olive-950/10 text-olive-950 font-mono text-sm placeholder:text-olive-500 focus:border-olive-950/20 dark:bg-white/[0.03] dark:border-[#757572]/20 dark:text-white dark:placeholder:text-white/30 dark:focus:border-[#757572]/30" + /> +
+ {state.embedding + .type === + 'multimodal' + ? 'Make sure this model supports image embeddings.' + : state + .embedding + .provider === + 'ai' + ? 'Tip: for AI Gateway, use the AI SDK model id (e.g. openai/text-embedding-3-small).' + : 'Tip: use the provider-native model id (see provider docs).'} +
-
- ) : null} -
- + ) : null} +
+
-
- + - - - - - - + setState((prev) => { + const method = + String(v) + .trim() + .toLowerCase() + const isBuiltIn = + method === + 'recursive' || + method === + 'token' || + method === + 'custom' + const nextChunkers = + isBuiltIn + ? prev + .modules + .chunkers + : prev.modules.chunkers.includes( + method + ) + ? prev + .modules + .chunkers + : [ + ...prev + .modules + .chunkers, + method + ].sort() + return { + ...prev, + modules: { + ...prev.modules, + chunkers: + nextChunkers + }, + chunking: { + ...prev.chunking, + method + } + } + }) + } + > + + + + { + state + .chunking + .method + } + + + + + +
+ + recursive + + + default + +
+
+ + + token + + + + {(availableChunkers.length > + 0 + ? availableChunkers + : [ + { + id: 'agentic' + }, + { + id: 'code' + }, + { + id: 'hierarchical' + }, + { + id: 'markdown' + }, + { + id: 'semantic' + } + ] + ) + .filter( + (c) => + String( + ( + c as unknown as { + status?: unknown + } + ) + .status ?? + 'available' + ) === + 'available' + ) + .map((c) => { + const id = + String( + ( + c as unknown as { + id?: unknown + } + ).id + ) + return ( + +
+ + { + id + } + + + plugin + +
+
+ ) + })} +
+ +
+ + + + + + + + + + + + +
+ + {state.chunking.method === + 'semantic' ? ( +
+ + + + {isCustomSemanticChunkerModel ? ( +
+ + setState( + ( + prev + ) => ({ + ...prev, + chunking: + { + ...prev.chunking, + model: e + .target + .value + } + }) + ) + } + placeholder="openai/gpt-5-mini" + className="bg-white border-olive-950/10 text-olive-950 font-mono text-sm placeholder:text-olive-500 focus:border-olive-950/20 dark:bg-white/[0.03] dark:border-[#757572]/20 dark:text-white dark:placeholder:text-white/30 dark:focus:border-[#757572]/30" + /> +
+ ) : null} +
+
+ ) : state.chunking.method === + 'agentic' ? ( +
+ + + + {isCustomAgenticChunkerModel ? ( +
+ + setState( + ( + prev + ) => ({ + ...prev, + chunking: + { + ...prev.chunking, + model: e + .target + .value + } + }) + ) + } + placeholder="openai/gpt-5-mini" + className="bg-white border-olive-950/10 text-olive-950 font-mono text-sm placeholder:text-olive-500 focus:border-olive-950/20 dark:bg-white/[0.03] dark:border-[#757572]/20 dark:text-white dark:placeholder:text-white/30 dark:focus:border-[#757572]/30" + /> +
+ ) : null} +
+
+ ) : state.chunking.method === + 'code' ? ( +
+
+ + + +
+
+ ) : null} + +
+ This sets `chunking.method` and + `chunking.options` in + `unrag.config.ts`. If you pick a + plugin method, the wizard will + also install the corresponding + chunker module. +
+ + +
+
+
+
+ Optional chunkers to + install +
+
+ Install extra chunkers + now so you can switch + methods later without + rerunning the CLI. +
+
+
+ +
+ {!manifest ? ( +
+ Loading chunkers... +
+ ) : ( +
+ {(availableChunkers.length > + 0 + ? availableChunkers + : [ + { + id: 'semantic', + label: 'semantic', + description: + 'LLM-guided semantic chunking for general text', + status: 'available', + docsPath: + '/docs/chunking/semantic' + }, + { + id: 'markdown', + label: 'markdown', + description: + 'Markdown-aware chunking with fenced code preservation', + status: 'available', + docsPath: + '/docs/chunking/markdown' + }, + { + id: 'hierarchical', + label: 'hierarchical', + description: + 'Section-first chunking with header context', + status: 'available', + docsPath: + '/docs/chunking/hierarchical' + }, + { + id: 'code', + label: 'code', + description: + 'Structure-aware chunking for source code', + status: 'available', + docsPath: + '/docs/chunking/code' + }, + { + id: 'agentic', + label: 'agentic', + description: + 'LLM-guided chunking for highest quality', + status: 'available', + docsPath: + '/docs/chunking/agentic' + } + ] + ).map((c) => { + const id = String( + ( + c as unknown as { + id?: unknown + } + ).id + ) + return ( + + setState( + ( + prev + ) => ({ + ...prev, + modules: + { + ...prev.modules, + chunkers: + toggleInList( + prev + .modules + .chunkers, + id + ) + } + }) + ) + } + /> + ) + })} +
+ )} +
+
+ + {requiredEmbeddingEnvVars.length > 0 ? (
@@ -2985,6 +4005,43 @@ export default function InstallWizardClient() { {summary.embeddingProvider}
+
+
+ Chunking +
+
+ + {summary.chunkingMethod} + +
+
+ + {state.defaults.chunkSize} + + + {' '} + tokens + + + {' '} + • overlap{' '} + + + { + state.defaults + .chunkOverlap + } + +
+ {summary.chunkerCount > 0 ? ( +
+ Installed chunkers:{' '} + {state.modules.chunkers.join( + ', ' + )} +
+ ) : null} +
Extractors @@ -3252,6 +4309,12 @@ export default function InstallWizardClient() { {state.modules.connectors.length}
+
+ Chunkers + + {state.modules.chunkers.length} + +
Batteries @@ -3344,6 +4407,7 @@ export default function InstallWizardClient() { {(state.modules.extractors.length > 0 || state.modules.connectors.length > 0 || + state.modules.chunkers.length > 0 || state.modules.batteries.length > 0) && (
{state.modules.extractors.length > 0 && ( @@ -3378,6 +4442,36 @@ export default function InstallWizardClient() {
)} + {state.modules.chunkers.length > 0 && ( +
+
+ Chunking +
+
+ {state.modules.chunkers.map( + (id) => { + const href = + chunkerDocsById.get( + id + ) ?? '/docs/chunking' + return ( + + {id} + + ) + } + )} +
+
+ )} {state.modules.connectors.length > 0 && (
diff --git a/apps/web/app/install/wizard-types.ts b/apps/web/app/install/wizard-types.ts index bcc672c..4f3d783 100644 --- a/apps/web/app/install/wizard-types.ts +++ b/apps/web/app/install/wizard-types.ts @@ -27,6 +27,13 @@ export type WizardStateV1 = { extractors: string[] connectors: string[] batteries: string[] + chunkers: string[] + } + chunking: { + method: string + minChunkSize: number + model?: string + language?: string } defaults: { chunkSize: number @@ -70,6 +77,13 @@ export type RegistryManifest = { notes?: string }> }> + chunkers?: Array<{ + id: string + label?: string + description?: string + status?: 'available' | 'coming-soon' + docsPath?: string | null + }> batteries?: Array<{ id: string displayName?: string diff --git a/apps/web/components/home/hero.tsx b/apps/web/components/home/hero.tsx index f5403e3..564bea5 100644 --- a/apps/web/components/home/hero.tsx +++ b/apps/web/components/home/hero.tsx @@ -98,8 +98,8 @@ export function HeroSection() { className="py-16" eyebrow={ } diff --git a/apps/web/content/docs/(unrag)/chunking/agentic.mdx b/apps/web/content/docs/(unrag)/chunking/agentic.mdx new file mode 100644 index 0000000..516db9c --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/agentic.mdx @@ -0,0 +1,201 @@ +--- +title: Agentic Chunking +description: LLM-powered chunking optimized for maximum retrieval quality. +--- + +Most chunking methods focus on one goal: split documents into reasonable pieces. They respect structure (markdown chunking), identify topic boundaries (semantic chunking), or keep code units intact (code chunking). These are sensible approaches, and they work well. + +Agentic chunking takes a different perspective. Instead of asking "where should we split?", it asks "what chunks would best serve retrieval?" The LLM considers how users might query this content and structures chunks to maximize the chance of returning useful results. It's optimization for the end goal, not just for the splitting process. + +## What makes agentic chunking different + +The distinction between semantic and agentic chunking is subtle but important. + +**Semantic chunking** finds natural boundaries. It looks at content and asks: where do topics shift? Where do ideas complete? The goal is coherence—chunks that are internally consistent and don't awkwardly split mid-thought. + +**Agentic chunking** optimizes for retrieval. It looks at content and asks: what would users search for? What chunks would best answer their questions? The goal is queryability—chunks that are likely to match user intent and provide useful answers. + +Consider documentation about a software feature: + +``` +The export feature allows users to download their data in multiple formats. +CSV exports include all fields by default, while JSON exports use a nested +structure that mirrors the API response format. + +To export data, navigate to Settings > Data > Export. Select the format and +date range, then click "Generate Export". Large exports may take several +minutes to process. You'll receive an email when the export is ready. + +Export files are available for download for 7 days. After that, you'll need +to generate a new export. Enterprise customers can configure automatic +scheduled exports via the API. +``` + +Semantic chunking might produce two chunks: one about export formats, one combining the how-to instructions with availability information (since those paragraphs flow into each other). + +Agentic chunking might produce three chunks, each optimized for a different query pattern: +1. "What export formats are available?" → format descriptions +2. "How do I export my data?" → step-by-step instructions +3. "How long are exports available?" → availability and enterprise features + +The agentic chunker anticipates what users will ask and structures content to match. + +## Installation + +```bash +bunx unrag add chunker:agentic +``` + +This installs the agentic chunker plugin. It uses the AI SDK, which should already be present if you're using Unrag's embedding features. + +## Configuration + +Enable agentic chunking in your `unrag.config.ts`: + +```ts +export default defineUnragConfig({ + chunking: { + method: "agentic", + options: { + chunkSize: 512, + chunkOverlap: 50, + model: "gpt-4o", // optional: specify LLM model + }, + }, + // ... +}); +``` + +## When agentic chunking is worth it + +Agentic chunking is the most expensive option in Unrag's chunking toolkit. Every document requires an LLM call. For large corpuses, costs add up quickly. So when is it worth it? + +**High-value content where retrieval quality directly impacts business outcomes.** If poor search results mean lost customers, confused users, or missed sales, the cost of better chunking is easy to justify. + +**Customer-facing search and support systems.** Users have low tolerance for irrelevant results. When someone searches your help center, they expect the first result to answer their question. Agentic chunking maximizes that likelihood. + +**Content that will be queried frequently.** If a document will be searched thousands of times, spending an extra $0.03 to chunk it optimally has high ROI. The upfront cost is amortized across many retrievals. + +**Complex, nuanced documents.** Legal contracts, medical protocols, financial regulations—content where precision matters and the difference between a good and mediocre chunk could have real consequences. + +**When you've tried other chunkers and retrieval quality isn't good enough.** Agentic chunking is a tool to reach for when simpler approaches fall short, not a default starting point. + +## When to use simpler alternatives + +**Bulk ingestion of large corpuses.** If you're indexing 100,000 documents, agentic chunking at $0.03/document is $3,000. Consider using semantic or recursive chunking for the bulk, and reserve agentic chunking for the most important content. + +**Structured content.** Markdown with clear headings, code with function boundaries—these have explicit structure that rule-based chunkers handle well. The LLM's intelligence is less valuable when structure is already clear. + +**Latency-sensitive pipelines.** Each agentic chunking call takes 2-5 seconds (LLM inference time). For real-time or near-real-time ingestion, this may be unacceptable. + +**Experimental or rapidly-changing content.** If you're iterating on content that will be replaced soon, the premium for optimal chunking has less value. + +## Cost breakdown + +Agentic chunking costs vary by model and document size. Here are rough estimates for a 10,000-token document: + +| Model | Input Cost | Output Cost | Total | +|-------|------------|-------------|-------| +| gpt-4o-mini | ~$0.002 | ~$0.001 | ~$0.003 | +| gpt-4o | ~$0.025 | ~$0.010 | ~$0.035 | +| claude-3.5-sonnet | ~$0.030 | ~$0.015 | ~$0.045 | +| claude-3-opus | ~$0.150 | ~$0.075 | ~$0.225 | + +The input cost dominates because the entire document is sent to the LLM. Output is relatively small—just boundary markers or restructured text. + +**Practical recommendations:** + +Start with `gpt-4o-mini`. It's remarkably capable for chunking tasks and costs an order of magnitude less than larger models. The chunking prompt is straightforward; you don't need GPT-4's full reasoning capacity for most content. + +Use `gpt-4o` for complex content. If your documents have subtle nuances, multiple interleaved topics, or require sophisticated judgment about what users might search for, the extra capability helps. + +Reserve `claude-3-opus` or equivalent for critical content. Legal documents, compliance materials, content where getting it wrong has real consequences—these justify the premium. + +## How the LLM is prompted + +The agentic chunker sends your content to the LLM with instructions focused on retrieval optimization: + +- Consider what queries users might ask about this content +- Group information that would answer the same query together +- Keep related context together even if it spans formatting boundaries +- Never split mid-explanation or mid-example +- Create chunks that would be useful standalone search results + +The LLM returns either explicit boundary markers or restructured content. Unrag then enforces token limits, adds overlap, and produces the final chunks. + +## Fallback behavior + +LLM calls can fail. Rate limits, network issues, malformed responses—these happen, especially at scale. The agentic chunker handles failures gracefully. + +When the LLM call fails, the chunker falls back to sentence-based splitting. Your document still gets chunked and ingested, just without the retrieval optimization. The ingest result includes a warning you can monitor: + +```ts +const result = await engine.ingest({ sourceId, content }); + +for (const warning of result.warnings) { + if (warning.code === "agentic_fallback") { + // Log for later retry + await logForRetry(sourceId, warning.message); + } +} +``` + +For critical content, you might want to catch fallbacks and retry during off-peak hours or with a different model. + +## Practical example + +Here's how you might use agentic chunking for a knowledge base where search quality is critical: + +```ts +import { createUnragEngine } from "@unrag/config"; + +const engine = createUnragEngine(); + +async function ingestSupportArticle(article: Article) { + const result = await engine.ingest({ + sourceId: `support:${article.id}`, + content: article.body, + metadata: { + title: article.title, + category: article.category, + lastUpdated: article.updatedAt, + }, + }); + + // Track chunking quality for monitoring + if (result.warnings.some(w => w.code === "agentic_fallback")) { + console.warn(`Agentic chunking failed for ${article.id}, using fallback`); + await metrics.increment("chunking.agentic_fallback"); + } + + console.log(`Chunked "${article.title}" into ${result.chunkCount} chunks`); + return result; +} +``` + +The agentic chunker analyzes each article, considering how support users might search for help, and creates chunks optimized to match those queries. + +## Agentic vs semantic: a comparison + +Both methods use LLMs with similar costs. Here's how they differ: + +| Aspect | Semantic | Agentic | +|--------|----------|---------| +| Goal | Find natural boundaries | Optimize for retrieval | +| Question asked | "Where do topics change?" | "What would users search for?" | +| Chunk characteristic | Coherent | Queryable | +| Best for | Narrative content | Search-critical content | +| Predictability | More predictable | Less predictable | + +For most LLM-chunking use cases, start with semantic. It produces reliable, coherent chunks at the same cost. Switch to agentic when you've identified that retrieval quality is the specific bottleneck you need to address. + +## Monitoring and iteration + +Agentic chunking's value shows up in retrieval quality, not ingestion metrics. After implementing it, monitor: + +- **Relevance scores** — Are retrieved chunks more relevant to queries? +- **User feedback** — Are search results solving user problems? +- **Click-through rates** — Do users find what they need faster? +- **Escalation rates** — (For support) Are users escalating less after searching? + +If metrics improve, the investment is paying off. If they don't, the content might already be well-suited to simpler chunking, or the retrieval problem lies elsewhere (embedding model, reranking, query formulation). diff --git a/apps/web/content/docs/(unrag)/chunking/code.mdx b/apps/web/content/docs/(unrag)/chunking/code.mdx new file mode 100644 index 0000000..f77a77b --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/code.mdx @@ -0,0 +1,230 @@ +--- +title: Code Chunking +description: AST-based chunking that splits code at function and class boundaries. +--- + +Source code isn't prose. It has explicit structure—functions, classes, type definitions, imports—that determines how it should be split. A function is a logical unit. Cutting it in half produces two fragments that neither make sense alone nor embed usefully for retrieval. + +The code chunker understands this structure. It parses source code into an Abstract Syntax Tree (AST) using tree-sitter, identifies meaningful boundaries like function and class definitions, and splits there. The result is chunks that contain complete, coherent units of code. + +## Why AST-based chunking matters + +Consider a simple TypeScript file: + +```ts +import { db } from "./db"; + +interface User { + id: string; + name: string; + email: string; +} + +export async function getUser(id: string): Promise { + const result = await db.query("SELECT * FROM users WHERE id = $1", [id]); + return result.rows[0] ?? null; +} + +export async function createUser(data: Omit): Promise { + const id = crypto.randomUUID(); + await db.query( + "INSERT INTO users (id, name, email) VALUES ($1, $2, $3)", + [id, data.name, data.email] + ); + return { id, ...data }; +} +``` + +A token-based chunker might split this mid-function, producing a chunk that ends with `const result = await db.query(` and another that starts with `"SELECT * FROM users...`. Neither chunk is useful. The first has incomplete code. The second has no context about what function it's in or what `result` is for. + +The code chunker produces three chunks: +1. The imports and interface definition +2. The `getUser` function, complete +3. The `createUser` function, complete + +When a user searches for "how to create a user," they get back the complete `createUser` function—not a fragment that's missing its opening signature or closing brace. + +## Installation + +```bash +bunx unrag add chunker:code +``` + +This installs tree-sitter and language grammars for TypeScript, JavaScript, Python, and Go. These are native dependencies that provide fast, accurate parsing. + +## Configuration + +Enable code chunking in your `unrag.config.ts`: + +```ts +export default defineUnragConfig({ + chunking: { + method: "code", + options: { + chunkSize: 512, + chunkOverlap: 50, + language: "typescript", // optional: auto-detected from sourceId + }, + }, + // ... +}); +``` + +## Supported languages + +The code chunker currently supports four languages: + +**TypeScript** (`.ts`, `.tsx`) — Functions, classes, interfaces, type aliases, enums. The parser handles modern TypeScript including decorators, generics, and JSX. + +**JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) — Functions, classes, and arrow function expressions. ES modules and CommonJS are both supported. + +**Python** (`.py`) — Functions, classes, and decorated definitions. The parser handles Python 3 syntax including async functions and type hints. + +**Go** (`.go`) — Functions, methods, and type declarations. Package-level organization is preserved. + +Each language has different AST node types, and the chunker knows which ones represent meaningful boundaries in that language. + +## Language detection + +The code chunker can auto-detect the programming language from context. You don't need to specify it explicitly for every file. + +**Detection from sourceId:** If your source ID looks like a file path with an extension, the chunker uses that extension: + +```ts +await engine.ingest({ + sourceId: "src/utils/helpers.ts", // Detected as TypeScript + content: codeContent, +}); +``` + +**Detection from metadata:** You can provide a file path in metadata: + +```ts +await engine.ingest({ + sourceId: "code:12345", + content: codeContent, + metadata: { filePath: "lib/main.py" }, // Detected as Python +}); +``` + +**Explicit override:** For cases where detection doesn't work, specify the language directly: + +```ts +await engine.ingest({ + sourceId: "snippet:clipboard-paste", + content: codeContent, + chunking: { language: "go" }, +}); +``` + +If language detection fails and no override is provided, the chunker falls back to treating the content as plain text and using token-based splitting. + +## How the chunker works + +The code chunker processes files in several steps: + +**Parse the AST.** Tree-sitter reads the source code and builds a syntax tree. This is fast—tree-sitter is designed for IDE use cases where parsing happens on every keystroke. + +**Identify major boundaries.** The chunker walks the AST looking for top-level definitions: functions, classes, interfaces, type declarations. These are the natural units of code. + +**Create chunks from definitions.** Each major definition becomes a chunk (or part of a chunk). Imports and small declarations at the top of the file are grouped together. + +**Respect token limits.** If a single function exceeds `chunkSize`, the chunker must split it. It does so at statement boundaries when possible—between lines rather than mid-expression. + +**Merge small pieces.** Tiny chunks (under `minChunkSize`) are combined with neighbors. A single-line type alias might be grouped with the function that uses it. + +## When to use code chunking + +Code chunking is ideal when you're building search over source code: + +- **Codebase search** — Help developers find relevant code across a large repository +- **Code documentation** — Index code alongside its documentation for unified search +- **Code assistants** — Provide accurate context for LLM-powered coding help +- **Onboarding tools** — Help new developers discover how things work + +The chunker handles multiple languages in the same index. You can ingest TypeScript, Python, and Go files together, and each will be chunked appropriately based on its language. + +## When to use something else + +Code chunking is specifically for source code files. For other content types: + +- **Markdown with code blocks** — Use the [Markdown Chunker](/docs/chunking/markdown). It keeps code blocks intact while also respecting markdown structure like headings. + +- **Prose documentation** — Use the [Recursive Chunker](/docs/chunking/recursive) or [Semantic Chunker](/docs/chunking/semantic). They're optimized for natural language, not programming language. + +- **Unsupported languages** — If you're working with Rust, Ruby, C#, or other languages not yet supported, you'll get fallback text-based chunking. Consider contributing a grammar or using custom chunking logic. + +## A practical example + +Here's how you might index a TypeScript project: + +```ts +import { createUnragEngine } from "@unrag/config"; +import { glob } from "glob"; +import { readFile } from "fs/promises"; + +const engine = createUnragEngine(); + +async function indexCodebase(rootDir: string) { + const files = await glob(`${rootDir}/**/*.{ts,tsx}`, { + ignore: ["**/node_modules/**", "**/dist/**"], + }); + + for (const filePath of files) { + const content = await readFile(filePath, "utf-8"); + + await engine.ingest({ + sourceId: filePath, // Language detected from .ts/.tsx extension + content, + metadata: { + type: "source-code", + language: "typescript", + }, + }); + } + + console.log(`Indexed ${files.length} files`); +} + +await indexCodebase("./src"); +``` + +Each TypeScript file is parsed, split at function and class boundaries, and indexed with chunks that represent complete, searchable units of code. + +## Handling parse failures + +Not all code is valid. You might have work-in-progress files with syntax errors, or experimental code that tree-sitter's grammar doesn't handle. The code chunker degrades gracefully in these situations. + +When parsing fails, the chunker falls back to treating the file as plain text. It uses line-based splitting to stay within token limits, preferring to split at blank lines when possible. The result isn't as semantically meaningful as AST-based chunks, but ingestion doesn't fail. + +You can detect parse failures through ingest warnings: + +```ts +const result = await engine.ingest({ sourceId, content }); + +for (const warning of result.warnings) { + if (warning.code === "code_parse_fallback") { + console.warn(`Parse failed for ${sourceId}, using text fallback`); + } +} +``` + +This lets you track which files might benefit from manual review or re-ingestion once syntax issues are fixed. + +## Dependencies + +The code chunker relies on tree-sitter for parsing. These are native dependencies: + +```json +{ + "dependencies": { + "tree-sitter": "^0.22.6", + "tree-sitter-typescript": "^0.21.2", + "tree-sitter-javascript": "^0.21.4", + "tree-sitter-python": "^0.21.0", + "tree-sitter-go": "^0.21.0" + } +} +``` + +These are installed automatically when you run `bunx unrag add chunker:code`. The native bindings compile during installation, so you'll need a working C/C++ toolchain on your system. Most development machines have this already; if you encounter build errors, check that you have build tools installed (Xcode Command Line Tools on macOS, build-essential on Ubuntu, etc.). diff --git a/apps/web/content/docs/(unrag)/chunking/custom.mdx b/apps/web/content/docs/(unrag)/chunking/custom.mdx new file mode 100644 index 0000000..9ba97b3 --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/custom.mdx @@ -0,0 +1,332 @@ +--- +title: Custom Chunking +description: Build your own chunker for specialized content or unique requirements. +--- + +Unrag's built-in chunkers cover common cases well. Recursive chunking handles prose. Markdown chunking handles documentation. Code chunking handles source files. But your content might be different. Maybe you're processing legal documents with specific section numbering. Maybe you need to handle a mix of languages with different splitting rules. Maybe your domain has conventions that no generic chunker would understand. + +Custom chunking gives you complete control. You write a function that takes content and returns chunks. Unrag handles everything else—token counting utilities, integration with the ingest pipeline, embedding and storage. Your chunker just focuses on the splitting logic. + +## The chunker interface + +A chunker is a function with a simple signature: + +```ts +type Chunker = ( + content: string, + options: ChunkingOptions +) => ChunkText[] | Promise; +``` + +It receives the document content and configuration options. It returns an array of chunks. That's it. The function can be synchronous or asynchronous, depending on whether your logic needs to make network calls or process asynchronously. + +The `ChunkingOptions` type includes the standard parameters: + +```ts +type ChunkingOptions = { + chunkSize: number; // Maximum tokens per chunk + chunkOverlap: number; // Tokens to repeat at boundaries + minChunkSize?: number; // Minimum tokens per chunk + separators?: string[]; // Optional custom separator list + // Plus any custom options you add +}; +``` + +Each chunk you return has this structure: + +```ts +type ChunkText = { + index: number; // Position in document (0, 1, 2, ...) + content: string; // The chunk text + tokenCount: number; // Token count for this chunk +}; +``` + +The `index` field orders chunks within the document. Unrag uses this for overlap calculation and to preserve document structure in storage. + +## Configuring a custom chunker + +Once you've written your chunker function, register it in your config: + +```ts +import { defineUnragConfig, countTokens } from "unrag"; +import type { Chunker, ChunkText, ChunkingOptions } from "unrag"; + +const myChunker: Chunker = (content: string, options: ChunkingOptions): ChunkText[] => { + // Your splitting logic here + const parts = splitByYourRules(content); + + return parts.map((text, index) => ({ + index, + content: text, + tokenCount: countTokens(text), + })); +}; + +export default defineUnragConfig({ + chunking: { + method: "custom", + chunker: myChunker, + options: { + chunkSize: 512, + chunkOverlap: 50, + }, + }, + // ... +}); +``` + +The `method: "custom"` tells Unrag to use the function you provide via `chunker` rather than a built-in method. + +## Using the token counting utility + +Accurate token counts are essential for chunking. Unrag exports a `countTokens` function that uses the same `o200k_base` tokenizer as the default chunker: + +```ts +import { countTokens } from "unrag"; + +const tokens = countTokens("Hello world"); // 2 +const docTokens = countTokens(longDocument); // exact count +``` + +This matches what OpenAI's embedding models will see. If you're using a different embedding provider with a different tokenizer, you might need your own token counting logic, but for most cases `countTokens` is what you want. + +Always use token counts in your `ChunkText` return values. Unrag uses these for overlap calculations and to validate that chunks stay within limits. + +## Example: Sentence-based chunker + +Here's a complete chunker that never splits mid-sentence. It accumulates sentences until adding another would exceed the token limit, then starts a new chunk: + +```ts +import { countTokens } from "unrag"; +import type { Chunker, ChunkText, ChunkingOptions } from "unrag"; + +const sentenceChunker: Chunker = ( + content: string, + options: ChunkingOptions +): ChunkText[] => { + const { chunkSize, minChunkSize = 24 } = options; + + // Split on sentence boundaries (period, question mark, exclamation mark followed by space) + // The regex uses a lookbehind to keep the punctuation with the sentence + const sentences = content.split(/(?<=[.!?])\s+/).filter(s => s.trim()); + + const chunks: ChunkText[] = []; + let currentText = ""; + let currentTokens = 0; + let chunkIndex = 0; + + for (const sentence of sentences) { + const sentenceTokens = countTokens(sentence); + const spaceTokens = currentText ? 1 : 0; // Space between sentences + + // Would adding this sentence exceed the limit? + if (currentTokens + sentenceTokens + spaceTokens > chunkSize && currentText) { + // Only save if it meets minimum size + if (currentTokens >= minChunkSize) { + chunks.push({ + index: chunkIndex++, + content: currentText.trim(), + tokenCount: currentTokens, + }); + } + currentText = ""; + currentTokens = 0; + } + + // Add sentence to current chunk + currentText += (currentText ? " " : "") + sentence; + currentTokens += sentenceTokens + spaceTokens; + } + + // Don't forget the final chunk + if (currentText.trim() && currentTokens >= minChunkSize) { + chunks.push({ + index: chunkIndex++, + content: currentText.trim(), + tokenCount: countTokens(currentText.trim()), + }); + } + + return chunks; +}; +``` + +This chunker respects natural language boundaries. No sentence is ever cut in half. The tradeoff is that chunks might be smaller than optimal if sentences are long, but each chunk is guaranteed to be grammatically complete. + +## Example: Legal document chunker + +Legal documents often have explicit section structure: "1.", "1.1", "Section 2", etc. A custom chunker can split at these markers: + +```ts +import { countTokens } from "unrag"; +import type { Chunker, ChunkText, ChunkingOptions } from "unrag"; + +const legalChunker: Chunker = ( + content: string, + options: ChunkingOptions +): ChunkText[] => { + const { chunkSize, minChunkSize = 24 } = options; + + // Pattern matches section numbers at line start + // "1.", "1.1", "1.1.1", "Section 1", "ARTICLE II", etc. + const sectionPattern = /(?=(?:^|\n)(?:\d+\.[\d.]*|\bSection\s+\d+|\bARTICLE\s+[IVXLCDM]+))/gi; + const sections = content.split(sectionPattern).filter(s => s.trim()); + + const chunks: ChunkText[] = []; + let chunkIndex = 0; + + for (const section of sections) { + const sectionTokens = countTokens(section); + + if (sectionTokens <= chunkSize) { + // Section fits in one chunk + if (sectionTokens >= minChunkSize) { + chunks.push({ + index: chunkIndex++, + content: section.trim(), + tokenCount: sectionTokens, + }); + } + } else { + // Section too large—split by paragraphs within the section + const paragraphs = section.split(/\n\n+/).filter(p => p.trim()); + let current = ""; + let currentTokens = 0; + + for (const para of paragraphs) { + const paraTokens = countTokens(para); + + if (currentTokens + paraTokens > chunkSize && current) { + if (currentTokens >= minChunkSize) { + chunks.push({ + index: chunkIndex++, + content: current.trim(), + tokenCount: currentTokens, + }); + } + current = ""; + currentTokens = 0; + } + + current += (current ? "\n\n" : "") + para; + currentTokens += paraTokens; + } + + if (current.trim() && currentTokens >= minChunkSize) { + chunks.push({ + index: chunkIndex++, + content: current.trim(), + tokenCount: countTokens(current.trim()), + }); + } + } + } + + return chunks; +}; +``` + +This chunker preserves legal document structure. Each numbered section becomes a chunk (or multiple chunks if large). When users search for "Section 3.2 liability provisions," they get back the complete section, not a fragment that starts mid-paragraph. + +## Example: Async chunker with LLM + +Sometimes you want human-like judgment in your chunking logic but need control the built-in LLM chunkers don't provide. You can build an async chunker that calls an LLM: + +```ts +import { generateText } from "ai"; +import { openai } from "@ai-sdk/openai"; +import { countTokens } from "unrag"; +import type { Chunker, ChunkText, ChunkingOptions } from "unrag"; + +const llmChunker: Chunker = async ( + content: string, + options: ChunkingOptions +): Promise => { + const { chunkSize } = options; + + // Custom prompt for your specific use case + const { text } = await generateText({ + model: openai("gpt-4o-mini"), + prompt: `You are chunking a customer support article for search. +Split this text into chunks of roughly ${chunkSize} tokens each. +Each chunk should be a self-contained answer to a potential user question. +Return the text with "---SPLIT---" markers where chunks should divide. +Never split mid-paragraph or mid-sentence. + +Text to chunk: +${content}`, + }); + + // Parse LLM response + const parts = text + .split("---SPLIT---") + .map(s => s.trim()) + .filter(Boolean); + + return parts.map((part, index) => ({ + index, + content: part, + tokenCount: countTokens(part), + })); +}; +``` + +This gives you the flexibility of LLM-powered chunking with complete control over the prompt. You can tailor the instructions to your specific content type and search patterns. + +## Per-ingest chunker overrides + +You don't have to use your custom chunker for everything. Unrag supports overriding the chunker on a per-ingest basis: + +```ts +import { semanticChunker } from "@unrag/chunking/semantic"; + +// Use your custom chunker by default (from config) +await engine.ingest({ + sourceId: "legal:contract-123", + content: contractContent, +}); + +// Override with a different chunker for specific content +await engine.ingest({ + sourceId: "faq:general", + content: faqContent, + chunker: sentenceChunker, // Your custom sentence-based chunker +}); + +// Or use a built-in chunker +await engine.ingest({ + sourceId: "blog:post-456", + content: blogContent, + chunker: semanticChunker, // Semantic chunker for this ingest only +}); +``` + +This flexibility lets you handle heterogeneous content without multiple engine instances. + +## Best practices + +When building custom chunkers, keep these principles in mind: + +**Always use `countTokens` for accurate token counts.** Estimating based on characters or words leads to chunks that exceed limits or waste space. + +**Respect `chunkSize` as a hard limit.** Chunks should never exceed this value. If a single unit (sentence, section, function) exceeds the limit, you need logic to split it further. + +**Consider `minChunkSize` to avoid tiny fragments.** A chunk with 5 tokens adds noise without value. Merge small chunks with neighbors or filter them out. + +**Return sequential indices starting at 0.** The index field should count 0, 1, 2, ... in order. Unrag uses this for overlap calculation and document reconstruction. + +**Trim whitespace from chunk content.** Chunks shouldn't start or end with extra spaces or newlines. This wastes tokens and creates inconsistent embeddings. + +**Handle edge cases gracefully.** Empty content, single sentences, massive documents without structure—your chunker should handle these without crashing. + +## When to build custom + +Build a custom chunker when: + +- Your content has **domain-specific structure** that generic chunkers don't understand (legal documents, medical records, financial filings) +- You need **language-specific handling** with different rules for different languages +- You want to **combine strategies** based on content detection (use markdown chunking for docs, code chunking for source files) +- Built-in chunkers **consistently produce poor results** for your content type + +For most use cases, start with a built-in chunker. The recursive chunker handles general prose well, and the specialized chunkers (markdown, code, semantic) cover common structured content. Custom chunking is a power tool for when those don't fit. diff --git a/apps/web/content/docs/(unrag)/chunking/hierarchical.mdx b/apps/web/content/docs/(unrag)/chunking/hierarchical.mdx new file mode 100644 index 0000000..19cc412 --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/hierarchical.mdx @@ -0,0 +1,235 @@ +--- +title: Hierarchical Chunking +description: Section-first chunking that preserves header context in every chunk. +--- + +When you chunk a long document, individual chunks can lose their place. A chunk containing "Set the timeout to 5000" might be perfectly clear in context—but in isolation, which timeout? For which feature? In what part of the system? The information is technically present, but the context that makes it useful is gone. + +Hierarchical chunking addresses this by prepending section headers to every chunk. If a chunk comes from "Configuration > Timeouts > Request Settings," that path appears at the top of the chunk. When the chunk is retrieved, the user (or the LLM using it) immediately knows where this information fits in the larger document. + +## How it differs from markdown chunking + +Both hierarchical and markdown chunking understand document structure. Both split at headings and keep code blocks intact. The difference is what happens when a section is large enough to require multiple chunks. + +With **markdown chunking**, only the first chunk of a section includes the heading. Subsequent chunks from the same section start with prose. If the Configuration section splits into three chunks, only the first says "## Configuration." + +With **hierarchical chunking**, every chunk includes the heading. All three chunks from Configuration start with "## Configuration." This makes each chunk self-documenting. You can look at any single chunk and know exactly what section it came from. + +The tradeoff is token overhead. Prepending headers to every chunk uses some of your token budget. A "## Configuration > ### Database Settings" prefix might be 8-10 tokens. For a 512-token chunk, that's about 2% overhead—usually worth it for the context it provides. + +## Installation + +```bash +bunx unrag add chunker:hierarchical +``` + +No additional dependencies required beyond what Unrag already provides. + +## Configuration + +Enable hierarchical chunking in your `unrag.config.ts`: + +```ts +export default defineUnragConfig({ + chunking: { + method: "hierarchical", + options: { + chunkSize: 512, + chunkOverlap: 50, + }, + }, + // ... +}); +``` + +## How it works + +The hierarchical chunker processes documents in these steps: + +**First, it extracts the heading structure.** The chunker scans for headings at all levels (`#` through `######`) and builds a tree representing the document's hierarchy. + +**Second, it splits content by sections.** Each heading starts a new section. Content under a heading belongs to that section until the next heading at the same or higher level appears. + +**Third, it chunks section content.** If a section's body fits within `chunkSize`, it becomes one chunk. If it exceeds the limit, the chunker splits it using recursive token-based splitting, creating multiple chunks from that section. + +**Fourth, it prepends headers.** Each chunk gets its section header prepended. For nested sections, this includes the full path: "# Top Level > ## Section > ### Subsection." + +**Finally, it protects code blocks.** Like the markdown chunker, fenced code blocks are kept intact and not split internally. + +## When hierarchical chunking helps most + +The value of hierarchical chunking increases with document complexity. Consider these scenarios: + +**Reference documentation** with deep nesting benefits significantly. An API reference might have sections for each endpoint, subsections for parameters and responses, and sub-subsections for specific fields. When a chunk about a response field includes "## POST /users > ### Response > #### body.email," the context is immediate. + +**Technical specifications** where similar terms appear in different contexts need this disambiguation. A chunk saying "Set the value to 0" could mean anything. A chunk starting with "## Error Handling > ### Retry Logic" makes the meaning clear. + +**Long documents** where chunks might be retrieved far from their source context benefit from carrying that context with them. If your retrieval results mix chunks from different parts of a 50-page document, headers help users orient. + +**Multi-section searches** where users query across sections work better when chunks identify themselves. A search for "timeout configuration" might return chunks from three different sections; headers help users understand which timeout each chunk discusses. + +## A practical example + +Consider this API documentation: + +```markdown +# API Reference + +This document covers the REST API. + +## Authentication + +All requests require an API key in the header: + +``` +Authorization: Bearer YOUR_API_KEY +``` + +Keys can be generated in the dashboard. Each key has configurable permissions. + +## Endpoints + +### GET /users + +Returns a list of users. Supports pagination via `limit` and `offset` query parameters. + +```json +{ + "users": [...], + "total": 150, + "limit": 20, + "offset": 0 +} +``` + +### POST /users + +Creates a new user. Requires `name` and `email` fields in the request body. + +```json +{ + "name": "Alice", + "email": "alice@example.com" +} +``` + +Returns the created user with its assigned ID. +``` + +With hierarchical chunking, this produces: + +**Chunk 1:** +``` +# API Reference + +This document covers the REST API. +``` + +**Chunk 2:** +``` +## Authentication + +All requests require an API key in the header: + +``` +Authorization: Bearer YOUR_API_KEY +``` + +Keys can be generated in the dashboard. Each key has configurable permissions. +``` + +**Chunk 3:** +``` +### GET /users + +Returns a list of users. Supports pagination via `limit` and `offset` query parameters. + +```json +{ + "users": [...], + "total": 150, + "limit": 20, + "offset": 0 +} +``` +``` + +**Chunk 4:** +``` +### POST /users + +Creates a new user. Requires `name` and `email` fields in the request body. + +```json +{ + "name": "Alice", + "email": "alice@example.com" +} +``` + +Returns the created user with its assigned ID. +``` + +Each chunk starts with its heading. When a user searches for "how to create a user," they get back Chunk 4, which immediately identifies itself as documentation for "POST /users." + +## Long sections and repeated headers + +What happens when a section is long enough to split into multiple chunks? The header is prepended to each one. + +Suppose the "## Authentication" section had much more content—enough for three chunks. Each chunk would start with "## Authentication": + +``` +Chunk 2a: "## Authentication\n\nAll requests require an API key..." +Chunk 2b: "## Authentication\n\nTokens expire after 24 hours..." +Chunk 2c: "## Authentication\n\nFor service-to-service calls..." +``` + +This repetition uses tokens but ensures that any chunk from this section carries its context. When Chunk 2b is retrieved in isolation, the user knows it's about authentication without needing to see the other chunks. + +## Accounting for header overhead + +Because headers are prepended, your effective content per chunk is slightly less than `chunkSize`. The chunker accounts for this—it subtracts the header token count from the available budget before splitting content. You don't need to manually adjust settings. + +However, if you have very long heading paths (deeply nested sections with verbose headings), the overhead can become significant. A header like "# Comprehensive Guide to Advanced Configuration > ## Database Layer > ### PostgreSQL Settings > #### Connection Pooling" uses 20+ tokens. For a 512-token chunk, that's 4% overhead. + +If overhead concerns you, consider: + +1. Using shorter headings in your source documents +2. Increasing `chunkSize` to give more room for content +3. Using markdown chunking for less deeply-nested content + +## Header-only sections + +Some documents have heading placeholders that introduce subsections without substantial content: + +```markdown +## Endpoints + +### GET /users +... + +### POST /users +... +``` + +The "## Endpoints" heading has no body—it just introduces the subsections. The hierarchical chunker handles this by creating a minimal chunk containing just the heading, or by merging it with the first subsection depending on `minChunkSize` settings. + +If you have many such structural headings, you might want a lower `minChunkSize` to preserve them as navigation markers, or a higher value to merge them away. + +## Choosing between markdown and hierarchical chunking + +Both chunkers understand markdown structure. Here's how to decide: + +Choose **markdown chunking** when: +- Chunks are usually self-contained (one chunk per section) +- You want minimal overhead +- Headers are long or deeply nested +- Your content is code-heavy and headers add limited value + +Choose **hierarchical chunking** when: +- Sections often split into multiple chunks +- Context is critical and worth the overhead +- Your documentation is reference-style with users querying specific topics +- You're building search where users need to understand where each result fits + +For many projects, both work well. Try markdown chunking first since it's simpler, and switch to hierarchical if you find that chunks are losing context in retrieval results. diff --git a/apps/web/content/docs/(unrag)/chunking/index.mdx b/apps/web/content/docs/(unrag)/chunking/index.mdx new file mode 100644 index 0000000..6d94d76 --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/index.mdx @@ -0,0 +1,139 @@ +--- +title: Chunking Overview +description: How documents are split into chunks and why it matters for retrieval quality. +--- + +Before Unrag can embed your documents and make them searchable, it needs to break them into smaller pieces. This process—chunking—is one of the most important decisions in any RAG system, and getting it right has a significant impact on retrieval quality. + +## Why chunking matters + +Embedding models turn text into vectors—arrays of numbers that represent semantic meaning. But these models have limits. OpenAI's text-embedding-3-small, for example, can accept up to 8,191 tokens per call. More importantly, even within that limit, longer texts produce less useful embeddings. When you embed a 5,000-word document as a single vector, you get a vague average of everything the document discusses. The nuance of individual paragraphs, the specifics of each section—all of that gets compressed into one point in vector space. + +Chunking solves this by breaking documents into pieces small enough that each embedding captures specific, queryable meaning. When someone searches for "how to configure authentication," you want to return the paragraph that actually explains authentication configuration, not an entire document that happens to mention the word once. + +The tradeoff is that chunking can split information across boundaries. If an important concept spans two paragraphs, your chunks might separate them. When a user's query matches one half, they won't see the other. Overlap helps with this—by repeating some text at chunk boundaries, you increase the chance that related content ends up together. But overlap isn't free; it increases storage and embedding costs. Finding the right balance requires understanding your content and your users' queries. + +## The default: Token-based recursive chunking + +Unrag uses **token-based recursive chunking** by default. The algorithm tries to split text at natural boundaries—paragraphs first, then sentences, then clauses, then words—while counting actual tokens using the `o200k_base` encoding. This is the same tokenizer used by GPT-5, GPT-4o, o1, o3, o4-mini, and gpt-4.1, which means token counts match exactly what OpenAI's embedding models will see. + +The default settings work well for most content: + +- **chunkSize**: 512 tokens—large enough to preserve context, small enough for precise retrieval +- **chunkOverlap**: 50 tokens—enough to bridge ideas that span chunk boundaries +- **minChunkSize**: 24 tokens—prevents tiny fragments that add noise without value + +These numbers aren't magic. They're a reasonable starting point based on how embedding models behave and how users typically search. If your retrieval results feel too vague, try smaller chunks. If results feel like fragments missing context, try larger ones. The [Recursive Chunking](/docs/chunking/recursive) page explains the algorithm in detail. + +## Available chunking methods + +Different content types benefit from different chunking strategies. A legal contract has different structure than a TypeScript file, and both differ from a blog post. Unrag provides several chunking methods, each optimized for specific content: + +**Recursive chunking** (the default) works well for general prose. It respects natural text boundaries and handles mixed content gracefully. If you're not sure which chunker to use, start here. See [Recursive Chunking](/docs/chunking/recursive). + +**Semantic chunking** uses an LLM to identify where topics shift and ideas complete. This produces more coherent chunks than rule-based splitting, but adds cost and latency since every document requires an LLM call. It's ideal for long-form content without clear structural markers. See [Semantic Chunking](/docs/chunking/semantic). + +**Markdown chunking** understands markdown syntax. It splits at headings and horizontal rules while keeping fenced code blocks intact. This is the right choice for documentation, READMEs, and technical guides. See [Markdown Chunking](/docs/chunking/markdown). + +**Code chunking** uses tree-sitter to parse source code and split at function and class boundaries. Rather than cutting mid-function, it keeps complete definitions together. Currently supports TypeScript, JavaScript, Python, and Go. See [Code Chunking](/docs/chunking/code). + +**Hierarchical chunking** splits by section headings like markdown chunking, but goes further by prepending the section header to every chunk. This means each chunk knows where it came from, improving retrieval relevance for structured reference documentation. See [Hierarchical Chunking](/docs/chunking/hierarchical). + +**Agentic chunking** is the most sophisticated option. It uses an LLM not just to find boundaries but to actively optimize chunks for retrieval quality. The model considers what queries users might ask and structures chunks to match. This produces the best results but at the highest cost. See [Agentic Chunking](/docs/chunking/agentic). + +**Custom chunking** gives you full control. When none of the built-in options fit your content, you can implement your own chunker function. See [Custom Chunking](/docs/chunking/custom). + +## Installing plugin chunkers + +The recursive and token chunkers are built into Unrag's core. The others—semantic, markdown, code, hierarchical, and agentic—are plugins that you install when you need them: + +```bash +bunx unrag add chunker:markdown +bunx unrag add chunker:semantic +bunx unrag add chunker:code +bunx unrag add chunker:hierarchical +bunx unrag add chunker:agentic +``` + +Each command installs the chunker's source files into your `lib/unrag/chunking/` directory and registers it so you can reference it by name in your config. + +## Configuration + +Once you've chosen a chunking method, configure it in your `unrag.config.ts`: + +```ts +export default defineUnragConfig({ + chunking: { + method: "markdown", + options: { + chunkSize: 512, + chunkOverlap: 50, + minChunkSize: 24, + }, + }, + // ... +}); +``` + +This becomes the default chunker for all `engine.ingest()` calls. You don't need to think about chunking on every ingest—the engine handles it automatically. + +## Overriding chunking per document + +Sometimes you need different chunking behavior for specific content. A long technical specification might need larger chunks than a FAQ page. Unrag lets you override chunking options on individual ingest calls: + +```ts +// Use larger chunks for this particular document +await engine.ingest({ + sourceId: "specs:system-design-v2", + content: technicalSpec, + chunking: { chunkSize: 768, chunkOverlap: 75 }, +}); +``` + +You can also override the chunking algorithm entirely for a single ingest: + +```ts +import { markdownChunker } from "@unrag/chunking/markdown"; + +// This document is markdown, even though our default is recursive +await engine.ingest({ + sourceId: "docs:readme", + content: readmeContent, + chunker: markdownChunker, +}); +``` + +This flexibility means you can handle heterogeneous content without maintaining multiple engine instances. + +## Choosing the right chunk size + +The `chunkSize` parameter has significant impact on retrieval quality. There's no universally correct value—the right choice depends on your content and how users query it. + +**Smaller chunks** (128-256 tokens) give you precision. Each chunk represents roughly one idea, so when it matches a query, it's likely directly relevant. The downside is loss of context. A chunk might contain the answer to a question but lack the surrounding explanation that makes it useful. Smaller chunks also mean more embeddings, which increases storage and API costs. + +**Medium chunks** (400-600 tokens) balance precision and context. This range works well for most applications. You capture enough surrounding text to preserve meaning while keeping chunks focused enough for accurate matching. + +**Larger chunks** (700-1000 tokens) preserve more context and keep related information together. They're cheaper to store and embed. But they're less precise—a large chunk might match because of one sentence, pulling in paragraphs of irrelevant text alongside it. + +**Very large chunks** (1000+ tokens) are usually too broad for effective semantic search. The embedding becomes a vague average of many topics, making it hard to match specific queries. + +For most applications, start with the default 512 tokens and adjust based on what you observe. If users find results that contain the right information but surrounded by noise, try smaller chunks. If results feel like fragments missing crucial context, try larger ones. + +## Token counting + +Unrag uses token counts rather than character or word counts because embedding models think in tokens. A token is roughly 3-4 characters on average, but the exact mapping depends on the text. "Hello world" is 2 tokens. A complex technical term might be 3-4 tokens. A line of code with symbols might tokenize unexpectedly. + +Unrag exports a `countTokens` utility that uses the same tokenizer as the chunker: + +```ts +import { countTokens } from "unrag"; + +const tokens = countTokens("Hello world"); // 2 +const docTokens = countTokens(myDocument); // exact count +``` + +This is useful for understanding your content's size, debugging chunk boundaries, or building custom chunking logic. + + +The [RAG Handbook](/docs/rag) covers chunking in depth—including structure-aware strategies, multi-representation indexing, and how chunk size affects the quality-latency-cost triangle. See [Module 3: Chunking and Representation](/docs/rag/03-chunking-and-representation) for the full picture. + diff --git a/apps/web/content/docs/(unrag)/chunking/markdown.mdx b/apps/web/content/docs/(unrag)/chunking/markdown.mdx new file mode 100644 index 0000000..bb9cfbc --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/markdown.mdx @@ -0,0 +1,216 @@ +--- +title: Markdown Chunking +description: Structure-aware chunking that respects markdown formatting. +--- + +Markdown has explicit structure. Headings divide content into sections. Fenced code blocks mark off executable examples. Horizontal rules separate topics. When you're chunking markdown documents, you want a chunker that understands and respects this structure rather than treating the document as plain text. + +The markdown chunker does exactly this. It identifies structural boundaries in your markdown and uses them as natural split points. Code blocks stay intact. Sections become chunks. The result is chunks that align with how the author organized the content. + +## Why markdown needs special handling + +Consider what happens when you chunk markdown with a generic text splitter. A code block might get cut in half: + +``` +Chunk 1: "Install the package:\n\n```bash\nnpm install" + +Chunk 2: "my-package\n```\n\nThen configure it..." +``` + +This broken code block is useless in retrieval results. The user searching for "how to install" gets back invalid syntax. Even worse, embedding models might struggle to produce meaningful vectors for incomplete code. + +The markdown chunker prevents this. It recognizes fenced code blocks (``` or ~~~) and keeps them whole. It understands that a `## Heading` starts a new section and is a natural place to split. It preserves the structure that makes markdown readable. + +## Installation + +```bash +bunx unrag add chunker:markdown +``` + +The markdown chunker is pure TypeScript with no external dependencies. It parses markdown structure directly without requiring a full markdown-to-AST library. + +## Configuration + +Enable markdown chunking in your `unrag.config.ts`: + +```ts +export default defineUnragConfig({ + chunking: { + method: "markdown", + options: { + chunkSize: 512, + chunkOverlap: 50, + }, + }, + // ... +}); +``` + +## How it works + +The markdown chunker processes documents in several passes: + +**First, it identifies structural boundaries.** The chunker scans for headings at any level (`#` through `######`), horizontal rules (`---`, `***`, `___`), and fenced code block delimiters. These become potential split points. + +**Second, it protects code blocks.** Fenced code blocks are marked as atomic units. No matter how long they are, the chunker won't split inside them. This means a 400-token code example stays in one chunk. + +**Third, it splits at headings.** Each heading starts a new chunk. Content flows from one heading until the next, keeping related information together. A section about "Installation" stays separate from a section about "Configuration." + +**Fourth, it applies token limits.** If a section exceeds `chunkSize`, the chunker splits it using sentence boundaries while still respecting code block integrity. A long section becomes multiple chunks, each starting from the same heading. + +**Finally, it merges small sections.** Sections smaller than `minChunkSize` get merged with neighbors to avoid tiny, low-value chunks. + +## Configuration options + +**chunkSize** sets the maximum tokens per chunk. With markdown content, this limit might be reached within a single section that contains lots of prose. Code-heavy documentation might have many sections that fit well under the limit. + +**chunkOverlap** adds repeated tokens at chunk boundaries. This matters less for markdown chunking than for prose chunking because sections are usually self-contained. You might use lower overlap (20-30 tokens) for markdown. + +**minChunkSize** prevents tiny chunks from short sections. A single-line section like `## Related Links` followed by just a URL might fall below this threshold and get merged with the previous or next section. + +## When to use markdown chunking + +Markdown chunking is the right choice whenever your content is written in markdown: + +- Documentation sites and READMEs +- Technical guides with code examples +- API documentation +- Knowledge base articles in markdown format +- Blog posts written in markdown +- Wiki pages + +The chunker handles markdown-specific features that would trip up generic splitters: + +- Fenced code blocks with language tags +- Indented code blocks +- Block quotes +- Lists (ordered and unordered) +- Tables (kept intact when possible) +- Front matter (YAML headers are stripped) + +## A practical example + +Consider this markdown documentation: + +````markdown +# Installation + +Install the package from npm: + +```bash +npm install my-package +``` + +Make sure you're using Node.js 18 or later. + +## Configuration + +Create a config file at your project root: + +```ts +// my-package.config.ts +export default { + apiKey: process.env.API_KEY, + timeout: 5000, +}; +``` + +The config supports these options: + +- `apiKey`: Your API key (required) +- `timeout`: Request timeout in milliseconds +- `retries`: Number of retry attempts + +## Usage + +Import and initialize the client: + +```ts +import { createClient } from "my-package"; +import config from "./my-package.config"; + +const client = createClient(config); +await client.connect(); +``` +```` + +With markdown chunking, this becomes three chunks: + +**Chunk 1:** +``` +# Installation + +Install the package from npm: + +```bash +npm install my-package +``` + +Make sure you're using Node.js 18 or later. +``` + +**Chunk 2:** +``` +## Configuration + +Create a config file at your project root: + +```ts +// my-package.config.ts +export default { + apiKey: process.env.API_KEY, + timeout: 5000, +}; +``` + +The config supports these options: + +- `apiKey`: Your API key (required) +- `timeout`: Request timeout in milliseconds +- `retries`: Number of retry attempts +``` + +**Chunk 3:** +``` +## Usage + +Import and initialize the client: + +```ts +import { createClient } from "my-package"; +import config from "./my-package.config"; + +const client = createClient(config); +await client.connect(); +``` +``` + +Each chunk is a complete section. Code blocks are intact. When a user searches for "how to configure," they get back the Configuration section with its complete code example and options list. + +## Handling large code blocks + +What happens when a single code block exceeds `chunkSize`? The markdown chunker has a fallback: it splits the code block at line boundaries. This isn't ideal—you might end up with a function definition in one chunk and its body in another—but it ensures you never exceed token limits. + +If you frequently hit this situation, consider: + +1. **Increasing `chunkSize`** to accommodate your typical code examples. Documentation code samples are usually small; if yours are large, adjust accordingly. + +2. **Using the code chunker** for source code files. Markdown chunking is for documentation that contains code blocks. If you're chunking actual source code files, the [Code Chunker](/docs/chunking/code) understands AST structure and splits at function boundaries. + +## Preserving context + +Each chunk starts with its section heading, which provides immediate context. When a retrieval result includes "## Configuration" at the top, the user (or the LLM using the context) immediately knows this chunk is about configuration. + +For even richer context, consider the [Hierarchical Chunker](/docs/chunking/hierarchical). It prepends the full heading path to each chunk, so a subsection chunk might start with "# API Reference > ## Authentication > ### OAuth Flow". This is more verbose but provides complete navigation context. + +## Mixed content documents + +Real documentation often mixes markdown with other content. A README might have YAML front matter, HTML blocks, or non-markdown sections. The markdown chunker handles these gracefully: + +- **YAML front matter** (content between `---` markers at the start) is stripped and not included in chunks. If you want to preserve front matter as metadata, extract it separately before ingestion. + +- **HTML blocks** are treated as opaque content. They're not split internally, similar to code blocks. + +- **Raw text sections** (content without markdown formatting) are chunked using the recursive fallback when necessary. + +The chunker is designed to never fail, even on malformed or unconventional markdown. It might not produce optimal chunks for pathological inputs, but it will produce something usable. diff --git a/apps/web/content/docs/(unrag)/chunking/meta.json b/apps/web/content/docs/(unrag)/chunking/meta.json new file mode 100644 index 0000000..99b73e6 --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/meta.json @@ -0,0 +1,13 @@ +{ + "title": "Chunking", + "pages": [ + "index", + "recursive", + "semantic", + "markdown", + "code", + "hierarchical", + "agentic", + "custom" + ] +} diff --git a/apps/web/content/docs/(unrag)/chunking/recursive.mdx b/apps/web/content/docs/(unrag)/chunking/recursive.mdx new file mode 100644 index 0000000..9d8918c --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/recursive.mdx @@ -0,0 +1,153 @@ +--- +title: Recursive Chunking +description: Token-based recursive text splitting—the default chunking method. +--- + +Recursive chunking is Unrag's default method for splitting documents. It's called "recursive" because the algorithm tries progressively finer-grained separators until chunks fit within token limits. This approach balances simplicity with respect for natural text boundaries—paragraphs stay together when possible, sentences don't get cut mid-thought. + +## How the algorithm works + +The recursive chunker maintains a hierarchy of separators, ordered from coarsest to finest: + +1. `\n\n` — Paragraph breaks (two newlines) +2. `\n` — Single line breaks +3. `. ` — Sentence endings (period + space) +4. `? ` — Question marks +5. `! ` — Exclamation marks +6. `; ` — Semicolons +7. `: ` — Colons +8. `, ` — Commas +9. ` ` — Word boundaries (spaces) +10. `` — Individual characters (last resort) + +When you pass a document to the chunker, it first tries to split on paragraph breaks. If the resulting pieces are small enough, it's done. If any piece exceeds the configured `chunkSize`, the algorithm recurses on that piece using the next separator in the hierarchy—line breaks. This continues down the list until all pieces fit within limits. + +The result is chunks that split at the most meaningful boundary possible. A 1,500-token document might split into three paragraph-sized chunks. A dense paragraph that exceeds the limit might split at sentence boundaries. Only in edge cases—like extremely long URLs or unbroken strings—does the algorithm resort to character-level splitting. + +## Why this works well + +Natural language has structure. Paragraphs group related sentences. Sentences group related clauses. The recursive approach respects this structure by always preferring larger semantic units. When you retrieve a chunk, it's more likely to be a coherent thought rather than a fragment that starts mid-sentence. + +Token-based splitting adds precision. Unlike character-based chunkers that might estimate token counts, Unrag's recursive chunker uses the actual `o200k_base` tokenizer from `js-tiktoken`. This is the same encoding used by GPT-5, GPT-4o, and the current generation of OpenAI models. When you set `chunkSize: 512`, you get exactly 512 tokens—not an approximation that might exceed your embedding model's limits. + +## Configuration + +The recursive chunker is built into Unrag's core—no installation required. It's the default when you don't specify a chunking method. To configure it explicitly: + +```ts +export default defineUnragConfig({ + chunking: { + method: "recursive", + options: { + chunkSize: 512, + chunkOverlap: 50, + minChunkSize: 24, + }, + }, + // ... +}); +``` + +You can also omit the `method` entirely since `"recursive"` is the default: + +```ts +export default defineUnragConfig({ + chunking: { + options: { + chunkSize: 400, + chunkOverlap: 40, + }, + }, + // ... +}); +``` + +## Configuration options + +**chunkSize** sets the maximum number of tokens per chunk. The default of 512 works well for general prose. Larger values (700-1000) preserve more context but reduce retrieval precision. Smaller values (200-300) increase precision but may fragment ideas across multiple chunks. + +**chunkOverlap** determines how many tokens repeat at chunk boundaries. When set to 50, the last 50 tokens of chunk N appear again at the start of chunk N+1. This overlap helps preserve context when ideas span chunk boundaries. Higher overlap means better context preservation but more redundant storage and embedding costs. + +**minChunkSize** prevents tiny chunks from being created. If a potential chunk has fewer tokens than this threshold, the algorithm merges it with an adjacent chunk. The default of 24 tokens filters out fragments like single sentences or stray lines while keeping meaningful paragraphs. + +**separators** lets you customize the separator hierarchy. If your content uses unusual delimiters or you want to prioritize certain boundaries, you can provide your own array: + +```ts +chunking: { + method: "recursive", + options: { + separators: ["\n\n", "\n", "。", "。 ", ". ", "? ", "! ", " ", ""], + }, +} +``` + +This example adds Japanese period characters (。) for documents that mix English and Japanese text. + +## When to use recursive chunking + +The recursive chunker is a good default for several reasons. It handles mixed content gracefully—documents with paragraphs, lists, code snippets, and tables all work reasonably well. It's fast because it doesn't require external services or LLM calls. And it's predictable—the same input always produces the same output. + +Use recursive chunking when: + +- You're working with general prose content like articles, help center pages, or documentation +- You want a reliable starting point before experimenting with specialized chunkers +- Your content has varied structure and you need something that handles everything adequately +- Latency or cost constraints make LLM-based chunkers impractical + +Consider switching to a specialized chunker when: + +- You're processing markdown with code blocks and want those blocks kept intact → [Markdown Chunking](/docs/chunking/markdown) +- You're chunking source code and want to split at function/class boundaries → [Code Chunking](/docs/chunking/code) +- Your content has subtle topic shifts that structural splitting misses → [Semantic Chunking](/docs/chunking/semantic) +- You have structured documents with clear section headers → [Hierarchical Chunking](/docs/chunking/hierarchical) + +## Understanding the output + +Given this input text: + +``` +Unrag is a RAG installer for TypeScript projects. It installs a small, +composable module into your codebase as vendored source files. + +The two core operations are ingest and retrieve. Ingestion takes content, +splits it into chunks, generates embeddings for each chunk, and stores +everything in Postgres with pgvector. + +Retrieval embeds your query and finds the most similar chunks. You get +back the chunks, their scores, and timing information. +``` + +With `chunkSize: 512`, this entire text fits in one chunk (it's around 100 tokens). But if you set `chunkSize: 60`, the chunker splits at paragraph boundaries: + +``` +Chunk 0: "Unrag is a RAG installer for TypeScript projects. It installs a small, +composable module into your codebase as vendored source files." + +Chunk 1: "The two core operations are ingest and retrieve. Ingestion takes content, +splits it into chunks, generates embeddings for each chunk, and stores +everything in Postgres with pgvector." + +Chunk 2: "Retrieval embeds your query and finds the most similar chunks. You get +back the chunks, their scores, and timing information." +``` + +Notice how the chunker respects paragraph boundaries (`\n\n`). It doesn't split the first paragraph mid-sentence just because it could. This semantic awareness is what makes recursive chunking effective. + +## The token chunker alternative + +Unrag also provides a simpler `token` method that splits strictly by token count without the recursive separator logic: + +```ts +chunking: { + method: "token", + options: { chunkSize: 512, chunkOverlap: 50 }, +} +``` + +This is faster than recursive chunking but may split mid-sentence or mid-word. It's useful when you need maximum throughput and can tolerate less coherent chunk boundaries. For most applications, stick with `recursive`. + +## Performance characteristics + +The recursive chunker processes content locally using `js-tiktoken`. There are no API calls, no network latency, and no rate limits to worry about. On modern hardware, chunking speed is typically measured in megabytes per second rather than being a bottleneck. + +The tokenizer loads lazily on first use. The initial chunk operation has a small startup cost (~50ms) as the tokenizer initializes. Subsequent operations are essentially instantaneous for typical document sizes. diff --git a/apps/web/content/docs/(unrag)/chunking/semantic.mdx b/apps/web/content/docs/(unrag)/chunking/semantic.mdx new file mode 100644 index 0000000..ead1b58 --- /dev/null +++ b/apps/web/content/docs/(unrag)/chunking/semantic.mdx @@ -0,0 +1,179 @@ +--- +title: Semantic Chunking +description: LLM-guided chunking that splits at natural semantic boundaries. +--- + +Rule-based chunkers like the recursive splitter look at text structure—paragraph breaks, sentence endings, punctuation. They work well because text structure often correlates with semantic structure. But the correlation isn't perfect. A topic can shift mid-paragraph. Two paragraphs might belong together as one coherent thought. Structural boundaries and meaning boundaries don't always align. + +Semantic chunking addresses this by using an LLM to identify where ideas actually change. Instead of splitting at every paragraph break, it analyzes the content and finds the natural joints—places where one topic ends and another begins, where an explanation completes, where the narrative shifts direction. The result is chunks that are more coherent and self-contained than what rule-based splitting can achieve. + +## How it works + +When you ingest a document with semantic chunking enabled, Unrag sends the content to an LLM with instructions to identify semantic boundaries. The model reads through the text, understanding context and meaning, and returns suggested split points. Unrag then divides the text at those points and applies token limits to ensure chunks stay within bounds. + +The LLM is prompted to prefer boundaries at: + +- Transitions between distinct topics or subjects +- Completed thoughts or arguments +- Points where the narrative or explanation shifts +- Natural section breaks that aren't marked with formatting + +If the LLM suggests boundaries that would create chunks exceeding your configured `chunkSize`, Unrag further splits those chunks using sentence-based rules. This ensures you never exceed token limits while preserving semantic coherence wherever possible. + +## Installation + +Semantic chunking requires an LLM, so it's packaged as a plugin rather than being built into core: + +```bash +bunx unrag add chunker:semantic +``` + +This installs the semantic chunker and ensures the AI SDK dependencies are present. The chunker uses your configured AI provider, so there's no additional setup beyond what you've already done for embedding or other LLM features. + +## Configuration + +Enable semantic chunking in your `unrag.config.ts`: + +```ts +export default defineUnragConfig({ + chunking: { + method: "semantic", + options: { + chunkSize: 512, + chunkOverlap: 50, + model: "gpt-4o-mini", + }, + }, + // ... +}); +``` + +The `model` option is optional. If you don't specify it, the chunker uses your provider's default model. Specifying a model lets you choose the cost-quality tradeoff explicitly. + +## Configuration options + +**chunkSize** still matters even with semantic chunking. The LLM identifies boundaries, but if those boundaries would create chunks larger than your limit, Unrag splits further. Think of `chunkSize` as an upper bound that semantic chunking respects. + +**chunkOverlap** works the same as with other chunkers. Overlapping tokens at boundaries help preserve context when ideas span chunks. + +**minChunkSize** prevents the creation of tiny fragments. If the LLM identifies a boundary that would create a very small chunk, it gets merged with a neighbor. + +**model** specifies which LLM to use for boundary detection. Faster, cheaper models like `gpt-4o-mini` work well for most content. For complex or nuanced documents, a more capable model may identify better boundaries. + +## When semantic chunking shines + +The value of semantic chunking becomes clear with content that has subtle topic shifts. Consider a long article discussing machine learning: + +``` +Machine learning models learn patterns from data. They can identify relationships +that humans might miss, making them powerful for prediction tasks. The ability to +generalize from training data to new situations is what makes these models useful. + +However, this power comes with limitations. Models require large amounts of training +data. They can encode biases present in that data. And they can fail silently when +faced with situations that differ from their training distribution. + +Careful validation is therefore essential. You need test sets that represent real-world +conditions. You need monitoring to catch drift over time. And you need humans in the +loop for high-stakes decisions. +``` + +A recursive chunker might split this at paragraph boundaries, which isn't terrible. But semantic chunking recognizes that the first two paragraphs are really one thought (benefits and limitations), while the third paragraph is a distinct topic (validation practices). It might produce: + +``` +Chunk 1: "Machine learning models learn patterns from data... And they can fail +silently when faced with situations that differ from their training distribution." + +Chunk 2: "Careful validation is therefore essential. You need test sets that +represent real-world conditions..." +``` + +The word "However" in the second paragraph signals a contrast, not a topic change. A semantic chunker understands this; a rule-based chunker doesn't. + +## Cost and latency considerations + +Semantic chunking calls an LLM for every document you ingest. This has real costs: + +For a 10,000-token document using `gpt-4o-mini`: +- Input: ~10,000 tokens at $0.15/1M tokens = ~$0.0015 +- Output: ~500 tokens (boundary markers) at $0.60/1M tokens = ~$0.0003 +- **Total: ~$0.002 per document** + +This seems small, but it adds up. Ingesting 10,000 documents costs roughly $20 in LLM fees on top of your embedding costs. For 100,000 documents, that's $200. + +Latency is also a factor. Each document requires a round-trip to an LLM API. With typical latencies of 1-3 seconds per call, bulk ingestion becomes significantly slower than with local chunking. + +Consider using semantic chunking for: + +- High-value content where retrieval quality directly impacts user experience +- Relatively small corpuses where cost and latency are manageable +- Content that will be queried frequently, amortizing the upfront chunking cost +- Narrative or long-form content without clear structural markers + +Use simpler chunkers for: + +- Large-scale ingestion of thousands or millions of documents +- Content with clear structure (markdown, code, structured data) +- Real-time or near-real-time ingestion requirements +- Budget-constrained environments + +## Fallback behavior + +Network requests fail. APIs have rate limits. LLMs occasionally return unexpected responses. Semantic chunking is designed to degrade gracefully rather than fail hard. + +If the LLM call fails for any reason—timeout, rate limit, malformed response—the semantic chunker automatically falls back to sentence-based splitting. Your document still gets chunked and ingested, just without the semantic awareness. The fallback uses the same `chunkSize` and overlap settings, so chunk sizes remain consistent. + +You can detect when fallback occurred by checking the warnings in the ingest result: + +```ts +const result = await engine.ingest({ sourceId, content }); + +for (const warning of result.warnings) { + if (warning.code === "semantic_fallback") { + console.warn(`Semantic chunking fell back for ${sourceId}:`, warning.message); + } +} +``` + +This lets you log fallback occurrences, retry failed documents later, or alert on high fallback rates that might indicate API issues. + +## Choosing between semantic and agentic chunking + +Unrag offers two LLM-powered chunking methods: semantic and agentic. Both use LLMs and have similar costs, but they optimize for different goals. + +**Semantic chunking** asks the LLM to find natural topic boundaries. It produces clean, coherent chunks that respect how content is organized. + +**Agentic chunking** asks the LLM to optimize chunks for retrieval. It considers what queries users might ask and structures chunks to match those queries. + +For most LLM-chunking use cases, semantic chunking is the right choice. It's more predictable and produces reliable results. Agentic chunking is a specialized option for when you've identified that retrieval quality is the limiting factor and you're willing to pay for maximum optimization. See [Agentic Chunking](/docs/chunking/agentic) for details. + +## Practical example + +Here's a complete example showing semantic chunking for a knowledge base: + +```ts +import { createUnragEngine } from "@unrag/config"; + +const engine = createUnragEngine(); + +// Articles are narrative content well-suited to semantic chunking +const article = await fetchArticle("understanding-kubernetes"); + +const result = await engine.ingest({ + sourceId: `kb:${article.slug}`, + content: article.body, + metadata: { + title: article.title, + author: article.author, + category: "infrastructure", + }, +}); + +if (result.warnings.length > 0) { + console.warn("Ingestion warnings:", result.warnings); +} + +console.log(`Created ${result.chunkCount} chunks for "${article.title}"`); +``` + +The semantic chunker analyzes the article's content, identifies where topics shift, and creates chunks that capture complete ideas. When users later search for Kubernetes concepts, they'll get back coherent explanations rather than fragments that start mid-thought. diff --git a/apps/web/content/docs/(unrag)/concepts/architecture.mdx b/apps/web/content/docs/(unrag)/concepts/architecture.mdx index 8b8cf11..5900910 100644 --- a/apps/web/content/docs/(unrag)/concepts/architecture.mdx +++ b/apps/web/content/docs/(unrag)/concepts/architecture.mdx @@ -13,7 +13,7 @@ Every Unrag instance is built from three pluggable pieces: **The store adapter** handles persistence. It knows how to write documents, chunks, and their embeddings to your database, and how to query for similar vectors. Unrag ships adapters for Drizzle, Prisma, and raw SQL—all targeting Postgres with pgvector. -**The chunker** splits documents into smaller pieces. The default implementation does simple word-based chunking with configurable size and overlap. If you need sentence-aware chunking, markdown-specific splitting, or token-based boundaries, you can provide your own chunker function. +**The chunker** splits documents into smaller pieces. The default implementation uses token-based recursive chunking with the `o200k_base` tokenizer (same as GPT-5, GPT-4o). It splits at natural boundaries (paragraphs, sentences, clauses) while respecting token limits. For specialized content, you can install plugin chunkers (markdown, code, semantic) or provide your own chunker function. These components are assembled in your `unrag.config.ts` file to create a `ContextEngine` instance. The engine coordinates the components but doesn't contain business logic itself—it just calls the right methods in the right order. @@ -28,7 +28,7 @@ When you call `engine.ingest({ sourceId, content, metadata, assets })`, here's w The content string is passed to the chunker function along with your configured chunk size and overlap. The chunker returns an array of chunk objects, each containing the text, its position index, and an approximate token count. -For example, with a chunk size of 200 words and 40-word overlap, a 500-word document becomes roughly 3-4 chunks. Each chunk shares some text with its neighbors, which helps preserve context across chunk boundaries. +For example, with the default chunk size of 512 tokens and 50-token overlap, a 1500-token document becomes roughly 3-4 chunks. Each chunk shares some text with its neighbors, which helps preserve context across chunk boundaries. diff --git a/apps/web/content/docs/(unrag)/concepts/chunking.mdx b/apps/web/content/docs/(unrag)/concepts/chunking.mdx index df8ec0b..e1a496c 100644 --- a/apps/web/content/docs/(unrag)/concepts/chunking.mdx +++ b/apps/web/content/docs/(unrag)/concepts/chunking.mdx @@ -3,150 +3,105 @@ title: Chunking description: How documents are split into chunks and why it matters for retrieval quality. --- -Chunking is how Unrag splits your documents into smaller pieces before embedding them. The quality of your retrieval depends significantly on how well your chunking strategy matches your content type and use case. +Before your documents can be searched semantically, they need to be embedded—turned into vectors that represent their meaning. But embedding models work best with focused pieces of text, not sprawling documents. A 10,000-word article embedded as a single vector becomes a vague point in semantic space, losing the nuance of individual sections and paragraphs. -## Why chunk at all? +Chunking is how Unrag splits documents into those focused pieces. Each chunk gets its own embedding, its own position in vector space, its own chance to match user queries. The quality of your retrieval depends significantly on how well your chunking strategy matches your content. -Embedding models have context limits. OpenAI's text-embedding-3-small, for example, accepts up to 8,191 tokens per call. But even within that limit, longer texts produce less useful embeddings. A 5,000-word document embedded as a single vector captures the overall topic but loses the nuance of individual sections. +## The default chunker -Chunking solves this by breaking documents into pieces small enough that each embedding captures specific, queryable meaning. When someone searches for "how to configure authentication," you want to return the paragraph about authentication configuration, not an entire document that mentions authentication once. +When you create a new Unrag project, you get token-based recursive chunking out of the box. This algorithm tries to split text at natural boundaries—paragraphs first, then sentences, then clauses—while respecting token limits. It uses the `o200k_base` tokenizer, the same encoding used by GPT-5, GPT-4o, and current OpenAI models. -The tradeoff is that chunking can split information across boundaries. If an important concept spans two chunks, retrieval might only return one of them. Overlap helps with this—by repeating some text at chunk boundaries, you increase the chance that related content ends up in the same chunk. +The default settings work well for most content: -## The default chunker +- **chunkSize**: 512 tokens +- **chunkOverlap**: 50 tokens +- **minChunkSize**: 24 tokens + +These values balance precision (chunks focused enough to match specific queries) with context (chunks large enough to be self-contained). The overlap ensures that ideas spanning chunk boundaries appear in both adjacent chunks, reducing the chance of missing relevant content. + +## Choosing a chunking method + +Different content types benefit from different approaches. Unrag provides several chunking methods: + +**Recursive chunking** (the default) works for general prose. It's fast, predictable, and handles mixed content gracefully. + +**Semantic chunking** uses an LLM to identify where topics actually shift, rather than relying on formatting cues. It costs more but produces more coherent chunks. -Unrag ships with a simple word-based chunker. It splits text on whitespace, groups words into chunks of the configured size, and creates overlap by starting each new chunk some words back from where the previous one ended. +**Markdown chunking** understands markdown structure—headings, code blocks, horizontal rules. It keeps code blocks intact and splits at section boundaries. -Default settings: -- **chunkSize**: 200 words -- **chunkOverlap**: 40 words +**Code chunking** parses source code with tree-sitter and splits at function and class boundaries, keeping complete definitions together. -These defaults work reasonably well for prose content like documentation, articles, and help center pages. They produce chunks of roughly 150-300 tokens, which embed efficiently and retrieve precisely. +**Hierarchical chunking** prepends section headers to every chunk, so each chunk carries context about where it fits in the document. -You can change the defaults in `unrag.config.ts`: +**Agentic chunking** uses an LLM to optimize chunks specifically for retrieval, considering what users might search for. + +**Custom chunking** lets you implement your own logic when built-in options don't fit. + +You can configure your preferred method in `unrag.config.ts`: ```ts -export const unrag = defineUnragConfig({ - // ... - defaults: { +export default defineUnragConfig({ chunking: { - chunkSize: 300, - chunkOverlap: 60, + method: "markdown", + options: { + chunkSize: 512, + chunkOverlap: 50, + }, }, - }, -} as const); + // ... +}); ``` -## Per-document overrides +## Per-ingest overrides -Different content types benefit from different chunking strategies. A long technical document might need larger chunks to keep code examples intact, while short FAQ entries might work better with smaller chunks. +The configured chunking method becomes your default, but you're not locked into it. You can override chunking behavior for individual `engine.ingest()` calls in two ways. -Override chunking for specific ingests: +**Override just the options** when you want to keep the same algorithm but adjust parameters: ```ts -// Large chunks for technical docs with code blocks await engine.ingest({ - sourceId: "docs:api-reference", - content: technicalDoc, - chunking: { chunkSize: 500, chunkOverlap: 100 }, -}); - -// Smaller chunks for FAQ-style content -await engine.ingest({ - sourceId: "faq:billing", - content: faqContent, - chunking: { chunkSize: 100, chunkOverlap: 20 }, + sourceId: "specs:detailed-design", + content: designDoc, + chunking: { chunkSize: 768, chunkOverlap: 75 }, }); ``` -## Chunk size tradeoffs - -Choosing the right chunk size involves balancing several factors: +This uses your configured chunker but with larger chunks and more overlap—useful for dense technical content where you want more context per chunk. -**Smaller chunks** (50-150 words) give you more precise retrieval. Each chunk is about one idea, so when it matches a query, it's likely directly relevant. The downside is that you lose context—the chunk might not have enough surrounding information to be useful on its own. You also generate more embeddings, increasing cost and storage. +**Override the chunker itself** when different content needs a different algorithm: -**Larger chunks** (300-500 words) preserve more context and keep related information together. They're cheaper to embed and store. But they're less precise—a large chunk might match a query because of one sentence, then return a lot of irrelevant text along with it. +```ts +import { codeChunker } from "@unrag/chunking/code"; -**Very large chunks** (500+ words) are usually too broad for effective semantic search. The embedding becomes a vague average of many topics rather than a specific representation. +await engine.ingest({ + sourceId: "src/utils/helpers.ts", + content: sourceCode, + chunker: codeChunker, // Use code chunking for this file +}); +``` -For most applications, start with 150-250 words and adjust based on your retrieval quality. If you're getting good matches but not enough context, increase chunk size. If you're getting too much irrelevant text in results, decrease it. +The per-ingest `chunker` parameter takes precedence over your configured method. This means you can handle heterogeneous content—documentation, code, articles—with a single engine instance, applying the appropriate chunking strategy to each. -## Custom chunkers +## Token counting -The default word-based chunker is simple but naive. It doesn't understand sentence boundaries, paragraphs, markdown structure, or code blocks. If your content has structure that matters, you might want a smarter chunker. +Unrag measures chunk sizes in tokens, not characters or words. This matters because embedding models process tokens, and their context limits are defined in tokens. A 512-token chunk is guaranteed to fit in any modern embedding model's context window. -To use a custom chunker, pass it when constructing the engine: +If you're building custom logic or want to understand your content better, Unrag exports a `countTokens` utility: ```ts -import { defineUnragConfig } from "@unrag/core"; -import type { Chunker, ChunkText, ChunkingOptions } from "@unrag/core"; - -// A sentence-aware chunker (simplified example) -const sentenceChunker: Chunker = ( - content: string, - options: ChunkingOptions -): ChunkText[] => { - const sentences = content.split(/(?<=[.!?])\s+/); - const chunks: ChunkText[] = []; - - let current = ""; - let wordCount = 0; - let index = 0; - - for (const sentence of sentences) { - const sentenceWords = sentence.split(/\s+/).length; - - if (wordCount + sentenceWords > options.chunkSize && current) { - chunks.push({ - index: index++, - content: current.trim(), - tokenCount: wordCount, - }); - current = ""; - wordCount = 0; - } - - current += sentence + " "; - wordCount += sentenceWords; - } - - if (current.trim()) { - chunks.push({ - index: index++, - content: current.trim(), - tokenCount: wordCount, - }); - } - - return chunks; -}; - -// Use it in your engine config -const unrag = defineUnragConfig({ - defaults: { - chunking: { chunkSize: 200, chunkOverlap: 0 }, - retrieval: { topK: 8 }, - }, - embedding: { provider: "custom", create: () => myEmbeddingProvider }, - engine: { - chunker: sentenceChunker, - }, -} as const); +import { countTokens } from "unrag"; -const engine = unrag.createEngine({ store: myStore }); +const tokens = countTokens("Hello world"); // 2 +const docSize = countTokens(myDocument); // exact count ``` -Common reasons to build a custom chunker: - -1. **Sentence boundaries**: Never split mid-sentence -2. **Paragraph awareness**: Keep paragraphs together when possible -3. **Markdown structure**: Respect heading boundaries, keep code blocks intact -4. **Token-based sizing**: Count actual tokens instead of words for more predictable embedding behavior -5. **Semantic sections**: Split on topic boundaries detected through heuristics or ML - -For most text content, the default chunker works fine. Invest in a custom chunker when you notice retrieval quality issues that trace back to poor chunk boundaries. - - -The [RAG Handbook](/docs/rag) covers chunking in depth—including structure-aware strategies, multi-representation indexing, and how chunk size affects the quality-latency-cost triangle. See [Module 3: Chunking and Representation](/docs/rag/03-chunking-and-representation) for the full picture. - +This uses the same tokenizer as the chunker, so counts are consistent. + + + Complete guide to all chunking methods with examples + + + Deep dive into chunking strategies and tradeoffs + + diff --git a/apps/web/content/docs/(unrag)/concepts/context-engine.mdx b/apps/web/content/docs/(unrag)/concepts/context-engine.mdx index e5ccd3b..cd4ed72 100644 --- a/apps/web/content/docs/(unrag)/concepts/context-engine.mdx +++ b/apps/web/content/docs/(unrag)/concepts/context-engine.mdx @@ -25,7 +25,7 @@ import { createDrizzleVectorStore } from "@unrag/store/drizzle"; const unrag = defineUnragConfig({ defaults: { - chunking: { chunkSize: 500, chunkOverlap: 100 }, + chunking: { chunkSize: 768, chunkOverlap: 75 }, // tokens, not words retrieval: { topK: 8 }, }, embedding: { @@ -49,7 +49,7 @@ const result = await engine.ingest({ sourceId: "docs:architecture", content: "Your document text here...", metadata: { category: "technical", author: "alice" }, - chunking: { chunkSize: 300 }, // Optional per-call override + chunking: { chunkSize: 256 }, // Optional per-call override (in tokens) }); ``` diff --git a/apps/web/content/docs/(unrag)/getting-started/quickstart.mdx b/apps/web/content/docs/(unrag)/getting-started/quickstart.mdx index 3c0d7f1..9f3a083 100644 --- a/apps/web/content/docs/(unrag)/getting-started/quickstart.mdx +++ b/apps/web/content/docs/(unrag)/getting-started/quickstart.mdx @@ -183,7 +183,7 @@ The chunks are sorted by score ascending, so the first chunk is the most similar Open `unrag.config.ts` to adjust settings. The most common customizations are: -1. **Chunking parameters**: Adjust `chunkSize` and `chunkOverlap` to change how documents are split. Smaller chunks give more precise retrieval but cost more to embed. +1. **Chunking parameters**: Adjust `chunkSize` and `chunkOverlap` (in tokens) to change how documents are split. Smaller chunks give more precise retrieval but cost more to embed. Default: 512 tokens with 50 token overlap. 2. **Default topK**: Change how many chunks are returned by default when you don't specify `topK` in your retrieve call. diff --git a/apps/web/content/docs/(unrag)/guides/doctor.mdx b/apps/web/content/docs/(unrag)/guides/doctor.mdx index c6b83ee..2e6249f 100644 --- a/apps/web/content/docs/(unrag)/guides/doctor.mdx +++ b/apps/web/content/docs/(unrag)/guides/doctor.mdx @@ -7,9 +7,9 @@ After setting up Unrag, you might wonder whether everything is wired correctly. ## Why doctor exists -Unrag is vendored source code, which means you own it and can modify it. This flexibility comes with a trade-off: there's no central registry keeping track of what you've installed or whether it's configured correctly. You might add an extractor but forget to register it in your config. You might set up the database schema but miss an index that becomes important as your data grows. +Unrag is vendored source code. When you run `init`, you're copying files into your project that you own and can modify. This flexibility comes with a trade-off: there's no central registry keeping track of what you've installed or whether it's configured correctly. You might add an extractor but forget to register it in your config. You might set up the database schema but miss an index that becomes important as your data grows. You might install a plugin chunker but never wire it into your configuration. -Doctor scans your project and reports what it finds. It reads your `unrag.json`, examines your `unrag.config.ts`, checks your environment variables, and optionally connects to your database to verify the schema. The goal is to surface issues before they become production problems. +Doctor scans your project and reports what it finds. It reads your `unrag.json` manifest, examines your `unrag.config.ts`, checks your environment variables, and optionally connects to your database to verify the schema. The goal is to surface issues before they become production problems. ## Running the basic checks @@ -19,21 +19,23 @@ The simplest invocation runs static checks—things that don't require a databas bunx unrag doctor ``` -This examines your project for: +This examines your project across several dimensions: -**Installation integrity.** Does `unrag.json` exist and parse correctly? Is there a `unrag.config.ts` file? Does your install directory contain the expected folders (`core/`, `store/`, `embedding/`)? +**Installation integrity.** Does `unrag.json` exist and parse correctly? Is there a `unrag.config.ts` file at your project root? Does your install directory contain the expected folders (`core/`, `store/`, `embedding/`)? These are the foundational files that every Unrag project needs. -**Environment variables.** Based on your embedding provider and store adapter, doctor checks whether the required environment variables are set. If you're using the default AI Gateway provider, it looks for `AI_GATEWAY_API_KEY`. If you're using the Drizzle adapter with Postgres, it looks for `DATABASE_URL`. +**Environment variables.** Based on your embedding provider and store adapter, doctor checks whether the required environment variables are set. If you're using the default AI Gateway provider, it looks for `AI_GATEWAY_API_KEY`. If you're using the Drizzle adapter with Postgres, it looks for `DATABASE_URL`. Missing variables are reported with clear messages about what each one is for. -**Module presence.** If you've installed extractors or connectors, doctor verifies that the source files exist in the expected locations. A module directory without its main files suggests a partial installation. +**Module presence.** If you've installed extractors, connectors, or chunkers, doctor verifies that the source files exist in the expected locations. A module listed in `unrag.json` but missing its directory suggests a partial installation. Doctor checks that each installed module has its main files (`index.ts`, configuration, etc.) in place. -**Config coherence.** Doctor performs static analysis on your `unrag.config.ts` to check whether installed extractors are actually registered and enabled. If you've installed `pdf-llm` but didn't add it to your extractors array, doctor warns you. +**Config coherence.** Doctor performs static analysis on your `unrag.config.ts` to check whether things are wired correctly. If you've installed the `pdf-llm` extractor but didn't register it in your extractors array, doctor warns you. If you've configured `chunking.method: "semantic"` but haven't installed the semantic chunker module, doctor flags the mismatch. For custom chunkers (`method: "custom"`), doctor verifies that you've actually provided a `chunker` function in your config. -The output tells you what passed, what needs attention, and how to fix any issues. Passing checks show a checkmark; warnings show a caution symbol with suggested fixes. +**API feature support.** Doctor can verify that your vendored engine sources support newer API capabilities. For example, if you upgrade to a version of Unrag that supports per-ingest chunker overrides, doctor checks whether your vendored `core/types.ts` includes the new `chunker` field in `IngestInput`. This helps you catch API mismatches after partial upgrades. + +The output tells you what passed, what needs attention, and how to fix issues. Passing checks show a checkmark; warnings show a caution symbol with suggested fixes; failures show what went wrong and what to do about it. ## Checking the database -Static checks can only go so far. To verify that your database is correctly set up, add the `--db` flag: +Static checks only go so far. To verify that your database is correctly set up, add the `--db` flag: ```bash bunx unrag doctor --db @@ -41,21 +43,19 @@ bunx unrag doctor --db This connects to your Postgres database and runs additional checks: -**Connectivity.** Can doctor establish a connection? It reports the PostgreSQL version, database name, and connected user. +**Connectivity.** Can doctor establish a connection? This is the most basic test—if it fails, nothing else database-related will work. Doctor reports the PostgreSQL version, database name, and connected user, helping you verify you're connected to the right place. -**pgvector extension.** Is the vector extension installed and working? Doctor tests that the `<=>` operator works and checks for HNSW index support. +**pgvector extension.** Is the vector extension installed and working? Doctor tests that the `<=>` operator (cosine distance) works correctly and checks for HNSW index support. If pgvector isn't installed, you'll need to run `CREATE EXTENSION vector` before using Unrag. -**Schema validation.** Do the expected tables exist (`documents`, `chunks`, `embeddings`)? Do they have the required columns? Are foreign key constraints configured with `ON DELETE CASCADE`? +**Schema validation.** Do the expected tables exist (`documents`, `chunks`, `embeddings`)? Do they have the required columns with correct types? Are foreign key constraints configured with `ON DELETE CASCADE`? Doctor compares your actual schema against what Unrag expects and reports any discrepancies. -**Index recommendations.** Doctor checks for btree indexes on `source_id` columns, which speed up filtering and deletes. It also checks for vector indexes on the embeddings table. For small datasets, sequential scan is fine—doctor only warns about missing vector indexes when you have more than 50,000 embeddings. +**Index recommendations.** Doctor checks for btree indexes on `source_id` columns, which speed up filtering and cascade deletes. It also checks for vector indexes on the embeddings table. For small datasets, sequential scan is fine—doctor only warns about missing vector indexes when you have more than 50,000 embeddings. At that scale, an HNSW or IVFFlat index becomes important for query performance. -**Dimension consistency.** If you've switched embedding models at some point, you might have embeddings with different dimensions in the same database. Doctor detects this and warns you, because mixed dimensions can cause retrieval issues. +**Dimension consistency.** If you've switched embedding models at some point, you might have embeddings with different dimensions in the same database. Doctor detects this and warns you, because pgvector can't compare vectors of different dimensions. Mixed dimensions usually mean you need to re-embed some content. ## Configuring doctor for your project -Every project is a little different. Maybe your database URL lives in a custom environment variable. Maybe you use a non-standard schema name. Maybe you want strict mode in CI but not locally. - -The `doctor setup` command walks you through these options and generates a config file: +Every project is different. Maybe your database URL lives in a custom environment variable. Maybe you use a non-standard schema name. Maybe you want strict mode in CI but not locally. The `doctor setup` command walks you through these options: ```bash bunx unrag doctor setup @@ -73,15 +73,15 @@ This interactive wizard asks about your setup and creates `.unrag/doctor.json` w } ``` -Now your team can run `npm run unrag:doctor` and get consistent results. The CI script adds `--strict` (treat warnings as failures) and `--json` (machine-readable output) for use in automated pipelines. +Now your team can run `npm run unrag:doctor` and get consistent results based on your project's configuration. The CI script adds `--strict` (treat warnings as failures) and `--json` (machine-readable output) for use in automated pipelines. -If you prefer non-interactive setup, pass `--yes` to use detected defaults: +If you prefer to skip the interactive prompts, pass `--yes` to accept detected defaults: ```bash bunx unrag doctor setup --yes ``` -## The config file +## Understanding the config file The `.unrag/doctor.json` file stores your project-specific settings. It doesn't contain secrets—those stay in environment variables. Here's what the file looks like: @@ -108,9 +108,11 @@ The `.unrag/doctor.json` file stores your project-specific settings. It doesn't } ``` -The `env.loadFiles` array controls which dotenv files doctor loads before running checks. The `env.databaseUrlEnv` field tells doctor which environment variable contains your database URL—useful if you've renamed it from the default `DATABASE_URL`. +The `env.loadFiles` array controls which dotenv files doctor loads before running checks. This matters because doctor checks environment variables—if they're in `.env.local` but you didn't configure that file to be loaded, checks will fail incorrectly. -The `db` section lets you specify a custom schema name or table names if you've customized the Drizzle schema. The `defaults` section sets default values for flags like `--strict`. +The `env.databaseUrlEnv` field tells doctor which environment variable contains your database URL. This is useful if you've renamed it from the default `DATABASE_URL` to something like `POSTGRES_URL` or `UNRAG_DATABASE_URL`. + +The `db` section lets you specify custom schema or table names if you've modified the default Drizzle schema. If you're using `app_data` schema instead of `public`, configure it here. When you run `unrag doctor --config .unrag/doctor.json`, these settings are applied automatically. CLI flags still override config values, so you can always do `npm run unrag:doctor -- --strict` to add strict mode for a single run. @@ -132,11 +134,13 @@ This runs with `--json` for structured output and `--strict` to fail on warnings AI_GATEWAY_API_KEY: ${{ secrets.AI_GATEWAY_API_KEY }} ``` -If doctor finds issues, the step fails and the JSON output tells you exactly what went wrong. +If doctor finds issues, the step fails and the JSON output tells you exactly what went wrong. This catches configuration drift early—before broken configs reach production. ## Common issues and fixes -**Missing source_id indexes.** Doctor recommends btree indexes on `chunks.source_id` and `documents.source_id`. These speed up queries that filter by source and are essential for efficient prefix deletes. If you're using Drizzle, add indexes to your schema: +Here are the issues doctor most commonly finds, with guidance on fixing them: + +**Missing source_id indexes.** Doctor recommends btree indexes on `chunks.source_id` and `documents.source_id`. These speed up queries that filter by source and are essential for efficient prefix deletes. Without them, a delete operation scans the entire table. If you're using Drizzle, add indexes to your schema: ```ts export const documents = pgTable( @@ -166,9 +170,13 @@ engine: { } ``` +**Chunker method not installed.** You've set `chunking.method: "semantic"` in your config, but the semantic chunker module isn't installed. Either install it with `bunx unrag add chunker:semantic` or change your config to use a built-in method like `"recursive"`. + +**Custom chunker without chunker function.** You've set `chunking.method: "custom"` but haven't provided a `chunker` function. Custom chunking requires you to pass your own chunker implementation in the config. See [Custom Chunking](/docs/chunking/custom) for examples. + **DATABASE_URL not set.** Doctor looks in your environment and the dotenv files it loads. If you're storing the URL in a custom variable, either use `--database-url-env YOUR_VAR_NAME` or configure it in `.unrag/doctor.json` under `env.databaseUrlEnv`. -**Mixed embedding dimensions.** You changed embedding models at some point, and now your database contains vectors of different sizes. This causes retrieval errors because pgvector can't compare vectors of different dimensions. You'll need to re-ingest your content with the current model, or use the `--scope` flag to limit checks to a specific source prefix: +**Mixed embedding dimensions.** You changed embedding models at some point, and now your database contains vectors of different sizes. This causes retrieval errors because pgvector can't compare vectors of different dimensions. You'll need to re-ingest your content with the current model. You can use the `--scope` flag to limit checks to a specific source prefix: ```bash unrag doctor --db --scope "docs:" @@ -176,13 +184,13 @@ unrag doctor --db --scope "docs:" This checks only embeddings for chunks whose `source_id` starts with "docs:". -## What doctor doesn't do +**Per-ingest chunker override not supported.** After upgrading Unrag, you might have new API capabilities in the registry that your vendored code doesn't include yet. Doctor detects this by checking whether your vendored `IngestInput` type includes the `chunker` field. If it doesn't, you can re-vendor the core types by running `bunx unrag add core` or manually adding the field to your local `core/types.ts`. -Doctor is a diagnostic tool, not a repair tool. It tells you what's wrong but doesn't automatically fix anything. This is intentional—database changes and file modifications should be deliberate actions you control. +## What doctor doesn't do -Doctor also doesn't test your application's runtime behavior. It can verify that your config file exists and parses, but it doesn't actually call `engine.ingest()` or `engine.retrieve()`. If you want to verify the full pipeline works, write an integration test that ingests sample content and retrieves it. +Doctor is a diagnostic tool, not a repair tool. It tells you what's wrong but doesn't automatically fix anything. Database changes, file modifications, and configuration updates should be deliberate actions you control. Doctor gives you the information; you decide what to do with it. -## Next steps +Doctor also doesn't test runtime behavior. It can verify that your config file exists and parses, that modules are present, and that your schema looks right. But it doesn't actually call `engine.ingest()` or `engine.retrieve()`. If you want to verify the full pipeline works end-to-end, write an integration test that ingests sample content and retrieves it. diff --git a/apps/web/content/docs/(unrag)/meta.json b/apps/web/content/docs/(unrag)/meta.json index d47cc5f..83ea1df 100644 --- a/apps/web/content/docs/(unrag)/meta.json +++ b/apps/web/content/docs/(unrag)/meta.json @@ -9,6 +9,7 @@ "getting-started", "upgrade", "concepts", + "chunking", "adapters", "providers", "embedding", diff --git a/apps/web/content/docs/(unrag)/reference/core-types.mdx b/apps/web/content/docs/(unrag)/reference/core-types.mdx index 577ff82..cb65276 100644 --- a/apps/web/content/docs/(unrag)/reference/core-types.mdx +++ b/apps/web/content/docs/(unrag)/reference/core-types.mdx @@ -3,60 +3,80 @@ title: Core Types Reference description: TypeScript types for the Unrag engine, inputs, outputs, and interfaces. --- -Unrag's type system is intentionally small. Understanding these types helps you work with the engine effectively and build custom components. +Unrag's type system is intentionally small. A handful of types cover the core operations—ingesting content, retrieving chunks, managing documents. Understanding these types helps you work with the engine effectively and build custom components when you need them. ## IngestInput -The input to `engine.ingest()`: +When you call `engine.ingest()`, you pass an object matching this type: ```ts type IngestInput = { sourceId: string; content: string; metadata?: Metadata; - chunking?: { chunkSize?: number; chunkOverlap?: number }; + chunker?: Chunker; + chunking?: { chunkSize?: number; chunkOverlap?: number; minChunkSize?: number }; assets?: AssetInput[]; assetProcessing?: DeepPartial; }; ``` +The `sourceId` is the logical identifier for your document. This is how you'll reference it later—for updates, deletes, or scoped retrieval. Use consistent, meaningful identifiers like `docs:getting-started` or `ticket:12345`. If you ingest with the same `sourceId` again, you're updating that document; the old chunks are replaced with new ones. + +The `content` string is the text you want to chunk and embed. This is the searchable content. + +The optional `metadata` object stores structured data alongside the document. It appears in retrieval results and can help with filtering or display. Keep values simple and serializable—the adapter stores metadata as JSONB. + +The `chunker` parameter lets you override the chunking algorithm for this specific ingest. Pass a chunker function to use different splitting logic without changing your engine's default configuration. This is useful when you're ingesting heterogeneous content—documentation, code, and prose—with a single engine instance. + +The `chunking` parameter overrides chunking options (chunk size, overlap, minimum size) for this ingest while keeping your configured chunker. Use this when the same algorithm should behave differently for specific content. + +The `assets` array contains rich media inputs like images and PDFs. Connectors like Notion and Google Drive populate this automatically. Each asset can be processed into additional chunks. + +The `assetProcessing` parameter overrides asset handling behavior for this ingest, such as enabling or disabling PDF extraction. + ', + "Optional per-call override for asset processing behavior (enable/disable extraction, adjust limits, etc.).", + type: "DeepPartial", }, }} /> ## AssetInput -Assets are non-text inputs attached to a document (e.g., a PDF embed in Notion). They can be turned into text chunks (via extraction) or embedded directly (for images when using a multimodal embedding provider). +Assets are non-text inputs attached to a document—a PDF embedded in a Notion page, an image in a knowledge base article. The engine can process these into text chunks (via extraction) or embed them directly (for images when using a multimodal embedding provider). ```ts type AssetInput = { @@ -66,14 +86,16 @@ type AssetInput = { | { kind: "url"; url: string; headers?: Record; mediaType?: string; filename?: string } | { kind: "bytes"; bytes: Uint8Array; mediaType: string; filename?: string }; uri?: string; - text?: string; // caption / alt text + text?: string; metadata?: Metadata; }; ``` +The `assetId` uniquely identifies the asset within the document. The `kind` indicates what type of media it is. The `data` field provides either a URL to fetch or raw bytes. The optional `text` field can contain a caption or alt text, which is used for embedding when direct processing isn't available. + ## IngestResult -The output from `engine.ingest()`: +After ingestion completes, you get back information about what was stored: ```ts type IngestResult = { @@ -85,122 +107,73 @@ type IngestResult = { }; ``` +The `documentId` is the UUID assigned to this document in the database. The `chunkCount` tells you how many chunks were created. The `durations` object breaks down where time was spent—embedding typically dominates because of API latency. + +The `warnings` array contains structured information about anything that didn't go perfectly. If an asset was skipped because extraction wasn't enabled, or if a PDF produced no text, you'll find that information here. Treat warnings as observability signals: + +```ts +const result = await engine.ingest(input); +if (result.warnings.length > 0) { + console.warn("Ingest warnings:", result.warnings); +} +``` + ## IngestWarning -Warnings emitted by `engine.ingest()` when rich media assets are skipped or best-effort processing fails (while continuing). +When assets are skipped or processing partially fails, the engine emits structured warnings rather than throwing errors. This keeps ingestion flowing while giving you visibility into what was missed. ```ts type IngestWarning = - | { - code: "asset_skipped_unsupported_kind"; - message: string; - assetId: string; - assetKind: AssetKind; - assetUri?: string; - assetMediaType?: string; - } - | { - code: "asset_skipped_extraction_disabled"; - message: string; - assetId: string; - assetKind: AssetKind; - assetUri?: string; - assetMediaType?: string; - } - | { - code: "asset_skipped_pdf_llm_extraction_disabled"; - message: string; - assetId: string; - assetKind: "pdf"; - assetUri?: string; - assetMediaType?: string; - } - | { - code: "asset_skipped_image_no_multimodal_and_no_caption"; - message: string; - assetId: string; - assetKind: "image"; - assetUri?: string; - assetMediaType?: string; - } - | { - code: "asset_skipped_pdf_empty_extraction"; - message: string; - assetId: string; - assetKind: "pdf"; - assetUri?: string; - assetMediaType?: string; - } - | { - code: "asset_skipped_extraction_empty"; - message: string; - assetId: string; - assetKind: AssetKind; - assetUri?: string; - assetMediaType?: string; - } - | { - code: "asset_processing_error"; - message: string; - assetId: string; - assetKind: AssetKind; - stage: "fetch" | "extract" | "embed" | "unknown"; - assetUri?: string; - assetMediaType?: string; - }; + | { code: "asset_skipped_unsupported_kind"; message: string; assetId: string; assetKind: AssetKind; ... } + | { code: "asset_skipped_extraction_disabled"; message: string; assetId: string; assetKind: AssetKind; ... } + | { code: "asset_skipped_pdf_llm_extraction_disabled"; message: string; assetId: string; assetKind: "pdf"; ... } + | { code: "asset_skipped_image_no_multimodal_and_no_caption"; message: string; assetId: string; ... } + | { code: "asset_skipped_pdf_empty_extraction"; message: string; assetId: string; assetKind: "pdf"; ... } + | { code: "asset_skipped_extraction_empty"; message: string; assetId: string; assetKind: AssetKind; ... } + | { code: "asset_processing_error"; message: string; assetId: string; stage: "fetch" | "extract" | "embed" | "unknown"; ... }; ``` -The `stage` field in `asset_processing_error` indicates where the failure occurred: +Each warning includes the `assetId` so you can identify which asset had the issue, plus a human-readable `message`. The `code` field lets you programmatically categorize and handle warnings. -| Stage | Description | -|-------|-------------| -| `fetch` | Failed to fetch URL-based asset data (applies to extractors and image embedding) | -| `extract` | Extractor threw an error while processing asset bytes | -| `embed` | Embedding provider threw an error | -| `unknown` | Fallback for unexpected error locations | +For processing errors, the `stage` field indicates where the failure occurred: -**Recommended practice**: treat warnings as observability signals. - -```ts -const result = await engine.ingest(input); -if (result.warnings.length > 0) { - // Send to logs/metrics/alerts in production - console.warn("unrag ingest warnings", result.warnings); -} -``` - -For configuration knobs that control when assets are skipped vs failed, see [Asset Processing Reference](/docs/reference/asset-processing). +| Stage | What happened | +|-------|---------------| +| `fetch` | Failed to download URL-based asset data | +| `extract` | Extractor threw an error while processing | +| `embed` | Embedding provider failed | +| `unknown` | Unexpected error location | ## RetrieveInput -The input to `engine.retrieve()`: +When you call `engine.retrieve()`, you pass a query and optional parameters: ```ts type RetrieveInput = { @@ -210,27 +183,33 @@ type RetrieveInput = { }; ``` +The `query` is the search string. It gets embedded using the same model that embedded your chunks, then compared against stored embeddings to find matches. + +The `topK` parameter controls how many results you get back. The default of 8 is usually a good starting point—enough to find relevant content without overwhelming downstream processing. + +The `scope` parameter filters results. When you provide `{ sourceId: "docs:" }`, only chunks whose source ID starts with "docs:" are considered. This is how you implement scoped search, tenant isolation, or collection filtering. + ## RetrieveResult -The output from `engine.retrieve()`: +Retrieval returns the matching chunks with metadata: ```ts type RetrieveResult = { @@ -240,19 +219,21 @@ type RetrieveResult = { }; ``` +Each chunk includes a `score` representing similarity to the query. With cosine distance (the default), lower scores mean higher similarity. The chunks are sorted by score ascending, so the most relevant results come first. + ', + description: "Matching chunks with scores, ordered by score ascending (lower = more similar).", + type: "Array", }, embeddingModel: { - description: 'Which model embedded the query.', - type: 'string', + description: "Which model embedded the query.", + type: "string", }, durations: { - description: 'Timing breakdown: totalMs, embeddingMs, retrievalMs.', - type: 'object', + description: "Timing breakdown: totalMs, embeddingMs, retrievalMs.", + type: "object", }, }} /> @@ -263,82 +244,70 @@ The chunk type represents a piece of a document: ```ts type Chunk = { - id: string; // UUID of the chunk - documentId: string; // UUID of the parent document - sourceId: string; // Logical identifier from ingestion - index: number; // Position in the original document (0, 1, 2, ...) - content: string; // The chunk's text (may be empty if storage.storeChunkContent is false) - tokenCount: number; // Approximate token count - metadata: Metadata; // JSON metadata from ingestion - embedding?: number[]; // Vector (present during upsert, not in query results) - documentContent?: string; // Full document text (during upsert only; may be empty if storage.storeDocumentContent is false) + id: string; + documentId: string; + sourceId: string; + index: number; + content: string; + tokenCount: number; + metadata: Metadata; + embedding?: number[]; + documentContent?: string; }; ``` -During retrieval, chunks include a `score` field representing similarity to the query. +During retrieval, chunks include the `score` field. The `embedding` field is present during upsert operations but not returned in query results. The `documentContent` field contains the full document text during upsert and may be empty if you've disabled document content storage. ## Metadata -Metadata is a flexible JSON object: +Metadata is a flexible JSON structure: ```ts type MetadataValue = string | number | boolean | null; - -type Metadata = Record< - string, - MetadataValue | MetadataValue[] | undefined ->; +type Metadata = Record; ``` -Keep values simple and serializable. The adapter stores metadata as JSONB, so complex nested objects work but may be harder to query. +Keep metadata simple and serializable. The adapter stores it as JSONB, so you can use it in queries, but complex nested structures are harder to work with. ## EmbeddingProvider -The interface for embedding text into vectors: +If you need to implement a custom embedding provider, it follows this interface: ```ts type EmbeddingInput = { - text: string; // The text to embed - metadata: Metadata; // Context (from chunk or query) - position: number; // Chunk index (or 0 for queries) - sourceId: string; // Document sourceId (or "query") - documentId: string; // Document UUID (or "query") + text: string; + metadata: Metadata; + position: number; + sourceId: string; + documentId: string; }; type EmbeddingProvider = { - name: string; // Identifier for debugging - dimensions?: number; // Expected output size (optional) + name: string; + dimensions?: number; embed: (input: EmbeddingInput) => Promise; }; ``` -The `embed` function receives context about what's being embedded, though most implementations only use `text`. Return a numeric array representing the embedding vector. +The `embed` function receives context about what's being embedded, though most implementations only use the `text` field. Return a numeric array representing the embedding vector. ## VectorStore -The interface for database operations: +The store adapter interface handles database operations: ```ts type VectorStore = { upsert: (chunks: Chunk[]) => Promise; - query: (params: { - embedding: number[]; - topK: number; - scope?: { sourceId?: string }; - }) => Promise>; + query: (params: { embedding: number[]; topK: number; scope?: { sourceId?: string } }) => Promise>; delete: (input: DeleteInput) => Promise; }; ``` -The `upsert` method replaces stored content for the logical document identified by `chunks[0].sourceId` (exact match). - -The `query` method finds the most similar chunks and returns them with similarity scores. - -The `delete` method removes stored content by logical identity (either an exact `sourceId` or a namespace `sourceIdPrefix`). +The `upsert` method replaces stored content for the logical document. The `query` method finds similar chunks. The `delete` method removes documents by source ID or prefix. ## DeleteInput -The input to `engine.delete()` and `store.delete()`: +Deletion supports exact match or prefix match: ```ts type DeleteInput = @@ -346,97 +315,34 @@ type DeleteInput = | { sourceIdPrefix: string }; ``` -## AssetExtractor +Use exact deletion for single documents. Use prefix deletion for namespaces (e.g., deleting all documents for a tenant). + +## Chunker and ChunkingOptions -The interface for extractor modules that process rich media assets into text or embeddings. Extractors are installed via the CLI and registered in your engine configuration. +Custom chunkers implement this interface: ```ts -type AssetExtractor = { - name: string; - supports: (args: { asset: AssetInput; ctx: AssetExtractorContext }) => boolean; - extract: (args: { asset: AssetInput; ctx: AssetExtractorContext }) => Promise<{ - texts: Array<{ - label: string; - content: string; - confidence?: number; - pageRange?: [number, number]; - timeRangeSec?: [number, number]; - }>; - skipped?: { code: string; message: string }; - metadata?: Metadata; - diagnostics?: { model?: string; tokens?: number; seconds?: number }; - }>; +type ChunkingOptions = { + chunkSize: number; + chunkOverlap: number; + minChunkSize?: number; + separators?: string[]; }; -``` - boolean', - }, - extract: { - description: 'Performs extraction and returns text segments with optional metadata and diagnostics.', - type: '(args) => Promise', - }, - }} -/> - -### Extract result - -The `extract` function returns: - -| Field | Description | -|-------|-------------| -| `texts` | Array of extracted text segments, each with a `label` (e.g., "page-1", "transcription") and `content` | -| `texts[].confidence` | Optional confidence score (0-1) for the extraction | -| `texts[].pageRange` | Optional page range `[start, end]` for PDFs | -| `texts[].timeRangeSec` | Optional time range in seconds for audio/video | -| `skipped` | Optional structured skip reason (e.g., disabled by config, too-large) | -| `metadata` | Optional metadata merged into chunks created from this asset | -| `diagnostics` | Optional diagnostic info (model used, token count, processing time) | - -### Example: Custom extractor +type ChunkText = { + index: number; + content: string; + tokenCount: number; +}; -```ts -import type { AssetExtractor } from "@unrag/core"; - -export function createAudioTranscriptExtractor(): AssetExtractor { - return { - name: "audio:whisper", - - supports: ({ asset, ctx }) => { - return asset.kind === "audio" && ctx.assetProcessing.audio.transcription.enabled; - }, - - extract: async ({ asset, ctx }) => { - const audioBytes = asset.data.kind === "bytes" - ? asset.data.bytes - : await fetchAssetBytes(asset.data.url); - - const transcription = await whisperTranscribe(audioBytes); - - return { - texts: [ - { label: "transcription", content: transcription.text }, - ], - diagnostics: { - model: "whisper-large-v3", - seconds: transcription.durationSec, - }, - }; - }, - }; -} +type Chunker = (content: string, options: ChunkingOptions) => ChunkText[] | Promise; ``` +Your chunker receives the document content and configuration options. Return an array of chunks with sequential indices, the chunk text, and accurate token counts. + ## ContextEngineConfig -The configuration for creating an engine: +When creating an engine, you provide this configuration: ```ts type ContextEngineConfig = { @@ -449,56 +355,42 @@ type ContextEngineConfig = { chunker?: Chunker; idGenerator?: () => string; }; - -type ChunkingOptions = { - chunkSize: number; - chunkOverlap: number; -}; - -type Chunker = (content: string, options: ChunkingOptions) => ChunkText[]; - -type ChunkText = { - index: number; - content: string; - tokenCount: number; -}; ``` ', + description: "Configuration for how assets are processed (fetch limits, extraction settings).", + type: "DeepPartial", }, storage: { - description: 'Controls what content is persisted to the database.', - type: 'object', + description: "Controls what content is persisted to the database.", + type: "object", }, defaults: { - description: 'Default chunking settings (chunkSize, chunkOverlap).', - type: 'Partial', + description: "Default chunking settings (chunkSize, chunkOverlap, minChunkSize).", + type: "Partial", }, chunker: { - description: 'Optional custom chunking function. Defaults to recursive text splitter.', - type: 'Chunker', + description: "Optional custom chunking function. Defaults to token-based recursive splitting.", + type: "Chunker", }, idGenerator: { - description: 'Optional custom UUID generator.', - type: '() => string', + description: "Optional custom UUID generator. Defaults to crypto.randomUUID().", + type: "() => string", }, }} /> - diff --git a/apps/web/content/docs/(unrag)/reference/unrag-config.mdx b/apps/web/content/docs/(unrag)/reference/unrag-config.mdx index 464c357..33ef498 100644 --- a/apps/web/content/docs/(unrag)/reference/unrag-config.mdx +++ b/apps/web/content/docs/(unrag)/reference/unrag-config.mdx @@ -18,8 +18,8 @@ import { Pool } from "pg"; export const unrag = defineUnragConfig({ defaults: { chunking: { - chunkSize: 200, - chunkOverlap: 40, + chunkSize: 512, + chunkOverlap: 50, }, retrieval: { topK: 8, @@ -98,19 +98,24 @@ This object holds your default settings. Changing values here affects all operat ### defaults.chunking -Controls how documents are split into chunks. See [Chunking](/docs/concepts/chunking) for details on strategies. +Controls how documents are split into chunks. Unrag uses token-based recursive chunking with the `o200k_base` tokenizer (same as GPT-5, GPT-4o). See [Chunking](/docs/concepts/chunking) for details on strategies and plugin chunkers. @@ -152,6 +157,52 @@ You can set these under `defaults.embedding` (recommended). If you need a per-en }} /> +### chunking + +Configures the chunking method and options at the top level. This is an alternative to `defaults.chunking` that also lets you specify which chunking method to use (built-in, plugin, or custom). + +```ts +export default defineUnragConfig({ + chunking: { + method: "recursive", // or "markdown", "code", "semantic", etc. + options: { + chunkSize: 512, + chunkOverlap: 50, + minChunkSize: 24, + }, + }, + // ... +}); +``` + + + +Plugin chunkers must be installed via CLI before use: + +```bash +bunx unrag add chunker:markdown # For documentation +bunx unrag add chunker:code # For source code (uses tree-sitter) +bunx unrag add chunker:semantic # LLM-guided semantic boundaries +bunx unrag add chunker:hierarchical # Section-first with header context +bunx unrag add chunker:agentic # LLM-powered highest quality +``` + ### storage Controls what Unrag persists to your database. diff --git a/bun.lock b/bun.lock index b13e7d4..9c29475 100644 --- a/bun.lock +++ b/bun.lock @@ -55,6 +55,31 @@ "typescript": "^5", }, }, + "apps/examples/test": { + "name": "test", + "version": "0.1.0", + "dependencies": { + "ai": "^6.0.3", + "dotenv": "^17.2.3", + "drizzle-orm": "^0.45.1", + "next": "16.1.4", + "pg": "^8.16.3", + "react": "19.2.3", + "react-dom": "19.2.3", + }, + "devDependencies": { + "@tailwindcss/postcss": "^4", + "@types/node": "^20", + "@types/pg": "^8.16.0", + "@types/react": "^19", + "@types/react-dom": "^19", + "drizzle-kit": "^0.31.8", + "eslint": "^9", + "eslint-config-next": "16.1.4", + "tailwindcss": "^4", + "typescript": "^5", + }, + }, "apps/web": { "name": "web", "version": "0.0.0", @@ -108,6 +133,7 @@ }, "dependencies": { "@clack/prompts": "^0.11.0", + "js-tiktoken": "^1.0.21", "jsonc-parser": "^3.3.1", "pg": "^8.16.3", "semver": "^7.6.3", @@ -139,6 +165,7 @@ }, "trustedDependencies": [ "sharp", + "unrs-resolver", ], "packages": { "@ai-sdk/amazon-bedrock": ["@ai-sdk/amazon-bedrock@3.0.72", "", { "dependencies": { "@ai-sdk/anthropic": "2.0.56", "@ai-sdk/provider": "2.0.0", "@ai-sdk/provider-utils": "3.0.19", "@smithy/eventstream-codec": "^4.0.1", "@smithy/util-utf8": "^4.0.0", "aws4fetch": "^1.0.20" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-NUDgTtSbaQ1le8U1F7g79hmDlk0wwH+BKnzKpWF9ShO8SaYib7I2XUAik5STRAmDmbjfUNTtLQx+fizHUzCkBQ=="], @@ -309,8 +336,12 @@ "@ecies/ciphers": ["@ecies/ciphers@0.2.5", "", { "peerDependencies": { "@noble/ciphers": "^1.0.0" } }, "sha512-GalEZH4JgOMHYYcYmVqnFirFsjZHeoGMDt9IxEnM9F7GRUUyUksJ7Ou53L83WHJq3RWKD3AcBpo0iQh0oMpf8A=="], + "@emnapi/core": ["@emnapi/core@1.7.1", "", { "dependencies": { "@emnapi/wasi-threads": "1.1.0", "tslib": "^2.4.0" } }, "sha512-o1uhUASyo921r2XtHYOHy7gdkGLge8ghBEQHMWmyJFoXlpU58kIrhhN3w26lpQb6dspetweapMn2CSNwQ8I4wg=="], + "@emnapi/runtime": ["@emnapi/runtime@1.7.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA=="], + "@emnapi/wasi-threads": ["@emnapi/wasi-threads@1.1.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-WI0DdZ8xFSbgMjR1sFsKABJ/C5OnRrjT06JXbZKexJGrDuPTzZdDYfFlsgcCXCyf+suG5QU2e/y1Wo2V/OapLQ=="], + "@esbuild-kit/core-utils": ["@esbuild-kit/core-utils@3.3.2", "", { "dependencies": { "esbuild": "~0.18.20", "source-map-support": "^0.5.21" } }, "sha512-sPRAnw9CdSsRmEtnsl2WXWdyquogVpB3yZ3dgwJfe8zrOzTsV7cJvmwrKVa+0ma5BoiGJ+BoqkMvawbayKUsqQ=="], "@esbuild-kit/esm-loader": ["@esbuild-kit/esm-loader@2.6.5", "", { "dependencies": { "@esbuild-kit/core-utils": "^3.3.2", "get-tsconfig": "^4.7.0" } }, "sha512-FxEMIkJKnodyA1OaCUoEvbYRkoZlLZ4d/eXFu9Fh8CbBBgP5EmZxrfTRyN0qpXZ4vOvqnE5YdRdcrmUUXuU+dA=="], @@ -367,6 +398,24 @@ "@esbuild/win32-x64": ["@esbuild/win32-x64@0.25.12", "", { "os": "win32", "cpu": "x64" }, "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA=="], + "@eslint-community/eslint-utils": ["@eslint-community/eslint-utils@4.9.1", "", { "dependencies": { "eslint-visitor-keys": "^3.4.3" }, "peerDependencies": { "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" } }, "sha512-phrYmNiYppR7znFEdqgfWHXR6NCkZEK7hwWDHZUjit/2/U0r6XvkDl0SYnoM51Hq7FhCGdLDT6zxCCOY1hexsQ=="], + + "@eslint-community/regexpp": ["@eslint-community/regexpp@4.12.2", "", {}, "sha512-EriSTlt5OC9/7SXkRSCAhfSxxoSUgBm33OH+IkwbdpgoqsSsUg7y3uh+IICI/Qg4BBWr3U2i39RpmycbxMq4ew=="], + + "@eslint/config-array": ["@eslint/config-array@0.21.1", "", { "dependencies": { "@eslint/object-schema": "^2.1.7", "debug": "^4.3.1", "minimatch": "^3.1.2" } }, "sha512-aw1gNayWpdI/jSYVgzN5pL0cfzU02GT3NBpeT/DXbx1/1x7ZKxFPd9bwrzygx/qiwIQiJ1sw/zD8qY/kRvlGHA=="], + + "@eslint/config-helpers": ["@eslint/config-helpers@0.4.2", "", { "dependencies": { "@eslint/core": "^0.17.0" } }, "sha512-gBrxN88gOIf3R7ja5K9slwNayVcZgK6SOUORm2uBzTeIEfeVaIhOpCtTox3P6R7o2jLFwLFTLnC7kU/RGcYEgw=="], + + "@eslint/core": ["@eslint/core@0.17.0", "", { "dependencies": { "@types/json-schema": "^7.0.15" } }, "sha512-yL/sLrpmtDaFEiUj1osRP4TI2MDz1AddJL+jZ7KSqvBuliN4xqYY54IfdN8qD8Toa6g1iloph1fxQNkjOxrrpQ=="], + + "@eslint/eslintrc": ["@eslint/eslintrc@3.3.3", "", { "dependencies": { "ajv": "^6.12.4", "debug": "^4.3.2", "espree": "^10.0.1", "globals": "^14.0.0", "ignore": "^5.2.0", "import-fresh": "^3.2.1", "js-yaml": "^4.1.1", "minimatch": "^3.1.2", "strip-json-comments": "^3.1.1" } }, "sha512-Kr+LPIUVKz2qkx1HAMH8q1q6azbqBAsXJUxBl/ODDuVPX45Z9DfwB8tPjTi6nNZ8BuM3nbJxC5zCAg5elnBUTQ=="], + + "@eslint/js": ["@eslint/js@9.39.2", "", {}, "sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA=="], + + "@eslint/object-schema": ["@eslint/object-schema@2.1.7", "", {}, "sha512-VtAOaymWVfZcmZbp6E2mympDIHvyjXs/12LqWYjVw6qjrfF+VK+fyG33kChz3nnK+SU5/NeHOqrTEHS8sXO3OA=="], + + "@eslint/plugin-kit": ["@eslint/plugin-kit@0.4.1", "", { "dependencies": { "@eslint/core": "^0.17.0", "levn": "^0.4.1" } }, "sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA=="], + "@floating-ui/core": ["@floating-ui/core@1.7.3", "", { "dependencies": { "@floating-ui/utils": "^0.2.10" } }, "sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w=="], "@floating-ui/dom": ["@floating-ui/dom@1.7.4", "", { "dependencies": { "@floating-ui/core": "^1.7.3", "@floating-ui/utils": "^0.2.10" } }, "sha512-OOchDgh4F2CchOX94cRVqhvy7b3AFb+/rQXyswmzmGakRfkMgoWVjfnLWkRirfLEfuD4ysVW16eXzwt3jHIzKA=="], @@ -379,6 +428,14 @@ "@hono/node-server": ["@hono/node-server@1.19.8", "", { "peerDependencies": { "hono": "^4" } }, "sha512-0/g2lIOPzX8f3vzW1ggQgvG5mjtFBDBHFAzI5SFAi2DzSqS9luJwqg9T6O/gKYLi+inS7eNxBeIFkkghIPvrMA=="], + "@humanfs/core": ["@humanfs/core@0.19.1", "", {}, "sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA=="], + + "@humanfs/node": ["@humanfs/node@0.16.7", "", { "dependencies": { "@humanfs/core": "^0.19.1", "@humanwhocodes/retry": "^0.4.0" } }, "sha512-/zUx+yOsIrG4Y43Eh2peDeKCxlRt/gET6aHfaKpuq267qXdYDFViVHfMaLyygZOnl0kGWxFIgsBy8QFuTLUXEQ=="], + + "@humanwhocodes/module-importer": ["@humanwhocodes/module-importer@1.0.1", "", {}, "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA=="], + + "@humanwhocodes/retry": ["@humanwhocodes/retry@0.4.3", "", {}, "sha512-bV0Tgo9K4hfPCek+aMAn81RppFKv2ySDQeMoSZuvTASywNTnVJCArCZE2FWqpvIatKu7VMRLWlR1EazvVhDyhQ=="], + "@iconify/types": ["@iconify/types@2.0.0", "", {}, "sha512-+wluvCrRhXrhyOmRDJ3q8mux9JkKy5SJ/v8ol2tu4FVjyYvtEzkc/3pK15ET6RKg4b4w4BmTk1+gsCUhf21Ykg=="], "@iconify/utils": ["@iconify/utils@3.1.0", "", { "dependencies": { "@antfu/install-pkg": "^1.1.0", "@iconify/types": "^2.0.0", "mlly": "^1.8.0" } }, "sha512-Zlzem1ZXhI1iHeeERabLNzBHdOa4VhQbqAcOQaMKuTuyZCpwKbC2R4Dd0Zo3g9EAc+Y4fiarO8HIHRAth7+skw=="], @@ -473,8 +530,12 @@ "@mswjs/interceptors": ["@mswjs/interceptors@0.40.0", "", { "dependencies": { "@open-draft/deferred-promise": "^2.2.0", "@open-draft/logger": "^0.3.0", "@open-draft/until": "^2.0.0", "is-node-process": "^1.2.0", "outvariant": "^1.4.3", "strict-event-emitter": "^0.5.1" } }, "sha512-EFd6cVbHsgLa6wa4RljGj6Wk75qoHxUSyc5asLyyPSyuhIcdS2Q3Phw6ImS1q+CkALthJRShiYfKANcQMuMqsQ=="], + "@napi-rs/wasm-runtime": ["@napi-rs/wasm-runtime@0.2.12", "", { "dependencies": { "@emnapi/core": "^1.4.3", "@emnapi/runtime": "^1.4.3", "@tybys/wasm-util": "^0.10.0" } }, "sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ=="], + "@next/env": ["@next/env@16.1.1", "", {}, "sha512-3oxyM97Sr2PqiVyMyrZUtrtM3jqqFxOQJVuKclDsgj/L728iZt/GyslkN4NwarledZATCenbk4Offjk1hQmaAA=="], + "@next/eslint-plugin-next": ["@next/eslint-plugin-next@16.1.4", "", { "dependencies": { "fast-glob": "3.3.1" } }, "sha512-38WMjGP8y+1MN4bcZFs+GTcBe0iem5GGTzFE5GWW/dWdRKde7LOXH3lQT2QuoquVWyfl2S0fQRchGmeacGZ4Wg=="], + "@next/swc-darwin-arm64": ["@next/swc-darwin-arm64@16.1.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-JS3m42ifsVSJjSTzh27nW+Igfha3NdBOFScr9C80hHGrWx55pTrVL23RJbqir7k7/15SKlrLHhh/MQzqBBYrQA=="], "@next/swc-darwin-x64": ["@next/swc-darwin-x64@16.1.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-hbyKtrDGUkgkyQi1m1IyD3q4I/3m9ngr+V93z4oKHrPcmxwNL5iMWORvLSGAf2YujL+6HxgVvZuCYZfLfb4bGw=="], @@ -503,6 +564,8 @@ "@nodelib/fs.walk": ["@nodelib/fs.walk@1.2.8", "", { "dependencies": { "@nodelib/fs.scandir": "2.1.5", "fastq": "^1.6.0" } }, "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg=="], + "@nolyfill/is-core-module": ["@nolyfill/is-core-module@1.0.39", "", {}, "sha512-nn5ozdjYQpUCZlWGuxcJY/KpxkWQs4DcbMCmKojjyrYDEAGy4Ce19NN4v5MduafTwJlbKc99UA8YhSVqq9yPZA=="], + "@open-draft/deferred-promise": ["@open-draft/deferred-promise@2.2.0", "", {}, "sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA=="], "@open-draft/logger": ["@open-draft/logger@0.3.0", "", { "dependencies": { "is-node-process": "^1.2.0", "outvariant": "^1.4.0" } }, "sha512-X2g45fzhxH238HKO4xbSr7+wBS8Fvw6ixhTDuvLd5mqh6bJJCFAPwU9mPDxbcrRtfxv4u5IHCEH77BmxvXmmxQ=="], @@ -611,6 +674,8 @@ "@ridemountainpig/svgl-react": ["@ridemountainpig/svgl-react@1.0.14", "", { "peerDependencies": { "react": "^16.8 || ^17.0 || ^18.0 || ^19.0", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0" } }, "sha512-nCFhnM5cR8dgAUZYqs4mdPTF/zgKPYsIW627xbd1eAvVefmmt0a6odj0DhPxlt50J5rrc5SmrcUV+n+01bAcTw=="], + "@rtsao/scc": ["@rtsao/scc@1.1.0", "", {}, "sha512-zt6OdqaDoOnJ1ZYsCYGt9YmWzDXl4vQdKTyJev62gFhRGKdx7mcT54V9KIjg+d2wi9EXsPvAPKe7i7WjfVWB8g=="], + "@sec-ant/readable-stream": ["@sec-ant/readable-stream@0.4.1", "", {}, "sha512-831qok9r2t8AlxLko40y2ebgSDhenenCatLVeW/uBtnHPyhHOvG0C7TvfgecV+wHzIm5KUICgzmVpWS+IMEAeg=="], "@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="], @@ -685,6 +750,8 @@ "@ts-morph/common": ["@ts-morph/common@0.27.0", "", { "dependencies": { "fast-glob": "^3.3.3", "minimatch": "^10.0.1", "path-browserify": "^1.0.1" } }, "sha512-Wf29UqxWDpc+i61k3oIOzcUfQt79PIT9y/MWfAGlrkjg6lBC1hwDECLXPVJAhWjiGbfBCxZd65F/LIZF3+jeJQ=="], + "@tybys/wasm-util": ["@tybys/wasm-util@0.10.1", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg=="], + "@types/bun": ["@types/bun@1.3.6", "", { "dependencies": { "bun-types": "1.3.6" } }, "sha512-uWCv6FO/8LcpREhenN1d1b6fcspAB+cefwD7uti8C8VffIv0Um08TKMn98FynpTiU38+y2dUO55T11NgDt8VAA=="], "@types/d3": ["@types/d3@7.4.3", "", { "dependencies": { "@types/d3-array": "*", "@types/d3-axis": "*", "@types/d3-brush": "*", "@types/d3-chord": "*", "@types/d3-color": "*", "@types/d3-contour": "*", "@types/d3-delaunay": "*", "@types/d3-dispatch": "*", "@types/d3-drag": "*", "@types/d3-dsv": "*", "@types/d3-ease": "*", "@types/d3-fetch": "*", "@types/d3-force": "*", "@types/d3-format": "*", "@types/d3-geo": "*", "@types/d3-hierarchy": "*", "@types/d3-interpolate": "*", "@types/d3-path": "*", "@types/d3-polygon": "*", "@types/d3-quadtree": "*", "@types/d3-random": "*", "@types/d3-scale": "*", "@types/d3-scale-chromatic": "*", "@types/d3-selection": "*", "@types/d3-shape": "*", "@types/d3-time": "*", "@types/d3-time-format": "*", "@types/d3-timer": "*", "@types/d3-transition": "*", "@types/d3-zoom": "*" } }, "sha512-lZXZ9ckh5R8uiFVt8ogUNf+pIrK4EsWrx2Np75WvF/eTpJ0FMHNhjXk8CKEx/+gpHbNQyJWehbFaTvqmHWB3ww=="], @@ -759,6 +826,10 @@ "@types/hast": ["@types/hast@3.0.4", "", { "dependencies": { "@types/unist": "*" } }, "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ=="], + "@types/json-schema": ["@types/json-schema@7.0.15", "", {}, "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA=="], + + "@types/json5": ["@types/json5@0.0.29", "", {}, "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ=="], + "@types/mdast": ["@types/mdast@4.0.4", "", { "dependencies": { "@types/unist": "*" } }, "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA=="], "@types/mdx": ["@types/mdx@2.0.13", "", {}, "sha512-+OWZQfAYyio6YkJb3HLxDrvnx6SWWDbC0zVPfBRzUk0/nqoDyf6dNxQi3eArPe8rJ473nobTMQ/8Zk+LxJ+Yuw=="], @@ -781,8 +852,66 @@ "@types/validate-npm-package-name": ["@types/validate-npm-package-name@4.0.2", "", {}, "sha512-lrpDziQipxCEeK5kWxvljWYhUvOiB2A9izZd9B2AFarYAkqZshb4lPbRs7zKEic6eGtH8V/2qJW+dPp9OtF6bw=="], + "@typescript-eslint/eslint-plugin": ["@typescript-eslint/eslint-plugin@8.53.1", "", { "dependencies": { "@eslint-community/regexpp": "^4.12.2", "@typescript-eslint/scope-manager": "8.53.1", "@typescript-eslint/type-utils": "8.53.1", "@typescript-eslint/utils": "8.53.1", "@typescript-eslint/visitor-keys": "8.53.1", "ignore": "^7.0.5", "natural-compare": "^1.4.0", "ts-api-utils": "^2.4.0" }, "peerDependencies": { "@typescript-eslint/parser": "^8.53.1", "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-cFYYFZ+oQFi6hUnBTbLRXfTJiaQtYE3t4O692agbBl+2Zy+eqSKWtPjhPXJu1G7j4RLjKgeJPDdq3EqOwmX5Ag=="], + + "@typescript-eslint/parser": ["@typescript-eslint/parser@8.53.1", "", { "dependencies": { "@typescript-eslint/scope-manager": "8.53.1", "@typescript-eslint/types": "8.53.1", "@typescript-eslint/typescript-estree": "8.53.1", "@typescript-eslint/visitor-keys": "8.53.1", "debug": "^4.4.3" }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-nm3cvFN9SqZGXjmw5bZ6cGmvJSyJPn0wU9gHAZZHDnZl2wF9PhHv78Xf06E0MaNk4zLVHL8hb2/c32XvyJOLQg=="], + + "@typescript-eslint/project-service": ["@typescript-eslint/project-service@8.53.1", "", { "dependencies": { "@typescript-eslint/tsconfig-utils": "^8.53.1", "@typescript-eslint/types": "^8.53.1", "debug": "^4.4.3" }, "peerDependencies": { "typescript": ">=4.8.4 <6.0.0" } }, "sha512-WYC4FB5Ra0xidsmlPb+1SsnaSKPmS3gsjIARwbEkHkoWloQmuzcfypljaJcR78uyLA1h8sHdWWPHSLDI+MtNog=="], + + "@typescript-eslint/scope-manager": ["@typescript-eslint/scope-manager@8.53.1", "", { "dependencies": { "@typescript-eslint/types": "8.53.1", "@typescript-eslint/visitor-keys": "8.53.1" } }, "sha512-Lu23yw1uJMFY8cUeq7JlrizAgeQvWugNQzJp8C3x8Eo5Jw5Q2ykMdiiTB9vBVOOUBysMzmRRmUfwFrZuI2C4SQ=="], + + "@typescript-eslint/tsconfig-utils": ["@typescript-eslint/tsconfig-utils@8.53.1", "", { "peerDependencies": { "typescript": ">=4.8.4 <6.0.0" } }, "sha512-qfvLXS6F6b1y43pnf0pPbXJ+YoXIC7HKg0UGZ27uMIemKMKA6XH2DTxsEDdpdN29D+vHV07x/pnlPNVLhdhWiA=="], + + "@typescript-eslint/type-utils": ["@typescript-eslint/type-utils@8.53.1", "", { "dependencies": { "@typescript-eslint/types": "8.53.1", "@typescript-eslint/typescript-estree": "8.53.1", "@typescript-eslint/utils": "8.53.1", "debug": "^4.4.3", "ts-api-utils": "^2.4.0" }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-MOrdtNvyhy0rHyv0ENzub1d4wQYKb2NmIqG7qEqPWFW7Mpy2jzFC3pQ2yKDvirZB7jypm5uGjF2Qqs6OIqu47w=="], + + "@typescript-eslint/types": ["@typescript-eslint/types@8.53.1", "", {}, "sha512-jr/swrr2aRmUAUjW5/zQHbMaui//vQlsZcJKijZf3M26bnmLj8LyZUpj8/Rd6uzaek06OWsqdofN/Thenm5O8A=="], + + "@typescript-eslint/typescript-estree": ["@typescript-eslint/typescript-estree@8.53.1", "", { "dependencies": { "@typescript-eslint/project-service": "8.53.1", "@typescript-eslint/tsconfig-utils": "8.53.1", "@typescript-eslint/types": "8.53.1", "@typescript-eslint/visitor-keys": "8.53.1", "debug": "^4.4.3", "minimatch": "^9.0.5", "semver": "^7.7.3", "tinyglobby": "^0.2.15", "ts-api-utils": "^2.4.0" }, "peerDependencies": { "typescript": ">=4.8.4 <6.0.0" } }, "sha512-RGlVipGhQAG4GxV1s34O91cxQ/vWiHJTDHbXRr0li2q/BGg3RR/7NM8QDWgkEgrwQYCvmJV9ichIwyoKCQ+DTg=="], + + "@typescript-eslint/utils": ["@typescript-eslint/utils@8.53.1", "", { "dependencies": { "@eslint-community/eslint-utils": "^4.9.1", "@typescript-eslint/scope-manager": "8.53.1", "@typescript-eslint/types": "8.53.1", "@typescript-eslint/typescript-estree": "8.53.1" }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-c4bMvGVWW4hv6JmDUEG7fSYlWOl3II2I4ylt0NM+seinYQlZMQIaKaXIIVJWt9Ofh6whrpM+EdDQXKXjNovvrg=="], + + "@typescript-eslint/visitor-keys": ["@typescript-eslint/visitor-keys@8.53.1", "", { "dependencies": { "@typescript-eslint/types": "8.53.1", "eslint-visitor-keys": "^4.2.1" } }, "sha512-oy+wV7xDKFPRyNggmXuZQSBzvoLnpmJs+GhzRhPjrxl2b/jIlyjVokzm47CZCDUdXKr2zd7ZLodPfOBpOPyPlg=="], + "@ungap/structured-clone": ["@ungap/structured-clone@1.3.0", "", {}, "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g=="], + "@unrs/resolver-binding-android-arm-eabi": ["@unrs/resolver-binding-android-arm-eabi@1.11.1", "", { "os": "android", "cpu": "arm" }, "sha512-ppLRUgHVaGRWUx0R0Ut06Mjo9gBaBkg3v/8AxusGLhsIotbBLuRk51rAzqLC8gq6NyyAojEXglNjzf6R948DNw=="], + + "@unrs/resolver-binding-android-arm64": ["@unrs/resolver-binding-android-arm64@1.11.1", "", { "os": "android", "cpu": "arm64" }, "sha512-lCxkVtb4wp1v+EoN+HjIG9cIIzPkX5OtM03pQYkG+U5O/wL53LC4QbIeazgiKqluGeVEeBlZahHalCaBvU1a2g=="], + + "@unrs/resolver-binding-darwin-arm64": ["@unrs/resolver-binding-darwin-arm64@1.11.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-gPVA1UjRu1Y/IsB/dQEsp2V1pm44Of6+LWvbLc9SDk1c2KhhDRDBUkQCYVWe6f26uJb3fOK8saWMgtX8IrMk3g=="], + + "@unrs/resolver-binding-darwin-x64": ["@unrs/resolver-binding-darwin-x64@1.11.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-cFzP7rWKd3lZaCsDze07QX1SC24lO8mPty9vdP+YVa3MGdVgPmFc59317b2ioXtgCMKGiCLxJ4HQs62oz6GfRQ=="], + + "@unrs/resolver-binding-freebsd-x64": ["@unrs/resolver-binding-freebsd-x64@1.11.1", "", { "os": "freebsd", "cpu": "x64" }, "sha512-fqtGgak3zX4DCB6PFpsH5+Kmt/8CIi4Bry4rb1ho6Av2QHTREM+47y282Uqiu3ZRF5IQioJQ5qWRV6jduA+iGw=="], + + "@unrs/resolver-binding-linux-arm-gnueabihf": ["@unrs/resolver-binding-linux-arm-gnueabihf@1.11.1", "", { "os": "linux", "cpu": "arm" }, "sha512-u92mvlcYtp9MRKmP+ZvMmtPN34+/3lMHlyMj7wXJDeXxuM0Vgzz0+PPJNsro1m3IZPYChIkn944wW8TYgGKFHw=="], + + "@unrs/resolver-binding-linux-arm-musleabihf": ["@unrs/resolver-binding-linux-arm-musleabihf@1.11.1", "", { "os": "linux", "cpu": "arm" }, "sha512-cINaoY2z7LVCrfHkIcmvj7osTOtm6VVT16b5oQdS4beibX2SYBwgYLmqhBjA1t51CarSaBuX5YNsWLjsqfW5Cw=="], + + "@unrs/resolver-binding-linux-arm64-gnu": ["@unrs/resolver-binding-linux-arm64-gnu@1.11.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-34gw7PjDGB9JgePJEmhEqBhWvCiiWCuXsL9hYphDF7crW7UgI05gyBAi6MF58uGcMOiOqSJ2ybEeCvHcq0BCmQ=="], + + "@unrs/resolver-binding-linux-arm64-musl": ["@unrs/resolver-binding-linux-arm64-musl@1.11.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-RyMIx6Uf53hhOtJDIamSbTskA99sPHS96wxVE/bJtePJJtpdKGXO1wY90oRdXuYOGOTuqjT8ACccMc4K6QmT3w=="], + + "@unrs/resolver-binding-linux-ppc64-gnu": ["@unrs/resolver-binding-linux-ppc64-gnu@1.11.1", "", { "os": "linux", "cpu": "ppc64" }, "sha512-D8Vae74A4/a+mZH0FbOkFJL9DSK2R6TFPC9M+jCWYia/q2einCubX10pecpDiTmkJVUH+y8K3BZClycD8nCShA=="], + + "@unrs/resolver-binding-linux-riscv64-gnu": ["@unrs/resolver-binding-linux-riscv64-gnu@1.11.1", "", { "os": "linux", "cpu": "none" }, "sha512-frxL4OrzOWVVsOc96+V3aqTIQl1O2TjgExV4EKgRY09AJ9leZpEg8Ak9phadbuX0BA4k8U5qtvMSQQGGmaJqcQ=="], + + "@unrs/resolver-binding-linux-riscv64-musl": ["@unrs/resolver-binding-linux-riscv64-musl@1.11.1", "", { "os": "linux", "cpu": "none" }, "sha512-mJ5vuDaIZ+l/acv01sHoXfpnyrNKOk/3aDoEdLO/Xtn9HuZlDD6jKxHlkN8ZhWyLJsRBxfv9GYM2utQ1SChKew=="], + + "@unrs/resolver-binding-linux-s390x-gnu": ["@unrs/resolver-binding-linux-s390x-gnu@1.11.1", "", { "os": "linux", "cpu": "s390x" }, "sha512-kELo8ebBVtb9sA7rMe1Cph4QHreByhaZ2QEADd9NzIQsYNQpt9UkM9iqr2lhGr5afh885d/cB5QeTXSbZHTYPg=="], + + "@unrs/resolver-binding-linux-x64-gnu": ["@unrs/resolver-binding-linux-x64-gnu@1.11.1", "", { "os": "linux", "cpu": "x64" }, "sha512-C3ZAHugKgovV5YvAMsxhq0gtXuwESUKc5MhEtjBpLoHPLYM+iuwSj3lflFwK3DPm68660rZ7G8BMcwSro7hD5w=="], + + "@unrs/resolver-binding-linux-x64-musl": ["@unrs/resolver-binding-linux-x64-musl@1.11.1", "", { "os": "linux", "cpu": "x64" }, "sha512-rV0YSoyhK2nZ4vEswT/QwqzqQXw5I6CjoaYMOX0TqBlWhojUf8P94mvI7nuJTeaCkkds3QE4+zS8Ko+GdXuZtA=="], + + "@unrs/resolver-binding-wasm32-wasi": ["@unrs/resolver-binding-wasm32-wasi@1.11.1", "", { "dependencies": { "@napi-rs/wasm-runtime": "^0.2.11" }, "cpu": "none" }, "sha512-5u4RkfxJm+Ng7IWgkzi3qrFOvLvQYnPBmjmZQ8+szTK/b31fQCnleNl1GgEt7nIsZRIf5PLhPwT0WM+q45x/UQ=="], + + "@unrs/resolver-binding-win32-arm64-msvc": ["@unrs/resolver-binding-win32-arm64-msvc@1.11.1", "", { "os": "win32", "cpu": "arm64" }, "sha512-nRcz5Il4ln0kMhfL8S3hLkxI85BXs3o8EYoattsJNdsX4YUU89iOkVn7g0VHSRxFuVMdM4Q1jEpIId1Ihim/Uw=="], + + "@unrs/resolver-binding-win32-ia32-msvc": ["@unrs/resolver-binding-win32-ia32-msvc@1.11.1", "", { "os": "win32", "cpu": "ia32" }, "sha512-DCEI6t5i1NmAZp6pFonpD5m7i6aFrpofcp4LA2i8IIq60Jyo28hamKBxNrZcyOwVOZkgsRp9O2sXWBWP8MnvIQ=="], + + "@unrs/resolver-binding-win32-x64-msvc": ["@unrs/resolver-binding-win32-x64-msvc@1.11.1", "", { "os": "win32", "cpu": "x64" }, "sha512-lrW200hZdbfRtztbygyaq/6jP6AKE8qQN2KvPcJ+x7wiD038YtnYtZ82IMNJ69GJibV7bwL3y9FgK+5w/pYt6g=="], + "@upstash/core-analytics": ["@upstash/core-analytics@0.0.10", "", { "dependencies": { "@upstash/redis": "^1.28.3" } }, "sha512-7qJHGxpQgQr9/vmeS1PktEwvNAF7TI4iJDi8Pu2CFZ9YUGHZH4fOP5TfYlZ4aVxfopnELiE4BS4FBjyK7V1/xQ=="], "@upstash/ratelimit": ["@upstash/ratelimit@2.0.8", "", { "dependencies": { "@upstash/core-analytics": "^0.0.10" }, "peerDependencies": { "@upstash/redis": "^1.34.3" } }, "sha512-YSTMBJ1YIxsoPkUMX/P4DDks/xV5YYCswWMamU8ZIfK9ly6ppjRnVOyBhMDXBmzjODm4UQKcxsJPvaeFAijp5w=="], @@ -803,7 +932,7 @@ "ai": ["ai@6.0.3", "", { "dependencies": { "@ai-sdk/gateway": "3.0.2", "@ai-sdk/provider": "3.0.0", "@ai-sdk/provider-utils": "4.0.1", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-OOo+/C+sEyscoLnbY3w42vjQDICioVNyS+F+ogwq6O5RJL/vgWGuiLzFwuP7oHTeni/MkmX8tIge48GTdaV7QQ=="], - "ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], + "ajv": ["ajv@6.12.6", "", { "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", "json-schema-traverse": "^0.4.1", "uri-js": "^4.2.2" } }, "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g=="], "ajv-formats": ["ajv-formats@3.0.1", "", { "dependencies": { "ajv": "^8.0.0" } }, "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ=="], @@ -821,24 +950,58 @@ "aria-hidden": ["aria-hidden@1.2.6", "", { "dependencies": { "tslib": "^2.0.0" } }, "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA=="], + "aria-query": ["aria-query@5.3.2", "", {}, "sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw=="], + + "array-buffer-byte-length": ["array-buffer-byte-length@1.0.2", "", { "dependencies": { "call-bound": "^1.0.3", "is-array-buffer": "^3.0.5" } }, "sha512-LHE+8BuR7RYGDKvnrmcuSq3tDcKv9OFEXQt/HpbZhY7V6h0zlUXutnAD82GiFx9rdieCMjkvtcsPqBwgUl1Iiw=="], + + "array-includes": ["array-includes@3.1.9", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.4", "define-properties": "^1.2.1", "es-abstract": "^1.24.0", "es-object-atoms": "^1.1.1", "get-intrinsic": "^1.3.0", "is-string": "^1.1.1", "math-intrinsics": "^1.1.0" } }, "sha512-FmeCCAenzH0KH381SPT5FZmiA/TmpndpcaShhfgEN9eCVjnFBqq3l1xrI42y8+PPLI6hypzou4GXw00WHmPBLQ=="], + "array-union": ["array-union@2.1.0", "", {}, "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw=="], + "array.prototype.findlast": ["array.prototype.findlast@1.2.5", "", { "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", "es-abstract": "^1.23.2", "es-errors": "^1.3.0", "es-object-atoms": "^1.0.0", "es-shim-unscopables": "^1.0.2" } }, "sha512-CVvd6FHg1Z3POpBLxO6E6zr+rSKEQ9L6rZHAaY7lLfhKsWYUBBOuMs0e9o24oopj6H+geRCX0YJ+TJLBK2eHyQ=="], + + "array.prototype.findlastindex": ["array.prototype.findlastindex@1.2.6", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.4", "define-properties": "^1.2.1", "es-abstract": "^1.23.9", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "es-shim-unscopables": "^1.1.0" } }, "sha512-F/TKATkzseUExPlfvmwQKGITM3DGTK+vkAsCZoDc5daVygbJBnjEUCbgkAvVFsgfXfX4YIqZ/27G3k3tdXrTxQ=="], + + "array.prototype.flat": ["array.prototype.flat@1.3.3", "", { "dependencies": { "call-bind": "^1.0.8", "define-properties": "^1.2.1", "es-abstract": "^1.23.5", "es-shim-unscopables": "^1.0.2" } }, "sha512-rwG/ja1neyLqCuGZ5YYrznA62D4mZXg0i1cIskIUKSiqF3Cje9/wXAls9B9s1Wa2fomMsIv8czB8jZcPmxCXFg=="], + + "array.prototype.flatmap": ["array.prototype.flatmap@1.3.3", "", { "dependencies": { "call-bind": "^1.0.8", "define-properties": "^1.2.1", "es-abstract": "^1.23.5", "es-shim-unscopables": "^1.0.2" } }, "sha512-Y7Wt51eKJSyi80hFrJCePGGNo5ktJCslFuboqJsbf57CCPcm5zztluPlc4/aD8sWsKvlwatezpV4U1efk8kpjg=="], + + "array.prototype.tosorted": ["array.prototype.tosorted@1.1.4", "", { "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", "es-abstract": "^1.23.3", "es-errors": "^1.3.0", "es-shim-unscopables": "^1.0.2" } }, "sha512-p6Fx8B7b7ZhL/gmUsAy0D15WhvDccw3mnGNbZpi3pmeJdxtWsj2jEaI4Y6oo3XiHfzuSgPwKc04MYt6KgvC/wA=="], + + "arraybuffer.prototype.slice": ["arraybuffer.prototype.slice@1.0.4", "", { "dependencies": { "array-buffer-byte-length": "^1.0.1", "call-bind": "^1.0.8", "define-properties": "^1.2.1", "es-abstract": "^1.23.5", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.6", "is-array-buffer": "^3.0.4" } }, "sha512-BNoCY6SXXPQ7gF2opIP4GBE+Xw7U+pHMYKuzjgCN3GwiaIR09UUeKfheyIry77QtrCBlC0KK0q5/TER/tYh3PQ=="], + "ast-types": ["ast-types@0.16.1", "", { "dependencies": { "tslib": "^2.0.1" } }, "sha512-6t10qk83GOG8p0vKmaCr8eiilZwO171AvbROMtvvNiwrTly62t+7XkA8RdIIVbpMhCASAsxgAzdRSwh6nw/5Dg=="], + "ast-types-flow": ["ast-types-flow@0.0.8", "", {}, "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ=="], + "astring": ["astring@1.9.0", "", { "bin": { "astring": "bin/astring" } }, "sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg=="], + "async-function": ["async-function@1.0.0", "", {}, "sha512-hsU18Ae8CDTR6Kgu9DYf0EbCr/a5iGL0rytQDobUcdpYOKokk8LEjVphnXkDkgpi0wYVsqrXuP0bZxJaTqdgoA=="], + "auto-bind": ["auto-bind@5.0.1", "", {}, "sha512-ooviqdwwgfIfNmDwo94wlshcdzfO64XV0Cg6oDsDYBJfITDz1EngD2z7DkbvCWn+XIMsIqW27sEVF6qcpJrRcg=="], + "available-typed-arrays": ["available-typed-arrays@1.0.7", "", { "dependencies": { "possible-typed-array-names": "^1.0.0" } }, "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ=="], + "aws4fetch": ["aws4fetch@1.0.20", "", {}, "sha512-/djoAN709iY65ETD6LKCtyyEI04XIBP5xVvfmNxsEP0uJB5tyaGBztSryRr4HqMStr9R06PisQE7m9zDTXKu6g=="], + "axe-core": ["axe-core@4.11.1", "", {}, "sha512-BASOg+YwO2C+346x3LZOeoovTIoTrRqEsqMa6fmfAV0P+U9mFr9NsyOEpiYvFjbc64NMrSswhV50WdXzdb/Z5A=="], + + "axobject-query": ["axobject-query@4.1.0", "", {}, "sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ=="], + "bail": ["bail@2.0.2", "", {}, "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw=="], + "balanced-match": ["balanced-match@1.0.2", "", {}, "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="], + + "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="], + "baseline-browser-mapping": ["baseline-browser-mapping@2.9.11", "", { "bin": { "baseline-browser-mapping": "dist/cli.js" } }, "sha512-Sg0xJUNDU1sJNGdfGWhVHX0kkZ+HWcvmVymJbj6NSgZZmW/8S9Y2HQ5euytnIgakgxN6papOAWiwDo1ctFDcoQ=="], "better-path-resolve": ["better-path-resolve@1.0.0", "", { "dependencies": { "is-windows": "^1.0.0" } }, "sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g=="], "body-parser": ["body-parser@2.2.2", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.3", "http-errors": "^2.0.0", "iconv-lite": "^0.7.0", "on-finished": "^2.4.1", "qs": "^6.14.1", "raw-body": "^3.0.1", "type-is": "^2.0.1" } }, "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA=="], + "brace-expansion": ["brace-expansion@1.1.12", "", { "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" } }, "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg=="], + "braces": ["braces@3.0.3", "", { "dependencies": { "fill-range": "^7.1.1" } }, "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA=="], "browserslist": ["browserslist@4.28.1", "", { "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", "electron-to-chromium": "^1.5.263", "node-releases": "^2.0.27", "update-browserslist-db": "^1.2.0" }, "bin": { "browserslist": "cli.js" } }, "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA=="], @@ -853,6 +1016,8 @@ "c12": ["c12@3.1.0", "", { "dependencies": { "chokidar": "^4.0.3", "confbox": "^0.2.2", "defu": "^6.1.4", "dotenv": "^16.6.1", "exsolve": "^1.0.7", "giget": "^2.0.0", "jiti": "^2.4.2", "ohash": "^2.0.11", "pathe": "^2.0.3", "perfect-debounce": "^1.0.0", "pkg-types": "^2.2.0", "rc9": "^2.1.2" }, "peerDependencies": { "magicast": "^0.3.5" }, "optionalPeers": ["magicast"] }, "sha512-uWoS8OU1MEIsOv8p/5a82c3H31LsWVR5qiyXVfBNOzfffjUWtPnhAb4BYI2uG2HfGmZmFjCtui5XNWaps+iFuw=="], + "call-bind": ["call-bind@1.0.8", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.0", "es-define-property": "^1.0.0", "get-intrinsic": "^1.2.4", "set-function-length": "^1.2.2" } }, "sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww=="], + "call-bind-apply-helpers": ["call-bind-apply-helpers@1.0.2", "", { "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2" } }, "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ=="], "call-bound": ["call-bound@1.0.4", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "get-intrinsic": "^1.3.0" } }, "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg=="], @@ -863,7 +1028,7 @@ "ccount": ["ccount@2.0.1", "", {}, "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg=="], - "chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="], + "chalk": ["chalk@4.1.2", "", { "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" } }, "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA=="], "character-entities": ["character-entities@2.0.2", "", {}, "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ=="], @@ -921,6 +1086,8 @@ "compute-scroll-into-view": ["compute-scroll-into-view@3.1.1", "", {}, "sha512-VRhuHOLoKYOy4UbilLbUzbYg93XLjv2PncJC50EuTWPA3gaja1UjBsUP/D/9/juV3vQFr6XBEzn9KCAHdUvOHw=="], + "concat-map": ["concat-map@0.0.1", "", {}, "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg=="], + "confbox": ["confbox@0.2.2", "", {}, "sha512-1NB+BKqhtNipMsov4xI/NnhCKp9XG9NamYp5PVm9klAT0fsrNPjaFICsCFhNhwZJKNh7zB/3q8qXz0E9oaMNtQ=="], "consola": ["consola@3.4.2", "", {}, "sha512-5IKcdX0nnYavi6G7TtOhwkYzyjfJlatbjMjuLSfE2kYT5pMDOilZ4OvMhi637CcDICTmz3wARPoyhqyX1Y+XvA=="], @@ -1021,8 +1188,16 @@ "dagre-d3-es": ["dagre-d3-es@7.0.13", "", { "dependencies": { "d3": "^7.9.0", "lodash-es": "^4.17.21" } }, "sha512-efEhnxpSuwpYOKRm/L5KbqoZmNNukHa/Flty4Wp62JRvgH2ojwVgPgdYyr4twpieZnyRDdIH7PY2mopX26+j2Q=="], + "damerau-levenshtein": ["damerau-levenshtein@1.0.8", "", {}, "sha512-sdQSFB7+llfUcQHUQO3+B8ERRj0Oa4w9POWMI/puGtuf7gFywGmkaLCElnudfTiKZV+NvHqL0ifzdrI8Ro7ESA=="], + "data-uri-to-buffer": ["data-uri-to-buffer@4.0.1", "", {}, "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A=="], + "data-view-buffer": ["data-view-buffer@1.0.2", "", { "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", "is-data-view": "^1.0.2" } }, "sha512-EmKO5V3OLXh1rtK2wgXRansaK1/mtVdTUEiEI0W8RkvgT05kfxaH29PliLnpLP73yYO6142Q72QNa8Wx/A5CqQ=="], + + "data-view-byte-length": ["data-view-byte-length@1.0.2", "", { "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", "is-data-view": "^1.0.2" } }, "sha512-tuhGbE6CfTM9+5ANGf+oQb72Ky/0+s3xKUpHvShfiz2RxMFgFPjsXuRLBVMtvMs15awe45SRb83D6wH4ew6wlQ=="], + + "data-view-byte-offset": ["data-view-byte-offset@1.0.1", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "is-data-view": "^1.0.1" } }, "sha512-BS8PfmtDGnrgYdOonGZQdLZslWIeCGFP9tpan0hi1Co2Zr2NKADsvGYA8XxuG/4UWgJ6Cjtv+YJnB6MM69QGlQ=="], + "date-fns": ["date-fns@4.1.0", "", {}, "sha512-Ukq0owbQXxa/U3EGtsdVBkR1w7KOQ5gIBqdH2hkvknzZPYvBxb/aa6E8L7tmjFtkwZBu3UXBbjIgPo/Ez4xaNg=="], "date-fns-jalali": ["date-fns-jalali@4.1.0-0", "", {}, "sha512-hTIP/z+t+qKwBDcmmsnmjWTduxCg+5KfdqWQvb2X/8C9+knYY6epN/pfxdDuyVlSVeFz0sM5eEfwIUQ70U4ckg=="], @@ -1037,6 +1212,8 @@ "dedent": ["dedent@1.7.1", "", { "peerDependencies": { "babel-plugin-macros": "^3.1.0" }, "optionalPeers": ["babel-plugin-macros"] }, "sha512-9JmrhGZpOlEgOLdQgSm0zxFaYoQon408V1v49aqTWuXENVlnCuY9JBZcXZiCsZQWDjTm5Qf/nIvAy77mXDAjEg=="], + "deep-is": ["deep-is@0.1.4", "", {}, "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ=="], + "deepmerge": ["deepmerge@4.3.1", "", {}, "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A=="], "deepmerge-ts": ["deepmerge-ts@7.1.5", "", {}, "sha512-HOJkrhaYsweh+W+e74Yn7YStZOilkoPb6fycpwNLKzSPtruFs48nYis0zy5yJz1+ktUhHxoRDJ27RQAWLIJVJw=="], @@ -1045,8 +1222,12 @@ "default-browser-id": ["default-browser-id@5.0.1", "", {}, "sha512-x1VCxdX4t+8wVfd1so/9w+vQ4vx7lKd2Qp5tDRutErwmR85OgmfX7RlLRMWafRMY7hbEiXIbudNrjOAPa/hL8Q=="], + "define-data-property": ["define-data-property@1.1.4", "", { "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", "gopd": "^1.0.1" } }, "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A=="], + "define-lazy-prop": ["define-lazy-prop@3.0.0", "", {}, "sha512-N+MeXYoqr3pOgn8xfyRPREN7gHakLYjhsHhWGT3fWAiL4IkAt0iDw14QiiEm2bE30c5XX5q0FtAA3CK5f9/BUg=="], + "define-properties": ["define-properties@1.2.1", "", { "dependencies": { "define-data-property": "^1.0.1", "has-property-descriptors": "^1.0.0", "object-keys": "^1.1.1" } }, "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg=="], + "defu": ["defu@6.1.4", "", {}, "sha512-mEQCMmwJu317oSz8CwdIOdwf3xMif1ttiM8LTufzc3g6kR+9Pe236twL8j3IYT1F7GfRgGcW6MWxzZjLIkuHIg=="], "delaunator": ["delaunator@5.0.1", "", { "dependencies": { "robust-predicates": "^3.0.2" } }, "sha512-8nvh+XBe96aCESrGOqMp/84b13H9cdKbG5P2ejQCh4d4sK9RL4371qou9drQjMhvnPmhWl5hnmqbEE0fXr9Xnw=="], @@ -1069,6 +1250,8 @@ "dir-glob": ["dir-glob@3.0.1", "", { "dependencies": { "path-type": "^4.0.0" } }, "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA=="], + "doctrine": ["doctrine@2.1.0", "", { "dependencies": { "esutils": "^2.0.2" } }, "sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw=="], + "dom-helpers": ["dom-helpers@5.2.1", "", { "dependencies": { "@babel/runtime": "^7.8.7", "csstype": "^3.0.2" } }, "sha512-nRCa7CK3VTrM2NmGkIy4cbK7IZlgBE/PYMn55rrXefr5xXDP0LdtfPnblFDoVdcAfslJ7or6iqAUnx0CCGIWQA=="], "dompurify": ["dompurify@3.3.1", "", { "optionalDependencies": { "@types/trusted-types": "^2.0.7" } }, "sha512-qkdCKzLNtrgPFP1Vo+98FRzJnBRGe4ffyCea9IwHB1fyxPOeNTHpLKYGd4Uk9xvNoH0ZoOjwZxNptyMwqrId1Q=="], @@ -1095,7 +1278,7 @@ "embla-carousel-reactive-utils": ["embla-carousel-reactive-utils@8.6.0", "", { "peerDependencies": { "embla-carousel": "8.6.0" } }, "sha512-fMVUDUEx0/uIEDM0Mz3dHznDhfX+znCCDCeIophYb1QGVM7YThSWX+wz11zlYwWFOr74b4QLGg0hrGPJeG2s4A=="], - "emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="], + "emoji-regex": ["emoji-regex@9.2.2", "", {}, "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="], "empathic": ["empathic@2.0.0", "", {}, "sha512-i6UzDscO/XfAcNYD75CfICkmfLedpyPDdozrLMmQc5ORaQcdMoc21OnlEylMIqI7U8eniKrPMxxtj8k0vhmJhA=="], @@ -1111,12 +1294,22 @@ "error-ex": ["error-ex@1.3.4", "", { "dependencies": { "is-arrayish": "^0.2.1" } }, "sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ=="], + "es-abstract": ["es-abstract@1.24.1", "", { "dependencies": { "array-buffer-byte-length": "^1.0.2", "arraybuffer.prototype.slice": "^1.0.4", "available-typed-arrays": "^1.0.7", "call-bind": "^1.0.8", "call-bound": "^1.0.4", "data-view-buffer": "^1.0.2", "data-view-byte-length": "^1.0.2", "data-view-byte-offset": "^1.0.1", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "es-set-tostringtag": "^2.1.0", "es-to-primitive": "^1.3.0", "function.prototype.name": "^1.1.8", "get-intrinsic": "^1.3.0", "get-proto": "^1.0.1", "get-symbol-description": "^1.1.0", "globalthis": "^1.0.4", "gopd": "^1.2.0", "has-property-descriptors": "^1.0.2", "has-proto": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "internal-slot": "^1.1.0", "is-array-buffer": "^3.0.5", "is-callable": "^1.2.7", "is-data-view": "^1.0.2", "is-negative-zero": "^2.0.3", "is-regex": "^1.2.1", "is-set": "^2.0.3", "is-shared-array-buffer": "^1.0.4", "is-string": "^1.1.1", "is-typed-array": "^1.1.15", "is-weakref": "^1.1.1", "math-intrinsics": "^1.1.0", "object-inspect": "^1.13.4", "object-keys": "^1.1.1", "object.assign": "^4.1.7", "own-keys": "^1.0.1", "regexp.prototype.flags": "^1.5.4", "safe-array-concat": "^1.1.3", "safe-push-apply": "^1.0.0", "safe-regex-test": "^1.1.0", "set-proto": "^1.0.0", "stop-iteration-iterator": "^1.1.0", "string.prototype.trim": "^1.2.10", "string.prototype.trimend": "^1.0.9", "string.prototype.trimstart": "^1.0.8", "typed-array-buffer": "^1.0.3", "typed-array-byte-length": "^1.0.3", "typed-array-byte-offset": "^1.0.4", "typed-array-length": "^1.0.7", "unbox-primitive": "^1.1.0", "which-typed-array": "^1.1.19" } }, "sha512-zHXBLhP+QehSSbsS9Pt23Gg964240DPd6QCf8WpkqEXxQ7fhdZzYsocOr5u7apWonsS5EjZDmTF+/slGMyasvw=="], + "es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="], "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="], + "es-iterator-helpers": ["es-iterator-helpers@1.2.2", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.4", "define-properties": "^1.2.1", "es-abstract": "^1.24.1", "es-errors": "^1.3.0", "es-set-tostringtag": "^2.1.0", "function-bind": "^1.1.2", "get-intrinsic": "^1.3.0", "globalthis": "^1.0.4", "gopd": "^1.2.0", "has-property-descriptors": "^1.0.2", "has-proto": "^1.2.0", "has-symbols": "^1.1.0", "internal-slot": "^1.1.0", "iterator.prototype": "^1.1.5", "safe-array-concat": "^1.1.3" } }, "sha512-BrUQ0cPTB/IwXj23HtwHjS9n7O4h9FX94b4xc5zlTHxeLgTAdzYUDyy6KdExAl9lbN5rtfe44xpjpmj9grxs5w=="], + "es-object-atoms": ["es-object-atoms@1.1.1", "", { "dependencies": { "es-errors": "^1.3.0" } }, "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA=="], + "es-set-tostringtag": ["es-set-tostringtag@2.1.0", "", { "dependencies": { "es-errors": "^1.3.0", "get-intrinsic": "^1.2.6", "has-tostringtag": "^1.0.2", "hasown": "^2.0.2" } }, "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA=="], + + "es-shim-unscopables": ["es-shim-unscopables@1.1.0", "", { "dependencies": { "hasown": "^2.0.2" } }, "sha512-d9T8ucsEhh8Bi1woXCf+TIKDIROLG5WCkxg8geBCbvk22kzwC5G2OnXVMO6FUsvQlgUUXQ2itephWDLqDzbeCw=="], + + "es-to-primitive": ["es-to-primitive@1.3.0", "", { "dependencies": { "is-callable": "^1.2.7", "is-date-object": "^1.0.5", "is-symbol": "^1.0.4" } }, "sha512-w+5mJ3GuFL+NjVtJlvydShqE1eN3h3PbI7/5LAsYJP/2qtuMXjfL2LpHSRqo4b4eSF5K/DH1JXKUAHSB2UW50g=="], + "es-toolkit": ["es-toolkit@1.43.0", "", {}, "sha512-SKCT8AsWvYzBBuUqMk4NPwFlSdqLpJwmy6AP322ERn8W2YLIB6JBXnwMI2Qsh2gfphT3q7EKAxKb23cvFHFwKA=="], "esast-util-from-estree": ["esast-util-from-estree@2.0.0", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "devlop": "^1.0.0", "estree-util-visit": "^2.0.0", "unist-util-position-from-estree": "^2.0.0" } }, "sha512-4CyanoAudUSBAn5K13H4JhsMH6L9ZP7XbLVe/dKybkxMO7eDyLsT8UHl9TRNrU2Gr9nz+FovfSIjuXWJ81uVwQ=="], @@ -1131,10 +1324,40 @@ "escape-html": ["escape-html@1.0.3", "", {}, "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="], - "escape-string-regexp": ["escape-string-regexp@2.0.0", "", {}, "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w=="], + "escape-string-regexp": ["escape-string-regexp@4.0.0", "", {}, "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA=="], + + "eslint": ["eslint@9.39.2", "", { "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", "@eslint/config-array": "^0.21.1", "@eslint/config-helpers": "^0.4.2", "@eslint/core": "^0.17.0", "@eslint/eslintrc": "^3.3.1", "@eslint/js": "9.39.2", "@eslint/plugin-kit": "^0.4.1", "@humanfs/node": "^0.16.6", "@humanwhocodes/module-importer": "^1.0.1", "@humanwhocodes/retry": "^0.4.2", "@types/estree": "^1.0.6", "ajv": "^6.12.4", "chalk": "^4.0.0", "cross-spawn": "^7.0.6", "debug": "^4.3.2", "escape-string-regexp": "^4.0.0", "eslint-scope": "^8.4.0", "eslint-visitor-keys": "^4.2.1", "espree": "^10.4.0", "esquery": "^1.5.0", "esutils": "^2.0.2", "fast-deep-equal": "^3.1.3", "file-entry-cache": "^8.0.0", "find-up": "^5.0.0", "glob-parent": "^6.0.2", "ignore": "^5.2.0", "imurmurhash": "^0.1.4", "is-glob": "^4.0.0", "json-stable-stringify-without-jsonify": "^1.0.1", "lodash.merge": "^4.6.2", "minimatch": "^3.1.2", "natural-compare": "^1.4.0", "optionator": "^0.9.3" }, "peerDependencies": { "jiti": "*" }, "optionalPeers": ["jiti"], "bin": { "eslint": "bin/eslint.js" } }, "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw=="], + + "eslint-config-next": ["eslint-config-next@16.1.4", "", { "dependencies": { "@next/eslint-plugin-next": "16.1.4", "eslint-import-resolver-node": "^0.3.6", "eslint-import-resolver-typescript": "^3.5.2", "eslint-plugin-import": "^2.32.0", "eslint-plugin-jsx-a11y": "^6.10.0", "eslint-plugin-react": "^7.37.0", "eslint-plugin-react-hooks": "^7.0.0", "globals": "16.4.0", "typescript-eslint": "^8.46.0" }, "peerDependencies": { "eslint": ">=9.0.0", "typescript": ">=3.3.1" }, "optionalPeers": ["typescript"] }, "sha512-iCrrNolUPpn/ythx0HcyNRfUBgTkaNBXByisKUbusPGCl8DMkDXXAu7exlSTSLGTIsH9lFE/c4s/3Qiyv2qwdA=="], + + "eslint-import-resolver-node": ["eslint-import-resolver-node@0.3.9", "", { "dependencies": { "debug": "^3.2.7", "is-core-module": "^2.13.0", "resolve": "^1.22.4" } }, "sha512-WFj2isz22JahUv+B788TlO3N6zL3nNJGU8CcZbPZvVEkBPaJdCV4vy5wyghty5ROFbCRnm132v8BScu5/1BQ8g=="], + + "eslint-import-resolver-typescript": ["eslint-import-resolver-typescript@3.10.1", "", { "dependencies": { "@nolyfill/is-core-module": "1.0.39", "debug": "^4.4.0", "get-tsconfig": "^4.10.0", "is-bun-module": "^2.0.0", "stable-hash": "^0.0.5", "tinyglobby": "^0.2.13", "unrs-resolver": "^1.6.2" }, "peerDependencies": { "eslint": "*", "eslint-plugin-import": "*", "eslint-plugin-import-x": "*" }, "optionalPeers": ["eslint-plugin-import", "eslint-plugin-import-x"] }, "sha512-A1rHYb06zjMGAxdLSkN2fXPBwuSaQ0iO5M/hdyS0Ajj1VBaRp0sPD3dn1FhME3c/JluGFbwSxyCfqdSbtQLAHQ=="], + + "eslint-module-utils": ["eslint-module-utils@2.12.1", "", { "dependencies": { "debug": "^3.2.7" } }, "sha512-L8jSWTze7K2mTg0vos/RuLRS5soomksDPoJLXIslC7c8Wmut3bx7CPpJijDcBZtxQ5lrbUdM+s0OlNbz0DCDNw=="], + + "eslint-plugin-import": ["eslint-plugin-import@2.32.0", "", { "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.9", "array.prototype.findlastindex": "^1.2.6", "array.prototype.flat": "^1.3.3", "array.prototype.flatmap": "^1.3.3", "debug": "^3.2.7", "doctrine": "^2.1.0", "eslint-import-resolver-node": "^0.3.9", "eslint-module-utils": "^2.12.1", "hasown": "^2.0.2", "is-core-module": "^2.16.1", "is-glob": "^4.0.3", "minimatch": "^3.1.2", "object.fromentries": "^2.0.8", "object.groupby": "^1.0.3", "object.values": "^1.2.1", "semver": "^6.3.1", "string.prototype.trimend": "^1.0.9", "tsconfig-paths": "^3.15.0" }, "peerDependencies": { "eslint": "^2 || ^3 || ^4 || ^5 || ^6 || ^7.2.0 || ^8 || ^9" } }, "sha512-whOE1HFo/qJDyX4SnXzP4N6zOWn79WhnCUY/iDR0mPfQZO8wcYE4JClzI2oZrhBnnMUCBCHZhO6VQyoBU95mZA=="], + + "eslint-plugin-jsx-a11y": ["eslint-plugin-jsx-a11y@6.10.2", "", { "dependencies": { "aria-query": "^5.3.2", "array-includes": "^3.1.8", "array.prototype.flatmap": "^1.3.2", "ast-types-flow": "^0.0.8", "axe-core": "^4.10.0", "axobject-query": "^4.1.0", "damerau-levenshtein": "^1.0.8", "emoji-regex": "^9.2.2", "hasown": "^2.0.2", "jsx-ast-utils": "^3.3.5", "language-tags": "^1.0.9", "minimatch": "^3.1.2", "object.fromentries": "^2.0.8", "safe-regex-test": "^1.0.3", "string.prototype.includes": "^2.0.1" }, "peerDependencies": { "eslint": "^3 || ^4 || ^5 || ^6 || ^7 || ^8 || ^9" } }, "sha512-scB3nz4WmG75pV8+3eRUQOHZlNSUhFNq37xnpgRkCCELU3XMvXAxLk1eqWWyE22Ki4Q01Fnsw9BA3cJHDPgn2Q=="], + + "eslint-plugin-react": ["eslint-plugin-react@7.37.5", "", { "dependencies": { "array-includes": "^3.1.8", "array.prototype.findlast": "^1.2.5", "array.prototype.flatmap": "^1.3.3", "array.prototype.tosorted": "^1.1.4", "doctrine": "^2.1.0", "es-iterator-helpers": "^1.2.1", "estraverse": "^5.3.0", "hasown": "^2.0.2", "jsx-ast-utils": "^2.4.1 || ^3.0.0", "minimatch": "^3.1.2", "object.entries": "^1.1.9", "object.fromentries": "^2.0.8", "object.values": "^1.2.1", "prop-types": "^15.8.1", "resolve": "^2.0.0-next.5", "semver": "^6.3.1", "string.prototype.matchall": "^4.0.12", "string.prototype.repeat": "^1.0.0" }, "peerDependencies": { "eslint": "^3 || ^4 || ^5 || ^6 || ^7 || ^8 || ^9.7" } }, "sha512-Qteup0SqU15kdocexFNAJMvCJEfa2xUKNV4CC1xsVMrIIqEy3SQ/rqyxCWNzfrd3/ldy6HMlD2e0JDVpDg2qIA=="], + + "eslint-plugin-react-hooks": ["eslint-plugin-react-hooks@7.0.1", "", { "dependencies": { "@babel/core": "^7.24.4", "@babel/parser": "^7.24.4", "hermes-parser": "^0.25.1", "zod": "^3.25.0 || ^4.0.0", "zod-validation-error": "^3.5.0 || ^4.0.0" }, "peerDependencies": { "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0" } }, "sha512-O0d0m04evaNzEPoSW+59Mezf8Qt0InfgGIBJnpC0h3NH/WjUAR7BIKUfysC6todmtiZ/A0oUVS8Gce0WhBrHsA=="], + + "eslint-scope": ["eslint-scope@8.4.0", "", { "dependencies": { "esrecurse": "^4.3.0", "estraverse": "^5.2.0" } }, "sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg=="], + + "eslint-visitor-keys": ["eslint-visitor-keys@4.2.1", "", {}, "sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ=="], + + "espree": ["espree@10.4.0", "", { "dependencies": { "acorn": "^8.15.0", "acorn-jsx": "^5.3.2", "eslint-visitor-keys": "^4.2.1" } }, "sha512-j6PAQ2uUr79PZhBjP5C5fhl8e39FmRnOjsD5lGnWrFU8i2G776tBK7+nP8KuQUTTyAZUwfQqXAgrVH5MbH9CYQ=="], "esprima": ["esprima@4.0.1", "", { "bin": { "esparse": "./bin/esparse.js", "esvalidate": "./bin/esvalidate.js" } }, "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A=="], + "esquery": ["esquery@1.7.0", "", { "dependencies": { "estraverse": "^5.1.0" } }, "sha512-Ap6G0WQwcU/LHsvLwON1fAQX9Zp0A2Y6Y/cJBl9r/JbW90Zyg4/zbG6zzKa2OTALELarYHmKu0GhpM5EO+7T0g=="], + + "esrecurse": ["esrecurse@4.3.0", "", { "dependencies": { "estraverse": "^5.2.0" } }, "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag=="], + + "estraverse": ["estraverse@5.3.0", "", {}, "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA=="], + "estree-util-attach-comments": ["estree-util-attach-comments@3.0.0", "", { "dependencies": { "@types/estree": "^1.0.0" } }, "sha512-cKUwm/HUcTDsYh/9FgnuFqpfquUbwIqwKM26BVCGDPVgvaCl/nDCCjUfiLlx6lsEZ3Z4RFxNbOQ60pkaEwFxGw=="], "estree-util-build-jsx": ["estree-util-build-jsx@3.0.1", "", { "dependencies": { "@types/estree-jsx": "^1.0.0", "devlop": "^1.0.0", "estree-util-is-identifier-name": "^3.0.0", "estree-walker": "^3.0.0" } }, "sha512-8U5eiL6BTrPxp/CHbs2yMgP8ftMhR5ww1eIKoWRMlqvltHF8fZn5LRDvTKuxD3DUn+shRbLGqXemcP51oFCsGQ=="], @@ -1151,6 +1374,8 @@ "estree-walker": ["estree-walker@3.0.3", "", { "dependencies": { "@types/estree": "^1.0.0" } }, "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g=="], + "esutils": ["esutils@2.0.3", "", {}, "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g=="], + "etag": ["etag@1.8.1", "", {}, "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg=="], "eventemitter3": ["eventemitter3@4.0.7", "", {}, "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw=="], @@ -1179,6 +1404,10 @@ "fast-glob": ["fast-glob@3.3.3", "", { "dependencies": { "@nodelib/fs.stat": "^2.0.2", "@nodelib/fs.walk": "^1.2.3", "glob-parent": "^5.1.2", "merge2": "^1.3.0", "micromatch": "^4.0.8" } }, "sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg=="], + "fast-json-stable-stringify": ["fast-json-stable-stringify@2.1.0", "", {}, "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw=="], + + "fast-levenshtein": ["fast-levenshtein@2.0.6", "", {}, "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw=="], + "fast-uri": ["fast-uri@3.1.0", "", {}, "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA=="], "fastq": ["fastq@1.20.1", "", { "dependencies": { "reusify": "^1.0.4" } }, "sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw=="], @@ -1189,11 +1418,19 @@ "figures": ["figures@6.1.0", "", { "dependencies": { "is-unicode-supported": "^2.0.0" } }, "sha512-d+l3qxjSesT4V7v2fh+QnmFnUWv9lSpjarhShNTgBOfA0ttejbQUAlHLitbjkoRiDulW0OPoQPYIGhIC8ohejg=="], + "file-entry-cache": ["file-entry-cache@8.0.0", "", { "dependencies": { "flat-cache": "^4.0.0" } }, "sha512-XXTUwCvisa5oacNGRP9SfNtYBNAMi+RPwBFmblZEF7N7swHYQS6/Zfk7SRwx4D5j3CH211YNRco1DEMNVfZCnQ=="], + "fill-range": ["fill-range@7.1.1", "", { "dependencies": { "to-regex-range": "^5.0.1" } }, "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg=="], "finalhandler": ["finalhandler@2.1.1", "", { "dependencies": { "debug": "^4.4.0", "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "on-finished": "^2.4.1", "parseurl": "^1.3.3", "statuses": "^2.0.1" } }, "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA=="], - "find-up": ["find-up@4.1.0", "", { "dependencies": { "locate-path": "^5.0.0", "path-exists": "^4.0.0" } }, "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw=="], + "find-up": ["find-up@5.0.0", "", { "dependencies": { "locate-path": "^6.0.0", "path-exists": "^4.0.0" } }, "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng=="], + + "flat-cache": ["flat-cache@4.0.1", "", { "dependencies": { "flatted": "^3.2.9", "keyv": "^4.5.4" } }, "sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw=="], + + "flatted": ["flatted@3.3.3", "", {}, "sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg=="], + + "for-each": ["for-each@0.3.5", "", { "dependencies": { "is-callable": "^1.2.7" } }, "sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg=="], "formdata-polyfill": ["formdata-polyfill@4.0.10", "", { "dependencies": { "fetch-blob": "^3.1.2" } }, "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g=="], @@ -1215,10 +1452,16 @@ "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], + "function.prototype.name": ["function.prototype.name@1.1.8", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "functions-have-names": "^1.2.3", "hasown": "^2.0.2", "is-callable": "^1.2.7" } }, "sha512-e5iwyodOHhbMr/yNrc7fDYG4qlbIvI5gajyzPnb5TCwyhjApznQh1BMFou9b30SevY43gCJKXycoCBjMbsuW0Q=="], + + "functions-have-names": ["functions-have-names@1.2.3", "", {}, "sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ=="], + "fuzzysort": ["fuzzysort@3.1.0", "", {}, "sha512-sR9BNCjBg6LNgwvxlBd0sBABvQitkLzoVY9MYYROQVX/FvfJ4Mai9LsGhDgd8qYdds0bY77VzYd5iuB+v5rwQQ=="], "fzf": ["fzf@0.5.2", "", {}, "sha512-Tt4kuxLXFKHy8KT40zwsUPUkg1CrsgY25FxA2U/j/0WgEDCk3ddc/zLTCCcbSHX9FcKtLuVaDGtGE/STWC+j3Q=="], + "generator-function": ["generator-function@2.0.1", "", {}, "sha512-SFdFmIJi+ybC0vjlHN0ZGVGHc3lgE0DxPAT0djjVg+kjOnSqclqmj0KQ7ykTOLP6YxoqOvuAODGdcHJn+43q3g=="], + "gensync": ["gensync@1.0.0-beta.2", "", {}, "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg=="], "get-caller-file": ["get-caller-file@2.0.5", "", {}, "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg=="], @@ -1235,13 +1478,19 @@ "get-stream": ["get-stream@9.0.1", "", { "dependencies": { "@sec-ant/readable-stream": "^0.4.1", "is-stream": "^4.0.1" } }, "sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA=="], + "get-symbol-description": ["get-symbol-description@1.1.0", "", { "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", "get-intrinsic": "^1.2.6" } }, "sha512-w9UMqWwJxHNOvoNzSJ2oPF5wvYcvP7jUvYzhp67yEhTi17ZDBBC1z9pTdGuzjD+EFIqLSYRweZjqfiPzQ06Ebg=="], + "get-tsconfig": ["get-tsconfig@4.13.0", "", { "dependencies": { "resolve-pkg-maps": "^1.0.0" } }, "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ=="], "giget": ["giget@2.0.0", "", { "dependencies": { "citty": "^0.1.6", "consola": "^3.4.0", "defu": "^6.1.4", "node-fetch-native": "^1.6.6", "nypm": "^0.6.0", "pathe": "^2.0.3" }, "bin": { "giget": "dist/cli.mjs" } }, "sha512-L5bGsVkxJbJgdnwyuheIunkGatUF/zssUoxxjACCseZYAVbaqdh9Tsmmlkl8vYan09H7sbvKt4pS8GqKLBrEzA=="], "github-slugger": ["github-slugger@2.0.0", "", {}, "sha512-IaOQ9puYtjrkq7Y0Ygl9KDZnrf/aiUJYUpVf89y8kyaxbRG7Y1SrX/jaumrv81vc61+kiMempujsM3Yw7w5qcw=="], - "glob-parent": ["glob-parent@5.1.2", "", { "dependencies": { "is-glob": "^4.0.1" } }, "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow=="], + "glob-parent": ["glob-parent@6.0.2", "", { "dependencies": { "is-glob": "^4.0.3" } }, "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A=="], + + "globals": ["globals@16.4.0", "", {}, "sha512-ob/2LcVVaVGCYN+r14cnwnoDPUufjiYgSqRhiFD0Q1iI4Odora5RE8Iv1D24hAz5oMophRGkGz+yuvQmmUMnMw=="], + + "globalthis": ["globalthis@1.0.4", "", { "dependencies": { "define-properties": "^1.2.1", "gopd": "^1.0.1" } }, "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ=="], "globby": ["globby@11.1.0", "", { "dependencies": { "array-union": "^2.1.0", "dir-glob": "^3.0.1", "fast-glob": "^3.2.9", "ignore": "^5.2.0", "merge2": "^1.4.1", "slash": "^3.0.0" } }, "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g=="], @@ -1253,8 +1502,18 @@ "hachure-fill": ["hachure-fill@0.5.2", "", {}, "sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg=="], + "has-bigints": ["has-bigints@1.1.0", "", {}, "sha512-R3pbpkcIqv2Pm3dUwgjclDRVmWpTJW2DcMzcIhEXEx1oh/CEMObMm3KLmRJOdvhM7o4uQBnwr8pzRK2sJWIqfg=="], + + "has-flag": ["has-flag@4.0.0", "", {}, "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ=="], + + "has-property-descriptors": ["has-property-descriptors@1.0.2", "", { "dependencies": { "es-define-property": "^1.0.0" } }, "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg=="], + + "has-proto": ["has-proto@1.2.0", "", { "dependencies": { "dunder-proto": "^1.0.0" } }, "sha512-KIL7eQPfHQRC8+XluaIw7BHUwwqL19bQn4hzNgdr+1wXoU0KKj6rufu47lhY7KbJR2C6T6+PfyN0Ea7wkSS+qQ=="], + "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], + "has-tostringtag": ["has-tostringtag@1.0.2", "", { "dependencies": { "has-symbols": "^1.0.3" } }, "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw=="], + "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="], "hast-util-to-estree": ["hast-util-to-estree@3.1.3", "", { "dependencies": { "@types/estree": "^1.0.0", "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "comma-separated-tokens": "^2.0.0", "devlop": "^1.0.0", "estree-util-attach-comments": "^3.0.0", "estree-util-is-identifier-name": "^3.0.0", "hast-util-whitespace": "^3.0.0", "mdast-util-mdx-expression": "^2.0.0", "mdast-util-mdx-jsx": "^3.0.0", "mdast-util-mdxjs-esm": "^2.0.0", "property-information": "^7.0.0", "space-separated-tokens": "^2.0.0", "style-to-js": "^1.0.0", "unist-util-position": "^5.0.0", "zwitch": "^2.0.0" } }, "sha512-48+B/rJWAp0jamNbAAf9M7Uf//UVqAoMmgXhBdxTDJLGKY+LRnZ99qcG+Qjl5HfMpYNzS5v4EAwVEF34LeAj7w=="], @@ -1269,6 +1528,10 @@ "headers-polyfill": ["headers-polyfill@4.0.3", "", {}, "sha512-IScLbePpkvO846sIwOtOTDjutRMWdXdJmXdMvk6gCBHxFO8d+QKOQedyZSxFTTFYRSmlgSTDtXqqq4pcenBXLQ=="], + "hermes-estree": ["hermes-estree@0.25.1", "", {}, "sha512-0wUoCcLp+5Ev5pDW2OriHC2MJCbwLwuRx+gAqMTOkGKJJiBCLjtrvy4PWUGn6MIVefecRpzoOZ/UV6iGdOr+Cw=="], + + "hermes-parser": ["hermes-parser@0.25.1", "", { "dependencies": { "hermes-estree": "0.25.1" } }, "sha512-6pEjquH3rqaI6cYAXYPcz9MS4rY6R4ngRgrgfDshRptUZIc3lw0MCIJIGDj9++mfySOuPTHB4nrSW99BCvOPIA=="], + "hono": ["hono@4.11.4", "", {}, "sha512-U7tt8JsyrxSRKspfhtLET79pU8K+tInj5QZXs1jSugO1Vq5dFj3kmZsRldo29mTBfcjDRVRXrEZ6LS63Cog9ZA=="], "html-void-elements": ["html-void-elements@3.0.0", "", {}, "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg=="], @@ -1289,6 +1552,8 @@ "import-fresh": ["import-fresh@3.3.1", "", { "dependencies": { "parent-module": "^1.0.0", "resolve-from": "^4.0.0" } }, "sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ=="], + "imurmurhash": ["imurmurhash@0.1.4", "", {}, "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA=="], + "indent-string": ["indent-string@5.0.0", "", {}, "sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg=="], "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], @@ -1299,6 +1564,8 @@ "input-otp": ["input-otp@1.4.2", "", { "peerDependencies": { "react": "^16.8 || ^17.0 || ^18.0 || ^19.0.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0.0 || ^19.0.0-rc" } }, "sha512-l3jWwYNvrEa6NTCt7BECfCm48GvwuZzkoeG3gBL2w4CHeOXW3eKFmf9UNYkNfYc3mxMrthMnxjIE07MT0zLBQA=="], + "internal-slot": ["internal-slot@1.1.0", "", { "dependencies": { "es-errors": "^1.3.0", "hasown": "^2.0.2", "side-channel": "^1.1.0" } }, "sha512-4gd7VpWNQNB4UKKCFFVcp1AVv+FMOgs9NKzjHKusc8jTMhd5eL1NqQqOpE0KzMds804/yHlglp3uxgluOqAPLw=="], + "internmap": ["internmap@2.0.3", "", {}, "sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg=="], "ipaddr.js": ["ipaddr.js@1.9.1", "", {}, "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="], @@ -1307,16 +1574,38 @@ "is-alphanumerical": ["is-alphanumerical@2.0.1", "", { "dependencies": { "is-alphabetical": "^2.0.0", "is-decimal": "^2.0.0" } }, "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw=="], + "is-array-buffer": ["is-array-buffer@3.0.5", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "get-intrinsic": "^1.2.6" } }, "sha512-DDfANUiiG2wC1qawP66qlTugJeL5HyzMpfr8lLK+jMQirGzNod0B12cFB/9q838Ru27sBwfw78/rdoU7RERz6A=="], + "is-arrayish": ["is-arrayish@0.2.1", "", {}, "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg=="], + "is-async-function": ["is-async-function@2.1.1", "", { "dependencies": { "async-function": "^1.0.0", "call-bound": "^1.0.3", "get-proto": "^1.0.1", "has-tostringtag": "^1.0.2", "safe-regex-test": "^1.1.0" } }, "sha512-9dgM/cZBnNvjzaMYHVoxxfPj2QXt22Ev7SuuPrs+xav0ukGB0S6d4ydZdEiM48kLx5kDV+QBPrpVnFyefL8kkQ=="], + + "is-bigint": ["is-bigint@1.1.0", "", { "dependencies": { "has-bigints": "^1.0.2" } }, "sha512-n4ZT37wG78iz03xPRKJrHTdZbe3IicyucEtdRsV5yglwc3GyUfbAfpSeD0FJ41NbUNSt5wbhqfp1fS+BgnvDFQ=="], + + "is-boolean-object": ["is-boolean-object@1.2.2", "", { "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" } }, "sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A=="], + + "is-bun-module": ["is-bun-module@2.0.0", "", { "dependencies": { "semver": "^7.7.1" } }, "sha512-gNCGbnnnnFAUGKeZ9PdbyeGYJqewpmc2aKHUEMO5nQPWU9lOmv7jcmQIv+qHD8fXW6W7qfuCwX4rY9LNRjXrkQ=="], + + "is-callable": ["is-callable@1.2.7", "", {}, "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA=="], + + "is-core-module": ["is-core-module@2.16.1", "", { "dependencies": { "hasown": "^2.0.2" } }, "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w=="], + + "is-data-view": ["is-data-view@1.0.2", "", { "dependencies": { "call-bound": "^1.0.2", "get-intrinsic": "^1.2.6", "is-typed-array": "^1.1.13" } }, "sha512-RKtWF8pGmS87i2D6gqQu/l7EYRlVdfzemCJN/P3UOs//x1QE7mfhvzHIApBTRf7axvT6DMGwSwBXYCT0nfB9xw=="], + + "is-date-object": ["is-date-object@1.1.0", "", { "dependencies": { "call-bound": "^1.0.2", "has-tostringtag": "^1.0.2" } }, "sha512-PwwhEakHVKTdRNVOw+/Gyh0+MzlCl4R6qKvkhuvLtPMggI1WAHt9sOwZxQLSGpUaDnrdyDsomoRgNnCfKNSXXg=="], + "is-decimal": ["is-decimal@2.0.1", "", {}, "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A=="], "is-docker": ["is-docker@3.0.0", "", { "bin": { "is-docker": "cli.js" } }, "sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ=="], "is-extglob": ["is-extglob@2.1.1", "", {}, "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ=="], + "is-finalizationregistry": ["is-finalizationregistry@1.1.1", "", { "dependencies": { "call-bound": "^1.0.3" } }, "sha512-1pC6N8qWJbWoPtEjgcL2xyhQOP491EQjeUo3qTKcmV8YSDDJrOepfG8pcC7h/QgnQHYSv0mJ3Z/ZWxmatVrysg=="], + "is-fullwidth-code-point": ["is-fullwidth-code-point@5.1.0", "", { "dependencies": { "get-east-asian-width": "^1.3.1" } }, "sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ=="], + "is-generator-function": ["is-generator-function@1.1.2", "", { "dependencies": { "call-bound": "^1.0.4", "generator-function": "^2.0.0", "get-proto": "^1.0.1", "has-tostringtag": "^1.0.2", "safe-regex-test": "^1.1.0" } }, "sha512-upqt1SkGkODW9tsGNG5mtXTXtECizwtS2kA161M+gJPc1xdb/Ax629af6YrTwcOeQHbewrPNlE5Dx7kzvXTizA=="], + "is-glob": ["is-glob@4.0.3", "", { "dependencies": { "is-extglob": "^2.1.1" } }, "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg=="], "is-hexadecimal": ["is-hexadecimal@2.0.1", "", {}, "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg=="], @@ -1329,64 +1618,108 @@ "is-interactive": ["is-interactive@2.0.0", "", {}, "sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ=="], + "is-map": ["is-map@2.0.3", "", {}, "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw=="], + + "is-negative-zero": ["is-negative-zero@2.0.3", "", {}, "sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw=="], + "is-node-process": ["is-node-process@1.2.0", "", {}, "sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw=="], "is-number": ["is-number@7.0.0", "", {}, "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng=="], + "is-number-object": ["is-number-object@1.1.1", "", { "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" } }, "sha512-lZhclumE1G6VYD8VHe35wFaIif+CTy5SJIi5+3y4psDgWu4wPDoBhF8NxUOinEc7pHgiTsT6MaBb92rKhhD+Xw=="], + "is-obj": ["is-obj@3.0.0", "", {}, "sha512-IlsXEHOjtKhpN8r/tRFj2nDyTmHvcfNeu/nrRIcXE17ROeatXchkojffa1SpdqW4cr/Fj6QkEf/Gn4zf6KKvEQ=="], "is-plain-obj": ["is-plain-obj@4.1.0", "", {}, "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg=="], "is-promise": ["is-promise@4.0.0", "", {}, "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ=="], + "is-regex": ["is-regex@1.2.1", "", { "dependencies": { "call-bound": "^1.0.2", "gopd": "^1.2.0", "has-tostringtag": "^1.0.2", "hasown": "^2.0.2" } }, "sha512-MjYsKHO5O7mCsmRGxWcLWheFqN9DJ/2TmngvjKXihe6efViPqc274+Fx/4fYj/r03+ESvBdTXK0V6tA3rgez1g=="], + "is-regexp": ["is-regexp@3.1.0", "", {}, "sha512-rbku49cWloU5bSMI+zaRaXdQHXnthP6DZ/vLnfdSKyL4zUzuWnomtOEiZZOd+ioQ+avFo/qau3KPTc7Fjy1uPA=="], + "is-set": ["is-set@2.0.3", "", {}, "sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg=="], + + "is-shared-array-buffer": ["is-shared-array-buffer@1.0.4", "", { "dependencies": { "call-bound": "^1.0.3" } }, "sha512-ISWac8drv4ZGfwKl5slpHG9OwPNty4jOWPRIhBpxOoD+hqITiwuipOQ2bNthAzwA3B4fIjO4Nln74N0S9byq8A=="], + "is-stream": ["is-stream@4.0.1", "", {}, "sha512-Dnz92NInDqYckGEUJv689RbRiTSEHCQ7wOVeALbkOz999YpqT46yMRIGtSNl2iCL1waAZSx40+h59NV/EwzV/A=="], + "is-string": ["is-string@1.1.1", "", { "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" } }, "sha512-BtEeSsoaQjlSPBemMQIrY1MY0uM6vnS1g5fmufYOtnxLGUZM2178PKbhsk7Ffv58IX+ZtcvoGwccYsh0PglkAA=="], + "is-subdir": ["is-subdir@1.2.0", "", { "dependencies": { "better-path-resolve": "1.0.0" } }, "sha512-2AT6j+gXe/1ueqbW6fLZJiIw3F8iXGJtt0yDrZaBhAZEG1raiTxKWU+IPqMCzQAXOUCKdA4UDMgacKH25XG2Cw=="], + "is-symbol": ["is-symbol@1.1.1", "", { "dependencies": { "call-bound": "^1.0.2", "has-symbols": "^1.1.0", "safe-regex-test": "^1.1.0" } }, "sha512-9gGx6GTtCQM73BgmHQXfDmLtfjjTUDSyoxTCbp5WtoixAhfgsDirWIcVQ/IHpvI5Vgd5i/J5F7B9cN/WlVbC/w=="], + + "is-typed-array": ["is-typed-array@1.1.15", "", { "dependencies": { "which-typed-array": "^1.1.16" } }, "sha512-p3EcsicXjit7SaskXHs1hA91QxgTw46Fv6EFKKGS5DRFLD8yKnohjF3hxoju94b/OcMZoQukzpPpBE9uLVKzgQ=="], + "is-unicode-supported": ["is-unicode-supported@2.1.0", "", {}, "sha512-mE00Gnza5EEB3Ds0HfMyllZzbBrmLOX3vfWoj9A9PEnTfratQ/BcaJOuMhnkhjXvb2+FkY3VuHqtAGpTPmglFQ=="], + "is-weakmap": ["is-weakmap@2.0.2", "", {}, "sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w=="], + + "is-weakref": ["is-weakref@1.1.1", "", { "dependencies": { "call-bound": "^1.0.3" } }, "sha512-6i9mGWSlqzNMEqpCp93KwRS1uUOodk2OJ6b+sq7ZPDSy2WuI5NFIxp/254TytR8ftefexkWn5xNiHUNpPOfSew=="], + + "is-weakset": ["is-weakset@2.0.4", "", { "dependencies": { "call-bound": "^1.0.3", "get-intrinsic": "^1.2.6" } }, "sha512-mfcwb6IzQyOKTs84CQMrOwW4gQcaTOAWJ0zzJCl2WSPDrWk/OzDaImWFH3djXhb24g4eudZfLRozAvPGw4d9hQ=="], + "is-windows": ["is-windows@1.0.2", "", {}, "sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA=="], "is-wsl": ["is-wsl@3.1.0", "", { "dependencies": { "is-inside-container": "^1.0.0" } }, "sha512-UcVfVfaK4Sc4m7X3dUSoHoozQGBEFeDC+zVo06t98xe8CzHSZZBekNXH+tu0NalHolcJ/QAGqS46Hef7QXBIMw=="], + "isarray": ["isarray@2.0.5", "", {}, "sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw=="], + "isexe": ["isexe@3.1.1", "", {}, "sha512-LpB/54B+/2J5hqQ7imZHfdU31OlgQqx7ZicVlkm9kzg9/w8GKLEcFfJl/t7DCEDueOyBAD6zCCwTO6Fzs0NoEQ=="], + "iterator.prototype": ["iterator.prototype@1.1.5", "", { "dependencies": { "define-data-property": "^1.1.4", "es-object-atoms": "^1.0.0", "get-intrinsic": "^1.2.6", "get-proto": "^1.0.0", "has-symbols": "^1.1.0", "set-function-name": "^2.0.2" } }, "sha512-H0dkQoCa3b2VEeKQBOxFph+JAbcrQdE7KC0UkqwpLmv2EC4P41QXP+rqo9wYodACiG5/WM5s9oDApTU8utwj9g=="], + "jiti": ["jiti@2.6.1", "", { "bin": { "jiti": "lib/jiti-cli.mjs" } }, "sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ=="], "jose": ["jose@6.1.3", "", {}, "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ=="], + "js-tiktoken": ["js-tiktoken@1.0.21", "", { "dependencies": { "base64-js": "^1.5.1" } }, "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g=="], + "js-tokens": ["js-tokens@4.0.0", "", {}, "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ=="], "js-yaml": ["js-yaml@4.1.1", "", { "dependencies": { "argparse": "^2.0.1" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA=="], "jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="], + "json-buffer": ["json-buffer@3.0.1", "", {}, "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ=="], + "json-parse-even-better-errors": ["json-parse-even-better-errors@2.3.1", "", {}, "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w=="], "json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="], - "json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + "json-schema-traverse": ["json-schema-traverse@0.4.1", "", {}, "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg=="], "json-schema-typed": ["json-schema-typed@8.0.2", "", {}, "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA=="], + "json-stable-stringify-without-jsonify": ["json-stable-stringify-without-jsonify@1.0.1", "", {}, "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw=="], + "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], "jsonc-parser": ["jsonc-parser@3.3.1", "", {}, "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ=="], "jsonfile": ["jsonfile@4.0.0", "", { "optionalDependencies": { "graceful-fs": "^4.1.6" } }, "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg=="], + "jsx-ast-utils": ["jsx-ast-utils@3.3.5", "", { "dependencies": { "array-includes": "^3.1.6", "array.prototype.flat": "^1.3.1", "object.assign": "^4.1.4", "object.values": "^1.1.6" } }, "sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ=="], + "katex": ["katex@0.16.27", "", { "dependencies": { "commander": "^8.3.0" }, "bin": { "katex": "cli.js" } }, "sha512-aeQoDkuRWSqQN6nSvVCEFvfXdqo1OQiCmmW1kc9xSdjutPv7BGO7pqY9sQRJpMOGrEdfDgF2TfRXe5eUAD2Waw=="], + "keyv": ["keyv@4.5.4", "", { "dependencies": { "json-buffer": "3.0.1" } }, "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw=="], + "khroma": ["khroma@2.1.0", "", {}, "sha512-Ls993zuzfayK269Svk9hzpeGUKob/sIgZzyHYdjQoAdQetRKpOLj+k/QQQ/6Qi0Yz65mlROrfd+Ev+1+7dz9Kw=="], "kleur": ["kleur@4.1.5", "", {}, "sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ=="], "langium": ["langium@3.3.1", "", { "dependencies": { "chevrotain": "~11.0.3", "chevrotain-allstar": "~0.3.0", "vscode-languageserver": "~9.0.1", "vscode-languageserver-textdocument": "~1.0.11", "vscode-uri": "~3.0.8" } }, "sha512-QJv/h939gDpvT+9SiLVlY7tZC3xB2qK57v0J04Sh9wpMb6MP1q8gB21L3WIo8T5P1MSMg3Ep14L7KkDCFG3y4w=="], + "language-subtag-registry": ["language-subtag-registry@0.3.23", "", {}, "sha512-0K65Lea881pHotoGEa5gDlMxt3pctLi2RplBb7Ezh4rRdLEOtgi7n4EwK9lamnUCkKBqaeKRVebTq6BAxSkpXQ=="], + + "language-tags": ["language-tags@1.0.9", "", { "dependencies": { "language-subtag-registry": "^0.3.20" } }, "sha512-MbjN408fEndfiQXbFQ1vnd+1NoLDsnQW41410oQBXiyXDMYH5z505juWa4KUE1LqxRC7DgOgZDbKLxHIwm27hA=="], + "layout-base": ["layout-base@1.0.2", "", {}, "sha512-8h2oVEZNktL4BH2JCOI90iD1yXwL6iNW7KcCKT2QZgQJR2vbqDsldCTPRU9NifTCqHZci57XvQQ15YTu+sTYPg=="], + "levn": ["levn@0.4.1", "", { "dependencies": { "prelude-ls": "^1.2.1", "type-check": "~0.4.0" } }, "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ=="], + "lightningcss": ["lightningcss@1.30.2", "", { "dependencies": { "detect-libc": "^2.0.3" }, "optionalDependencies": { "lightningcss-android-arm64": "1.30.2", "lightningcss-darwin-arm64": "1.30.2", "lightningcss-darwin-x64": "1.30.2", "lightningcss-freebsd-x64": "1.30.2", "lightningcss-linux-arm-gnueabihf": "1.30.2", "lightningcss-linux-arm64-gnu": "1.30.2", "lightningcss-linux-arm64-musl": "1.30.2", "lightningcss-linux-x64-gnu": "1.30.2", "lightningcss-linux-x64-musl": "1.30.2", "lightningcss-win32-arm64-msvc": "1.30.2", "lightningcss-win32-x64-msvc": "1.30.2" } }, "sha512-utfs7Pr5uJyyvDETitgsaqSyjCb2qNRAtuqUeWIAKztsOYdcACf2KtARYXg2pSvhkt+9NfoaNY7fxjl6nuMjIQ=="], "lightningcss-android-arm64": ["lightningcss-android-arm64@1.30.2", "", { "os": "android", "cpu": "arm64" }, "sha512-BH9sEdOCahSgmkVhBLeU7Hc9DWeZ1Eb6wNS6Da8igvUwAe0sqROHddIlvU06q3WyXVEOYDZ6ykBZQnjTbmo4+A=="], @@ -1413,7 +1746,7 @@ "lines-and-columns": ["lines-and-columns@1.2.4", "", {}, "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="], - "locate-path": ["locate-path@5.0.0", "", { "dependencies": { "p-locate": "^4.1.0" } }, "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g=="], + "locate-path": ["locate-path@6.0.0", "", { "dependencies": { "p-locate": "^5.0.0" } }, "sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw=="], "lodash": ["lodash@4.17.21", "", {}, "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="], @@ -1565,7 +1898,7 @@ "mimic-function": ["mimic-function@5.0.1", "", {}, "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA=="], - "minimatch": ["minimatch@10.1.1", "", { "dependencies": { "@isaacs/brace-expansion": "^5.0.0" } }, "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ=="], + "minimatch": ["minimatch@3.1.2", "", { "dependencies": { "brace-expansion": "^1.1.7" } }, "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw=="], "minimist": ["minimist@1.2.8", "", {}, "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="], @@ -1587,6 +1920,10 @@ "nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], + "napi-postinstall": ["napi-postinstall@0.3.4", "", { "bin": { "napi-postinstall": "lib/cli.js" } }, "sha512-PHI5f1O0EP5xJ9gQmFGMS6IZcrVvTjpXjz7Na41gTE7eE2hK11lg04CECCYEEjdc17EV4DO+fkGEtt7TpTaTiQ=="], + + "natural-compare": ["natural-compare@1.4.0", "", {}, "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw=="], + "negotiator": ["negotiator@1.0.0", "", {}, "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg=="], "next": ["next@16.1.1", "", { "dependencies": { "@next/env": "16.1.1", "@swc/helpers": "0.5.15", "baseline-browser-mapping": "^2.8.3", "caniuse-lite": "^1.0.30001579", "postcss": "8.4.31", "styled-jsx": "5.1.6" }, "optionalDependencies": { "@next/swc-darwin-arm64": "16.1.1", "@next/swc-darwin-x64": "16.1.1", "@next/swc-linux-arm64-gnu": "16.1.1", "@next/swc-linux-arm64-musl": "16.1.1", "@next/swc-linux-x64-gnu": "16.1.1", "@next/swc-linux-x64-musl": "16.1.1", "@next/swc-win32-arm64-msvc": "16.1.1", "@next/swc-win32-x64-msvc": "16.1.1", "sharp": "^0.34.4" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", "@playwright/test": "^1.51.1", "babel-plugin-react-compiler": "*", "react": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0", "react-dom": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0", "sass": "^1.3.0" }, "optionalPeers": ["@opentelemetry/api", "@playwright/test", "babel-plugin-react-compiler", "sass"], "bin": { "next": "dist/bin/next" } }, "sha512-QI+T7xrxt1pF6SQ/JYFz95ro/mg/1Znk5vBebsWwbpejj1T0A23hO7GYEaVac9QUOT2BIMiuzm0L99ooq7k0/w=="], @@ -1613,8 +1950,20 @@ "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="], + "object-keys": ["object-keys@1.1.1", "", {}, "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA=="], + "object-treeify": ["object-treeify@1.1.33", "", {}, "sha512-EFVjAYfzWqWsBMRHPMAXLCDIJnpMhdWAqR7xG6M6a2cs6PMFpl/+Z20w9zDW4vkxOFfddegBKq9Rehd0bxWE7A=="], + "object.assign": ["object.assign@4.1.7", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0", "has-symbols": "^1.1.0", "object-keys": "^1.1.1" } }, "sha512-nK28WOo+QIjBkDduTINE4JkF/UJJKyf2EJxvJKfblDpyg0Q+pkOHNTL0Qwy6NP6FhE/EnzV73BxxqcJaXY9anw=="], + + "object.entries": ["object.entries@1.1.9", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.4", "define-properties": "^1.2.1", "es-object-atoms": "^1.1.1" } }, "sha512-8u/hfXFRBD1O0hPUjioLhoWFHRmt6tKA4/vZPyckBr18l1KE9uHrFaFaUi8MDRTpi4uak2goyPTSNJLXX2k2Hw=="], + + "object.fromentries": ["object.fromentries@2.0.8", "", { "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", "es-abstract": "^1.23.2", "es-object-atoms": "^1.0.0" } }, "sha512-k6E21FzySsSK5a21KRADBd/NGneRegFO5pLHfdQLpRDETUNJueLXs3WCzyQ3tFRDYgbq3KHGXfTbi2bs8WQ6rQ=="], + + "object.groupby": ["object.groupby@1.0.3", "", { "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", "es-abstract": "^1.23.2" } }, "sha512-+Lhy3TQTuzXI5hevh8sBGqbmurHbbIjAi0Z4S63nthVLmLxfbj4T54a4CfZrXIrt9iP4mVAPYMo/v99taj3wjQ=="], + + "object.values": ["object.values@1.2.1", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA=="], + "ohash": ["ohash@2.0.11", "", {}, "sha512-RdR9FQrFwNBNXAr4GixM8YaRZRJ5PUWbKYbE5eOsrwAjJW0q2REGcf79oYPsLyskQCZG1PLN+S/K1V00joZAoQ=="], "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="], @@ -1629,17 +1978,21 @@ "open": ["open@11.0.0", "", { "dependencies": { "default-browser": "^5.4.0", "define-lazy-prop": "^3.0.0", "is-in-ssh": "^1.0.0", "is-inside-container": "^1.0.0", "powershell-utils": "^0.1.0", "wsl-utils": "^0.3.0" } }, "sha512-smsWv2LzFjP03xmvFoJ331ss6h+jixfA4UUV/Bsiyuu4YJPfN+FIQGOIiv4w9/+MoHkfkJ22UIaQWRVFRfH6Vw=="], + "optionator": ["optionator@0.9.4", "", { "dependencies": { "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", "type-check": "^0.4.0", "word-wrap": "^1.2.5" } }, "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g=="], + "ora": ["ora@8.2.0", "", { "dependencies": { "chalk": "^5.3.0", "cli-cursor": "^5.0.0", "cli-spinners": "^2.9.2", "is-interactive": "^2.0.0", "is-unicode-supported": "^2.0.0", "log-symbols": "^6.0.0", "stdin-discarder": "^0.2.2", "string-width": "^7.2.0", "strip-ansi": "^7.1.0" } }, "sha512-weP+BZ8MVNnlCm8c0Qdc1WSWq4Qn7I+9CJGm7Qali6g44e/PUzbjNqJX5NJ9ljlNMosfJvg1fKEGILklK9cwnw=="], "outdent": ["outdent@0.5.0", "", {}, "sha512-/jHxFIzoMXdqPzTaCpFzAAWhpkSjZPF4Vsn6jAfNpmbH/ymsmd7Qc6VE9BGn0L6YMj6uwpQLxCECpus4ukKS9Q=="], "outvariant": ["outvariant@1.4.3", "", {}, "sha512-+Sl2UErvtsoajRDKCE5/dBz4DIvHXQQnAxtQTF04OJxY0+DyZXSo5P5Bb7XYWOh81syohlYL24hbDwxedPUJCA=="], + "own-keys": ["own-keys@1.0.1", "", { "dependencies": { "get-intrinsic": "^1.2.6", "object-keys": "^1.1.1", "safe-push-apply": "^1.0.0" } }, "sha512-qFOyK5PjiWZd+QQIh+1jhdb9LpxTF0qs7Pm8o5QHYZ0M3vKqSqzsZaEB6oWlxZ+q2sJBMI/Ktgd2N5ZwQoRHfg=="], + "p-filter": ["p-filter@2.1.0", "", { "dependencies": { "p-map": "^2.0.0" } }, "sha512-ZBxxZ5sL2HghephhpGAQdoskxplTwr7ICaehZwLIlfL6acuVgZPm8yBNuRAFBGEqtD/hmUeq9eqLg2ys9Xr/yw=="], "p-limit": ["p-limit@2.3.0", "", { "dependencies": { "p-try": "^2.0.0" } }, "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w=="], - "p-locate": ["p-locate@4.1.0", "", { "dependencies": { "p-limit": "^2.2.0" } }, "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A=="], + "p-locate": ["p-locate@5.0.0", "", { "dependencies": { "p-limit": "^3.0.2" } }, "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw=="], "p-map": ["p-map@2.1.0", "", {}, "sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw=="], @@ -1667,6 +2020,8 @@ "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="], + "path-parse": ["path-parse@1.0.7", "", {}, "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw=="], + "path-to-regexp": ["path-to-regexp@8.3.0", "", {}, "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA=="], "path-type": ["path-type@4.0.0", "", {}, "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw=="], @@ -1705,6 +2060,8 @@ "points-on-path": ["points-on-path@0.2.1", "", { "dependencies": { "path-data-parser": "0.1.0", "points-on-curve": "0.2.0" } }, "sha512-25ClnWWuw7JbWZcgqY/gJ4FQWadKxGWk+3kR/7kD0tCaDtPPMj7oHu2ToLaVhfpnHrZzYby2w6tUA0eOIuUg8g=="], + "possible-typed-array-names": ["possible-typed-array-names@1.1.0", "", {}, "sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg=="], + "postcss": ["postcss@8.5.6", "", { "dependencies": { "nanoid": "^3.3.11", "picocolors": "^1.1.1", "source-map-js": "^1.2.1" } }, "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg=="], "postcss-selector-parser": ["postcss-selector-parser@7.1.1", "", { "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" } }, "sha512-orRsuYpJVw8LdAwqqLykBj9ecS5/cRHlI5+nvTo8LcCKmzDmqVORXtOIYEEQuL9D4BxtA1lm5isAqzQZCoQ6Eg=="], @@ -1719,6 +2076,8 @@ "powershell-utils": ["powershell-utils@0.1.0", "", {}, "sha512-dM0jVuXJPsDN6DvRpea484tCUaMiXWjuCn++HGTqUWzGDjv5tZkEZldAJ/UMlqRYGFrD/etByo4/xOuC/snX2A=="], + "prelude-ls": ["prelude-ls@1.2.1", "", {}, "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g=="], + "prettier": ["prettier@2.8.8", "", { "bin": { "prettier": "bin-prettier.js" } }, "sha512-tdN8qQGvNjw4CHbY+XXk0JgCXn9QiF21a55rBe5LJAU+kDyC4WQn4+awm2Xfk2lQMk5fKup9XgzTZtGkjBdP9Q=="], "pretty-ms": ["pretty-ms@9.3.0", "", { "dependencies": { "parse-ms": "^4.0.0" } }, "sha512-gjVS5hOP+M3wMm5nmNOucbIrqudzs9v/57bWRHQWLYklXqoXKrVfYW2W9+glfGsqtPgpiz5WwyEEB+ksXIx3gQ=="], @@ -1733,6 +2092,8 @@ "proxy-addr": ["proxy-addr@2.0.7", "", { "dependencies": { "forwarded": "0.2.0", "ipaddr.js": "1.9.1" } }, "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg=="], + "punycode": ["punycode@2.3.1", "", {}, "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg=="], + "pure-rand": ["pure-rand@6.1.0", "", {}, "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA=="], "qs": ["qs@6.14.1", "", { "dependencies": { "side-channel": "^1.1.0" } }, "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ=="], @@ -1791,12 +2152,16 @@ "recma-stringify": ["recma-stringify@1.0.0", "", { "dependencies": { "@types/estree": "^1.0.0", "estree-util-to-js": "^2.0.0", "unified": "^11.0.0", "vfile": "^6.0.0" } }, "sha512-cjwII1MdIIVloKvC9ErQ+OgAtwHBmcZ0Bg4ciz78FtbT8In39aAYbaA7zvxQ61xVMSPE8WxhLwLbhif4Js2C+g=="], + "reflect.getprototypeof": ["reflect.getprototypeof@1.0.10", "", { "dependencies": { "call-bind": "^1.0.8", "define-properties": "^1.2.1", "es-abstract": "^1.23.9", "es-errors": "^1.3.0", "es-object-atoms": "^1.0.0", "get-intrinsic": "^1.2.7", "get-proto": "^1.0.1", "which-builtin-type": "^1.2.1" } }, "sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw=="], + "regex": ["regex@6.1.0", "", { "dependencies": { "regex-utilities": "^2.3.0" } }, "sha512-6VwtthbV4o/7+OaAF9I5L5V3llLEsoPyq9P1JVXkedTP33c7MfCG0/5NOPcSJn0TzXcG9YUrR0gQSWioew3LDg=="], "regex-recursion": ["regex-recursion@6.0.2", "", { "dependencies": { "regex-utilities": "^2.3.0" } }, "sha512-0YCaSCq2VRIebiaUviZNs0cBz1kg5kVS2UKUfNIx8YVs1cN3AV7NTctO5FOKBA+UT2BPJIWZauYHPqJODG50cg=="], "regex-utilities": ["regex-utilities@2.3.0", "", {}, "sha512-8VhliFJAWRaUiVvREIiW2NXXTmHs4vMNnSzuJVhscgmGav3g9VDxLrQndI3dZZVVdp0ZO/5v0xmX516/7M9cng=="], + "regexp.prototype.flags": ["regexp.prototype.flags@1.5.4", "", { "dependencies": { "call-bind": "^1.0.8", "define-properties": "^1.2.1", "es-errors": "^1.3.0", "get-proto": "^1.0.1", "gopd": "^1.2.0", "set-function-name": "^2.0.2" } }, "sha512-dYqgNSZbDwkaJ2ceRd9ojCGjBq+mOm9LmtXnAnEGyHhN/5R7iDW2TRw3h+o/jCFxus3P2LfWIIiwowAjANm7IA=="], + "rehype-recma": ["rehype-recma@1.0.0", "", { "dependencies": { "@types/estree": "^1.0.0", "@types/hast": "^3.0.0", "hast-util-to-estree": "^3.0.0" } }, "sha512-lqA4rGUf1JmacCNWWZx0Wv1dHqMwxzsDWYMTowuplHF3xH0N/MmrZ/G3BDZnzAkRmxDadujCjaKM2hqYdCBOGw=="], "remark": ["remark@15.0.1", "", { "dependencies": { "@types/mdast": "^4.0.0", "remark-parse": "^11.0.0", "remark-stringify": "^11.0.0", "unified": "^11.0.0" } }, "sha512-Eht5w30ruCXgFmxVUSlNWQ9iiimq07URKeFS3hNc8cUWy1llX4KDWfyEDZRycMc+znsN9Ux5/tJ/BFdgdOwA3A=="], @@ -1817,6 +2182,8 @@ "reselect": ["reselect@5.1.1", "", {}, "sha512-K/BG6eIky/SBpzfHZv/dd+9JBFiS4SWV7FIujVyJRux6e45+73RaUHXLmIR1f7WOMaQ0U1km6qwklRQxpJJY0w=="], + "resolve": ["resolve@1.22.11", "", { "dependencies": { "is-core-module": "^2.16.1", "path-parse": "^1.0.7", "supports-preserve-symlinks-flag": "^1.0.0" }, "bin": { "resolve": "bin/resolve" } }, "sha512-RfqAvLnMl313r7c9oclB1HhUEAezcpLjz95wFH4LVuhk9JF/r22qmVP9AMmOU4vMX7Q8pN8jwNg/CSpdFnMjTQ=="], + "resolve-from": ["resolve-from@5.0.0", "", {}, "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw=="], "resolve-pkg-maps": ["resolve-pkg-maps@1.0.0", "", {}, "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw=="], @@ -1839,6 +2206,12 @@ "rw": ["rw@1.3.3", "", {}, "sha512-PdhdWy89SiZogBLaw42zdeqtRJ//zFd2PgQavcICDUgJT5oW10QCRKbJ6bg4r0/UY2M6BWd5tkxuGFRvCkgfHQ=="], + "safe-array-concat": ["safe-array-concat@1.1.3", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.2", "get-intrinsic": "^1.2.6", "has-symbols": "^1.1.0", "isarray": "^2.0.5" } }, "sha512-AURm5f0jYEOydBj7VQlVvDrjeFgthDdEF5H1dP+6mNpoXOMo1quQqJ4wvJDyRZ9+pO3kGWoOdmV08cSv2aJV6Q=="], + + "safe-push-apply": ["safe-push-apply@1.0.0", "", { "dependencies": { "es-errors": "^1.3.0", "isarray": "^2.0.5" } }, "sha512-iKE9w/Z7xCzUMIZqdBsp6pEQvwuEebH4vdpjcDWnyzaI6yl6O9FHvVpmGelvEHNsoY6wGblkxR6Zty/h00WiSA=="], + + "safe-regex-test": ["safe-regex-test@1.1.0", "", { "dependencies": { "call-bound": "^1.0.2", "es-errors": "^1.3.0", "is-regex": "^1.2.1" } }, "sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw=="], + "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], "scheduler": ["scheduler@0.27.0", "", {}, "sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q=="], @@ -1851,6 +2224,12 @@ "serve-static": ["serve-static@2.2.1", "", { "dependencies": { "encodeurl": "^2.0.0", "escape-html": "^1.0.3", "parseurl": "^1.3.3", "send": "^1.2.0" } }, "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw=="], + "set-function-length": ["set-function-length@1.2.2", "", { "dependencies": { "define-data-property": "^1.1.4", "es-errors": "^1.3.0", "function-bind": "^1.1.2", "get-intrinsic": "^1.2.4", "gopd": "^1.0.1", "has-property-descriptors": "^1.0.2" } }, "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg=="], + + "set-function-name": ["set-function-name@2.0.2", "", { "dependencies": { "define-data-property": "^1.1.4", "es-errors": "^1.3.0", "functions-have-names": "^1.2.3", "has-property-descriptors": "^1.0.2" } }, "sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ=="], + + "set-proto": ["set-proto@1.0.0", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.0.0" } }, "sha512-RJRdvCo6IAnPdsvP/7m6bsQqNnn1FCBX5ZNtFL98MmFF/4xAIJTIg1YbHW5DC2W5SKZanrC6i4HsJqlajw/dZw=="], + "setprototypeof": ["setprototypeof@1.2.0", "", {}, "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="], "shadcn": ["shadcn@3.6.3", "", { "dependencies": { "@antfu/ni": "^25.0.0", "@babel/core": "^7.28.0", "@babel/parser": "^7.28.0", "@babel/plugin-transform-typescript": "^7.28.0", "@babel/preset-typescript": "^7.27.1", "@dotenvx/dotenvx": "^1.48.4", "@modelcontextprotocol/sdk": "^1.17.2", "@types/validate-npm-package-name": "^4.0.2", "browserslist": "^4.26.2", "commander": "^14.0.0", "cosmiconfig": "^9.0.0", "dedent": "^1.6.0", "deepmerge": "^4.3.1", "diff": "^8.0.2", "execa": "^9.6.0", "fast-glob": "^3.3.3", "fs-extra": "^11.3.1", "fuzzysort": "^3.1.0", "https-proxy-agent": "^7.0.6", "kleur": "^4.1.5", "msw": "^2.10.4", "node-fetch": "^3.3.2", "open": "^11.0.0", "ora": "^8.2.0", "postcss": "^8.5.6", "postcss-selector-parser": "^7.1.0", "prompts": "^2.4.2", "recast": "^0.23.11", "stringify-object": "^5.0.0", "ts-morph": "^26.0.0", "tsconfig-paths": "^4.2.0", "validate-npm-package-name": "^7.0.1", "zod": "^3.24.1", "zod-to-json-schema": "^3.24.6" }, "bin": { "shadcn": "dist/index.js" } }, "sha512-j2xlma8PtYLbhvA612/MPOrDYsEp0DIiU1gC0BEbSBqWR6mBgwiKpA21Juq9tSswgUeIfxoUzZX8c7YwcL3ncA=="], @@ -1897,16 +2276,32 @@ "sprintf-js": ["sprintf-js@1.0.3", "", {}, "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g=="], + "stable-hash": ["stable-hash@0.0.5", "", {}, "sha512-+L3ccpzibovGXFK+Ap/f8LOS0ahMrHTf3xu7mMLSpEGU0EO9ucaysSylKo9eRDFNhWve/y275iPmIZ4z39a9iA=="], + "stack-utils": ["stack-utils@2.0.6", "", { "dependencies": { "escape-string-regexp": "^2.0.0" } }, "sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ=="], "statuses": ["statuses@2.0.2", "", {}, "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw=="], "stdin-discarder": ["stdin-discarder@0.2.2", "", {}, "sha512-UhDfHmA92YAlNnCfhmq0VeNL5bDbiZGg7sZ2IvPsXubGkiNa9EC+tUTsjBRsYUAz87btI6/1wf4XoVvQ3uRnmQ=="], + "stop-iteration-iterator": ["stop-iteration-iterator@1.1.0", "", { "dependencies": { "es-errors": "^1.3.0", "internal-slot": "^1.1.0" } }, "sha512-eLoXW/DHyl62zxY4SCaIgnRhuMr6ri4juEYARS8E6sCEqzKpOiE521Ucofdx+KnDZl5xmvGYaaKCk5FEOxJCoQ=="], + "strict-event-emitter": ["strict-event-emitter@0.5.1", "", {}, "sha512-vMgjE/GGEPEFnhFub6pa4FmJBRBVOLpIII2hvCZ8Kzb7K0hlHo7mQv6xYrBvCL2LtAIBwFUK8wvuJgTVSQ5MFQ=="], "string-width": ["string-width@8.1.0", "", { "dependencies": { "get-east-asian-width": "^1.3.0", "strip-ansi": "^7.1.0" } }, "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg=="], + "string.prototype.includes": ["string.prototype.includes@2.0.1", "", { "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", "es-abstract": "^1.23.3" } }, "sha512-o7+c9bW6zpAdJHTtujeePODAhkuicdAryFsfVKwA+wGw89wJ4GTY484WTucM9hLtDEOpOvI+aHnzqnC5lHp4Rg=="], + + "string.prototype.matchall": ["string.prototype.matchall@4.0.12", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.3", "define-properties": "^1.2.1", "es-abstract": "^1.23.6", "es-errors": "^1.3.0", "es-object-atoms": "^1.0.0", "get-intrinsic": "^1.2.6", "gopd": "^1.2.0", "has-symbols": "^1.1.0", "internal-slot": "^1.1.0", "regexp.prototype.flags": "^1.5.3", "set-function-name": "^2.0.2", "side-channel": "^1.1.0" } }, "sha512-6CC9uyBL+/48dYizRf7H7VAYCMCNTBeM78x/VTUe9bFEaxBepPJDa1Ow99LqI/1yF7kuy7Q3cQsYMrcjGUcskA=="], + + "string.prototype.repeat": ["string.prototype.repeat@1.0.0", "", { "dependencies": { "define-properties": "^1.1.3", "es-abstract": "^1.17.5" } }, "sha512-0u/TldDbKD8bFCQ/4f5+mNRrXwZ8hg2w7ZR8wa16e8z9XpePWl3eGEcUD0OXpEH/VJH/2G3gjUtR3ZOiBe2S/w=="], + + "string.prototype.trim": ["string.prototype.trim@1.2.10", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.2", "define-data-property": "^1.1.4", "define-properties": "^1.2.1", "es-abstract": "^1.23.5", "es-object-atoms": "^1.0.0", "has-property-descriptors": "^1.0.2" } }, "sha512-Rs66F0P/1kedk5lyYyH9uBzuiI/kNRmwJAR9quK6VOtIpZ2G+hMZd+HQbbv25MgCA6gEffoMZYxlTod4WcdrKA=="], + + "string.prototype.trimend": ["string.prototype.trimend@1.0.9", "", { "dependencies": { "call-bind": "^1.0.8", "call-bound": "^1.0.2", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-G7Ok5C6E/j4SGfyLCloXTrngQIQU3PWtXGst3yM7Bea9FRURf1S42ZHlZZtsNque2FN2PoUhfZXYLNWwEr4dLQ=="], + + "string.prototype.trimstart": ["string.prototype.trimstart@1.0.8", "", { "dependencies": { "call-bind": "^1.0.7", "define-properties": "^1.2.1", "es-object-atoms": "^1.0.0" } }, "sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg=="], + "stringify-entities": ["stringify-entities@4.0.4", "", { "dependencies": { "character-entities-html4": "^2.0.0", "character-entities-legacy": "^3.0.0" } }, "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg=="], "stringify-object": ["stringify-object@5.0.0", "", { "dependencies": { "get-own-enumerable-keys": "^1.0.0", "is-obj": "^3.0.0", "is-regexp": "^3.1.0" } }, "sha512-zaJYxz2FtcMb4f+g60KsRNFOpVMUyuJgA51Zi5Z1DOTC3S59+OQiVOzE9GZt0x72uBGWKsQIuBKeF9iusmKFsg=="], @@ -1917,6 +2312,8 @@ "strip-final-newline": ["strip-final-newline@4.0.0", "", {}, "sha512-aulFJcD6YK8V1G7iRB5tigAP4TsHBZZrOV8pjV++zdUwmeV8uzbY7yn6h9MswN62adStNZFuCIx4haBnRuMDaw=="], + "strip-json-comments": ["strip-json-comments@3.1.1", "", {}, "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig=="], + "style-to-js": ["style-to-js@1.1.21", "", { "dependencies": { "style-to-object": "1.0.14" } }, "sha512-RjQetxJrrUJLQPHbLku6U/ocGtzyjbJMP9lCNK7Ag0CNh690nSH8woqWH9u16nMjYBAok+i7JO1NP2pOy8IsPQ=="], "style-to-object": ["style-to-object@1.0.14", "", { "dependencies": { "inline-style-parser": "0.2.7" } }, "sha512-LIN7rULI0jBscWQYaSswptyderlarFkjQ+t79nzty8tcIAceVomEVlLzH5VP4Cmsv6MtKhs7qaAiwlcp+Mgaxw=="], @@ -1927,6 +2324,10 @@ "support-ticket-search": ["support-ticket-search@workspace:apps/examples/support-ticket-search"], + "supports-color": ["supports-color@7.2.0", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="], + + "supports-preserve-symlinks-flag": ["supports-preserve-symlinks-flag@1.0.0", "", {}, "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w=="], + "tabbable": ["tabbable@6.4.0", "", {}, "sha512-05PUHKSNE8ou2dwIxTngl4EzcnsCDZGJ/iCLtDflR/SHB/ny14rXc+qU5P4mG9JkusiV7EivzY9Mhm55AzAvCg=="], "tagged-tag": ["tagged-tag@1.0.0", "", {}, "sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng=="], @@ -1939,6 +2340,8 @@ "term-size": ["term-size@2.2.1", "", {}, "sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg=="], + "test": ["test@workspace:apps/examples/test"], + "tiny-invariant": ["tiny-invariant@1.3.3", "", {}, "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg=="], "tinyexec": ["tinyexec@1.0.2", "", {}, "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg=="], @@ -1959,6 +2362,8 @@ "trough": ["trough@2.2.0", "", {}, "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw=="], + "ts-api-utils": ["ts-api-utils@2.4.0", "", { "peerDependencies": { "typescript": ">=4.8.4" } }, "sha512-3TaVTaAv2gTiMB35i3FiGJaRfwb3Pyn/j3m/bfAvGe8FB7CF6u+LMYqYlDh7reQf7UNvoTvdfAqHGmPGOSsPmA=="], + "ts-dedent": ["ts-dedent@2.2.0", "", {}, "sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ=="], "ts-morph": ["ts-morph@26.0.0", "", { "dependencies": { "@ts-morph/common": "~0.27.0", "code-block-writer": "^13.0.3" } }, "sha512-ztMO++owQnz8c/gIENcM9XfCEzgoGphTv+nKpYNM1bgsdOVC/jRZuEBf6N+mLLDNg68Kl+GgUZfOySaRiG1/Ug=="], @@ -1985,14 +2390,28 @@ "tw-animate-css": ["tw-animate-css@1.4.0", "", {}, "sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ=="], + "type-check": ["type-check@0.4.0", "", { "dependencies": { "prelude-ls": "^1.2.1" } }, "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew=="], + "type-fest": ["type-fest@4.41.0", "", {}, "sha512-TeTSQ6H5YHvpqVwBRcnLDCBnDOHWYu7IvGbHT6N8AOymcr9PJGjc1GTtiWZTYg0NCgYwvnYWEkVChQAr9bjfwA=="], "type-is": ["type-is@2.0.1", "", { "dependencies": { "content-type": "^1.0.5", "media-typer": "^1.1.0", "mime-types": "^3.0.0" } }, "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw=="], + "typed-array-buffer": ["typed-array-buffer@1.0.3", "", { "dependencies": { "call-bound": "^1.0.3", "es-errors": "^1.3.0", "is-typed-array": "^1.1.14" } }, "sha512-nAYYwfY3qnzX30IkA6AQZjVbtK6duGontcQm1WSG1MD94YLqK0515GNApXkoxKOWMusVssAHWLh9SeaoefYFGw=="], + + "typed-array-byte-length": ["typed-array-byte-length@1.0.3", "", { "dependencies": { "call-bind": "^1.0.8", "for-each": "^0.3.3", "gopd": "^1.2.0", "has-proto": "^1.2.0", "is-typed-array": "^1.1.14" } }, "sha512-BaXgOuIxz8n8pIq3e7Atg/7s+DpiYrxn4vdot3w9KbnBhcRQq6o3xemQdIfynqSeXeDrF32x+WvfzmOjPiY9lg=="], + + "typed-array-byte-offset": ["typed-array-byte-offset@1.0.4", "", { "dependencies": { "available-typed-arrays": "^1.0.7", "call-bind": "^1.0.8", "for-each": "^0.3.3", "gopd": "^1.2.0", "has-proto": "^1.2.0", "is-typed-array": "^1.1.15", "reflect.getprototypeof": "^1.0.9" } }, "sha512-bTlAFB/FBYMcuX81gbL4OcpH5PmlFHqlCCpAl8AlEzMz5k53oNDvN8p1PNOWLEmI2x4orp3raOFB51tv9X+MFQ=="], + + "typed-array-length": ["typed-array-length@1.0.7", "", { "dependencies": { "call-bind": "^1.0.7", "for-each": "^0.3.3", "gopd": "^1.0.1", "is-typed-array": "^1.1.13", "possible-typed-array-names": "^1.0.0", "reflect.getprototypeof": "^1.0.6" } }, "sha512-3KS2b+kL7fsuk/eJZ7EQdnEmQoaho/r6KUef7hxvltNA5DR8NAUM+8wJMbJyZ4G9/7i3v5zPBIMN5aybAh2/Jg=="], + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + "typescript-eslint": ["typescript-eslint@8.53.1", "", { "dependencies": { "@typescript-eslint/eslint-plugin": "8.53.1", "@typescript-eslint/parser": "8.53.1", "@typescript-eslint/typescript-estree": "8.53.1", "@typescript-eslint/utils": "8.53.1" }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0", "typescript": ">=4.8.4 <6.0.0" } }, "sha512-gB+EVQfP5RDElh9ittfXlhZJdjSU4jUSTyE2+ia8CYyNvet4ElfaLlAIqDvQV9JPknKx0jQH1racTYe/4LaLSg=="], + "ufo": ["ufo@1.6.1", "", {}, "sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA=="], + "unbox-primitive": ["unbox-primitive@1.1.0", "", { "dependencies": { "call-bound": "^1.0.3", "has-bigints": "^1.0.2", "has-symbols": "^1.1.0", "which-boxed-primitive": "^1.1.1" } }, "sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw=="], + "uncrypto": ["uncrypto@0.1.3", "", {}, "sha512-Ql87qFHB3s/De2ClA9e0gsnS6zXG27SkTiSJwjCc9MebbfapQfuPzumMIUMi38ezPZVNFcHI9sUIepeQfw8J8Q=="], "undici-types": ["undici-types@6.21.0", "", {}, "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ=="], @@ -2021,10 +2440,14 @@ "unrag": ["unrag@workspace:packages/unrag"], + "unrs-resolver": ["unrs-resolver@1.11.1", "", { "dependencies": { "napi-postinstall": "^0.3.0" }, "optionalDependencies": { "@unrs/resolver-binding-android-arm-eabi": "1.11.1", "@unrs/resolver-binding-android-arm64": "1.11.1", "@unrs/resolver-binding-darwin-arm64": "1.11.1", "@unrs/resolver-binding-darwin-x64": "1.11.1", "@unrs/resolver-binding-freebsd-x64": "1.11.1", "@unrs/resolver-binding-linux-arm-gnueabihf": "1.11.1", "@unrs/resolver-binding-linux-arm-musleabihf": "1.11.1", "@unrs/resolver-binding-linux-arm64-gnu": "1.11.1", "@unrs/resolver-binding-linux-arm64-musl": "1.11.1", "@unrs/resolver-binding-linux-ppc64-gnu": "1.11.1", "@unrs/resolver-binding-linux-riscv64-gnu": "1.11.1", "@unrs/resolver-binding-linux-riscv64-musl": "1.11.1", "@unrs/resolver-binding-linux-s390x-gnu": "1.11.1", "@unrs/resolver-binding-linux-x64-gnu": "1.11.1", "@unrs/resolver-binding-linux-x64-musl": "1.11.1", "@unrs/resolver-binding-wasm32-wasi": "1.11.1", "@unrs/resolver-binding-win32-arm64-msvc": "1.11.1", "@unrs/resolver-binding-win32-ia32-msvc": "1.11.1", "@unrs/resolver-binding-win32-x64-msvc": "1.11.1" } }, "sha512-bSjt9pjaEBnNiGgc9rUiHGKv5l4/TGzDmYw3RhnkJGtLhbnnA/5qJj7x3dNDCRx/PJxu774LlH8lCOlB4hEfKg=="], + "until-async": ["until-async@3.0.2", "", {}, "sha512-IiSk4HlzAMqTUseHHe3VhIGyuFmN90zMTpD3Z3y8jeQbzLIq500MVM7Jq2vUAnTKAFPJrqwkzr6PoTcPhGcOiw=="], "update-browserslist-db": ["update-browserslist-db@1.2.3", "", { "dependencies": { "escalade": "^3.2.0", "picocolors": "^1.1.1" }, "peerDependencies": { "browserslist": ">= 4.21.0" }, "bin": { "update-browserslist-db": "cli.js" } }, "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w=="], + "uri-js": ["uri-js@4.4.1", "", { "dependencies": { "punycode": "^2.1.0" } }, "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg=="], + "use-callback-ref": ["use-callback-ref@1.3.3", "", { "dependencies": { "tslib": "^2.0.0" }, "peerDependencies": { "@types/react": "*", "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg=="], "use-sidecar": ["use-sidecar@1.1.3", "", { "dependencies": { "detect-node-es": "^1.1.0", "tslib": "^2.0.0" }, "peerDependencies": { "@types/react": "*", "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ=="], @@ -2067,8 +2490,18 @@ "which": ["which@4.0.0", "", { "dependencies": { "isexe": "^3.1.1" }, "bin": { "node-which": "bin/which.js" } }, "sha512-GlaYyEb07DPxYCKhKzplCWBJtvxZcZMrL+4UkrTSJHHPyZU4mYYTv3qaOe77H7EODLSSopAUFAc6W8U4yqvscg=="], + "which-boxed-primitive": ["which-boxed-primitive@1.1.1", "", { "dependencies": { "is-bigint": "^1.1.0", "is-boolean-object": "^1.2.1", "is-number-object": "^1.1.1", "is-string": "^1.1.1", "is-symbol": "^1.1.1" } }, "sha512-TbX3mj8n0odCBFVlY8AxkqcHASw3L60jIuF8jFP78az3C2YhmGvqbHBpAjTRH2/xqYunrJ9g1jSyjCjpoWzIAA=="], + + "which-builtin-type": ["which-builtin-type@1.2.1", "", { "dependencies": { "call-bound": "^1.0.2", "function.prototype.name": "^1.1.6", "has-tostringtag": "^1.0.2", "is-async-function": "^2.0.0", "is-date-object": "^1.1.0", "is-finalizationregistry": "^1.1.0", "is-generator-function": "^1.0.10", "is-regex": "^1.2.1", "is-weakref": "^1.0.2", "isarray": "^2.0.5", "which-boxed-primitive": "^1.1.0", "which-collection": "^1.0.2", "which-typed-array": "^1.1.16" } }, "sha512-6iBczoX+kDQ7a3+YJBnh3T+KZRxM/iYNPXicqk66/Qfm1b93iu+yOImkg0zHbj5LNOcNv1TEADiZ0xa34B4q6Q=="], + + "which-collection": ["which-collection@1.0.2", "", { "dependencies": { "is-map": "^2.0.3", "is-set": "^2.0.3", "is-weakmap": "^2.0.2", "is-weakset": "^2.0.3" } }, "sha512-K4jVyjnBdgvc86Y6BkaLZEN933SwYOuBFkdmBu9ZfkcAbdVbpITnDmjvZ/aQjRXQrv5EPkTnD1s39GiiqbngCw=="], + + "which-typed-array": ["which-typed-array@1.1.20", "", { "dependencies": { "available-typed-arrays": "^1.0.7", "call-bind": "^1.0.8", "call-bound": "^1.0.4", "for-each": "^0.3.5", "get-proto": "^1.0.1", "gopd": "^1.2.0", "has-tostringtag": "^1.0.2" } }, "sha512-LYfpUkmqwl0h9A2HL09Mms427Q1RZWuOHsukfVcKRq9q95iQxdw0ix1JQrqbcDR9PH1QDwf5Qo8OZb5lksZ8Xg=="], + "widest-line": ["widest-line@5.0.0", "", { "dependencies": { "string-width": "^7.0.0" } }, "sha512-c9bZp7b5YtRj2wOe6dlj32MK+Bx/M/d+9VB2SHM1OtsUHR0aV0tdP6DWh/iMt0kWi1t5g1Iudu6hQRNd1A4PVA=="], + "word-wrap": ["word-wrap@1.2.5", "", {}, "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA=="], + "wrap-ansi": ["wrap-ansi@9.0.2", "", { "dependencies": { "ansi-styles": "^6.2.1", "string-width": "^7.0.0", "strip-ansi": "^7.1.0" } }, "sha512-42AtmgqjV+X1VpdOfyTGOYRi0/zsoLqtXQckTmqTeybT+BDIbM/Guxo7x3pE2vtpr1ok6xRqM9OpBe+Jyoqyww=="], "wrappy": ["wrappy@1.0.2", "", {}, "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="], @@ -2087,6 +2520,8 @@ "yargs-parser": ["yargs-parser@21.1.1", "", {}, "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw=="], + "yocto-queue": ["yocto-queue@0.1.0", "", {}, "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q=="], + "yoctocolors": ["yoctocolors@2.1.2", "", {}, "sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug=="], "yoctocolors-cjs": ["yoctocolors-cjs@2.1.3", "", {}, "sha512-U/PBtDf35ff0D8X8D0jfdzHYEPFxAI7jJlxZXwCSez5M3190m+QobIfh+sWDWSHMCWWJN2AWamkegn6vr6YBTw=="], @@ -2097,6 +2532,8 @@ "zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="], + "zod-validation-error": ["zod-validation-error@4.0.2", "", { "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" } }, "sha512-Q6/nZLe6jxuU80qb/4uJ4t5v2VEZ44lzQjPDhYJNztRQ4wyWc6VF3D3Kb/fAuPetZQnhS3hnajCf9CsWesghLQ=="], + "zwitch": ["zwitch@2.0.4", "", {}, "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A=="], "@ai-sdk/amazon-bedrock/@ai-sdk/provider": ["@ai-sdk/provider@2.0.0", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA=="], @@ -2141,12 +2578,18 @@ "@esbuild-kit/core-utils/esbuild": ["esbuild@0.18.20", "", { "optionalDependencies": { "@esbuild/android-arm": "0.18.20", "@esbuild/android-arm64": "0.18.20", "@esbuild/android-x64": "0.18.20", "@esbuild/darwin-arm64": "0.18.20", "@esbuild/darwin-x64": "0.18.20", "@esbuild/freebsd-arm64": "0.18.20", "@esbuild/freebsd-x64": "0.18.20", "@esbuild/linux-arm": "0.18.20", "@esbuild/linux-arm64": "0.18.20", "@esbuild/linux-ia32": "0.18.20", "@esbuild/linux-loong64": "0.18.20", "@esbuild/linux-mips64el": "0.18.20", "@esbuild/linux-ppc64": "0.18.20", "@esbuild/linux-riscv64": "0.18.20", "@esbuild/linux-s390x": "0.18.20", "@esbuild/linux-x64": "0.18.20", "@esbuild/netbsd-x64": "0.18.20", "@esbuild/openbsd-x64": "0.18.20", "@esbuild/sunos-x64": "0.18.20", "@esbuild/win32-arm64": "0.18.20", "@esbuild/win32-ia32": "0.18.20", "@esbuild/win32-x64": "0.18.20" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-ceqxoedUrcayh7Y7ZX6NdbbDzGROiyVBgC4PriJThBKSVPWnnFHZAkfI1lJT8QFkOwH4qOS2SJkS4wvpGl8BpA=="], + "@eslint-community/eslint-utils/eslint-visitor-keys": ["eslint-visitor-keys@3.4.3", "", {}, "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag=="], + + "@eslint/eslintrc/globals": ["globals@14.0.0", "", {}, "sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ=="], + "@inquirer/core/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], "@inquirer/core/wrap-ansi": ["wrap-ansi@6.2.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA=="], "@manypkg/find-root/@types/node": ["@types/node@12.20.55", "", {}, "sha512-J8xLz7q2OFulZ2cyGTLE1TbbZcjpno7FaN6zdJNrgAdrJ+DZzh/uFR6YrTb4C+nXakvud8Q4+rbhoIWlYQbUFQ=="], + "@manypkg/find-root/find-up": ["find-up@4.1.0", "", { "dependencies": { "locate-path": "^5.0.0", "path-exists": "^4.0.0" } }, "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw=="], + "@manypkg/find-root/fs-extra": ["fs-extra@8.1.0", "", { "dependencies": { "graceful-fs": "^4.2.0", "jsonfile": "^4.0.0", "universalify": "^0.1.0" } }, "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g=="], "@manypkg/get-packages/@changesets/types": ["@changesets/types@4.1.0", "", {}, "sha512-LDQvVDv5Kb50ny2s25Fhm3d9QSZimsoUGBsUioj6MC3qbMUCuC8GPIvk/M6IvXx3lYhAs0lwWUQLb+VIEUCECw=="], @@ -2155,6 +2598,10 @@ "@mdx-js/mdx/source-map": ["source-map@0.7.6", "", {}, "sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ=="], + "@modelcontextprotocol/sdk/ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], + + "@next/eslint-plugin-next/fast-glob": ["fast-glob@3.3.1", "", { "dependencies": { "@nodelib/fs.stat": "^2.0.2", "@nodelib/fs.walk": "^1.2.3", "glob-parent": "^5.1.2", "merge2": "^1.3.0", "micromatch": "^4.0.4" } }, "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg=="], + "@radix-ui/react-accordion/@radix-ui/react-primitive": ["@radix-ui/react-primitive@2.1.3", "", { "dependencies": { "@radix-ui/react-slot": "1.2.3" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ=="], "@radix-ui/react-arrow/@radix-ui/react-primitive": ["@radix-ui/react-primitive@2.1.3", "", { "dependencies": { "@radix-ui/react-slot": "1.2.3" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ=="], @@ -2211,16 +2658,26 @@ "@tailwindcss/oxide-wasm32-wasi/tslib": ["tslib@2.8.1", "", { "bundled": true }, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], + "@ts-morph/common/minimatch": ["minimatch@10.1.1", "", { "dependencies": { "@isaacs/brace-expansion": "^5.0.0" } }, "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ=="], + "@types/bun/bun-types": ["bun-types@1.3.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ=="], "@types/pg/@types/node": ["@types/node@24.10.4", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg=="], + "@typescript-eslint/eslint-plugin/ignore": ["ignore@7.0.5", "", {}, "sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg=="], + + "@typescript-eslint/typescript-estree/minimatch": ["minimatch@9.0.5", "", { "dependencies": { "brace-expansion": "^2.0.1" } }, "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow=="], + + "ajv-formats/ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], + "bun-types/@types/node": ["@types/node@24.10.4", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg=="], "c12/chokidar": ["chokidar@4.0.3", "", { "dependencies": { "readdirp": "^4.0.1" } }, "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA=="], "c12/dotenv": ["dotenv@16.6.1", "", {}, "sha512-uBq4egWHTcTt33a72vpSG0z3HnPuIl6NqYcTrKEg2azoEyl2hpW0zqlxysq2pK9HlDIHyHyakeYaYnSAwd8bow=="], + "chalk/ansi-styles": ["ansi-styles@4.3.0", "", { "dependencies": { "color-convert": "^2.0.1" } }, "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg=="], + "chevrotain/lodash-es": ["lodash-es@4.17.21", "", {}, "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw=="], "cliui/string-width": ["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="], @@ -2239,20 +2696,42 @@ "d3-sankey/d3-shape": ["d3-shape@1.3.7", "", { "dependencies": { "d3-path": "1" } }, "sha512-EUkvKjqPFUAZyOlhY5gzCxCeI0Aep04LwIRpsZ/mLFelJiUfnK56jo5JMDSE7yyP2kLSb6LtF+S5chMk7uqPqw=="], + "eslint-import-resolver-node/debug": ["debug@3.2.7", "", { "dependencies": { "ms": "^2.1.1" } }, "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ=="], + + "eslint-module-utils/debug": ["debug@3.2.7", "", { "dependencies": { "ms": "^2.1.1" } }, "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ=="], + + "eslint-plugin-import/debug": ["debug@3.2.7", "", { "dependencies": { "ms": "^2.1.1" } }, "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ=="], + + "eslint-plugin-import/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="], + + "eslint-plugin-import/tsconfig-paths": ["tsconfig-paths@3.15.0", "", { "dependencies": { "@types/json5": "^0.0.29", "json5": "^1.0.2", "minimist": "^1.2.6", "strip-bom": "^3.0.0" } }, "sha512-2Ac2RgzDe/cn48GvOe3M+o82pEFewD3UPbyoUHHdKasHwJKjds4fLXWf/Ux5kATBKN20oaFGu+jbElp1pos0mg=="], + + "eslint-plugin-react/resolve": ["resolve@2.0.0-next.5", "", { "dependencies": { "is-core-module": "^2.13.0", "path-parse": "^1.0.7", "supports-preserve-symlinks-flag": "^1.0.0" }, "bin": { "resolve": "bin/resolve" } }, "sha512-U7WjGVG9sH8tvjW5SmGbQuui75FiyjAX72HX15DwBBwF9dNiQZRQAg9nnPhYy+TUnE0+VcrttuvNI8oSxZcocA=="], + + "eslint-plugin-react/semver": ["semver@6.3.1", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="], + + "eslint-plugin-react-hooks/@babel/parser": ["@babel/parser@7.28.6", "", { "dependencies": { "@babel/types": "^7.28.6" }, "bin": "./bin/babel-parser.js" }, "sha512-TeR9zWR18BvbfPmGbLampPMW+uW1NZnJlRuuHso8i87QZNq2JRF9i6RgxRqtEq+wQGsS19NNTWr2duhnE49mfQ=="], + "estree-util-to-js/source-map": ["source-map@0.7.6", "", {}, "sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ=="], "execa/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], "express/cookie": ["cookie@0.7.2", "", {}, "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w=="], + "fast-glob/glob-parent": ["glob-parent@5.1.2", "", { "dependencies": { "is-glob": "^4.0.1" } }, "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow=="], + "fumadocs-mdx/esbuild": ["esbuild@0.27.2", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.27.2", "@esbuild/android-arm": "0.27.2", "@esbuild/android-arm64": "0.27.2", "@esbuild/android-x64": "0.27.2", "@esbuild/darwin-arm64": "0.27.2", "@esbuild/darwin-x64": "0.27.2", "@esbuild/freebsd-arm64": "0.27.2", "@esbuild/freebsd-x64": "0.27.2", "@esbuild/linux-arm": "0.27.2", "@esbuild/linux-arm64": "0.27.2", "@esbuild/linux-ia32": "0.27.2", "@esbuild/linux-loong64": "0.27.2", "@esbuild/linux-mips64el": "0.27.2", "@esbuild/linux-ppc64": "0.27.2", "@esbuild/linux-riscv64": "0.27.2", "@esbuild/linux-s390x": "0.27.2", "@esbuild/linux-x64": "0.27.2", "@esbuild/netbsd-arm64": "0.27.2", "@esbuild/netbsd-x64": "0.27.2", "@esbuild/openbsd-arm64": "0.27.2", "@esbuild/openbsd-x64": "0.27.2", "@esbuild/openharmony-arm64": "0.27.2", "@esbuild/sunos-x64": "0.27.2", "@esbuild/win32-arm64": "0.27.2", "@esbuild/win32-ia32": "0.27.2", "@esbuild/win32-x64": "0.27.2" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw=="], "globby/fast-glob": ["fast-glob@3.3.1", "", { "dependencies": { "@nodelib/fs.stat": "^2.0.2", "@nodelib/fs.walk": "^1.2.3", "glob-parent": "^5.1.2", "merge2": "^1.3.0", "micromatch": "^4.0.4" } }, "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg=="], "import-fresh/resolve-from": ["resolve-from@4.0.0", "", {}, "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g=="], + "ink/chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="], + "katex/commander": ["commander@8.3.0", "", {}, "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww=="], + "log-symbols/chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="], + "log-symbols/is-unicode-supported": ["is-unicode-supported@1.3.0", "", {}, "sha512-43r2mRvz+8JRIKnWJ+3j8JtjRKZ6GmjzfaE/qiBJnikNnYv/6bagRJ1kUhNk8R5EX/GkobD+r+sfxCPJsiKBLQ=="], "mdast-util-find-and-replace/escape-string-regexp": ["escape-string-regexp@5.0.0", "", {}, "sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw=="], @@ -2271,12 +2750,16 @@ "nuqs/@standard-schema/spec": ["@standard-schema/spec@1.0.0", "", {}, "sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA=="], + "ora/chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="], + "ora/cli-cursor": ["cli-cursor@5.0.0", "", { "dependencies": { "restore-cursor": "^5.0.0" } }, "sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw=="], "ora/string-width": ["string-width@7.2.0", "", { "dependencies": { "emoji-regex": "^10.3.0", "get-east-asian-width": "^1.0.0", "strip-ansi": "^7.1.0" } }, "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ=="], "ora/strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="], + "p-locate/p-limit": ["p-limit@3.1.0", "", { "dependencies": { "yocto-queue": "^0.1.0" } }, "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ=="], + "parse-entities/@types/unist": ["@types/unist@2.0.11", "", {}, "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA=="], "prompts/kleur": ["kleur@3.0.3", "", {}, "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w=="], @@ -2293,8 +2776,12 @@ "spawndamnit/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], + "stack-utils/escape-string-regexp": ["escape-string-regexp@2.0.0", "", {}, "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w=="], + "string-width/strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="], + "test/next": ["next@16.1.4", "", { "dependencies": { "@next/env": "16.1.4", "@swc/helpers": "0.5.15", "baseline-browser-mapping": "^2.8.3", "caniuse-lite": "^1.0.30001579", "postcss": "8.4.31", "styled-jsx": "5.1.6" }, "optionalDependencies": { "@next/swc-darwin-arm64": "16.1.4", "@next/swc-darwin-x64": "16.1.4", "@next/swc-linux-arm64-gnu": "16.1.4", "@next/swc-linux-arm64-musl": "16.1.4", "@next/swc-linux-x64-gnu": "16.1.4", "@next/swc-linux-x64-musl": "16.1.4", "@next/swc-win32-arm64-msvc": "16.1.4", "@next/swc-win32-x64-msvc": "16.1.4", "sharp": "^0.34.4" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", "@playwright/test": "^1.51.1", "babel-plugin-react-compiler": "*", "react": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0", "react-dom": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0", "sass": "^1.3.0" }, "optionalPeers": ["@opentelemetry/api", "@playwright/test", "babel-plugin-react-compiler", "sass"], "bin": { "next": "dist/bin/next" } }, "sha512-gKSecROqisnV7Buen5BfjmXAm7Xlpx9o2ueVQRo5DxQcjC8d330dOM1xiGWc2k3Dcnz0In3VybyRPOsudwgiqQ=="], + "tsx/esbuild": ["esbuild@0.27.2", "", { "optionalDependencies": { "@esbuild/aix-ppc64": "0.27.2", "@esbuild/android-arm": "0.27.2", "@esbuild/android-arm64": "0.27.2", "@esbuild/android-x64": "0.27.2", "@esbuild/darwin-arm64": "0.27.2", "@esbuild/darwin-x64": "0.27.2", "@esbuild/freebsd-arm64": "0.27.2", "@esbuild/freebsd-x64": "0.27.2", "@esbuild/linux-arm": "0.27.2", "@esbuild/linux-arm64": "0.27.2", "@esbuild/linux-ia32": "0.27.2", "@esbuild/linux-loong64": "0.27.2", "@esbuild/linux-mips64el": "0.27.2", "@esbuild/linux-ppc64": "0.27.2", "@esbuild/linux-riscv64": "0.27.2", "@esbuild/linux-s390x": "0.27.2", "@esbuild/linux-x64": "0.27.2", "@esbuild/netbsd-arm64": "0.27.2", "@esbuild/netbsd-x64": "0.27.2", "@esbuild/openbsd-arm64": "0.27.2", "@esbuild/openbsd-x64": "0.27.2", "@esbuild/openharmony-arm64": "0.27.2", "@esbuild/sunos-x64": "0.27.2", "@esbuild/win32-arm64": "0.27.2", "@esbuild/win32-ia32": "0.27.2", "@esbuild/win32-x64": "0.27.2" }, "bin": { "esbuild": "bin/esbuild" } }, "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw=="], "web/@types/node": ["@types/node@24.10.4", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-vnDVpYPMzs4wunl27jHrfmwojOGKya0xyM3sH+UE5iv5uPS6vX7UIoh6m+vQc5LGBq52HBKPIn/zcSZVzeDEZg=="], @@ -2403,6 +2890,12 @@ "@inquirer/core/wrap-ansi/string-width": ["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="], + "@manypkg/find-root/find-up/locate-path": ["locate-path@5.0.0", "", { "dependencies": { "p-locate": "^4.1.0" } }, "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g=="], + + "@modelcontextprotocol/sdk/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + + "@next/eslint-plugin-next/fast-glob/glob-parent": ["glob-parent@5.1.2", "", { "dependencies": { "is-glob": "^4.0.1" } }, "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow=="], + "@radix-ui/react-accordion/@radix-ui/react-primitive/@radix-ui/react-slot": ["@radix-ui/react-slot@1.2.3", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2" }, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A=="], "@radix-ui/react-arrow/@radix-ui/react-primitive/@radix-ui/react-slot": ["@radix-ui/react-slot@1.2.3", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2" }, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A=="], @@ -2435,6 +2928,10 @@ "@types/pg/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + "@typescript-eslint/typescript-estree/minimatch/brace-expansion": ["brace-expansion@2.0.2", "", { "dependencies": { "balanced-match": "^1.0.0" } }, "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ=="], + + "ajv-formats/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + "bun-types/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], "c12/chokidar/readdirp": ["readdirp@4.1.2", "", {}, "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg=="], @@ -2453,6 +2950,10 @@ "d3-sankey/d3-shape/d3-path": ["d3-path@1.0.9", "", {}, "sha512-VLaYcn81dtHVTjEHd8B+pbe9yHWpXKZUC87PzoFmsFrJqgFwDe/qxfp5MlfsfM1V5E/iVt0MmEbWQ7FVIXh/bg=="], + "eslint-plugin-import/tsconfig-paths/json5": ["json5@1.0.2", "", { "dependencies": { "minimist": "^1.2.0" }, "bin": { "json5": "lib/cli.js" } }, "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA=="], + + "eslint-plugin-react-hooks/@babel/parser/@babel/types": ["@babel/types@7.28.6", "", { "dependencies": { "@babel/helper-string-parser": "^7.27.1", "@babel/helper-validator-identifier": "^7.28.5" } }, "sha512-0ZrskXVEHSWIqZM/sQZ4EV3jZJXRkio/WCxaqKZP1g//CEWEPSfeZFcms4XeKBCHU0ZKnIkdJeU/kF+eRp5lBg=="], + "fumadocs-mdx/esbuild/@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.27.2", "", { "os": "aix", "cpu": "ppc64" }, "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw=="], "fumadocs-mdx/esbuild/@esbuild/android-arm": ["@esbuild/android-arm@0.27.2", "", { "os": "android", "cpu": "arm" }, "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA=="], @@ -2505,10 +3006,14 @@ "fumadocs-mdx/esbuild/@esbuild/win32-x64": ["@esbuild/win32-x64@0.27.2", "", { "os": "win32", "cpu": "x64" }, "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ=="], + "globby/fast-glob/glob-parent": ["glob-parent@5.1.2", "", { "dependencies": { "is-glob": "^4.0.1" } }, "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow=="], + "mlly/pkg-types/confbox": ["confbox@0.1.8", "", {}, "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w=="], "ora/cli-cursor/restore-cursor": ["restore-cursor@5.1.0", "", { "dependencies": { "onetime": "^7.0.0", "signal-exit": "^4.1.0" } }, "sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA=="], + "ora/string-width/emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="], + "ora/strip-ansi/ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], "read-yaml-file/js-yaml/argparse": ["argparse@1.0.10", "", { "dependencies": { "sprintf-js": "~1.0.2" } }, "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg=="], @@ -2519,6 +3024,26 @@ "string-width/strip-ansi/ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], + "test/next/@next/env": ["@next/env@16.1.4", "", {}, "sha512-gkrXnZyxPUy0Gg6SrPQPccbNVLSP3vmW8LU5dwEttEEC1RwDivk8w4O+sZIjFvPrSICXyhQDCG+y3VmjlJf+9A=="], + + "test/next/@next/swc-darwin-arm64": ["@next/swc-darwin-arm64@16.1.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-T8atLKuvk13XQUdVLCv1ZzMPgLPW0+DWWbHSQXs0/3TjPrKNxTmUIhOEaoEyl3Z82k8h/gEtqyuoZGv6+Ugawg=="], + + "test/next/@next/swc-darwin-x64": ["@next/swc-darwin-x64@16.1.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-AKC/qVjUGUQDSPI6gESTx0xOnOPQ5gttogNS3o6bA83yiaSZJek0Am5yXy82F1KcZCx3DdOwdGPZpQCluonuxg=="], + + "test/next/@next/swc-linux-arm64-gnu": ["@next/swc-linux-arm64-gnu@16.1.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-POQ65+pnYOkZNdngWfMEt7r53bzWiKkVNbjpmCt1Zb3V6lxJNXSsjwRuTQ8P/kguxDC8LRkqaL3vvsFrce4dMQ=="], + + "test/next/@next/swc-linux-arm64-musl": ["@next/swc-linux-arm64-musl@16.1.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-3Wm0zGYVCs6qDFAiSSDL+Z+r46EdtCv/2l+UlIdMbAq9hPJBvGu/rZOeuvCaIUjbArkmXac8HnTyQPJFzFWA0Q=="], + + "test/next/@next/swc-linux-x64-gnu": ["@next/swc-linux-x64-gnu@16.1.4", "", { "os": "linux", "cpu": "x64" }, "sha512-lWAYAezFinaJiD5Gv8HDidtsZdT3CDaCeqoPoJjeB57OqzvMajpIhlZFce5sCAH6VuX4mdkxCRqecCJFwfm2nQ=="], + + "test/next/@next/swc-linux-x64-musl": ["@next/swc-linux-x64-musl@16.1.4", "", { "os": "linux", "cpu": "x64" }, "sha512-fHaIpT7x4gA6VQbdEpYUXRGyge/YbRrkG6DXM60XiBqDM2g2NcrsQaIuj375egnGFkJow4RHacgBOEsHfGbiUw=="], + + "test/next/@next/swc-win32-arm64-msvc": ["@next/swc-win32-arm64-msvc@16.1.4", "", { "os": "win32", "cpu": "arm64" }, "sha512-MCrXxrTSE7jPN1NyXJr39E+aNFBrQZtO154LoCz7n99FuKqJDekgxipoodLNWdQP7/DZ5tKMc/efybx1l159hw=="], + + "test/next/@next/swc-win32-x64-msvc": ["@next/swc-win32-x64-msvc@16.1.4", "", { "os": "win32", "cpu": "x64" }, "sha512-JSVlm9MDhmTXw/sO2PE/MRj+G6XOSMZB+BcZ0a7d6KwVFZVpkHcb2okyoYFBaco6LeiL53BBklRlOrDDbOeE5w=="], + + "test/next/postcss": ["postcss@8.4.31", "", { "dependencies": { "nanoid": "^3.3.6", "picocolors": "^1.0.0", "source-map-js": "^1.0.2" } }, "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ=="], + "tsx/esbuild/@esbuild/aix-ppc64": ["@esbuild/aix-ppc64@0.27.2", "", { "os": "aix", "cpu": "ppc64" }, "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw=="], "tsx/esbuild/@esbuild/android-arm": ["@esbuild/android-arm@0.27.2", "", { "os": "android", "cpu": "arm" }, "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA=="], @@ -2593,8 +3118,12 @@ "web/next/postcss": ["postcss@8.4.31", "", { "dependencies": { "nanoid": "^3.3.6", "picocolors": "^1.0.0", "source-map-js": "^1.0.2" } }, "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ=="], + "widest-line/string-width/emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="], + "widest-line/string-width/strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="], + "wrap-ansi/string-width/emoji-regex": ["emoji-regex@10.6.0", "", {}, "sha512-toUI84YS5YmxW219erniWD0CIVOo46xGKColeNQRgOzDorgBi1v4D71/OFzgD9GO2UGKIv1C3Sp8DAn0+j5w7A=="], + "wrap-ansi/strip-ansi/ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], "yargs/string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], @@ -2619,6 +3148,8 @@ "@inquirer/core/wrap-ansi/string-width/is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="], + "@manypkg/find-root/find-up/locate-path/p-locate": ["p-locate@4.1.0", "", { "dependencies": { "p-limit": "^2.2.0" } }, "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A=="], + "@types/bun/bun-types/@types/node/undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], "ora/cli-cursor/restore-cursor/onetime": ["onetime@7.0.0", "", { "dependencies": { "mimic-function": "^5.0.0" } }, "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ=="], diff --git a/packages/unrag/cli/commands/add.ts b/packages/unrag/cli/commands/add.ts index 051b4ad..60b2122 100644 --- a/packages/unrag/cli/commands/add.ts +++ b/packages/unrag/cli/commands/add.ts @@ -9,10 +9,12 @@ import {readJsonFile, writeJsonFile} from '../lib/json' import {readRegistryManifest} from '../lib/manifest' import { type BatteryName, + type ChunkerName, type ConnectorName, type DepChange, type ExtractorName, depsForBattery, + depsForChunker, depsForConnector, depsForExtractor, installDependencies, @@ -22,6 +24,7 @@ import { } from '../lib/packageJson' import { copyBatteryFiles, + copyChunkerFiles, copyConnectorFiles, copyExtractorFiles } from '../lib/registry' @@ -35,6 +38,7 @@ type InitConfig = { connectors?: string[] extractors?: string[] batteries?: string[] + chunkers?: string[] managedFiles?: string[] } @@ -365,6 +369,130 @@ async function patchUnragConfig(args: { } } +/** + * Patch unrag.config.ts to add chunker import (registers plugin on import). + */ +async function patchUnragConfigChunker(args: { + projectRoot: string + installDir: string + chunker: ChunkerName +}): Promise { + const configPath = path.join(args.projectRoot, 'unrag.config.ts') + + if (!(await exists(configPath))) { + return false + } + + try { + const content = await readFile(configPath, 'utf8') + const installImportBase = `./${args.installDir.replace(/\\/g, '/')}` + const importLine = `import "${installImportBase}/chunkers/${args.chunker}";` + + let newContent = content + let modified = false + + if ( + !content.includes(importLine) && + !content.includes(`/chunkers/${args.chunker}`) + ) { + const importMarker = '// __UNRAG_IMPORTS__' + if (content.includes(importMarker)) { + newContent = newContent.replace( + importMarker, + `${importMarker}\n${importLine}` + ) + } else { + const importMatch = newContent.match(/^(import .+?\n)+/m) + if (importMatch) { + newContent = newContent.replace( + importMatch[0], + `${importMatch[0] + importLine}\n` + ) + } else { + const headerDocComment = newContent.match( + /^\s*\/\*\*[\s\S]*?\*\/\s*\n/ + ) + if (headerDocComment?.[0]) { + newContent = `${headerDocComment[0]}${importLine}\n${newContent.slice( + headerDocComment[0].length + )}` + } else { + newContent = `${importLine}\n${newContent}` + } + } + } + modified = true + } + + const needsModel = + args.chunker === 'semantic' || args.chunker === 'agentic' + if (needsModel) { + const optionsRegex = + /(chunking:\s*{[\s\S]*?options:\s*{)([\s\S]*?)(\n\s*}\s*,?)/ + const match = newContent.match(optionsRegex) + if (match) { + const prefix = match[1] ?? '' + const suffix = match[3] ?? '' + let block = match[2] ?? '' + if (!/model\s*:/m.test(block)) { + const ensureTrailingComma = (input: string): string => { + const lines = input.split('\n') + for (let i = lines.length - 1; i >= 0; i--) { + const line = lines[i] + if (!line || line.trim().length === 0) { + continue + } + if (!line.trim().endsWith(',')) { + lines[i] = `${line},` + } + break + } + return lines.join('\n') + } + + if (/^[ \t]*minChunkSize:/m.test(block)) { + block = block.replace( + /^[ \t]*minChunkSize:[^\n]*$/m, + (line) => { + const indent = + line.match(/^([ \t]*)/)?.[1] ?? '' + const withComma = line.trim().endsWith(',') + ? line + : `${line},` + return `${withComma}\n${indent}model: "openai/gpt-5-mini",` + } + ) + } else { + const withComma = ensureTrailingComma(block) + const indentMatch = + withComma.match(/\n([ \t]*)chunkSize:/) ?? + withComma.match(/\n([ \t]*)\w/) + const indent = indentMatch?.[1] ?? '\t\t\t' + block = `${withComma}\n${indent}model: "openai/gpt-5-mini",` + } + + newContent = newContent.replace( + optionsRegex, + `${prefix}${block}${suffix}` + ) + modified = true + } + } + } + + if (modified && newContent !== content) { + await writeFile(configPath, newContent, 'utf8') + return true + } + } catch (err) { + console.warn( + `[unrag:add] Could not patch unrag.config.ts for chunker: ${err instanceof Error ? err.message : String(err)}` + ) + } + + return false +} + const shouldWriteFile = async ( absPath: string, projectRoot: string, @@ -481,7 +609,7 @@ const addPackageJsonScripts = async (args: { } type ParsedAddArgs = { - kind?: 'connector' | 'extractor' | 'battery' | 'skills' + kind?: 'connector' | 'extractor' | 'battery' | 'chunker' | 'skills' name?: string yes?: boolean noInstall?: boolean @@ -515,17 +643,28 @@ const parseAddArgs = (args: string[]): ParsedAddArgs => { out.kind = 'battery' continue } + if (a === 'chunker') { + out.kind = 'chunker' + continue + } if (a === 'skills') { out.kind = 'skills' continue } + if (a.startsWith('chunker:')) { + out.kind = 'chunker' + out.name = a.slice('chunker:'.length) + continue + } out.kind = 'connector' out.name = a continue } if ( - (out.kind === 'extractor' || out.kind === 'battery') && + (out.kind === 'extractor' || + out.kind === 'battery' || + out.kind === 'chunker') && !out.name && a && !a.startsWith('-') @@ -589,6 +728,11 @@ export async function addCommand(args: string[]) { .filter((b) => b.status === 'available') .map((b) => b.id as BatteryName) ) + const availableChunkers = new Set( + (manifest.chunkers ?? []) + .filter((c) => c.status === 'available') + .map((c) => c.id as ChunkerName) + ) if (!name) { if (!quiet) { @@ -598,10 +742,12 @@ export async function addCommand(args: string[]) { ' unrag add ', ' unrag add extractor ', ' unrag add battery ', + ' unrag add chunker ', '', `Available connectors: ${Array.from(availableConnectors).join(', ')}`, `Available extractors: ${Array.from(availableExtractors).join(', ')}`, - `Available batteries: ${Array.from(availableBatteries).join(', ')}` + `Available batteries: ${Array.from(availableBatteries).join(', ')}`, + `Available chunkers: ${Array.from(availableChunkers).join(', ')}` ].join('\n') ) } @@ -1138,6 +1284,77 @@ main().catch((err) => { return } + if (kind === 'chunker') { + const chunker = name as ChunkerName | undefined + if (!chunker || !availableChunkers.has(chunker)) { + if (!quiet) { + outro( + `Unknown chunker: ${name}\n\nAvailable chunkers: ${Array.from(availableChunkers).join(', ')}` + ) + } + return + } + + const chunkerFiles = await copyChunkerFiles({ + projectRoot: root, + registryRoot, + installDir: config.installDir, + aliasBase: config.aliasBase ?? '@unrag', + chunker, + yes: nonInteractive + }) + for (const file of chunkerFiles) { + managedFiles.add(file) + } + + const {deps, devDeps} = depsForChunker(chunker) + const merged = mergeDeps(pkg, deps, devDeps) + if (merged.changes.length > 0) { + await writePackageJson(root, merged.pkg) + if (!noInstall) { + await installDependencies(root) + } + } + + const chunkers = Array.from( + new Set([...(config.chunkers ?? []), chunker]) + ).sort() + + await writeJsonFile(configPath, { + ...config, + chunkers, + version: CONFIG_VERSION, + installedFrom: {unragVersion: cliPackageVersion}, + managedFiles: Array.from(managedFiles).sort() + }) + + const patched = await patchUnragConfigChunker({ + projectRoot: root, + installDir: config.installDir, + chunker + }) + + if (!quiet) { + const installImportBase = `./${config.installDir.replace(/\\/g, '/')}` + const importHint = `import "${installImportBase}/chunkers/${chunker}";` + outro( + [ + `Installed chunker: ${chunker}.`, + '', + `- Code: ${path.join(config.installDir, 'chunkers', chunker)}`, + patched + ? '- Config: unrag.config.ts (updated)' + : `- Config: unrag.config.ts (add import: ${importHint})`, + '', + 'Next:', + ` - Set chunking.method = "${chunker}" in unrag.config.ts` + ].join('\n') + ) + } + + return + } + if (kind === 'connector') { const connector = name as ConnectorName | undefined if (!connector || !availableConnectors.has(connector)) { diff --git a/packages/unrag/cli/commands/init.ts b/packages/unrag/cli/commands/init.ts index 6f543bd..25458a1 100644 --- a/packages/unrag/cli/commands/init.ts +++ b/packages/unrag/cli/commands/init.ts @@ -28,12 +28,14 @@ import {readJsonFile, writeJsonFile} from '../lib/json' import {readRegistryManifest} from '../lib/manifest' import { type BatteryName, + type ChunkerName, type ConnectorName, type DepChange, type EmbeddingProviderName, type ExtractorName, depsForAdapter, depsForBattery, + depsForChunker, depsForConnector, depsForEmbeddingProvider, depsForExtractor, @@ -48,6 +50,7 @@ import {type PresetPayloadV1, fetchPreset} from '../lib/preset' import { type RegistrySelection, copyBatteryFiles, + copyChunkerFiles, copyConnectorFiles, copyExtractorFiles, copyRegistryFiles @@ -68,6 +71,7 @@ type InitConfig = { connectors?: string[] extractors?: string[] batteries?: string[] + chunkers?: string[] managedFiles?: string[] } @@ -222,6 +226,11 @@ const toBatteries = (xs: string[] | undefined): BatteryName[] => .map((s) => String(s).trim()) .filter(Boolean) as BatteryName[] +const toChunkers = (xs: string[] | undefined): ChunkerName[] => + (Array.isArray(xs) ? xs : []) + .map((s) => String(s).trim()) + .filter(Boolean) as ChunkerName[] + function getPresetEmbeddingProvider( preset: PresetPayloadV1 | null ): EmbeddingProviderName | undefined { @@ -560,6 +569,25 @@ export async function initCommand(args: string[]) { ) ).sort() + const chunkersFromPreset = preset + ? Array.from(new Set(toChunkers(preset.modules?.chunkers))).sort() + : [] + const availableChunkerIds = new Set( + (manifest.chunkers ?? []) + .filter((c) => c.status === 'available') + .map((c) => c.id as ChunkerName) + ) + if (preset) { + const unknown = chunkersFromPreset.filter( + (c) => !availableChunkerIds.has(c) + ) + if (unknown.length > 0) { + throw new Error( + `Preset contains unknown/unavailable chunkers: ${unknown.join(', ')}` + ) + } + } + const selection: RegistrySelection = { installDir, storeAdapter: storeAdapterAnswer as RegistrySelection['storeAdapter'], @@ -574,6 +602,7 @@ export async function initCommand(args: string[]) { presetConfig: (preset?.config as RegistrySelection['presetConfig'] | undefined) ?? undefined, + chunkers: chunkersFromPreset, richMedia: richMediaEnabled ? { enabled: true, @@ -588,6 +617,24 @@ export async function initCommand(args: string[]) { managedFiles.add(file) } + // Install chunker modules (vendor code) before updating deps. + if (chunkersFromPreset.length > 0) { + for (const chunker of chunkersFromPreset) { + const chunkerFiles = await copyChunkerFiles({ + projectRoot: root, + registryRoot, + installDir, + aliasBase, + chunker, + yes: nonInteractive, + overwrite: overwritePolicy + }) + for (const file of chunkerFiles) { + managedFiles.add(file) + } + } + } + // Install selected extractor modules (vendor code) before updating deps. if (richMediaEnabled && selectedExtractors.length > 0) { for (const extractor of selectedExtractors) { @@ -617,6 +664,14 @@ export async function initCommand(args: string[]) { Object.assign(extractorDevDeps, r.devDeps) } + const chunkerDeps: Record = {} + const chunkerDevDeps: Record = {} + for (const chunker of chunkersFromPreset) { + const r = depsForChunker(chunker) + Object.assign(chunkerDeps, r.deps) + Object.assign(chunkerDevDeps, r.devDeps) + } + const connectorsFromPreset = preset ? toConnectors(preset.modules?.connectors) : [] @@ -713,6 +768,7 @@ export async function initCommand(args: string[]) { ...deps, ...embeddingDeps.deps, ...extractorDeps, + ...chunkerDeps, ...connectorDeps, ...batteryDeps }, @@ -720,6 +776,7 @@ export async function initCommand(args: string[]) { ...devDeps, ...embeddingDeps.devDeps, ...extractorDevDeps, + ...chunkerDevDeps, ...connectorDevDeps, ...batteryDevDeps } @@ -753,6 +810,9 @@ export async function initCommand(args: string[]) { ).sort(), batteries: Array.from( new Set([...(existing?.batteries ?? []), ...batteriesFromPreset]) + ).sort(), + chunkers: Array.from( + new Set([...(existing?.chunkers ?? []), ...chunkersFromPreset]) ).sort() } config.managedFiles = Array.from(managedFiles).sort() diff --git a/packages/unrag/cli/lib/doctor/configScan.ts b/packages/unrag/cli/lib/doctor/configScan.ts index 8fad53a..4e1fe7d 100644 --- a/packages/unrag/cli/lib/doctor/configScan.ts +++ b/packages/unrag/cli/lib/doctor/configScan.ts @@ -53,6 +53,12 @@ export async function runConfigCoherenceChecks( return results } + // Chunker config checks (method wiring + plugin presence) + results.push(...checkChunkerCoherence(state, scanResult)) + + // Verify that the installed/vendored engine supports per-ingest `chunker` overrides + results.push(await checkPerIngestChunkerOverrideSupport(state)) + // Report scan confidence if (scanResult.parseWarnings.length > 0) { results.push({ @@ -87,6 +93,177 @@ export async function runConfigCoherenceChecks( return results } +function detectChunkingMethod(configContent: string): string | null { + // Match: chunking: { ... method: "recursive" ... } + // Keep this conservative: prefer explicit string literal. + const m = configContent.match( + /chunking\s*:\s*\{[\s\S]*?\bmethod\s*:\s*['"]([a-z0-9_-]+)['"]/i + ) + return m?.[1] ? String(m[1]).trim() : null +} + +function detectHasChunkerImport( + configContent: string, + chunker: string +): boolean { + // Plugin chunkers register on import, so config needs a side-effect import: + // import ".//chunkers/"; + return new RegExp(`/chunkers/${chunker}\\b`, 'i').test(configContent) +} + +function detectHasCustomChunkerFn(configContent: string): boolean { + // Best-effort: look for `chunker:` field inside chunking block. + return /chunking\s*:\s*\{[\s\S]*?\bchunker\s*:/i.test(configContent) +} + +function checkChunkerCoherence( + state: InferredInstallState, + scanResult: ConfigScanResult +): CheckResult[] { + const out: CheckResult[] = [] + const content = scanResult.configContent ?? '' + const method = detectChunkingMethod(content) + + if (!method) { + out.push({ + id: 'chunking.method', + title: 'Chunking method', + status: 'warn', + summary: 'Could not detect `chunking.method` in unrag.config.ts.', + details: [ + 'Doctor expects a config block like:', + ' chunking: { method: "recursive", options: { ... } }' + ], + docsLink: docsUrl('/docs/reference/unrag-config') + }) + return out + } + + const builtIn = method === 'recursive' || method === 'token' + if (builtIn) { + out.push({ + id: 'chunking.method', + title: 'Chunking method', + status: 'pass', + summary: `Using built-in chunker "${method}".`, + meta: {method} + }) + return out + } + + if (method === 'custom') { + const hasFn = detectHasCustomChunkerFn(content) + out.push({ + id: 'chunking.custom', + title: 'Custom chunker', + status: hasFn ? 'pass' : 'fail', + summary: hasFn + ? 'Custom chunker appears to be configured.' + : 'chunking.method is "custom" but no `chunking.chunker` function was detected.', + fixHints: hasFn + ? undefined + : [ + 'Add `chunking: { method: "custom", chunker: (content, options) => [...] }` to unrag.config.ts.' + ], + docsLink: docsUrl('/docs/chunking') + }) + return out + } + + // Plugin chunker + const installed = state.installedChunkers.includes(method) + const hasImport = detectHasChunkerImport(content, method) + + out.push({ + id: 'chunking.method', + title: 'Chunking method', + status: installed ? 'pass' : 'warn', + summary: installed + ? `Using plugin chunker "${method}".` + : `Config references plugin chunker "${method}", but it does not appear to be installed.`, + fixHints: installed ? undefined : [`Run: unrag add chunker ${method}`], + meta: {method} + }) + + out.push({ + id: 'chunking.import', + title: 'Chunker module import', + status: hasImport ? 'pass' : 'warn', + summary: hasImport + ? `Chunker module "${method}" appears to be imported (plugin registration).` + : `Chunker module "${method}" does not appear to be imported in unrag.config.ts.`, + details: hasImport + ? undefined + : [ + 'Plugin chunkers register on import. Without the import, the engine may throw at startup.', + 'Expected a line like:', + state.installDir + ? ` import "./${state.installDir}/chunkers/${method}";` + : ` import "./lib/unrag/chunkers/${method}";` + ], + fixHints: hasImport + ? undefined + : [ + 'Install chunker via CLI (auto-patches config):', + ` unrag add chunker ${method}` + ], + docsLink: docsUrl('/docs/chunking') + }) + + return out +} + +async function checkPerIngestChunkerOverrideSupport( + state: InferredInstallState +): Promise { + if (!state.installDir || !state.installDirExists) { + return { + id: 'api.ingest.chunker_override', + title: 'Per-ingest chunker override', + status: 'skip', + summary: + 'Install directory not found; cannot verify vendored engine API.' + } + } + + const vendoredTypesPath = path.join( + state.projectRoot, + state.installDir, + 'core', + 'types.ts' + ) + if (!(await exists(vendoredTypesPath))) { + return { + id: 'api.ingest.chunker_override', + title: 'Per-ingest chunker override', + status: 'warn', + summary: + 'Could not find vendored core/types.ts to verify API support.', + details: [vendoredTypesPath] + } + } + + const raw = await readFile(vendoredTypesPath, 'utf8').catch(() => '') + const hasChunkerField = + /export\s+type\s+IngestInput\s*=\s*\{[\s\S]*?\bchunker\s*\?:/m.test(raw) + + return { + id: 'api.ingest.chunker_override', + title: 'Per-ingest chunker override', + status: hasChunkerField ? 'pass' : 'warn', + summary: hasChunkerField + ? 'Your vendored engine supports `engine.ingest({ chunker })` overrides.' + : 'Your vendored engine may not support `engine.ingest({ chunker })` overrides yet.', + fixHints: hasChunkerField + ? undefined + : [ + 'Upgrade vendored Unrag files (recommended):', + ' unrag upgrade' + ], + docsLink: docsUrl('/docs/upgrade') + } +} + /** * Scan config file for extractor registrations and flag settings. */ diff --git a/packages/unrag/cli/lib/doctor/infer.ts b/packages/unrag/cli/lib/doctor/infer.ts index 04abd88..c54c317 100644 --- a/packages/unrag/cli/lib/doctor/infer.ts +++ b/packages/unrag/cli/lib/doctor/infer.ts @@ -119,6 +119,18 @@ export async function inferInstallState(options: { ).sort() } + // 8.1 Determine installed chunkers + let installedChunkers: string[] = [] + if (unragJson?.chunkers && Array.isArray(unragJson.chunkers)) { + installedChunkers = unragJson.chunkers + } + if (installDirFull && installDirExists) { + const fsChunkers = await inferChunkersFromFilesystem(installDirFull) + installedChunkers = Array.from( + new Set([...installedChunkers, ...fsChunkers]) + ).sort() + } + // 9. Try to infer DB env var from config let inferredDbEnvVar: string | null = null if (configFileExists) { @@ -145,6 +157,7 @@ export async function inferInstallState(options: { embeddingProvider, installedExtractors, installedConnectors, + installedChunkers, inferredDbEnvVar, inferenceConfidence, warnings @@ -299,6 +312,36 @@ async function inferConnectorsFromFilesystem( } } +/** + * Infer installed chunkers from filesystem. + */ +async function inferChunkersFromFilesystem( + installDir: string +): Promise { + const chunkersDir = path.join(installDir, 'chunkers') + if (!(await exists(chunkersDir))) { + return [] + } + + try { + const entries = await readdir(chunkersDir, {withFileTypes: true}) + return entries + .filter((e) => e.isDirectory()) + .map((e) => e.name) + .filter((name) => { + if (name === '_shared') { + return false + } + if (name.startsWith('_')) { + return false + } + return true + }) + } catch { + return [] + } +} + /** * Best-effort inference of DB env var from unrag.config.ts. * Scans for process.env.SOME_VAR patterns in connection strings. diff --git a/packages/unrag/cli/lib/doctor/staticChecks.ts b/packages/unrag/cli/lib/doctor/staticChecks.ts index e672d38..650a473 100644 --- a/packages/unrag/cli/lib/doctor/staticChecks.ts +++ b/packages/unrag/cli/lib/doctor/staticChecks.ts @@ -581,6 +581,39 @@ async function runModuleChecks( } } + // Check chunkers + for (const chunker of state.installedChunkers) { + const chunkerDir = path.join(installDirFull, 'chunkers', chunker) + const chunkerExists = await exists(chunkerDir) + const hasIndex = + chunkerExists && (await exists(path.join(chunkerDir, 'index.ts'))) + + if (chunkerExists && hasIndex) { + results.push({ + id: `module-chunker-${chunker}`, + title: `Chunker: ${chunker}`, + status: 'pass', + summary: 'Module files present.' + }) + } else if (chunkerExists) { + results.push({ + id: `module-chunker-${chunker}`, + title: `Chunker: ${chunker}`, + status: 'warn', + summary: 'Module directory exists but may be incomplete.', + details: ['Expected index.ts not found.'] + }) + } else { + results.push({ + id: `module-chunker-${chunker}`, + title: `Chunker: ${chunker}`, + status: 'fail', + summary: 'Listed in unrag.json but directory not found.', + fixHints: [`Run: unrag add chunker ${chunker}`] + }) + } + } + // Check extractor dependencies const depResults = await checkExtractorDependencies(state) results.push(...depResults) diff --git a/packages/unrag/cli/lib/doctor/types.ts b/packages/unrag/cli/lib/doctor/types.ts index fd81301..84f91db 100644 --- a/packages/unrag/cli/lib/doctor/types.ts +++ b/packages/unrag/cli/lib/doctor/types.ts @@ -57,6 +57,7 @@ export type InferredInstallState = { embeddingProvider: string | null installedExtractors: string[] installedConnectors: string[] + installedChunkers: string[] inferredDbEnvVar: string | null inferenceConfidence: 'high' | 'medium' | 'low' warnings: string[] @@ -72,6 +73,7 @@ export type UnragJsonConfig = { connectors?: string[] extractors?: string[] batteries?: string[] + chunkers?: string[] managedFiles?: string[] } diff --git a/packages/unrag/cli/lib/manifest.ts b/packages/unrag/cli/lib/manifest.ts index 7675007..3b5f0c3 100644 --- a/packages/unrag/cli/lib/manifest.ts +++ b/packages/unrag/cli/lib/manifest.ts @@ -50,6 +50,15 @@ export type RegistryManifestV1 = { defaultModel?: string envVars?: Array<{name: string; required?: boolean; notes?: string}> }> + /** Optional chunkers (semantic, markdown, etc.) */ + chunkers?: Array<{ + id: string // e.g. "semantic" + label?: string + description?: string + status?: 'available' | 'coming-soon' + deps?: Record + devDeps?: Record + }> } export async function readRegistryManifest( diff --git a/packages/unrag/cli/lib/packageJson.ts b/packages/unrag/cli/lib/packageJson.ts index d5363e8..9061656 100644 --- a/packages/unrag/cli/lib/packageJson.ts +++ b/packages/unrag/cli/lib/packageJson.ts @@ -174,6 +174,13 @@ export type ExtractorName = | 'file-pptx' | 'file-xlsx' +export type ChunkerName = + | 'semantic' + | 'markdown' + | 'hierarchical' + | 'code' + | 'agentic' + export function depsForExtractor(extractor: ExtractorName) { const deps: Record = {} const devDeps: Record = {} @@ -223,6 +230,25 @@ export function depsForExtractor(extractor: ExtractorName) { return {deps, devDeps} } +export function depsForChunker(_chunker: ChunkerName) { + const deps: Record = {} + const devDeps: Record = {} + + if (_chunker === 'semantic' || _chunker === 'agentic') { + deps.ai = '^6.0.3' + } + + if (_chunker === 'code') { + deps['tree-sitter'] = '^0.22.6' + deps['tree-sitter-typescript'] = '^0.21.2' + deps['tree-sitter-javascript'] = '^0.21.4' + deps['tree-sitter-python'] = '^0.21.0' + deps['tree-sitter-go'] = '^0.21.0' + } + + return {deps, devDeps} +} + export type EmbeddingProviderName = | 'ai' | 'openai' diff --git a/packages/unrag/cli/lib/preset.ts b/packages/unrag/cli/lib/preset.ts index 859aa51..ea8cb2d 100644 --- a/packages/unrag/cli/lib/preset.ts +++ b/packages/unrag/cli/lib/preset.ts @@ -12,6 +12,7 @@ export type PresetPayloadV1 = { extractors: string[] connectors: string[] batteries?: string[] + chunkers?: string[] } config?: unknown } @@ -59,6 +60,12 @@ function isPresetPayloadV1(x: unknown): x is PresetPayloadV1 { return false } } + if ('chunkers' in modulesObj) { + const chunkers = modulesObj.chunkers + if (chunkers != null && !Array.isArray(chunkers)) { + return false + } + } return true } diff --git a/packages/unrag/cli/lib/registry.ts b/packages/unrag/cli/lib/registry.ts index 0fec844..a00ba3f 100644 --- a/packages/unrag/cli/lib/registry.ts +++ b/packages/unrag/cli/lib/registry.ts @@ -16,6 +16,14 @@ export type RegistrySelection = { yes?: boolean // non-interactive overwrite?: 'skip' | 'force' // behavior when dest exists presetConfig?: { + chunking?: { + method?: string + options?: { + minChunkSize?: number + model?: string + language?: string + } + } defaults?: { chunking?: {chunkSize?: number; chunkOverlap?: number} retrieval?: {topK?: number} @@ -36,6 +44,7 @@ export type RegistrySelection = { assetProcessing?: unknown } } + chunkers?: string[] richMedia?: { enabled: boolean extractors: ExtractorName[] @@ -324,9 +333,17 @@ const renderUnragConfig = (content: string, selection: RegistrySelection) => { } } + const chunkerImports = Array.from( + new Set((selection.chunkers ?? []).map((c) => String(c).trim())) + ) + .filter(Boolean) + .sort() + .map((chunker) => `import "${installImportBase}/chunkers/${chunker}";`) + const importsBlock = [ ...baseImports, ...storeImports, + ...chunkerImports, ...extractorImports ].join('\n') @@ -349,41 +366,117 @@ const renderUnragConfig = (content: string, selection: RegistrySelection) => { const presetChunkSize = preset?.defaults?.chunking?.chunkSize const presetChunkOverlap = preset?.defaults?.chunking?.chunkOverlap const presetTopK = preset?.defaults?.retrieval?.topK + const presetChunkingMethod = + typeof preset?.chunking?.method === 'string' + ? preset.chunking.method.trim() + : undefined + const presetMinChunkSize = + typeof preset?.chunking?.options?.minChunkSize === 'number' + ? preset.chunking.options.minChunkSize + : undefined + const presetChunkerModel = + typeof preset?.chunking?.options?.model === 'string' + ? preset.chunking.options.model.trim() + : undefined + const presetChunkerLanguage = + typeof preset?.chunking?.options?.language === 'string' + ? preset.chunking.options.language.trim() + : undefined if (typeof presetChunkSize === 'number') { out = out.replace( - /chunkSize:\s*200\s*,?\s*\/\/ __UNRAG_DEFAULT_chunkSize__/, - `chunkSize: ${presetChunkSize},` + /chunkSize:\s*[\d_]+\s*,?\s*\/\/ __UNRAG_DEFAULT_chunkSize__/, + `chunkSize: ${presetChunkSize}, // __UNRAG_DEFAULT_chunkSize__` ) } else { out = out.replace( - /chunkSize:\s*200\s*,?\s*\/\/ __UNRAG_DEFAULT_chunkSize__/, - 'chunkSize: 200,' + /chunkSize:\s*[\d_]+\s*,?\s*\/\/ __UNRAG_DEFAULT_chunkSize__/, + 'chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__' ) } if (typeof presetChunkOverlap === 'number') { out = out.replace( - /chunkOverlap:\s*40\s*,?\s*\/\/ __UNRAG_DEFAULT_chunkOverlap__/, - `chunkOverlap: ${presetChunkOverlap},` + /chunkOverlap:\s*[\d_]+\s*,?\s*\/\/ __UNRAG_DEFAULT_chunkOverlap__/, + `chunkOverlap: ${presetChunkOverlap}, // __UNRAG_DEFAULT_chunkOverlap__` ) } else { out = out.replace( - /chunkOverlap:\s*40\s*,?\s*\/\/ __UNRAG_DEFAULT_chunkOverlap__/, - 'chunkOverlap: 40,' + /chunkOverlap:\s*[\d_]+\s*,?\s*\/\/ __UNRAG_DEFAULT_chunkOverlap__/, + 'chunkOverlap: 40, // __UNRAG_DEFAULT_chunkOverlap__' ) } if (typeof presetTopK === 'number') { out = out.replace( - /topK:\s*8\s*,?\s*\/\/ __UNRAG_DEFAULT_topK__/, - `topK: ${presetTopK},` + /topK:\s*[\d_]+\s*,?\s*\/\/ __UNRAG_DEFAULT_topK__/, + `topK: ${presetTopK}, // __UNRAG_DEFAULT_topK__` ) } else { out = out.replace( - /topK:\s*8\s*,?\s*\/\/ __UNRAG_DEFAULT_topK__/, - 'topK: 8,' + /topK:\s*[\d_]+\s*,?\s*\/\/ __UNRAG_DEFAULT_topK__/, + 'topK: 8, // __UNRAG_DEFAULT_topK__' + ) + } + + // Chunking method (defaults to "recursive" in template). + if (presetChunkingMethod) { + out = out.replace( + /method:\s*['"][^'"]*['"]\s*,?\s*\/\/ __UNRAG_CHUNKING_METHOD__/, + `method: '${presetChunkingMethod}', // __UNRAG_CHUNKING_METHOD__` ) } + // Chunking options: minChunkSize is template-driven; model/language are inserted when provided. + if (typeof presetMinChunkSize === 'number') { + out = out.replace( + /minChunkSize:\s*[\d_]+\s*,?\s*\/\/ __UNRAG_DEFAULT_minChunkSize__/, + `minChunkSize: ${presetMinChunkSize} // __UNRAG_DEFAULT_minChunkSize__` + ) + } + + const injectChunkingOption = ( + input: string, + key: 'model' | 'language', + value?: string + ) => { + if (!value) { + return input + } + // If key exists anywhere already, replace the first occurrence. + const existing = new RegExp(`^([ \\t]*)${key}\\s*:\\s*[^\\n]*$`, 'm') + if (existing.test(input)) { + return input.replace(existing, (_m, indent: string) => { + return `${indent}${key}: ${JSON.stringify(value)},` + }) + } + + // Otherwise, insert inside the chunking.options block. + const optionsRegex = + /(chunking:\s*{[\s\S]*?options:\s*{)([\s\S]*?)(\n\s*}\s*,?)/m + const match = input.match(optionsRegex) + if (!match) { + return input + } + const prefix = match[1] ?? '' + const suffix = match[3] ?? '' + let block = match[2] ?? '' + const indentMatch = block.match(/\n([ \t]*)\w/) + const indent = indentMatch?.[1] ?? '\t\t\t' + // Insert after minChunkSize when present, else append. + if (/^\s*minChunkSize\s*:/m.test(block)) { + block = block.replace( + /^\s*minChunkSize[^\n]*$/m, + (line) => + `${line.trimEnd().endsWith(',') ? line : `${line},`}\n${indent}${key}: ${JSON.stringify(value)},` + ) + } else { + block = `${block}\n${indent}${key}: ${JSON.stringify(value)},` + } + return input.replace(optionsRegex, `${prefix}${block}${suffix}`) + } + + out = injectChunkingOption(out, 'model', presetChunkerModel) + out = injectChunkingOption(out, 'language', presetChunkerLanguage) + // Embedding config: // - Provider always comes from `selection.embeddingProvider` (or preset override, if provided). // - Preset can override model/timeout/type, but rich media should NOT implicitly flip embeddings to multimodal. @@ -1212,6 +1305,139 @@ export async function copyExtractorFiles( return Array.from(managedFiles) } +export type ChunkerSelection = { + projectRoot: string + registryRoot: string + installDir: string // project-relative posix + aliasBase: string // e.g. "@unrag" + chunker: string // e.g. "semantic" + yes?: boolean // non-interactive skip-overwrite + overwrite?: 'skip' | 'force' +} + +export async function copyChunkerFiles( + selection: ChunkerSelection +): Promise { + const toAbs = (projectRelative: string) => + path.join(selection.projectRoot, projectRelative) + + const installBaseAbs = toAbs(selection.installDir) + const chunkerRegistryAbs = path.join( + selection.registryRoot, + 'chunkers', + selection.chunker + ) + const sharedRegistryAbs = path.join( + selection.registryRoot, + 'chunkers', + '_shared' + ) + + if (!(await exists(chunkerRegistryAbs))) { + throw new Error( + `Unknown chunker registry: ${path.relative(selection.registryRoot, chunkerRegistryAbs)}` + ) + } + + const chunkerFiles = await listFilesRecursive(chunkerRegistryAbs) + const sharedFiles = (await exists(sharedRegistryAbs)) + ? await listFilesRecursive(sharedRegistryAbs) + : [] + + const destRootAbs = path.join(installBaseAbs, 'chunkers', selection.chunker) + const sharedDestRootAbs = path.join(installBaseAbs, 'chunkers', '_shared') + + const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY + const overwritePolicy = selection.overwrite ?? 'skip' + + const shouldWrite = async (src: string, dest: string): Promise => { + if (!(await exists(dest))) { + return true + } + + if (overwritePolicy === 'force') { + return true + } + + if (nonInteractive) { + return false + } + + try { + const [srcRaw, destRaw] = await Promise.all([ + readText(src), + readText(dest) + ]) + const nextSrc = rewriteRegistryAliasImports( + srcRaw, + selection.aliasBase + ) + if (nextSrc === destRaw) { + return false + } + } catch { + // Fall back to prompting below. + } + + const answer = await confirm({ + message: `Overwrite ${path.relative(selection.projectRoot, dest)}?`, + initialValue: false + }) + if (isCancel(answer)) { + cancel('Cancelled.') + return false + } + return Boolean(answer) + } + + const managedFiles = new Set() + + for (const src of chunkerFiles) { + if (!(await exists(src))) { + throw new Error(`Registry file missing: ${src}`) + } + + const rel = path.relative(chunkerRegistryAbs, src) + const dest = path.join(destRootAbs, rel) + if (!(await shouldWrite(src, dest))) { + continue + } + + const raw = await readText(src) + const content = rewriteRegistryAliasImports(raw, selection.aliasBase) + await writeText(dest, content) + } + + for (const src of sharedFiles) { + if (!(await exists(src))) { + throw new Error(`Registry file missing: ${src}`) + } + + const rel = path.relative(sharedRegistryAbs, src) + const dest = path.join(sharedDestRootAbs, rel) + if (!(await shouldWrite(src, dest))) { + continue + } + + const raw = await readText(src) + const content = rewriteRegistryAliasImports(raw, selection.aliasBase) + await writeText(dest, content) + } + + for (const src of chunkerFiles) { + const rel = path.relative(chunkerRegistryAbs, src) + const dest = path.join(destRootAbs, rel) + managedFiles.add(toProjectRelative(selection.projectRoot, dest)) + } + for (const src of sharedFiles) { + const rel = path.relative(sharedRegistryAbs, src) + const dest = path.join(sharedDestRootAbs, rel) + managedFiles.add(toProjectRelative(selection.projectRoot, dest)) + } + + return Array.from(managedFiles) +} + export type BatterySelection = { projectRoot: string registryRoot: string diff --git a/packages/unrag/cli/run.ts b/packages/unrag/cli/run.ts index 8dc861f..ade6a28 100644 --- a/packages/unrag/cli/run.ts +++ b/packages/unrag/cli/run.ts @@ -34,6 +34,7 @@ function renderHelp() { ' add Install a connector (notion, google-drive)', ' add extractor Install an extractor (pdf-llm, image-ocr, etc.)', ' add battery Install a battery module (reranker, eval, debug)', + ' add chunker Install a chunker plugin (semantic, markdown, etc.)', ' add skills Install Unrag agent skills for your IDE/agent', ' upgrade Upgrade vendored sources (git-style merge)', ' doctor Validate installation and configuration', @@ -72,6 +73,7 @@ function renderHelp() { ' bunx unrag@latest init --yes --extractors pdf-text-layer,file-text', ' bunx unrag add notion --yes', ' bunx unrag add battery reranker --yes', + ' bunx unrag add chunker semantic --yes', ' bunx unrag upgrade', ' bunx unrag doctor', ' bunx unrag doctor --db', diff --git a/packages/unrag/package.json b/packages/unrag/package.json index cba37a9..82d23d5 100644 --- a/packages/unrag/package.json +++ b/packages/unrag/package.json @@ -31,6 +31,7 @@ }, "dependencies": { "@clack/prompts": "^0.11.0", + "js-tiktoken": "^1.0.21", "jsonc-parser": "^3.3.1", "pg": "^8.16.3", "semver": "^7.6.3" diff --git a/packages/unrag/registry/chunkers/_shared/llm.ts b/packages/unrag/registry/chunkers/_shared/llm.ts new file mode 100644 index 0000000..82ca199 --- /dev/null +++ b/packages/unrag/registry/chunkers/_shared/llm.ts @@ -0,0 +1,79 @@ +import {generateText} from 'ai' + +const DEFAULT_LLM_MODEL = 'openai/gpt-5-mini' + +const extractJsonArray = (raw: string): string[] | null => { + const start = raw.indexOf('[') + const end = raw.lastIndexOf(']') + if (start < 0 || end <= start) { + return null + } + + try { + const parsed = JSON.parse(raw.slice(start, end + 1)) + if (!Array.isArray(parsed)) { + return null + } + if (parsed.some((item) => typeof item !== 'string')) { + return null + } + return parsed as string[] + } catch { + return null + } +} + +export async function splitWithLlm(args: { + content: string + model?: string + chunkSize: number + goal: string +}): Promise { + const model = args.model?.trim() || DEFAULT_LLM_MODEL + + try { + const result = await generateText({ + model, + messages: [ + { + role: 'system', + content: + 'You are a document chunking tool. Return ONLY a JSON array of strings.' + }, + { + role: 'user', + content: [ + 'Split the input into an ordered JSON array of strings.', + 'Rules:', + '- Each element must be a contiguous substring of the input.', + '- Preserve text exactly (no edits, no normalization).', + '- The array must cover the entire input with no gaps or overlaps.', + '- Avoid empty strings.', + `- Keep chunks roughly under ${args.chunkSize} tokens when possible.`, + `Goal: ${args.goal}`, + 'Return JSON only.' + ].join('\n') + }, + {role: 'user', content: args.content} + ] + }) + + const parsed = extractJsonArray(result.text ?? '') + if (!parsed || parsed.length === 0) { + return null + } + + if (parsed.some((item) => item.length === 0)) { + return null + } + + const combined = parsed.join('') + if (combined !== args.content) { + return null + } + + return parsed + } catch { + return null + } +} diff --git a/packages/unrag/registry/chunkers/_shared/optional.ts b/packages/unrag/registry/chunkers/_shared/optional.ts new file mode 100644 index 0000000..17bdf83 --- /dev/null +++ b/packages/unrag/registry/chunkers/_shared/optional.ts @@ -0,0 +1,18 @@ +import {createRequire} from 'node:module' + +const require = createRequire(import.meta.url) + +export function requireOptional(args: { + id: string + installHint: string + chunkerName: string +}): T { + try { + return require(args.id) as T + } catch { + throw new Error( + `Unrag chunker "${args.chunkerName}" requires "${args.id}" to be installed.\n` + + `Install it with: ${args.installHint}` + ) + } +} diff --git a/packages/unrag/registry/chunkers/_shared/text.ts b/packages/unrag/registry/chunkers/_shared/text.ts new file mode 100644 index 0000000..d0ed7a2 --- /dev/null +++ b/packages/unrag/registry/chunkers/_shared/text.ts @@ -0,0 +1,119 @@ +import {Tiktoken} from 'js-tiktoken/lite' +import o200k_base from 'js-tiktoken/ranks/o200k_base' + +const encoder = new Tiktoken(o200k_base) + +export const countTokens = (text: string): number => encoder.encode(text).length + +export const getOverlapText = (text: string, overlapTokens: number): string => { + const tokens = encoder.encode(text) + if (tokens.length <= overlapTokens) { + return text + } + + const overlapTokenSlice = tokens.slice(-overlapTokens) + try { + return encoder.decode(overlapTokenSlice) + } catch { + return text.slice(-overlapTokens * 4) + } +} + +export const forceSplitByTokens = ( + text: string, + chunkSize: number, + chunkOverlap: number +): string[] => { + const tokens = encoder.encode(text) + const chunks: string[] = [] + const stride = Math.max(1, chunkSize - chunkOverlap) + + for (let i = 0; i < tokens.length; i += stride) { + const chunkTokens = tokens.slice(i, i + chunkSize) + try { + const chunk = encoder.decode(chunkTokens).trim() + if (chunk) { + chunks.push(chunk) + } + } catch { + // Skip invalid token sequences. + } + + if (i + chunkSize >= tokens.length) { + break + } + } + + return chunks +} + +export const mergeSplits = ( + splits: string[], + chunkSize: number, + chunkOverlap: number, + minChunkSize: number +): string[] => { + const chunks: string[] = [] + let currentChunk = '' + let currentTokens = 0 + + for (const split of splits) { + const splitTokens = countTokens(split) + + if (splitTokens > chunkSize) { + if (currentChunk.trim()) { + if (currentTokens >= minChunkSize) { + chunks.push(currentChunk.trim()) + } else if (chunks.length > 0) { + const lastChunk = chunks.pop() + if (lastChunk) { + chunks.push(`${lastChunk} ${currentChunk}`.trim()) + } + } else { + chunks.push(currentChunk.trim()) + } + currentChunk = '' + currentTokens = 0 + } + + const forced = forceSplitByTokens(split, chunkSize, chunkOverlap) + for (const forcedChunk of forced) { + if (forcedChunk.trim()) { + chunks.push(forcedChunk.trim()) + } + } + continue + } + + if (currentTokens + splitTokens > chunkSize && currentChunk) { + if (currentTokens >= minChunkSize) { + chunks.push(currentChunk.trim()) + } + + if (chunkOverlap > 0 && currentChunk) { + const overlapText = getOverlapText(currentChunk, chunkOverlap) + currentChunk = overlapText + split + currentTokens = countTokens(currentChunk) + } else { + currentChunk = split + currentTokens = splitTokens + } + } else { + currentChunk += split + currentTokens += splitTokens + } + } + + if (currentChunk.trim() && currentTokens >= minChunkSize) { + chunks.push(currentChunk.trim()) + } else if (currentChunk.trim() && chunks.length > 0) { + const lastChunk = chunks.pop() + if (lastChunk) { + chunks.push(`${lastChunk} ${currentChunk}`.trim()) + } + } else if (currentChunk.trim()) { + chunks.push(currentChunk.trim()) + } + + return chunks +} diff --git a/packages/unrag/registry/chunkers/agentic/index.ts b/packages/unrag/registry/chunkers/agentic/index.ts new file mode 100644 index 0000000..8683e79 --- /dev/null +++ b/packages/unrag/registry/chunkers/agentic/index.ts @@ -0,0 +1,84 @@ +import {splitWithLlm} from '@registry/chunkers/_shared/llm' +import {countTokens, mergeSplits} from '@registry/chunkers/_shared/text' +import { + registerChunkerPlugin, + resolveChunkingOptions +} from '@registry/core/chunking' +import type { + ChunkText, + Chunker, + ChunkerPlugin, + ChunkingOptions +} from '@registry/core/types' + +const splitSentences = (text: string): string[] => { + const splits: string[] = [] + let buffer = '' + + for (let i = 0; i < text.length; i++) { + const ch = text[i] + buffer += ch + + if (ch === '.' || ch === '!' || ch === '?') { + const next = text[i + 1] + if (!next || /\s/.test(next)) { + if (buffer.trim()) { + splits.push(buffer) + } + buffer = '' + } + } else if (ch === '\n' && text[i + 1] === '\n') { + if (buffer.trim()) { + splits.push(buffer) + } + buffer = '' + } + } + + if (buffer.trim()) { + splits.push(buffer) + } + + return splits +} + +export const agenticChunker: Chunker = async ( + content: string, + options: ChunkingOptions +): Promise => { + const resolved = resolveChunkingOptions(options) + const {chunkSize, chunkOverlap, minChunkSize = 24} = resolved + + if (!content.trim()) { + return [] + } + + const model = options.model + const llmSplits = + (await splitWithLlm({ + content, + model, + chunkSize, + goal: 'Maximize retrieval quality by keeping coherent ideas together and preserving nearby context.' + })) ?? null + + const splits = llmSplits ?? splitSentences(content) + const chunks = mergeSplits(splits, chunkSize, chunkOverlap, minChunkSize) + + return chunks.map((chunkContent, index) => ({ + index, + content: chunkContent, + tokenCount: countTokens(chunkContent) + })) +} + +export const createAgenticChunkerPlugin = (): ChunkerPlugin => ({ + name: 'agentic', + createChunker: () => agenticChunker +}) + +export const registerAgenticChunker = (): void => { + registerChunkerPlugin(createAgenticChunkerPlugin()) +} + +registerAgenticChunker() diff --git a/packages/unrag/registry/chunkers/code/index.ts b/packages/unrag/registry/chunkers/code/index.ts new file mode 100644 index 0000000..f682a86 --- /dev/null +++ b/packages/unrag/registry/chunkers/code/index.ts @@ -0,0 +1,286 @@ +import {requireOptional} from '@registry/chunkers/_shared/optional' +import {countTokens, mergeSplits} from '@registry/chunkers/_shared/text' +import { + registerChunkerPlugin, + resolveChunkingOptions +} from '@registry/core/chunking' +import type { + ChunkText, + Chunker, + ChunkerPlugin, + ChunkingOptions, + Metadata, + MetadataValue +} from '@registry/core/types' + +type TreeSitterModule = { + default?: new () => { + setLanguage: (lang: unknown) => void + parse: (input: string) => { + rootNode: { + namedChildren: Array<{ + type: string + startIndex: number + endIndex: number + }> + } + } + } +} + +type LanguageModule = { + default?: unknown + typescript?: unknown + javascript?: unknown + tsx?: unknown + jsx?: unknown +} + +const DEFAULT_LANGUAGE = 'typescript' + +const normalizeLanguage = (language?: string): string => { + const normalized = (language ?? '').toLowerCase().trim() + if ( + normalized === 'ts' || + normalized === 'tsx' || + normalized === 'typescript' + ) { + return 'typescript' + } + if ( + normalized === 'js' || + normalized === 'jsx' || + normalized === 'javascript' + ) { + return 'javascript' + } + if (normalized === 'py' || normalized === 'python') { + return 'python' + } + if (normalized === 'go' || normalized === 'golang') { + return 'go' + } + return DEFAULT_LANGUAGE +} + +const EXTENSION_LANGUAGE: Record = { + ts: 'typescript', + tsx: 'typescript', + js: 'javascript', + jsx: 'javascript', + mjs: 'javascript', + cjs: 'javascript', + py: 'python', + go: 'go' +} + +const detectLanguageFromPath = (value?: string): string | undefined => { + if (!value) { + return undefined + } + const cleaned = value.split(/[?#]/)[0] ?? '' + if (!cleaned) { + return undefined + } + const base = cleaned.split(/[\\/]/).pop() ?? cleaned + if (!base) { + return undefined + } + const match = base.toLowerCase().match(/\.([a-z0-9]+)$/) + if (!match?.[1]) { + return undefined + } + const detected = EXTENSION_LANGUAGE[match[1]] + return detected ?? undefined +} + +const extractString = (value: MetadataValue | MetadataValue[] | undefined) => { + if (typeof value === 'string') { + return value + } + if (Array.isArray(value)) { + const found = value.find((item) => typeof item === 'string') + return typeof found === 'string' ? found : undefined + } + return undefined +} + +const detectLanguageFromMetadata = (metadata?: Metadata) => { + if (!metadata) { + return undefined + } + const keys = [ + 'path', + 'filePath', + 'filepath', + 'filename', + 'fileName', + 'name', + 'sourcePath' + ] + for (const key of keys) { + const candidate = extractString(metadata[key]) + const detected = detectLanguageFromPath(candidate) + if (detected) { + return detected + } + } + return undefined +} + +const MAJOR_TYPES: Record> = { + typescript: new Set([ + 'function_declaration', + 'class_declaration', + 'interface_declaration', + 'type_alias_declaration', + 'enum_declaration', + 'lexical_declaration' + ]), + javascript: new Set([ + 'function_declaration', + 'class_declaration', + 'lexical_declaration' + ]), + python: new Set([ + 'function_definition', + 'class_definition', + 'decorated_definition' + ]), + go: new Set([ + 'function_declaration', + 'method_declaration', + 'type_declaration' + ]) +} + +const loadParser = () => { + const ParserModule = requireOptional({ + id: 'tree-sitter', + installHint: 'bunx unrag add chunker code', + chunkerName: 'code' + }) + return (ParserModule.default ?? ParserModule) as unknown as new () => { + setLanguage: (lang: unknown) => void + parse: (input: string) => { + rootNode: { + namedChildren: Array<{ + type: string + startIndex: number + endIndex: number + }> + } + } + } +} + +const loadLanguage = (language: string) => { + if (language === 'typescript') { + const module = requireOptional({ + id: 'tree-sitter-typescript', + installHint: 'bunx unrag add chunker code', + chunkerName: 'code' + }) + return module.typescript ?? module.tsx ?? module.default ?? module + } + if (language === 'javascript') { + const module = requireOptional({ + id: 'tree-sitter-javascript', + installHint: 'bunx unrag add chunker code', + chunkerName: 'code' + }) + return module.javascript ?? module.jsx ?? module.default ?? module + } + if (language === 'python') { + const module = requireOptional({ + id: 'tree-sitter-python', + installHint: 'bunx unrag add chunker code', + chunkerName: 'code' + }) + return module.default ?? module + } + const module = requireOptional({ + id: 'tree-sitter-go', + installHint: 'bunx unrag add chunker code', + chunkerName: 'code' + }) + return module.default ?? module +} + +export const codeChunker: Chunker = ( + content: string, + options: ChunkingOptions +): ChunkText[] => { + const resolved = resolveChunkingOptions(options) + const {chunkSize, chunkOverlap, minChunkSize = 24} = resolved + + if (!content.trim()) { + return [] + } + + const inferred = + detectLanguageFromMetadata(options.metadata) ?? + detectLanguageFromPath(options.sourceId) + const language = normalizeLanguage(options.language ?? inferred) + const fallbackMajorTypes = MAJOR_TYPES[DEFAULT_LANGUAGE] ?? new Set() + const majorTypes = MAJOR_TYPES[language] ?? fallbackMajorTypes + + let blocks: string[] = [] + try { + const Parser = loadParser() + const parser = new Parser() + const lang = loadLanguage(language) + parser.setLanguage(lang) + + const tree = parser.parse(content) + const children = tree.rootNode.namedChildren + + let cursor = 0 + for (const child of children) { + if (!majorTypes.has(child.type)) { + continue + } + + const prefix = content.slice(cursor, child.startIndex) + if (prefix.trim()) { + blocks.push(prefix) + } + + const block = content.slice(child.startIndex, child.endIndex) + if (block.trim()) { + blocks.push(block) + } + cursor = child.endIndex + } + + const tail = content.slice(cursor) + if (tail.trim()) { + blocks.push(tail) + } + } catch { + blocks = [content] + } + + if (blocks.length === 0) { + blocks = [content] + } + + const chunks = mergeSplits(blocks, chunkSize, chunkOverlap, minChunkSize) + + return chunks.map((chunkContent, index) => ({ + index, + content: chunkContent, + tokenCount: countTokens(chunkContent) + })) +} + +export const createCodeChunkerPlugin = (): ChunkerPlugin => ({ + name: 'code', + createChunker: () => codeChunker +}) + +export const registerCodeChunker = (): void => { + registerChunkerPlugin(createCodeChunkerPlugin()) +} + +registerCodeChunker() diff --git a/packages/unrag/registry/chunkers/hierarchical/index.ts b/packages/unrag/registry/chunkers/hierarchical/index.ts new file mode 100644 index 0000000..417a182 --- /dev/null +++ b/packages/unrag/registry/chunkers/hierarchical/index.ts @@ -0,0 +1,115 @@ +import {countTokens} from '@registry/chunkers/_shared/text' +import { + recursiveChunker, + registerChunkerPlugin, + resolveChunkingOptions +} from '@registry/core/chunking' +import type { + ChunkText, + Chunker, + ChunkerPlugin, + ChunkingOptions +} from '@registry/core/types' + +const isFence = (line: string): boolean => + line.trim().startsWith('```') || line.trim().startsWith('~~~') + +const isHeading = (line: string): boolean => /^#{1,6}\s+/.test(line) + +type Section = { + header?: string + body: string +} + +const splitSections = (text: string): Section[] => { + const lines = text.split('\n') + const sections: Section[] = [] + let currentHeader: string | undefined + let current: string[] = [] + let inCodeBlock = false + + const flush = () => { + const body = current.join('\n').trim() + if (body || currentHeader) { + sections.push({header: currentHeader, body}) + } + current = [] + } + + for (const line of lines) { + if (isFence(line)) { + inCodeBlock = !inCodeBlock + current.push(line) + continue + } + + if (!inCodeBlock && isHeading(line)) { + flush() + currentHeader = line.trim() + continue + } + + current.push(line) + } + + flush() + return sections +} + +export const hierarchicalChunker: Chunker = async ( + content: string, + options: ChunkingOptions +): Promise => { + const resolved = resolveChunkingOptions(options) + const {chunkSize, chunkOverlap, minChunkSize = 24} = resolved + + if (!content.trim()) { + return [] + } + + const sections = splitSections(content) + const chunks: ChunkText[] = [] + + for (const section of sections) { + const body = section.body + if (!body && section.header) { + const headerOnly = section.header + chunks.push({ + index: chunks.length, + content: headerOnly, + tokenCount: countTokens(headerOnly) + }) + continue + } + + const sectionChunks = await recursiveChunker(body, { + chunkSize, + chunkOverlap, + minChunkSize + }) + + for (const chunk of sectionChunks) { + const contentWithHeader = section.header + ? `${section.header}\n${chunk.content}` + : chunk.content + chunks.push({ + index: chunks.length, + content: contentWithHeader, + tokenCount: countTokens(contentWithHeader) + }) + } + } + + return chunks +} + +export const createHierarchicalChunkerPlugin = (): ChunkerPlugin => ({ + name: 'hierarchical', + createChunker: () => hierarchicalChunker +}) + +export const registerHierarchicalChunker = (): void => { + registerChunkerPlugin(createHierarchicalChunkerPlugin()) +} + +registerHierarchicalChunker() diff --git a/packages/unrag/registry/chunkers/markdown/index.ts b/packages/unrag/registry/chunkers/markdown/index.ts new file mode 100644 index 0000000..9206ad2 --- /dev/null +++ b/packages/unrag/registry/chunkers/markdown/index.ts @@ -0,0 +1,85 @@ +import {countTokens, mergeSplits} from '@registry/chunkers/_shared/text' +import { + registerChunkerPlugin, + resolveChunkingOptions +} from '@registry/core/chunking' +import type { + ChunkText, + Chunker, + ChunkerPlugin, + ChunkingOptions +} from '@registry/core/types' + +const isFence = (line: string): boolean => + line.trim().startsWith('```') || line.trim().startsWith('~~~') + +const isHeading = (line: string): boolean => /^#{1,6}\s+/.test(line) + +const isRule = (line: string): boolean => + /^(-{3,}|\*{3,}|_{3,})\s*$/.test(line.trim()) + +const splitMarkdownBlocks = (text: string): string[] => { + const lines = text.split('\n') + const blocks: string[] = [] + let current: string[] = [] + let inCodeBlock = false + + const flush = () => { + const block = current.join('\n') + if (block.trim()) { + blocks.push(block) + } + current = [] + } + + for (const line of lines) { + if (isFence(line)) { + inCodeBlock = !inCodeBlock + current.push(line) + continue + } + + if (!inCodeBlock && (isHeading(line) || isRule(line))) { + flush() + current.push(line) + continue + } + + current.push(line) + } + + flush() + return blocks +} + +export const markdownChunker: Chunker = ( + content: string, + options: ChunkingOptions +): ChunkText[] => { + const resolved = resolveChunkingOptions(options) + const {chunkSize, chunkOverlap, minChunkSize = 24} = resolved + + if (!content.trim()) { + return [] + } + + const blocks = splitMarkdownBlocks(content) + const chunks = mergeSplits(blocks, chunkSize, chunkOverlap, minChunkSize) + + return chunks.map((chunkContent, index) => ({ + index, + content: chunkContent, + tokenCount: countTokens(chunkContent) + })) +} + +export const createMarkdownChunkerPlugin = (): ChunkerPlugin => ({ + name: 'markdown', + createChunker: () => markdownChunker +}) + +export const registerMarkdownChunker = (): void => { + registerChunkerPlugin(createMarkdownChunkerPlugin()) +} + +registerMarkdownChunker() diff --git a/packages/unrag/registry/chunkers/semantic/index.ts b/packages/unrag/registry/chunkers/semantic/index.ts new file mode 100644 index 0000000..e69bfe5 --- /dev/null +++ b/packages/unrag/registry/chunkers/semantic/index.ts @@ -0,0 +1,88 @@ +import {splitWithLlm} from '@registry/chunkers/_shared/llm' +import {countTokens, mergeSplits} from '@registry/chunkers/_shared/text' +import { + registerChunkerPlugin, + resolveChunkingOptions +} from '@registry/core/chunking' +import type { + ChunkText, + Chunker, + ChunkerPlugin, + ChunkingOptions +} from '@registry/core/types' + +const splitSentences = (text: string): string[] => { + const splits: string[] = [] + let buffer = '' + + for (let i = 0; i < text.length; i++) { + const ch = text[i] + buffer += ch + + if (ch === '.' || ch === '!' || ch === '?') { + const next = text[i + 1] + if (!next || /\s/.test(next)) { + if (buffer.trim()) { + splits.push(buffer) + } + buffer = '' + } + } else if (ch === '\n' && text[i + 1] === '\n') { + if (buffer.trim()) { + splits.push(buffer) + } + buffer = '' + } + } + + if (buffer.trim()) { + splits.push(buffer) + } + + return splits +} + +const fallbackSemanticSplits = (content: string): string[] => { + return splitSentences(content) +} + +export const semanticChunker: Chunker = async ( + content: string, + options: ChunkingOptions +): Promise => { + const resolved = resolveChunkingOptions(options) + const {chunkSize, chunkOverlap, minChunkSize = 24} = resolved + + if (!content.trim()) { + return [] + } + + const model = options.model + const llmSplits = + (await splitWithLlm({ + content, + model, + chunkSize, + goal: 'Prefer semantic boundaries between sentences and paragraphs.' + })) ?? null + + const splits = llmSplits ?? fallbackSemanticSplits(content) + const chunks = mergeSplits(splits, chunkSize, chunkOverlap, minChunkSize) + + return chunks.map((chunkContent, index) => ({ + index, + content: chunkContent, + tokenCount: countTokens(chunkContent) + })) +} + +export const createSemanticChunkerPlugin = (): ChunkerPlugin => ({ + name: 'semantic', + createChunker: () => semanticChunker +}) + +export const registerSemanticChunker = (): void => { + registerChunkerPlugin(createSemanticChunkerPlugin()) +} + +registerSemanticChunker() diff --git a/packages/unrag/registry/config/unrag.config.ts b/packages/unrag/registry/config/unrag.config.ts index c611ac8..e49f67c 100644 --- a/packages/unrag/registry/config/unrag.config.ts +++ b/packages/unrag/registry/config/unrag.config.ts @@ -2,7 +2,8 @@ * Root Unrag config (generated). * * This file is meant to be the single place you tweak: - * - Defaults (chunking + retrieval) + * - Chunking (method + options) + * - Defaults (retrieval) * - Engine settings (storage, asset processing, extractors) * - Embedding provider/model/timeouts * - How you construct your DB client (Pool/Prisma/etc) and vector store adapter @@ -16,11 +17,31 @@ // __UNRAG_IMPORTS__ export const unrag = defineUnragConfig({ + /** + * Chunking configuration. + * + * Default method: "recursive" (token-based with js-tiktoken o200k_base encoding) + * Supports GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1 + * + * Available methods: + * - "recursive" (built-in, default) + * - "token" (built-in, fixed-size tokens) + * - "semantic", "markdown", "hierarchical", "code", "agentic", "late", "maxmin", "proposition" (plugins) + * - "custom" (bring your own chunker) + * + * Plugin notes: + * - semantic/agentic: options.model (default "openai/gpt-5-mini") + * - code: options.language ("typescript" | "javascript" | "python" | "go") + */ + chunking: { + method: 'recursive', // __UNRAG_CHUNKING_METHOD__ + options: { + chunkSize: 512, // __UNRAG_DEFAULT_chunkSize__ (in tokens) + chunkOverlap: 50, // __UNRAG_DEFAULT_chunkOverlap__ (in tokens) + minChunkSize: 24 // __UNRAG_DEFAULT_minChunkSize__ (in tokens) + } + }, defaults: { - chunking: { - chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__ - chunkOverlap: 40 // __UNRAG_DEFAULT_chunkOverlap__ - }, retrieval: { topK: 8 // __UNRAG_DEFAULT_topK__ } diff --git a/packages/unrag/registry/core/chunking.ts b/packages/unrag/registry/core/chunking.ts index c366961..64db540 100644 --- a/packages/unrag/registry/core/chunking.ts +++ b/packages/unrag/registry/core/chunking.ts @@ -1,54 +1,474 @@ -import type {ChunkText, Chunker, ChunkingOptions} from '@registry/core/types' +import type { + ChunkText, + Chunker, + ChunkerPlugin, + ChunkingConfig, + ChunkingMethod, + ChunkingOptions +} from '@registry/core/types' +import {Tiktoken} from 'js-tiktoken/lite' +import o200k_base from 'js-tiktoken/ranks/o200k_base' -const DEFAULT_CHUNK_SIZE = 200 -const DEFAULT_CHUNK_OVERLAP = 40 +// --------------------------------------------------------------------------- +// Tokenizer (GPT-5 / o200k_base encoding) +// --------------------------------------------------------------------------- + +/** + * Tokenizer using o200k_base encoding (GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1). + * This is the standard encoding for all modern OpenAI models. + */ +const encoder = new Tiktoken(o200k_base) + +/** + * Count tokens in text using o200k_base encoding. + */ +export const countTokens = (text: string): number => { + return encoder.encode(text).length +} + +// --------------------------------------------------------------------------- +// Default options +// --------------------------------------------------------------------------- + +const DEFAULT_CHUNK_SIZE = 512 +const DEFAULT_CHUNK_OVERLAP = 50 +const DEFAULT_MIN_CHUNK_SIZE = 24 + +/** + * Enhanced separator hierarchy for recursive chunking. + * Splits on larger semantic boundaries first, falls back to smaller ones. + */ +const DEFAULT_SEPARATORS = [ + '\n\n', // paragraphs + '\n', // lines + '. ', // sentences (period) + '? ', // sentences (question) + '! ', // sentences (exclamation) + '; ', // semicolon clauses + ': ', // colon clauses + ', ', // comma phrases + ' ', // words + '' // characters (last resort) +] export const defaultChunkingOptions: ChunkingOptions = { chunkSize: DEFAULT_CHUNK_SIZE, - chunkOverlap: DEFAULT_CHUNK_OVERLAP + chunkOverlap: DEFAULT_CHUNK_OVERLAP, + minChunkSize: DEFAULT_MIN_CHUNK_SIZE, + separators: DEFAULT_SEPARATORS +} + +// --------------------------------------------------------------------------- +// Token-based Recursive Chunker +// --------------------------------------------------------------------------- + +/** + * Split text by a separator, keeping the separator at the end of each piece. + */ +const splitWithSeparator = (text: string, separator: string): string[] => { + if (separator === '') { + // Character-level split + return text.split('') + } + + const parts = text.split(separator) + const result: string[] = [] + + for (let i = 0; i < parts.length; i++) { + const part = parts[i] + if (part === undefined) { + continue + } + + // Add separator back to the end of each part (except the last) + if (i < parts.length - 1) { + result.push(part + separator) + } else if (part) { + result.push(part) + } + } + + return result +} + +/** + * Merge small splits into chunks that respect the token limit. + */ +const mergeSplits = ( + splits: string[], + chunkSize: number, + chunkOverlap: number, + minChunkSize: number +): string[] => { + const chunks: string[] = [] + let currentChunk = '' + let currentTokens = 0 + + for (const split of splits) { + const splitTokens = countTokens(split) + + // If adding this split would exceed chunk size + if (currentTokens + splitTokens > chunkSize && currentChunk) { + // Only add chunk if it meets minimum size + if (currentTokens >= minChunkSize) { + chunks.push(currentChunk.trim()) + } + + // Start new chunk with overlap from previous + if (chunkOverlap > 0 && currentChunk) { + const overlapText = getOverlapText(currentChunk, chunkOverlap) + currentChunk = overlapText + split + currentTokens = countTokens(currentChunk) + } else { + currentChunk = split + currentTokens = splitTokens + } + } else { + currentChunk += split + currentTokens += splitTokens + } + } + + // Don't forget the last chunk + if (currentChunk.trim() && currentTokens >= minChunkSize) { + chunks.push(currentChunk.trim()) + } else if (currentChunk.trim() && chunks.length > 0) { + // Merge small last chunk with previous + const lastChunk = chunks.pop() + if (lastChunk) { + chunks.push(`${lastChunk} ${currentChunk}`.trim()) + } + } else if (currentChunk.trim()) { + // Single chunk that's smaller than min - still include it + chunks.push(currentChunk.trim()) + } + + return chunks } -const splitWords = (content: string) => - content.trim().split(/\s+/).filter(Boolean) +/** + * Get overlap text from the end of a chunk. + */ +const getOverlapText = (text: string, overlapTokens: number): string => { + const tokens = encoder.encode(text) + if (tokens.length <= overlapTokens) { + return text + } + + const overlapTokenSlice = tokens.slice(-overlapTokens) + try { + return encoder.decode(overlapTokenSlice) + } catch { + // If decode fails, fall back to character-based overlap + return text.slice(-overlapTokens * 4) // ~4 chars per token estimate + } +} + +/** + * Recursively split text using a hierarchy of separators. + */ +const recursiveSplit = ( + text: string, + separators: string[], + chunkSize: number, + chunkOverlap: number, + minChunkSize: number +): string[] => { + // Base case: text fits in chunk + const textTokens = countTokens(text) + if (textTokens <= chunkSize) { + return text.trim() ? [text.trim()] : [] + } + + // Find the first separator that exists in the text + let separatorToUse = '' + let remainingSeparators = separators + + for (let i = 0; i < separators.length; i++) { + const sep = separators[i] + if (sep !== undefined && (sep === '' || text.includes(sep))) { + separatorToUse = sep + remainingSeparators = separators.slice(i + 1) + break + } + } + + // Split by the chosen separator + const splits = splitWithSeparator(text, separatorToUse) + + // Process splits - recursively split any that are too large + const goodSplits: string[] = [] + + for (const split of splits) { + const splitTokens = countTokens(split) + + if (splitTokens <= chunkSize) { + goodSplits.push(split) + } else if (remainingSeparators.length > 0) { + // Recursively split with finer separators + const subSplits = recursiveSplit( + split, + remainingSeparators, + chunkSize, + chunkOverlap, + minChunkSize + ) + goodSplits.push(...subSplits) + } else { + // No more separators - force split by tokens + goodSplits.push( + ...forceSplitByTokens(split, chunkSize, chunkOverlap) + ) + } + } + + // Merge splits into chunks respecting size limits + return mergeSplits(goodSplits, chunkSize, chunkOverlap, minChunkSize) +} + +/** + * Force split text by token count when no separators work. + */ +const forceSplitByTokens = ( + text: string, + chunkSize: number, + chunkOverlap: number +): string[] => { + const tokens = encoder.encode(text) + const chunks: string[] = [] + const stride = Math.max(1, chunkSize - chunkOverlap) -export const defaultChunker: Chunker = ( + for (let i = 0; i < tokens.length; i += stride) { + const chunkTokens = tokens.slice(i, i + chunkSize) + try { + const chunk = encoder.decode(chunkTokens).trim() + if (chunk) { + chunks.push(chunk) + } + } catch { + // Skip invalid token sequences + } + + // Stop if we've processed all tokens + if (i + chunkSize >= tokens.length) { + break + } + } + + return chunks +} + +/** + * Token-based recursive text splitter (default chunker). + * + * Uses o200k_base encoding (GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1) for accurate + * token counting. Splits text using a hierarchy of separators to preserve semantic + * boundaries while respecting token limits. + * + * Features: + * - Accurate token counting using js-tiktoken with o200k_base encoding + * - 10-level separator hierarchy (paragraphs → sentences → words → characters) + * - Minimum chunk size threshold to avoid tiny chunks + * - Token-based overlap for context preservation + * + * @example + * ```typescript + * const chunks = recursiveChunker(text, { + * chunkSize: 512, // max tokens per chunk + * chunkOverlap: 50, // overlap tokens between chunks + * minChunkSize: 24 // minimum tokens per chunk + * }) + * ``` + */ +export const recursiveChunker: Chunker = ( content: string, options: ChunkingOptions ): ChunkText[] => { - const {chunkSize, chunkOverlap} = options - const words = splitWords(content) - const chunks: ChunkText[] = [] + const { + chunkSize, + chunkOverlap, + minChunkSize = DEFAULT_MIN_CHUNK_SIZE, + separators = DEFAULT_SEPARATORS + } = options - if (words.length === 0) { - return chunks + if (!content.trim()) { + return [] } - let cursor = 0 - let index = 0 + // Perform recursive splitting + const chunks = recursiveSplit( + content, + separators, + chunkSize, + chunkOverlap, + minChunkSize + ) - const stride = Math.max(1, chunkSize - chunkOverlap) + // Convert to ChunkText format with accurate token counts + return chunks.map((chunkContent, index) => ({ + index, + content: chunkContent, + tokenCount: countTokens(chunkContent) + })) +} - while (cursor < words.length) { - const slice = words.slice(cursor, cursor + chunkSize) - const chunkContent = slice.join(' ').trim() +/** + * Token-based fixed-size chunker. + * + * Splits text strictly by token count with overlap, without recursive separators. + */ +export const tokenChunker: Chunker = ( + content: string, + options: ChunkingOptions +): ChunkText[] => { + const { + chunkSize, + chunkOverlap, + minChunkSize = DEFAULT_MIN_CHUNK_SIZE + } = options - if (chunkContent.length === 0) { - break + if (!content.trim()) { + return [] + } + + const chunks = forceSplitByTokens(content, chunkSize, chunkOverlap) + if (chunks.length > 1) { + const last = chunks[chunks.length - 1] + if (last && countTokens(last) < minChunkSize) { + const prev = chunks[chunks.length - 2] + chunks.splice(chunks.length - 2, 2, `${prev} ${last}`.trim()) } + } + + return chunks.map((chunkContent, index) => ({ + index, + content: chunkContent, + tokenCount: countTokens(chunkContent) + })) +} + +// --------------------------------------------------------------------------- +// Default chunker +// --------------------------------------------------------------------------- + +/** + * Default chunker - token-based recursive splitting. + */ +export const defaultChunker: Chunker = recursiveChunker - chunks.push({ - index, - content: chunkContent, - tokenCount: slice.length - }) +// --------------------------------------------------------------------------- +// Plugin registry +// --------------------------------------------------------------------------- + +const loadedPlugins = new Map() + +/** + * Register a chunker plugin. + * Plugins are typically auto-registered when installed via `bunx unrag add chunker:`. + */ +export const registerChunkerPlugin = (plugin: ChunkerPlugin): void => { + loadedPlugins.set(plugin.name, plugin) +} - cursor += stride - index += 1 +/** + * Get a registered chunker plugin by name. + */ +export const getChunkerPlugin = (name: string): ChunkerPlugin | undefined => { + return loadedPlugins.get(name) +} + +/** + * List all registered chunker plugins. + */ +export const listChunkerPlugins = (): string[] => { + return Array.from(loadedPlugins.keys()) +} + +// --------------------------------------------------------------------------- +// Built-in chunkers registry +// --------------------------------------------------------------------------- + +const builtInChunkers: Record = { + recursive: recursiveChunker, + token: tokenChunker +} + +// --------------------------------------------------------------------------- +// Chunker resolution +// --------------------------------------------------------------------------- + +/** + * Resolve a chunker based on the chunking configuration. + * + * @param config - Chunking configuration from unrag.config.ts + * @returns A chunker function + * @throws Error if the specified method is not found (plugin not installed) + */ +export const resolveChunker = (config?: ChunkingConfig): Chunker => { + // Default to recursive if no config + if (!config || !config.method) { + return recursiveChunker } - return chunks + const {method, chunker} = config + + // Handle custom chunker + if (method === 'custom') { + if (!chunker) { + throw new Error( + 'Chunking method "custom" requires a chunker function in config.chunker' + ) + } + return chunker + } + + // Check built-in chunkers + const builtIn = builtInChunkers[method] + if (builtIn) { + return builtIn + } + + // Check plugins + const plugin = loadedPlugins.get(method) + if (plugin) { + return plugin.createChunker(config.options) + } + + // Method not found - provide helpful error message + throw new Error( + `Chunker "${method}" not found.\n` + + `Run: bunx unrag add chunker:${method}` + ) +} + +/** + * Check if a chunking method is available (built-in or plugin installed). + */ +export const isChunkerAvailable = (method: ChunkingMethod): boolean => { + if (method === 'custom') { + return true + } + if (method in builtInChunkers) { + return true + } + return loadedPlugins.has(method) } +/** + * Get information about available chunkers. + */ +export const getAvailableChunkers = (): { + builtIn: string[] + plugins: string[] +} => ({ + builtIn: Object.keys(builtInChunkers), + plugins: Array.from(loadedPlugins.keys()) +}) + +// --------------------------------------------------------------------------- +// Options resolution +// --------------------------------------------------------------------------- + export const resolveChunkingOptions = ( overrides?: Partial ): ChunkingOptions => ({ diff --git a/packages/unrag/registry/core/context-engine.ts b/packages/unrag/registry/core/context-engine.ts index 42ef381..400d7e3 100644 --- a/packages/unrag/registry/core/context-engine.ts +++ b/packages/unrag/registry/core/context-engine.ts @@ -1,3 +1,4 @@ +import {resolveChunker, resolveChunkingOptions} from '@registry/core/chunking' import {defineConfig, resolveConfig} from '@registry/core/config' import { type RunConnectorStreamOptions, @@ -242,14 +243,20 @@ export const defineUnragConfig = ( return embeddingProvider } + // Resolve chunking options from config + const chunkingOptions = resolveChunkingOptions(config.chunking?.options) + const defaults = { - chunking: config.defaults?.chunking ?? {}, + chunking: chunkingOptions, embedding: config.defaults?.embedding ?? {}, retrieval: { topK: config.defaults?.retrieval?.topK ?? 8 } } as const + // Resolve chunker based on config (supports method selection + plugins) + const chunker = resolveChunker(config.chunking) + const createEngineConfig = ( runtime: UnragCreateEngineRuntime ): ContextEngineConfig => { @@ -263,6 +270,7 @@ export const defineUnragConfig = ( return defineConfig({ ...(config.engine ?? {}), defaults: defaults.chunking, + chunker, embeddingProcessing: { ...(defaults.embedding ?? {}), ...(config.engine?.embeddingProcessing ?? {}) diff --git a/packages/unrag/registry/core/index.ts b/packages/unrag/registry/core/index.ts index 2aa6f86..156c436 100644 --- a/packages/unrag/registry/core/index.ts +++ b/packages/unrag/registry/core/index.ts @@ -9,7 +9,20 @@ export {ingest, planIngest} from '@registry/core/ingest' export {rerank} from '@registry/core/rerank' export {retrieve} from '@registry/core/retrieve' export * from '@registry/core/connectors' -export {defaultChunker, resolveChunkingOptions} from '@registry/core/chunking' +export { + countTokens, + defaultChunker, + defaultChunkingOptions, + getAvailableChunkers, + getChunkerPlugin, + isChunkerAvailable, + listChunkerPlugins, + recursiveChunker, + tokenChunker, + registerChunkerPlugin, + resolveChunker, + resolveChunkingOptions +} from '@registry/core/chunking' export { defaultAssetProcessingConfig, defaultContentStorageConfig, diff --git a/packages/unrag/registry/core/ingest.ts b/packages/unrag/registry/core/ingest.ts index 237143f..b027fda 100644 --- a/packages/unrag/registry/core/ingest.ts +++ b/packages/unrag/registry/core/ingest.ts @@ -88,9 +88,13 @@ export const ingest = async ( const chunkingOptions = { ...config.defaults, - ...input.chunking + ...input.chunking, + sourceId: input.sourceId, + metadata: input.metadata ?? {} } + const chunker = input.chunker ?? config.chunker + const metadata = input.metadata ?? {} const documentId = config.idGenerator() const assets: AssetInput[] = Array.isArray(input.assets) ? input.assets : [] @@ -126,7 +130,7 @@ export const ingest = async ( const prepared: PreparedChunk[] = [] const warnings: IngestWarning[] = [] - const baseTextChunks = config.chunker(input.content, chunkingOptions) + const baseTextChunks = await chunker(input.content, chunkingOptions) for (const c of baseTextChunks) { prepared.push({ chunk: { @@ -215,7 +219,7 @@ export const ingest = async ( .filter((t) => t.content.trim().length > 0) for (const item of nonEmptyItems) { - const chunks = config.chunker(item.content, chunkingOptions) + const chunks = await chunker(item.content, chunkingOptions) for (const c of chunks) { outSpecs.push({ documentId, @@ -405,7 +409,7 @@ export const ingest = async ( storedTokenCount: storedCaptionTokenCount }) } else if (caption) { - const captionChunks = config.chunker(caption, chunkingOptions) + const captionChunks = await chunker(caption, chunkingOptions) for (const c of captionChunks) { specs.push({ documentId, diff --git a/packages/unrag/registry/core/types.ts b/packages/unrag/registry/core/types.ts index 44768a6..2eb3265 100644 --- a/packages/unrag/registry/core/types.ts +++ b/packages/unrag/registry/core/types.ts @@ -80,12 +80,119 @@ export type ChunkText = { tokenCount: number } +/** + * Chunking options for token-based recursive chunking. + * All sizes are in TOKENS (not characters or words). + */ export type ChunkingOptions = { + /** + * Maximum chunk size in tokens. + * Default: 512 + */ chunkSize: number + /** + * Number of overlapping tokens between consecutive chunks. + * Default: 50 + */ chunkOverlap: number + /** + * Minimum chunk size in tokens. Chunks smaller than this will be merged. + * Default: 24 + */ + minChunkSize?: number + /** + * Custom separator hierarchy for recursive splitting. + * Default: ['\n\n', '\n', '. ', '? ', '! ', '; ', ': ', ', ', ' ', ''] + */ + separators?: string[] + /** + * Optional model override for LLM-driven chunkers (semantic/agentic). + */ + model?: string + /** + * Optional language hint for code chunker (typescript, javascript, python, go). + */ + language?: string + /** + * Source identifier for the current document (used for per-file inference). + */ + sourceId?: string + /** + * Document metadata available during chunking. + */ + metadata?: Metadata +} + +export type ChunkerResult = ChunkText[] | Promise + +export type Chunker = ( + content: string, + options: ChunkingOptions +) => ChunkerResult + +// --------------------------------------------------------------------------- +// Chunking method & plugin types +// --------------------------------------------------------------------------- + +/** + * Built-in chunking methods shipped with core. + * Uses token-based recursive chunking with js-tiktoken (o200k_base encoding). + */ +export type BuiltInChunkingMethod = 'recursive' | 'token' + +/** + * Plugin chunking methods (installed via CLI). + */ +export type PluginChunkingMethod = + | 'semantic' + | 'markdown' + | 'hierarchical' + | 'code' + | 'agentic' + | 'late' + | 'maxmin' + | 'proposition' + +/** + * All supported chunking methods. + */ +export type ChunkingMethod = + | BuiltInChunkingMethod + | PluginChunkingMethod + | 'custom' + +/** + * Chunking configuration for unrag.config.ts. + */ +export type ChunkingConfig = { + /** + * Chunking method to use. Default: "recursive". + * Built-in: "recursive" (token-based recursive), "token" (fixed-size tokens) + * Plugins: "semantic", "markdown", "hierarchical", "code", "agentic", "late", "maxmin", "proposition" + */ + method?: ChunkingMethod + /** + * Method-specific options. Shape depends on the chosen method. + */ + options?: ChunkingOptions & Record + /** + * Custom chunker function. Only used when method is "custom". + */ + chunker?: Chunker } -export type Chunker = (content: string, options: ChunkingOptions) => ChunkText[] +/** + * Plugin interface for chunker modules. + * Installed via `bunx unrag add chunker:`. + */ +export type ChunkerPlugin = { + /** Unique name matching the method (e.g. "semantic", "markdown"). */ + name: string + /** Create a chunker function with the given options. */ + createChunker: ( + options?: ChunkingOptions & Record + ) => Chunker +} /** * Data reference for an ingested asset. @@ -702,6 +809,16 @@ export type IngestInput = { sourceId: string content: string metadata?: Metadata + /** + * Per-ingest chunker override. + * + * Use this to switch chunking algorithms for a single ingest call without + * changing the engine's configured chunker. + * + * Note: This affects chunking for both the main `content` and any text derived + * from `assets` during this ingest. + */ + chunker?: Chunker chunking?: Partial /** Optional rich media attached to the document. */ assets?: AssetInput[] @@ -998,6 +1115,23 @@ export type UnragEmbeddingConfig = export type DefineUnragConfigInput = { defaults?: UnragDefaultsConfig + /** + * Chunking configuration. + * Controls how documents are split into chunks for embedding. + * + * @example + * ```typescript + * chunking: { + * method: "recursive", // default, uses js-tiktoken with o200k_base + * options: { + * chunkSize: 512, // max tokens per chunk + * chunkOverlap: 50, // overlap tokens between chunks + * minChunkSize: 24 // minimum tokens per chunk + * } + * } + * ``` + */ + chunking?: ChunkingConfig /** * Engine configuration (everything except embedding/store/defaults). * This is where you configure storage, asset processing, chunker/idGenerator, etc. diff --git a/packages/unrag/registry/debug/commands.ts b/packages/unrag/registry/debug/commands.ts index f36408b..5bda96e 100644 --- a/packages/unrag/registry/debug/commands.ts +++ b/packages/unrag/registry/debug/commands.ts @@ -491,7 +491,13 @@ async function handleIngest(command: { content?: string contentPath?: string metadata?: Metadata - chunking?: {chunkSize?: number; chunkOverlap?: number} + chunking?: { + chunkSize?: number + chunkOverlap?: number + minChunkSize?: number + model?: string + language?: string + } }): Promise { const runtime = getUnragDebugRuntime() if (!runtime?.engine) { diff --git a/packages/unrag/registry/debug/types.ts b/packages/unrag/registry/debug/types.ts index 3eeeb14..a6a9b77 100644 --- a/packages/unrag/registry/debug/types.ts +++ b/packages/unrag/registry/debug/types.ts @@ -193,6 +193,17 @@ export type IngestCommand = { chunking?: { chunkSize?: number chunkOverlap?: number + minChunkSize?: number + /** + * Model hint for LLM-driven chunkers (semantic/agentic) when supported. + * Example: "openai/gpt-5-mini" + */ + model?: string + /** + * Language hint for the code chunker when supported. + * Example: "typescript" | "javascript" | "python" | "go" + */ + language?: string } } diff --git a/packages/unrag/registry/manifest.json b/packages/unrag/registry/manifest.json index 3bce1e5..3b10cee 100644 --- a/packages/unrag/registry/manifest.json +++ b/packages/unrag/registry/manifest.json @@ -385,6 +385,54 @@ "devDeps": {} } ], + "chunkers": [ + { + "id": "semantic", + "label": "semantic", + "description": "LLM-guided semantic chunking for general text", + "status": "available", + "deps": { "ai": "^6.0.3" }, + "devDeps": {} + }, + { + "id": "markdown", + "label": "markdown", + "description": "Markdown-aware chunking with fenced code preservation", + "status": "available", + "deps": {}, + "devDeps": {} + }, + { + "id": "hierarchical", + "label": "hierarchical", + "description": "Section-first chunking with header context", + "status": "available", + "deps": {}, + "devDeps": {} + }, + { + "id": "code", + "label": "code", + "description": "Structure-aware chunking for source code", + "status": "available", + "deps": { + "tree-sitter": "^0.22.6", + "tree-sitter-typescript": "^0.21.2", + "tree-sitter-javascript": "^0.21.4", + "tree-sitter-python": "^0.21.0", + "tree-sitter-go": "^0.21.0" + }, + "devDeps": {} + }, + { + "id": "agentic", + "label": "agentic", + "description": "LLM-guided chunking for highest quality", + "status": "available", + "deps": { "ai": "^6.0.3" }, + "devDeps": {} + } + ], "batteries": [ { "id": "reranker", diff --git a/packages/unrag/registry/types/externals.d.ts b/packages/unrag/registry/types/externals.d.ts new file mode 100644 index 0000000..c65d3e3 --- /dev/null +++ b/packages/unrag/registry/types/externals.d.ts @@ -0,0 +1,30 @@ +declare module 'js-tiktoken/lite' { + /** + * `js-tiktoken` ships subpath exports without bundled TS types for some builds. + * We keep a minimal declaration here so Unrag can typecheck in-repo. + */ + export class Tiktoken { + constructor(ranks: unknown) + encode(text: string): number[] + decode(tokens: number[]): string + } +} + +declare module 'js-tiktoken/ranks/o200k_base' { + /** + * Token rank data for the `o200k_base` encoding. + */ + const ranks: unknown + export default ranks +} + +declare module '@prisma/client' { + /** + * In Unrag’s repo we don’t run `prisma generate` as part of typechecking, + * so `@prisma/client` types may be absent. Projects using the Prisma adapter + * will have real generated types. + */ + export class PrismaClient { + [key: string]: unknown + } +} diff --git a/packages/unrag/skills/unrag/SKILL.md b/packages/unrag/skills/unrag/SKILL.md index 63e3925..fb2fdea 100644 --- a/packages/unrag/skills/unrag/SKILL.md +++ b/packages/unrag/skills/unrag/SKILL.md @@ -1,7 +1,7 @@ --- name: unrag -description: Covers RAG installation, ContextEngine API, embedding providers, store adapters, extractors, connectors, batteries, and CLI commands for the unrag TypeScript library. -version: 0.3.2 +description: Covers RAG installation, ContextEngine API, embedding providers, store adapters, extractors, connectors, chunkers, batteries, and CLI commands for the unrag TypeScript library. +version: 0.4.0 --- # Unrag Agent Skill @@ -129,15 +129,18 @@ const result = await engine.retrieve({ ### Chunking -Documents are split into chunks before embedding: +Documents are split into chunks before embedding. Unrag uses **token-based recursive chunking** by default with the `o200k_base` tokenizer (GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1). ```ts // Global defaults in unrag.config.ts export const unrag = defineUnragConfig({ - defaults: { - chunking: { - chunkSize: 512, // tokens per chunk - chunkOverlap: 50, // overlap between chunks + // Method 1: Top-level chunking config (recommended) + chunking: { + method: "recursive", // or "semantic", "markdown", "code", etc. + options: { + chunkSize: 512, // tokens per chunk + chunkOverlap: 50, // overlap between chunks + minChunkSize: 24, // minimum tokens per chunk }, }, // ... @@ -151,6 +154,53 @@ await engine.ingest({ }); ``` +#### Plugin Chunkers + +Install specialized chunkers for different content types: + +```bash +bunx unrag add chunker:semantic # LLM-guided semantic boundaries +bunx unrag add chunker:markdown # Markdown-aware (headers, code blocks) +bunx unrag add chunker:code # AST-based (TypeScript, JavaScript, Python, Go) +bunx unrag add chunker:hierarchical # Section-first with header context +bunx unrag add chunker:agentic # LLM-powered highest quality +``` + +| Method | Best for | Dependencies | +|--------|----------|--------------| +| `recursive` | General text (default) | Built-in | +| `token` | Fixed token splitting | Built-in | +| `semantic` | LLM-guided boundaries | `ai` SDK | +| `markdown` | Documentation, READMEs | None | +| `code` | Source code files | `tree-sitter` | +| `hierarchical` | Structured docs | None | +| `agentic` | High-value content | `ai` SDK | +| `custom` | Your own logic | Bring your own | + +#### Custom Chunker + +```ts +import { countTokens } from "unrag"; + +export const unrag = defineUnragConfig({ + chunking: { + method: "custom", + chunker: (content, options) => { + // Your custom logic + return [{ index: 0, content, tokenCount: countTokens(content) }]; + }, + }, +}); +``` + +#### Token Counting + +```ts +import { countTokens } from "unrag"; + +const tokens = countTokens("Hello world"); // 2 +``` + ### Asset Processing Rich media (PDFs, images, audio, video, files) can be attached to documents: @@ -180,7 +230,7 @@ const result = await engine.ingest({ sourceId: string, // Stable document identifier content: string, // Document text metadata?: Metadata, // Optional key-value pairs - chunking?: { chunkSize?, chunkOverlap? }, + chunking?: { chunkSize?, chunkOverlap?, minChunkSize? }, // All in tokens assets?: AssetInput[], // Optional rich media assetProcessing?: DeepPartial, }); @@ -324,8 +374,8 @@ This skill includes detailed reference files for specific topics: ## Version Information -- **Skill Version:** 1.0.0 -- **Unrag CLI Version:** 0.3.2 +- **Skill Version:** 0.4.0 +- **Unrag CLI Version:** 0.4.0 - **Config Version:** 2 ## Key Source Files @@ -336,6 +386,8 @@ When you need to look at source code: |------|---------| | `packages/unrag/registry/core/types.ts` | All TypeScript types | | `packages/unrag/registry/core/context-engine.ts` | ContextEngine class | -| `packages/unrag/registry/manifest.json` | Extractors, connectors, batteries metadata | +| `packages/unrag/registry/core/chunking.ts` | Chunking logic and plugin registry | +| `packages/unrag/registry/chunkers/*/index.ts` | Plugin chunker implementations | +| `packages/unrag/registry/manifest.json` | Extractors, connectors, chunkers, batteries metadata | | `packages/unrag/cli/commands/*.ts` | CLI command implementations | | `apps/web/content/docs/**/*.mdx` | Documentation pages | diff --git a/packages/unrag/skills/unrag/references/api-reference.md b/packages/unrag/skills/unrag/references/api-reference.md index 6c77f06..e6ff136 100644 --- a/packages/unrag/skills/unrag/references/api-reference.md +++ b/packages/unrag/skills/unrag/references/api-reference.md @@ -37,12 +37,66 @@ type Metadata = Record< ### ChunkingOptions -Controls how documents are split: +Controls how documents are split. All sizes are in **tokens** (using o200k_base encoding): ```ts type ChunkingOptions = { - chunkSize: number; // Max tokens per chunk - chunkOverlap: number; // Overlap between chunks + chunkSize: number; // Max tokens per chunk (default: 512) + chunkOverlap: number; // Overlap tokens between chunks (default: 50) + minChunkSize?: number; // Minimum tokens per chunk (default: 24) + separators?: string[]; // Custom separator hierarchy for recursive splitting + model?: string; // LLM model for semantic/agentic chunkers + language?: string; // Language hint for code chunker +}; +``` + +### ChunkingConfig + +Top-level chunking configuration: + +```ts +type ChunkingMethod = + | "recursive" // Token-based recursive (default) + | "token" // Fixed token splitting + | "semantic" // LLM-guided semantic boundaries + | "markdown" // Markdown-aware + | "hierarchical" // Section-first with headers + | "code" // AST-based for source code + | "agentic" // LLM-powered highest quality + | "custom"; // Bring your own + +type ChunkingConfig = { + method?: ChunkingMethod; // Default: "recursive" + options?: ChunkingOptions; // Method-specific options + chunker?: Chunker; // Custom chunker (when method="custom") +}; +``` + +### Chunker + +Interface for custom chunker functions: + +```ts +type ChunkText = { + index: number; // Position in document (0, 1, 2, ...) + content: string; // Chunk text + tokenCount: number; // Token count for this chunk +}; + +type Chunker = ( + content: string, + options: ChunkingOptions +) => ChunkText[] | Promise; +``` + +### ChunkerPlugin + +Interface for plugin chunkers (installed via CLI): + +```ts +type ChunkerPlugin = { + name: string; // e.g., "semantic", "markdown" + createChunker: (options?: ChunkingOptions) => Chunker; }; ``` @@ -55,7 +109,8 @@ type IngestInput = { sourceId: string; // Stable document identifier content: string; // Document text metadata?: Metadata; // Optional metadata - chunking?: Partial; // Override chunking + chunker?: Chunker; // Override chunking algorithm for this ingest + chunking?: Partial; // Override chunking (all sizes in tokens) assets?: AssetInput[]; // Rich media attachments assetProcessing?: DeepPartial; }; @@ -388,12 +443,13 @@ type ImageEmbeddingInput = { ```ts type DefineUnragConfigInput = { defaults?: UnragDefaultsConfig; + chunking?: ChunkingConfig; // Top-level chunking config (recommended) engine?: UnragEngineConfig; embedding: UnragEmbeddingConfig; }; type UnragDefaultsConfig = { - chunking?: Partial; + chunking?: Partial; // Legacy: use top-level chunking instead embedding?: Partial; retrieval?: { topK?: number }; }; diff --git a/packages/unrag/skills/unrag/references/cli-commands.md b/packages/unrag/skills/unrag/references/cli-commands.md index ae8b707..8a73959 100644 --- a/packages/unrag/skills/unrag/references/cli-commands.md +++ b/packages/unrag/skills/unrag/references/cli-commands.md @@ -91,7 +91,7 @@ The CLI patches your tsconfig.json to add path aliases: ## unrag add -Add extractors, connectors, or batteries to an existing installation. +Add extractors, connectors, chunkers, or batteries to an existing installation. ```bash bunx unrag@latest add @@ -103,6 +103,7 @@ bunx unrag@latest add |------|-------------| | `extractor` | Asset extraction modules | | `connector` | External service connectors | +| `chunker` | Specialized chunking plugins | | `battery` | Optional feature modules | ### Examples @@ -114,6 +115,13 @@ bunx unrag@latest add extractor pdf-llm bunx unrag@latest add extractor image-ocr bunx unrag@latest add extractor file-docx +# Add chunkers +bunx unrag@latest add chunker semantic # LLM-guided semantic boundaries +bunx unrag@latest add chunker markdown # Markdown-aware chunking +bunx unrag@latest add chunker code # AST-based code chunking +bunx unrag@latest add chunker hierarchical # Section-first with headers +bunx unrag@latest add chunker agentic # LLM-powered highest quality + # Add connectors bunx unrag@latest add connector notion bunx unrag@latest add connector google-drive @@ -147,6 +155,13 @@ bunx unrag@latest add battery debug - `file-pptx` - PowerPoint extraction - `file-xlsx` - Excel extraction +**Chunkers:** +- `semantic` - LLM-guided semantic boundaries (requires `ai` SDK) +- `markdown` - Markdown-aware (headers, fenced code blocks) +- `code` - AST-based for source code (requires `tree-sitter`) +- `hierarchical` - Section-first with header context +- `agentic` - LLM-powered highest quality chunking (requires `ai` SDK) + **Connectors:** - `notion` - Notion pages and databases - `google-drive` - Google Drive files @@ -344,7 +359,7 @@ Metadata file tracking your installation: "embeddingProvider": "openai", "version": 2, "installedFrom": { - "unragVersion": "0.3.2" + "unragVersion": "0.4.0" }, "scaffold": { "mode": "slim", @@ -352,10 +367,12 @@ Metadata file tracking your installation: }, "connectors": ["notion"], "extractors": ["pdf-text-layer", "file-text"], + "chunkers": ["semantic", "markdown"], "batteries": ["reranker", "debug"], "managedFiles": [ "lib/unrag/core/types.ts", "lib/unrag/core/context-engine.ts", + "lib/unrag/core/chunking.ts", "..." ] } diff --git a/specs/CHUNKING_RESEARCH_REPORT.md b/specs/CHUNKING_RESEARCH_REPORT.md new file mode 100644 index 0000000..6d5d17f --- /dev/null +++ b/specs/CHUNKING_RESEARCH_REPORT.md @@ -0,0 +1,455 @@ +# RAG Chunking Strategies: Research Report + +**Date:** January 2026 +**Updated:** January 2026 (v2 - Token-based recursive chunking) +**Purpose:** Research chunking techniques and document implementation decisions + +--- + +## Table of Contents +1. [Executive Summary](#executive-summary) +2. [Research Methodology](#research-methodology) +3. [Strategy-by-Strategy Analysis](#strategy-by-strategy-analysis) +4. [Benchmark Comparison](#benchmark-comparison) +5. [Implementation Decision](#implementation-decision) +6. [Default Chunker: Token-Based Recursive](#default-chunker-token-based-recursive) +7. [Plugin Architecture](#plugin-architecture) +8. [Citations](#citations) + +--- + +## Executive Summary + +After reviewing 15+ sources including peer-reviewed papers, industry benchmarks, and production case studies, this report provides research backing for unrag's chunking implementation. + +### Key Decision: Token-Based Recursive as Default + +**Breaking Change (v0.4.0):** Word-based chunking has been removed. The new default is **token-based recursive chunking** using `js-tiktoken` with `o200k_base` encoding (GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1). + +| Aspect | Old (v0.3.x) | New (v0.4.0+) | +|--------|--------------|---------------| +| **Default** | Word-based | Token-based recursive | +| **Tokenizer** | None (word count) | js-tiktoken (o200k_base) | +| **Separators** | 4 levels | 10 levels | +| **Min chunk** | None | 24 tokens | +| **Accuracy** | ~85% | 100% (matches OpenAI) | + +### Research-Backed Strategy Rankings + +| Strategy | Research Impact | Status | +|----------|-----------------|--------| +| **Recursive** | Industry default, 85-90% recall | **Built-in (Default)** | +| **Semantic** | +70% accuracy | Plugin | +| **Markdown** | Essential for docs | Plugin | +| **Hierarchical** | +20-35% relevance | Plugin | +| **Code (cAST)** | +4.3 Recall@5 | Plugin | +| **Late** | +24% retrieval | Plugin | +| **Max-Min** | 0.85 AMI | Plugin | +| **Agentic** | Highest quality ($$$) | Plugin | +| **Proposition** | Highest precision ($$$) | Plugin | + +--- + +## Research Methodology + +### Sources Reviewed + +**Academic Papers:** +1. "Late Chunking: Contextual Chunk Embeddings" - arXiv:2409.04701v3 (Jul 2025) [^1] +2. "Reconstructing Context: Evaluating Advanced Chunking Strategies for RAG" - arXiv:2504.19754 (Apr 2025) [^2] +3. "A Systematic Analysis of Chunking Strategies for Reliable Question Answering" - arXiv:2601.14123 (Jan 2026) [^3] +4. "Max–Min semantic chunking of documents for RAG application" - Springer Discover Computing (Jun 2025) [^4] +5. "cAST: Enhancing Code RAG with AST Chunking" - EMNLP 2025 [^5] +6. "Comparative Evaluation of Advanced Chunking for Clinical Decision Support" - PMC (Nov 2025) [^6] + +**Industry Benchmarks & Guides:** +7. "Best Chunking Strategies for RAG in 2025" - Firecrawl [^7] +8. "Chunking Strategies to Improve Your RAG Performance" - Weaviate [^8] +9. "Finding the Best Chunking Strategy for Accurate AI Responses" - NVIDIA [^9] +10. "The Ultimate Guide to Chunking Strategies for RAG" - Databricks [^10] +11. "Contextual Retrieval" - Anthropic [^11] +12. "Late Chunking in Long-Context Embedding Models" - Jina AI [^12] +13. "Max–Min Semantic Chunking" - Milvus [^13] +14. "Breaking up is hard to do: Chunking in RAG" - Stack Overflow [^14] +15. "Document Chunking for RAG: 9 Strategies Tested" - LangCopilot [^15] + +**Tokenizer Research:** +16. "gpt-tokenizer: OpenAI GPT models tokenizer" - GitHub [^18] +17. "js-tiktoken: BPE tokeniser for OpenAI models" - npm [^19] +18. "What is o200k Harmony? OpenAI's tokenizer" - Modal [^20] + +--- + +## Strategy-by-Strategy Analysis + +### 1. Recursive Token-Based Splitting (DEFAULT) + +**What it does:** Splits text using a hierarchy of separators (paragraphs → sentences → words → characters) while counting actual tokens using `o200k_base` encoding. + +**Research Findings:** +> "Always start with RecursiveCharacterTextSplitter—it's the versatile, reliable workhorse of chunking." [^7] + +> "Recursive Chunking offers the best balance, preserves structure (paragraphs → sentences), and works for most RAG applications. It's the **LangChain default**." [^7] + +> "RecursiveCharacterTextSplitter with 400-512 tokens delivered 85-90% recall in Chroma's tests without computational overhead." [^7] + +> "Token-based chunking is recommended for production RAG systems to ensure chunks stay within embedding model context windows." [^10] + +**Why Token-Based:** +- Accurate chunk sizing for embedding models +- Prevents truncation errors +- Matches OpenAI embedding limits exactly +- GPT-5 and all modern models use `o200k_base` encoding [^18] + +**Verdict: DEFAULT** - Industry standard + production-grade token accuracy. + +--- + +### 2. Semantic Chunking + +**What it does:** Splits text at natural language boundaries while respecting semantic coherence. + +**Research Findings:** +> "Testing of 9 chunking strategies found that semantic chunking achieved the best accuracy with a **70% improvement**." [^15] + +**Verdict: PLUGIN** - Strongest accuracy gains, available via `bunx unrag add chunker:semantic`. + +--- + +### 3. Markdown-Aware Chunking + +**What it does:** Respects markdown structure - keeps code blocks intact, splits on headers, preserves lists/tables. + +**Research Findings:** +> "For documentation and README files, markdown-aware chunking that preserves code blocks and respects headers is essential for retrieval quality." [^10] + +**Verdict: PLUGIN** - Critical for documentation, available via `bunx unrag add chunker:markdown`. + +--- + +### 4. Hierarchical (Parent-Child) Chunking + +**What it does:** Creates two levels - large "parent" chunks for context, small "child" chunks for precise retrieval. + +**Research Findings:** +> "Hierarchical chunking can provide a typical gain of **+20-35% relevance** on structured documents." [^16] + +**Verdict: PLUGIN** - Specialized use case, available via `bunx unrag add chunker:hierarchical`. + +--- + +### 5. Code-Aware (AST) Chunking + +**What it does:** Uses Abstract Syntax Tree parsing to split code at function/class boundaries. + +**Research Findings:** +> "cAST improves Recall@5 by **4.3 points** on RepoEval and Pass@1 by **2.67 points** on SWE-bench." [^5] + +**Verdict: PLUGIN** - Specialized for code, available via `bunx unrag add chunker:code`. + +--- + +### 6. Agentic (LLM-Powered) Chunking + +**What it does:** Uses an LLM to intelligently decide where to split documents. + +**Research Findings:** +> "LLM-based chunking should be reserved for high-value, complex documents where retrieval quality is critical and budget is less of a concern." [^7] + +**Verdict: PLUGIN** - Expensive but valuable, available via `bunx unrag add chunker:agentic`. + +--- + +### 7. Late Chunking + +**What it does:** Embeds entire document first, then applies chunking to token representations. + +**Research Findings:** +> "Late chunking with 512 tokens showed **+24.47% improvement** on some benchmarks." [^1] + +**Verdict: PLUGIN** - Requires long-context models, available via `bunx unrag add chunker:late`. + +--- + +### 8. Max-Min Semantic Chunking + +**What it does:** Embeds sentences first, then groups based on semantic similarity. + +**Research Findings:** +> "Max–Min semantic chunking achieved superior performance with average AMI scores of 0.85, 0.90." [^4] + +**Verdict: PLUGIN** - Research-backed, available via `bunx unrag add chunker:maxmin`. + +--- + +### 9. Proposition-Based Chunking + +**What it does:** Extracts atomic propositions (single facts) from text using LLM. + +**Research Findings:** +> "Proposition-based chunking indexes atomic, claim-level statements for high-precision retrieval." [^6] + +**Verdict: PLUGIN** - Very expensive, available via `bunx unrag add chunker:proposition`. + +--- + +## Benchmark Comparison + +| Strategy | Accuracy/Recall | Cost | Complexity | Status | +|----------|----------------|------|------------|--------| +| **Recursive (Token)** | 85-90% recall | Low | Low | **Built-in** | +| Semantic | +70% | Low | Medium | Plugin | +| Markdown | High (docs) | Low | Medium | Plugin | +| Hierarchical | +20-35% | Medium | High | Plugin | +| Code (AST) | +4.3 R@5 | Medium | High | Plugin | +| Late | +24.47% | Low | Medium | Plugin | +| Max-Min | 0.85 AMI | High | High | Plugin | +| Agentic | Highest | **$$$$** | Low | Plugin | +| Proposition | Highest | **$$$$** | High | Plugin | + +--- + +## Implementation Decision + +### Why Token-Based Recursive as Default? + +| Criteria | Word-Based | Token-Based Recursive | +|----------|------------|----------------------| +| **Accuracy** | ~85% (word ≠ token) | 100% (actual tokens) | +| **Industry Standard** | No | Yes (LangChain default) | +| **Production Ready** | No (truncation risk) | Yes | +| **Dependencies** | None | js-tiktoken (~2MB) | +| **Model Compatibility** | Approximate | Exact (o200k_base) | + +**Decision:** The 2MB dependency cost is justified by: +1. **100% token accuracy** - no truncation errors +2. **Industry standard** - matches LangChain, LlamaIndex +3. **Future-proof** - o200k_base supports GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1 +4. **Research-backed** - 85-90% recall in benchmarks + +### Why Remove Word-Based Chunking? + +Word-based chunking was removed because: +1. **Inaccurate** - 1 word ≠ 1 token (can be 0.5-3 tokens) +2. **Truncation risk** - chunks may exceed embedding limits +3. **Not production-grade** - no major RAG framework uses it +4. **Superseded** - token-based recursive is strictly better + +--- + +## Default Chunker: Token-Based Recursive + +### Technical Specification + +```typescript +// Tokenizer: o200k_base (GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1) +import { Tiktoken } from 'js-tiktoken/lite' +import o200k_base from 'js-tiktoken/ranks/o200k_base' + +const encoder = new Tiktoken(o200k_base) + +// Default options +const defaultChunkingOptions = { + chunkSize: 512, // tokens + chunkOverlap: 50, // tokens + minChunkSize: 24, // tokens (avoid tiny chunks) + separators: [ + '\n\n', // paragraphs + '\n', // lines + '. ', // sentences (period) + '? ', // sentences (question) + '! ', // sentences (exclamation) + '; ', // semicolon clauses + ': ', // colon clauses + ', ', // comma phrases + ' ', // words + '' // characters (last resort) + ] +} +``` + +### Features + +1. **Accurate Token Counting** - Uses `o200k_base` encoding for 100% accuracy with modern OpenAI models +2. **10-Level Separator Hierarchy** - Paragraphs → sentences → clauses → words → characters +3. **Minimum Chunk Threshold** - Avoids tiny chunks (24 token minimum) +4. **Token-Based Overlap** - Preserves context between chunks +5. **Configurable** - All options can be overridden + +### Usage + +```typescript +// Default - uses recursive chunking with o200k_base +export default defineUnragConfig({ + embedding: { provider: "openai" } + // chunking is automatic - uses recursive by default +}) + +// Custom options +export default defineUnragConfig({ + embedding: { provider: "openai" }, + chunking: { + method: "recursive", + options: { + chunkSize: 256, // smaller chunks + chunkOverlap: 25, // less overlap + minChunkSize: 16 // allow smaller chunks + } + } +}) + +// Use countTokens utility +import { countTokens } from 'unrag' +const tokens = countTokens("Hello world") // 2 +``` + +--- + +## Plugin Architecture + +### Built-in (Core) + +| Method | Description | Dependencies | +|--------|-------------|--------------| +| `recursive` | Token-based recursive splitting (DEFAULT) | `js-tiktoken` | + +### Plugins (Install via CLI) + +| Method | Command | Dependencies | +|--------|---------|--------------| +| `semantic` | `bunx unrag add chunker:semantic` | None | +| `markdown` | `bunx unrag add chunker:markdown` | None | +| `hierarchical` | `bunx unrag add chunker:hierarchical` | None | +| `code` | `bunx unrag add chunker:code` | `tree-sitter` (optional) | +| `agentic` | `bunx unrag add chunker:agentic` | AI SDK | +| `late` | `bunx unrag add chunker:late` | None | +| `maxmin` | `bunx unrag add chunker:maxmin` | Embedding provider | +| `proposition` | `bunx unrag add chunker:proposition` | AI SDK | + +### Escape Hatch + +```typescript +// Custom chunker +export default defineUnragConfig({ + chunking: { + method: "custom", + chunker: (content, options) => { + // Your custom logic + return [{ index: 0, content, tokenCount: 100 }] + } + } +}) +``` + +--- + +## Final Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ UNRAG CHUNKING │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ BUILT-IN (Core) │ +│ ┌──────────────────────────────────────────────────────┐ │ +│ │ recursive (DEFAULT) │ │ +│ │ • Token-based with js-tiktoken (o200k_base) │ │ +│ │ • 10-level separator hierarchy │ │ +│ │ • Min chunk threshold (24 tokens) │ │ +│ │ • GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1 support │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ +│ PLUGINS (Install via CLI) │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ semantic │ │ markdown │ │hierarchic│ │ code │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ agentic │ │ late │ │ maxmin │ │proposit. │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +│ │ +│ ESCAPE HATCH │ +│ ┌──────────┐ │ +│ │ custom │ ← Bring your own chunker │ +│ └──────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## Citations + +[^1]: Günther, M. et al. "Late Chunking: Contextual Chunk Embeddings Using Long-Context Embedding Models." arXiv:2409.04701v3, July 2025. https://arxiv.org/pdf/2409.04701 + +[^2]: "Reconstructing Context: Evaluating Advanced Chunking Strategies for Retrieval-Augmented Generation." arXiv:2504.19754, April 2025. https://arxiv.org/abs/2504.19754 + +[^3]: "A Systematic Analysis of Chunking Strategies for Reliable Question Answering." arXiv:2601.14123, January 2026. https://arxiv.org/html/2601.14123 + +[^4]: "Max–Min semantic chunking of documents for RAG application." Discover Computing, Springer, June 2025. https://link.springer.com/article/10.1007/s10791-025-09638-7 + +[^5]: Zhou, Y. et al. "cAST: Enhancing Code Retrieval-Augmented Generation with Structural Chunking via Abstract Syntax Tree." EMNLP 2025. https://arxiv.org/abs/2506.15655 + +[^6]: "Comparative Evaluation of Advanced Chunking for Retrieval-Augmented Generation in Large Language Models for Clinical Decision Support." PMC, November 2025. https://pmc.ncbi.nlm.nih.gov/articles/PMC12649634/ + +[^7]: "Best Chunking Strategies for RAG in 2025." Firecrawl. https://www.firecrawl.dev/blog/best-chunking-strategies-rag-2025 + +[^8]: "Chunking Strategies to Improve Your RAG Performance." Weaviate. https://weaviate.io/blog/chunking-strategies-for-rag + +[^9]: "Finding the Best Chunking Strategy for Accurate AI Responses." NVIDIA Developer Blog. https://developer.nvidia.com/blog/finding-the-best-chunking-strategy-for-accurate-ai-responses/ + +[^10]: "The Ultimate Guide to Chunking Strategies for RAG Applications." Databricks Community. https://community.databricks.com/t5/technical-blog/the-ultimate-guide-to-chunking-strategies-for-rag-applications/ba-p/113089 + +[^11]: "Contextual Retrieval: A Guide With Implementation." DataCamp (Anthropic). https://www.datacamp.com/tutorial/contextual-retrieval-anthropic + +[^12]: "Late Chunking in Long-Context Embedding Models." Jina AI. https://jina.ai/news/late-chunking-in-long-context-embedding-models/ + +[^13]: "Max–Min Semantic Chunking: Top Chunking Strategy to Improve RAG Performance." Milvus Blog. https://milvus.io/blog/embedding-first-chunking-second-smarter-rag-retrieval-with-max-min-semantic-chunking.md + +[^14]: "Breaking up is hard to do: Chunking in RAG applications." Stack Overflow Blog, December 2024. https://stackoverflow.blog/2024/12/27/breaking-up-is-hard-to-do-chunking-in-rag-applications/ + +[^15]: "Document Chunking for RAG: 9 Strategies Tested (70% Accuracy Boost 2025)." LangCopilot. https://langcopilot.com/posts/2025-10-11-document-chunking-for-rag-practical-guide + +[^16]: "Hierarchical Chunking: Preserving Document Structure." Ailog RAG. https://app.ailog.fr/en/blog/guides/hierarchical-chunking + +[^17]: "Parent-Child Chunking in LangChain for Advanced RAG." Medium. https://medium.com/@seahorse.technologies.sl/parent-child-chunking-in-langchain-for-advanced-rag-e7c37171995a + +[^18]: "gpt-tokenizer: The fastest JavaScript BPE Tokenizer for OpenAI's GPT models." GitHub. https://github.com/niieani/gpt-tokenizer + +[^19]: "js-tiktoken: BPE tokeniser for use with OpenAI's models." npm. https://www.npmjs.com/package/js-tiktoken + +[^20]: "What is o200k Harmony? OpenAI's latest edition to their tiktoken tokenizer library." Modal. https://modal.com/blog/what-is-o200k-harmony + +--- + +## Summary + +### Breaking Change (v0.4.0) + +- **Removed:** Word-based chunking +- **Default:** Token-based recursive chunking with `js-tiktoken` +- **Encoding:** `o200k_base` (GPT-5, GPT-4o, o1, o3, o4-mini, gpt-4.1) + +### Key Takeaways + +1. **Token-based recursive** is the new default - industry standard with 100% token accuracy +2. **o200k_base encoding** supports all modern OpenAI models including GPT-5 +3. **10-level separator hierarchy** preserves semantic boundaries +4. **Minimum chunk threshold** (24 tokens) avoids tiny chunks +5. **All other strategies** available as plugins via CLI +6. **Custom chunker** escape hatch for edge cases + +### Migration Guide + +```typescript +// Old (v0.3.x) - word-based +const chunks = defaultChunker(text, { chunkSize: 200, chunkOverlap: 40 }) +// chunkSize was in WORDS + +// New (v0.4.0+) - token-based +const chunks = recursiveChunker(text, { chunkSize: 512, chunkOverlap: 50 }) +// chunkSize is now in TOKENS + +// Token counting utility +import { countTokens } from 'unrag' +const tokens = countTokens("Your text here") +```