diff --git a/docs/archive/conversation-notes-2025-01-25.md b/docs/archive/2025-01-25-initial-spec-strategy.md similarity index 100% rename from docs/archive/conversation-notes-2025-01-25.md rename to docs/archive/2025-01-25-initial-spec-strategy.md diff --git a/docs/archive/conversation-notes-2025-01-28.md b/docs/archive/2025-01-28-phantom-anchor-design.md similarity index 100% rename from docs/archive/conversation-notes-2025-01-28.md rename to docs/archive/2025-01-28-phantom-anchor-design.md diff --git a/examples/presentation-document/content/document.json b/examples/presentation-document/content/document.json new file mode 100644 index 0000000..01b2ea5 --- /dev/null +++ b/examples/presentation-document/content/document.json @@ -0,0 +1,160 @@ +{ + "version": "0.1", + "blocks": [ + { + "type": "heading", + "id": "chapter-1", + "level": 1, + "children": [{ "type": "text", "value": "Chapter 1: Introduction" }] + }, + { + "type": "paragraph", + "id": "p-intro-1", + "children": [ + { "type": "text", "value": "This document demonstrates the advanced presentation features available in the Codex format. The presentation layer provides fine-grained control over typography, layout, and print output." } + ] + }, + { + "type": "paragraph", + "id": "p-intro-2", + "children": [ + { "type": "text", "value": "The separation of content from presentation means that the same semantic document can be rendered in multiple ways—as a printed book, a responsive web page, or an accessible screen reader output—while maintaining consistent meaning." } + ] + }, + { + "type": "heading", + "id": "section-1-1", + "level": 2, + "children": [{ "type": "text", "value": "1.1 Typography Features" }] + }, + { + "type": "paragraph", + "id": "p-typo-1", + "children": [ + { "type": "text", "value": "Professional typography includes hyphenation, widow and orphan control, and baseline grid alignment. These features ensure that text flows naturally and pages maintain visual consistency." 
} + ] + }, + { + "type": "paragraph", + "id": "p-typo-2", + "children": [ + { "type": "text", "value": "OpenType features like ligatures, small caps, and old-style numerals can be enabled per style. Variable fonts allow fine control over weight, width, and other axes." } + ] + }, + { + "type": "figure", + "id": "fig-layout", + "children": [ + { + "type": "image", + "src": "assets/images/layout-example.png", + "alt": "Diagram showing multi-column layout with flow regions" + }, + { + "type": "figcaption", + "children": [{ "type": "text", "value": "Figure 1: Multi-column layout with flow regions" }] + } + ], + "numbering": "auto" + }, + { + "type": "heading", + "id": "section-1-2", + "level": 2, + "children": [{ "type": "text", "value": "1.2 Page Layout" }] + }, + { + "type": "paragraph", + "id": "p-layout-1", + "children": [ + { "type": "text", "value": "The presentation extension supports both multi-column and grid layouts. Text can flow through defined regions across multiple pages, enabling complex magazine-style layouts." } + ] + }, + { + "type": "paragraph", + "id": "p-layout-2", + "children": [ + { "type": "text", "value": "Master pages define reusable templates with margins, headers, footers, and background elements. Rules can automatically apply different masters to chapter openings, full-bleed pages, and default pages." } + ] + }, + { + "type": "heading", + "id": "chapter-2", + "level": 1, + "children": [{ "type": "text", "value": "Chapter 2: Print Features" }] + }, + { + "type": "paragraph", + "id": "p-print-1", + "children": [ + { "type": "text", "value": "For professional printing, the presentation extension supports bleeds, trim marks, registration marks, and spot colors. Documents can be configured for PDF/X compliance." } + ] + }, + { + "type": "paragraph", + "id": "p-print-2", + "children": [ + { "type": "text", "value": "Running headers and footers can include dynamic content like chapter titles, section names, page numbers, and total page counts. 
Different templates can be applied to odd and even pages for book-style facing layouts." } + ] + }, + { + "type": "table", + "id": "table-features", + "children": [ + { + "type": "tableRow", + "header": true, + "children": [ + { + "type": "tableCell", + "children": [{ "type": "paragraph", "children": [{ "type": "text", "value": "Feature" }] }] + }, + { + "type": "tableCell", + "children": [{ "type": "paragraph", "children": [{ "type": "text", "value": "Description" }] }] + } + ] + }, + { + "type": "tableRow", + "children": [ + { + "type": "tableCell", + "children": [{ "type": "paragraph", "children": [{ "type": "text", "value": "Master Pages" }] }] + }, + { + "type": "tableCell", + "children": [{ "type": "paragraph", "children": [{ "type": "text", "value": "Reusable page templates with margins and headers" }] }] + } + ] + }, + { + "type": "tableRow", + "children": [ + { + "type": "tableCell", + "children": [{ "type": "paragraph", "children": [{ "type": "text", "value": "Hyphenation" }] }] + }, + { + "type": "tableCell", + "children": [{ "type": "paragraph", "children": [{ "type": "text", "value": "Language-aware word breaking" }] }] + } + ] + }, + { + "type": "tableRow", + "children": [ + { + "type": "tableCell", + "children": [{ "type": "paragraph", "children": [{ "type": "text", "value": "Line Numbering" }] }] + }, + { + "type": "tableCell", + "children": [{ "type": "paragraph", "children": [{ "type": "text", "value": "Margin line numbers for legal/manuscript documents" }] }] + } + ] + } + ] + } + ] +} diff --git a/examples/presentation-document/manifest.json b/examples/presentation-document/manifest.json new file mode 100644 index 0000000..997a870 --- /dev/null +++ b/examples/presentation-document/manifest.json @@ -0,0 +1,29 @@ +{ + "codex": "0.1", + "id": "pending", + "state": "draft", + "created": "2025-01-29T10:00:00Z", + "modified": "2025-01-29T10:00:00Z", + "content": { + "path": "content/document.json", + "hash": 
"sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + }, + "presentation": [ + { + "type": "paginated", + "path": "presentation/paginated.json", + "hash": "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "default": true + } + ], + "metadata": { + "dublinCore": "metadata/dublin-core.json" + }, + "extensions": [ + { + "id": "codex.presentation", + "version": "0.1", + "required": false + } + ] +} diff --git a/examples/presentation-document/metadata/dublin-core.json b/examples/presentation-document/metadata/dublin-core.json new file mode 100644 index 0000000..f9540b0 --- /dev/null +++ b/examples/presentation-document/metadata/dublin-core.json @@ -0,0 +1,13 @@ +{ + "version": "1.1", + "terms": { + "title": "Presentation Extension Example", + "creator": "Codex Specification Team", + "subject": ["Document Format", "Presentation", "Typography"], + "description": "An example document demonstrating the Presentation Extension features including master pages, typography, multi-column layout, and running headers/footers.", + "date": "2025-01-29", + "type": "Text", + "format": "application/vnd.codex+zip", + "language": "en" + } +} diff --git a/examples/presentation-document/presentation/paginated.json b/examples/presentation-document/presentation/paginated.json new file mode 100644 index 0000000..d67cc55 --- /dev/null +++ b/examples/presentation-document/presentation/paginated.json @@ -0,0 +1,184 @@ +{ + "version": "0.1", + "type": "paginated", + "defaults": { + "pageSize": { "width": "6in", "height": "9in" }, + "margins": { "top": "0.75in", "outside": "0.75in", "bottom": "0.875in", "inside": "1in" } + }, + "masterPages": { + "default": { + "margins": { "top": "0.75in", "right": "0.75in", "bottom": "0.875in", "left": "1in" }, + "header": { "height": "0.5in" }, + "footer": { "height": "0.5in" } + }, + "chapter-start": { + "basedOn": "default", + "margins": { "top": "2in" }, + "header": null + }, + "full-bleed": { + "margins": { 
"top": "0", "right": "0", "bottom": "0", "left": "0" } + } + }, + "masterRules": [ + { "match": { "first": true }, "master": "chapter-start" }, + { "match": { "contains": "full-bleed-image" }, "master": "full-bleed" }, + { "match": { "default": true }, "master": "default" } + ], + "pageTemplate": { + "odd": { + "header": { + "height": "0.4in", + "content": { + "right": { "variable": "section-title" } + } + }, + "footer": { + "height": "0.4in", + "content": { + "right": { "text": "{pageNumber}" } + } + } + }, + "even": { + "header": { + "height": "0.4in", + "content": { + "left": { "variable": "chapter-title" } + } + }, + "footer": { + "height": "0.4in", + "content": { + "left": { "text": "{pageNumber}" } + } + } + } + }, + "layout": { + "type": "columns", + "columns": 1, + "gap": "0.25in" + }, + "typography": { + "hyphenation": { + "enabled": true, + "language": "en-US", + "minWordLength": 6, + "minBefore": 3, + "minAfter": 2, + "maxConsecutive": 2 + }, + "widows": 2, + "orphans": 2, + "baselineGrid": { + "enabled": true, + "lineHeight": "14pt", + "offset": "0pt" + }, + "lineNumbering": { + "enabled": false + } + }, + "print": { + "bleed": { + "top": "0.125in", + "right": "0.125in", + "bottom": "0.125in", + "left": "0.125in" + }, + "trim": { + "width": "6in", + "height": "9in" + }, + "cropMarks": true, + "registrationMarks": true, + "colorBars": false + }, + "tableOfContents": { + "title": "Contents", + "levels": [1, 2], + "pageNumbers": true, + "leaders": "dots" + }, + "footnotes": { + "numbering": "1", + "position": "page-bottom", + "separator": { + "width": "2in", + "style": "solid", + "margin": "0.5em" + } + }, + "styles": { + "body": { + "fontFamily": "Source Serif Pro", + "fontSize": "11pt", + "lineHeight": 1.4, + "textAlign": "justify", + "fontFeatureSettings": { + "liga": true, + "kern": true, + "onum": true + } + }, + "heading1": { + "fontFamily": "Source Sans Pro", + "fontSize": "24pt", + "fontWeight": 700, + "lineHeight": 1.2, + "marginTop": "0", + 
"marginBottom": "18pt", + "pageBreakBefore": "always" + }, + "heading2": { + "fontFamily": "Source Sans Pro", + "fontSize": "16pt", + "fontWeight": 600, + "lineHeight": 1.3, + "marginTop": "24pt", + "marginBottom": "12pt" + }, + "chapter-first-para": { + "extends": "body", + "dropCap": { + "lines": 3, + "fontFamily": "inherit", + "fontWeight": "bold", + "marginRight": "0.1em" + } + }, + "figure": { + "marginTop": "1em", + "marginBottom": "1em", + "textAlign": "center" + }, + "figcaption": { + "fontFamily": "Source Sans Pro", + "fontSize": "9pt", + "fontStyle": "italic", + "marginTop": "0.5em" + }, + "table": { + "fontSize": "10pt", + "marginTop": "1em", + "marginBottom": "1em" + }, + "tableCell": { + "paddingTop": "4pt", + "paddingRight": "8pt", + "paddingBottom": "4pt", + "paddingLeft": "8pt", + "borderWidth": "0.5pt", + "borderStyle": "solid", + "borderColor": "#cccccc" + }, + "toc1": { + "fontWeight": "bold", + "marginTop": "1em" + }, + "toc2": { + "marginLeft": "1em" + } + } +} diff --git a/package.json b/package.json index 5b895d2..f7c496e 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,10 @@ "scripts": { "test": "npm run validate:schemas && npm run validate:examples", "validate:schemas": "npx tsx scripts/validate-schemas.ts", - "validate:examples": "npx tsx scripts/validate-examples.ts" + "validate:examples": "npx tsx scripts/validate-examples.ts", + "check:sync": "npx tsx scripts/check-spec-schema-sync.ts", + "check:refs": "npx tsx scripts/validate-cross-refs.ts", + "generate:template": "npx tsx scripts/generate-template.ts" }, "devDependencies": { "ajv": "^8.17.1", diff --git a/schemas/content.schema.json b/schemas/content.schema.json index 8049c42..42470ab 100644 --- a/schemas/content.schema.json +++ b/schemas/content.schema.json @@ -152,6 +152,10 @@ "style": { "type": "string", "description": "Named style to apply" + }, + "semantic": { + "$ref": "https://codex.document/schemas/semantic.schema.json#/$defs/jsonLdAnnotation", + "description": 
"JSON-LD semantic annotation for this block" } } }, @@ -182,7 +186,8 @@ { "$ref": "https://codex.document/schemas/semantic.schema.json#/$defs/glossaryMark" }, { "$ref": "https://codex.document/schemas/academic.schema.json#/$defs/theoremRefMark" }, { "$ref": "https://codex.document/schemas/academic.schema.json#/$defs/equationRefMark" }, - { "$ref": "https://codex.document/schemas/academic.schema.json#/$defs/algorithmRefMark" } + { "$ref": "https://codex.document/schemas/academic.schema.json#/$defs/algorithmRefMark" }, + { "$ref": "https://codex.document/schemas/presentation.schema.json#/$defs/indexMark" } ] } } @@ -314,6 +319,16 @@ "type": "string", "description": "Programming language identifier" }, + "highlighting": { + "type": "string", + "enum": ["none", "tokens"], + "description": "Highlighting mode: 'none' for plain text, 'tokens' for pre-tokenized highlighting" + }, + "tokens": { + "type": "array", + "description": "Pre-tokenized syntax highlighting tokens (used when highlighting='tokens')", + "items": { "$ref": "#/$defs/highlightToken" } + }, "children": { "type": "array", "items": { "$ref": "#/$defs/textNode" }, @@ -321,6 +336,29 @@ } } }, + "highlightToken": { + "type": "object", + "description": "A syntax highlighting token", + "required": ["type", "value"], + "properties": { + "type": { + "type": "string", + "enum": [ + "keyword", "function", "class", "variable", "parameter", + "string", "number", "boolean", "null", + "comment", "docstring", + "operator", "punctuation", "delimiter", + "type", "namespace", "decorator", + "plain" + ], + "description": "Token type for syntax highlighting" + }, + "value": { + "type": "string", + "description": "Token text content" + } + } + }, "image": { "type": "object", "required": ["type", "src", "alt"], diff --git a/schemas/legal.schema.json b/schemas/legal.schema.json new file mode 100644 index 0000000..6206008 --- /dev/null +++ b/schemas/legal.schema.json @@ -0,0 +1,250 @@ +{ + "$schema": 
"https://json-schema.org/draft/2020-12/schema", + "$id": "https://codex.document/schemas/legal.schema.json", + "title": "Codex Legal Extension", + "description": "Schema for legal extension blocks and marks in a Codex document", + "$defs": { + "citationCategory": { + "type": "string", + "description": "Category for Table of Authorities grouping", + "enum": ["cases", "statutes", "regulations", "constitutions", "treatises", "law-reviews", "other"] + }, + "citationFormat": { + "type": "string", + "description": "Legal citation style", + "enum": ["bluebook", "alwd", "mcgill", "oscola"] + }, + "legalCiteMark": { + "type": "object", + "description": "Legal citation mark for Table of Authorities generation", + "required": ["type", "citation", "category"], + "properties": { + "type": { "const": "legal:cite" }, + "citation": { + "type": "string", + "description": "Full citation string" + }, + "category": { "$ref": "#/$defs/citationCategory" }, + "shortForm": { + "type": "string", + "description": "Short form for subsequent references" + }, + "pinpoint": { + "type": "string", + "description": "Specific page, paragraph, or section reference" + }, + "format": { "$ref": "#/$defs/citationFormat" } + }, + "additionalProperties": false + }, + "toaCategoryConfig": { + "type": "object", + "description": "Table of Authorities category configuration", + "required": ["name", "key"], + "properties": { + "name": { + "type": "string", + "description": "Display name for the category" + }, + "key": { "$ref": "#/$defs/citationCategory" }, + "format": { "$ref": "#/$defs/citationFormat" } + }, + "additionalProperties": false + }, + "tableOfAuthoritiesBlock": { + "type": "object", + "description": "Table of Authorities block for auto-generated citation index", + "required": ["type"], + "properties": { + "type": { "const": "legal:tableOfAuthorities" }, + "id": { + "type": "string", + "description": "Block identifier" + }, + "title": { + "type": "string", + "description": "Section title", + 
"default": "Table of Authorities" + }, + "categories": { + "type": "array", + "description": "Category configuration for grouping", + "items": { "$ref": "#/$defs/toaCategoryConfig" } + }, + "pageReferences": { + "type": "boolean", + "description": "Include page references", + "default": true + }, + "passimThreshold": { + "type": "integer", + "description": "Number of references before showing 'passim'", + "minimum": 1, + "default": 5 + } + }, + "additionalProperties": false + }, + "party": { + "type": "object", + "description": "Party information in a case caption", + "properties": { + "name": { + "type": "string", + "description": "Party name" + }, + "role": { + "type": "string", + "description": "Party role (e.g., Plaintiff, Defendant, Appellant)" + } + } + }, + "captionBlock": { + "type": "object", + "description": "Court caption block", + "required": ["type", "court"], + "properties": { + "type": { "const": "legal:caption" }, + "id": { + "type": "string", + "description": "Block identifier" + }, + "court": { + "type": "string", + "description": "Court name" + }, + "caseNumber": { + "type": "string", + "description": "Case or docket number" + }, + "parties": { + "type": "object", + "description": "Parties to the case", + "properties": { + "plaintiff": { + "oneOf": [ + { "type": "string" }, + { "$ref": "#/$defs/party" } + ] + }, + "defendant": { + "oneOf": [ + { "type": "string" }, + { "$ref": "#/$defs/party" } + ] + }, + "appellant": { + "oneOf": [ + { "type": "string" }, + { "$ref": "#/$defs/party" } + ] + }, + "appellee": { + "oneOf": [ + { "type": "string" }, + { "$ref": "#/$defs/party" } + ] + }, + "petitioner": { + "oneOf": [ + { "type": "string" }, + { "$ref": "#/$defs/party" } + ] + }, + "respondent": { + "oneOf": [ + { "type": "string" }, + { "$ref": "#/$defs/party" } + ] + } + } + }, + "docket": { + "type": "string", + "description": "Docket or term information" + }, + "judge": { + "type": "string", + "description": "Assigned judge" + } + }, + 
"additionalProperties": false + }, + "signatureBlockSigner": { + "type": "object", + "description": "Signer information for legal signature block", + "properties": { + "name": { + "type": "string", + "description": "Full name" + }, + "title": { + "type": "string", + "description": "Title or role" + }, + "barNumber": { + "type": "string", + "description": "Bar admission number" + }, + "firm": { + "type": "string", + "description": "Law firm name" + }, + "address": { + "type": "string", + "description": "Business address" + }, + "telephone": { + "type": "string", + "description": "Telephone number" + }, + "fax": { + "type": "string", + "description": "Fax number" + }, + "email": { + "type": "string", + "format": "email", + "description": "Email address" + } + } + }, + "signatureBlockBlock": { + "type": "object", + "description": "Legal signature block with attorney information", + "required": ["type", "role", "signer"], + "properties": { + "type": { "const": "legal:signatureBlock" }, + "id": { + "type": "string", + "description": "Block identifier" + }, + "role": { + "type": "string", + "description": "Role of the signatory", + "enum": ["counsel", "attorney", "party", "witness", "notary"] + }, + "signer": { "$ref": "#/$defs/signatureBlockSigner" }, + "date": { + "type": "string", + "format": "date", + "description": "Date of signature" + } + }, + "additionalProperties": false + } + }, + "type": "object", + "description": "Legal extension configuration", + "properties": { + "citationStyle": { + "$ref": "#/$defs/citationFormat", + "description": "Default citation style for the document" + }, + "jurisdiction": { + "type": "string", + "description": "Jurisdiction for citation formatting" + } + }, + "additionalProperties": false +} diff --git a/schemas/manifest.schema.json b/schemas/manifest.schema.json index b5e71f9..05f04ff 100644 --- a/schemas/manifest.schema.json +++ b/schemas/manifest.schema.json @@ -251,6 +251,11 @@ "required": { "type": "boolean", "description": 
"Whether extension is required for correct rendering" + }, + "config": { + "type": "object", + "description": "Extension-specific configuration paths or inline settings", + "additionalProperties": true } } } diff --git a/schemas/presentation.schema.json b/schemas/presentation.schema.json index f6ece9b..eaeb715 100644 --- a/schemas/presentation.schema.json +++ b/schemas/presentation.schema.json @@ -44,6 +44,58 @@ "pageTemplate": { "$ref": "#/$defs/pageTemplate" }, + "layout": { + "$ref": "#/$defs/layout" + }, + "flowRegions": { + "type": "array", + "description": "Flow region definitions for text flow across pages/areas", + "items": { + "$ref": "#/$defs/flowRegion" + } + }, + "print": { + "$ref": "#/$defs/printSettings" + }, + "masterPages": { + "type": "object", + "description": "Master page template definitions", + "additionalProperties": { + "$ref": "#/$defs/masterPage" + } + }, + "masterRules": { + "type": "array", + "description": "Auto-application rules for master pages", + "items": { + "$ref": "#/$defs/masterRule" + } + }, + "colors": { + "type": "object", + "description": "Named color definitions including spot colors", + "additionalProperties": { + "$ref": "#/$defs/colorDefinition" + } + }, + "tableOfContents": { + "$ref": "#/$defs/tableOfContentsConfig" + }, + "listOfFigures": { + "$ref": "#/$defs/listOfConfig" + }, + "listOfTables": { + "$ref": "#/$defs/listOfConfig" + }, + "index": { + "$ref": "#/$defs/indexConfig" + }, + "footnotes": { + "$ref": "#/$defs/footnotesConfig" + }, + "endnotes": { + "$ref": "#/$defs/endnotesConfig" + }, "styles": { "type": "object", "description": "Style definitions", @@ -267,6 +319,343 @@ }, "footer": { "$ref": "#/$defs/pageHeaderFooter" + }, + "odd": { + "type": "object", + "description": "Template for odd pages", + "properties": { + "header": { "$ref": "#/$defs/pageHeaderFooter" }, + "footer": { "$ref": "#/$defs/pageHeaderFooter" } + } + }, + "even": { + "type": "object", + "description": "Template for even pages", + 
"properties": { + "header": { "$ref": "#/$defs/pageHeaderFooter" }, + "footer": { "$ref": "#/$defs/pageHeaderFooter" } + } + } + } + }, + "layout": { + "type": "object", + "description": "Page layout configuration", + "properties": { + "type": { + "type": "string", + "enum": ["columns", "grid"], + "description": "Layout type" + }, + "columns": { + "type": "integer", + "minimum": 1, + "maximum": 12, + "description": "Number of columns" + }, + "rows": { + "oneOf": [ + { "type": "string", "const": "auto" }, + { "type": "integer", "minimum": 1 } + ], + "description": "Number of rows (grid layout)" + }, + "gap": { + "type": "string", + "description": "Gap between columns/cells" + }, + "balance": { + "type": "boolean", + "description": "Balance column heights" + }, + "rule": { + "type": "object", + "description": "Column rule (separator line)", + "properties": { + "width": { "type": "string" }, + "style": { "type": "string" }, + "color": { "type": "string" } + } + }, + "areas": { + "type": "array", + "description": "Grid area definitions", + "items": { + "type": "object", + "required": ["name"], + "properties": { + "name": { "type": "string" }, + "column": { "type": "string" }, + "row": { "type": "string" } + } + } + } + } + }, + "flowRegion": { + "type": "object", + "description": "Flow region definition for text flow", + "required": ["id", "regions"], + "properties": { + "id": { + "type": "string", + "description": "Flow region identifier" + }, + "regions": { + "type": "array", + "description": "Ordered list of regions for text flow", + "items": { + "type": "object", + "properties": { + "page": { "type": "integer", "minimum": 1 }, + "area": { "type": "string" } + } + } + } + } + }, + "printSettings": { + "type": "object", + "description": "Print-specific settings", + "properties": { + "bleed": { + "type": "object", + "description": "Bleed area for printing", + "properties": { + "top": { "type": "string" }, + "right": { "type": "string" }, + "bottom": { "type": "string" 
}, + "left": { "type": "string" } + } + }, + "trim": { + "type": "object", + "description": "Trim box dimensions", + "properties": { + "width": { "type": "string" }, + "height": { "type": "string" } + } + }, + "cropMarks": { + "type": "boolean", + "description": "Include crop marks" + }, + "registrationMarks": { + "type": "boolean", + "description": "Include registration marks" + }, + "colorBars": { + "type": "boolean", + "description": "Include color bars" + }, + "standard": { + "type": "string", + "description": "PDF standard compliance", + "enum": ["PDF/X-1a", "PDF/X-3", "PDF/X-4", "PDF/A-1", "PDF/A-2", "PDF/A-3"] + }, + "outputIntent": { + "type": "object", + "description": "Output intent for color management", + "properties": { + "profile": { "type": "string" }, + "condition": { "type": "string" } + } + } + } + }, + "masterPage": { + "type": "object", + "description": "Master page template definition", + "properties": { + "basedOn": { + "type": "string", + "description": "Parent master page name" + }, + "margins": { + "type": "object", + "properties": { + "top": { "type": "string" }, + "right": { "type": "string" }, + "bottom": { "type": "string" }, + "left": { "type": "string" }, + "inside": { "type": "string" }, + "outside": { "type": "string" } + } + }, + "header": { + "oneOf": [ + { "type": "null" }, + { "$ref": "#/$defs/pageHeaderFooter" } + ] + }, + "footer": { + "oneOf": [ + { "type": "null" }, + { "$ref": "#/$defs/pageHeaderFooter" } + ] + } + } + }, + "masterRule": { + "type": "object", + "description": "Auto-application rule for master pages", + "required": ["match", "master"], + "properties": { + "match": { + "type": "object", + "description": "Match criteria", + "properties": { + "first": { "type": "boolean" }, + "last": { "type": "boolean" }, + "contains": { "type": "string" }, + "default": { "type": "boolean" } + } + }, + "master": { + "type": "string", + "description": "Master page name to apply" + } + } + }, + "colorDefinition": { + "type": 
"object", + "description": "Color definition including spot colors", + "properties": { + "type": { + "type": "string", + "enum": ["rgb", "cmyk", "spot"], + "description": "Color type" + }, + "name": { + "type": "string", + "description": "Spot color name (e.g., PANTONE 286 C)" + }, + "fallback": { + "type": "string", + "description": "Fallback hex color for non-spot rendering" + }, + "value": { + "type": "string", + "description": "Color value (hex for RGB, CMYK values for CMYK)" + } + } + }, + "tableOfContentsConfig": { + "type": "object", + "description": "Table of contents generation settings", + "properties": { + "title": { + "type": "string", + "description": "TOC section title" + }, + "levels": { + "type": "array", + "description": "Heading levels to include", + "items": { + "type": "integer", + "minimum": 1, + "maximum": 6 + } + }, + "styles": { + "type": "object", + "description": "Style names for each TOC level", + "additionalProperties": { "$ref": "#/$defs/style" } + }, + "pageNumbers": { + "type": "boolean", + "description": "Include page numbers" + }, + "leaders": { + "type": "string", + "enum": ["none", "dots", "dashes", "underline"], + "description": "Leader style between title and page number" + } + } + }, + "listOfConfig": { + "type": "object", + "description": "List of figures/tables configuration", + "properties": { + "title": { + "type": "string", + "description": "Section title" + }, + "style": { + "type": "string", + "description": "Named style to apply" + } + } + }, + "indexConfig": { + "type": "object", + "description": "Index generation settings", + "properties": { + "title": { + "type": "string", + "description": "Index section title" + }, + "columns": { + "type": "integer", + "minimum": 1, + "maximum": 4, + "description": "Number of columns for index layout" + }, + "style": { + "type": "object", + "description": "Styling for index entries", + "properties": { + "mainEntry": { "$ref": "#/$defs/style" }, + "subEntry": { "$ref": 
"#/$defs/style" } + } + } + } + }, + "footnotesConfig": { + "type": "object", + "description": "Footnote rendering settings", + "properties": { + "numbering": { + "type": "string", + "description": "Numbering style", + "enum": ["1", "a", "A", "i", "I", "*"] + }, + "position": { + "type": "string", + "enum": ["page-bottom", "column-bottom", "section-end"], + "description": "Footnote position" + }, + "separator": { + "type": "object", + "description": "Separator line settings", + "properties": { + "width": { "type": "string" }, + "style": { "type": "string" }, + "margin": { "type": "string" } + } + }, + "style": { + "$ref": "#/$defs/style", + "description": "Footnote text styling" + } + } + }, + "endnotesConfig": { + "type": "object", + "description": "Endnote rendering settings", + "properties": { + "title": { + "type": "string", + "description": "Endnotes section title" + }, + "numbering": { + "type": "string", + "description": "Numbering style", + "enum": ["1", "a", "A", "i", "I"] + }, + "perChapter": { + "type": "boolean", + "description": "Reset numbering per chapter" } } }, @@ -304,6 +693,10 @@ "text": { "type": "string", "description": "Text with optional variables like {pageNumber}" + }, + "variable": { + "type": "string", + "description": "Dynamic variable name (e.g., 'chapter-title', 'section-title', 'pageNumber')" } } } @@ -360,6 +753,38 @@ } } }, + "presentationReference": { + "type": "object", + "description": "Cross-reference block for figure/table/section references", + "required": ["type", "target"], + "properties": { + "type": { "const": "presentation:reference" }, + "target": { + "type": "string", + "description": "Content Anchor URI (internal references start with #)" + }, + "format": { + "type": "string", + "description": "Display format template (e.g., 'Figure #')" + } + } + }, + "indexMark": { + "type": "object", + "description": "Index entry mark for document index generation", + "required": ["type", "term"], + "properties": { + "type": { 
"const": "index" }, + "term": { + "type": "string", + "description": "Primary index term" + }, + "subterm": { + "type": "string", + "description": "Secondary index term" + } + } + }, "style": { "type": "object", "properties": { @@ -383,6 +808,16 @@ "type": "string", "enum": ["normal", "italic"] }, + "fontFeatureSettings": { + "type": "object", + "description": "OpenType feature settings", + "additionalProperties": { "type": "boolean" } + }, + "fontVariationSettings": { + "type": "object", + "description": "Variable font axis settings", + "additionalProperties": { "type": "number" } + }, "lineHeight": { "oneOf": [ { "type": "number" }, @@ -466,9 +901,52 @@ "pageBreakAfter": { "type": "string", "enum": ["auto", "always", "avoid"] + }, + "dropCap": { + "type": "object", + "description": "Drop cap settings for first letter", + "properties": { + "lines": { + "type": "integer", + "minimum": 2, + "description": "Number of lines to span" + }, + "fontFamily": { "type": "string" }, + "fontWeight": { + "oneOf": [ + { "type": "integer", "minimum": 100, "maximum": 900 }, + { "type": "string", "enum": ["normal", "bold", "lighter", "bolder", "inherit"] } + ] + }, + "marginRight": { "type": "string" } + } + }, + "overprint": { + "type": "boolean", + "description": "Enable overprint for this element (print only)" } }, "additionalProperties": false + }, + "float": { + "type": "object", + "description": "Float positioning for figures and images", + "properties": { + "position": { + "type": "string", + "enum": ["top", "bottom", "inline", "page-top", "page-bottom"], + "description": "Float position preference" + }, + "span": { + "type": "string", + "enum": ["column", "page", "spread"], + "description": "Width span of the float" + }, + "clearance": { + "type": "string", + "description": "Minimum clearance around float" + } + } } } } diff --git a/scripts/check-spec-schema-sync.ts b/scripts/check-spec-schema-sync.ts new file mode 100644 index 0000000..81fd7d7 --- /dev/null +++ 
b/scripts/check-spec-schema-sync.ts
@@ -0,0 +1,248 @@
+#!/usr/bin/env npx tsx
+
+/**
+ * Checks for drift between spec documentation and JSON schemas.
+ *
+ * This script:
+ * 1. Parses spec markdown files for documented block types
+ * 2. Parses JSON schemas for defined block types
+ * 3. Reports discrepancies between documented and implemented types
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+const rootDir = path.join(__dirname, '..');
+const specDir = path.join(rootDir, 'spec');
+const schemasDir = path.join(rootDir, 'schemas');
+
+/** A type name together with the place it was found. */
+interface BlockType {
+  type: string;
+  source: string; // file path relative to rootDir
+  line?: number; // 1-based line number; only set for markdown sources
+}
+
+/** Outcome of comparing documented types against schema-defined types. */
+interface SyncReport {
+  specOnly: BlockType[];
+  schemaOnly: BlockType[];
+  synced: string[];
+}
+
+/** Narrows a parsed JSON value to a plain (non-array) object. */
+function isObject(value: unknown): value is Record<string, unknown> {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+/** Follows `keys` through nested objects; yields undefined as soon as a step is missing. */
+function dig(value: unknown, ...keys: string[]): unknown {
+  let current = value;
+  for (const key of keys) {
+    if (!isObject(current)) {
+      return undefined;
+    }
+    current = current[key];
+  }
+  return current;
+}
+
+/** Keeps the first entry for each type name, preserving order. */
+function uniqueByType(types: BlockType[]): BlockType[] {
+  const seen = new Set<string>();
+  return types.filter(t => {
+    if (seen.has(t.type)) {
+      return false;
+    }
+    seen.add(t.type);
+    return true;
+  });
+}
+
+/**
+ * Extract block types from a spec markdown file.
+ *
+ * Every `"type": "..."` occurrence on a line is considered. The value "text"
+ * is skipped, and only the first occurrence of each name in the file is
+ * recorded, together with its 1-based line number.
+ */
+function extractTypesFromSpec(filePath: string): BlockType[] {
+  const content = fs.readFileSync(filePath, 'utf8');
+  const filename = path.relative(rootDir, filePath);
+  const types: BlockType[] = [];
+  const seen = new Set<string>();
+
+  content.split('\n').forEach((line, index) => {
+    // One pattern covers both JSON examples and inline code: an inline
+    // `"type": "xxx"` reference contains the same "type": "xxx" substring,
+    // so a separate backtick-delimited pattern could never add a new name.
+    for (const match of line.matchAll(/"type":\s*"([^"]+)"/g)) {
+      const typeName = match[1];
+      // Skip "text" as it's always present and not a block type
+      if (typeName === 'text' || seen.has(typeName)) {
+        continue;
+      }
+      seen.add(typeName);
+      types.push({ type: typeName, source: filename, line: index + 1 });
+    }
+  });
+
+  return types;
+}
+
+/**
+ * Extract block types from a JSON schema file.
+ *
+ * Two places are inspected: `$defs.<name>.properties.type.const` for every
+ * definition, and `$defs.block.allOf[].if.properties.type.const`. Each name is
+ * returned once. A file that cannot be parsed is reported on stderr and
+ * contributes no types.
+ */
+function extractTypesFromSchema(filePath: string): BlockType[] {
+  const content = fs.readFileSync(filePath, 'utf8');
+  const filename = path.relative(rootDir, filePath);
+  const types: BlockType[] = [];
+
+  // Records a const value if it is a non-empty string not seen before.
+  const addType = (value: unknown): void => {
+    if (typeof value === 'string' && value !== '' && !types.some(t => t.type === value)) {
+      types.push({ type: value, source: filename });
+    }
+  };
+
+  try {
+    const schema: unknown = JSON.parse(content);
+    const defs = dig(schema, '$defs');
+
+    // Look for const types in $defs
+    if (isObject(defs)) {
+      for (const def of Object.values(defs)) {
+        addType(dig(def, 'properties', 'type', 'const'));
+      }
+    }
+
+    // Also check allOf conditionals in block definitions
+    const allOf = dig(defs, 'block', 'allOf');
+    if (Array.isArray(allOf)) {
+      for (const condition of allOf) {
+        addType(dig(condition, 'if', 'properties', 'type', 'const'));
+      }
+    }
+  } catch (err) {
+    console.error(`Error parsing ${filename}: ${err}`);
+  }
+
+  return types;
+}
+
+/** Recursively find all markdown files below `dir`; a missing directory yields []. */
+function findMarkdownFiles(dir: string): string[] {
+  if (!fs.existsSync(dir)) {
+    return [];
+  }
+
+  const files: string[] = [];
+  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+    const fullPath = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      files.push(...findMarkdownFiles(fullPath));
+    } else if (entry.name.endsWith('.md')) {
+      files.push(fullPath);
+    }
+  }
+
+  return files;
+}
+
+/** Find `*.schema.json` files directly inside `dir` (no recursion); a missing directory yields []. */
+function findSchemaFiles(dir: string): string[] {
+  if (!fs.existsSync(dir)) {
+    return [];
+  }
+
+  return fs
+    .readdirSync(dir, { withFileTypes: true })
+    .filter(entry => entry.isFile() && entry.name.endsWith('.schema.json'))
+    .map(entry => path.join(dir, entry.name));
+}
+
+/**
+ * Main sync check: collects type names from every spec and schema file and
+ * splits them into spec-only, schema-only and shared names. Progress is
+ * printed to stdout as a side effect.
+ */
+function checkSync(): SyncReport {
+  console.log('Checking spec-schema synchronization...\n');
+
+  // Collect types from specs
+  const specFiles = findMarkdownFiles(specDir);
+  console.log(`Found ${specFiles.length} spec files`);
+  const specTypes = specFiles.flatMap(file => extractTypesFromSpec(file));
+
+  // Collect types from schemas
+  const schemaFiles = findSchemaFiles(schemasDir);
+  console.log(`Found ${schemaFiles.length} schema files`);
+  const schemaTypes = schemaFiles.flatMap(file => extractTypesFromSchema(file));
+
+  // Deduplicate (sets also make the membership tests below constant-time)
+  const specTypeNames = new Set(specTypes.map(t => t.type));
+  const schemaTypeNames = new Set(schemaTypes.map(t => t.type));
+
+  console.log(`\nSpec documents ${specTypeNames.size} unique block types`);
+  console.log(`Schemas define ${schemaTypeNames.size} unique block types\n`);
+
+  // Find discrepancies; the same name can come from several files, so keep
+  // only its first sighting in each list.
+  return {
+    specOnly: uniqueByType(specTypes.filter(t => !schemaTypeNames.has(t.type))),
+    schemaOnly: uniqueByType(schemaTypes.filter(t => !specTypeNames.has(t.type))),
+    synced: [...specTypeNames].filter(t => schemaTypeNames.has(t))
+  };
+}
+
+// Run check
+const report = checkSync();
+
+// Report results
+console.log('='.repeat(60));
+
+if (report.synced.length > 0) {
+  console.log(`\n✓ ${report.synced.length} types are synchronized:`);
+  report.synced.sort().forEach(t => console.log(` ${t}`));
+}
+
+if (report.specOnly.length > 0) {
+  console.log(`\n⚠ ${report.specOnly.length} types documented in spec but not in schema:`);
+  report.specOnly.forEach(t => {
+    console.log(` ${t.type}`);
+    console.log(` Source: ${t.source}${t.line ? `:${t.line}` : ''}`);
+  });
+}
+
+if (report.schemaOnly.length > 0) {
+  console.log(`\n⚠ ${report.schemaOnly.length} types in schema but not documented in spec:`);
+  report.schemaOnly.forEach(t => {
+    console.log(` ${t.type}`);
+    console.log(` Source: ${t.source}`);
+  });
+}
+
+console.log('\n' + '='.repeat(60));
+
+// Exit with error if there are discrepancies
+if (report.specOnly.length > 0 || report.schemaOnly.length > 0) {
+  console.log('\nSpec-schema sync check found discrepancies.');
+  // Don't fail - this is informational for now
+  // process.exit(1);
+} else {
+  console.log('\nAll documented types have schema definitions.');
+}
diff --git a/scripts/generate-template.ts b/scripts/generate-template.ts
new file mode 100644
index 0000000..e3d9eac
--- /dev/null
+++ b/scripts/generate-template.ts
@@ -0,0 +1,353 @@
+#!/usr/bin/env npx tsx
+
+/**
+ * Generates minimal valid Codex document templates.
+ *
+ * Usage:
+ *   npx tsx scripts/generate-template.ts --extensions academic,semantic --output ./my-doc
+ *   npx tsx scripts/generate-template.ts --preset academic --output ./my-doc
+ *   npx tsx scripts/generate-template.ts --list-presets
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+// Extension configurations
+interface ExtensionConfig {
+  id: string;
+  version: string;
+  required: boolean;
+  directories?: string[]; // directories created under the output root
+  files?: Record<string, unknown>; // relative file path -> JSON content to write
+}
+
+const extensionConfigs: Record<string, ExtensionConfig> = {
+  academic: {
+    id: 'codex.academic',
+    version: '0.1',
+    required: false,
+    directories: ['academic'],
+    files: {
+      'academic/numbering.json': {
+        version: '0.1',
+        equations: { style: 'chapter.number', resetOn: 'chapter' },
+        theorems: { style: 'chapter.number' },
+        algorithms: { style: 'number', resetOn: 'chapter' },
+        exercises: { style: 'chapter.number', resetOn: 'chapter' }
+      }
+    }
+  },
+  semantic: {
+    id: 'codex.semantic',
+    version: '0.1',
+    required: false,
+    directories: ['semantic'],
+    files: {
+      'semantic/bibliography.json': {
+        version: '0.1',
+        entries: []
+      },
+      'semantic/glossary.json': {
+        version: '0.1',
+        terms: []
+      }
+    }
+  },
+  forms: {
+    id: 'codex.forms',
+    version: '0.1',
+    required: false,
+    directories: ['forms'],
+    files: {
+      'forms/data.json': {
+        version: '0.1',
+        values: {}
+      }
+    }
+  },
+  security: {
+    id: 'codex.security',
+    version: '0.1',
+    required: false,
+    directories: ['security'],
+    files: {
+      'security/signatures.json': {
+        version: '0.1',
+        signatures: []
+      }
+    }
+  },
+  collaboration: {
+    id: 'codex.collaboration',
+    version: '0.2',
+    required: false,
+    directories: ['collaboration'],
+    files: {
+      'collaboration/comments.json': {
+        version: '0.2',
+        threads: []
+      },
+      'collaboration/changes.json': {
+        version: '0.2',
+        changes: []
+      }
+    }
+  },
+  presentation: {
+    id: 'codex.presentation',
+    version: '0.1',
+    required: false,
+    directories: ['presentation'],
+    files: {
+      'presentation/paginated.json': {
+        version: '0.1',
+        type: 'paginated',
+        defaults: {
+          pageSize: { width: '8.5in', height: '11in' },
+          margins: { top: '1in', right: '1in', bottom: '1in', left: '1in' }
+        },
+        styles: {}
+      }
+    }
+  },
+  phantoms: {
+    id: 'codex.phantoms',
+    version: '0.1',
+    required: false,
+    directories: ['phantoms'],
+    files: {
+      'phantoms/clusters.json': {
+        version: '0.1',
+        clusters: []
+      }
+    }
+  }
+};
+
+// Presets: preset name -> extension keys from extensionConfigs
+const presets: Record<string, string[]> = {
+  simple: [],
+  academic: ['academic', 'semantic'],
+  semantic: ['semantic'],
+  forms: ['forms'],
+  signed: ['security'],
+  collaborative: ['collaboration'],
+  presentation: ['presentation'],
+  phantoms: ['phantoms'],
+  all: Object.keys(extensionConfigs)
+};
+
+/**
+ * True when `key` is an own property of `table`.
+ *
+ * A plain `table[key]` truthiness test also finds inherited members such as
+ * `constructor` or `toString`, so `--preset constructor` would pass validation
+ * and fail later with an unrelated error.
+ */
+function hasOwn(table: object, key: string): boolean {
+  return Object.prototype.hasOwnProperty.call(table, key);
+}
+
+/**
+ * Generate base manifest.
+ *
+ * @param extensions keys of extensionConfigs (already validated by parseArgs)
+ * @returns the manifest object that is written to manifest.json
+ */
+function generateManifest(extensions: string[]): Record<string, unknown> {
+  const now = new Date().toISOString();
+
+  // NOTE(review): both `hash` values below are the SHA-256 of empty input, so
+  // they do not describe the files this script writes. They look like
+  // placeholders - confirm whether consumers are expected to recompute them.
+  const manifest: Record<string, unknown> = {
+    codex: '0.1',
+    id: 'pending',
+    state: 'draft',
+    created: now,
+    modified: now,
+    content: {
+      path: 'content/document.json',
+      hash: 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
+    },
+    metadata: {
+      dublinCore: 'metadata/dublin-core.json'
+    }
+  };
+
+  if (extensions.length > 0) {
+    manifest.extensions = extensions.map(ext => {
+      const { id, version, required } = extensionConfigs[ext];
+      return { id, version, required };
+    });
+  }
+
+  // Add presentation reference if presentation extension is included
+  if (extensions.includes('presentation')) {
+    manifest.presentation = [{
+      type: 'paginated',
+      path: 'presentation/paginated.json',
+      hash: 'sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855',
+      default: true
+    }];
+  }
+
+  // Add phantoms reference if phantoms extension is included
+  if (extensions.includes('phantoms')) {
+    manifest.phantoms = {
+      clusters: 'phantoms/clusters.json'
+    };
+  }
+
+  return manifest;
+}
+
+/** Generate base content: one level-1 heading followed by one paragraph. */
+function generateContent(): Record<string, unknown> {
+  return {
+    version: '0.1',
+    blocks: [
+      {
+        type: 'heading',
+        id: 'title',
+        level: 1,
+        children: [{ type: 'text', value: 'Document Title' }]
+      },
+      {
+        type: 'paragraph',
+        id: 'p1',
+        children: [{ type: 'text', value: 'Document content goes here.' }]
+      }
+    ]
+  };
+}
+
+/** Generate Dublin Core metadata with placeholder values and today's date (UTC, YYYY-MM-DD). */
+function generateDublinCore(): Record<string, unknown> {
+  return {
+    title: 'Untitled Document',
+    creator: 'Author Name',
+    subject: 'Subject',
+    description: 'Document description',
+    date: new Date().toISOString().split('T')[0],
+    type: 'Text',
+    format: 'application/vnd.codex+zip',
+    language: 'en'
+  };
+}
+
+/**
+ * Generate template: creates the directory layout under `outputDir` and
+ * writes the manifest, content, metadata and per-extension files as
+ * pretty-printed JSON.
+ *
+ * @param outputDir target directory (created if missing)
+ * @param extensions keys of extensionConfigs (already validated by parseArgs)
+ */
+function generateTemplate(outputDir: string, extensions: string[]): void {
+  console.log(`Generating template in: ${outputDir}`);
+  console.log(`Extensions: ${extensions.length > 0 ? extensions.join(', ') : 'none (simple)'}\n`);
+
+  const configs = extensions.map(ext => extensionConfigs[ext]);
+
+  // Create directories
+  const dirs = ['content', 'metadata', ...configs.flatMap(config => config.directories ?? [])];
+  for (const dir of dirs) {
+    fs.mkdirSync(path.join(outputDir, dir), { recursive: true });
+    console.log(` Created: ${dir}/`);
+  }
+
+  // Generate files
+  const files: Record<string, unknown> = {
+    'manifest.json': generateManifest(extensions),
+    'content/document.json': generateContent(),
+    'metadata/dublin-core.json': generateDublinCore()
+  };
+
+  // Add extension-specific files
+  for (const config of configs) {
+    Object.assign(files, config.files ?? {});
+  }
+
+  // Write files
+  for (const [filePath, content] of Object.entries(files)) {
+    fs.writeFileSync(path.join(outputDir, filePath), JSON.stringify(content, null, 2) + '\n');
+    console.log(` Created: ${filePath}`);
+  }
+
+  console.log('\nTemplate generated successfully!');
+}
+
+/**
+ * Parse command line arguments.
+ *
+ * Exits the process with status 1 on an unknown preset or extension and with
+ * status 0 after printing `--help`. When both `--extensions` and `--preset`
+ * are given, the later one wins. Unrecognised arguments are ignored.
+ *
+ * @returns the de-duplicated extension keys, the output directory and the
+ *          `--list-presets` flag
+ */
+function parseArgs(): { extensions: string[]; output: string; listPresets: boolean } {
+  const args = process.argv.slice(2);
+  let extensions: string[] = [];
+  let output = './codex-document';
+  let listPresets = false;
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+
+    if (arg === '--list-presets' || arg === '-l') {
+      listPresets = true;
+    } else if (arg === '--extensions' || arg === '-e') {
+      const extList = args[++i];
+      if (extList) {
+        extensions = extList.split(',').map(e => e.trim());
+      }
+    } else if (arg === '--preset' || arg === '-p') {
+      const preset = args[++i];
+      if (preset && hasOwn(presets, preset)) {
+        // Copy so callers can never mutate the shared preset table.
+        extensions = [...presets[preset]];
+      } else {
+        console.error(`Unknown preset: ${preset}`);
+        console.error(`Available presets: ${Object.keys(presets).join(', ')}`);
+        process.exit(1);
+      }
+    } else if (arg === '--output' || arg === '-o') {
+      output = args[++i] || output;
+    } else if (arg === '--help' || arg === '-h') {
+      console.log(`
+Codex Document Template Generator
+
+Usage:
+  npx tsx scripts/generate-template.ts [options]
+
+Options:
+  --extensions, -e    Comma-separated list of extensions to include
+  --preset, -p        Use a named preset (see --list-presets)
+  --output, -o        Output directory (default: ./codex-document)
+  --list-presets, -l  List available presets
+  --help, -h          Show this help
+
+Examples:
+  npx tsx scripts/generate-template.ts --preset academic --output ./my-paper
+  npx tsx scripts/generate-template.ts --extensions forms,security --output ./my-form
+`);
+      process.exit(0);
+    }
+  }
+
+  // Validate extensions
+  for (const ext of extensions) {
+    if (!hasOwn(extensionConfigs, ext)) {
+      console.error(`Unknown extension: ${ext}`);
+      console.error(`Available extensions: ${Object.keys(extensionConfigs).join(', ')}`);
+      process.exit(1);
+    }
+  }
+
+  // A repeated name (e.g. "forms,forms") would otherwise be declared twice in the manifest.
+  return { extensions: [...new Set(extensions)], output, listPresets };
+}
+
+// Main
+const { extensions, output, listPresets } = parseArgs();
+
+if (listPresets) {
+  console.log('Available presets:\n');
+  for (const [name, exts] of Object.entries(presets)) {
+    console.log(` ${name.padEnd(15)} ${exts.length > 0 ? exts.join(', ') : '(core only)'}`);
+  }
+  console.log('\nAvailable extensions:\n');
+  for (const [name, config] of Object.entries(extensionConfigs)) {
+    console.log(` ${name.padEnd(15)} ${config.id} v${config.version}`);
+  }
+  process.exit(0);
+}
+
+// Check if output directory exists
+if (fs.existsSync(output)) {
+  // readdirSync throws on a regular file; report that case explicitly instead.
+  if (!fs.statSync(output).isDirectory()) {
+    console.error(`Output path exists and is not a directory: ${output}`);
+    process.exit(1);
+  }
+  const entries = fs.readdirSync(output);
+  if (entries.length > 0) {
+    console.error(`Output directory is not empty: ${output}`);
+    console.error('Please specify an empty or non-existent directory.');
+    process.exit(1);
+  }
+}
+
+generateTemplate(output, extensions);
diff --git a/scripts/validate-cross-refs.ts b/scripts/validate-cross-refs.ts
new file mode 100644
index 0000000..bead2cc
--- /dev/null
+++ b/scripts/validate-cross-refs.ts
@@ -0,0 +1,272 @@
+#!/usr/bin/env npx tsx
+
+/**
+ * Validates cross-references within spec documentation.
+ *
+ * This script:
+ * 1. Extracts all internal references from spec markdown files
+ * 2. Builds an index of all sections and anchors
+ * 3. Reports broken or invalid references
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+
+const rootDir = path.join(__dirname, '..');
+const specDir = path.join(rootDir, 'spec');
+
+interface Section {
+  id: string;
+  title: string;
+  file: string;
+  line: number;
+}
+
+interface Reference {
+  target: string;
+  file: string;
+  line: number;
+  context: string;
+}
+
+interface ValidationReport {
+  sections: Section[];
+  references: Reference[];
+  broken: Reference[];
+  valid: Reference[];
+}
+
+// Extract sections from a markdown file
+function extractSections(filePath: string): Section[] {
+  const sections: Section[] = [];
+  const content = fs.readFileSync(filePath, 'utf8');
+  const lines = content.split('\n');
+  const filename = path.relative(rootDir, filePath);
+
+  lines.forEach((line, index) => {
+    // Match headings: # Title, ## Title, etc.
+    const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
+    if (headingMatch) {
+      // Trim before slugging: a trim at the end of the chain runs after
+      // whitespace has already become hyphens and can no longer remove it.
+      const title = headingMatch[2].trim();
+      // Generate anchor from title (GitHub-style)
+      // NOTE(review): runs of hyphens are collapsed here; confirm that this
+      // matches the renderer the spec is published with.
+      const id = title
+        .toLowerCase()
+        .replace(/[^\w\s-]/g, '')
+        .replace(/\s+/g, '-')
+        .replace(/-+/g, '-');
+
+      sections.push({
+        id,
+        title,
+        file: filename,
+        line: index + 1
+      });
+    }
+
+    // Also match explicit anchors like <a id="anchor-name">
+    // NOTE(review): everything from this pattern down to the `lines.forEach`
+    // call in extractReferences was missing from the reviewed text and has
+    // been reconstructed from the surrounding code - compare with the
+    // committed file before merging.
+    for (const anchorMatch of line.matchAll(/<a\s+(?:id|name)="([^"]+)"/g)) {
+      sections.push({
+        id: anchorMatch[1],
+        title: anchorMatch[1],
+        file: filename,
+        line: index + 1
+      });
+    }
+  });
+
+  return sections;
+}
+
+/**
+ * True for link targets that cannot be checked against the spec tree: any
+ * target that starts with a URI scheme (http:, https:, mailto:, ...) or with
+ * `//` (protocol-relative URL).
+ */
+function isExternalTarget(target: string): boolean {
+  return /^[a-z][a-z0-9+.-]*:/i.test(target) || target.startsWith('//');
+}
+
+/**
+ * Extract references from a markdown file.
+ *
+ * Two kinds are collected per line: markdown links `[text](target)` whose
+ * target is not external, and prose references of the form "see section X.Y"
+ * (recorded with a `section:X.Y` target).
+ */
+function extractReferences(filePath: string): Reference[] {
+  const references: Reference[] = [];
+  const content = fs.readFileSync(filePath, 'utf8');
+  const lines = content.split('\n');
+  const filename = path.relative(rootDir, filePath);
+
+  lines.forEach((line, index) => {
+    // Match markdown links: [text](target)
+    for (const match of line.matchAll(/\[([^\]]+)\]\(([^)]+)\)/g)) {
+      const target = match[2];
+
+      // Only check internal references (not external URLs)
+      if (!isExternalTarget(target)) {
+        references.push({
+          target,
+          file: filename,
+          line: index + 1,
+          context: match[0]
+        });
+      }
+    }
+
+    // Match "see section X.Y" patterns. Being case-insensitive, this also
+    // matches the text inside "(see Section X)", so a second parenthesised
+    // pattern would record every such reference twice.
+    for (const match of line.matchAll(/see\s+(?:section\s+)?(\d+(?:\.\d+)*)/gi)) {
+      references.push({
+        target: `section:${match[1]}`,
+        file: filename,
+        line: index + 1,
+        context: match[0]
+      });
+    }
+  });
+
+  return references;
+}
+
+/** Recursively find all markdown files below `dir`; a missing directory yields []. */
+function findMarkdownFiles(dir: string): string[] {
+  if (!fs.existsSync(dir)) {
+    return [];
+  }
+
+  const files: string[] = [];
+  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+    const fullPath = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      files.push(...findMarkdownFiles(fullPath));
+    } else if (entry.name.endsWith('.md')) {
+      files.push(fullPath);
+    }
+  }
+
+  return files;
+}
+
+/**
+ * Validate a reference against known sections.
+ *
+ * @param ref reference to check; `ref.file` is relative to rootDir
+ * @param sections index of every heading/anchor found in the spec files
+ * @param files absolute paths of every spec markdown file
+ * @returns true when the target resolves, false when it is broken
+ */
+function validateReference(ref: Reference, sections: Section[], files: string[]): boolean {
+  const target = ref.target;
+  const refDir = path.dirname(path.join(rootDir, ref.file));
+  const hasAnchor = (file: string, anchor: string): boolean =>
+    sections.some(s => s.file === file && s.id === anchor);
+
+  // Handle anchor references: #anchor
+  // A bare fragment points into the referencing file, so an anchor that only
+  // exists in some other file must not satisfy it.
+  if (target.startsWith('#')) {
+    return hasAnchor(ref.file, target.slice(1));
+  }
+
+  // Handle file references: path/to/file.md or path/to/file.md#anchor
+  if (target.includes('.md')) {
+    const [filePart, anchorPart] = target.split('#');
+    const relativePath = path.relative(rootDir, path.resolve(refDir, filePart));
+
+    if (!files.some(f => path.relative(rootDir, f) === relativePath)) {
+      return false;
+    }
+
+    return anchorPart ? hasAnchor(relativePath, anchorPart) : true;
+  }
+
+  // Handle section number references: section:1.2.3
+  if (target.startsWith('section:')) {
+    // Section number references are informational - always valid
+    // (They refer to numbered sections in the document, not anchors)
+    return true;
+  }
+
+  // Handle relative paths without .md extension
+  if (target.includes('/')) {
+    // Could be a path to another file or directory
+    return fs.existsSync(path.resolve(refDir, target));
+  }
+
+  // Default: assume it's an anchor in the same file
+  return hasAnchor(ref.file, target);
+}
+
+/**
+ * Main validation: indexes every section, extracts every reference and
+ * sorts the references into valid and broken. Progress is printed to stdout
+ * as a side effect.
+ */
+function validateCrossRefs(): ValidationReport {
+  console.log('Validating cross-references...\n');
+
+  const markdownFiles = findMarkdownFiles(specDir);
+  console.log(`Found ${markdownFiles.length} spec files`);
+
+  // Build section index
+  const allSections = markdownFiles.flatMap(file => extractSections(file));
+  console.log(`Indexed ${allSections.length} sections/anchors`);
+
+  // Extract all references
+  const allReferences = markdownFiles.flatMap(file => extractReferences(file));
+  console.log(`Found ${allReferences.length} cross-references\n`);
+
+  // Validate references
+  const broken: Reference[] = [];
+  const valid: Reference[] = [];
+
+  for (const ref of allReferences) {
+    const bucket = validateReference(ref, allSections, markdownFiles) ? valid : broken;
+    bucket.push(ref);
+  }
+
+  return {
+    sections: allSections,
+    references: allReferences,
+    broken,
+    valid
+  };
+}
+
+// Run validation
+const report = validateCrossRefs();
+
+// Report results
+console.log('='.repeat(60));
+
+console.log(`\n✓ ${report.valid.length} valid references`);
+
+if (report.broken.length > 0) {
+  console.log(`\n✗ ${report.broken.length} broken references:`);
+  report.broken.forEach(ref => {
+    console.log(`\n ${ref.file}:${ref.line}`);
+    console.log(` Target: ${ref.target}`);
+    console.log(` Context: ${ref.context}`);
+  });
+}
+
+console.log('\n' + '='.repeat(60));
+
+if (report.broken.length > 0) {
+  console.log('\nCross-reference validation found issues.');
+  // Don't fail - this is informational for now
+  // process.exit(1);
+} else {
+  console.log('\nAll cross-references are valid.');
+}
diff --git a/scripts/validate-examples.ts b/scripts/validate-examples.ts
index b1ec768..50eb040
100644 --- a/scripts/validate-examples.ts +++ b/scripts/validate-examples.ts @@ -42,7 +42,7 @@ function loadJson(filepath: string): unknown { // Schema dependencies (schemas that need other schemas loaded first) const schemaDependencies: Record = { - 'content.schema.json': ['semantic.schema.json', 'academic.schema.json'], + 'content.schema.json': ['semantic.schema.json', 'academic.schema.json', 'presentation.schema.json'], 'collaboration.schema.json': ['anchor.schema.json'], 'phantoms.schema.json': ['anchor.schema.json'], 'security.schema.json': ['anchor.schema.json'], diff --git a/scripts/validate-schemas.ts b/scripts/validate-schemas.ts index a000737..599dfaa 100644 --- a/scripts/validate-schemas.ts +++ b/scripts/validate-schemas.ts @@ -23,6 +23,7 @@ const standaloneSchemas: string[] = [ 'asset-index.schema.json', 'dublin-core.schema.json', 'forms.schema.json', + 'legal.schema.json', 'manifest.schema.json', 'precise-layout.schema.json', 'presentation.schema.json', @@ -34,7 +35,7 @@ const standaloneSchemas: string[] = [ const dependentSchemas: DependentSchema[] = [ { schema: 'annotations.schema.json', refs: ['anchor.schema.json'] }, { schema: 'collaboration.schema.json', refs: ['anchor.schema.json'] }, - { schema: 'content.schema.json', refs: ['semantic.schema.json', 'academic.schema.json'] }, + { schema: 'content.schema.json', refs: ['semantic.schema.json', 'academic.schema.json', 'presentation.schema.json'] }, { schema: 'phantoms.schema.json', refs: ['anchor.schema.json'] }, { schema: 'security.schema.json', refs: ['anchor.schema.json'] }, ]; diff --git a/spec/core/00-introduction.md b/spec/core/00-introduction.md index 67de284..7cd8d24 100644 --- a/spec/core/00-introduction.md +++ b/spec/core/00-introduction.md @@ -54,19 +54,46 @@ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "S ### 1.4 Terminology -**Block**: A discrete unit of content (paragraph, heading, image, etc.) 
+This section defines key terms used throughout the specification. -**Content Layer**: The semantic representation of document content as blocks +#### Core Concepts -**Presentation Layer**: Instructions for rendering content visually +**Block**: A discrete unit of content that represents a semantic element of the document (paragraph, heading, image, table, etc.). Blocks form a tree structure where container blocks can have child blocks. -**Document State**: The lifecycle state of a document (draft, review, frozen, published) +**Text Node**: A leaf node containing actual text content. Text nodes are the only node type that holds text; blocks either contain children or are void elements. -**Manifest**: The root metadata structure describing a document's contents and properties +**Mark**: Inline formatting applied to a range of text within a text node. Marks can be simple strings (`"bold"`, `"italic"`) or objects with additional properties (links, citations, math). Marks do not create new blocks — they annotate existing text. -**Asset**: An embedded resource (image, font, file) referenced by content blocks +**Annotation**: External commentary attached to content but stored outside the content layer. Annotations include comments, suggestions, and phantoms. Annotations are always outside the content hash boundary. -**Lineage**: The version history chain linking a document to its predecessors +**Element**: A generic term used in specific contexts: +- In presentation layers: a positioned item on a page (`pageElement`) +- In DOM/SVG contexts: an XML/HTML element +- In arrays: an item in a JSON array + +#### Layers and Structure + +**Content Layer**: The semantic representation of document content as a tree of blocks. This layer captures what the document says, independent of how it looks. + +**Presentation Layer**: Instructions for rendering content visually. Presentation layers define styles, layouts, and page structures. 
Multiple presentation layers can exist for the same content. + +**Manifest**: The root metadata structure (`manifest.json`) describing a document's contents, properties, extensions, and file references. + +**Asset**: An embedded resource (image, font, embedded file) referenced by content blocks. Assets are stored in the `assets/` directory and indexed in the manifest. + +#### Document Lifecycle + +**Document State**: The lifecycle state of a document: `draft` (active editing), `review` (under review), `frozen` (locked for signing), or `published` (distributed). + +**Lineage**: The version history chain linking a document to its predecessors via content-addressable hashes. + +**Document ID**: The content-addressable hash that serves as a document's canonical identifier. Computed from the document's semantic content. + +#### Extension Terminology + +**Extension**: An optional module that adds capabilities beyond the core specification. Extensions are declared in the manifest and use namespaced block types (e.g., `forms:input`, `academic:theorem`). + +**Phantom**: An off-page annotation from the Phantom Extension. Phantoms are anchored to content but rendered in margin clusters outside the main document flow. ### 1.5 Document Structure Overview diff --git a/spec/core/03-content-blocks.md b/spec/core/03-content-blocks.md index 42f22a2..3a92dce 100644 --- a/spec/core/03-content-blocks.md +++ b/spec/core/03-content-blocks.md @@ -309,11 +309,65 @@ Block of source code or preformatted text. | Attribute | Type | Required | Description | |-----------|------|----------|-------------| | `language` | string | No | Programming language identifier | +| `highlighting` | string | No | Highlighting mode: `"none"` or `"tokens"` | +| `tokens` | array | No | Pre-tokenized highlighting (when `highlighting="tokens"`) | Children: Single text node (no marks) Language identifiers SHOULD follow common conventions (e.g., "javascript", "python", "rust"). 
+#### 4.7.1 Syntax Highlighting + +For documents requiring stable, portable syntax highlighting, code blocks can include pre-tokenized content: + +```json +{ + "type": "codeBlock", + "language": "python", + "highlighting": "tokens", + "tokens": [ + { "type": "keyword", "value": "def " }, + { "type": "function", "value": "hello" }, + { "type": "punctuation", "value": "():" }, + { "type": "plain", "value": "\n " }, + { "type": "keyword", "value": "return " }, + { "type": "string", "value": "\"world\"" } + ], + "children": [ + { "type": "text", "value": "def hello():\n return \"world\"" } + ] +} +``` + +**Token Types:** + +| Token Type | Description | +|------------|-------------| +| `keyword` | Language keywords (if, for, def, class, etc.) | +| `function` | Function names | +| `class` | Class names | +| `variable` | Variable names | +| `parameter` | Function parameters | +| `string` | String literals | +| `number` | Numeric literals | +| `boolean` | Boolean literals | +| `null` | Null/nil/None values | +| `comment` | Code comments | +| `docstring` | Documentation strings | +| `operator` | Operators (+, -, *, etc.) | +| `punctuation` | Punctuation marks | +| `delimiter` | Delimiters (braces, brackets, etc.) | +| `type` | Type annotations | +| `namespace` | Namespace/module identifiers | +| `decorator` | Decorators/annotations | +| `plain` | Plain text (default/fallback) | + +**Behavior:** +- If `highlighting` is absent or `"none"`, renderers use the `children` text node (current behavior) +- If `highlighting` is `"tokens"`, renderers use the `tokens` array for colored output +- Renderers MAY re-highlight from `children` if they don't support the tokens format +- The `children` field MUST always contain the complete source code for fallback and accessibility + ### 4.8 Horizontal Rule Thematic break between sections. 
diff --git a/spec/core/06-document-hashing.md b/spec/core/06-document-hashing.md index 2a8c4fe..ade9e12 100644 --- a/spec/core/06-document-hashing.md +++ b/spec/core/06-document-hashing.md @@ -104,6 +104,27 @@ The canonical content EXCLUDES: > **Note**: The document ID represents the document's semantic identity — what it says, not how it looks. Multiple visual presentations (letter, A4, responsive) of the same content produce the same document ID. For appearance attestation, see Scoped Signatures in the Security Extension. +### 4.1a Hash Boundary Summary + +The following table summarizes what is included in and excluded from the document content hash: + +| Layer | Inside Hash | Notes | +|-------|-------------|-------| +| Content blocks | Yes | Core document identity — all text, structure, and semantic markup | +| Dublin Core metadata | Partial | Only `title`, `creator`, `subject`, `description`, `language` | +| Asset hashes | Yes | Asset identity via hash mapping (not asset bytes) | +| Asset content | No | Actual asset bytes are hashed separately; only references included | +| Presentation | No | Visual rendering instructions — not part of semantic identity | +| Precise layouts | No | Coordinate-level positioning — rendering fidelity | +| Collaboration | No | Comments, suggestions, change tracking | +| Phantoms | No | Off-page annotations and margin notes | +| Forms data | No | Fillable field values (mutable even on frozen documents) | +| Security | No | Signatures reference the hash — not part of it | +| Timestamps | No | Administrative metadata (`created`, `modified`) | +| Provenance | No | Lineage tracking and derivation history | + +This boundary ensures that the document's identity represents its **semantic content** — what the document says — rather than how it appears or administrative metadata about it. 
+ ### 4.2 Canonical Content Structure ```json diff --git a/spec/extensions/legal/README.md b/spec/extensions/legal/README.md new file mode 100644 index 0000000..a9ed002 --- /dev/null +++ b/spec/extensions/legal/README.md @@ -0,0 +1,355 @@ +# Legal Extension + +**Extension ID**: `codex.legal` +**Version**: 0.1 +**Status**: Draft + +## 1. Overview + +The Legal Extension provides specialized blocks and marks for legal documents, including: + +- Table of Authorities (auto-generated citation index) +- Legal citation marks with citation style support +- Support for common legal citation formats (Bluebook, ALWD, McGill, OSCOLA) + +## 2. Extension Declaration + +```json +{ + "extensions": [ + { + "id": "codex.legal", + "version": "0.1", + "required": false + } + ] +} +``` + +## 3. Legal Citation Mark + +The `legal:cite` mark annotates text with legal citation information for automatic Table of Authorities generation. + +### 3.1 Basic Usage + +```json +{ + "type": "text", + "value": "Brown v. Board of Education", + "marks": [ + { + "type": "legal:cite", + "citation": "347 U.S. 
483 (1954)", + "category": "cases", + "shortForm": "Brown" + } + ] +} +``` + +### 3.2 Citation Mark Properties + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `type` | string | Yes | Always `"legal:cite"` | +| `citation` | string | Yes | Full citation string | +| `category` | string | Yes | Citation category for TOA grouping | +| `shortForm` | string | No | Short form for subsequent references | +| `pinpoint` | string | No | Specific page, paragraph, or section reference | +| `format` | string | No | Citation style override | + +### 3.3 Citation Categories + +Standard categories for Table of Authorities grouping: + +| Category | Description | +|----------|-------------| +| `cases` | Court cases and judicial decisions | +| `statutes` | Statutory law | +| `regulations` | Administrative regulations | +| `constitutions` | Constitutional provisions | +| `treatises` | Legal treatises and books | +| `law-reviews` | Law review articles | +| `other` | Other secondary sources | + +### 3.4 Pinpoint Citations + +For citations to specific locations within a source: + +```json +{ + "type": "text", + "value": "Brown", + "marks": [ + { + "type": "legal:cite", + "citation": "347 U.S. 483 (1954)", + "category": "cases", + "shortForm": "Brown", + "pinpoint": "at 495" + } + ] +} +``` + +## 4. Table of Authorities Block + +The `legal:tableOfAuthorities` block generates an auto-indexed table of all cited authorities. 
+ +### 4.1 Basic Usage + +```json +{ + "type": "legal:tableOfAuthorities", + "id": "toa", + "title": "Table of Authorities" +} +``` + +### 4.2 Configuration Options + +```json +{ + "type": "legal:tableOfAuthorities", + "id": "toa", + "title": "Table of Authorities", + "categories": [ + { "name": "Cases", "key": "cases", "format": "bluebook" }, + { "name": "Statutes", "key": "statutes", "format": "bluebook" }, + { "name": "Regulations", "key": "regulations", "format": "bluebook" }, + { "name": "Constitutional Provisions", "key": "constitutions", "format": "bluebook" }, + { "name": "Secondary Sources", "key": "treatises", "format": "bluebook" } + ], + "pageReferences": true, + "passimThreshold": 5 +} +``` + +### 4.3 Table of Authorities Properties + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `type` | string | Yes | Always `"legal:tableOfAuthorities"` | +| `id` | string | No | Block identifier | +| `title` | string | No | Section title (default: "Table of Authorities") | +| `categories` | array | No | Category configuration (see Section 4.4) | +| `pageReferences` | boolean | No | Include page references (default: true) | +| `passimThreshold` | integer | No | Number of references to an authority above which "passim" is shown instead of the page list | + +### 4.4 Category Configuration + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `name` | string | Yes | Display name for the category | +| `key` | string | Yes | Category key (matches citation mark category) | +| `format` | string | No | Citation style for this category | + +## 5. Citation Formats + +The Legal Extension supports the following legal citation styles, each selected by the `format` value shown. The examples below are abbreviated: a complete `legal:cite` mark also requires `category` (see Section 3.2). + +### 5.1 Bluebook + +The Bluebook: A Uniform System of Citation (US legal standard) + +```json +{ + "type": "legal:cite", + "citation": "347 U.S. 483 (1954)", + "format": "bluebook" +} +``` + +### 5.2 ALWD + +ALWD Guide to Legal Citation + +```json +{ + "type": "legal:cite", + "citation": "Brown v. Bd. 
of Educ., 347 U.S. 483 (1954)", + "format": "alwd" +} +``` + +### 5.3 McGill + +Canadian Guide to Uniform Legal Citation (McGill Guide) + +```json +{ + "type": "legal:cite", + "citation": "Brown v Board of Education, 347 US 483 (1954)", + "format": "mcgill" +} +``` + +### 5.4 OSCOLA + +Oxford University Standard for Citation of Legal Authorities (UK) + +```json +{ + "type": "legal:cite", + "citation": "Brown v Board of Education (1954) 347 US 483", + "format": "oscola" +} +``` + +## 6. Legal Document Structure Blocks + +### 6.1 Court Caption + +```json +{ + "type": "legal:caption", + "court": "Supreme Court of the United States", + "caseNumber": "No. 1", + "parties": { + "plaintiff": "Oliver Brown, et al.", + "defendant": "Board of Education of Topeka, et al." + }, + "docket": "October Term, 1953" +} +``` + +### 6.2 Signature Block + +Legal documents often require specific signature block formats: + +```json +{ + "type": "legal:signatureBlock", + "role": "counsel", + "signer": { + "name": "Thurgood Marshall", + "title": "Counsel for Appellants", + "barNumber": "12345", + "firm": "NAACP Legal Defense Fund", + "address": "10 Columbus Circle, New York, NY 10019", + "telephone": "(212) 555-1234" + } +} +``` + +## 7. Examples + +### 7.1 Legal Brief with Table of Authorities + +```json +{ + "version": "0.1", + "blocks": [ + { + "type": "legal:tableOfAuthorities", + "id": "toa", + "title": "Table of Authorities", + "categories": [ + { "name": "Cases", "key": "cases" }, + { "name": "Statutes", "key": "statutes" } + ] + }, + { + "type": "heading", + "level": 1, + "children": [{ "type": "text", "value": "Argument" }] + }, + { + "type": "paragraph", + "children": [ + { "type": "text", "value": "In " }, + { + "type": "text", + "value": "Brown v. Board of Education", + "marks": [ + { + "type": "legal:cite", + "citation": "347 U.S. 
483 (1954)", + "category": "cases", + "shortForm": "Brown" + } + ] + }, + { "type": "text", "value": ", the Supreme Court held that 'separate but equal' has no place in public education. " } + ] + }, + { + "type": "paragraph", + "children": [ + { "type": "text", "value": "This principle was reaffirmed in " }, + { + "type": "text", + "value": "Brown", + "marks": [ + { + "type": "legal:cite", + "citation": "347 U.S. 483 (1954)", + "category": "cases", + "shortForm": "Brown", + "pinpoint": "at 495" + } + ] + }, + { "type": "text", "value": ", where the Court explained the psychological impact of segregation." } + ] + } + ] +} +``` + +### 7.2 Statute Citation + +```json +{ + "type": "text", + "value": "42 U.S.C. § 1983", + "marks": [ + { + "type": "legal:cite", + "citation": "42 U.S.C. § 1983", + "category": "statutes" + } + ] +} +``` + +## 8. Rendering Guidelines + +### 8.1 Table of Authorities + +Renderers generating a Table of Authorities SHOULD: + +1. Collect all `legal:cite` marks in document order +2. Group citations by category +3. Sort entries alphabetically within each category +4. Consolidate multiple references to the same authority +5. List page numbers where each authority is cited (when `pageReferences` is true) +6. Use "passim" when the number of references to an authority exceeds `passimThreshold` + +### 8.2 Short Form References + +After the first full citation, subsequent references MAY use the short form: + +- First reference: "Brown v. Board of Education, 347 U.S. 483 (1954)" +- Subsequent: "Brown, 347 U.S. at 495" + +### 8.3 Id. Citations + +For consecutive citations to the same source, renderers MAY substitute "Id." according to citation style rules. + +## 9. Compatibility + +The Legal Extension is compatible with: + +- **Semantic Extension**: Legal citations can include semantic entity markup +- **Presentation Extension**: Table of Authorities uses presentation layer styling +- **Academic Extension**: Legal documents may use academic numbering for sections + +## 10. 
Future Considerations + +Potential future additions: + +- Court filing metadata +- E-filing format compliance (CM/ECF) +- Citation verification services +- International legal citation formats