diff --git a/.claude/settings.json b/.claude/settings.json
index 6df2fb93..f9c1cace 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -128,7 +128,8 @@
       "Edit(./.deepwork/**)",
       "Write(./.deepwork/**)",
       "Bash(deepwork:*)",
-      "Bash(.claude/hooks/commit_job_git_commit.sh:*)"
+      "Bash(.claude/hooks/commit_job_git_commit.sh:*)",
+      "Bash(./.deepwork/jobs/deepwork_jobs/make_new_job.sh:*)"
     ]
   },
   "hooks": {
diff --git a/.claude/skills/add_platform.add_capabilities/SKILL.md b/.claude/skills/add_platform.add_capabilities/SKILL.md
index b9d76df3..7dd71e27 100644
--- a/.claude/skills/add_platform.add_capabilities/SKILL.md
+++ b/.claude/skills/add_platform.add_capabilities/SKILL.md
@@ -1,39 +1,8 @@
 ---
 name: add_platform.add_capabilities
-description: "Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the capability additions meet ALL criteria:
-            1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py
-            2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields
-               (set to None/null if the platform doesn't support that hook)
-            3. Only hooks available on slash command definitions are added (not general CLI hooks)
-            4. job_schema.py remains valid Python with no syntax errors
-            5. adapters.py remains consistent - all adapters have the same hook fields
-            6. If no new hooks are needed, document why in a comment
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the capability additions meet ALL criteria:
-            1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py
-            2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields
-               (set to None/null if the platform doesn't support that hook)
-            3. Only hooks available on slash command definitions are added (not general CLI hooks)
-            4. job_schema.py remains valid Python with no syntax errors
-            5. adapters.py remains consistent - all adapters have the same hook fields
-            6. If no new hooks are needed, document why in a comment
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system."
+user-invocable: false
+---
 
 # add_platform.add_capabilities
 
@@ -233,14 +200,6 @@ Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.claude/skills/add_platform.implement/SKILL.md b/.claude/skills/add_platform.implement/SKILL.md
index 44722b65..167a6d7e 100644
--- a/.claude/skills/add_platform.implement/SKILL.md
+++ b/.claude/skills/add_platform.implement/SKILL.md
@@ -1,47 +1,17 @@
 ---
 name: add_platform.implement
-description: "Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: command
-          command: ".deepwork/jobs/add_platform/hooks/run_tests.sh"
-        - type: prompt
-          prompt: |
-            Verify the implementation meets ALL criteria:
-            1. Platform adapter class is added to src/deepwork/adapters.py
-            2. Templates exist in src/deepwork/templates/<platform>/ with appropriate command structure
-            3. Tests exist for all new functionality
-            4. Test coverage is 100% for new code (run: uv run pytest --cov)
-            5. All tests pass
-            6. README.md is updated with:
-               - New platform listed in supported platforms
-               - Installation instructions for the platform
-               - Any platform-specific notes
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: command
-          command: ".deepwork/jobs/add_platform/hooks/run_tests.sh"
-        - type: prompt
-          prompt: |
-            Verify the implementation meets ALL criteria:
-            1. Platform adapter class is added to src/deepwork/adapters.py
-            2. Templates exist in src/deepwork/templates/<platform>/ with appropriate command structure
-            3. Tests exist for all new functionality
-            4. Test coverage is 100% for new code (run: uv run pytest --cov)
-            5. All tests pass
-            6. README.md is updated with:
-               - New platform listed in supported platforms
-               - Installation instructions for the platform
-               - Any platform-specific notes
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities."
+user-invocable: false
+hooks:
+  Stop:
+    - hooks:
+        - type: command
+          command: ".deepwork/jobs/add_platform/hooks/run_tests.sh"
+  SubagentStop:
+    - hooks:
+        - type: command
+          command: ".deepwork/jobs/add_platform/hooks/run_tests.sh"
+---
 
 # add_platform.implement
 
@@ -336,15 +299,7 @@ Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
 **Validation script**: `.deepwork/jobs/add_platform/hooks/run_tests.sh` (runs automatically)
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.claude/skills/add_platform.research/SKILL.md b/.claude/skills/add_platform.research/SKILL.md
index af44f2d3..3a3e3604 100644
--- a/.claude/skills/add_platform.research/SKILL.md
+++ b/.claude/skills/add_platform.research/SKILL.md
@@ -1,41 +1,8 @@
 ---
 name: add_platform.research
-description: "Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the research output meets ALL criteria:
-            1. Both files exist in doc/platforms/<platform>/: cli_configuration.md and hooks_system.md
-            2. Each file has a comment at the top with:
-               - Last updated date
-               - Source URL where the documentation was obtained
-            3. cli_configuration.md covers how the platform's CLI is configured
-            4. hooks_system.md covers hooks available for slash command definitions ONLY
-            5. No extraneous documentation (only these two specific topics)
-            6. Documentation is comprehensive enough to implement the platform
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the research output meets ALL criteria:
-            1. Both files exist in doc/platforms/<platform>/: cli_configuration.md and hooks_system.md
-            2. Each file has a comment at the top with:
-               - Last updated date
-               - Source URL where the documentation was obtained
-            3. cli_configuration.md covers how the platform's CLI is configured
-            4. hooks_system.md covers hooks available for slash command definitions ONLY
-            5. No extraneous documentation (only these two specific topics)
-            6. Documentation is comprehensive enough to implement the platform
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration."
+user-invocable: false
+---
 
 # add_platform.research
 
@@ -284,14 +249,6 @@ Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.claude/skills/add_platform.verify/SKILL.md b/.claude/skills/add_platform.verify/SKILL.md
index 583101f2..605d92c4 100644
--- a/.claude/skills/add_platform.verify/SKILL.md
+++ b/.claude/skills/add_platform.verify/SKILL.md
@@ -1,37 +1,8 @@
 ---
 name: add_platform.verify
-description: "Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the installation meets ALL criteria:
-            1. Platform-specific directories/files are added to the deepwork repo as needed
-            2. Running `deepwork install --platform <platform>` completes without errors
-            3. Expected command files are created in the platform's command directory
-            4. Command file content matches the templates and job definitions
-            5. Established DeepWork jobs (deepwork_jobs, deepwork_rules) are installed correctly
-            6. The platform can be used alongside existing platforms without conflicts
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the installation meets ALL criteria:
-            1. Platform-specific directories/files are added to the deepwork repo as needed
-            2. Running `deepwork install --platform <platform>` completes without errors
-            3. Expected command files are created in the platform's command directory
-            4. Command file content matches the templates and job definitions
-            5. Established DeepWork jobs (deepwork_jobs, deepwork_rules) are installed correctly
-            6. The platform can be used alongside existing platforms without conflicts
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration."
+user-invocable: false
+---
 
 # add_platform.verify
 
@@ -206,14 +175,6 @@ Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.claude/skills/commit.commit_and_push/SKILL.md b/.claude/skills/commit.commit_and_push/SKILL.md
index 7c96d0df..779c6e52 100644
--- a/.claude/skills/commit.commit_and_push/SKILL.md
+++ b/.claude/skills/commit.commit_and_push/SKILL.md
@@ -1,31 +1,8 @@
 ---
 name: commit.commit_and_push
-description: "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the commit is ready:
-            1. Changed files list was reviewed by the agent
-            2. Files match what was modified during this session (or unexpected changes were investigated)
-            3. Commit was created with appropriate message
-            4. Changes were pushed to remote
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the commit is ready:
-            1. Changed files list was reviewed by the agent
-            2. Files match what was modified during this session (or unexpected changes were investigated)
-            3. Commit was created with appropriate message
-            4. Changes were pushed to remote
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes."
+user-invocable: false
+---
 
 # commit.commit_and_push
 
@@ -68,14 +43,27 @@ Check the list of changed files against what was modified during this session, e
    - Are there any unexpected deleted files?
    - Do the line counts seem reasonable for the changes you made?
 
-   If changes match expectations, proceed to commit.
+   If changes match expectations, proceed to the next step.
 
    If there are unexpected changes:
    - Investigate why (e.g., lint auto-fixes, generated files)
    - If they're legitimate side effects of your work, include them
    - If they're unrelated or shouldn't be committed, use `git restore` to discard them
 
-3. **Stage all appropriate changes**
+3. **Update CHANGELOG.md if needed**
+
+   If your changes include new features, bug fixes, or other notable changes:
+   - Add entries to the `## [Unreleased]` section of CHANGELOG.md
+   - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed`
+   - Write concise descriptions that explain the user-facing impact
+
+   **CRITICAL: NEVER modify version numbers**
+   - Do NOT change the version in `pyproject.toml`
+   - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`)
+   - Do NOT rename the `## [Unreleased]` section
+   - Version updates are handled by the release workflow, not commits
+
+4. **Stage all appropriate changes**
    ```bash
    git add -A
    ```
@@ -93,8 +81,9 @@ Check the list of changed files against what was modified during this session, e
    - The style of recent commits
    - Conventional commit format if the project uses it
 
+   **IMPORTANT:** Use the commit job script (not `git commit` directly):
    ```bash
-   git commit -m "commit message here"
+   .claude/hooks/commit_job_git_commit.sh -m "commit message here"
    ```
 
 7. **Push to remote**
@@ -110,6 +99,8 @@ Check the list of changed files against what was modified during this session, e
 
 - Changed files list was reviewed by the agent
 - Files match what was modified during this session (or unexpected changes were investigated and handled)
+- CHANGELOG.md was updated with entries in the `[Unreleased]` section (if changes warrant documentation)
+- Version numbers were NOT modified (in pyproject.toml or CHANGELOG.md version headers)
 - Commit message follows project conventions
 - Commit was created successfully
 - Changes were pushed to remote
@@ -156,14 +147,6 @@ Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.claude/skills/commit.lint/SKILL.md b/.claude/skills/commit.lint/SKILL.md
index 1caa6bf0..8bedb8b5 100644
--- a/.claude/skills/commit.lint/SKILL.md
+++ b/.claude/skills/commit.lint/SKILL.md
@@ -1,29 +1,8 @@
 ---
 name: commit.lint
-description: "Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the linting is complete:
-            1. ruff format was run successfully
-            2. ruff check was run successfully (with --fix)
-            3. No remaining lint errors
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the linting is complete:
-            1. ruff format was run successfully
-            2. ruff check was run successfully (with --fix)
-            3. No remaining lint errors
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance."
+user-invocable: false
+---
 
 # commit.lint
 
@@ -150,14 +127,6 @@ Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.claude/skills/commit.review/SKILL.md b/.claude/skills/commit.review/SKILL.md
index 08ed14f4..71d4c6b3 100644
--- a/.claude/skills/commit.review/SKILL.md
+++ b/.claude/skills/commit.review/SKILL.md
@@ -1,29 +1,8 @@
 ---
 name: commit.review
-description: "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the code review is complete:
-            1. Changed files were identified
-            2. Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage
-            3. All identified issues were addressed or documented as intentional
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the code review is complete:
-            1. Changed files were identified
-            2. Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage
-            3. All identified issues were addressed or documented as intentional
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing."
+user-invocable: false
+---
 
 # commit.review
 
@@ -179,14 +156,6 @@ Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.claude/skills/commit.test/SKILL.md b/.claude/skills/commit.test/SKILL.md
index 79229595..cef6a52a 100644
--- a/.claude/skills/commit.test/SKILL.md
+++ b/.claude/skills/commit.test/SKILL.md
@@ -1,31 +1,8 @@
 ---
 name: commit.test
-description: "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the tests are passing:
-            1. Latest code was pulled from the branch
-            2. All tests completed successfully
-            3. No test failures or errors remain
-            4. Test output shows passing status
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the tests are passing:
-            1. Latest code was pulled from the branch
-            2. All tests completed successfully
-            3. No test failures or errors remain
-            4. Test output shows passing status
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly."
+user-invocable: false
+---
 
 # commit.test
 
@@ -138,14 +113,6 @@ Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.claude/skills/deepwork_jobs.define/SKILL.md b/.claude/skills/deepwork_jobs.define/SKILL.md
index 1dce7680..ef5b2407 100644
--- a/.claude/skills/deepwork_jobs.define/SKILL.md
+++ b/.claude/skills/deepwork_jobs.define/SKILL.md
@@ -1,71 +1,8 @@
 ---
 name: deepwork_jobs.define
-description: "Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **User Understanding**: Did the agent fully understand the user's workflow by asking structured questions?
-            2. **Structured Questions Used**: Did the agent ask structured questions (using the AskUserQuestion tool) to gather user input?
-            3. **Document Detection**: For document-oriented workflows, did the agent detect patterns and offer doc spec creation?
-            4. **doc spec Created (if applicable)**: If a doc spec was needed, was it created in `.deepwork/doc_specs/[doc_spec_name].md` with proper quality criteria?
-            5. **doc spec References**: Are document outputs properly linked to their doc specs using `{file, doc_spec}` format?
-            6. **Valid Against doc spec**: Does the job.yml conform to the job.yml doc spec quality criteria (valid identifier, semantic version, concise summary, rich description, complete steps, valid dependencies)?
-            7. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs?
-            8. **Logical Dependencies**: Do step dependencies make sense and avoid circular references?
-            9. **Concise Summary**: Is the summary under 200 characters and descriptive?
-            10. **Rich Description**: Does the description provide enough context for future refinement?
-            11. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)?
-            12. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **User Understanding**: Did the agent fully understand the user's workflow by asking structured questions?
-            2. **Structured Questions Used**: Did the agent ask structured questions (using the AskUserQuestion tool) to gather user input?
-            3. **Document Detection**: For document-oriented workflows, did the agent detect patterns and offer doc spec creation?
-            4. **doc spec Created (if applicable)**: If a doc spec was needed, was it created in `.deepwork/doc_specs/[doc_spec_name].md` with proper quality criteria?
-            5. **doc spec References**: Are document outputs properly linked to their doc specs using `{file, doc_spec}` format?
-            6. **Valid Against doc spec**: Does the job.yml conform to the job.yml doc spec quality criteria (valid identifier, semantic version, concise summary, rich description, complete steps, valid dependencies)?
-            7. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs?
-            8. **Logical Dependencies**: Do step dependencies make sense and avoid circular references?
-            9. **Concise Summary**: Is the summary under 200 characters and descriptive?
-            10. **Rich Description**: Does the description provide enough context for future refinement?
-            11. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)?
-            12. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
----
+description: "Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow."
+user-invocable: false
+---
 
 # deepwork_jobs.define
 
@@ -701,7 +636,9 @@ Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
 
 ## Quality Validation
 
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
+**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
+
+Use a sub-agent (Haiku model) to review your work against these criteria:
 
 **Criteria (all must be satisfied)**:
 1. **User Understanding**: Did the agent fully understand the user's workflow by asking structured questions?
@@ -716,9 +653,13 @@ Stop hooks will automatically validate your work. The loop continues until all c
 10. **Rich Description**: Does the description provide enough context for future refinement?
 11. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)?
 12. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`?
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
+
+**Review Process**:
+1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
+2. The sub-agent should examine your outputs and verify each criterion is met
+3. If the sub-agent identifies valid issues, fix them
+4. Have the sub-agent review again until all valid feedback has been addressed
+5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
 
 ## On Completion
 
diff --git a/.claude/skills/deepwork_jobs.implement/SKILL.md b/.claude/skills/deepwork_jobs.implement/SKILL.md
index bb555eab..03879aa2 100644
--- a/.claude/skills/deepwork_jobs.implement/SKILL.md
+++ b/.claude/skills/deepwork_jobs.implement/SKILL.md
@@ -1,65 +1,8 @@
 ---
 name: deepwork_jobs.implement
-description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?
-            2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?
-            3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?
-            4. **Output Examples**: Does each instruction file show what good output looks like?
-            5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs?
-            6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"?
-            7. **Sync Complete**: Has `deepwork sync` been run successfully?
-            8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`?
-            9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful.
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?
-            2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?
-            3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?
-            4. **Output Examples**: Does each instruction file show what good output looks like?
-            5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs?
-            6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"?
-            7. **Sync Complete**: Has `deepwork sync` been run successfully?
-            8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`?
-            9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful.
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
----
+description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes."
+user-invocable: false
+---
 
 # deepwork_jobs.implement
 
@@ -357,7 +298,9 @@ Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
 
 ## Quality Validation
 
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
+**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
+
+Use a sub-agent (Haiku model) to review your work against these criteria:
 
 **Criteria (all must be satisfied)**:
 1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?
@@ -369,9 +312,13 @@ Stop hooks will automatically validate your work. The loop continues until all c
 7. **Sync Complete**: Has `deepwork sync` been run successfully?
 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`?
 9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful.
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
+
+**Review Process**:
+1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
+2. The sub-agent should examine your outputs and verify each criterion is met
+3. If the sub-agent identifies valid issues, fix them
+4. Have the sub-agent review again until all valid feedback has been addressed
+5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
 
 ## On Completion
 
diff --git a/.claude/skills/deepwork_jobs.learn/SKILL.md b/.claude/skills/deepwork_jobs.learn/SKILL.md
index fce20c29..d50ab9af 100644
--- a/.claude/skills/deepwork_jobs.learn/SKILL.md
+++ b/.claude/skills/deepwork_jobs.learn/SKILL.md
@@ -1,70 +1,6 @@
 ---
 name: deepwork_jobs.learn
-description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it."
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?
-            2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?
-            3. **Instructions Improved**: Were job instructions updated to address identified issues?
-            4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?
-            5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?
-            6. **doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified?
-            7. **doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure?
-            8. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?
-            9. **File References Used**: Do AGENTS.md entries reference other files where appropriate?
-            10. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?
-            11. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?
-            12. **Sync Complete**: Has `deepwork sync` been run if instructions were modified?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?
-            2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?
-            3. **Instructions Improved**: Were job instructions updated to address identified issues?
-            4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?
-            5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?
-            6. **doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified?
-            7. **doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure?
-            8. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?
-            9. **File References Used**: Do AGENTS.md entries reference other files where appropriate?
-            10. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?
-            11. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?
-            12. **Sync Complete**: Has `deepwork sync` been run if instructions were modified?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
----
+description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it."---
 
 # deepwork_jobs.learn
 
@@ -477,7 +413,9 @@ Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
 
 ## Quality Validation
 
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
+**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
+
+Use a sub-agent (Haiku model) to review your work against these criteria:
 
 **Criteria (all must be satisfied)**:
 1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?
@@ -492,9 +430,13 @@ Stop hooks will automatically validate your work. The loop continues until all c
 10. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?
 11. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?
 12. **Sync Complete**: Has `deepwork sync` been run if instructions were modified?
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
+
+**Review Process**:
+1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
+2. The sub-agent should examine your outputs and verify each criterion is met
+3. If the sub-agent identifies valid issues, fix them
+4. Have the sub-agent review again until all valid feedback has been addressed
+5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
 
 ## On Completion
 
diff --git a/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md b/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md
index c49defb6..4e54318a 100644
--- a/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md
+++ b/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md
@@ -1,55 +1,6 @@
 ---
 name: deepwork_jobs.review_job_spec
-description: "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?
-            2. **All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?
-            3. **Findings Addressed**: Were all failed criteria addressed by the main agent?
-            4. **Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?
-            2. **All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?
-            3. **Findings Addressed**: Were all failed criteria addressed by the main agent?
-            4. **Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
----
+description: "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification."user-invocable: false---
 
 # deepwork_jobs.review_job_spec
 
@@ -500,16 +451,22 @@ Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
 
 ## Quality Validation
 
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
+**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
+
+Use a sub-agent (Haiku model) to review your work against these criteria:
 
 **Criteria (all must be satisfied)**:
 1. **Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?
 2. **All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?
 3. **Findings Addressed**: Were all failed criteria addressed by the main agent?
 4. **Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
+
+**Review Process**:
+1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
+2. The sub-agent should examine your outputs and verify each criterion is met
+3. If the sub-agent identifies valid issues, fix them
+4. Have the sub-agent review again until all valid feedback has been addressed
+5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
 
 ## On Completion
 
diff --git a/.claude/skills/deepwork_rules.define/SKILL.md b/.claude/skills/deepwork_rules.define/SKILL.md
index 25cf1892..1ad6ebe3 100644
--- a/.claude/skills/deepwork_rules.define/SKILL.md
+++ b/.claude/skills/deepwork_rules.define/SKILL.md
@@ -1,8 +1,8 @@
 ---
 name: deepwork_rules.define
-description: "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands."
-user-invocable: false
----
+description: "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands."
+user-invocable: false
+---
 
 # deepwork_rules.define
 
diff --git a/.claude/skills/manual_tests.infinite_block_tests/SKILL.md b/.claude/skills/manual_tests.infinite_block_tests/SKILL.md
index ea56c5e1..5b3cb4b5 100644
--- a/.claude/skills/manual_tests.infinite_block_tests/SKILL.md
+++ b/.claude/skills/manual_tests.infinite_block_tests/SKILL.md
@@ -1,55 +1,6 @@
 ---
 name: manual_tests.infinite_block_tests
-description: "Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Sub-Agents Used**: Each test run via Task tool with `model: "haiku"` and `max_turns: 5`
-            2. **Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each
-            3. **Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule)
-            4. **No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung)
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Sub-Agents Used**: Each test run via Task tool with `model: "haiku"` and `max_turns: 5`
-            2. **Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each
-            3. **Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule)
-            4. **No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung)
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
----
+description: "Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios."user-invocable: false---
 
 # manual_tests.infinite_block_tests
 
@@ -270,16 +221,22 @@ Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
 
 ## Quality Validation
 
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
+**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
+
+Use a sub-agent (Haiku model) to review your work against these criteria:
 
 **Criteria (all must be satisfied)**:
 1. **Sub-Agents Used**: Each test run via Task tool with `model: "haiku"` and `max_turns: 5`
 2. **Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each
 3. **Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule)
 4. **No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung)
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
+
+**Review Process**:
+1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
+2. The sub-agent should examine your outputs and verify each criterion is met
+3. If the sub-agent identifies valid issues, fix them
+4. Have the sub-agent review again until all valid feedback has been addressed
+5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
 
 ## On Completion
 
diff --git a/.claude/skills/manual_tests.reset/SKILL.md b/.claude/skills/manual_tests.reset/SKILL.md
index ceb8eb4a..1792ef69 100644
--- a/.claude/skills/manual_tests.reset/SKILL.md
+++ b/.claude/skills/manual_tests.reset/SKILL.md
@@ -1,49 +1,6 @@
 ---
 name: manual_tests.reset
-description: "Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Environment Clean**: Git changes reverted, created files removed, and rules queue cleared
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Environment Clean**: Git changes reverted, created files removed, and rules queue cleared
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
----
+description: "Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue."user-invocable: false---
 
 # manual_tests.reset
 
@@ -157,13 +114,19 @@ Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
 
 ## Quality Validation
 
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
+**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
+
+Use a sub-agent (Haiku model) to review your work against these criteria:
 
 **Criteria (all must be satisfied)**:
 1. **Environment Clean**: Git changes reverted, created files removed, and rules queue cleared
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
+
+**Review Process**:
+1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
+2. The sub-agent should examine your outputs and verify each criterion is met
+3. If the sub-agent identifies valid issues, fix them
+4. Have the sub-agent review again until all valid feedback has been addressed
+5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
 
 ## On Completion
 
diff --git a/.claude/skills/manual_tests.run_fire_tests/SKILL.md b/.claude/skills/manual_tests.run_fire_tests/SKILL.md
index d6bd6c58..29199317 100644
--- a/.claude/skills/manual_tests.run_fire_tests/SKILL.md
+++ b/.claude/skills/manual_tests.run_fire_tests/SKILL.md
@@ -1,61 +1,6 @@
 ---
 name: manual_tests.run_fire_tests
-description: "Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly.
-            2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`?
-            3. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination?
-            4. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command.
-            5. **Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination?
-            6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
-            7. **Results Recorded**: Did the main agent track pass/fail status for each test case?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly.
-            2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`?
-            3. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination?
-            4. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command.
-            5. **Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination?
-            6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
-            7. **Results Recorded**: Did the main agent track pass/fail status for each test case?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
----
+description: "Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly."user-invocable: false---
 
 # manual_tests.run_fire_tests
 
@@ -272,7 +217,9 @@ Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
 
 ## Quality Validation
 
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
+**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
+
+Use a sub-agent (Haiku model) to review your work against these criteria:
 
 **Criteria (all must be satisfied)**:
 1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly.
@@ -282,9 +229,13 @@ Stop hooks will automatically validate your work. The loop continues until all c
 5. **Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination?
 6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
 7. **Results Recorded**: Did the main agent track pass/fail status for each test case?
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
+
+**Review Process**:
+1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
+2. The sub-agent should examine your outputs and verify each criterion is met
+3. If the sub-agent identifies valid issues, fix them
+4. Have the sub-agent review again until all valid feedback has been addressed
+5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
 
 ## On Completion
 
diff --git a/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md b/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md
index 4a3380ce..65c3c899 100644
--- a/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md
+++ b/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md
@@ -1,61 +1,6 @@
 ---
 name: manual_tests.run_not_fire_tests
-description: "Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly.
-            2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`?
-            3. **Parallel Execution**: Were all 6 sub-agents launched in parallel (in a single message with multiple Task tool calls)?
-            4. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command.
-            5. **Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)?
-            6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
-            7. **Reset Performed**: Was the reset step called internally after tests completed (or after early termination)?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            You must evaluate whether Claude has met all the below quality criteria for the request.
-
-            ## Quality Criteria
-
-            1. **Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly.
-            2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`?
-            3. **Parallel Execution**: Were all 6 sub-agents launched in parallel (in a single message with multiple Task tool calls)?
-            4. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command.
-            5. **Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)?
-            6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
-            7. **Reset Performed**: Was the reset step called internally after tests completed (or after early termination)?
-
-            ## Instructions
-
-            Review the conversation and determine if ALL quality criteria above have been satisfied.
-            Look for evidence that each criterion has been addressed.
-
-            If the agent has included `<promise>✓ Quality Criteria Met</promise>` in their response OR
-            all criteria appear to be met, let the agent finish.
-
-            If criteria are NOT met AND the promise tag is missing, have the agent keep working
-            until all criteria are satisfied.
----
+description: "Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met."user-invocable: false---
 
 # manual_tests.run_not_fire_tests
 
@@ -258,7 +203,9 @@ Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
 
 ## Quality Validation
 
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
+**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
+
+Use a sub-agent (Haiku model) to review your work against these criteria:
 
 **Criteria (all must be satisfied)**:
 1. **Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly.
@@ -268,9 +215,13 @@ Stop hooks will automatically validate your work. The loop continues until all c
 5. **Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)?
 6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
 7. **Reset Performed**: Was the reset step called internally after tests completed (or after early termination)?
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
+
+**Review Process**:
+1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
+2. The sub-agent should examine your outputs and verify each criterion is met
+3. If the sub-agent identifies valid issues, fix them
+4. Have the sub-agent review again until all valid feedback has been addressed
+5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
 
 ## On Completion
 
diff --git a/.claude/skills/update.job/SKILL.md b/.claude/skills/update.job/SKILL.md
index 19ab7fb0..d2122d5f 100644
--- a/.claude/skills/update.job/SKILL.md
+++ b/.claude/skills/update.job/SKILL.md
@@ -1,31 +1,8 @@
 ---
 name: update.job
-description: "Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the update process completed successfully:
-            1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/)
-            2. `deepwork install --platform claude` was run
-            3. Files in .deepwork/jobs/ match the source files
-            4. Command files in .claude/commands/ were regenerated
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  SubagentStop:
-    - hooks:
-        - type: prompt
-          prompt: |
-            Verify the update process completed successfully:
-            1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/)
-            2. `deepwork install --platform claude` was run
-            3. Files in .deepwork/jobs/ match the source files
-            4. Command files in .claude/commands/ were regenerated
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
----
+description: "Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions."
+user-invocable: false
+---
 
 # update.job
 
@@ -155,14 +132,6 @@ Use branch format: `deepwork/update-[instance]-YYYYMMDD`
 - Do NOT proceed without required inputs; ask the user if any are missing
 - Do NOT modify files outside the scope of this step's defined outputs
 
-## Quality Validation
-
-Stop hooks will automatically validate your work. The loop continues until all criteria pass.
-
-
-
-**To complete**: Include `<promise>✓ Quality Criteria Met</promise>` in your final response only after verifying ALL criteria are satisfied.
-
 ## On Completion
 
 1. Verify outputs are created
diff --git a/.deepwork/jobs/commit/job.yml b/.deepwork/jobs/commit/job.yml
index d153bff3..445640df 100644
--- a/.deepwork/jobs/commit/job.yml
+++ b/.deepwork/jobs/commit/job.yml
@@ -1,5 +1,5 @@
 name: commit
-version: "1.3.0"
+version: "1.4.0"
 summary: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks."
 description: |
   A workflow for preparing and committing code changes with quality checks.
@@ -16,6 +16,8 @@ description: |
   4. commit_and_push - Review changes and commit/push
 
 changelog:
+  - version: "1.4.0"
+    changes: "Added changelog guidance: entries must go in [Unreleased] section, NEVER modify version numbers in pyproject.toml or CHANGELOG.md"
   - version: "1.3.0"
     changes: "Added code review step that runs in sub-agent to check for general issues, DRY opportunities, naming clarity, and test coverage"
   - version: "1.0.1"
@@ -99,6 +101,8 @@ steps:
             Verify the commit is ready:
             1. Changed files list was reviewed by the agent
             2. Files match what was modified during this session (or unexpected changes were investigated)
-            3. Commit was created with appropriate message
-            4. Changes were pushed to remote
+            3. CHANGELOG.md was updated with entries in the [Unreleased] section (if changes warrant documentation)
+            4. Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers must remain unchanged)
+            5. Commit was created with appropriate message
+            6. Changes were pushed to remote
             If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
diff --git a/.deepwork/jobs/commit/steps/commit_and_push.md b/.deepwork/jobs/commit/steps/commit_and_push.md
index cd45c43d..a1160a91 100644
--- a/.deepwork/jobs/commit/steps/commit_and_push.md
+++ b/.deepwork/jobs/commit/steps/commit_and_push.md
@@ -24,14 +24,27 @@ Check the list of changed files against what was modified during this session, e
    - Are there any unexpected deleted files?
    - Do the line counts seem reasonable for the changes you made?
 
-   If changes match expectations, proceed to commit.
+   If changes match expectations, proceed to the next step.
 
    If there are unexpected changes:
    - Investigate why (e.g., lint auto-fixes, generated files)
    - If they're legitimate side effects of your work, include them
    - If they're unrelated or shouldn't be committed, use `git restore` to discard them
 
-3. **Stage all appropriate changes**
+3. **Update CHANGELOG.md if needed**
+
+   If your changes include new features, bug fixes, or other notable changes:
+   - Add entries to the `## [Unreleased]` section of CHANGELOG.md
+   - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed`
+   - Write concise descriptions that explain the user-facing impact
+
+   **CRITICAL: NEVER modify version numbers**
+   - Do NOT change the version in `pyproject.toml`
+   - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`)
+   - Do NOT rename the `## [Unreleased]` section
+   - Version updates are handled by the release workflow, not commits
+
+4. **Stage all appropriate changes**
    ```bash
    git add -A
    ```
@@ -67,6 +80,8 @@ Check the list of changed files against what was modified during this session, e
 
 - Changed files list was reviewed by the agent
 - Files match what was modified during this session (or unexpected changes were investigated and handled)
+- CHANGELOG.md was updated with entries in the `[Unreleased]` section (if changes warrant documentation)
+- Version numbers were NOT modified (in pyproject.toml or CHANGELOG.md version headers)
 - Commit message follows project conventions
 - Commit was created successfully
 - Changes were pushed to remote
diff --git a/.gemini/skills/commit/commit_and_push.toml b/.gemini/skills/commit/commit_and_push.toml
index 9c42b3e5..251b9a3a 100644
--- a/.gemini/skills/commit/commit_and_push.toml
+++ b/.gemini/skills/commit/commit_and_push.toml
@@ -48,14 +48,27 @@ Check the list of changed files against what was modified during this session, e
    - Are there any unexpected deleted files?
    - Do the line counts seem reasonable for the changes you made?
 
-   If changes match expectations, proceed to commit.
+   If changes match expectations, proceed to the next step.
 
    If there are unexpected changes:
    - Investigate why (e.g., lint auto-fixes, generated files)
    - If they're legitimate side effects of your work, include them
    - If they're unrelated or shouldn't be committed, use `git restore` to discard them
 
-3. **Stage all appropriate changes**
+3. **Update CHANGELOG.md if needed**
+
+   If your changes include new features, bug fixes, or other notable changes:
+   - Add entries to the `## [Unreleased]` section of CHANGELOG.md
+   - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed`
+   - Write concise descriptions that explain the user-facing impact
+
+   **CRITICAL: NEVER modify version numbers**
+   - Do NOT change the version in `pyproject.toml`
+   - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`)
+   - Do NOT rename the `## [Unreleased]` section
+   - Version updates are handled by the release workflow, not commits
+
+4. **Stage all appropriate changes**
    ```bash
    git add -A
    ```
@@ -73,8 +86,9 @@ Check the list of changed files against what was modified during this session, e
    - The style of recent commits
    - Conventional commit format if the project uses it
 
+   **IMPORTANT:** Use the commit job script (not `git commit` directly):
    ```bash
-   git commit -m "commit message here"
+   .claude/hooks/commit_job_git_commit.sh -m "commit message here"
    ```
 
 7. **Push to remote**
@@ -90,6 +104,8 @@ Check the list of changed files against what was modified during this session, e
 
 - Changed files list was reviewed by the agent
 - Files match what was modified during this session (or unexpected changes were investigated and handled)
+- CHANGELOG.md was updated with entries in the `[Unreleased]` section (if changes warrant documentation)
+- Version numbers were NOT modified (in pyproject.toml or CHANGELOG.md version headers)
 - Commit message follows project conventions
 - Commit was created successfully
 - Changes were pushed to remote
diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml
new file mode 100644
index 00000000..0a81d368
--- /dev/null
+++ b/.github/workflows/create-release.yml
@@ -0,0 +1,168 @@
+name: Create Release
+
+on:
+  workflow_dispatch:  # manual trigger only; the caller supplies the version to release
+    inputs:
+      version:
+        description: 'Version number (e.g., 0.6.0)'
+        required: true
+        type: string
+
+permissions:
+  contents: write  # the job below pushes a commit and a tag and creates a GitHub release
+
+jobs:
+  release:  # single job: validate input, rewrite release files, commit, tag, publish
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # NOTE(review): presumably full history is wanted for the later push/tag steps — confirm
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Validate version format
+        env: { VERSION: "${{ inputs.version }}" }  # pass the input as data; never splice it into the script text
+        run: |
+          if [[ ! "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then  # whole-string match, so multi-line input is rejected
+            echo "Error: Version must be in format X.Y.Z (e.g., 0.6.0)"; exit 1
+          fi
+
+      - name: Configure git  # sets the author identity for the commit and annotated tag made by later steps
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+      - name: Update CHANGELOG.md  # promote [Unreleased] to the new version, refresh links, write release_notes.md
+        id: changelog
+        env:
+          GITHUB_REPOSITORY: ${{ github.repository }}
+        run: |
+          VERSION="${{ inputs.version }}"
+          DATE=$(date +%Y-%m-%d)
+
+          # New Unreleased template. Its lines must sit at the run-block indent, or the YAML block scalar ends early.
+          UNRELEASED_TEMPLATE="## [Unreleased]
+
+          ### Added
+
+          ### Changed
+
+          ### Fixed
+
+          ### Removed
+
+          "
+
+          # Read the current changelog (the value is not used by the commands below)
+          CHANGELOG=$(cat CHANGELOG.md)
+
+          # Check if there's an existing Unreleased section
+          if grep -q "^## \[Unreleased\]" CHANGELOG.md; then
+            # The existing [Unreleased] header line is swapped for two things:
+            # the fresh template, then a "## [VERSION] - DATE" header. The entries
+            # that were under [Unreleased] stay in place and so fall under the new version.
+
+            # Use awk to do the replacement
+            awk -v version="$VERSION" -v date="$DATE" -v template="$UNRELEASED_TEMPLATE" '
+              /^## \[Unreleased\]/ {
+                print template
+                print "## [" version "] - " date
+                next
+              }
+              { print }
+            ' CHANGELOG.md > CHANGELOG.md.tmp
+            mv CHANGELOG.md.tmp CHANGELOG.md
+          else
+            # No Unreleased section exists - put the template and the new version header
+            # in front of the first "## [" heading
+            awk -v version="$VERSION" -v date="$DATE" -v template="$UNRELEASED_TEMPLATE" '
+              BEGIN { inserted = 0 }
+              /^## \[/ && !inserted {
+                print template
+                print "## [" version "] - " date
+                print ""
+                inserted = 1
+              }
+              { print }
+            ' CHANGELOG.md > CHANGELOG.md.tmp
+            mv CHANGELOG.md.tmp CHANGELOG.md
+          fi
+
+          # Update the version links at the bottom of the CHANGELOG
+          # Point the Unreleased compare link at the new version
+          sed -i "s|\[Unreleased\]: https://github.com/.*/compare/.*\.\.\.HEAD|[Unreleased]: https://github.com/${GITHUB_REPOSITORY}/compare/${VERSION}...HEAD|" CHANGELOG.md
+
+          # Add the new version link if it doesn't exist (insert after Unreleased link)
+          if ! grep -q "^\[${VERSION}\]:" CHANGELOG.md; then
+            # Only add a link when the file already has at least one "[X.Y.Z]:" link definition
+            PREV_VERSION=$(grep -oP '^\[[\d.]+\]:' CHANGELOG.md | head -1 | tr -d '[]:'  || echo "")
+            if [ -n "$PREV_VERSION" ]; then
+              VERSION_LINK="[${VERSION}]: https://github.com/${GITHUB_REPOSITORY}/releases/tag/${VERSION}"
+              sed -i "/^\[Unreleased\]:/a ${VERSION_LINK}" CHANGELOG.md
+            fi
+          fi
+
+          # Extract the changelog content for this version (for the release notes)
+          # Get everything between the version header and the next "## [" header
+          RELEASE_NOTES=$(awk -v version="$VERSION" '
+            BEGIN { capture = 0; found = 0 }
+            /^## \[/ {
+              if (capture) exit
+              if (index($0, "## [" version "]") == 1) {  # literal match: dots in the version are not regex wildcards
+                capture = 1
+                found = 1
+                next
+              }
+            }
+            capture { print }
+          ' CHANGELOG.md)
+
+          # Save release notes to a file for the release step
+          echo "$RELEASE_NOTES" > release_notes.md
+
+          # Output for debugging
+          echo "Release notes extracted:"
+          cat release_notes.md
+
+      - name: Update pyproject.toml version  # sed rewrites every line starting with `version = "`; grep echoes the result
+        run: |
+          VERSION="${{ inputs.version }}"
+          sed -i "s/^version = \".*\"/version = \"$VERSION\"/" pyproject.toml
+
+          # Verify the change
+          grep "^version = " pyproject.toml
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"  # uv itself is not pinned to a specific release
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps it a string
+
+      - name: Update lock file  # uv.lock is staged by the "Commit changes" step that follows
+        run: uv sync
+
+      - name: Commit changes  # stages only the three release files, commits, and always pushes to `main`
+        run: |
+          VERSION="${{ inputs.version }}"
+          git add CHANGELOG.md pyproject.toml uv.lock
+          git commit -m "Release v${VERSION}"
+          git push origin main
+
+      - name: Create and push tag  # annotated tag named with the bare version (no "v" prefix)
+        run: |
+          VERSION="${{ inputs.version }}"
+          git tag -a "$VERSION" -m "Release $VERSION"
+          git push origin "$VERSION"
+
+      - name: Create GitHub Release  # publishes a release for the tag pushed in the previous step
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: ${{ inputs.version }}
+          name: ${{ inputs.version }}
+          body_path: release_notes.md  # file written by the "Update CHANGELOG.md" step
+          draft: false
+          prerelease: false
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55081786..4eb764da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,8 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 - Installer now auto-adds permission for `make_new_job.sh` script, allowing Claude to run job creation without manual configuration
+- Manual release workflow (`create-release.yml`) that automates version releases:
+  - Takes version number as input, validates format
+  - Updates CHANGELOG.md: converts Unreleased section to new version with date
+  - Adds fresh Unreleased section with placeholder categories
+  - Updates pyproject.toml version and runs uv sync for lock file
+  - Commits changes directly to main, creates tag, and publishes GitHub release
 
 ### Changed
+- Commit job now requires changelog entries go to `[Unreleased]` section and explicitly prohibits modifying version numbers
 
 ### Fixed