Skip to content

Commit 67352eb

Browse files
committed
Restructure rule_library and add JSON file support
- Move rule to rule_library/json_validation/ subfolder
- Add JSON file support (.json in addition to .yml/.yaml)
- Optimize validation with quick text scan for $schema before parsing
- Only fully parse files that have a schema declaration
- Add JSON examples (valid and invalid)
1 parent 339ef8a commit 67352eb

File tree

10 files changed

+215
-112
lines changed

10 files changed

+215
-112
lines changed

rule_library/examples/test-schema.json

Lines changed: 0 additions & 13 deletions
This file was deleted.

rule_library/examples/valid-example.yaml

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"$schema": "./test-schema.json",
3+
"name": "",
4+
"version": "not-a-version",
5+
"extra_field": "not allowed"
6+
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
$schema: ./test-schema.json
2-
name: example
2+
name: my-project
33
version: 123
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"type": "object",
4+
"required": ["name", "version"],
5+
"properties": {
6+
"$schema": {
7+
"type": "string"
8+
},
9+
"name": {
10+
"type": "string",
11+
"minLength": 1
12+
},
13+
"version": {
14+
"type": "string",
15+
"pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"
16+
},
17+
"description": {
18+
"type": "string"
19+
}
20+
},
21+
"additionalProperties": false
22+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"$schema": "./test-schema.json",
3+
"name": "my-project",
4+
"version": "2.0.0",
5+
"description": "A sample JSON project"
6+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
$schema: ./test-schema.json
2+
name: my-project
3+
version: "1.0.0"
4+
description: A sample project
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
---
2+
name: Schema Validation
3+
trigger:
4+
- "**/*.yml"
5+
- "**/*.yaml"
6+
- "**/*.json"
7+
action:
8+
command: python3 rule_library/json_validation/scripts/validate_schema.py {file}
9+
run_for: each_match
10+
compare_to: prompt
11+
---
12+
Validates YAML and JSON files against their declared JSON Schema.
13+
14+
This rule triggers on any `.yml`, `.yaml`, or `.json` file that is modified. It
15+
performs a quick text scan for a `$schema` declaration before doing any parsing.
16+
Only files with a schema reference are fully parsed and validated.
17+
18+
## Schema Declaration
19+
20+
**YAML files:**
21+
```yaml
22+
$schema: https://json-schema.org/draft-07/schema
23+
# or
24+
$schema: ./schemas/my-schema.json
25+
```
26+
27+
**JSON files:**
28+
```json
29+
{
30+
"$schema": "https://json-schema.org/draft-07/schema",
31+
"name": "example"
32+
}
33+
```
34+
35+
## Behavior
36+
37+
1. **Quick scan**: Searches the first 4 KB of the file for a `$schema` declaration (no parsing)
38+
2. **Skip if none**: Files with no `$schema` declaration in the scanned portion pass immediately
39+
3. **Full parse**: Only files with a schema declaration are fully parsed
40+
4. **Validate**: Content validated against the declared schema
41+
42+
### Exit Codes
43+
44+
- **0 (pass)**: File validates against schema, or no schema declared
45+
- **1 (fail)**: Validation failed - returns blocking JSON with error details
46+
- **2 (error)**: Could not load schema or parse file
47+
48+
## Example Output
49+
50+
On validation failure:
51+
```json
52+
{
53+
"status": "fail",
54+
"file": "config.json",
55+
"schema": "https://example.com/schemas/config.json",
56+
"error_count": 2,
57+
"errors": [
58+
{
59+
"message": "'name' is a required property",
60+
"path": "/",
61+
"schema_path": "/required"
62+
},
63+
{
64+
"message": "42 is not of type 'string'",
65+
"path": "/version",
66+
"schema_path": "/properties/version/type",
67+
"value": 42
68+
}
69+
]
70+
}
71+
```
72+
73+
## Requirements
74+
75+
- Python 3.10+
76+
- `jsonschema` package (required)
77+
- `pyyaml` package (required for YAML files)
78+
79+
Install with: `pip install jsonschema pyyaml`

rule_library/scripts/validate_yaml_schema.py renamed to rule_library/json_validation/scripts/validate_schema.py

Lines changed: 97 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#!/usr/bin/env python3
22
"""
3-
Validate YAML files against their declared JSON Schema.
3+
Validate YAML and JSON files against their declared JSON Schema.
44
5-
Looks for a $schema declaration at the top of YAML files and validates
6-
the file content against that schema. The schema can be a URL or a local path.
5+
This script first performs a quick text search for $schema in the file.
6+
Only if a schema reference is found does it fully parse the file and validate.
7+
8+
Supported file types: .yml, .yaml, .json
79
810
Exit codes:
911
0 - Validation passed (or no schema declared)
@@ -12,6 +14,7 @@
1214
"""
1315

1416
import json
17+
import re
1518
import sys
1619
import urllib.request
1720
import urllib.error
@@ -20,15 +23,11 @@
2023
try:
2124
import yaml
2225
except ImportError:
23-
print(json.dumps({
24-
"status": "error",
25-
"message": "PyYAML is not installed. Run: pip install pyyaml"
26-
}))
27-
sys.exit(2)
26+
yaml = None
2827

2928
try:
3029
import jsonschema
31-
from jsonschema import Draft7Validator, ValidationError
30+
from jsonschema import Draft7Validator
3231
except ImportError:
3332
print(json.dumps({
3433
"status": "error",
@@ -37,12 +36,59 @@
3736
sys.exit(2)
3837

3938

40-
def load_yaml_file(file_path: str) -> tuple[dict | list | None, str | None]:
41-
"""Load a YAML file and return its contents."""
39+
# Pattern to quickly detect $schema in file content without full parsing
40+
# Matches both JSON ("$schema": "...") and YAML ($schema: ...)
41+
SCHEMA_PATTERN = re.compile(
42+
r'''["']?\$schema["']?\s*[:=]\s*["']?([^"'\s,}\]]+)''',
43+
re.IGNORECASE
44+
)
45+
46+
47+
def quick_detect_schema(file_path: str) -> str | None:
48+
"""
49+
Quickly scan file for $schema declaration without full parsing.
50+
Returns the schema reference if found, None otherwise.
51+
"""
4252
try:
4353
with open(file_path, "r", encoding="utf-8") as f:
44-
content = yaml.safe_load(f)
45-
return content, None
54+
# Read first 4KB - schema should be near the top
55+
content = f.read(4096)
56+
57+
match = SCHEMA_PATTERN.search(content)
58+
if match:
59+
return match.group(1).rstrip("'\"")
60+
return None
61+
62+
except Exception:
63+
return None
64+
65+
66+
def parse_file(file_path: str) -> tuple[dict | list | None, str | None]:
67+
"""Parse a YAML or JSON file and return its contents."""
68+
path = Path(file_path)
69+
suffix = path.suffix.lower()
70+
71+
try:
72+
with open(file_path, "r", encoding="utf-8") as f:
73+
content = f.read()
74+
75+
if suffix == ".json":
76+
return json.loads(content), None
77+
elif suffix in (".yml", ".yaml"):
78+
if yaml is None:
79+
return None, "PyYAML is not installed. Run: pip install pyyaml"
80+
return yaml.safe_load(content), None
81+
else:
82+
# Try JSON first, then YAML
83+
try:
84+
return json.loads(content), None
85+
except json.JSONDecodeError:
86+
if yaml:
87+
return yaml.safe_load(content), None
88+
return None, f"Unsupported file type: {suffix}"
89+
90+
except json.JSONDecodeError as e:
91+
return None, f"Invalid JSON syntax: {e}"
4692
except yaml.YAMLError as e:
4793
return None, f"Invalid YAML syntax: {e}"
4894
except FileNotFoundError:
@@ -52,7 +98,7 @@ def load_yaml_file(file_path: str) -> tuple[dict | list | None, str | None]:
5298

5399

54100
def extract_schema_reference(content: dict) -> str | None:
55-
"""Extract the $schema reference from YAML content."""
101+
"""Extract the $schema reference from parsed content."""
56102
if not isinstance(content, dict):
57103
return None
58104
return content.get("$schema")
@@ -63,7 +109,7 @@ def fetch_schema_from_url(url: str) -> tuple[dict | None, str | None]:
63109
try:
64110
req = urllib.request.Request(
65111
url,
66-
headers={"User-Agent": "yaml-schema-validator/1.0"}
112+
headers={"User-Agent": "schema-validator/1.0"}
67113
)
68114
with urllib.request.urlopen(req, timeout=30) as response:
69115
schema_content = response.read().decode("utf-8")
@@ -72,24 +118,26 @@ def fetch_schema_from_url(url: str) -> tuple[dict | None, str | None]:
72118
try:
73119
return json.loads(schema_content), None
74120
except json.JSONDecodeError:
75-
try:
76-
return yaml.safe_load(schema_content), None
77-
except yaml.YAMLError as e:
78-
return None, f"Invalid schema format at URL: {e}"
121+
if yaml:
122+
try:
123+
return yaml.safe_load(schema_content), None
124+
except yaml.YAMLError as e:
125+
return None, f"Invalid schema format at URL: {e}"
126+
return None, "Invalid JSON schema format at URL"
79127

80128
except urllib.error.URLError as e:
81129
return None, f"Failed to fetch schema from URL: {e}"
82130
except Exception as e:
83131
return None, f"Error fetching schema: {e}"
84132

85133

86-
def load_schema_from_path(schema_path: str, yaml_file_path: str) -> tuple[dict | None, str | None]:
134+
def load_schema_from_path(schema_path: str, source_file_path: str) -> tuple[dict | None, str | None]:
87135
"""Load a JSON Schema from a local file path."""
88-
# Resolve relative paths relative to the YAML file's directory
136+
# Resolve relative paths relative to the source file's directory
89137
path = Path(schema_path)
90138
if not path.is_absolute():
91-
yaml_dir = Path(yaml_file_path).parent
92-
path = yaml_dir / path
139+
source_dir = Path(source_file_path).parent
140+
path = source_dir / path
93141

94142
path = path.resolve()
95143

@@ -104,21 +152,23 @@ def load_schema_from_path(schema_path: str, yaml_file_path: str) -> tuple[dict |
104152
try:
105153
return json.loads(content), None
106154
except json.JSONDecodeError:
107-
try:
108-
return yaml.safe_load(content), None
109-
except yaml.YAMLError as e:
110-
return None, f"Invalid schema format: {e}"
155+
if yaml:
156+
try:
157+
return yaml.safe_load(content), None
158+
except yaml.YAMLError as e:
159+
return None, f"Invalid schema format: {e}"
160+
return None, "Invalid JSON schema format"
111161

112162
except Exception as e:
113163
return None, f"Error reading schema file: {e}"
114164

115165

116-
def load_schema(schema_ref: str, yaml_file_path: str) -> tuple[dict | None, str | None]:
166+
def load_schema(schema_ref: str, source_file_path: str) -> tuple[dict | None, str | None]:
117167
"""Load a schema from either a URL or local path."""
118168
if schema_ref.startswith(("http://", "https://")):
119169
return fetch_schema_from_url(schema_ref)
120170
else:
121-
return load_schema_from_path(schema_ref, yaml_file_path)
171+
return load_schema_from_path(schema_ref, source_file_path)
122172

123173

124174
def validate_against_schema(content: dict, schema: dict) -> list[dict]:
@@ -146,14 +196,25 @@ def main():
146196
if len(sys.argv) < 2:
147197
print(json.dumps({
148198
"status": "error",
149-
"message": "Usage: validate_yaml_schema.py <file.yaml>"
199+
"message": "Usage: validate_schema.py <file.yaml|file.json>"
150200
}))
151201
sys.exit(2)
152202

153203
file_path = sys.argv[1]
154204

155-
# Load the YAML file
156-
content, error = load_yaml_file(file_path)
205+
# Step 1: Quick detection - scan for $schema without parsing
206+
quick_schema = quick_detect_schema(file_path)
207+
if not quick_schema:
208+
# No schema found in quick scan - pass without full parsing
209+
print(json.dumps({
210+
"status": "pass",
211+
"file": file_path,
212+
"message": "No $schema declared, skipping validation"
213+
}))
214+
sys.exit(0)
215+
216+
# Step 2: Schema detected - now do full parsing
217+
content, error = parse_file(file_path)
157218
if error:
158219
print(json.dumps({
159220
"status": "error",
@@ -162,18 +223,18 @@ def main():
162223
}))
163224
sys.exit(2)
164225

165-
# Check for $schema reference
226+
# Get the actual schema reference from parsed content
166227
schema_ref = extract_schema_reference(content)
167228
if not schema_ref:
168-
# No schema declared - pass silently
229+
# Quick scan found something but it wasn't actually a $schema field
169230
print(json.dumps({
170231
"status": "pass",
171232
"file": file_path,
172233
"message": "No $schema declared, skipping validation"
173234
}))
174235
sys.exit(0)
175236

176-
# Load the schema
237+
# Step 3: Load the schema
177238
schema, error = load_schema(schema_ref, file_path)
178239
if error:
179240
print(json.dumps({
@@ -184,7 +245,7 @@ def main():
184245
}))
185246
sys.exit(2)
186247

187-
# Validate
248+
# Step 4: Validate
188249
errors = validate_against_schema(content, schema)
189250

190251
if not errors:

0 commit comments

Comments
 (0)