From f7608087786f4c05949fecbb9ad2ccc923d0a237 Mon Sep 17 00:00:00 2001 From: sehwan505 Date: Wed, 12 Nov 2025 17:21:36 +0900 Subject: [PATCH 1/5] fix: prevent duplicate messages in convention output --- internal/mcp/server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/mcp/server.go b/internal/mcp/server.go index 8555b5d..90f9565 100644 --- a/internal/mcp/server.go +++ b/internal/mcp/server.go @@ -426,7 +426,7 @@ func (s *Server) handleQueryConventions(params map[string]interface{}) (interfac textContent += fmt.Sprintf("%d. [%s] %s\n", i+1, conv.Severity, conv.ID) textContent += fmt.Sprintf(" Category: %s\n", conv.Category) textContent += fmt.Sprintf(" Description: %s\n", conv.Description) - if conv.Message != "" { + if conv.Message != "" && conv.Message != conv.Description { textContent += fmt.Sprintf(" Message: %s\n", conv.Message) } textContent += "\n" From 46a886fa24b6c8b7db72ad3e0bfa4a9993b70534 Mon Sep 17 00:00:00 2001 From: sehwan505 Date: Thu, 13 Nov 2025 00:39:10 +0900 Subject: [PATCH 2/5] feat: add LLM-based validation engine and integrate with existing validator --- internal/engine/llm/engine.go | 284 ++++++++++++++++++++++++++++ internal/engine/registry/builtin.go | 6 + internal/llm/inference.go | 3 +- internal/llm/types.go | 2 +- internal/mcp/server.go | 149 +++++++++------ internal/validator/engine.go | 72 +++++++ internal/validator/llm_validator.go | 18 +- internal/validator/selector.go | 263 ++++++++++++++++++++++++++ internal/validator/validator.go | 240 ++++++++++++++++++++++- test_validator.go | 99 ++++++++++ 10 files changed, 1063 insertions(+), 73 deletions(-) create mode 100644 internal/engine/llm/engine.go create mode 100644 internal/validator/engine.go create mode 100644 internal/validator/selector.go create mode 100644 test_validator.go diff --git a/internal/engine/llm/engine.go b/internal/engine/llm/engine.go new file mode 100644 index 0000000..10d586e --- /dev/null +++ b/internal/engine/llm/engine.go @@ -0,0 +1,284 @@ +package llm + +import ( + "context" + "fmt" + "os" + "strings" + "time" + + "github.com/DevSymphony/sym-cli/internal/engine/core" + "github.com/DevSymphony/sym-cli/internal/llm" +) + +// Engine validates code using LLM-based analysis. +// Unlike other engines that use static analysis tools, this engine +// uses an LLM to understand and validate code against natural language rules. +type Engine struct { + client *llm.Client + config core.EngineConfig +} + +// NewEngine creates a new LLM engine. +func NewEngine() *Engine { + return &Engine{} +} + +// Init initializes the engine. +func (e *Engine) Init(ctx context.Context, config core.EngineConfig) error { + e.config = config + + // Initialize LLM client + apiKey := os.Getenv("ANTHROPIC_API_KEY") + if apiKey == "" { + apiKey = os.Getenv("OPENAI_API_KEY") + } + + if apiKey == "" { + return fmt.Errorf("LLM API key not found (ANTHROPIC_API_KEY or OPENAI_API_KEY)") + } + + e.client = llm.NewClient(apiKey) + return nil +} + +// Validate validates files against an LLM-based rule. +func (e *Engine) Validate(ctx context.Context, rule core.Rule, files []string) (*core.ValidationResult, error) { + start := time.Now() + + // Filter files by selector + files = core.FilterFiles(files, rule.When) + + if len(files) == 0 { + return &core.ValidationResult{ + RuleID: rule.ID, + Passed: true, + Engine: "llm-validator", + Duration: time.Since(start), + }, nil + } + + violations := make([]core.Violation, 0) + + // Validate each file + for _, file := range files { + // Read file content + content, err := os.ReadFile(file) + if err != nil { + if e.config.Debug { + fmt.Printf("⚠️ Failed to read file %s: %v\n", file, err) + } + continue + } + + // Validate with LLM + fileViolations, err := e.validateFile(ctx, rule, file, string(content)) + if err != nil { + if e.config.Debug { + fmt.Printf("⚠️ Failed to validate file %s: %v\n", file, err) + } + continue + } + + violations = append(violations, fileViolations...) + } + + return &core.ValidationResult{ + RuleID: rule.ID, + Passed: len(violations) == 0, + Violations: violations, + Duration: time.Since(start), + Engine: "llm-validator", + Metrics: &core.Metrics{ + FilesProcessed: len(files), + }, + }, nil +} + +// validateFile validates a single file using LLM +func (e *Engine) validateFile(ctx context.Context, rule core.Rule, filePath string, content string) ([]core.Violation, error) { + // Build prompt for LLM + systemPrompt := `You are a code reviewer. Check if the code violates the given coding convention. + +Respond with JSON only: +{ + "violates": true/false, + "description": "explanation of violation if any", + "suggestion": "how to fix it if violated", + "line": line_number_if_applicable (0 if not applicable) +}` + + userPrompt := fmt.Sprintf(`File: %s + +Coding Convention: +%s + +Code: +%s + +Does this code violate the convention?`, filePath, rule.Desc, content) + + // Call LLM + response, err := e.client.Complete(ctx, systemPrompt, userPrompt) + if err != nil { + return nil, err + } + + // Parse response + result := parseValidationResponse(response) + if !result.Violates { + return []core.Violation{}, nil + } + + message := result.Description + if result.Suggestion != "" { + message += fmt.Sprintf(" | Suggestion: %s", result.Suggestion) + } + + // Use custom message if provided in rule + if rule.Message != "" { + message = rule.Message + " | " + message + } + + violation := core.Violation{ + RuleID: rule.ID, + Severity: rule.Severity, + Message: message, + File: filePath, + Line: result.Line, + Category: rule.Category, + } + + return []core.Violation{violation}, nil +} + +// GetCapabilities returns engine capabilities. +func (e *Engine) GetCapabilities() core.EngineCapabilities { + return core.EngineCapabilities{ + Name: "llm-validator", + // LLM is language-agnostic - can understand any programming language + SupportedLanguages: []string{ + "javascript", "typescript", "jsx", "tsx", + "python", "go", "java", "rust", "c", "cpp", + "ruby", "php", "swift", "kotlin", "scala", + }, + SupportedCategories: []string{ + "convention", "style", "best-practice", + "security", "performance", "maintainability", + }, + SupportsAutofix: false, // Future enhancement + RequiresCompilation: false, + ExternalTools: []core.ToolRequirement{}, // No external tools needed + } +} + +// Close cleans up resources. +func (e *Engine) Close() error { + return nil +} + +// validationResponse represents the parsed LLM response +type validationResponse struct { + Violates bool + Description string + Suggestion string + Line int +} + +// parseValidationResponse parses the LLM response +func parseValidationResponse(response string) validationResponse { + // Default to no violation + result := validationResponse{ + Violates: false, + Description: "", + Suggestion: "", + Line: 0, + } + + lower := strings.ToLower(response) + + // Check if no violation + if strings.Contains(lower, `"violates": false`) || + strings.Contains(lower, `"violates":false`) || + strings.Contains(lower, "does not violate") { + return result + } + + // Check if violates + if strings.Contains(lower, `"violates": true`) || + strings.Contains(lower, `"violates":true`) { + result.Violates = true + + // Extract description + if desc := extractJSONField(response, "description"); desc != "" { + result.Description = desc + } else { + result.Description = "Rule violation detected" + } + + // Extract suggestion + if sugg := extractJSONField(response, "suggestion"); sugg != "" { + result.Suggestion = sugg + } + + // Extract line number + if lineStr := extractJSONField(response, "line"); lineStr != "" { + // Parse line number + var line int + fmt.Sscanf(lineStr, "%d", &line) + result.Line = line + } + } + + return result +} + +// extractJSONField extracts a field value from JSON response +func extractJSONField(response, field string) string { + // Look for "field": "value" + key := fmt.Sprintf(`"%s"`, field) + idx := strings.Index(response, key) + if idx == -1 { + return "" + } + + // Find : after field name + colonIdx := strings.Index(response[idx:], ":") + idx + if colonIdx <= idx { + return "" + } + + // Find opening quote or number + start := colonIdx + 1 + for start < len(response) && (response[start] == ' ' || response[start] == '\t' || response[start] == '\n') { + start++ + } + + if start >= len(response) { + return "" + } + + // Handle string value + if response[start] == '"' { + openIdx := start + closeIdx := openIdx + 1 + for closeIdx < len(response) { + if response[closeIdx] == '"' && (closeIdx == openIdx+1 || response[closeIdx-1] != '\\') { + return response[openIdx+1 : closeIdx] + } + closeIdx++ + } + return "" + } + + // Handle numeric value + end := start + for end < len(response) && response[end] >= '0' && response[end] <= '9' { + end++ + } + if end > start { + return response[start:end] + } + + return "" +} diff --git a/internal/engine/registry/builtin.go b/internal/engine/registry/builtin.go index f326de4..820af33 100644 --- a/internal/engine/registry/builtin.go +++ b/internal/engine/registry/builtin.go @@ -4,6 +4,7 @@ import ( "github.com/DevSymphony/sym-cli/internal/engine/ast" "github.com/DevSymphony/sym-cli/internal/engine/core" "github.com/DevSymphony/sym-cli/internal/engine/length" + "github.com/DevSymphony/sym-cli/internal/engine/llm" "github.com/DevSymphony/sym-cli/internal/engine/pattern" "github.com/DevSymphony/sym-cli/internal/engine/style" "github.com/DevSymphony/sym-cli/internal/engine/typechecker" @@ -35,4 +36,9 @@ func init() { MustRegister("typechecker", func() (core.Engine, error) { return typechecker.NewEngine(), nil }) + + // Register LLM validator engine + MustRegister("llm-validator", func() (core.Engine, error) { + return llm.NewEngine(), nil + }) } diff --git a/internal/llm/inference.go b/internal/llm/inference.go index 3c01336..7bbc1af 100644 --- a/internal/llm/inference.go +++ b/internal/llm/inference.go @@ -13,11 +13,12 @@ import ( const systemPrompt = `You are a code linting rule analyzer. Extract structured information from natural language coding rules. Extract: -1. **engine**: pattern|length|style|ast|custom +1. **engine**: pattern|length|style|ast|llm-validator - Use "style" for code formatting rules (semicolons, quotes, indentation, spacing) - Use "pattern" for naming conventions or content matching - Use "length" for size/length constraints - Use "ast" for structural complexity rules + - Use "llm-validator" for complex semantic rules that cannot be expressed with simple patterns 2. **category**: naming|formatting|security|error_handling|testing|documentation|dependency|commit|performance|architecture|custom diff --git a/internal/llm/types.go b/internal/llm/types.go index 50c6196..9ed33a9 100644 --- a/internal/llm/types.go +++ b/internal/llm/types.go @@ -2,7 +2,7 @@ package llm // RuleIntent represents the structured interpretation of a natural language rule type RuleIntent struct { - Engine string // "pattern", "length", "style", "ast", "custom" + Engine string // "pattern", "length", "style", "ast", "llm-validator" Category string // "naming", "formatting", "security", "error_handling", etc. Target string // "identifier", "content", "import", "class", "method", etc. Scope string // "line", "file", "function", "method", "class", etc. diff --git a/internal/mcp/server.go b/internal/mcp/server.go index 90f9565..89f0f10 100644 --- a/internal/mcp/server.go +++ b/internal/mcp/server.go @@ -315,8 +315,7 @@ type QueryConventionsInput struct { // ValidateCodeInput represents the input schema for the validate_code tool (go-sdk). type ValidateCodeInput struct { - Files []string `json:"files" jsonschema:"File paths to validate"` - Role string `json:"role,omitempty" jsonschema:"RBAC role for validation (optional)"` + Role string `json:"role,omitempty" jsonschema:"RBAC role for validation (optional)"` } // runStdioWithSDK runs a spec-compliant MCP server over stdio using the official go-sdk. @@ -329,7 +328,7 @@ func (s *Server) runStdioWithSDK(ctx context.Context) error { // Tool: query_conventions sdkmcp.AddTool(server, &sdkmcp.Tool{ Name: "query_conventions", - Description: "Query coding conventions before you start coding.", + Description: "[MANDATORY BEFORE CODING] Query project conventions BEFORE writing any code to ensure compliance from the start.", }, func(ctx context.Context, req *sdkmcp.CallToolRequest, input QueryConventionsInput) (*sdkmcp.CallToolResult, map[string]any, error) { params := map[string]any{ "category": input.Category, @@ -346,11 +345,10 @@ func (s *Server) runStdioWithSDK(ctx context.Context) error { // Tool: validate_code sdkmcp.AddTool(server, &sdkmcp.Tool{ Name: "validate_code", - Description: "Validate that your code complies with all project conventions.", + Description: "Validate git changes (staged + unstaged) against all project conventions.", }, func(ctx context.Context, req *sdkmcp.CallToolRequest, input ValidateCodeInput) (*sdkmcp.CallToolResult, map[string]any, error) { params := map[string]any{ - "files": input.Files, - "role": input.Role, + "role": input.Role, } result, rpcErr := s.handleValidateCode(params) if rpcErr != nil { @@ -533,8 +531,7 @@ func (s *Server) isRuleRelevant(rule schema.PolicyRule, req QueryConventionsRequ // ValidateCodeRequest is a code validation request. type ValidateCodeRequest struct { - Files []string `json:"files"` // file paths to validate - Role string `json:"role"` // RBAC role + Role string `json:"role"` // RBAC role (optional) } // ViolationItem is a violation item. @@ -549,7 +546,7 @@ type ViolationItem struct { } // handleValidateCode handles code validation requests. -// It uses the existing validator to validate code. +// It validates git changes (diff) instead of entire files for efficiency. func (s *Server) handleValidateCode(params map[string]interface{}) (interface{}, *RPCError) { // Get policy for validation (convert UserPolicy if needed) validationPolicy, err := s.getValidationPolicy() @@ -569,49 +566,95 @@ func (s *Server) handleValidateCode(params map[string]interface{}) (interface{}, } } - if len(req.Files) == 0 { - req.Files = []string{"."} + var allViolations []ViolationItem + var hasErrors bool + + // Always validate git changes (staged + unstaged) + // This is the most efficient and relevant approach for AI coding workflows + + // Get unstaged changes + changes, err := validator.GetGitChanges() + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to get git changes: %v\n", err) + return map[string]interface{}{ + "content": []map[string]interface{}{ + { + "type": "text", + "text": "⚠️ Failed to get git changes. Make sure you're in a git repository.\n\nError: " + err.Error(), + }, + }, + "isError": false, + }, nil } - v := validator.NewValidator(validationPolicy, false) // verbose = false for MCP + // Also check staged changes + stagedChanges, err := validator.GetStagedChanges() + if err == nil { + changes = append(changes, stagedChanges...) + } - var allViolations []ViolationItem - var hasErrors bool + if len(changes) == 0 { + return map[string]interface{}{ + "content": []map[string]interface{}{ + { + "type": "text", + "text": "✓ No uncommitted changes to validate. Working directory is clean.", + }, + }, + "isError": false, + }, nil + } - for _, filePath := range req.Files { - result, err := v.Validate(filePath) - if err != nil { - return nil, &RPCError{ - Code: -32000, - Message: fmt.Sprintf("validation failed: %v", err), - } + // Setup LLM client for validation + apiKey := os.Getenv("ANTHROPIC_API_KEY") + if apiKey == "" { + apiKey = os.Getenv("OPENAI_API_KEY") + } + if apiKey == "" { + return nil, &RPCError{ + Code: -32000, + Message: "LLM API key not found (ANTHROPIC_API_KEY or OPENAI_API_KEY required for validation)", } + } - for _, violation := range result.Violations { - allViolations = append(allViolations, ViolationItem{ - RuleID: violation.RuleID, - Category: "", - Message: violation.Message, - Severity: violation.Severity, - File: violation.File, - Line: violation.Line, - Column: violation.Column, - }) + llmClient := llm.NewClient(apiKey) + llmValidator := validator.NewLLMValidator(llmClient, validationPolicy) - if violation.Severity == "error" { - hasErrors = true - } + // Validate git changes + ctx := context.Background() + result, err := llmValidator.Validate(ctx, changes) + if err != nil { + return nil, &RPCError{ + Code: -32000, + Message: fmt.Sprintf("validation failed: %v", err), + } + } + + // Convert violations + for _, violation := range result.Violations { + allViolations = append(allViolations, ViolationItem{ + RuleID: violation.RuleID, + Category: "", + Message: violation.Message, + Severity: violation.Severity, + File: violation.File, + Line: violation.Line, + Column: violation.Column, + }) + + if violation.Severity == "error" { + hasErrors = true } } // Format validation results as readable text for MCP response var textContent string if hasErrors { - textContent = "❌ VALIDATION FAILED: Found error-level violations. You MUST fix these issues and re-validate before proceeding.\n\n" + textContent = "VALIDATION FAILED: Found error-level violations. You MUST fix these issues and re-validate before proceeding.\n\n" } else if len(allViolations) > 0 { - textContent = "⚠️ VALIDATION WARNING: Found non-critical violations. Consider fixing these warnings for better code quality.\n\n" + textContent = "VALIDATION WARNING: Found non-critical violations. Consider fixing these warnings for better code quality.\n\n" } else { - textContent = "✓ VALIDATION PASSED: Code complies with all conventions. Task can be marked as complete.\n\n" + textContent = "VALIDATION PASSED: Code complies with all conventions. Task can be marked as complete.\n\n" } if len(allViolations) > 0 { @@ -706,19 +749,17 @@ func (s *Server) handleToolsList(params map[string]interface{}) (interface{}, *R tools := []map[string]interface{}{ { "name": "query_conventions", - "description": `[STEP 1 - ALWAYS CALL FIRST] Query coding conventions and best practices before writing any code. + "description": `⚠️ CALL THIS FIRST - BEFORE WRITING ANY CODE ⚠️ -CRITICAL WORKFLOW: -1. ALWAYS call this tool FIRST when starting any coding task -2. Query relevant conventions by category (security, style, architecture, etc.) -3. Query conventions for specific files/languages you'll be working with -4. Use the returned conventions to guide your code implementation +This tool is MANDATORY before you start coding. Query project conventions to understand what rules your code must follow. -This ensures your code follows project standards from the start. Never skip this step. +Usage: +- Filter by category: security, style, error_handling, architecture, performance, testing, documentation +- Filter by languages: javascript, typescript, python, go, java, etc. -Categories available: security, style, documentation, error_handling, architecture, performance, testing +Example: Before adding a login feature, call query_conventions(category="security") first. -Example: Before implementing authentication, query security conventions first.`, +DO NOT write code before calling this tool. Violations will be caught by validate_code later.`, "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ @@ -736,7 +777,7 @@ Example: Before implementing authentication, query security conventions first.`, }, { "name": "validate_code", - "description": `[STEP 3 - ALWAYS CALL LAST] Validate that your code complies with all project conventions. + "description": `[STEP 3 - ALWAYS CALL LAST] Validate your git changes against all project conventions. CRITICAL WORKFLOW: 1. Call this tool AFTER you have written or modified code @@ -744,9 +785,14 @@ CRITICAL WORKFLOW: 3. If violations are found, fix them and validate again 4. Only mark the task as done after validation passes with no errors +This tool automatically validates: +- All STAGED changes (git add) +- All UNSTAGED changes (modified but not staged) +- Only checks the ADDED/MODIFIED lines in your diffs (not entire files) + This is the final quality gate. Never skip this validation step. -The tool will check: +The tool will check your changes for: - Security violations (hardcoded secrets, SQL injection, XSS, etc.) - Style violations (formatting, naming, documentation) - Architecture violations (separation of concerns, patterns) @@ -756,18 +802,11 @@ If violations are found, you MUST fix them before proceeding.`, "inputSchema": map[string]interface{}{ "type": "object", "properties": map[string]interface{}{ - "files": map[string]interface{}{ - "type": "array", - "items": map[string]string{"type": "string"}, - "description": "File paths to validate (required)", - "required": true, - }, "role": map[string]interface{}{ "type": "string", "description": "RBAC role for validation (optional)", }, }, - "required": []string{"files"}, }, }, } diff --git a/internal/validator/engine.go b/internal/validator/engine.go new file mode 100644 index 0000000..3a646c1 --- /dev/null +++ b/internal/validator/engine.go @@ -0,0 +1,72 @@ +package validator + +import ( + "context" + + "github.com/DevSymphony/sym-cli/pkg/schema" +) + +// Engine represents a validation engine that can check code against rules +type Engine interface { + // Name returns the engine name (e.g., "eslint", "llm-validator", "checkstyle") + Name() string + + // CanHandle checks if this engine can handle the given rule + CanHandle(rule schema.PolicyRule) bool + + // Returns violations found + Execute(ctx context.Context, files []string, rules []schema.PolicyRule) ([]Violation, error) +} + +// EngineRegistry manages available validation engines +type EngineRegistry struct { + engines map[string]Engine +} + +// NewEngineRegistry creates a new engine registry +func NewEngineRegistry() *EngineRegistry { + return &EngineRegistry{ + engines: make(map[string]Engine), + } +} + +// Register registers a validation engine +func (r *EngineRegistry) Register(engine Engine) { + r.engines[engine.Name()] = engine +} + +// Get retrieves an engine by name +func (r *EngineRegistry) Get(name string) (Engine, bool) { + engine, ok := r.engines[name] + return engine, ok +} + +// GetEngineForRule finds the appropriate engine for a rule +func (r *EngineRegistry) GetEngineForRule(rule schema.PolicyRule) Engine { + // Check if rule specifies an engine + if engineName, ok := rule.Check["engine"].(string); ok { + if engine, exists := r.engines[engineName]; exists { + if engine.CanHandle(rule) { + return engine + } + } + } + + // Fallback: find any engine that can handle this rule + for _, engine := range r.engines { + if engine.CanHandle(rule) { + return engine + } + } + + return nil +} + +// ListEngines returns all registered engines +func (r *EngineRegistry) ListEngines() []string { + names := make([]string, 0, len(r.engines)) + for name := range r.engines { + names = append(names, name) + } + return names +} \ No newline at end of file diff --git a/internal/validator/llm_validator.go b/internal/validator/llm_validator.go index 0062fda..b3a39be 100644 --- a/internal/validator/llm_validator.go +++ b/internal/validator/llm_validator.go @@ -17,21 +17,27 @@ type ValidationResult struct { Failed int } -// LLMValidator validates code changes against LLM-based rules +// LLMValidator validates code changes against LLM-based rules. +// This validator is specifically for Git diff validation. +// For regular file validation, use Validator which orchestrates all engines including LLM. type LLMValidator struct { - client *llm.Client - policy *schema.CodePolicy + client *llm.Client + policy *schema.CodePolicy + validator *Validator } // NewLLMValidator creates a new LLM validator func NewLLMValidator(client *llm.Client, policy *schema.CodePolicy) *LLMValidator { return &LLMValidator{ - client: client, - policy: policy, + client: client, + policy: policy, + validator: NewValidator(policy, false), // Use main validator for orchestration } } -// Validate validates git changes against LLM-based rules +// Validate validates git changes against LLM-based rules. +// This method is for diff-based validation (pre-commit hooks, PR validation). +// For regular file validation, use validator.Validate() which orchestrates all engines. func (v *LLMValidator) Validate(ctx context.Context, changes []GitChange) (*ValidationResult, error) { result := &ValidationResult{ Violations: make([]Violation, 0), diff --git a/internal/validator/selector.go b/internal/validator/selector.go new file mode 100644 index 0000000..a304a10 --- /dev/null +++ b/internal/validator/selector.go @@ -0,0 +1,263 @@ +package validator + +import ( + "os" + "path/filepath" + "strings" + + "github.com/DevSymphony/sym-cli/pkg/schema" +) + +// FileSelector handles file discovery and filtering based on rule selectors +type FileSelector struct { + basePath string +} + +// NewFileSelector creates a new file selector +func NewFileSelector(basePath string) *FileSelector { + return &FileSelector{ + basePath: basePath, + } +} + +// SelectFiles finds files that match the given selector +func (fs *FileSelector) SelectFiles(selector *schema.Selector) ([]string, error) { + if selector == nil { + // No selector means match all files + return fs.findAllFiles() + } + + // Start with all files + allFiles, err := fs.findAllFiles() + if err != nil { + return nil, err + } + + // Filter by include patterns + files := allFiles + if len(selector.Include) > 0 { + files = fs.filterByPatterns(allFiles, selector.Include, true) + } + + // Filter out excluded files + if len(selector.Exclude) > 0 { + files = fs.filterByPatterns(files, selector.Exclude, false) + } + + // Filter by language (based on file extension) + if len(selector.Languages) > 0 { + files = fs.filterByLanguages(files, selector.Languages) + } + + return files, nil +} + +// findAllFiles recursively finds all files under basePath +func (fs *FileSelector) findAllFiles() ([]string, error) { + var files []string + + err := filepath.Walk(fs.basePath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Skip directories + if info.IsDir() { + // Skip common directories to ignore + name := info.Name() + if name == ".git" || name == "node_modules" || name == "vendor" || name == ".sym" { + return filepath.SkipDir + } + return nil + } + + // Get relative path from base + relPath, err := filepath.Rel(fs.basePath, path) + if err != nil { + return err + } + + files = append(files, relPath) + return nil + }) + + return files, err +} + +// filterByPatterns filters files by glob patterns +func (fs *FileSelector) filterByPatterns(files []string, patterns []string, include bool) []string { + var result []string + + for _, file := range files { + matched := false + for _, pattern := range patterns { + if match, _ := filepath.Match(pattern, file); match { + matched = true + break + } + // Also try glob pattern matching (e.g., **/*.js) + if matchGlob(file, pattern) { + matched = true + break + } + } + + // Include mode: keep if matched + // Exclude mode: keep if NOT matched + if (include && matched) || (!include && !matched) { + result = append(result, file) + } + } + + return result +} + +// filterByLanguages filters files by programming language +func (fs *FileSelector) filterByLanguages(files []string, languages []string) []string { + var result []string + + extMap := buildLanguageExtensionMap(languages) + + for _, file := range files { + ext := filepath.Ext(file) + if _, ok := extMap[ext]; ok { + result = append(result, file) + } + } + + return result +} + +// buildLanguageExtensionMap creates a map of file extensions for given languages +func buildLanguageExtensionMap(languages []string) map[string]bool { + extMap := make(map[string]bool) + + for _, lang := range languages { + exts := getExtensionsForLanguage(lang) + for _, ext := range exts { + extMap[ext] = true + } + } + + return extMap +} + +// getExtensionsForLanguage returns file extensions for a language +func getExtensionsForLanguage(language string) []string { + language = strings.ToLower(language) + + switch language { + case "javascript", "js": + return []string{".js", ".mjs", ".cjs"} + case "typescript", "ts": + return []string{".ts", ".mts", ".cts"} + case "jsx": + return []string{".jsx"} + case "tsx": + return []string{".tsx"} + case "go", "golang": + return []string{".go"} + case "python", "py": + return []string{".py"} + case "java": + return []string{".java"} + case "c": + return []string{".c", ".h"} + case "cpp", "c++", "cxx": + return []string{".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx"} + case "rust", "rs": + return []string{".rs"} + case "ruby", "rb": + return []string{".rb"} + case "php": + return []string{".php"} + case "shell", "bash", "sh": + return []string{".sh", ".bash"} + case "yaml", "yml": + return []string{".yaml", ".yml"} + case "json": + return []string{".json"} + case "xml": + return []string{".xml"} + case "html": + return []string{".html", ".htm"} + case "css": + return []string{".css"} + default: + return []string{} + } +} + +// matchGlob performs glob pattern matching with ** support +func matchGlob(path, pattern string) bool { + // Convert glob pattern to simple matching + // This is a simplified implementation - for production, use a proper glob library + + // Handle **/ prefix (matches any directory depth) + if strings.HasPrefix(pattern, "**/") { + suffix := strings.TrimPrefix(pattern, "**/") + // Check if path ends with the suffix or any subdirectory contains it + if strings.HasSuffix(path, suffix) { + return true + } + // Check if any part matches + parts := strings.Split(path, string(filepath.Separator)) + for i := range parts { + subPath := strings.Join(parts[i:], string(filepath.Separator)) + if match, _ := filepath.Match(suffix, subPath); match { + return true + } + } + } + + // Handle exact match + if match, _ := filepath.Match(pattern, path); match { + return true + } + + // Handle directory prefix patterns (e.g., "src/**") + if strings.HasSuffix(pattern, "/**") { + prefix := strings.TrimSuffix(pattern, "/**") + if strings.HasPrefix(path, prefix+string(filepath.Separator)) || path == prefix { + return true + } + } + + return false +} + +// GetLanguageFromFile determines the programming language from a file path +func GetLanguageFromFile(filePath string) string { + ext := filepath.Ext(filePath) + + switch ext { + case ".js", ".mjs", ".cjs": + return "javascript" + case ".ts", ".mts", ".cts": + return "typescript" + case ".jsx": + return "jsx" + case ".tsx": + return "tsx" + case ".go": + return "go" + case ".py": + return "python" + case ".java": + return "java" + case ".c", ".h": + return "c" + case ".cpp", ".cc", ".cxx", ".hpp", ".hh", ".hxx": + return "cpp" + case ".rs": + return "rust" + case ".rb": + return "ruby" + case ".php": + return "php" + case ".sh", ".bash": + return "shell" + default: + return "" + } +} \ No newline at end of file diff --git a/internal/validator/validator.go b/internal/validator/validator.go index 2c67c82..0523424 100644 --- a/internal/validator/validator.go +++ b/internal/validator/validator.go @@ -1,22 +1,46 @@ package validator import ( + "context" "fmt" + "os" + "path/filepath" + "time" + "github.com/DevSymphony/sym-cli/internal/engine/core" + "github.com/DevSymphony/sym-cli/internal/engine/registry" "github.com/DevSymphony/sym-cli/pkg/schema" ) // Validator validates code against policy type Validator struct { - policy *schema.CodePolicy - verbose bool + policy *schema.CodePolicy + verbose bool + registry *registry.Registry + workDir string + selector *FileSelector + ctx context.Context + ctxCancel context.CancelFunc } // NewValidator creates a new validator func NewValidator(policy *schema.CodePolicy, verbose bool) *Validator { + // Determine working directory + workDir, err := os.Getwd() + if err != nil { + workDir = "." + } + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + return &Validator{ - policy: policy, - verbose: verbose, + policy: policy, + verbose: verbose, + registry: registry.Global(), + workDir: workDir, + selector: NewFileSelector(workDir), + ctx: ctx, + ctxCancel: cancel, } } @@ -47,14 +71,103 @@ func (v *Validator) Validate(path string) (*Result, error) { Passed: true, } - // TODO: Implement actual validation logic - // This is a placeholder that will be implemented with: - // - File discovery and filtering based on selectors - // - Rule engine execution based on check.engine type - // - Violation collection and reporting + if v.verbose { + fmt.Printf("🔍 Validating %s against %d rule(s)...\n", path, len(v.policy.Rules)) + } + + // For each enabled rule, execute validation + for _, rule := range v.policy.Rules { + if !rule.Enabled { + continue + } + + // Determine which engine to use + engineName := getEngineName(rule) + if engineName == "" { + if v.verbose { + fmt.Printf("⚠️ Rule %s has no engine specified, skipping\n", rule.ID) + } + continue + } + + // Get files that match this rule's selector + files, err := v.selectFilesForRule(path, &rule) + if err != nil { + fmt.Printf("⚠️ Failed to select files for rule %s: %v\n", rule.ID, err) + continue + } + + if len(files) == 0 { + if v.verbose { + fmt.Printf(" Rule %s: no matching files\n", rule.ID) + } + continue + } + + // Get or create engine + engine, err := v.registry.Get(engineName) + if err != nil { + fmt.Printf("⚠️ Engine %s not found for rule %s: %v\n", engineName, rule.ID, err) + continue + } + + // Initialize engine if needed + if err := v.initializeEngine(engine); err != nil { + fmt.Printf("⚠️ Failed to initialize engine %s: %v\n", engineName, err) + continue + } + + // Convert schema.PolicyRule to core.Rule + coreRule := convertToCoreRule(rule) + + // Execute validation + if v.verbose { + fmt.Printf(" Rule %s (%s): checking %d file(s)...\n", rule.ID, engineName, len(files)) + } + + validationResult, err := engine.Validate(v.ctx, coreRule, files) + if err != nil { + fmt.Printf("⚠️ Validation failed for rule %s: %v\n", rule.ID, err) + continue + } + + // Collect violations + if validationResult != nil && len(validationResult.Violations) > 0 { + for _, coreViolation := range validationResult.Violations { + violation := Violation{ + RuleID: rule.ID, + Severity: rule.Severity, + Message: coreViolation.Message, + File: coreViolation.File, + Line: coreViolation.Line, + Column: coreViolation.Column, + } + + // Use custom message if provided + if rule.Message != "" { + violation.Message = rule.Message + } + + result.Violations = append(result.Violations, violation) + } + + if v.verbose { + fmt.Printf(" ❌ Found %d violation(s)\n", len(validationResult.Violations)) + } + } else if v.verbose { + fmt.Printf(" ✓ Passed\n") + } + } + + // Determine overall pass/fail + result.Passed = len(result.Violations) == 0 if v.verbose { - fmt.Printf("Validating %s against %d rules\n", path, len(v.policy.Rules)) + if result.Passed { + fmt.Printf("\n✅ Validation passed: no violations found\n") + } else { + fmt.Printf("\n❌ Validation failed: %d violation(s) found\n", len(result.Violations)) + } } return result, nil @@ -74,3 +187,110 @@ func (v *Validator) AutoFix(result *Result) error { // TODO: Implement auto-fix logic return fmt.Errorf("auto-fix not implemented yet") } + +// Helper functions + +// getEngineName extracts the engine name from a rule +func getEngineName(rule schema.PolicyRule) string { + if engine, ok := rule.Check["engine"].(string); ok { + return engine + } + return "" +} + +// selectFilesForRule selects files that match the rule's selector +func (v *Validator) selectFilesForRule(basePath string, rule *schema.PolicyRule) ([]string, error) { + // If path is a specific file, check if it matches the selector + fileInfo, err := os.Stat(basePath) + if err != nil { + return nil, err + } + + if !fileInfo.IsDir() { + // Single file - check if it matches selector + if rule.When != nil { + lang := GetLanguageFromFile(basePath) + if len(rule.When.Languages) > 0 { + matched := false + for _, ruleLang := range rule.When.Languages { + if ruleLang == lang { + matched = true + break + } + } + if !matched { + return []string{}, nil + } + } + } + return []string{basePath}, nil + } + + // Directory - use selector to find files + if rule.When == nil { + // No selector, use all files in directory + return v.selector.SelectFiles(nil) + } + + return v.selector.SelectFiles(rule.When) +} + +// initializeEngine initializes an engine if not already initialized +func (v *Validator) initializeEngine(engine core.Engine) error { + // Create engine config + config := core.EngineConfig{ + WorkDir: v.workDir, + ToolsDir: filepath.Join(os.Getenv("HOME"), ".sym", "tools"), + CacheDir: filepath.Join(os.Getenv("HOME"), ".sym", "cache"), + Timeout: 5 * time.Minute, + Parallelism: 0, + Debug: v.verbose, + } + + // Initialize engine + return engine.Init(v.ctx, config) +} + +// convertToCoreRule converts schema.PolicyRule to core.Rule +func convertToCoreRule(rule schema.PolicyRule) core.Rule { + var when *core.Selector + if rule.When != nil { + when = &core.Selector{ + Languages: rule.When.Languages, + Include: rule.When.Include, + Exclude: rule.When.Exclude, + Branches: rule.When.Branches, + Roles: rule.When.Roles, + Tags: rule.When.Tags, + } + } + + var remedy *core.Remedy + if rule.Remedy != nil { + remedy = &core.Remedy{ + Autofix: rule.Remedy.Autofix, + Tool: rule.Remedy.Tool, + Config: rule.Remedy.Config, + } + } + + return core.Rule{ + ID: rule.ID, + Enabled: rule.Enabled, + Category: rule.Category, + Severity: rule.Severity, + Desc: rule.Desc, + When: when, + Check: rule.Check, + Remedy: remedy, + Message: rule.Message, + } +} + +// Close cleans up validator resources +func (v *Validator) Close() error { + if v.ctxCancel != nil { + v.ctxCancel() + } + return nil +} diff --git a/test_validator.go b/test_validator.go new file mode 100644 index 0000000..3fb081e --- /dev/null +++ b/test_validator.go @@ -0,0 +1,99 @@ +package main + +import ( + "fmt" + "os" + + "github.com/DevSymphony/sym-cli/internal/validator" + "github.com/DevSymphony/sym-cli/pkg/schema" +) + +func main() { + // Create a simple test policy inline with multiple engine types + policy := &schema.CodePolicy{ + Version: "1.0.0", + Rules: []schema.PolicyRule{ + { + ID: "test-max-len", + Enabled: true, + Category: "style", + Severity: "warning", + Desc: "Lines should not exceed 120 characters", + When: &schema.Selector{ + Languages: []string{"javascript"}, + }, + Check: map[string]any{ + "engine": "length", + "scope": "line", + "max": 120, + }, + Message: "Line too long (max 120 characters)", + }, + { + ID: "test-pattern", + Enabled: true, + Category: "security", + Severity: "error", + Desc: "No hardcoded API keys", + When: &schema.Selector{ + Languages: []string{"javascript"}, + }, + Check: map[string]any{ + "engine": "pattern", + "pattern": "sk-[a-zA-Z0-9]{30,}", + "target": "content", + }, + Message: "Hardcoded API key detected", + }, + }, + Enforce: schema.EnforceSettings{ + Stages: []string{"pre-commit"}, + FailOn: []string{"error"}, + }, + } + + fmt.Printf("📋 Testing validator with %d rule(s)\n\n", len(policy.Rules)) + + // Create validator + v := validator.NewValidator(policy, true) + defer v.Close() + + // Test files + testFiles := []string{ + "tests/e2e/examples/bad-example.js", + "tests/e2e/examples/good-example.js", + } + + for _, file := range testFiles { + // Check if file exists + if _, err := os.Stat(file); os.IsNotExist(err) { + fmt.Printf("⚠️ Skipping %s (file not found)\n\n", file) + continue + } + + fmt.Printf("═══════════════════════════════════════════════\n") + fmt.Printf("Testing: %s\n", file) + fmt.Printf("═══════════════════════════════════════════════\n\n") + + result, err := v.Validate(file) + if err != nil { + fmt.Printf("❌ Validation error: %v\n\n", err) + continue + } + + if result.Passed { + fmt.Printf("\n✅ PASSED: No violations\n\n") + } else { + fmt.Printf("\n❌ FAILED: %d violation(s) found:\n", len(result.Violations)) + for i, violation := range result.Violations { + fmt.Printf("\n%d. [%s] %s\n", i+1, violation.Severity, violation.RuleID) + fmt.Printf(" File: %s:%d:%d\n", violation.File, violation.Line, violation.Column) + fmt.Printf(" Message: %s\n", violation.Message) + } + fmt.Println() + } + } + + fmt.Printf("═══════════════════════════════════════════════\n") + fmt.Printf("✅ Validator test complete!\n") +} From f049ecbb002aaed7de7e62ec7a8977c8544058b5 Mon Sep 17 00:00:00 2001 From: sehwan505 Date: Thu, 13 Nov 2025 01:02:04 +0900 Subject: [PATCH 3/5] feat: enhance git operations in MCP server and validator --- internal/mcp/server.go | 6 ++++++ internal/validator/git.go | 14 +++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/internal/mcp/server.go b/internal/mcp/server.go index 89f0f10..f26593c 100644 --- a/internal/mcp/server.go +++ b/internal/mcp/server.go @@ -130,6 +130,12 @@ func (s *Server) Start() error { fmt.Fprintf(os.Stderr, "⚠️ Warning: Not in a git repository, MCP server starting without policies\n") } else { dir = filepath.Join(repoRoot, ".sym") + // Change working directory to project root for git operations + if err := os.Chdir(repoRoot); err != nil { + fmt.Fprintf(os.Stderr, "⚠️ Warning: Failed to change to project root: %v\n", err) + } else { + fmt.Fprintf(os.Stderr, "✓ Working directory set to project root: %s\n", repoRoot) + } } } diff --git a/internal/validator/git.go b/internal/validator/git.go index 5860925..ad51a09 100644 --- a/internal/validator/git.go +++ b/internal/validator/git.go @@ -13,12 +13,16 @@ type GitChange struct { Diff string } -// GetGitChanges returns all changed files in the current git repository +// GetGitChanges returns all changed files in the current git repository (unstaged changes) func GetGitChanges() ([]GitChange, error) { - // Get list of changed files - cmd := exec.Command("git", "diff", "--name-status", "HEAD") + // Get list of unstaged changed files (working directory vs index) + cmd := exec.Command("git", "diff", "--name-status") output, err := cmd.Output() if err != nil { + // Try to get more detailed error information + if exitErr, ok := err.(*exec.ExitError); ok { + return nil, fmt.Errorf("failed to get git changes: %w (stderr: %s)", err, string(exitErr.Stderr)) + } return nil, fmt.Errorf("failed to get git changes: %w", err) } @@ -37,8 +41,8 @@ func GetGitChanges() ([]GitChange, error) { status := parts[0] filePath := parts[1] - // Get diff for this file - diffCmd := exec.Command("git", "diff", "HEAD", "--", filePath) + // Get diff for this file (working directory vs index) + diffCmd := exec.Command("git", "diff", "--", filePath) diffOutput, err := diffCmd.Output() if err != nil { continue From 2f994144fe7ee31af079ad09f928b42a4e663170 Mon Sep 17 00:00:00 2001 From: sehwan505 Date: Thu, 13 Nov 2025 01:08:14 +0900 Subject: [PATCH 4/5] fix: handle error in line number parsing in validation response --- internal/engine/llm/engine.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/internal/engine/llm/engine.go b/internal/engine/llm/engine.go index 10d586e..f127c48 100644 --- a/internal/engine/llm/engine.go +++ b/internal/engine/llm/engine.go @@ -225,8 +225,9 @@ func parseValidationResponse(response string) validationResponse { if lineStr := extractJSONField(response, "line"); lineStr != "" { // Parse line number var line int - fmt.Sscanf(lineStr, "%d", &line) - result.Line = line + if _, err := fmt.Sscanf(lineStr, "%d", &line); err == nil { + result.Line = line + } } } From f030665659173a1e75a9bf18e1c7645af41a3f2e Mon Sep 17 00:00:00 2001 From: sehwan505 Date: Thu, 13 Nov 2025 10:13:02 +0900 Subject: [PATCH 5/5] fix: improve error handling during validator closure --- test_validator.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test_validator.go b/test_validator.go index 3fb081e..8f2ea97 100644 --- a/test_validator.go +++ b/test_validator.go @@ -56,7 +56,11 @@ func main() { // Create validator v := validator.NewValidator(policy, true) - defer v.Close() + defer func() { + if err := v.Close(); err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to close validator: %v\n", err) + } + }() // Test files testFiles := []string{