diff --git a/.github/Memory.md b/.github/Memory.md index bc22ba60..b710cc04 100644 --- a/.github/Memory.md +++ b/.github/Memory.md @@ -1,5 +1,5 @@ # AI Assistant Memory -Last Updated: 2025-08-01T21:30:00Z +Last Updated: 2025-08-04T12:30:00Z ## Current Goals - ✅ Improve test coverage for Blarify codebase to >80% (ACHIEVED 3x improvement: 20.76% → 63.76%) @@ -15,6 +15,7 @@ Last Updated: 2025-08-01T21:30:00Z - ✅ Fix VS Code BlarifyIntegration command mismatch issue (PR #55 - COMPLETED) - 🔄 **ACTIVE**: Complete pyright type checking implementation - achieve 0 errors (PR #62 - OUTSTANDING PROGRESS: 715 → 606 errors, 109 fixed, 15.2% improvement) - 🔄 Continue improving test coverage for low-coverage modules +- 🔄 **ACTIVE**: Comprehensive README update for Cue/Blarify project (Issue #67) - Document multilayer code understanding, MCP server, VS Code extension ## Todo List - [x] Write prompt file for test coverage improvement agent diff --git a/README.md b/README.md index a5dc0eb0..00de17de 100644 --- a/README.md +++ b/README.md @@ -1,167 +1,428 @@ -This repo introduces a method to represent a local code repository as a graph structure. The objective is to allow an LLM to traverse this graph to understand the code logic and flow. Providing the LLM with the power to debug, refactor, and optimize queries. +# Blarify / Cue -# Supported Languages +> Transform any codebase into an intelligent multilayer graph for AI-powered code understanding -Blarify supports the following languages out of the box: +Blarify (formerly Cue) is a sophisticated code analysis tool that creates comprehensive graph representations of codebases, enabling AI agents and developers to understand, navigate, and modify code with unprecedented precision. 
-**Core Languages** (most popular, always included): -- Python -- JavaScript -- TypeScript -- Java -- Go +## πŸš€ Key Features -**Additional Languages** (included by default): -- Ruby -- C# -- PHP +- **🧠 Multilayer Code Understanding Graph**: Creates interconnected layers capturing filesystem structure, code hierarchy, dependencies, and semantic relationships +- **πŸ” AI-Powered Code Analysis**: Leverages LLM integration to generate natural language descriptions and extract code insights +- **πŸ“Š Interactive 3D Visualization**: VS Code extension with ThreeJS-based 3D graph visualization and real-time exploration +- **πŸ€– MCP Server Integration**: Model Context Protocol server for AI agents to query and analyze codebases +- **🌐 Multi-Language Support**: Supports Python, JavaScript, TypeScript, Java, Go, Ruby, C#, and PHP +- **πŸ“š Documentation Knowledge Graph**: Automatically links documentation to code elements +- **⚑ Incremental Updates**: Efficiently updates graphs when code changes +- **πŸ”’ Local-First**: All analysis runs locally with optional cloud LLM integration -**Note**: Blarify is designed to be robust. If a language parser is not installed or fails to load, Blarify will continue to work for other supported languages, displaying a warning for the missing language support. See [Language Support Documentation](cue/LANGUAGE_SUPPORT.md) for more details. +## πŸ—οΈ Architecture Overview -# Example +Blarify creates a sophisticated multilayer graph representation of your codebase: - -This graph was generated from the code in this repository. 
+``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Blarify Architecture β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ VS Code β”‚ β”‚ MCP Server β”‚ β”‚ Neo4j Graph β”‚ β”‚ +β”‚ β”‚ Extension β”‚ β”‚ (AI Agents) β”‚ β”‚ Database β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β€’ 3D Visualizer β”‚ β”‚ β€’ Context Tools β”‚ β”‚ β€’ Node Storage β”‚ β”‚ +β”‚ β”‚ β€’ Search/Filter β”‚ β”‚ β€’ Query Builder β”‚ β”‚ β€’ Relationships β”‚ β”‚ +β”‚ β”‚ β€’ Interactive β”‚ β”‚ β€’ Plan Builder β”‚ β”‚ β€’ Graph Queries β”‚ β”‚ +β”‚ β”‚ Exploration β”‚ β”‚ β€’ LLM Processor β”‚ β”‚ β€’ Cypher API β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Blarify Core Engine β”‚ β”‚ +β”‚ β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” 
β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ Filesystem β”‚ β”‚ Code β”‚ β”‚ Documentationβ”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Layer β”‚ β”‚ Hierarchy β”‚ β”‚ Layer β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ Layer β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ β€’ Files β”‚ β”‚ β€’ Classes β”‚ β”‚ β€’ Concepts β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ β€’ Folders β”‚ β”‚ β€’ Functions β”‚ β”‚ β€’ Entities β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ β€’ Structure β”‚ β”‚ β€’ Variables β”‚ β”‚ β€’ Auto-Link β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ β”‚ LSP β”‚ β”‚ Tree-Sitter β”‚ β”‚ LLM β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ Integration β”‚ β”‚ Parsing β”‚ β”‚ Integration β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ β€’ Referencesβ”‚ β”‚ β€’ AST Parse β”‚ β”‚ β€’ Summaries β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ β€’ Symbols β”‚ β”‚ β€’ Language β”‚ β”‚ β€’ Context β”‚ β”‚ β”‚ +β”‚ β”‚ β”‚ β€’ Workspace β”‚ β”‚ Specific β”‚ β”‚ β€’ Planning β”‚ β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` -# Quickstart +## πŸ”§ Components + +### Blarify Core +The heart of the system that analyzes codebases and builds graph representations: + +- **Language Parsing**: Tree-sitter based parsing for 8+ programming languages +- **LSP 
Integration**: Language Server Protocol for precise code understanding +- **Graph Builder**: Creates multilayer graph with filesystem, code hierarchy, and semantic layers +- **Incremental Updates**: Efficiently updates graphs when files change + +### MCP Server +Model Context Protocol server that provides AI agents with sophisticated tools: + +- **Context Retrieval**: Get comprehensive context for files or symbols +- **Change Planning**: Analyze impact and create implementation plans +- **Query Builder**: Construct efficient Neo4j Cypher queries +- **LLM Integration**: Organize results with natural language processing + +### VS Code Extension +Interactive visualization and exploration interface: + +- **3D Graph Visualization**: ThreeJS-based interactive 3D rendering +- **Smart Search**: Real-time search across nodes by name, type, or properties +- **Multiple Layouts**: Force-directed, hierarchical, and circular layouts +- **Neo4j Management**: Automatic Docker container management +- **Workspace Analysis**: One-click codebase analysis and ingestion + +### Neo4j Integration +Graph database storage and querying: + +- **Node Types**: Files, classes, functions, documentation, concepts +- **Relationships**: Dependencies, inheritance, calls, references, documentation links +- **Query Interface**: Cypher queries for complex graph traversal +- **Data Persistence**: Maintain graph state across sessions + +## 📦 Installation & Setup + +### Prerequisites + +- **Python**: 3.10-3.14 +- **Docker Desktop**: For Neo4j container management +- **VS Code**: 1.74.0+ (for extension) +- **Node.js**: 16+ (for Neo4j container manager) + +### Core Installation + +1. **Install Blarify Core**: + ```bash + pip install cue + ``` + +2. 
**Set up environment variables**: + ```bash + # Required for Neo4j + export NEO4J_URI="bolt://localhost:7687" + export NEO4J_USERNAME="neo4j" + export NEO4J_PASSWORD="your-secure-password" + + # Optional: Azure OpenAI for LLM features + export AZURE_OPENAI_API_KEY="your-api-key" + export AZURE_OPENAI_ENDPOINT="https://your-instance.openai.azure.com/" + export AZURE_OPENAI_DEPLOYMENT_NAME="gpt-4" + export ENABLE_LLM_DESCRIPTIONS=true + ``` + +### VS Code Extension Setup + +1. **Download the extension**: + ```bash + # From the repository + cd vscode-blarify-visualizer + ``` + +2. **Install dependencies**: + ```bash + npm install + npm run compile + ``` + +3. **Package and install**: + ```bash + vsce package + code --install-extension blarify-visualizer-*.vsix + ``` + +4. **Configure extension settings** in VS Code: + - Search for "Blarify Visualizer" in settings + - Add Azure OpenAI credentials (optional) + - Configure Neo4j connection details + +### MCP Server Setup + +1. **Navigate to MCP server directory**: + ```bash + cd mcp-blarify-server + ``` + +2. **Install dependencies**: + ```bash + pip install -r requirements.txt + ``` + +3. 
**Configure for Claude Desktop**: + ```json + { + "mcpServers": { + "blarify": { + "command": "python", + "args": ["-m", "src.server"], + "cwd": "/path/to/mcp-blarify-server", + "env": { + "MANAGE_NEO4J_CONTAINER": "true", + "NEO4J_PASSWORD": "your-secure-password", + "AZURE_OPENAI_API_KEY": "your-api-key" + } + } + } + } + ``` + +## 🎯 Usage + +### Basic Graph Building -Get started with cue by following our quickstart guide: +```python +from cue.prebuilt.graph_builder import GraphBuilder -[➑️ Quickstart Guide](https://github.com/blarApp/cue/blob/main/docs/quickstart.md) +# Create graph builder with smart filtering +graph_builder = GraphBuilder( + root_path="/path/to/your/project", + use_gitignore=True, # Respect .gitignore patterns + enable_llm_descriptions=True, # Generate AI summaries + enable_documentation_nodes=True # Parse documentation +) -# Article +# Build the graph +graph = graph_builder.build() -Read our article on Medium to learn more about the motivation behind this project: +# Get nodes and relationships +nodes = graph.get_nodes_as_objects() +relationships = graph.get_relationships_as_objects() -[➑️ How we built a tool to turn any codebase into a graph of its relationships](https://medium.com/@v4rgas/how-we-built-a-tool-to-turn-any-code-base-into-a-graph-of-its-relationships-23c7bd130f13) +print(f"Built graph with {len(nodes)} nodes and {len(relationships)} relationships") +``` -# Features +### Save to Neo4j -- **Code Graph Generation**: Automatically creates a graph representation of your codebase with nodes for files, classes, functions, and their relationships -- **Multi-Language Support**: Supports Python, JavaScript, TypeScript, Ruby, Go, C#, PHP, and Java -- **LLM-Generated Descriptions** (New!): Optionally generate natural language descriptions for code elements using Azure OpenAI -- **Documentation Knowledge Graph** (New!): Parse documentation files to extract concepts, entities, and automatically link them to relevant code -- **Gitignore 
Integration** (New!): Automatically excludes files matching `.gitignore` patterns, with `.cueignore` for additional exclusions -- **Graph Database Integration**: Export to Neo4j or FalkorDB for visualization and querying -- **Incremental Updates**: Efficiently update the graph when code changes +```python +from cue.db_managers.neo4j_manager import Neo4jManager + +# Save to Neo4j +graph_manager = Neo4jManager(repo_id="my-project", entity_id="main") +graph_manager.save_graph(nodes, relationships) +graph_manager.close() +``` -# LLM Description Generation +### VS Code Extension Usage -Blarify can generate natural language descriptions for your code elements using Azure OpenAI's GPT-4. This feature helps developers quickly understand the purpose and functionality of code components. **This feature is enabled by default.** +1. **Open your project** in VS Code +2. **Analyze workspace**: Run "Blarify: Analyze Workspace" command +3. **View visualization**: Run "Blarify: Show 3D Visualization" command +4. **Explore the graph**: + - Left drag: Rotate view + - Right drag: Pan view + - Scroll: Zoom + - Click nodes: View details + - Double-click: Expand neighborhood -## Setup +### MCP Server with AI Agents + +Use with Claude or other MCP-compatible AI agents: -1. Configure Azure OpenAI credentials in your `.env` file: -```bash -AZURE_OPENAI_API_KEY=your-api-key -AZURE_OPENAI_ENDPOINT=https://your-instance.openai.azure.com/ -AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4 -ENABLE_LLM_DESCRIPTIONS=true # Default is true ``` +Get context for these files using getContextForFiles: +- src/services/auth.py +- src/models/user.py -2. Use the feature when building your graph: -```python -graph_builder = GraphBuilder( - root_path="/path/to/project", - enable_llm_descriptions=True # Default is True -) -graph = graph_builder.build() +Find information about the UserService class using getContextForSymbol. 
+ +Create an implementation plan using buildPlanForChange: +"Add email verification to user registration" ``` -# File Exclusion and Gitignore Support +## 🌟 Features Deep Dive -Blarify now automatically respects your `.gitignore` patterns, ensuring that version control ignored files (like `node_modules`, `.env`, build artifacts) are excluded from the graph analysis. +### Multilayer Graph Structure -## How it works +Blarify creates multiple interconnected layers: -- **Automatic `.gitignore` support**: All patterns in `.gitignore` files are automatically applied -- **`.cueignore` for additional exclusions**: Create a `.cueignore` file for Blarify-specific exclusions -- **Nested `.gitignore` files**: Supports `.gitignore` files in subdirectories -- **Full pattern syntax**: Supports all gitignore patterns including globs, directory markers, and comments +1. **Filesystem Layer**: + - File and directory nodes + - Hierarchical relationships + - Gitignore integration -## Configuration +2. **Code Hierarchy Layer**: + - Classes, functions, variables + - Inheritance and composition + - Call graphs and dependencies -```python -# Enable gitignore support (enabled by default) -graph_builder = GraphBuilder( - root_path="/path/to/project", - use_gitignore=True # Default is True -) +3. **Documentation Layer**: + - Markdown and doc files + - Extracted concepts and entities + - Automatic code linking -# Disable gitignore support if needed -graph_builder = GraphBuilder( - root_path="/path/to/project", - use_gitignore=False -) +4. 
**Semantic Layer**: + - LLM-generated descriptions + - Natural language summaries + - Contextual relationships -# Specify custom .cueignore path -graph_builder = GraphBuilder( - root_path="/path/to/project", - cueignore_path="/path/to/.cueignore" -) +### Language Support + +**Core Languages** (always included): +- Python, JavaScript, TypeScript, Java, Go + +**Additional Languages** (included by default): +- Ruby, C#, PHP + +**Graceful Degradation**: Missing language parsers won't break analysis for other languages. + +### Smart File Filtering + +- **Gitignore Integration**: Automatically respects `.gitignore` patterns +- **Custom Exclusions**: Use `.cueignore` for Blarify-specific exclusions +- **Extension Filtering**: Skip non-code files like `.json`, `.xml` +- **Performance Optimization**: Focus on relevant code files + +### AI-Powered Features + +- **Code Summaries**: Natural language descriptions of functions and classes +- **Concept Extraction**: Identify key concepts from documentation +- **Smart Linking**: Automatically connect docs to code elements +- **Impact Analysis**: Understand change implications across the codebase + +## 🔬 Development + +### Contributing + +1. **Fork the repository** +2. **Set up development environment**: + ```bash + python -m venv .venv + source .venv/bin/activate + pip install -e .[dev] + ``` + +3. **Run tests**: + ```bash + pytest tests/ -v + pytest --cov=cue tests/ # With coverage + ``` + +4. 
**Type checking**: + ```bash + pyright cue/ + ``` + +### Testing + +- **Unit Tests**: Test individual components in isolation +- **Integration Tests**: Test with real Neo4j databases +- **VS Code Tests**: Extension functionality tests +- **MCP Server Tests**: Protocol compliance and functionality + +### Architecture Details + +The codebase is organized into logical modules: + +``` +cue/ +├── code_hierarchy/ # Tree-sitter language parsing +├── code_references/ # LSP integration +├── db_managers/ # Neo4j and FalkorDB managers +├── documentation/ # Documentation parsing and linking +├── filesystem/ # File system graph generation +├── graph/ # Core graph data structures +├── llm_descriptions/ # AI-powered descriptions +└── project_file_explorer/ # File discovery and filtering ``` -## .cueignore Example +## 🛠️ Troubleshooting -Create a `.cueignore` file in your project root: +### Common Issues +**Neo4j Connection Issues**: +```bash +# Check if Neo4j is running +docker ps | grep neo4j + +# Start Neo4j container +docker run -d --name neo4j \ + -p 7474:7474 -p 7687:7687 \ + -e NEO4J_AUTH=neo4j/your-password \ + neo4j:latest ``` -# Exclude test files -test_*.py -*_test.py -# Exclude documentation -docs/ +**VS Code Extension Not Working**: +- Ensure Docker Desktop is running +- Check Python is in PATH +- Restart VS Code after installation +- Check Output panel for error messages + +**MCP Server Issues**: +- Verify environment variables are set +- Check Neo4j connectivity +- Ensure Azure OpenAI credentials are valid +- Review server logs for specific errors + +**Performance Issues**: +- Use `.gitignore` to exclude large directories +- Reduce `nodeLimit` in VS Code settings +- Skip test files and documentation if needed +- Consider using FalkorDB for better performance + +### Debug Mode -# Exclude specific large files -data/*.csv +Enable debug logging: +```bash +export DEBUG=true +export LOGGING_LEVEL=DEBUG ``` -# Documentation 
Knowledge Graph +### Getting Help -Blarify can parse your documentation files to create a knowledge graph of concepts, entities, and their relationships. This feature uses LLM to intelligently extract information from your docs and automatically links them to relevant code elements. **This feature is enabled by default.** +- **Issues**: [GitHub Issues](https://github.com/rysweet/cue/issues) +- **Discord**: [Community Discord](https://discord.gg/s8pqnPt5AP) +- **Documentation**: Check the `docs/` directory for detailed guides -## Features +## 📋 FAQ -- **Automatic Documentation Detection**: Finds README, API docs, architecture docs, and other documentation files -- **Intelligent Extraction**: Uses LLM to extract concepts, entities, relationships, and code references -- **Smart Linking**: Automatically creates relationships between documentation and code nodes -- **Customizable Patterns**: Configure which documentation files to include +**Q: What's the difference between Cue and Blarify?** +A: Blarify is the evolved name for the project. The package is still called `cue` for compatibility. -## Setup +**Q: Can I use this without AI/LLM features?** +A: Yes! Set `ENABLE_LLM_DESCRIPTIONS=false` to disable AI features. -1. Enable the feature when building your graph: -```python -graph_builder = GraphBuilder( - root_path="/path/to/project", - enable_documentation_nodes=True, # Default is True - documentation_patterns=["*.md", "*.rst", "*.adoc"] # Optional custom patterns -) -graph = graph_builder.build() -``` +**Q: Does this work with private repositories?** +A: Yes, everything runs locally. Only LLM features (if enabled) make external API calls. -2. 
The feature will create these node types: -- `DOCUMENTATION_FILE`: Represents documentation files -- `CONCEPT`: Key ideas, patterns, or methodologies mentioned in docs -- `DOCUMENTED_ENTITY`: Classes, services, or modules described in documentation +**Q: How large a codebase can this handle?** +A: Successfully tested on repositories with 100k+ files. Performance depends on filtering and hardware. -3. And these relationships: -- `CONTAINS_CONCEPT`: Links documentation files to concepts they describe -- `DESCRIBES_ENTITY`: Links documentation to entities they document -- `DOCUMENTS`: Links documentation/entities to code nodes -- `IMPLEMENTS_CONCEPT`: Links code nodes that implement documented concepts +**Q: Can I extend language support?** +A: Yes! Add new tree-sitter parsers and language definitions in `code_hierarchy/languages/`. -# Future Work +## 🏆 Future Roadmap -- [x] Gracefully update the graph when new files are added, deleted, or modified -- [x] Add more language servers -- [x] LLM-generated descriptions for code understanding -- [x] Documentation knowledge graph with automatic code linking -- [ ] Experiment with parallelizing the language server requests - [ ] Vector embeddings for semantic code search +- [ ] Web-based dashboard for team collaboration +- [ ] Integration with more IDEs (IntelliJ, Sublime Text) +- [ ] Performance optimizations for enterprise-scale codebases +- [ ] Advanced AI features (code generation, refactoring suggestions) +- [ ] Plugin system for custom analyzers + +## 📄 License + +MIT License - see [LICENSE.md](LICENSE.md) for details. + +## 🙏 Acknowledgments + +Created by [Juan Vargas](https://github.com/juanvargas) and [Benjamín Errazuriz](https://github.com/benerrazuriz) at [Blar.io](https://blar.io). -# Need help? +Special thanks to the open source community and all contributors who have made this project possible. -If you need help, want to report a bug, or have a feature request, please open an issue on this repository. 
+--- -You can also reach out to us at [Discord](https://discord.gg/s8pqnPt5AP) +**Ready to transform your codebase into an intelligent graph?** +Get started with our [Quickstart Guide](docs/quickstart.md) or try the [VS Code Extension](vscode-blarify-visualizer/) today! \ No newline at end of file diff --git a/claude-project-specific.md b/claude-project-specific.md index 24985642..a8cdab28 100644 --- a/claude-project-specific.md +++ b/claude-project-specific.md @@ -1,26 +1,28 @@ -# AI-SIP Workshop Project-Specific Instructions +# Blarify/Cue Project-Specific Instructions ## Overview -This repository contains a sample JavaScript application used in the AI-SIP (AI-enhanced Software development In Practice) workshop. The application visualizes file system structures as an interactive graph using D3.js, providing a hands-on environment for demonstrating AI-assisted development workflows, debugging practices, and automated issue management. +This repository contains Blarify (formerly Cue), a sophisticated code analysis tool that creates comprehensive multilayer graph representations of codebases. The project enables AI agents and developers to understand, navigate, and modify code with unprecedented precision through intelligent graph structures. 
-The workshop focuses on: -- Leveraging AI tools like Claude Code and GitHub Copilot for efficient development -- Implementing test-driven development with AI assistance -- Creating automated error handling and issue creation workflows -- Building visual debugging and annotation features +The project includes: +- **Blarify Core**: Python-based code analysis engine with multilayer graph generation +- **VS Code Extension**: Interactive 3D visualization with ThreeJS-based rendering +- **MCP Server**: Model Context Protocol server for AI agent integration +- **Neo4j Integration**: Graph database storage and querying capabilities ## Project-Specific Guidelines ### Code Style -- Follow existing patterns in the codebase -- Use ES6+ JavaScript features -- Maintain consistent indentation (2 spaces) -- Add meaningful variable and function names +- Follow existing Python patterns and PEP 8 conventions +- Use type hints for better code clarity and IDE support +- Maintain consistent indentation (4 spaces for Python, 2 for JSON/YAML) +- Use descriptive variable and function names that reflect domain concepts +- Prefer composition over inheritance in graph node design -### Workshop-Specific Git Practices -- Keep feature branches (don't delete) for workshop instruction purposes -- Use descriptive branch names that help workshop attendees understand the progression +### Development Practices +- Keep feature branches for review and CI/CD pipeline execution +- Use descriptive branch names that reflect the feature or fix being implemented +- Maintain comprehensive test coverage (target >80%) ### Project-Specific Files to Verify - Always verify existence of: `CLAUDE.md`, `prompts/`, `.github/agents/` @@ -28,40 +30,53 @@ The workshop focuses on: - The repository should be: `rysweet/cue` ### Testing Strategy -- Write tests for all new features -- Maintain test coverage above 80% -- Use Playwright for E2E testing -- Test error scenarios and edge cases +- Write comprehensive tests for all 
new features and bug fixes +- Maintain test coverage above 80% (current: 63.76%) +- Use pytest for unit and integration testing +- Mock external dependencies (Neo4j, Azure OpenAI) appropriately +- Test error scenarios, edge cases, and language parser edge cases +- Validate graph structure integrity and relationship consistency ### Error Handling -- Catch and log all exceptions appropriately -- Create GitHub issues for production errors -- Include stack traces and reproduction steps -- Assign issues to appropriate team members +- Use structured logging with appropriate log levels +- Implement graceful degradation for missing language parsers +- Handle Neo4j connection failures with clear error messages +- Validate graph data integrity before database operations +- Provide helpful error messages for configuration issues ## Available Project Agents -- **test-solver**: Specialized in fixing failing tests, particularly Playwright E2E tests. Excels at debugging cross-browser issues, timing problems, and test stability. 
+- **workflow-master**: Orchestrates complete development workflows from prompt files +- **code-reviewer**: Conducts comprehensive code reviews with quality checks +- **orchestrator-agent**: Manages parallel execution of multiple workflow tasks +- **test-solver**: Specialized in fixing failing tests and improving test coverage -## Visual-First Development -- Use screenshots and mockups to guide implementation -- Test UI changes visually using the Playwright MCP service -- Capture and annotate screenshots for bug reports +## Graph-First Development +- Design features around graph data structures and relationships +- Consider impact on all graph layers (filesystem, code hierarchy, documentation, semantic) +- Validate graph traversal efficiency for new relationship types +- Test visualization performance with large graphs +- Ensure incremental update operations maintain graph consistency ## Technology Stack Resources -- [Neo4j Documentation](https://neo4j.com/docs/) -- [D3.js Documentation](https://d3js.org/) -- [Playwright Documentation](https://playwright.dev/docs/intro) +- [Neo4j Documentation](https://neo4j.com/docs/) - Graph database queries and operations +- [Tree-sitter Documentation](https://tree-sitter.github.io/tree-sitter/) - Language parsing and AST analysis +- [Language Server Protocol](https://microsoft.github.io/language-server-protocol/) - Code analysis and symbol resolution +- [Model Context Protocol](https://modelcontextprotocol.io/) - AI agent integration patterns +- [ThreeJS Documentation](https://threejs.org/docs/) - 3D visualization in VS Code extension +- [Azure OpenAI Documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/openai/) - LLM integration ## Project Structure -This is a JavaScript/TypeScript project with: -- D3.js for graph visualization -- Neo4j for graph database -- Playwright for E2E testing -- GitHub Actions for CI/CD +This is a Python-based project with multiple components: +- **Core**: Python package with graph 
"""
Tests to validate the accuracy of information in README.md

Every path below is relative, so the checks are resolved against the current
working directory: run the suite from the repository root (the directory that
holds README.md and pyproject.toml).
"""
import json
import os
import re
from pathlib import Path

import pytest

try:
    import tomllib  # Python 3.11+
except ImportError:
    import tomli as tomllib  # Fallback for older Python versions

# This module is a pytest entry point, not a library. Keeping __all__ empty
# means a wildcard import never copies the test_* functions into another
# module, where pytest would collect them a second time.
__all__ = []

README_PATH = Path("README.md")
PYPROJECT_PATH = Path("pyproject.toml")
VSCODE_EXTENSION_DIR = Path("vscode-blarify-visualizer")


def _read_readme() -> str:
    """Return the text of README.md.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's locale encoding (the README contains non-ASCII characters).

    Raises:
        FileNotFoundError: if README.md is not in the working directory.
    """
    return README_PATH.read_text(encoding="utf-8")


def _load_pyproject() -> dict:
    """Parse pyproject.toml and return it as a dictionary.

    Raises:
        FileNotFoundError: if pyproject.toml is not in the working directory.
    """
    # tomllib only accepts binary file objects.
    with PYPROJECT_PATH.open("rb") as f:
        return tomllib.load(f)


def test_readme_exists() -> None:
    """Test that README.md exists in the root directory."""
    assert README_PATH.exists(), "README.md should exist in root directory"


def test_python_version_accuracy() -> None:
    """Test that Python version requirements in README match pyproject.toml."""
    assert PYPROJECT_PATH.exists(), "pyproject.toml should exist"

    python_requirement = _load_pyproject()['tool']['poetry']['dependencies']['python']
    readme_content = _read_readme()

    # Both files are pinned to the same range; if one changes, this test must
    # be updated together with the other file.
    assert "3.10-3.14" in readme_content, "README should mention correct Python version range"
    assert ">=3.10,<=3.14" in python_requirement, "pyproject.toml should have matching Python requirement"


def test_package_name_consistency() -> None:
    """Test that package name is consistent between README and pyproject.toml."""
    package_name = _load_pyproject()['tool']['poetry']['name']
    readme_content = _read_readme()

    # The README must show the install command for the published package name.
    assert f"pip install {package_name}" in readme_content, f"README should mention 'pip install {package_name}'"


def test_vscode_extension_directory_exists() -> None:
    """Test that VS Code extension directory mentioned in README exists."""
    assert VSCODE_EXTENSION_DIR.exists() and VSCODE_EXTENSION_DIR.is_dir(), \
        "vscode-blarify-visualizer directory should exist"

    # Check for key files
    package_json = VSCODE_EXTENSION_DIR / "package.json"
    assert package_json.exists(), "VS Code extension should have package.json"


def test_mcp_server_directory_exists() -> None:
    """Test that MCP server directory mentioned in README exists."""
    mcp_dir = Path("mcp-blarify-server")
    assert mcp_dir.exists() and mcp_dir.is_dir(), "mcp-blarify-server directory should exist"

    # Check for key files
    requirements = mcp_dir / "requirements.txt"
    server_py = mcp_dir / "src" / "server.py"
    assert requirements.exists(), "MCP server should have requirements.txt"
    assert server_py.exists(), "MCP server should have src/server.py"


def test_language_support_accuracy() -> None:
    """Test that language support claims match actual implementation."""
    lang_dir = Path("cue/code_hierarchy/languages")
    assert lang_dir.exists(), "Language definitions directory should exist"

    # Core languages mentioned in README
    core_languages = ["python", "javascript", "typescript", "java", "go"]
    additional_languages = ["ruby", "csharp", "php"]

    # Each supported language is expected to ship a <lang>_definitions.py file.
    for lang in core_languages + additional_languages:
        lang_file = lang_dir / f"{lang}_definitions.py"
        assert lang_file.exists(), f"Language definition for {lang} should exist"


def test_architecture_components_exist() -> None:
    """Test that architectural components mentioned in README exist."""
    # Core directories mentioned in architecture
    core_dirs = [
        "cue/code_hierarchy",
        "cue/code_references",
        "cue/db_managers",
        "cue/documentation",
        "cue/filesystem",
        "cue/graph",
        "cue/llm_descriptions",
        "cue/project_file_explorer",
    ]

    for dir_path in core_dirs:
        path = Path(dir_path)
        assert path.exists() and path.is_dir(), f"Directory {dir_path} should exist"


def test_example_code_imports() -> None:
    """Test that example code imports in README are valid."""
    # The imported names are deliberately unused: only importability matters.
    try:
        from cue.prebuilt.graph_builder import GraphBuilder  # noqa: F401
    except ImportError:
        pytest.fail("GraphBuilder import from README example should work")

    try:
        from cue.db_managers.neo4j_manager import Neo4jManager  # noqa: F401
    except ImportError:
        pytest.fail("Neo4jManager import from README example should work")


def test_quickstart_guide_exists() -> None:
    """Test that quickstart guide referenced in README exists."""
    quickstart_path = Path("docs/quickstart.md")
    assert quickstart_path.exists(), "Quickstart guide should exist at docs/quickstart.md"


def test_github_links_format() -> None:
    """Test that GitHub links in README are properly formatted."""
    readme_content = _read_readme()

    # Check for GitHub repository URL
    assert "github.com/rysweet/cue" in readme_content, "README should contain correct GitHub repository URL"

    # Check for issues link
    assert "https://github.com/rysweet/cue/issues" in readme_content, "README should contain GitHub issues link"


def test_license_file_exists() -> None:
    """Test that license file referenced in README exists."""
    license_paths = [Path("LICENSE.md"), Path("LICENSE"), Path("LICENSE.txt")]
    license_exists = any(path.exists() for path in license_paths)
    assert license_exists, "License file should exist (LICENSE.md, LICENSE, or LICENSE.txt)"


def test_neo4j_container_manager_exists() -> None:
    """Test that Neo4j container management code exists in the MCP server."""
    # Prefer the "mcp-cue-server" directory name and fall back to
    # "mcp-blarify-server" when the former is absent.
    candidates = (Path("mcp-cue-server"), Path("mcp-blarify-server"))
    mcp_server_dir = next((c for c in candidates if c.exists()), candidates[-1])

    assert mcp_server_dir.exists(), "MCP server directory should exist"

    # For now, just check that the MCP server ships its container module.
    assert (mcp_server_dir / "src" / "neo4j_container.py").exists(), \
        "Neo4j container management code should exist"


def test_environment_variables_documented() -> None:
    """Test that documented environment variables are comprehensive."""
    readme_content = _read_readme()

    # Key environment variables that should be documented
    required_env_vars = [
        "NEO4J_URI",
        "NEO4J_USERNAME",
        "NEO4J_PASSWORD",
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_ENDPOINT",
        "ENABLE_LLM_DESCRIPTIONS",
    ]

    for env_var in required_env_vars:
        assert env_var in readme_content, f"Environment variable {env_var} should be documented in README"


def test_commands_exist_in_vscode_extension() -> None:
    """Test that VS Code commands mentioned in README exist in package.json.

    The test is skipped (not silently passed) when the extension manifest is
    missing, and it fails when the manifest declares no command whose id
    matches any of the README command patterns.
    """
    package_json = VSCODE_EXTENSION_DIR / "package.json"
    if not package_json.exists():
        # Existence of the manifest is asserted by
        # test_vscode_extension_directory_exists; report a skip here so a
        # missing file is visible instead of counting as a pass.
        pytest.skip("vscode-blarify-visualizer/package.json not found")

    package_data = json.loads(package_json.read_text(encoding="utf-8"))

    # A manifest without contributes/commands yields an empty list, which
    # makes the final assertion fail rather than pass vacuously.
    declared = package_data.get('contributes', {}).get('commands', [])
    commands = [cmd['command'] for cmd in declared]

    # Commands mentioned in README
    expected_commands_patterns = [
        r".*[Aa]nalyze.*[Ww]orkspace.*",
        r".*[Ss]how.*[Vv]isualization.*",
        r".*[Uu]pdate.*[Gg]raph.*",
    ]

    # Count the patterns that are satisfied by at least one command id.
    found_commands = sum(
        1
        for pattern in expected_commands_patterns
        if any(re.search(pattern, cmd, re.IGNORECASE) for cmd in commands)
    )

    # At least some commands should match patterns
    assert found_commands > 0, "VS Code extension should have commands matching README descriptions"


def test_tree_sitter_dependencies() -> None:
    """Test that tree-sitter dependencies mentioned in README are in pyproject.toml."""
    dependencies = _load_pyproject()['tool']['poetry']['dependencies']

    # Core tree-sitter parsers that should be present
    expected_parsers = [
        "tree-sitter-python",
        "tree-sitter-javascript",
        "tree-sitter-typescript",
        "tree-sitter-java",
        "tree-sitter-go",
    ]

    for parser in expected_parsers:
        assert parser in dependencies, f"Tree-sitter parser {parser} should be in dependencies"


def test_docs_directory_exists() -> None:
    """Test that docs directory referenced in README exists."""
    docs_dir = Path("docs")
    assert docs_dir.exists() and docs_dir.is_dir(), "docs directory should exist"


if __name__ == "__main__":
    # Propagate pytest's return code so a failing run exits non-zero.
    raise SystemExit(pytest.main([__file__, "-v"]))