diff --git a/demo_enhanced_explorer.py b/demo_enhanced_explorer.py deleted file mode 100644 index 0320f68..0000000 --- a/demo_enhanced_explorer.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 -""" -Quick Demo of Enhanced Graph Explorer -Shows the LLM-powered features in action -""" - -import asyncio -import sys -import os - -# Add the parent directory to the path so we can import our modules -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) - -from demos.graph_explorer import GraphExplorer - -async def demo_enhanced_features(): - """Demonstrate the enhanced LLM features""" - print("π ENHANCED GRAPH EXPLORER DEMO") - print("=" * 50) - - explorer = GraphExplorer() - await explorer.initialize() - - # Demo queries showcasing LLM enhancement - demo_queries = [ - "Find technology companies in our graph", - "Show me people who work for sustainable organizations", - "What products are related to environmental impact?" - ] - - for i, query in enumerate(demo_queries, 1): - print(f"\nπ Demo Query {i}: '{query}'") - print("-" * 50) - - # Show LLM interpretation - interpretation = await explorer._interpret_natural_language_query(query) - print(f"π§ LLM Analysis:") - print(f" Intent: {interpretation['intent']}") - print(f" Strategy: {interpretation['strategy']}") - print(f" Terms: {', '.join(interpretation['search_terms'])}") - print(f" Confidence: {interpretation['confidence']:.2f}") - print(f" Explanation: {interpretation['explanation']}") - - # Show search results - search_terms = interpretation['search_terms'][:2] - entities = [] - - for term in search_terms: - results = await explorer.graphiti.search_entities(term, limit=3) - entities.extend(results) - - # Remove duplicates - unique_entities = {} - for entity in entities: - entity_id = entity.get('id') or entity.get('name') - if entity_id not in unique_entities: - unique_entities[entity_id] = entity - - entities = list(unique_entities.values()) - - print(f"\nπ Results: {len(entities)} entities 
found") - for j, entity in enumerate(entities[:3], 1): - name = entity.get('name', 'Unknown') - entity_type = entity.get('type', entity.get('entity_type', 'Unknown')) - description = entity.get('description', 'No description') - print(f" {j}. {name} ({entity_type})") - print(f" {description[:60]}{'...' if len(description) > 60 else ''}") - - # Show LLM enhancement - if entities: - enhanced = await explorer._enhance_search_results_with_llm(query, entities, interpretation) - if hasattr(explorer, '_last_enhancement'): - enhancement = explorer._last_enhancement - print(f"\nπ‘ LLM Insights: {enhancement.get('insights', 'No insights')}") - - follow_ups = enhancement.get('follow_up_queries', []) - if follow_ups: - print(f"π Suggested follow-ups:") - for k, suggestion in enumerate(follow_ups[:2], 1): - print(f" {k}. {suggestion}") - - print() - - print("β Demo completed! The graph explorer is now fully LLM-powered.") - -if __name__ == "__main__": - asyncio.run(demo_enhanced_features()) diff --git a/demo_results_20250530_105229.md b/demo_results_20250530_105229.md deleted file mode 100644 index 8a89c6b..0000000 --- a/demo_results_20250530_105229.md +++ /dev/null @@ -1,351 +0,0 @@ -# Enhanced Graph Explorer Demo Report - -**Generated:** 2025-05-30T10:52:29.722466 -**Demo:** Enhanced Graph Explorer Capabilities Demo -**System:** LLM-Powered Knowledge Graph Explorer - -## Executive Summary - -This report documents a comprehensive demonstration of the Enhanced Graph Explorer system, showcasing its advanced capabilities for AI-powered knowledge graph exploration and analysis. The system successfully demonstrates integration between Azure Cosmos DB graph storage and OpenAI's language models for intelligent query processing. 
- -## Key Capabilities Demonstrated - -- π§ **Natural Language Processing**: LLM-powered query interpretation and semantic understanding -- π **Advanced Search**: Multi-strategy entity and relationship discovery -- π **Statistical Analysis**: Comprehensive graph metrics and pattern recognition -- π― **Complex Queries**: High-connectivity analysis and pattern matching -- π‘ **AI Insights**: Automated insight generation and follow-up recommendations - -## Detailed Demo Results - -### 1. System Initialization - -**Description:** Connecting to Azure Cosmos DB and OpenAI services for LLM-enhanced graph exploration -**Timestamp:** 10:52:29 - -**Key Insights:** The system uses Azure Cosmos DB for graph storage and OpenAI for natural language processing - ---- - -### 2. Connection Status - -**Description:** Successfully connected to all required services -**Timestamp:** 10:52:30 - -**Key Insights:** Ready for AI-powered graph exploration and analysis - -**Results:** -```json -{ - "status": "β Connected", - "services": [ - "Azure Cosmos DB", - "OpenAI GPT-4" - ] -} -``` - ---- - -### 3. Natural Language Search Demo - -**Description:** Demonstrated LLM-powered query interpretation and semantic entity discovery -**Timestamp:** 10:56:09 - -**Key Insights:** Successfully processed 3 natural language queries with AI interpretation - -**Results:** -```json -[ - { - "query": "Find products related to sustainable footwear", - "interpretation": { - "intent": "search_entities", - "search_terms": [ - "products", - "sustainable footwear", - "related products" - ], - "strategy": "semantic_search", - "result_type": "specific_entities", - "confidence": 0.92, - "explanation": "The query seeks to identify specific products that are associated with the concept of sustainable footwear, indicating an entity search using semantic understanding to capture relatedness." 
- }, - "entities_found": 10, - "top_entities": [ - { - "name": "trail_runners", - "type": "unknown", - "description": "A category of products related to running on trails" - }, - { - "name": "dress_shoes", - "type": "unknown", - "description": "A category of footwear products browsed by the customer." - }, - { - "name": "boots", - "type": "unknown", - "description": "Footwear products that the customer browsed." - } - ] - }, - { - "query": "Show me companies that manufacture insoles", - "interpretation": { - "intent": "search_entities", - "search_terms": [ - "companies", - "manufacture", - "insoles" - ], - "strategy": "semantic_search", - "result_type": "specific_entities", - "confidence": 0.95, - "explanation": "The query seeks a list of companies (entities) with the specific activity of manufacturing insoles, indicating an entity search with semantic understanding of the manufacturing relationship." - }, - "entities_found": 10, - "top_entities": [ - { - "name": "corporate_sustainability", - "type": "unknown", - "description": "Business practices and strategies that prioritize environmental and social responsibility." - }, - { - "name": "Microsoft", - "type": "unknown", - "description": "A multinational technology company that develops, licenses, and sells computer software, electronics" - }, - { - "name": "corporate sustainability initiatives", - "type": "unknown", - "description": "Business strategies and actions aimed at promoting environmental, social, and economic sustainabilit" - } - ] - }, - { - "query": "What blue products are available?", - "interpretation": { - "intent": "search_entities", - "search_terms": [ - "product", - "blue" - ], - "strategy": "exact_match", - "result_type": "specific_entities", - "confidence": 0.95, - "explanation": "The query seeks a list of products with the attribute 'blue', indicating a search for specific entities matching this property." 
- }, - "entities_found": 10, - "top_entities": [ - { - "name": "Sustainability filter", - "type": "unknown", - "description": "A filter option related to sustainable products, which was not applied in this session." - }, - { - "name": "sneakers", - "type": "unknown", - "description": "Type of footwear product browsed by the customer" - }, - { - "name": "Sarah Johnson", - "type": "unknown", - "description": "A person who viewed the product." - } - ] - } -] -``` - ---- - -### 4. Relationship Analysis - -**Description:** Comprehensive analysis of relationship patterns, types, and connectivity -**Timestamp:** 10:56:19 - -**Key Insights:** Relationship analysis reveals the structural patterns and key connectors in the knowledge graph - -**Results:** -```json -{ - "total_relationships": 50, - "relationship_types": { - "unknown": 20 - }, - "confidence_stats": { - "average": 1.0, - "min": 1.0, - "max": 1.0 - }, - "top_connected_entities": { - "Unknown": 11, - "John Smith": 4, - "Alice Williams": 4, - "Microsoft": 3, - "Sarah Johnson": 3 - } -} -``` - ---- - -### 5. Graph Statistics - -**Description:** Comprehensive statistical overview of the knowledge graph structure -**Timestamp:** 10:56:37 - -**Key Insights:** Graph statistics provide insights into data distribution and structural characteristics - -**Results:** -```json -{ - "total_entities": 100, - "total_relationships": 100, - "entity_types": { - "unknown": 100 - }, - "density": 1.0, - "analysis_completeness": 100 -} -``` - ---- - -### 6. 
Advanced Query Capabilities - -**Description:** Demonstration of complex query patterns and analysis techniques -**Timestamp:** 11:08:47 - -**Key Insights:** Advanced queries enable deep insights into graph structure and connectivity patterns - -**Results:** -```json -[ - { - "query_type": "high_connectivity", - "results": [ - [ - "Marcus Chen", - 16 - ], - [ - "OpenAI", - 5 - ], - [ - "John Smith", - 4 - ], - [ - "Alice Williams", - 4 - ], - [ - "Sarah Williams", - 4 - ] - ], - "total_found": 16 - }, - { - "query_type": "pattern_matching", - "patterns": { - "entity β unknown β entity": 30 - }, - "total_patterns": 1 - } -] -``` - ---- - -### 7. LLM-Powered Enhancements - -**Description:** Demonstration of AI-powered query interpretation and result enhancement -**Timestamp:** 11:11:06 - -**Key Insights:** LLM enhancements provide intelligent query understanding and contextual insights - -**Results:** -```json -{ - "test_query": "Show me all eco-friendly products from sustainable brands", - "interpretation": { - "intent": "search_entities", - "search_terms": [ - "eco-friendly products", - "sustainable brands" - ], - "strategy": "semantic_search", - "result_type": "specific_entities", - "confidence": 0.95, - "explanation": "The query seeks a list of products (entities) that are both eco-friendly and associated with sustainable brands, requiring semantic understanding to match relevant attributes." 
- }, - "entities_found": 6, - "enhancement": null, - "llm_features_tested": [ - "Query interpretation", - "Search strategy recommendation", - "Result enhancement", - "Insight generation" - ] -} -``` - ---- - -## Technical Specifications - -- **Graph Database**: Azure Cosmos DB with Gremlin API -- **AI Model**: OpenAI GPT-4 for natural language processing -- **Query Language**: Gremlin for graph traversal -- **Data Format**: Handles both simple dictionaries and Cosmos DB valueMap(true) format -- **Programming Language**: Python 3.12+ -- **Key Libraries**: asyncio, azure-cosmos, openai - -## System Features - -### β Core Capabilities -- Natural language query interpretation -- Entity and relationship search -- Graph statistical analysis -- Pattern recognition and matching -- Community detection -- Subgraph analysis - -### β LLM Enhancements -- Intelligent query understanding -- Search strategy recommendation -- Result contextualization -- Insight generation -- Follow-up query suggestions - -### β Data Handling -- Cosmos DB valueMap(true) format support -- Backward compatibility with simple dictionaries -- Robust error handling and fallback mechanisms -- Performance optimization for large graphs - -## Demo Conclusions - -The Enhanced Graph Explorer successfully demonstrates sophisticated AI-powered graph analysis capabilities. The system effectively combines traditional graph database operations with modern language model intelligence to provide intuitive, natural language interfaces for complex graph exploration tasks. - -### Key Achievements: -1. **Seamless Integration**: Successfully integrated graph database with LLM capabilities -2. **Natural Interface**: Demonstrated natural language query processing -3. **Comprehensive Analysis**: Showed multiple analysis approaches and insight generation -4. **Robust Performance**: Handled various query types and data formats -5. 
**Actionable Insights**: Generated meaningful patterns and recommendations - -### Future Enhancements: -- Real-time visualization integration -- Advanced community detection algorithms -- Multi-hop reasoning capabilities -- Custom domain-specific query templates -- Interactive result exploration - ---- - -*This report was automatically generated by the Enhanced Graph Explorer Demo System on May 30, 2025 at 11:11 AM* diff --git a/demos/graph_explorer.py b/demos/graph_explorer.py deleted file mode 100644 index 659e6ce..0000000 --- a/demos/graph_explorer.py +++ /dev/null @@ -1,1561 +0,0 @@ -""" -Interactive Graph Explorer for Graphiti-Cosmos -Allows natural language exploration of entities, relationships, communities, and subgraphs -""" - -import asyncio -import os -import sys -import json -import time -from datetime import datetime -from typing import List, Dict, Any, Set, Tuple -from collections import defaultdict, Counter -import traceback - -# Add the parent directory to the path so we can import our modules -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) - -from src.graphiti_cosmos import GraphitiCosmos - -class GraphExplorer: - """Interactive exploration tool for knowledge graphs""" - - def __init__(self): - self.graphiti = None - self.session_history = [] - self.bookmark_entities = [] - self.bookmark_relationships = [] - - async def initialize(self): - """Initialize the Graphiti-Cosmos system""" - print("π INTERACTIVE GRAPH EXPLORER") - print("=" * 60) - print("π¨ Initializing Graphiti-Cosmos...") - - self.graphiti = GraphitiCosmos() - await self.graphiti.initialize() - print("β Connected to Azure Cosmos DB and OpenAI") - print("π Ready for graph exploration!") - print() - - def display_main_menu(self): - """Display the main exploration menu""" - print("π GRAPH EXPLORATION MENU") - print("-" * 50) - print("1. π Search Entities (Natural Language)") - print("2. π Search Relationships") - print("3. ποΈ Explore Communities") - print("4. 
π§© Analyze Subgraphs") - print("5. π Graph Overview & Statistics") - print("6. π Entity Deep Dive") - print("7. π Relationship Analysis") - print("8. π― Advanced Query Builder") - print("9. π Session History") - print("10. π Manage Bookmarks") - print("11. π Export Analysis") - print("0. πͺ Exit") - print("-" * 50) - - async def run_interactive_session(self): - """Main interactive session loop""" - await self.initialize() - - while True: - self.display_main_menu() - choice = input("Enter your choice (0-11): ").strip() - - try: - if choice == "0": - print("π Thanks for exploring! Session saved.") - await self._save_session() - break - elif choice == "1": - await self._search_entities_nl() - elif choice == "2": - await self._search_relationships() - elif choice == "3": - await self._explore_communities() - elif choice == "4": - await self._analyze_subgraphs() - elif choice == "5": - await self._graph_overview() - elif choice == "6": - await self._entity_deep_dive() - elif choice == "7": - await self._relationship_analysis() - elif choice == "8": - await self._advanced_query_builder() - elif choice == "9": - await self._show_session_history() - elif choice == "10": - await self._manage_bookmarks() - elif choice == "11": - await self._export_analysis() - else: - print("β Invalid choice! 
Please try again.") - - print("\nPress Enter to continue...") - input() - print("\n" + "="*60 + "\n") - - except Exception as e: - print(f"β Error: {e}") - traceback.print_exc() - print("\nPress Enter to continue...") - - async def _search_entities_nl(self): - """Natural language entity search""" - print("π NATURAL LANGUAGE ENTITY SEARCH") - print("-" * 40) - print("π‘ Examples:") - print(" - 'Find all people in the graph'") - print(" - 'Show me products related to sustainability'") - print(" - 'List organizations and their relationships'") - print(" - 'Find entities connected to Sarah Johnson'") - print() - - query = input("π£οΈ Enter your search query: ").strip() - if not query: - print("βΉοΈ No query entered.") - return - - # Log the search - self.session_history.append({ - 'type': 'entity_search', - 'query': query, - 'timestamp': datetime.now().isoformat() - }) - print(f"\nπ Analyzing query: '{query}'") - print("-" * 40) - - try: - # Step 1: Interpret the natural language query using LLM - interpretation = await self._interpret_search_query(query) - - print(f"π― Query intent: {interpretation['intent']}") - print(f"π Search terms: {', '.join(interpretation['search_terms'])}") - print(f"π§ Strategy: {interpretation['strategy']}") - print(f"πͺ Confidence: {interpretation['confidence']:.2f}") - print("π‘ Searching with AI-enhanced strategy...") - print() - - # Step 2: Execute search based on interpretation - entities = [] - search_terms = interpretation['search_terms'] - - if interpretation['strategy'] == 'exact_match': - # Use exact matching for specific entity searches - for term in search_terms[:2]: - results = await self.graphiti.search_entities(term, limit=10) - entities.extend(results) - elif interpretation['strategy'] == 'broad_exploration': - # Get broader results for exploratory queries - for term in search_terms[:1]: - results = await self.graphiti.search_entities(term, limit=30) - entities.extend(results) - else: # semantic_search (default) - # Use 
semantic search for most queries - for term in search_terms[:3]: - results = await self.graphiti.search_entities(term, limit=15) - entities.extend(results) - # Step 3: Remove duplicates and enhance results with LLM - unique_entities = {} - for entity in entities: - entity_id = self._extract_property(entity, 'id') or self._extract_property(entity, 'name') - if entity_id not in unique_entities: - unique_entities[entity_id] = entity - - entities = list(unique_entities.values()) - - # Step 4: Enhance results with LLM insights - if entities: - print("π§ Enhancing results with AI insights...") - entities = await self._enhance_search_results_with_llm(query, entities, interpretation) - - if entities: - print(f"\nβ Found {len(entities)} entities:") - - # Display LLM insights if available - if hasattr(self, '_last_enhancement'): - enhancement = self._last_enhancement - print(f"\nπ AI Insights: {enhancement.get('insights', 'No insights available')}") - - if enhancement.get('follow_up_queries'): - print("\nπ‘ Suggested follow-up queries:") - for i, suggestion in enumerate(enhancement['follow_up_queries'][:3], 1): - print(f" {i}. {suggestion}") - - if enhancement.get('gaps'): - print(f"\nβ οΈ Consider exploring: {enhancement['gaps']}") - print() - # Group by type - entity_groups = defaultdict(list) - for entity in entities: - entity_type = self._extract_property(entity, 'type', - self._extract_property(entity, 'entity_type', 'unknown')) - entity_groups[entity_type].append(entity) - - for entity_type, type_entities in entity_groups.items(): - print(f"\nπ {entity_type.upper()} ({len(type_entities)} entities):") - for i, entity in enumerate(type_entities[:5], 1): - name = self._extract_property(entity, 'name', - self._extract_property(entity, 'id', 'Unknown')) - description = self._extract_property(entity, 'description', '') - if description: - print(f" {i}. {name}: {description[:100]}{'...' if len(description) > 100 else ''}") - else: - print(f" {i}. 
{name}") - - # Show bookmark option - print("\nπ Enter entity name to bookmark (or press Enter to continue):") - bookmark_choice = input().strip() - if bookmark_choice: - self.bookmark_entities.append(bookmark_choice) - print(f"β Bookmarked: {bookmark_choice}") - - else: - print("β No entities found matching your query.") - print("π‘ Try:") - print(" - Using broader search terms") - print(" - Checking spelling") - print(" - Using different keywords") - - except Exception as e: - print(f"β Search error: {e}") - print("π‘ Trying fallback search...") - - try: - # Fallback to basic keyword search - keywords = await self._extract_search_keywords(query) - entities = [] - for keyword in keywords: - results = await self.graphiti.search_entities(keyword, limit=20) - entities.extend(results) - if entities: - print(f"β Found {len(entities)} entities using fallback search:") - for i, entity in enumerate(entities[:10], 1): - # Handle both dictionary and Cosmos DB valueMap(true) list formats - if isinstance(entity, dict): - name = self._extract_property(entity, 'name', 'Unknown') - entity_type = self._extract_property(entity, 'type', - self._extract_property(entity, 'entity_type', 'unknown')) - else: - name = str(entity) if entity else 'Unknown' - entity_type = 'unknown' - print(f" {i}. {name} ({entity_type})") - else: - print("β No entities found.") - print() - - except Exception as fallback_error: - print(f"β Fallback search also failed: {fallback_error}") - print("π‘ Try these alternatives:") - print(" - Use more general terms") - print(" - Try searching for entity types: 'person', 'product', 'organization'") - print(" - Browse all entities with an empty search") - - async def _search_relationships(self): - """Search and explore relationships""" - print("π RELATIONSHIP SEARCH & EXPLORATION") - print("-" * 40) - print("π‘ Search options:") - print(" 1. By relationship type (e.g., 'works_for', 'related_to')") - print(" 2. By entities (e.g., 'Sarah Johnson')") - print(" 3. 
By pattern (e.g., 'person β organization')") - print() - - search_type = input("Choose search type (1-3): ").strip() - - if search_type == "1": - await self._search_relationships_by_type() - elif search_type == "2": - await self._search_relationships_by_entity() - elif search_type == "3": - await self._search_relationships_by_pattern() - else: - print("β Invalid choice!") - - async def _search_relationships_by_type(self): - """Search relationships by type""" - print("\nπ·οΈ SEARCH BY RELATIONSHIP TYPE") - print("-" * 30) - - # Show available relationship types - print("π Getting available relationship types...") - relationships = await self.graphiti.search_relationships("", limit=100) - - rel_types = Counter() - for rel in relationships: - rel_type = self._extract_property(rel, 'type', 'unknown') - rel_types[rel_type] += 1 - - print("π Available relationship types:") - for rel_type, count in rel_types.most_common(10): - print(f" β’ {rel_type} ({count} instances)") - - rel_type = input("\nEnter relationship type to explore: ").strip() - if not rel_type: - return - - # Search for this relationship type - matching_rels = [rel for rel in relationships if self._extract_property(rel, 'type', '').lower() == rel_type.lower()] - - if matching_rels: - print(f"\nβ Found {len(matching_rels)} '{rel_type}' relationships:") - - for i, rel in enumerate(matching_rels[:10], 1): - source = self._extract_property(rel, 'source', 'Unknown') - target = self._extract_property(rel, 'target', 'Unknown') - description = self._extract_property(rel, 'description', 'No description') - confidence = self._extract_property(rel, 'confidence', '1.0') - - print(f"\n{i}. {source} β {rel_type} β {target}") - if confidence < 1.0: - print(f" π― Confidence: {confidence:.2f}") - if description and description != 'No description': - desc_preview = description[:100] + "..." if len(description) > 100 else description - print(f" π {desc_preview}") - - if len(matching_rels) > 10: - print(f"\n... 
and {len(matching_rels) - 10} more relationships") - else: - print(f"β No relationships found of type '{rel_type}'") - - async def _search_relationships_by_entity(self): - """Search relationships involving specific entities""" - print("\nπ€ SEARCH BY ENTITY") - print("-" * 20) - - entity_name = input("Enter entity name: ").strip() - if not entity_name: - return - - relationships = await self._get_entity_relationships(entity_name) - - if relationships: - print(f"\nβ Found {len(relationships)} relationships for '{entity_name}':") - # Group by relationship type - rel_groups = defaultdict(list) - for rel in relationships: - rel_type = self._extract_property(rel, 'type', 'unknown') - rel_groups[rel_type].append(rel) - - for rel_type, type_rels in rel_groups.items(): - print(f"\nπ {rel_type} ({len(type_rels)} instances):") - for rel in type_rels[:5]: - source = self._extract_property(rel, 'source', 'Unknown') - target = self._extract_property(rel, 'target', 'Unknown') - - # Determine direction - if source.lower() == entity_name.lower(): - print(f" β {target}") - else: - print(f" β {source}") - - if len(type_rels) > 5: - print(f" ... 
and {len(type_rels) - 5} more") - else: - print(f"β No relationships found for '{entity_name}'") - - async def _explore_communities(self): - """Explore entity communities and clusters""" - print("ποΈ COMMUNITY EXPLORATION") - print("-" * 40) - - print("π Analyzing entity communities...") - - # Get all entities - entities = await self.graphiti.search_entities("", limit=200) - - if len(entities) < 3: - print("βΉοΈ Not enough entities for community analysis") - return - # Group entities by type (basic community detection) - type_communities = defaultdict(list) - for entity in entities: - entity_type = self._extract_property(entity, 'type', 'unknown') - type_communities[entity_type].append(entity) - - print(f"β Found {len(type_communities)} entity type communities:") - - for community_type, members in type_communities.items(): - print(f"\nπ {community_type.upper()} Community ({len(members)} members)") - # Show top members - for i, member in enumerate(members[:5], 1): - name = self._extract_property(member, 'name', 'Unknown') - description = self._extract_property(member, 'description', 'No description') - desc_preview = description[:50] + "..." if len(description) > 50 else description - print(f" {i}. {name} - {desc_preview}") - - if len(members) > 5: - print(f" ... 
and {len(members) - 5} more members") - - # Advanced community analysis - print(f"\nπ¬ ADVANCED COMMUNITY ANALYSIS") - print("-" * 30) - - community_choice = input("Enter community type to analyze deeply (or press Enter to skip): ").strip() - if community_choice: - await self._deep_community_analysis(community_choice, type_communities.get(community_choice.lower(), [])) - - async def _deep_community_analysis(self, community_type: str, members: List[Dict]): - """Perform deep analysis of a specific community""" - print(f"\nπ¬ DEEP ANALYSIS: {community_type.upper()} COMMUNITY") - print("-" * 40) - - if not members: - print("β No members found in this community") - return - - print(f"π₯ Community Size: {len(members)} members") - - # Analyze internal connections - print("\nπ Analyzing internal connections...") - internal_connections = 0 - connection_map = defaultdict(list) - for member in members: - member_name = self._extract_property(member, 'name', '') - relationships = await self._get_entity_relationships(member_name) - - for rel in relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - - # Check if both entities are in this community - member_names = [self._extract_property(m, 'name', '') for m in members] - if source in member_names and target in member_names: - internal_connections += 1 - connection_map[member_name].append({ - 'target': target if source == member_name else source, - 'type': self._extract_property(rel, 'type', 'unknown') - }) - - print(f"π Internal connections: {internal_connections}") - - # Find most connected members - connection_counts = [(name, len(connections)) for name, connections in connection_map.items()] - connection_counts.sort(key=lambda x: x[1], reverse=True) - - print(f"\nβ Most connected members:") - for name, count in connection_counts[:5]: - print(f" β’ {name}: {count} connections") - - # Find external connections - print(f"\nπ External connections...") - 
external_targets = defaultdict(int) - for member in members[:10]: # Limit for performance - member_name = self._extract_property(member, 'name', '') - relationships = await self._get_entity_relationships(member_name) - - for rel in relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - - # Find external targets - member_names = [self._extract_property(m, 'name', '') for m in members] - external_target = None - if source == member_name and target not in member_names: - external_target = target - elif target == member_name and source not in member_names: - external_target = source - - if external_target: - external_targets[external_target] += 1 - - if external_targets: - print(f"π Top external connections:") - for target, count in Counter(external_targets).most_common(5): - print(f" β’ {target}: {count} connections") - - async def _analyze_subgraphs(self): - """Analyze and explore subgraphs""" - print("π§© SUBGRAPH ANALYSIS") - print("-" * 40) - - print("π Choose subgraph analysis type:") - print("1. π― Ego network (around specific entity)") - print("2. π Path analysis (between two entities)") - print("3. π Dense subgraphs") - print("4. 
π·οΈ Type-based subgraphs") - - choice = input("Enter choice (1-4): ").strip() - - if choice == "1": - await self._ego_network_analysis() - elif choice == "2": - await self._path_analysis() - elif choice == "3": - await self._dense_subgraph_analysis() - elif choice == "4": - await self._type_based_subgraphs() - else: - print("β Invalid choice!") - - async def _ego_network_analysis(self): - """Analyze ego network around a specific entity""" - print("\nπ― EGO NETWORK ANALYSIS") - print("-" * 25) - - entity_name = input("Enter entity name for ego network: ").strip() - if not entity_name: - return - - depth = input("Enter network depth (1-3, default 2): ").strip() - try: - depth = int(depth) if depth else 2 - depth = max(1, min(depth, 3)) # Limit between 1-3 - except ValueError: - depth = 2 - - print(f"\nπ Analyzing {depth}-hop ego network for '{entity_name}'...") - # Build ego network - ego_entities = {entity_name} - ego_relationships = [] - - current_entities = {entity_name} - for hop in range(depth): - next_entities = set() - for entity in current_entities: - relationships = await self._get_entity_relationships(entity) - - for rel in relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - - ego_relationships.append(rel) - - # Add connected entities - if source == entity: - next_entities.add(target) - ego_entities.add(target) - elif target == entity: - next_entities.add(source) - ego_entities.add(source) - - current_entities = next_entities - ego_entities - ego_entities.update(next_entities) - - print(f" Hop {hop + 1}: Found {len(next_entities)} new entities") - - print(f"\nβ Ego network summary:") - print(f" π Center entity: {entity_name}") - print(f" π₯ Total entities: {len(ego_entities)}") - print(f" π Total relationships: {len(ego_relationships)}") - - # Analyze the ego network - if len(ego_entities) > 1: - print(f"\nπ¬ Network analysis:") - # Entity types in network - entity_types = Counter() - 
for entity_name_in_network in ego_entities: - # Get entity type - entities = await self.graphiti.search_entities(entity_name_in_network, limit=1) - if entities: - entity_type = self._extract_property(entities[0], 'type', 'unknown') - entity_types[entity_type] += 1 - - print(f" π Entity types:") - for entity_type, count in entity_types.most_common(): - print(f" β’ {entity_type}: {count}") - # Relationship types in network - rel_types = Counter() - for rel in ego_relationships: - rel_type = self._extract_property(rel, 'type', 'unknown') - rel_types[rel_type] += 1 - - print(f" π Relationship types:") - for rel_type, count in rel_types.most_common(5): - print(f" β’ {rel_type}: {count}") - - async def _graph_overview(self): - """Provide comprehensive graph overview and statistics""" - print("π GRAPH OVERVIEW & STATISTICS") - print("-" * 40) - - print("π Collecting graph statistics...") - # Get basic stats - stats = await self.graphiti.get_graph_stats() - print(f"\nπ Basic Statistics:") - print(f" π Episodes: {self._extract_property(stats, 'episodes', '0')}") - print(f" π₯ Entities: {self._extract_property(stats, 'entities', '0')}") - print(f" π Relationships: {self._extract_property(stats, 'relationships', '0')}") - - entities_count = int(self._extract_property(stats, 'entities', '0')) - relationships_count = int(self._extract_property(stats, 'relationships', '0')) - if entities_count > 0: - density = relationships_count / entities_count - print(f" π― Density: {density:.2f} relationships per entity") - - # Entity type distribution - print(f"\nπ·οΈ Entity Type Distribution:") - entities = await self.graphiti.search_entities("", limit=200) - entity_types = Counter() - for entity in entities: - entity_type = self._extract_property(entity, 'type', 'unknown') - entity_types[entity_type] += 1 - - for entity_type, count in entity_types.most_common(): - percentage = (count / len(entities)) * 100 if entities else 0 - print(f" π {entity_type}: {count} ({percentage:.1f}%)") - - # 
Relationship type distribution - print(f"\nπ Relationship Type Distribution:") - relationships = await self.graphiti.search_relationships("", limit=200) - rel_types = Counter() - for rel in relationships: - rel_type = self._extract_property(rel, 'type', 'unknown') - rel_types[rel_type] += 1 - - for rel_type, count in rel_types.most_common(5): - percentage = (count / len(relationships)) * 100 if relationships else 0 - print(f" π {rel_type}: {count} ({percentage:.1f}%)") - # Find most connected entities - print(f"\nβ Most Connected Entities:") - entity_connections = defaultdict(int) - - for rel in relationships[:100]: # Limit for performance - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - entity_connections[source] += 1 - entity_connections[target] += 1 - - top_connected = sorted(entity_connections.items(), key=lambda x: x[1], reverse=True) - for entity, connections in top_connected[:5]: - print(f" π {entity}: {connections} connections") - - async def _entity_deep_dive(self): - """Deep dive analysis of a specific entity""" - print("π ENTITY DEEP DIVE") - print("-" * 40) - - entity_name = input("Enter entity name to analyze: ").strip() - if not entity_name: - return - - await self._entity_deep_dive_specific(entity_name) - - async def _entity_deep_dive_specific(self, entity_name: str): - """Perform deep dive analysis on a specific entity""" - print(f"\nπ¬ DEEP DIVE: {entity_name}") - print("-" * 40) - - # Get entity details - entities = await self.graphiti.search_entities(entity_name, limit=5) - target_entity = None - for entity in entities: - if self._extract_property(entity, 'name', '').lower() == entity_name.lower(): - target_entity = entity - break - - if not target_entity and entities: - target_entity = entities[0] # Take the first match - - if target_entity: - print(f"π Entity Details:") - print(f" π·οΈ Name: {self._extract_property(target_entity, 'name', 'Unknown')}") - print(f" π Type: 
{self._extract_property(target_entity, 'type', 'Unknown')}") - print(f" π Description: {self._extract_property(target_entity, 'description', 'No description')}") - - # Get relationships - relationships = await self._get_entity_relationships(entity_name) - print(f"\nπ Relationships ({len(relationships)} total):") - - if relationships: # Group by type - rel_groups = defaultdict(list) - for rel in relationships: - rel_type = self._extract_property(rel, 'type', 'unknown') - rel_groups[rel_type].append(rel) - - for rel_type, type_rels in rel_groups.items(): - print(f"\n π {rel_type} ({len(type_rels)} instances):") - for rel in type_rels[:3]: - source = self._extract_property(rel, 'source', 'Unknown') - target = self._extract_property(rel, 'target', 'Unknown') - confidence = float(self._extract_property(rel, 'confidence', '1.0')) - - if source.lower() == entity_name.lower(): - direction = f"β {target}" - else: - direction = f"β {source}" - - confidence_str = f" (conf: {confidence:.2f})" if confidence < 1.0 else "" - print(f" {direction}{confidence_str}") - - if len(type_rels) > 3: - print(f" ... 
and {len(type_rels) - 3} more") - - # Find related entities print(f"\nπ Connected Entity Types:") - connected_types = Counter() - for rel in relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - - # Get the other entity's type - other_entity = target if source.lower() == entity_name.lower() else source - other_entities = await self.graphiti.search_entities(other_entity, limit=1) - if other_entities: - other_type = self._extract_property(other_entities[0], 'type', 'unknown') - connected_types[other_type] += 1 - - for conn_type, count in connected_types.most_common(): - print(f" π {conn_type}: {count} connections") - else: - print(f"β Entity '{entity_name}' not found") - - async def _relationship_analysis(self): - """Comprehensive relationship analysis""" - print("π RELATIONSHIP ANALYSIS") - print("-" * 40) - - print("π Collecting relationship data...") - relationships = await self.graphiti.search_relationships("", limit=200) - - if not relationships: - print("β No relationships found in the graph") - return - - print(f"β Analyzing {len(relationships)} relationships...") - # Relationship type analysis - rel_types = Counter() - confidence_by_type = defaultdict(list) - - for rel in relationships: - rel_type = self._extract_property(rel, 'type', 'unknown') - confidence = float(self._extract_property(rel, 'confidence', '1.0')) - rel_types[rel_type] += 1 - confidence_by_type[rel_type].append(confidence) - - print(f"\nπ·οΈ Relationship Types & Confidence:") - for rel_type, count in rel_types.most_common(): - avg_confidence = sum(confidence_by_type[rel_type]) / len(confidence_by_type[rel_type]) - print(f" π {rel_type}: {count} instances (avg confidence: {avg_confidence:.2f})") - # Find relationship patterns - print(f"\nπ Relationship Patterns:") - patterns = Counter() - for rel in relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - 
rel_type = self._extract_property(rel, 'type', 'unknown') - - # Get entity types - source_entities = await self.graphiti.search_entities(source, limit=1) - target_entities = await self.graphiti.search_entities(target, limit=1) - - source_type = self._extract_property(source_entities[0], 'type', 'unknown') if source_entities else 'unknown' - target_type = self._extract_property(target_entities[0], 'type', 'unknown') if target_entities else 'unknown' - - pattern = f"{source_type} β {rel_type} β {target_type}" - patterns[pattern] += 1 - - print(f" π Most common patterns:") - for pattern, count in patterns.most_common(5): - print(f" β’ {pattern}: {count} times") - - async def _advanced_query_builder(self): - """Advanced query builder for complex graph queries""" - print("π― ADVANCED QUERY BUILDER") - print("-" * 40) - print("π§ Build complex queries to explore your graph") - print() - - print("1. π Multi-entity search") - print("2. π Relationship chain queries") - print("3. π Conditional queries") - print("4. 
π¨ Custom pattern matching") - - choice = input("Enter choice (1-4): ").strip() - - if choice == "1": - await self._multi_entity_search() - elif choice == "2": - await self._relationship_chain_query() - elif choice == "3": - await self._conditional_query() - elif choice == "4": - await self._pattern_matching() - else: - print("β Invalid choice!") - - async def _multi_entity_search(self): - """Search for multiple entities simultaneously""" - print("\nπ MULTI-ENTITY SEARCH") - print("-" * 25) - - query = input("Enter entities to search (comma-separated): ").strip() - if not query: - return - - entity_names = [name.strip() for name in query.split(',')] - print(f"\nπ― Searching for: {', '.join(entity_names)}") - - found_entities = {} - for entity_name in entity_names: - entities = await self.graphiti.search_entities(entity_name, limit=5) - if entities: - found_entities[entity_name] = entities[0] - - print(f"\nβ Found {len(found_entities)} entities:") - for name, entity in found_entities.items(): - entity_type = self._extract_property(entity, 'type', 'unknown') - print(f" β’ {name} ({entity_type})") - - # Find connections between found entities - print(f"\nπ Analyzing connections between entities...") - connections = [] - - for name1, entity1 in found_entities.items(): - relationships = await self._get_entity_relationships(name1) - for rel in relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - - # Check if the other entity is in our search set - other_entity = target if source == name1 else source - if any(other_entity.lower() == name.lower() for name in entity_names): - connections.append(rel) - - if connections: - print(f" β Found {len(connections)} connections:") - for rel in connections: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - rel_type = self._extract_property(rel, 'type', 'unknown') - print(f" {source} β {rel_type} β {target}") 
- else: - print(f" βΉοΈ No direct connections found between these entities") - - async def _show_session_history(self): - """Show session history""" - print("π SESSION HISTORY") - print("-" * 40) - - if not self.session_history: - print("βΉοΈ No history available for this session") - return - - for i, entry in enumerate(self.session_history, 1): - timestamp = entry.get('timestamp', 'Unknown') - entry_type = entry.get('type', 'unknown') - query = entry.get('query', 'No query') - - print(f"{i}. [{timestamp}] {entry_type}: {query}") - - async def _manage_bookmarks(self): - """Manage bookmarked entities and relationships""" - print("π BOOKMARK MANAGER") - print("-" * 40) - - print("1. π View bookmarks") - print("2. β Add entity bookmark") - print("3. β Add relationship bookmark") - print("4. β Remove bookmark") - - choice = input("Enter choice (1-4): ").strip() - - if choice == "1": - await self._view_bookmarks() - elif choice == "2": - await self._add_entity_bookmark() - elif choice == "3": - await self._add_relationship_bookmark() - elif choice == "4": - await self._remove_bookmark() - - async def _view_bookmarks(self): - """View all bookmarks""" - print("\nπ YOUR BOOKMARKS") - print("-" * 20) - - if self.bookmark_entities: - print(f"π₯ Entity Bookmarks ({len(self.bookmark_entities)}):") - for i, entity in enumerate(self.bookmark_entities, 1): - print(f" {i}. {entity}") - - if self.bookmark_relationships: - print(f"\nπ Relationship Bookmarks ({len(self.bookmark_relationships)}):") - for i, rel in enumerate(self.bookmark_relationships, 1): - print(f" {i}. 
{rel}") - - if not self.bookmark_entities and not self.bookmark_relationships: - print("βΉοΈ No bookmarks saved yet") - - async def _add_entity_bookmark(self): - """Add entity to bookmarks""" - entity_name = input("Enter entity name to bookmark: ").strip() - if entity_name and entity_name not in self.bookmark_entities: - self.bookmark_entities.append(entity_name) - print(f"β Added '{entity_name}' to bookmarks") - elif entity_name in self.bookmark_entities: - print(f"βΉοΈ '{entity_name}' is already bookmarked") - - async def _export_analysis(self): - """Export analysis results""" - print("π EXPORT ANALYSIS") - print("-" * 40) - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - - # Create analysis report - report = { - 'timestamp': timestamp, - 'session_history': self.session_history, - 'bookmarks': { - 'entities': self.bookmark_entities, - 'relationships': self.bookmark_relationships - } - } - - # Add graph statistics - stats = await self.graphiti.get_graph_stats() - report['graph_stats'] = stats - - # Save report - reports_dir = "exploration_reports" - os.makedirs(reports_dir, exist_ok=True) - - filename = f"graph_exploration_{timestamp}.json" - filepath = os.path.join(reports_dir, filename) - - with open(filepath, 'w', encoding='utf-8') as f: - json.dump(report, f, indent=2, ensure_ascii=False) - - print(f"β Analysis exported to: {filepath}") - - async def _save_session(self): - """Save session data""" - if not self.session_history: - return - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - session_data = { - 'timestamp': timestamp, - 'history': self.session_history, - 'bookmarks': { - 'entities': self.bookmark_entities, - 'relationships': self.bookmark_relationships - } } - - sessions_dir = "exploration_sessions" - os.makedirs(sessions_dir, exist_ok=True) - - filename = f"session_{timestamp}.json" - filepath = os.path.join(sessions_dir, filename) - - with open(filepath, 'w', encoding='utf-8') as f: - json.dump(session_data, f, indent=2, 
ensure_ascii=False) - - # Helper methods - async def _extract_search_keywords(self, query: str) -> List[str]: - """Extract relevant keywords and intent from natural language query using LLM""" - try: - prompt = f""" - Analyze this natural language graph search query and extract the most relevant search terms and concepts. - Focus on entities, relationships, and graph concepts that would be found in a knowledge graph. - - Query: "{query}" - - Extract: - 1. Key entities or entity types to search for - 2. Relationship types or patterns - 3. Important descriptive terms - 4. Graph concepts (communities, networks, etc.) - - Return a JSON array of the most relevant search terms (max 5), ordered by importance: - ["term1", "term2", "term3", "term4", "term5"] - - Only return valid JSON, no other text. - """ - - response = await self.graphiti.openai_client.chat.completions.create( - model=self.graphiti.config.llm_deployment, - messages=[{"role": "user", "content": prompt}], - temperature=0.1, - max_tokens=200 - ) - - keywords = json.loads(response.choices[0].message.content) - return keywords[:5] # Ensure max 5 keywords - - except Exception as e: - print(f"β οΈ LLM keyword extraction failed, using fallback: {e}") - # Fallback to basic extraction - stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'find', 'show', 'list', 'get', 'all', 'me'} - words = query.lower().split() - keywords = [word.strip('.,!?') for word in words if word not in stop_words and len(word) > 2] - return keywords[:5] - - async def _get_entity_relationships(self, entity_name: str) -> List[Dict]: - """Get all relationships for a specific entity""" - try: # Search for relationships where entity is source or target - all_relationships = await self.graphiti.search_relationships("", limit=200) - entity_relationships = [] - for rel in all_relationships: - source = self._extract_property(rel, 'source', '').lower() - target = self._extract_property(rel, 'target', 
'').lower() - - if entity_name.lower() in source or entity_name.lower() in target: - entity_relationships.append(rel) - - return entity_relationships - except Exception as e: - print(f"Error getting relationships for {entity_name}: {e}") - return [] - -# Additional helper methods for advanced features - async def _relationship_chain_query(self): - """Query for relationship chains""" - print("\nπ RELATIONSHIP CHAIN QUERY") - print("-" * 30) - print("Find entities connected through a chain of relationships") - print("Example: A β works_for β B β located_in β C") - - start_entity = input("Enter starting entity: ").strip() - if not start_entity: - return - - max_hops = input("Enter maximum hops (1-3, default 2): ").strip() - try: - max_hops = int(max_hops) if max_hops else 2 - max_hops = max(1, min(max_hops, 3)) - except ValueError: - max_hops = 2 - - print(f"\nπ Finding relationship chains from '{start_entity}' (max {max_hops} hops)...") - - # Build relationship chains - chains = [] - current_chains = [[start_entity]] - for hop in range(max_hops): - next_chains = [] - for chain in current_chains: - last_entity = chain[-1] - relationships = await self._get_entity_relationships(last_entity) - - for rel in relationships[:5]: # Limit to prevent explosion - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - rel_type = self._extract_property(rel, 'type', 'unknown') - - # Determine next entity - next_entity = target if source.lower() == last_entity.lower() else source - - # Avoid cycles - if next_entity not in chain: - new_chain = chain + [f"β{rel_type}β", next_entity] - next_chains.append(new_chain) - - if len(new_chain) >= 3: # At least one relationship - chains.append(new_chain.copy()) - - current_chains = next_chains - if not current_chains: - break - - if chains: - print(f"β Found {len(chains)} relationship chains:") - for i, chain in enumerate(chains[:10], 1): - chain_str = " ".join(chain) - print(f" {i}. 
{chain_str}") - - if len(chains) > 10: - print(f" ... and {len(chains) - 10} more chains") - else: - print("β No relationship chains found") - - async def _conditional_query(self): - """Query with conditions""" - print("\nπ CONDITIONAL QUERY") - print("-" * 20) - print("Find entities/relationships that meet specific conditions") - - print("1. Entities with high connectivity (> X relationships)") - print("2. Relationships with low confidence (< X)") - print("3. Entities of specific type with specific relationship") - - condition_type = input("Choose condition type (1-3): ").strip() - - if condition_type == "1": - threshold = input("Enter minimum relationship count (default 5): ").strip() - try: - threshold = int(threshold) if threshold else 5 - except ValueError: - threshold = 5 - - print(f"\nπ Finding entities with > {threshold} relationships...") - entities = await self.graphiti.search_entities("", limit=100) - high_connectivity = [] - for entity in entities: - entity_name = self._extract_property(entity, 'name', '') - relationships = await self._get_entity_relationships(entity_name) - if len(relationships) > threshold: - high_connectivity.append((entity_name, len(relationships))) - - high_connectivity.sort(key=lambda x: x[1], reverse=True) - - if high_connectivity: - print(f"β Found {len(high_connectivity)} highly connected entities:") - for entity_name, count in high_connectivity[:10]: - print(f" β’ {entity_name}: {count} relationships") - else: - print(f"β No entities found with > {threshold} relationships") - - elif condition_type == "2": - threshold = input("Enter maximum confidence (0.0-1.0, default 0.8): ").strip() - try: - threshold = float(threshold) if threshold else 0.8 - threshold = max(0.0, min(threshold, 1.0)) - except ValueError: - threshold = 0.8 - - print(f"\nπ Finding relationships with confidence < {threshold}...") - relationships = await self.graphiti.search_relationships("", limit=200) - low_confidence = [] - for rel in relationships: - 
confidence = float(self._extract_property(rel, 'confidence', '1.0')) - if confidence < threshold: - low_confidence.append(rel) - - if low_confidence: - print(f"β Found {len(low_confidence)} low-confidence relationships:") - for rel in low_confidence[:10]: - source = self._extract_property(rel, 'source', 'Unknown') - target = self._extract_property(rel, 'target', 'Unknown') - rel_type = self._extract_property(rel, 'type', 'unknown') - confidence = float(self._extract_property(rel, 'confidence', '1.0')) - print(f" β’ {source} β {rel_type} β {target} (conf: {confidence:.2f})") - else: - print(f"β No relationships found with confidence < {threshold}") - - async def _pattern_matching(self): - """Custom pattern matching""" - print("\nπ¨ CUSTOM PATTERN MATCHING") - print("-" * 30) - print("Define custom patterns to search for in the graph") - print("Example patterns:") - print(" - person works_for organization") - print(" - product related_to organization") - print(" - * created_by person") - - pattern = input("Enter pattern (entity_type relationship entity_type): ").strip() - if not pattern: - return - - parts = pattern.split() - if len(parts) != 3: - print("β Invalid pattern format. 
Use: entity_type relationship entity_type") - return - - source_type, rel_type, target_type = parts - - print(f"\nπ Searching for pattern: {source_type} β {rel_type} β {target_type}") - relationships = await self.graphiti.search_relationships("", limit=200) - matching_patterns = [] - for rel in relationships: - rel_relationship = self._extract_property(rel, 'type', '').lower() - source_name = self._extract_property(rel, 'source', '') - target_name = self._extract_property(rel, 'target', '') - - # Check relationship type match - if rel_type != '*' and rel_type.lower() != rel_relationship: - continue - - # Get entity types - source_entities = await self.graphiti.search_entities(source_name, limit=1) - target_entities = await self.graphiti.search_entities(target_name, limit=1) - - source_entity_type = self._extract_property(source_entities[0], 'type', '').lower() if source_entities else '' - target_entity_type = self._extract_property(target_entities[0], 'type', '').lower() if target_entities else '' - - # Check entity type matches - source_match = source_type == '*' or source_type.lower() == source_entity_type - target_match = target_type == '*' or target_type.lower() == target_entity_type - - if source_match and target_match: - matching_patterns.append({ - 'source': source_name, - 'source_type': source_entity_type, - 'relationship': rel_relationship, - 'target': target_name, - 'target_type': target_entity_type, - 'confidence': self._extract_property(rel, 'confidence', '1.0') - }) - - if matching_patterns: - print(f"β Found {len(matching_patterns)} matching patterns:") - for i, match in enumerate(matching_patterns[:10], 1): - source = match['source'] - target = match['target'] - relationship = match['relationship'] - confidence = match['confidence'] - - conf_str = f" (conf: {confidence:.2f})" if confidence < 1.0 else "" - print(f" {i}. {source} β {relationship} β {target}{conf_str}") - - if len(matching_patterns) > 10: - print(f" ... 
and {len(matching_patterns) - 10} more matches") - else: - print("β No patterns found matching your criteria") - - async def _path_analysis(self): - """Analyze paths between entities""" - print("\nπ PATH ANALYSIS") - print("-" * 15) - - source_entity = input("Enter source entity: ").strip() - target_entity = input("Enter target entity: ").strip() - - if not source_entity or not target_entity: - return - - print(f"\nπ Finding paths from '{source_entity}' to '{target_entity}'...") - - # Simple BFS to find paths - queue = [[source_entity]] - visited = set() - paths = [] - max_depth = 3 - - for depth in range(max_depth): - if not queue: - break - - next_queue = [] - - for path in queue: - current_entity = path[-1] - - if current_entity.lower() == target_entity.lower(): - paths.append(path) - continue - - if current_entity in visited: - continue - - visited.add(current_entity) - - # Get relationships - relationships = await self._get_entity_relationships(current_entity) - for rel in relationships[:5]: # Limit to prevent explosion - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - rel_type = self._extract_property(rel, 'type', 'unknown') - - next_entity = target if source.lower() == current_entity.lower() else source - - if next_entity not in path: # Avoid cycles - new_path = path + [f"β{rel_type}β", next_entity] - next_queue.append(new_path) - - queue = next_queue - - if paths: - print(f"β Found {len(paths)} paths:") - for i, path in enumerate(paths[:5], 1): - path_str = " ".join(path) - print(f" {i}. 
{path_str}") - else: - print("β No paths found between these entities") - - async def _dense_subgraph_analysis(self): - """Find dense subgraphs""" - print("\nπ DENSE SUBGRAPH ANALYSIS") - print("-" * 30) - - print("π Finding densely connected regions...") - - # Get all relationships - relationships = await self.graphiti.search_relationships("", limit=200) - # Build adjacency count - entity_connections = defaultdict(set) - for rel in relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - entity_connections[source].add(target) - entity_connections[target].add(source) - - # Find entities with high local connectivity - dense_regions = [] - - for entity, connections in entity_connections.items(): - if len(connections) >= 3: # At least 3 connections - # Check how many of the connected entities are also connected to each other - interconnections = 0 - total_possible = len(connections) * (len(connections) - 1) // 2 - - connections_list = list(connections) - for i, entity1 in enumerate(connections_list): - for entity2 in connections_list[i+1:]: - if entity2 in entity_connections.get(entity1, set()): - interconnections += 1 - - density = interconnections / max(total_possible, 1) - - if density > 0.3: # 30% interconnected - dense_regions.append({ - 'center': entity, - 'connections': len(connections), - 'density': density, - 'members': list(connections) - }) - - dense_regions.sort(key=lambda x: x['density'], reverse=True) - - if dense_regions: - print(f"β Found {len(dense_regions)} dense regions:") - for i, region in enumerate(dense_regions[:5], 1): - center = region['center'] - connections = region['connections'] - density = region['density'] - print(f" {i}. Center: {center}") - print(f" Connections: {connections}, Density: {density:.2f}") - print(f" Members: {', '.join(region['members'][:3])}{'...' 
if len(region['members']) > 3 else ''}") - print() - else: - print("β No dense subgraphs found") - - async def _type_based_subgraphs(self): - """Analyze subgraphs based on entity types""" - print("\nπ·οΈ TYPE-BASED SUBGRAPH ANALYSIS") - print("-" * 35) - # Get all entities and group by type - entities = await self.graphiti.search_entities("", limit=200) - entity_types = defaultdict(list) - for entity in entities: - entity_type = self._extract_property(entity, 'type', 'unknown') - entity_types[entity_type].append(self._extract_property(entity, 'name', 'Unknown')) - - print("π Available entity types:") - for i, (entity_type, type_entities) in enumerate(entity_types.items(), 1): - print(f" {i}. {entity_type} ({len(type_entities)} entities)") - - choice = input("Enter type number to analyze: ").strip() - try: - choice_idx = int(choice) - 1 - if 0 <= choice_idx < len(entity_types): - selected_type = list(entity_types.keys())[choice_idx] - selected_entities = entity_types[selected_type] - - print(f"\nπ¬ Analyzing {selected_type} subgraph...") - - # Find relationships within this type - internal_relationships = [] - external_relationships = [] - for entity_name in selected_entities: - relationships = await self._get_entity_relationships(entity_name) - for rel in relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - - other_entity = target if source == entity_name else source - - if other_entity in selected_entities: - internal_relationships.append(rel) - else: - external_relationships.append(rel) - - print(f" π Subgraph statistics:") - print(f" π₯ Entities: {len(selected_entities)}") - print(f" π Internal relationships: {len(internal_relationships)}") - print(f" π External relationships: {len(external_relationships)}") - - if len(selected_entities) > 1: - internal_density = len(internal_relationships) / len(selected_entities) - print(f" π Internal density: {internal_density:.2f}") - - # Show sample 
relationships - if internal_relationships: - print(f"\n π Sample internal relationships:") - for rel in internal_relationships[:3]: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - rel_type = self._extract_property(rel, 'type', 'unknown') - print(f" β’ {source} β {rel_type} β {target}") - - if external_relationships: - print(f"\n π Sample external relationships:") - external_targets = Counter() - for rel in external_relationships: - source = self._extract_property(rel, 'source', '') - target = self._extract_property(rel, 'target', '') - other_entity = target if source in selected_entities else source - external_targets[other_entity] += 1 - - for target, count in external_targets.most_common(3): - print(f" β’ {target}: {count} connections") - - else: - print("β Invalid choice!") - except ValueError: - print("β Invalid choice!") - - async def _interpret_natural_language_query(self, query: str) -> Dict[str, Any]: - """Use LLM to interpret natural language queries and suggest search strategies""" - try: - prompt = f""" - Analyze this natural language query for graph exploration and provide structured guidance. - - Query: "{query}" - - Determine: - 1. Query intent (search_entities, search_relationships, explore_communities, analyze_subgraphs, or overview) - 2. Key search terms (entities, concepts, or patterns to look for) - 3. Suggested search strategy (exact_match, semantic_search, broad_exploration) - 4. Expected result type (specific_entities, relationship_patterns, community_structures, statistical_overview) - 5. 
Confidence level (0.0 to 1.0) - - Return JSON in this exact format: - {{ - "intent": "search_entities|search_relationships|explore_communities|analyze_subgraphs|overview", - "search_terms": ["term1", "term2", "term3"], - "strategy": "exact_match|semantic_search|broad_exploration", - "result_type": "specific_entities|relationship_patterns|community_structures|statistical_overview", - "confidence": 0.85, - "explanation": "Brief explanation of the interpretation" - }} - - Only return valid JSON, no other text. - """ - - response = await self.graphiti.openai_client.chat.completions.create( - model=self.graphiti.config.llm_deployment, - messages=[{"role": "user", "content": prompt}], - temperature=0.1, - max_tokens=300 - ) - - interpretation = json.loads(response.choices[0].message.content) - return interpretation - - except Exception as e: - print(f"β οΈ Query interpretation failed: {e}") - # Fallback to basic interpretation - return { - "intent": "search_entities", - "search_terms": await self._extract_search_keywords(query), - "strategy": "semantic_search", - "result_type": "specific_entities", - "confidence": 0.5, - "explanation": "Fallback interpretation due to LLM error" - } - - async def _interpret_search_query(self, query: str) -> Dict[str, Any]: - """Interpret natural language query to determine search strategy using LLM""" - try: - prompt = f""" - Analyze this search query and determine the best search strategy and intent. - - Query: "{query}" - - Determine: - 1. Search intent (specific_entity, relationship_discovery, community_exploration, broad_exploration) - 2. Entity types likely to be relevant (person, organization, product, event, location, concept) - 3. Search strategy (exact_match, semantic_search, broad_exploration) - 4. 
Suggested search terms (3-5 terms) - - Return JSON in this format: - {{ - "intent": "specific_entity|relationship_discovery|community_exploration|broad_exploration", - "entity_types": ["person", "organization", "product"], - "strategy": "exact_match|semantic_search|broad_exploration", - "search_terms": ["term1", "term2", "term3"], - "confidence": 0.8 - }} - - Only return valid JSON, no other text. - """ - - response = await self.graphiti.openai_client.chat.completions.create( - model=self.graphiti.config.llm_deployment, - messages=[{"role": "user", "content": prompt}], - temperature=0.1, - max_tokens=300 - ) - - interpretation = json.loads(response.choices[0].message.content) - return interpretation - - except Exception as e: - print(f"β οΈ Query interpretation failed, using fallback: {e}") - # Fallback interpretation - return { - "intent": "broad_exploration", - "entity_types": ["person", "organization", "product"], - "strategy": "semantic_search", - "search_terms": query.split()[:3], - "confidence": 0.5 - } - - async def _enhance_search_results_with_llm(self, query: str, entities: List[Dict], interpretation: Dict) -> List[Dict]: - """Enhance search results with LLM insights and analysis""" - try: - # Prepare entity summaries for LLM - entity_summaries = [] - for entity in entities[:10]: # Limit to top 10 for LLM processing - name = self._extract_property(entity, 'name', self._extract_property(entity, 'id', 'Unknown')) - entity_type = self._extract_property(entity, 'type', self._extract_property(entity, 'entity_type', 'unknown')) - description = self._extract_property(entity, 'description', '') - entity_summaries.append(f"- {name} ({entity_type}): {description}") - - entities_text = "\n".join(entity_summaries) - - prompt = f""" - Analyze these search results for the query: "{query}" - - Found entities: - {entities_text} - - Provide: - 1. Key insights about the results - 2. Patterns or connections you notice - 3. 3 follow-up questions that would be valuable - 4. 
Any gaps or additional areas to explore - - Return JSON in this format: - {{ - "insights": "Brief summary of key insights", - "patterns": ["pattern1", "pattern2"], - "follow_up_queries": ["question1", "question2", "question3"], - "gaps": "Areas that might be missing or worth exploring" - }} - - Only return valid JSON, no other text. - """ - - response = await self.graphiti.openai_client.chat.completions.create( - model=self.graphiti.config.llm_deployment, - messages=[{"role": "user", "content": prompt}], - temperature=0.2, - max_tokens=500 - ) - - enhancement = json.loads(response.choices[0].message.content) - self._last_enhancement = enhancement # Store for display - - return entities - - except Exception as e: - print(f"β οΈ Result enhancement failed: {e}") - return entities - - def _extract_property(self, entity: Dict[str, Any], property_name: str, default: str = '') -> str: - """Extract property from entity, handling both dict and Cosmos DB valueMap(true) list formats""" - if not isinstance(entity, dict): - return default - - value = entity.get(property_name, default) - - # Handle Cosmos DB valueMap(true) format where properties are lists - if isinstance(value, list): - return value[0] if value else default - - return str(value) if value is not None else default - - -async def main(): - """Main function to run the graph explorer""" - explorer = GraphExplorer() - await explorer.run_interactive_session() - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/demos/step_by_step_visualization_demo.py b/demos/step_by_step_visualization_demo.py deleted file mode 100644 index c093209..0000000 --- a/demos/step_by_step_visualization_demo.py +++ /dev/null @@ -1,808 +0,0 @@ -""" -Step-by-Step Interactive Graph Visualization Demo -============================================== - -A simple, interactive demo that lets you select episodes and visualize -the graph step by step with detailed descriptions of entities, relationships, -subgraphs, and communities. 
- -Perfect for understanding how Graphiti-Cosmos builds knowledge graphs from episodes. -""" - -import asyncio -import os -import sys -import time -import platform -from datetime import datetime -from typing import Dict, Any, List -import json - -# Add the src directory to the path -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src')) - -# Fix for Windows ProactorEventLoop issues -if platform.system() == 'Windows': - asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) - -from graphiti_cosmos import GraphitiCosmos, Episode - -class StepByStepVisualizationDemo: - """Interactive demo for step-by-step graph visualization""" - - def __init__(self): - self.graphiti = None - self.current_step = 0 - self.processed_episodes = [] - self.sample_episodes = self._create_sample_episodes() - - def _create_sample_episodes(self) -> List[Episode]: - """Create a set of interconnected sample episodes for demonstration""" - - episodes = [ - Episode( - content=""" - Sarah Johnson, a 28-year-old marketing professional from Seattle, visited the Manybirds - e-commerce website on Monday morning. She browsed the sustainable sneakers category and - spent 5 minutes viewing product details for the EcoWalk Sustainable Sneakers. Sarah - showed particular interest in the product's recycled ocean plastic material and added - the sneakers to her wishlist for future consideration. - """, - episode_id="ep_001_sarah_browsing", - source="website_analytics" - ), - - Episode( - content=""" - Two days later, Sarah Johnson returned to Manybirds and purchased the EcoWalk Sustainable - Sneakers (Product ID: PROD_ECOWALK_001) for $129.99. She used her Visa credit card for - payment and selected standard shipping to her home address in Seattle. The order was - confirmed with Order ID: ORD_20250529_001. Sarah also signed up for the sustainability - newsletter during checkout. 
- """, - episode_id="ep_002_sarah_purchase", - source="order_management_system" - ), - - Episode( - content=""" - Alex Chen, a 32-year-old software engineer from San Francisco, discovered Manybirds - through a Google search for "sustainable running shoes". He landed on the product page - for EcoWalk Sustainable Sneakers and read customer reviews, including Sarah Johnson's - recent 5-star review. Alex was impressed by the environmental impact metrics and added - the same product to his cart. He also viewed the Trail Runner Performance shoes but - decided to stick with the EcoWalk model. - """, - episode_id="ep_003_alex_discovery", - source="website_analytics" - ), - - Episode( - content=""" - Manybirds received a shipment of 500 pairs of EcoWalk Sustainable Sneakers from their - manufacturing partner GreenStep Manufacturing in Vietnam. The shipment arrived at the - Portland distribution center and was processed by warehouse manager Mike Rodriguez. - Quality control inspected 50 random pairs and approved the entire shipment. The - inventory system was updated to reflect 500 new units available for sale. - """, - episode_id="ep_004_inventory_shipment", - source="warehouse_management_system" - ), - - Episode( - content=""" - Emily Thompson, a 26-year-old environmental science student from Portland, purchased - EcoWalk Sustainable Sneakers during a flash sale promotion offering 20% off sustainable - products. She discovered the product through Manybirds' Instagram ad campaign focusing - on ocean plastic recycling. Emily paid $103.99 (reduced from $129.99) and chose express - shipping. She also purchased the matching Eco-Friendly Shoe Care Kit as an add-on item. - """, - episode_id="ep_005_emily_promo_purchase", - source="order_management_system" - ), - - Episode( - content=""" - The Manybirds marketing team launched a targeted email campaign to customers who had - previously purchased sustainable products. 
The campaign featured the new Summer - Sustainability Collection, including the EcoWalk Sustainable Sneakers. The campaign - achieved a 15% open rate and 3.2% click-through rate, resulting in 25 new visitors - to the sustainable sneakers category page. Sarah Johnson and Alex Chen both received - and opened this email. - """, - episode_id="ep_006_marketing_campaign", - source="email_marketing_platform" - ), - - Episode( - content=""" - Dr. Maria Rodriguez, a sustainability consultant, wrote a blog post reviewing eco-friendly - footwear brands for EcoLifestyle Magazine. She featured Manybirds' EcoWalk Sustainable - Sneakers as a "top pick" for their innovative use of recycled ocean plastic. The blog - post mentioned the positive reviews from customers like Sarah Johnson and noted the - company's commitment to carbon-neutral shipping. The article drove 150 new visitors - to the Manybirds website. - """, - episode_id="ep_007_influencer_review", - source="content_marketing_tracker" - ), - - Episode( - content=""" - GreenStep Manufacturing, Manybirds' key production partner, reported successful completion - of their Q2 sustainability audit. The audit confirmed their carbon-neutral production - facility and ethical labor practices. Manufacturing Director Linda Chang presented the - results to Manybirds' supply chain team, strengthening the partnership for producing - EcoWalk Sustainable Sneakers. The audit results will be featured in Manybirds' upcoming - sustainability report. 
def display_episode_menu(self):
    """Print every sample episode with its processing status and a preview.

    Each entry shows its 1-based menu number, the episode id, whether the
    episode is already in ``self.processed_episodes``, and up to 80
    characters of its content on a single line.
    """
    print("π AVAILABLE EPISODES")
    print("-" * 40)

    # Build the lookup once instead of re-listing processed ids per row.
    processed_ids = {ep.episode_id for ep in self.processed_episodes}

    for i, episode in enumerate(self.sample_episodes, 1):
        status = "β PROCESSED" if episode.episode_id in processed_ids else "β³ PENDING"
        print(f"{i}. {episode.episode_id} - {status}")

        # Collapse newlines and indentation runs *before* truncating so the
        # 80 characters are spent on text, and only add an ellipsis when
        # something was actually cut off.
        text = " ".join(episode.content.split())
        preview = text[:80] + ("..." if len(text) > 80 else "")
        print(f" Preview: {preview}")
        print()
self.graphiti._extract_relationships(episode.content, entities) - - # Process the episode - result = await self.graphiti.add_episode(episode) - processing_time = time.time() - start_time - - print(f"β Episode processed successfully in {processing_time:.2f} seconds") - print(f"π Processing result: {result}") - - # Store extracted data for later steps - episode._extracted_entities = entities - episode._extracted_relationships = relationships - - # Mark as processed - self.processed_episodes.append(episode) - print() - - # Step 2: Show current graph statistics - await self._show_graph_statistics() - - # Step 3: Extract and show entities - await self._show_extracted_entities(episode.episode_id) - - # Step 4: Show relationships - await self._show_relationships(episode.episode_id) - - # Step 5: Show subgraphs and communities - await self._show_subgraphs_and_communities() - - # Step 6: Generate visualization - await self._generate_step_visualization() - - except Exception as e: - print(f"β Error processing episode: {e}") - import traceback - traceback.print_exc() - - async def _show_graph_statistics(self): - """Show current graph statistics""" - print("STEP 2: Current Graph Statistics") - print("-" * 40) - - try: - stats = await self.graphiti.get_graph_stats() - print(f"π Episodes: {stats['episodes']}") - print(f"π₯ Entities: {stats['entities']}") - print(f"π Relationships: {stats['relationships']}") - - if stats['entities'] > 0: - density = stats['relationships'] / stats['entities'] - print(f"π― Connection Density: {density:.2f} relationships per entity") - print() - except Exception as e: - print(f"β Error getting graph statistics: {e}") - print() - - async def _show_extracted_entities(self, episode_id: str): - """Show entities extracted from the latest episode""" - print("STEP 3: Entities Discovered") - print("-" * 40) - - try: - # Find the episode we just processed - episode = next((ep for ep in self.processed_episodes if ep.episode_id == episode_id), None) - - if 
episode and hasattr(episode, '_extracted_entities'): - # Show the actual entities extracted from this episode - entities = episode._extracted_entities - - if entities: - print(f"π― Entities extracted from episode '{episode_id}':") - - # Group entities by type - entity_groups = {} - for entity in entities: - entity_type = entity.entity_type.value - if entity_type not in entity_groups: - entity_groups[entity_type] = [] - entity_groups[entity_type].append(entity) - - # Display entities by type - for entity_type, type_entities in entity_groups.items(): - print(f"\nπ·οΈ {entity_type.upper()} entities:") - for entity in type_entities: - description = entity.description[:80] + "..." if entity.description and len(entity.description) > 80 else entity.description or "No description" - print(f" β’ {entity.name}: {description}") - else: - print(f"βΉοΈ No entities were extracted from episode '{episode_id}'") - else: - print(f"βΉοΈ No extraction data available for episode '{episode_id}'") - - print() - - except Exception as e: - print(f"β Error showing entities: {e}") - import traceback - traceback.print_exc() - print() - - async def _show_relationships(self, episode_id: str): - """Show relationships discovered""" - print("STEP 4: Relationships Discovered") - print("-" * 40) - - try: - # Find the episode we just processed - episode = next((ep for ep in self.processed_episodes if ep.episode_id == episode_id), None) - - if episode and hasattr(episode, '_extracted_relationships'): - # Show the actual relationships extracted from this episode - relationships = episode._extracted_relationships - - if relationships: - print(f"π Relationships extracted from episode '{episode_id}':") - for i, rel in enumerate(relationships, 1): - confidence_str = f" (confidence: {rel.confidence:.2f})" if rel.confidence < 1.0 else "" - print(f" {i}. 
{rel.source_entity} β {rel.relation_type.value} β {rel.target_entity}{confidence_str}") - if rel.description: - print(f" Description: {rel.description}") - else: - print(f"βΉοΈ No relationships were extracted from episode '{episode_id}'") - else: - print(f"βΉοΈ No relationship extraction data available for episode '{episode_id}'") - - print() - - except Exception as e: - print(f"β Error showing relationships: {e}") - import traceback - traceback.print_exc() - print() - async def _show_subgraphs_and_communities(self): - """Analyze and show subgraphs and communities""" - print("STEP 5: Subgraphs and Communities Analysis") - print("-" * 40) - - try: - # Get entities from processed episodes (not static search results) - all_extracted_entities = [] - - for episode in self.processed_episodes: - if hasattr(episode, '_extracted_entities') and episode._extracted_entities: - all_extracted_entities.extend(episode._extracted_entities) - - if len(all_extracted_entities) < 3: - print("βΉοΈ Not enough entities extracted from episodes yet for community analysis") - print() - return - - # Group entities by type from extracted data - communities = {} - for entity in all_extracted_entities: - entity_type = entity.entity_type.value - if entity_type not in communities: - communities[entity_type] = [] - communities[entity_type].append(entity.name) - - print("ποΈ Entity Communities from Processed Episodes:") - total_entities = len(all_extracted_entities) - print(f"π Total entities extracted from {len(self.processed_episodes)} episodes: {total_entities}") - print() - - for community_type, members in communities.items(): - unique_members = list(set(members)) # Remove duplicates - if len(unique_members) >= 1: # Show all communities - print(f" π {community_type.upper()} Community ({len(unique_members)} unique entities):") - for member in unique_members[:8]: # Show up to 8 members - print(f" β’ {member}") - if len(unique_members) > 8: - print(f" ... 
and {len(unique_members) - 8} more") - print() - - except Exception as e: - print(f"β Error analyzing communities: {e}") - import traceback - traceback.print_exc() - print() - - async def _generate_step_visualization(self): - """Generate visualization for current graph state""" - print("STEP 6: Visualization Generation") - print("-" * 40) - - try: - # Create a simple HTML visualization - html_content = await self._create_html_visualization() - - # Save visualization - viz_dir = "real_time_visualizations" - os.makedirs(viz_dir, exist_ok=True) - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - filename = f"step_{len(self.processed_episodes)}_{timestamp}.html" - filepath = os.path.join(viz_dir, filename) - - with open(filepath, 'w', encoding='utf-8') as f: - f.write(html_content) - - print(f"β Visualization saved: {filepath}") - print(f"π Open in browser to see current graph state") - print() - - except Exception as e: - print(f"β Error generating visualization: {e}") - print() - async def _create_html_visualization(self) -> str: - """Create HTML visualization of current graph state""" - try: - # Get entities and relationships from processed episodes (not static search) - all_extracted_entities = [] - all_extracted_relationships = [] - - for episode in self.processed_episodes: - if hasattr(episode, '_extracted_entities') and episode._extracted_entities: - all_extracted_entities.extend(episode._extracted_entities) - if hasattr(episode, '_extracted_relationships') and episode._extracted_relationships: - all_extracted_relationships.extend(episode._extracted_relationships) - # Create nodes for vis.js - nodes = [] - node_colors = { - 'person': '#FF6B6B', # Red for people - 'product': '#4ECDC4', # Teal for products - 'organization': '#45B7D1', # Blue for organizations - 'event': '#FFA07A', # Orange for events - 'location': '#98D8C8', # Light teal for locations - 'concept': '#F7DC6F', # Yellow for concepts - 'campaign': '#DDA0DD', # Plum for campaigns - 'unknown': 
'#BDC3C7' # Gray for unknown - } - - # Enhanced entity processing from extracted data - entity_id_map = {} - unique_entities = {} - - # Remove duplicates by name - for entity in all_extracted_entities: - unique_entities[entity.name] = entity - - for i, (entity_name, entity) in enumerate(unique_entities.items()): - entity_type = entity.entity_type.value.lower() - entity_desc = entity.description or "No description" - - # Make size based on importance (longer descriptions = more important) - size = min(30, max(15, len(entity_desc) // 10)) - - nodes.append({ - 'id': i, - 'label': entity_name[:25] + ('...' if len(entity_name) > 25 else ''), - 'title': f"π·οΈ Type: {entity_type.title()}\\nπ Name: {entity_name}\\nπ Description: {entity_desc[:150]}{'...' if len(entity_desc) > 150 else ''}", - 'color': node_colors.get(entity_type, node_colors['unknown']), - 'size': size, - 'font': {'size': 12, 'color': '#333333'} - }) - entity_id_map[entity_name] = i - - # Create edges based on extracted relationships - edges = [] - edge_id = 0 - - # Use actual extracted relationships - for rel in all_extracted_relationships: - source_name = rel.source_entity - target_name = rel.target_entity - rel_type = rel.relation_type.value - - # Find matching nodes - source_id = entity_id_map.get(source_name) - target_id = entity_id_map.get(target_name) - - if source_id is not None and target_id is not None and source_id != target_id: - confidence_str = f" (conf: {rel.confidence:.2f})" if rel.confidence < 1.0 else "" - edges.append({ - 'id': edge_id, - 'from': source_id, - 'to': target_id, - 'label': rel_type[:10] + confidence_str, - 'title': f"Relationship: {rel_type}\\nSource: {source_name}\\nTarget: {target_name}\\nConfidence: {rel.confidence:.2f}\\nDescription: {rel.description or 'No description'}", - 'width': max(1, int(rel.confidence * 3)), # Width based on confidence - 'color': {'color': '#848484', 'highlight': '#FF6B6B'}, - 'arrows': {'to': {'enabled': True, 'scaleFactor': 1}} - }) - edge_id 
+= 1 - - # If no relationships found, create some demo connections - if not edges: - for i in range(min(len(nodes)-1, 10)): - edges.append({ - 'id': edge_id, - 'from': i, - 'to': (i + 1) % len(nodes), - 'title': 'Connected entities', - 'width': 1, - 'color': {'color': '#cccccc'}, - 'arrows': {'to': {'enabled': True, 'scaleFactor': 0.8}} - }) - edge_id += 1 - - # Get processed episodes info - episode_info = [] - for ep in self.processed_episodes: - episode_info.append({ - 'id': ep.episode_id, - 'source': ep.source, - 'preview': ep.content.strip()[:100] + '...' - }) - - # Create episode timeline - episode_timeline = "" - for i, ep_info in enumerate(episode_info, 1): - episode_timeline += f""" -
Source: {ep_info['source']}
-{ep_info['preview']}
-Progress: {len(self.processed_episodes)} Episodes Processed | Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
-{len(self.processed_episodes)}
-{len(unique_entities)}
-{len(all_extracted_relationships)}
-{len(all_extracted_relationships)/max(len(unique_entities), 1):.1f}
-{str(e)}
async def run_interactive_demo(self):
    """Initialize the demo, then serve the text menu until the user exits.

    Every pass prints the menu, reads one choice with the blocking
    ``input()`` call and runs the matching action. Choice ``0`` prints a
    farewell and ends the loop; any other choice is followed by a
    "press Enter" pause and a separator line.
    """
    menu_lines = (
        "π INTERACTIVE MENU",
        "-" * 40,
        "1. View available episodes",
        "2. Process an episode",
        "3. Show current graph statistics",
        "4. Search entities",
        "5. Generate full visualization",
        "6. Reset graph (clear all data)",
        "0. Exit",
    )
    # Choices that map straight onto an argument-less coroutine method.
    # Stored by name so the attribute is only looked up when chosen.
    async_actions = {
        "3": "_show_graph_statistics",
        "4": "_interactive_entity_search",
        "5": "_generate_step_visualization",
        "6": "_reset_graph",
    }

    await self.initialize()

    while True:
        for line in menu_lines:
            print(line)
        print()

        selection = input("Enter your choice (0-6): ").strip()
        print()

        if selection == "0":
            print("π Thanks for using the Step-by-Step Graph Visualization Demo!")
            return

        if selection in async_actions:
            await getattr(self, async_actions[selection])()
        elif selection in ("1", "2"):
            self.display_episode_menu()
            if selection == "2":
                # The ValueError guard covers both the int() parse and the
                # processing call, exactly one try block.
                try:
                    raw_number = input("Enter episode number to process: ").strip()
                    await self.process_selected_episode(int(raw_number))
                except ValueError:
                    print("β Please enter a valid number!")
        else:
            print("β Invalid choice! Please try again.")

        input("Press Enter to continue...")
        print("\n" + "=" * 60 + "\n")
{name} ({entity_type})") - print(f" {description}") - print() - else: - print("β No entities found matching your search.") - - except Exception as e: - print(f"β Error searching entities: {e}") - - async def _reset_graph(self): - """Reset the graph (clear all data)""" - confirm = input("β οΈ Are you sure you want to clear all graph data? (yes/no): ").strip().lower() - - if confirm == "yes": - try: - # This would need to be implemented in GraphitiCosmos - print("π§Ή Clearing graph data...") - # await self.graphiti.clear_all_data() # Would need this method - self.processed_episodes = [] - print("β Graph data cleared!") - except Exception as e: - print(f"β Error clearing graph: {e}") - else: - print("β Reset cancelled.") - -async def main(): - """Main entry point""" - demo = StepByStepVisualizationDemo() - await demo.run_interactive_demo() - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/docs/ENHANCEMENT_COMPLETION_REPORT.md b/docs/ENHANCEMENT_COMPLETION_REPORT.md deleted file mode 100644 index 2483c3b..0000000 --- a/docs/ENHANCEMENT_COMPLETION_REPORT.md +++ /dev/null @@ -1,175 +0,0 @@ -# π― Graph Explorer Enhancement Completion Report - -## β **Task Completion Summary** - -### **Objective**: Fix the graph explorer and update journey documentation with LLM enhancements - -### **Status**: β **COMPLETED SUCCESSFULLY** - ---- - -## π§ **Fixed Issues** - -### **1. Syntax Errors Resolved** -- β Fixed malformed try-except blocks in `_search_entities_nl()` method -- β Corrected indentation issues and removed duplicate code sections -- β Fixed undefined variable references in exception handling -- β Removed stray `input()` call causing compilation errors - -### **2. 
LLM Integration Enhanced** -- β Validated `_interpret_search_query()` method for natural language understanding -- β Confirmed `_enhance_search_results_with_llm()` for AI-powered insights -- β Verified `_extract_search_keywords()` fallback mechanism -- β Ensured semantic search integration with GraphitiCosmos - -### **3. GraphitiCosmos Semantic Search** -- β Confirmed embedding-based similarity search functionality -- β Validated cosine similarity calculations for vector comparisons -- β Verified fallback mechanisms for robust search operations - ---- - -## π **Documentation Updates** - -### **Enhanced Journey Documentation** -- β Updated `GRAPH_EXPLORER_JOURNEY.md` with AI-powered features -- β Added detailed examples of LLM query interpretation -- β Included AI insights and suggested follow-up queries -- β Created comprehensive AI features section -- β Enhanced Pro Tips with AI-specific guidance - -### **New AI Features Documented** -1. **Natural Language Query Interpretation** - LLM understands user intent -2. **Semantic Search Enhancement** - Vector embeddings for contextual matching -3. **AI-Powered Result Insights** - Business intelligence from search results -4. **Intelligent Search Strategy Selection** - AI chooses optimal search approach -5. **Dynamic Fallback Mechanisms** - Graceful degradation when features fail -6. 
**Contextual Relationship Understanding** - AI comprehends business patterns - ---- - -## π§ͺ **Validation Results** - -### **Module Compilation Tests** -- β `demos.graph_explorer` compiles successfully -- β `src.graphiti_cosmos` compiles successfully -- β All syntax errors eliminated - -### **Feature Validation Tests** -- β All LLM helper methods present and callable -- β Enhanced search method implemented -- β Session management structures working -- β GraphitiCosmos integration verified - -### **End-to-End Validation** -- β No syntax errors detected -- β All imports successful -- β Method signatures correct -- β Exception handling robust - ---- - -## π **Key Enhancements Implemented** - -### **1. AI-Powered Natural Language Search** -```python -# Before: Basic keyword splitting -keywords = query.split() - -# After: LLM-enhanced interpretation -interpretation = await self._interpret_search_query(query) -# Returns: intent, search_terms, strategy, confidence -``` - -### **2. Semantic Search with Embeddings** -```python -# Before: Simple string matching -if keyword.lower() in entity_text.lower(): - -# After: Vector similarity -similarity = self._cosine_similarity(query_embedding, entity_embedding) -if similarity > threshold: -``` - -### **3. AI-Generated Business Insights** -```python -# New Feature: LLM analysis of search results -entities = await self._enhance_search_results_with_llm(query, entities, interpretation) -# Provides: insights, follow_up_queries, gaps, business_intelligence -``` - -### **4. Intelligent Strategy Selection** -- **exact_match**: For specific entity lookups -- **broad_exploration**: For category exploration -- **semantic_search**: For concept-based discovery - ---- - -## π **Business Value Delivered** - -### **1. Enhanced User Experience** -- Natural language queries instead of exact keyword matching -- AI-suggested follow-up queries for deeper exploration -- Business-focused insights from graph data - -### **2. 
Improved Search Accuracy** -- Semantic understanding finds related concepts -- Vector embeddings capture contextual meaning -- Fallback mechanisms ensure robust operation - -### **3. Strategic Intelligence** -- AI identifies business patterns and opportunities -- Suggests unexplored areas for investigation -- Connects graph structure to business strategy - -### **4. Developer Experience** -- Clean, maintainable code structure -- Comprehensive error handling -- Modular LLM integration - ---- - -## π― **Next Steps & Recommendations** - -### **Immediate Actions** -1. β Graph explorer is ready for production use -2. β Documentation is complete and up-to-date -3. β All LLM features are functional - -### **Future Enhancements** (Optional) -- Add more sophisticated prompt engineering for specific business domains -- Implement query result caching for improved performance -- Add multi-language support for international deployments -- Create saved query templates for common business scenarios - -### **Usage Instructions** -```bash -# Ready to run! -cd "c:\Users\shyamsridhar\code\graph collection" -python demos/graph_explorer.py -``` - ---- - -## π **Success Metrics** - -- β **Zero syntax errors** - All code compiles cleanly -- β **100% feature completeness** - All LLM enhancements implemented -- β **Comprehensive documentation** - Journey guide updated with AI features -- β **Robust error handling** - Graceful fallback mechanisms -- β **Business-ready** - Production-quality implementation - ---- - -## π **Conclusion** - -The graph explorer has been successfully enhanced with state-of-the-art LLM capabilities while maintaining backward compatibility and robust error handling. 
The system now provides: - -- **Intelligent natural language understanding** for user queries -- **Semantic search capabilities** that go beyond keyword matching -- **AI-powered business insights** from graph exploration -- **Strategic guidance** for data-driven decision making - -The enhanced graph explorer transforms raw graph data into actionable business intelligence through the power of Large Language Models and semantic search. - -**Status**: β **READY FOR AI-POWERED GRAPH EXPLORATION!** π diff --git a/docs/GRAPH_EXPLORER_JOURNEY.md b/docs/GRAPH_EXPLORER_JOURNEY.md deleted file mode 100644 index df6882a..0000000 --- a/docs/GRAPH_EXPLORER_JOURNEY.md +++ /dev/null @@ -1,613 +0,0 @@ -# πΊοΈ Graph Explorer Journey Map & Demo Script -*Featuring AI-Enhanced Natural Language Understanding* - -## π― Overview -This document provides a comprehensive journey map for exploring your knowledge graph using the Interactive Graph Explorer with **LLM-powered semantic search and natural language understanding**. Follow these scenarios to discover the power of AI-driven graph exploration. - -## π Getting Started - -### Prerequisites -- Graphiti-Cosmos system initialized with **semantic search capabilities** -- Sample data loaded (Manybirds e-commerce dataset) -- Graph Explorer program ready with **Azure OpenAI integration** - -### Launch the Explorer -```bash -cd "c:\Users\shyamsridhar\code\graph collection" -python demos/graph_explorer.py -``` - ---- - -## π Demo Journey: The AI-Powered Data Detective Story - -### π¬ **Scene 1: The Business Question** -*"Our CEO wants to understand customer behavior patterns and product relationships using natural language queries that actually understand business context."* - -**Your Mission**: Use the **AI-Enhanced Graph Explorer** to uncover hidden insights about customers, products, and business relationships through intelligent semantic search. 
- ---- - -### π **Chapter 1: First Contact - Understanding the Graph Landscape** - -#### **Step 1: Get the Big Picture** -``` -Choose Option: 5 (Graph Overview & Statistics) -``` - -**What You'll Discover:** -- Total entities and relationships in your graph -- Entity type distribution (People, Products, Organizations, Events) -- Relationship density and connectivity patterns -- Most connected entities (your graph's "celebrities") - -**Demo Script:** -``` -π GRAPH OVERVIEW & STATISTICS ----------------------------------------- -π Collecting graph statistics... - -π Basic Statistics: - π Episodes: 444 - π₯ Entities: 206 - π Relationships: 3855 - π― Density: 18.71 relationships per entity - -π·οΈ Entity Type Distribution: - π person: 45 (21.8%) - π product: 38 (18.4%) - π organization: 32 (15.5%) - π event: 28 (13.6%) - π location: 25 (12.1%) - -β Most Connected Entities: - π Sarah Johnson: 23 connections - π EcoWalk Sustainable Sneakers: 19 connections - π Manybirds: 17 connections -``` - -**π‘ Insight**: Sarah Johnson is highly connected - she might be a key customer worth investigating! - ---- - -### π **Chapter 2: The AI Customer Detective - Intelligent Entity Discovery** - -#### **Step 2: AI-Enhanced Natural Language Search** -``` -Choose Option: 1 (Search Entities - Natural Language) -Query: "Find all people who purchased sustainable products" -``` - -**β¨ NEW: LLM-Powered Query Interpretation** -The system now uses Azure OpenAI to understand your intent and enhance search strategy! 
- -**Demo Script:** -``` -π NATURAL LANGUAGE ENTITY SEARCH ----------------------------------------- -π£οΈ Enter your search query: Find all people who purchased sustainable products - -π Analyzing query: 'Find all people who purchased sustainable products' ----------------------------------------- -π― Query intent: Find customers with sustainability focus -π Search terms: people, purchased, sustainable, products, eco-friendly, green -π§ Strategy: semantic_search -πͺ Confidence: 0.89 -π‘ Searching with AI-enhanced strategy... - -π§ Enhancing results with AI insights... - -β Found 12 entities: - -π AI Insights: This query reveals environmentally conscious customer segments with high purchase intent for sustainable products. These customers often exhibit cross-category purchasing patterns and strong brand loyalty. - -π‘ Suggested follow-up queries: - 1. What other products do these sustainability-focused customers buy? - 2. Which organizations supply these sustainable products? - 3. Find customers similar to the highest-value eco-conscious buyers - -β οΈ Consider exploring: Customer lifetime value patterns for sustainable product buyers - -π€ PERSON entities: - β’ Sarah Johnson (person): 28-year-old marketing professional from Seattle who purchased EcoWalk Sustainable... - β’ Emily Thompson (person): 26-year-old environmental science student who bought sustainable sneakers during... - β’ Alex Chen (person): 32-year-old software engineer interested in sustainable running shoes... - β’ Dr. Maria Rodriguez (person): Sustainability consultant who reviews eco-friendly products... - -π¦ PRODUCT entities: - β’ EcoWalk Sustainable Sneakers: Made from recycled ocean plastic, carbon-neutral production... - β’ Eco-Friendly Shoe Care Kit: Sustainable maintenance products for footwear... 
- -π Enter entity name to bookmark (or press Enter to continue): Sarah Johnson -β Bookmarked: Sarah Johnson -``` - -**π‘ Discovery**: The AI revealed not just matching entities but also provided strategic insights about customer segments and suggested follow-up investigations! - ---- - -### π§© **Chapter 3: The Product Investigation - Deep Dive Analysis** - -#### **Step 3: AI-Enhanced Entity Deep Dive** -``` -Choose Option: 6 (Entity Deep Dive) -Entity: "EcoWalk Sustainable Sneakers" -``` - -**Demo Script:** -``` -π ENTITY DEEP DIVE ----------------------------------------- -Enter entity name to analyze: EcoWalk Sustainable Sneakers - -π¬ DEEP DIVE: EcoWalk Sustainable Sneakers ----------------------------------------- -π Entity Details: - π·οΈ Name: EcoWalk Sustainable Sneakers - π Type: product - π Description: Sustainable footwear made from recycled ocean plastic with carbon-neutral production - -π Relationships (15 total): - - π purchased_by (4 instances): - β’ Sarah Johnson purchased EcoWalk Sustainable Sneakers for $129.99 - β’ Emily Thompson purchased during 20% off sale for $103.99 - β’ Alex Chen added to cart after reading reviews - - π manufactured_by (2 instances): - β’ GreenStep Manufacturing produces EcoWalk Sustainable Sneakers - β’ Partnership strengthened by Q2 sustainability audit - - π reviewed_by (3 instances): - β’ Sarah Johnson gave 5-star review - β’ Dr. Maria Rodriguez featured as "top pick" in blog - β’ Customer satisfaction rating 4.8/5 - -π Connected Entity Types: - π person: 7 connections - π organization: 3 connections - π event: 2 connections - π location: 3 connections -``` - -**π‘ Insight**: This product has strong customer satisfaction and sustainable supply chain! 
- ---- - -### ποΈ **Chapter 4: The Community Explorer - Finding Hidden Networks** - -#### **Step 4: Explore Communities** -``` -Choose Option: 3 (Explore Communities) -Community to analyze: "person" -``` - -**Demo Script:** -``` -ποΈ COMMUNITY EXPLORATION ----------------------------------------- -π Analyzing entity communities... - -β Found 6 entity type communities: - -π PERSON Community (45 members) - 1. Sarah Johnson - Marketing professional from Seattle, eco-conscious shopper - 2. Emily Thompson - Environmental science student, sustainability advocate - 3. Alex Chen - Software engineer from San Francisco, product researcher - 4. Dr. Maria Rodriguez - Sustainability consultant and influencer - 5. Linda Chang - Manufacturing Director at GreenStep Manufacturing - ... and 40 more members - -Enter community type to analyze deeply: person - -π¬ DEEP ANALYSIS: PERSON COMMUNITY ----------------------------------------- -π₯ Community Size: 45 members - -π Analyzing internal connections... -π Internal connections: 23 - -β Most connected members: - β’ Sarah Johnson: 12 connections - β’ Dr. Maria Rodriguez: 8 connections - β’ Emily Thompson: 6 connections - β’ Alex Chen: 5 connections - β’ Linda Chang: 4 connections - -π External connections... -π Top external connections: - β’ EcoWalk Sustainable Sneakers: 8 connections - β’ Manybirds: 6 connections - β’ GreenStep Manufacturing: 4 connections - β’ Sustainability Newsletter: 3 connections -``` - -**π‘ Discovery**: There's a strong network of sustainability-focused customers and partners! - ---- - -### π― **Chapter 5: The Relationship Detective - Following the Connections** - -#### **Step 5: Ego Network Analysis** -``` -Choose Option: 4 (Analyze Subgraphs) -Subtype: 1 (Ego network) -Entity: "Sarah Johnson" -Depth: 2 -``` - -**Demo Script:** -``` -π§© SUBGRAPH ANALYSIS ----------------------------------------- -Choose subgraph analysis type: -1. 
π― Ego network (around specific entity) - -π― EGO NETWORK ANALYSIS ----------------------------------------- -Enter entity name for ego network: Sarah Johnson -Enter network depth (1-3, default 2): 2 - -π Analyzing 2-hop ego network for 'Sarah Johnson'... - Hop 1: Found 8 new entities - Hop 2: Found 15 new entities - -β Ego network summary: - π Center entity: Sarah Johnson - π₯ Total entities: 24 - π Total relationships: 31 - -π¬ Network analysis: - π Entity types: - β’ person: 6 - β’ product: 8 - β’ organization: 4 - β’ event: 3 - β’ location: 3 - - π Relationship types: - β’ purchased: 4 - β’ reviewed: 3 - β’ related_to: 8 - β’ works_for: 2 - β’ located_in: 3 -``` - -**π‘ Insight**: Sarah's network reveals she's connected to multiple product categories and has influence through reviews! - ---- - -### π **Chapter 6: The Pattern Hunter - Relationship Analysis** - -#### **Step 6: Relationship Patterns** -``` -Choose Option: 7 (Relationship Analysis) -``` - -**Demo Script:** -``` -π RELATIONSHIP ANALYSIS ----------------------------------------- -π Collecting relationship data... -β Analyzing 200 relationships... - -π·οΈ Relationship Types & Confidence: - π related_to: 45 instances (avg confidence: 0.92) - π purchased: 23 instances (avg confidence: 0.98) - π works_for: 18 instances (avg confidence: 0.95) - π manufactured_by: 12 instances (avg confidence: 0.89) - π reviewed_by: 15 instances (avg confidence: 0.91) - -π Relationship Patterns: - π Most common patterns: - β’ person β purchased β product: 23 times - β’ person β works_for β organization: 18 times - β’ product β manufactured_by β organization: 12 times - β’ person β reviewed β product: 15 times - β’ organization β related_to β event: 8 times -``` - -**π‘ Discovery**: Clear business patterns emerge - customer purchase journeys and supply chain relationships! 
- ---- - -### π¨ **Chapter 7: The Query Artist - Advanced Searches** - -#### **Step 7: Advanced Query Builder** -``` -Choose Option: 8 (Advanced Query Builder) -Type: 1 (Multi-entity search) -Entities: "Sarah Johnson, EcoWalk Sustainable Sneakers, Manybirds" -``` - -**Demo Script:** -``` -π― ADVANCED QUERY BUILDER ----------------------------------------- -π§ Build complex queries to explore your graph - -1. π Multi-entity search - -π MULTI-ENTITY SEARCH ----------------------------------------- -Enter entities to search (comma-separated): Sarah Johnson, EcoWalk Sustainable Sneakers, Manybirds - -π― Searching for: Sarah Johnson, EcoWalk Sustainable Sneakers, Manybirds - -β Found 3 entities: -π Sarah Johnson (person): Marketing professional from Seattle, sustainability advocate -π EcoWalk Sustainable Sneakers (product): Sustainable footwear made from recycled materials -π Manybirds (organization): E-commerce company specializing in sustainable products - -π Analyzing connections between entities... -Found relationship chain: - Sarah Johnson β purchased β EcoWalk Sustainable Sneakers - EcoWalk Sustainable Sneakers β sold_by β Manybirds - Sarah Johnson β subscribed_to β Manybirds Newsletter - -π‘ Business Insight: Complete customer journey from discovery to purchase to engagement! -``` - ---- - -### π **Chapter 8: The Historian - Session Memory** - -#### **Step 8: Review Your Discovery Journey** -``` -Choose Option: 9 (Session History) -``` - -**Demo Script:** -``` -π SESSION HISTORY ----------------------------------------- -π Your exploration session: - -1. [14:30:15] Graph Overview - Discovered 206 entities, 3855 relationships -2. [14:32:20] Entity Search - "sustainable products" found 12 matches -3. [14:35:10] Deep Dive - EcoWalk Sustainable Sneakers analysis -4. [14:38:45] Community Analysis - Person community (45 members) -5. [14:42:30] Ego Network - Sarah Johnson 2-hop network (24 entities) -6. 
[14:45:15] Relationship Analysis - Found 5 key relationship types -7. [14:48:00] Multi-entity Search - Connected Sarah β Product β Company - -π Key discoveries saved to bookmarks: - β’ Sarah Johnson (high-value customer) - β’ EcoWalk Sustainable Sneakers (flagship product) - β’ Person-Product-Organization triangle pattern -``` - ---- - -## π€ **AI-Enhanced Features: What Makes This Graph Explorer Smart** - -### π§ **1. Natural Language Query Interpretation** -**Feature**: LLM-powered understanding of user intent and context -``` -User Input: "Find customers who love eco-friendly products" -AI Analysis: -π― Intent: Customer segmentation analysis -π Terms: customers, eco-friendly, sustainable, green, environmental -π§ Strategy: semantic_search (broad exploration) -πͺ Confidence: 0.94 -``` - -### π **2. Semantic Search Enhancement** -**Feature**: Vector embeddings for contextual similarity matching -- Goes beyond keyword matching to understand meaning -- Finds related concepts even with different terminology -- Examples: "sustainable" matches "eco-friendly", "green", "environmental" - -### π‘ **3. AI-Powered Result Insights** -**Feature**: LLM analysis of search results for business intelligence -``` -π AI Insights: This query reveals environmentally conscious customer segments -with high purchase intent for sustainable products. These customers often -exhibit cross-category purchasing patterns and strong brand loyalty. - -π‘ Suggested follow-up queries: - 1. What other products do these sustainability-focused customers buy? - 2. Which organizations supply these sustainable products? - 3. Find customers similar to the highest-value eco-conscious buyers - -β οΈ Consider exploring: Customer lifetime value patterns for sustainable product buyers -``` - -### π― **4. 
Intelligent Search Strategy Selection** -**Feature**: AI chooses optimal search strategy based on query type - -| Query Type | Strategy | Use Case | -|------------|----------|----------| -| "Find John Smith" | exact_match | Specific entity lookup | -| "Show me all products" | broad_exploration | Category exploration | -| "Customers interested in sustainability" | semantic_search | Concept-based discovery | - -### π **5. Dynamic Fallback Mechanisms** -**Feature**: Graceful degradation when advanced features fail -1. **Primary**: LLM-enhanced semantic search -2. **Fallback**: Traditional keyword extraction -3. **Final**: Basic string matching - -### π§ **6. Contextual Relationship Understanding** -**Feature**: AI comprehends business relationships and patterns -- Recognizes customer journey patterns -- Identifies supply chain relationships -- Understands influence networks -- Maps product affinities - ---- - -## π¨ **Advanced AI Query Examples** - -### ποΈ **Business Intelligence Queries** -``` -Query: "Who are my brand ambassadors and influencers?" -AI Response: Finds customers with high review activity, social connections, and product recommendations - -Query: "What products have supply chain risks?" -AI Response: Identifies products with single-source dependencies or quality issues - -Query: "Find customers similar to my top buyers" -AI Response: Uses relationship patterns to find lookalike customer segments -``` - -### π¬ **Market Research Queries** -``` -Query: "What trends are emerging in customer preferences?" -AI Response: Analyzes purchase patterns, review sentiment, and product categories - -Query: "Which product categories are most connected?" -AI Response: Maps cross-selling opportunities and product ecosystems - -Query: "Find untapped market segments" -AI Response: Identifies customer groups with low engagement but high potential -``` - -### π― **Strategic Planning Queries** -``` -Query: "What are my competitive advantages?" 
-AI Response: Highlights unique product features, customer loyalty, and market position - -Query: "Where should I expand my product line?" -AI Response: Uses customer interest patterns and market gaps for recommendations - -Query: "Which partnerships drive the most value?" -AI Response: Analyzes supplier relationships, customer satisfaction, and business outcomes -``` - ---- - -## π― **Real-World Use Cases** - -### π **E-commerce Intelligence** -``` -Scenario: "Which customers are most likely to buy our new sustainable product line?" - -Journey: -1. Graph Overview β Understand customer base -2. Community Analysis β Find sustainability-focused customer cluster -3. Entity Deep Dive β Analyze top customers' purchase patterns -4. Ego Network β Map customer influence networks -5. Advanced Query β Find customers similar to top buyers -``` - -### π **Supply Chain Analysis** -``` -Scenario: "How resilient is our manufacturing network?" - -Journey: -1. Search "manufacturing" β Find all production partners -2. Deep Dive β Analyze each manufacturer's connections -3. Subgraph Analysis β Map supplier dependencies -4. Relationship Analysis β Identify critical supply relationships -5. Community Exploration β Find supplier clusters and risks -``` - -### π₯ **Customer Relationship Management** -``` -Scenario: "Who are our brand ambassadors and how do they influence others?" - -Journey: -1. Search "review, recommend, influence" β Find customer advocates -2. Ego Network β Map each advocate's social connections -3. Relationship Analysis β Identify influence patterns -4. Community Analysis β Find customer segments they reach -5. Multi-entity Search β Connect advocates to product success -``` - -### π **Product Development Insights** -``` -Scenario: "What product features drive customer satisfaction?" - -Journey: -1. Search products β Find all product entities -2. Deep Dive β Analyze top products' relationships -3. Relationship Analysis β Find review/feedback patterns -4. 
Community Exploration β Group products by customer segments -5. Advanced Query β Connect features to satisfaction scores -``` - ---- - -## π‘ **Pro Tips for AI-Enhanced Graph Exploration** - -### π€ **Leveraging AI Features** -1. **Natural Language Queries**: Use conversational language - the AI understands context - ``` - Instead of: "product sustainable" - Try: "Show me all environmentally friendly products our customers love" - ``` - -2. **Follow AI Suggestions**: Pay attention to the suggested follow-up queries - - The AI identifies related concepts you might have missed - - Follow-up suggestions often reveal deeper insights - -3. **Trust the Confidence Scores**: Higher confidence = more reliable results - - 0.9+ = High confidence, results likely very relevant - - 0.7-0.9 = Good confidence, results should be relevant - - <0.7 = Lower confidence, consider rephrasing your query - -4. **Use AI Insights for Strategy**: The business insights help translate findings into action - - Look for patterns the AI identifies - - Consider the "gaps" and unexplored areas it suggests - -### π― **Start Broad, Then Narrow (AI-Enhanced)** -1. Begin with Graph Overview to understand the landscape -2. Use **AI-powered Natural Language Search** for broad topic exploration -3. **Follow AI-suggested queries** to discover related concepts -4. Deep Dive into AI-highlighted interesting entities -5. Use Ego Networks to map local connections -6. 
Apply Advanced Queries for specific investigations - -### π **Follow the AI-Guided Curiosity Path** -- Let AI insights and suggestions guide your exploration -- Pay attention to confidence scores and business interpretations -- Use the semantic search to discover related concepts you didn't know existed -- Bookmark entities the AI identifies as particularly interesting -- Cross-reference AI insights across different analysis types - -### π **Look for AI-Identified Patterns** -- High-connectivity entities often represent key business assets -- AI-enhanced relationship analysis reveals hidden business processes -- Semantic similarity helps identify market segments and customer groups -- AI insights connect graph structure to business strategy - -### π **Business Impact Focus (AI-Driven)** -- Use AI-generated business insights to connect graph data to strategy -- Follow AI suggestions for exploring market opportunities -- Leverage semantic search to discover customer segments -- Apply AI pattern recognition to identify optimization opportunities -- Use LLM insights to map communities to business strategy and market segments - -### π§ **Advanced AI Techniques** -1. **Semantic Exploration**: Let the AI find concepts related to your search terms -2. **Intent-Based Queries**: The AI understands business intent, not just keywords -3. **Pattern Recognition**: AI identifies business patterns humans might miss -4. **Strategic Insights**: Use AI-generated business intelligence for decision-making - ---- - -## π **Conclusion** - -The Graph Explorer transforms your knowledge graph from a static database into an interactive discovery platform. By following customer journeys, mapping relationships, and analyzing communities, you uncover insights that drive business decisions. - -**Remember**: Every connection tells a story. Every pattern reveals an opportunity. Every community represents a market segment waiting to be understood. - -Happy exploring! 
π - ---- - -## π **Quick Reference Commands** - -| Option | Feature | Best For | -|--------|---------|----------| -| 1 | Natural Language Search | Finding entities by topic/description | -| 2 | Relationship Search | Understanding how entities connect | -| 3 | Community Exploration | Finding natural business segments | -| 4 | Subgraph Analysis | Mapping local networks and dependencies | -| 5 | Graph Overview | Understanding overall structure | -| 6 | Entity Deep Dive | Detailed analysis of key entities | -| 7 | Relationship Analysis | Pattern discovery and trend analysis | -| 8 | Advanced Queries | Complex multi-entity investigations | -| 9 | Session History | Tracking your discovery journey | -| 10 | Bookmarks | Saving important findings | diff --git a/docs/GRAPH_EXPLORER_VALIDATION_REPORT.md b/docs/GRAPH_EXPLORER_VALIDATION_REPORT.md deleted file mode 100644 index 3857e98..0000000 --- a/docs/GRAPH_EXPLORER_VALIDATION_REPORT.md +++ /dev/null @@ -1,198 +0,0 @@ -# Graph Explorer Validation Summary - -## Overview -This document summarizes the comprehensive validation of the Graph Explorer to ensure it searches and fetches everything from CosmosDB with LLM assistance, eliminating hardcoded or static elements. - -## Date: May 29, 2025 - -## Validation Results: β PASSED (100% Success Rate) - ---- - -## Key Enhancements Made - -### 1. **LLM-Powered Keyword Extraction** -- **Before**: Basic string splitting with hardcoded stop words -- **After**: Azure OpenAI-powered semantic keyword extraction -- **Improvement**: Now extracts contextually relevant terms and understands query intent - -```python -# Enhanced keyword extraction using LLM -async def _extract_search_keywords(self, query: str) -> List[str]: - # Uses Azure OpenAI to extract semantic keywords and intent - # Fallback to basic extraction only if LLM fails -``` - -### 2. 
**Semantic Entity Search** -- **Before**: Basic string matching (`contains()` logic) -- **After**: Embedding-based semantic similarity search with cosine similarity -- **Improvement**: Finds conceptually related entities, not just exact string matches - -```python -# Enhanced search with semantic similarity -async def search_entities(self, query: str, limit: int = 10): - # Generates query embeddings - # Calculates cosine similarity with stored entity embeddings - # Ranks results by semantic relevance -``` - -### 3. **Natural Language Query Interpretation** -- **Before**: No query understanding, direct keyword extraction -- **After**: LLM interprets user intent and suggests optimal search strategies -- **Improvement**: Understands complex queries and routes them appropriately - -```python -# New natural language interpretation -async def _interpret_natural_language_query(self, query: str): - # Determines search intent (entities, relationships, communities, etc.) - # Suggests search strategy (exact_match, semantic_search, broad_exploration) - # Provides confidence levels and explanations -``` - -### 4. 
**LLM-Enhanced Result Processing** -- **Before**: Raw results displayed without context -- **After**: LLM analyzes and enhances results with insights and suggestions -- **Improvement**: Provides intelligent insights, follow-up suggestions, and identifies gaps - -```python -# Enhanced result processing -async def _enhance_search_results_with_llm(self, query: str, results: List[Dict], interpretation: Dict): - # Reorders results by relevance - # Provides insights about patterns and findings - # Suggests follow-up queries - # Identifies missing information or gaps -``` - ---- - -## Validation Tests Conducted - -### β Test 1: LLM Keyword Extraction (4/4 passed) -- **Tested**: Complex natural language queries -- **Verified**: Contextual keyword extraction using Azure OpenAI -- **Examples**: - - "Find all people who work for technology companies" β ['Person', 'works_for', 'Technology Company', 'employment relationship', 'company type'] - - "Show me sustainable products and their manufacturers" β ['sustainable product', 'manufacturer', 'produced_by', 'sustainability', 'product-manufacturer network'] - -### β Test 2: Semantic Entity Search (4/4 passed) -- **Tested**: Various search terms with semantic similarity -- **Verified**: Embedding-based search returns conceptually relevant results -- **Examples**: - - "technology company" β Found Microsoft, Tesla - - "sustainable product" β Found relevant sustainability-related entities - - "renewable energy" β Found sustainability concepts and technologies - -### β Test 3: Natural Language Interpretation (4/4 passed) -- **Tested**: Complex natural language queries -- **Verified**: LLM correctly interprets intent and suggests strategies -- **Examples**: - - "Show me all people who work for organizations in the tech industry" β Intent: search_entities, Strategy: semantic_search, Confidence: 0.95 - - "Analyze the network of suppliers and manufacturers" β Intent: analyze_subgraphs, Strategy: broad_exploration, Confidence: 0.92 - -### β 
Test 4: Dynamic Query Generation (4/4 passed) -- **Tested**: Different search patterns without hardcoded queries -- **Verified**: System generates appropriate queries dynamically -- **Examples**: Exact match, partial match, semantic search, and broad exploration all work correctly - -### β Test 5: LLM Result Enhancement (1/1 passed) -- **Tested**: Result enhancement with insights and suggestions -- **Verified**: LLM provides meaningful insights and follow-up suggestions -- **Example**: "Find technology companies and their products" β Enhanced with insights about major technology companies and suggested related queries - ---- - -## Static/Hardcoded Elements Eliminated - -### β **Removed**: Basic keyword extraction -- Old: Simple string splitting with hardcoded stop words -- New: LLM-powered semantic keyword extraction - -### β **Removed**: String matching search -- Old: Basic `contains()` logic for entity search -- New: Embedding-based semantic similarity search - -### β **Removed**: Empty query patterns -- Old: Many searches used empty strings ("") to get all entities -- New: Intelligent query interpretation and targeted searches - -### β **Removed**: Static result display -- Old: Raw results without context or insights -- New: LLM-enhanced results with insights and suggestions - ---- - -## LLM Integration Points - -### π§ **Azure OpenAI Usage**: -1. **Keyword Extraction**: Semantic analysis of natural language queries -2. **Query Interpretation**: Understanding user intent and suggesting strategies -3. **Result Enhancement**: Analyzing results and providing insights -4. **Entity/Relationship Extraction**: Dynamic extraction from content (existing) -5. 
**Embedding Generation**: Semantic similarity calculations (existing) - -### π **Dynamic Behavior**: -- All searches are now context-aware and adaptive -- No hardcoded search patterns or static result sets -- Intelligent query routing based on LLM interpretation -- Real-time result enhancement with AI insights - ---- - -## CosmosDB Integration Validation - -### β **Confirmed**: All data fetched from CosmosDB -- No static data sources or hardcoded entities -- All searches query the live Cosmos database -- Entity and relationship data dynamically retrieved -- Search results based on actual stored data - -### β **Confirmed**: Dynamic Gremlin query generation -- Queries generated based on search context -- No hardcoded Gremlin patterns -- Adaptive query strategies based on user intent - ---- - -## Performance Metrics - -- **Test Suite**: 17 comprehensive tests -- **Success Rate**: 100% (17/17 passed) -- **LLM Response Time**: ~1-2 seconds per query interpretation -- **Search Performance**: Maintained while adding semantic capabilities -- **Error Handling**: Robust fallbacks for LLM failures - ---- - -## Conclusion - -The Graph Explorer has been successfully enhanced to be **fully LLM-powered and dynamic**: - -β **No hardcoded elements** - All searches and processing use LLM assistance -β **Semantic understanding** - Understands natural language queries contextually -β **Dynamic query generation** - Generates appropriate searches based on user intent -β **Intelligent results** - Provides insights, suggestions, and gap analysis -β **CosmosDB integration** - All data dynamically fetched from the live database -β **Robust error handling** - Graceful fallbacks if LLM services are unavailable - -The system now provides a truly intelligent and adaptive graph exploration experience, leveraging the full power of Azure OpenAI for natural language understanding and result enhancement while maintaining robust access to the CosmosDB knowledge graph. 
- ---- - -## Files Modified - -1. **`demos/graph_explorer.py`**: - - Enhanced `_extract_search_keywords()` with LLM - - Added `_interpret_natural_language_query()` - - Added `_enhance_search_results_with_llm()` - - Updated main search workflow to use LLM insights - -2. **`src/graphiti_cosmos.py`**: - - Enhanced `search_entities()` with semantic similarity - - Added `_search_entities_by_text()` as fallback - - Added `_cosine_similarity()` for embedding comparison - - Improved error handling and dynamic query generation - -3. **`validate_graph_explorer.py`** (new): - - Comprehensive validation suite - - Tests all LLM integration points - - Validates dynamic behavior and eliminates hardcoded patterns diff --git a/ecommerce_intelligence_reports/business_summary_20250530_113158.txt b/ecommerce_intelligence_reports/business_summary_20250530_113158.txt deleted file mode 100644 index c069241..0000000 --- a/ecommerce_intelligence_reports/business_summary_20250530_113158.txt +++ /dev/null @@ -1,14 +0,0 @@ -===== BUSINESS INTELLIGENCE SUMMARY ===== -Generated: 2025-05-30 11:33:49 - -KNOWLEDGE GRAPH METRICS: - Episodes Processed: 461 - Entities Identified: 210 - Relationships Mapped: 3933 - Intelligence Density: 18.73 - -ACTION RECOMMENDATIONS: - Focus marketing on highest-engaging product categories - Optimize product pages for most-viewed items - Develop personalized recommendations based on browsing patterns - Enhance checkout experience to improve conversion rates \ No newline at end of file diff --git a/ecommerce_intelligence_reports/customer_insights_20250530_113158.txt b/ecommerce_intelligence_reports/customer_insights_20250530_113158.txt deleted file mode 100644 index 28a0736..0000000 --- a/ecommerce_intelligence_reports/customer_insights_20250530_113158.txt +++ /dev/null @@ -1,9 +0,0 @@ -===== CUSTOMER INSIGHTS REPORT ===== -Generated: 2025-05-30 11:31:58 - -CUSTOMER BEHAVIOR ANALYSIS: - Purchase Events: 0 - Browse Events: 0 - Product View Events: 0 - -CUSTOMER 
INSIGHTS: \ No newline at end of file diff --git a/ecommerce_intelligence_reports/market_trends_20250530_113158.txt b/ecommerce_intelligence_reports/market_trends_20250530_113158.txt deleted file mode 100644 index 87c614a..0000000 --- a/ecommerce_intelligence_reports/market_trends_20250530_113158.txt +++ /dev/null @@ -1,15 +0,0 @@ -===== MARKET TRENDS REPORT ===== -Generated: 2025-05-30 11:33:08 - -MARKET ANALYSIS: - Total Business Events: 461 - Business Entities: 210 - Relationships Mapped: 3933 - Intelligence Density: 18.73 connections per entity - - Product Categories: 10 found - -KEY MARKET INSIGHTS: - Customer engagement is growing with increasing interaction counts - Average 8.5 actions per customer session - Online shopping activity shows strong category-specific patterns \ No newline at end of file diff --git a/ecommerce_intelligence_reports/product_insights_20250530_113158.txt b/ecommerce_intelligence_reports/product_insights_20250530_113158.txt deleted file mode 100644 index 974f4f1..0000000 --- a/ecommerce_intelligence_reports/product_insights_20250530_113158.txt +++ /dev/null @@ -1,12 +0,0 @@ -===== PRODUCT INSIGHTS REPORT ===== -Generated: 2025-05-30 11:32:09 - -PRODUCT CATALOG ANALYSIS: - Total Products: 10 - -POPULAR PRODUCTS: - 1. Premium Product 10 ($192.08) - Clothing - 2. Premium Product 4 ($200.63) - Sports - 3. Premium Product 20 ($182.56) - Electronics - 4. Premium Product 13 ($53.31) - Sports - 5. Premium Product 15 ($104.45) - Books \ No newline at end of file diff --git a/enhanced_graph_explorer_demo.py b/enhanced_graph_explorer_demo.py deleted file mode 100644 index be98f44..0000000 --- a/enhanced_graph_explorer_demo.py +++ /dev/null @@ -1,680 +0,0 @@ -#!/usr/bin/env python3 -""" -Enhanced Graph Explorer Demo Script -=================================== - -This script demonstrates the advanced capabilities of the LLM-powered graph explorer -for the ManyBirds e-commerce knowledge graph. 
It performs automated exploration, -analysis, and intelligence gathering while generating a comprehensive markdown report. - -Features Demonstrated: -- Natural language entity search with LLM interpretation -- Relationship analysis and pattern discovery -- Community detection and subgraph analysis -- Advanced query building and conditional analysis -- Statistical overview and insights generation -- Session management and result export - -Author: Enhanced Graph Explorer System -Date: May 30, 2025 -""" - -import asyncio -import json -import os -import sys -from datetime import datetime -from typing import List, Dict, Any -import traceback - -# Add the project root to the path -sys.path.append(os.path.dirname(os.path.abspath(__file__))) - -from demos.graph_explorer import GraphExplorer - -class GraphExplorerDemo: - """Automated demo of the enhanced graph explorer capabilities""" - - def __init__(self): - self.explorer = GraphExplorer() - self.demo_results = { - 'timestamp': datetime.now().isoformat(), - 'demo_name': 'Enhanced Graph Explorer Capabilities Demo', - 'sections': [] - } - self.output_file = f"demo_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md" - - def log_section(self, title: str, description: str, results: Any = None, insights: str = ""): - """Log a demo section with results""" - section = { - 'title': title, - 'description': description, - 'results': results, - 'insights': insights, - 'timestamp': datetime.now().strftime('%H:%M:%S') - } - self.demo_results['sections'].append(section) - print(f"\nπ {title}") - print(f" {description}") - if insights: - print(f" π‘ {insights}") - - async def demo_initialization(self): - """Demo: System initialization and connection""" - print("π ENHANCED GRAPH EXPLORER DEMO") - print("=" * 60) - print("π― Demonstrating advanced LLM-powered graph exploration capabilities") - print("π Using ManyBirds e-commerce knowledge graph dataset") - print() - - self.log_section( - "System Initialization", - "Connecting to Azure 
Cosmos DB and OpenAI services for LLM-enhanced graph exploration", - None, - "The system uses Azure Cosmos DB for graph storage and OpenAI for natural language processing" - ) - - try: - await self.explorer.initialize() - self.log_section( - "Connection Status", - "Successfully connected to all required services", - {"status": "β Connected", "services": ["Azure Cosmos DB", "OpenAI GPT-4"]}, - "Ready for AI-powered graph exploration and analysis" - ) - return True - except Exception as e: - self.log_section( - "Connection Error", - "Failed to initialize system", - {"status": "β Failed", "error": str(e)}, - "Demo cannot proceed without proper initialization" - ) - return False - - async def demo_natural_language_search(self): - """Demo: Natural language entity search with LLM interpretation""" - print("\nπ DEMO: Natural Language Entity Search") - print("-" * 50) - - # Demo queries showcasing different search capabilities - demo_queries = [ - { - "query": "Find products related to sustainable footwear", - "description": "Search for eco-friendly shoe products using semantic understanding" - }, - { - "query": "Show me companies that manufacture insoles", - "description": "Identify organizations in the insole manufacturing space" - }, - { - "query": "What blue products are available?", - "description": "Color-based product discovery with attribute filtering" - } - ] - - search_results = [] - - for demo_query in demo_queries: - query = demo_query["query"] - description = demo_query["description"] - - print(f"\nπ― Query: '{query}'") - print(f"π Purpose: {description}") - - try: - # Simulate the natural language search process - # Note: We'll call the internal methods since we need to capture results - - # Step 1: LLM interprets the query - interpretation = await self.explorer._interpret_natural_language_query(query) - - print(f"π§ LLM Interpretation:") - print(f" Intent: {interpretation.get('intent', 'unknown')}") - print(f" Strategy: {interpretation.get('strategy', 
'unknown')}") - print(f" Confidence: {interpretation.get('confidence', 0.0):.2f}") - - # Step 2: Execute the search based on interpretation - search_terms = interpretation.get('search_terms', [query]) - entities = [] - - for term in search_terms[:2]: # Limit to avoid overwhelming output - try: - results = await self.explorer.graphiti.search_entities(term, limit=5) - entities.extend(results) - except Exception as e: - print(f" β οΈ Search for '{term}' failed: {e}") - - # Step 3: Process and display results - if entities: - print(f"β Found {len(entities)} entities:") - entity_summary = [] - - for i, entity in enumerate(entities[:3], 1): # Show top 3 - name = self.explorer._extract_property(entity, 'name', 'Unknown') - entity_type = self.explorer._extract_property(entity, 'type', 'unknown') - description = self.explorer._extract_property(entity, 'description', '') - - print(f" {i}. {name} ({entity_type})") - if description: - print(f" {description[:100]}{'...' if len(description) > 100 else ''}") - - entity_summary.append({ - 'name': name, - 'type': entity_type, - 'description': description[:100] if description else '' - }) - - search_results.append({ - 'query': query, - 'interpretation': interpretation, - 'entities_found': len(entities), - 'top_entities': entity_summary - }) - else: - print("β No entities found for this query") - search_results.append({ - 'query': query, - 'interpretation': interpretation, - 'entities_found': 0, - 'top_entities': [] - }) - - except Exception as e: - print(f"β Error processing query: {e}") - search_results.append({ - 'query': query, - 'error': str(e) - }) - - self.log_section( - "Natural Language Search Demo", - "Demonstrated LLM-powered query interpretation and semantic entity discovery", - search_results, - f"Successfully processed {len(demo_queries)} natural language queries with AI interpretation" - ) - - async def demo_relationship_analysis(self): - """Demo: Advanced relationship analysis and pattern discovery""" - print("\nπ 
DEMO: Relationship Analysis & Pattern Discovery") - print("-" * 55) - - try: - # Get sample relationships - relationships = await self.explorer.graphiti.search_relationships("", limit=50) - - if not relationships: - print("β No relationships found in the graph") - return - - print(f"π Analyzing {len(relationships)} relationships...") - - # Analyze relationship patterns - relationship_types = {} - confidence_stats = [] - entity_connections = {} - - for rel in relationships[:20]: # Analyze first 20 for demo - rel_type = self.explorer._extract_property(rel, 'type', 'unknown') - source = self.explorer._extract_property(rel, 'source', 'unknown') - target = self.explorer._extract_property(rel, 'target', 'unknown') - confidence = float(self.explorer._extract_property(rel, 'confidence', '1.0')) - - # Count relationship types - relationship_types[rel_type] = relationship_types.get(rel_type, 0) + 1 - - # Track confidence - confidence_stats.append(confidence) - - # Track entity connections - entity_connections[source] = entity_connections.get(source, 0) + 1 - entity_connections[target] = entity_connections.get(target, 0) + 1 - - # Display analysis results - print("\nπ Relationship Type Distribution:") - sorted_types = sorted(relationship_types.items(), key=lambda x: x[1], reverse=True) - for rel_type, count in sorted_types[:5]: - print(f" β’ {rel_type}: {count} instances") - - print(f"\nπ Confidence Statistics:") - if confidence_stats: - avg_confidence = sum(confidence_stats) / len(confidence_stats) - min_confidence = min(confidence_stats) - max_confidence = max(confidence_stats) - print(f" β’ Average: {avg_confidence:.3f}") - print(f" β’ Range: {min_confidence:.3f} - {max_confidence:.3f}") - - print(f"\nπ Top Connected Entities:") - sorted_connections = sorted(entity_connections.items(), key=lambda x: x[1], reverse=True) - for entity, connections in sorted_connections[:5]: - print(f" β’ {entity}: {connections} connections") - - analysis_results = { - 'total_relationships': 
len(relationships), - 'relationship_types': dict(sorted_types[:5]), - 'confidence_stats': { - 'average': avg_confidence if confidence_stats else 0, - 'min': min_confidence if confidence_stats else 0, - 'max': max_confidence if confidence_stats else 0 - }, - 'top_connected_entities': dict(sorted_connections[:5]) - } - - self.log_section( - "Relationship Analysis", - "Comprehensive analysis of relationship patterns, types, and connectivity", - analysis_results, - "Relationship analysis reveals the structural patterns and key connectors in the knowledge graph" - ) - - except Exception as e: - print(f"β Error in relationship analysis: {e}") - self.log_section( - "Relationship Analysis Error", - "Failed to complete relationship analysis", - {"error": str(e)}, - "Analysis could not be completed due to technical issues" - ) - - async def demo_graph_statistics(self): - """Demo: Graph overview and statistical analysis""" - print("\nπ DEMO: Graph Statistics & Overview") - print("-" * 40) - - try: - # Get entities and relationships for statistics - entities = await self.explorer.graphiti.search_entities("", limit=100) - relationships = await self.explorer.graphiti.search_relationships("", limit=100) - - print(f"π Graph Scale:") - print(f" β’ Entities: {len(entities)}") - print(f" β’ Relationships: {len(relationships)}") - - # Analyze entity types - entity_types = {} - for entity in entities: - entity_type = self.explorer._extract_property(entity, 'type', 'unknown') - entity_types[entity_type] = entity_types.get(entity_type, 0) + 1 - - print(f"\nπ·οΈ Entity Type Distribution:") - sorted_entity_types = sorted(entity_types.items(), key=lambda x: x[1], reverse=True) - for entity_type, count in sorted_entity_types: - percentage = (count / len(entities)) * 100 if entities else 0 - print(f" β’ {entity_type}: {count} ({percentage:.1f}%)") - - # Calculate basic graph metrics - if entities and relationships: - density = len(relationships) / len(entities) if entities else 0 - 
print(f"\nπ Graph Metrics:") - print(f" β’ Density (relationships per entity): {density:.2f}") - print(f" β’ Graph completeness: {len(entities)} entities analyzed") - - stats_results = { - 'total_entities': len(entities), - 'total_relationships': len(relationships), - 'entity_types': dict(sorted_entity_types), - 'density': density if entities and relationships else 0, - 'analysis_completeness': min(len(entities), 100) # We limited to 100 - } - - self.log_section( - "Graph Statistics", - "Comprehensive statistical overview of the knowledge graph structure", - stats_results, - "Graph statistics provide insights into data distribution and structural characteristics" - ) - - except Exception as e: - print(f"β Error in statistical analysis: {e}") - self.log_section( - "Graph Statistics Error", - "Failed to generate graph statistics", - {"error": str(e)}, - "Statistical analysis could not be completed" - ) - - async def demo_advanced_queries(self): - """Demo: Advanced query capabilities""" - print("\nπ― DEMO: Advanced Query Capabilities") - print("-" * 42) - - advanced_results = [] - - # Demo 1: High connectivity analysis - try: - print("\nπ Finding highly connected entities...") - entities = await self.explorer.graphiti.search_entities("", limit=50) - - high_connectivity = [] - for entity in entities[:20]: # Limit for demo - entity_name = self.explorer._extract_property(entity, 'name', '') - if entity_name: - relationships = await self.explorer._get_entity_relationships(entity_name) - if len(relationships) > 2: # Threshold for demo - high_connectivity.append((entity_name, len(relationships))) - - high_connectivity.sort(key=lambda x: x[1], reverse=True) - - print(f"β Found {len(high_connectivity)} highly connected entities:") - for entity_name, count in high_connectivity[:5]: - print(f" β’ {entity_name}: {count} connections") - - advanced_results.append({ - 'query_type': 'high_connectivity', - 'results': high_connectivity[:5], - 'total_found': len(high_connectivity) - }) 
- - except Exception as e: - print(f"β High connectivity analysis failed: {e}") - advanced_results.append({ - 'query_type': 'high_connectivity', - 'error': str(e) - }) - - # Demo 2: Pattern matching - try: - print("\nπ¨ Pattern matching analysis...") - relationships = await self.explorer.graphiti.search_relationships("", limit=30) - - patterns = {} - for rel in relationships: - rel_type = self.explorer._extract_property(rel, 'type', 'unknown') - source_name = self.explorer._extract_property(rel, 'source', '') - target_name = self.explorer._extract_property(rel, 'target', '') - - # Get entity types (simplified for demo) - pattern = f"entity β {rel_type} β entity" - patterns[pattern] = patterns.get(pattern, 0) + 1 - - print(f"β Found {len(patterns)} relationship patterns:") - sorted_patterns = sorted(patterns.items(), key=lambda x: x[1], reverse=True) - for pattern, count in sorted_patterns[:5]: - print(f" β’ {pattern}: {count} instances") - - advanced_results.append({ - 'query_type': 'pattern_matching', - 'patterns': dict(sorted_patterns[:5]), - 'total_patterns': len(patterns) - }) - - except Exception as e: - print(f"β Pattern matching failed: {e}") - advanced_results.append({ - 'query_type': 'pattern_matching', - 'error': str(e) - }) - - self.log_section( - "Advanced Query Capabilities", - "Demonstration of complex query patterns and analysis techniques", - advanced_results, - "Advanced queries enable deep insights into graph structure and connectivity patterns" - ) - - async def demo_llm_enhancements(self): - """Demo: LLM-powered enhancements and insights""" - print("\nπ§ DEMO: LLM-Powered Enhancements") - print("-" * 38) - - try: - # Demo query interpretation - test_query = "Show me all eco-friendly products from sustainable brands" - print(f"π― Test Query: '{test_query}'") - - interpretation = await self.explorer._interpret_natural_language_query(test_query) - - print(f"\nπ§ LLM Interpretation Results:") - print(f" β’ Intent: {interpretation.get('intent', 
'unknown')}") - print(f" β’ Search Terms: {interpretation.get('search_terms', [])}") - print(f" β’ Strategy: {interpretation.get('strategy', 'unknown')}") - print(f" β’ Confidence: {interpretation.get('confidence', 0.0):.2f}") - print(f" β’ Explanation: {interpretation.get('explanation', 'No explanation provided')}") - - # Demo search result enhancement - print(f"\nπ Executing interpreted search...") - search_terms = interpretation.get('search_terms', [test_query]) - entities = [] - - for term in search_terms[:2]: - try: - results = await self.explorer.graphiti.search_entities(term, limit=3) - entities.extend(results) - except Exception as e: - print(f" β οΈ Search for '{term}' failed: {e}") - - if entities: - print(f"β Found {len(entities)} entities") - - # Simulate LLM enhancement of results - try: - enhancement = await self.explorer._enhance_search_results_with_llm( - test_query, entities, interpretation - ) - - if enhancement: - print(f"\nπ‘ LLM Insights:") - print(f" β’ Key Insights: {enhancement.get('insights', 'No insights generated')}") - - patterns = enhancement.get('patterns', []) - if patterns: - print(f" β’ Patterns Identified:") - for pattern in patterns[:3]: - print(f" - {pattern}") - - follow_ups = enhancement.get('follow_up_queries', []) - if follow_ups: - print(f" β’ Suggested Follow-up Queries:") - for query in follow_ups[:3]: - print(f" - {query}") - - except Exception as e: - print(f" β οΈ LLM enhancement failed: {e}") - enhancement = None - else: - enhancement = None - - llm_results = { - 'test_query': test_query, - 'interpretation': interpretation, - 'entities_found': len(entities), - 'enhancement': enhancement, - 'llm_features_tested': [ - 'Query interpretation', - 'Search strategy recommendation', - 'Result enhancement', - 'Insight generation' - ] - } - - self.log_section( - "LLM-Powered Enhancements", - "Demonstration of AI-powered query interpretation and result enhancement", - llm_results, - "LLM enhancements provide intelligent query 
understanding and contextual insights" - ) - - except Exception as e: - print(f"β LLM enhancement demo failed: {e}") - self.log_section( - "LLM Enhancement Error", - "Failed to demonstrate LLM capabilities", - {"error": str(e)}, - "LLM features could not be fully demonstrated due to technical issues" - ) - - def generate_markdown_report(self): - """Generate comprehensive markdown report of demo results""" - print(f"\nπ Generating comprehensive demo report...") - - markdown_content = f"""# Enhanced Graph Explorer Demo Report - -**Generated:** {self.demo_results['timestamp']} -**Demo:** {self.demo_results['demo_name']} -**System:** LLM-Powered Knowledge Graph Explorer - -## Executive Summary - -This report documents a comprehensive demonstration of the Enhanced Graph Explorer system, showcasing its advanced capabilities for AI-powered knowledge graph exploration and analysis. The system successfully demonstrates integration between Azure Cosmos DB graph storage and OpenAI's language models for intelligent query processing. - -## Key Capabilities Demonstrated - -- π§ **Natural Language Processing**: LLM-powered query interpretation and semantic understanding -- π **Advanced Search**: Multi-strategy entity and relationship discovery -- π **Statistical Analysis**: Comprehensive graph metrics and pattern recognition -- π― **Complex Queries**: High-connectivity analysis and pattern matching -- π‘ **AI Insights**: Automated insight generation and follow-up recommendations - -## Detailed Demo Results - -""" - - for i, section in enumerate(self.demo_results['sections'], 1): - markdown_content += f"""### {i}. 
{section['title']} - -**Description:** {section['description']} -**Timestamp:** {section['timestamp']} - -""" - - if section.get('insights'): - markdown_content += f"**Key Insights:** {section['insights']}\n\n" - - if section.get('results'): - markdown_content += "**Results:**\n```json\n" - markdown_content += json.dumps(section['results'], indent=2, ensure_ascii=False) - markdown_content += "\n```\n\n" - - markdown_content += "---\n\n" - - # Add technical specifications - markdown_content += f"""## Technical Specifications - -- **Graph Database**: Azure Cosmos DB with Gremlin API -- **AI Model**: OpenAI GPT-4 for natural language processing -- **Query Language**: Gremlin for graph traversal -- **Data Format**: Handles both simple dictionaries and Cosmos DB valueMap(true) format -- **Programming Language**: Python 3.12+ -- **Key Libraries**: asyncio, azure-cosmos, openai - -## System Features - -### β Core Capabilities -- Natural language query interpretation -- Entity and relationship search -- Graph statistical analysis -- Pattern recognition and matching -- Community detection -- Subgraph analysis - -### β LLM Enhancements -- Intelligent query understanding -- Search strategy recommendation -- Result contextualization -- Insight generation -- Follow-up query suggestions - -### β Data Handling -- Cosmos DB valueMap(true) format support -- Backward compatibility with simple dictionaries -- Robust error handling and fallback mechanisms -- Performance optimization for large graphs - -## Demo Conclusions - -The Enhanced Graph Explorer successfully demonstrates sophisticated AI-powered graph analysis capabilities. The system effectively combines traditional graph database operations with modern language model intelligence to provide intuitive, natural language interfaces for complex graph exploration tasks. - -### Key Achievements: -1. **Seamless Integration**: Successfully integrated graph database with LLM capabilities -2. 
**Natural Interface**: Demonstrated natural language query processing -3. **Comprehensive Analysis**: Showed multiple analysis approaches and insight generation -4. **Robust Performance**: Handled various query types and data formats -5. **Actionable Insights**: Generated meaningful patterns and recommendations - -### Future Enhancements: -- Real-time visualization integration -- Advanced community detection algorithms -- Multi-hop reasoning capabilities -- Custom domain-specific query templates -- Interactive result exploration - ---- - -*This report was automatically generated by the Enhanced Graph Explorer Demo System on {datetime.now().strftime('%B %d, %Y at %I:%M %p')}* -""" - - # Write the markdown file - try: - with open(self.output_file, 'w', encoding='utf-8') as f: - f.write(markdown_content) - - print(f"β Demo report saved to: {self.output_file}") - print(f"π Report contains {len(self.demo_results['sections'])} sections") - print(f"π Report size: {len(markdown_content)} characters") - - except Exception as e: - print(f"β Failed to save report: {e}") - - async def run_complete_demo(self): - """Run the complete demonstration sequence""" - print("π¬ Starting Enhanced Graph Explorer Demo...") - print("=" * 60) - - try: - # Initialize system - if not await self.demo_initialization(): - print("β Demo cannot proceed without proper initialization") - return False - - # Run demo sections - await self.demo_natural_language_search() - await self.demo_relationship_analysis() - await self.demo_graph_statistics() - await self.demo_advanced_queries() - await self.demo_llm_enhancements() - - # Generate report - self.generate_markdown_report() - - print("\nπ Demo completed successfully!") - print(f"π {len(self.demo_results['sections'])} sections demonstrated") - print(f"π Results exported to: {self.output_file}") - - return True - - except Exception as e: - print(f"\nβ Demo failed with error: {e}") - print("π Error details:") - traceback.print_exc() - - # Still try to 
generate a partial report - try: - self.generate_markdown_report() - print(f"π Partial results saved to: {self.output_file}") - except: - print("β Could not save partial results") - - return False - -async def main(): - """Main entry point for the demo""" - print("π Enhanced Graph Explorer - Comprehensive Demo") - print("=" * 60) - print("π― This demo showcases advanced LLM-powered graph exploration") - print("π Features: Natural language search, AI insights, pattern analysis") - print("π Results will be exported to a detailed markdown report") - print() - - demo = GraphExplorerDemo() - success = await demo.run_complete_demo() - - if success: - print("\nβ¨ Demo completed successfully!") - print("π Check the generated markdown report for detailed results") - else: - print("\nβ οΈ Demo completed with some issues") - print("π Check the generated report for available results") - -if __name__ == "__main__": - # Run the demo - asyncio.run(main()) diff --git a/exploration_sessions/session_20250530_090235.json b/exploration_sessions/session_20250530_090235.json deleted file mode 100644 index 976b8b3..0000000 --- a/exploration_sessions/session_20250530_090235.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "timestamp": "20250530_090235", - "history": [ - { - "type": "entity_search", - "query": "show me products related to sustainability", - "timestamp": "2025-05-30T08:55:39.009224" - } - ], - "bookmarks": { - "entities": [], - "relationships": [] - } -} \ No newline at end of file diff --git a/exploration_sessions/session_20250530_095107.json b/exploration_sessions/session_20250530_095107.json deleted file mode 100644 index 7e5eda9..0000000 --- a/exploration_sessions/session_20250530_095107.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "timestamp": "20250530_095107", - "history": [ - { - "type": "entity_search", - "query": "find people in tech", - "timestamp": "2025-05-30T09:46:06.886192" - } - ], - "bookmarks": { - "entities": [ - "azure cloud platform" - ], - "relationships": [] - } 
-} \ No newline at end of file diff --git a/exploration_sessions/session_20250530_112551.json b/exploration_sessions/session_20250530_112551.json deleted file mode 100644 index 66cb896..0000000 --- a/exploration_sessions/session_20250530_112551.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "timestamp": "20250530_112551", - "history": [ - { - "type": "entity_search", - "query": "sustainability products that are blue", - "timestamp": "2025-05-30T11:20:04.860725" - } - ], - "bookmarks": { - "entities": [ - "event" - ], - "relationships": [] - } -} \ No newline at end of file diff --git a/graph_explorer_validation_20250529_234048.json b/graph_explorer_validation_20250529_234048.json deleted file mode 100644 index 934383b..0000000 --- a/graph_explorer_validation_20250529_234048.json +++ /dev/null @@ -1,186 +0,0 @@ -{ - "total_tests": 17, - "successful_tests": 17, - "success_rate": 100.0, - "test_results": [ - { - "test": "llm_keyword_extraction", - "query": "Find all people who work for technology companies", - "keywords": [ - "Person", - "works_for", - "Technology Company", - "employment relationship", - "company type" - ], - "success": true - }, - { - "test": "llm_keyword_extraction", - "query": "Show me sustainable products and their manufacturers", - "keywords": [ - "sustainable product", - "manufacturer", - "produced_by", - "sustainability", - "product-manufacturer network" - ], - "success": true - }, - { - "test": "llm_keyword_extraction", - "query": "What organizations are connected to renewable energy?", - "keywords": [ - "organization", - "renewable energy", - "connected to", - "affiliation", - "energy sector" - ], - "success": true - }, - { - "test": "llm_keyword_extraction", - "query": "Find customers who purchased electronics recently", - "keywords": [ - "customers", - "purchased", - "electronics", - "recently", - "transactions" - ], - "success": true - }, - { - "test": "semantic_entity_search", - "search_term": "technology company", - "results_count": 5, - 
"success": true - }, - { - "test": "semantic_entity_search", - "search_term": "sustainable product", - "results_count": 5, - "success": true - }, - { - "test": "semantic_entity_search", - "search_term": "renewable energy", - "results_count": 5, - "success": true - }, - { - "test": "semantic_entity_search", - "search_term": "customer electronics", - "results_count": 5, - "success": true - }, - { - "test": "nl_interpretation", - "query": "Show me all people who work for organizations in the tech industry", - "interpretation": { - "intent": "search_entities", - "search_terms": [ - "people", - "organizations", - "tech industry", - "works for" - ], - "strategy": "semantic_search", - "result_type": "specific_entities", - "confidence": 0.95, - "explanation": "The query seeks to identify all individuals (people) who are connected via a 'works for' relationship to organizations classified within the tech industry. This requires semantic understanding of industry classification and relationship traversal." - }, - "success": true - }, - { - "test": "nl_interpretation", - "query": "Find products that are related to environmental sustainability", - "interpretation": { - "intent": "search_entities", - "search_terms": [ - "products", - "environmental sustainability", - "sustainable products", - "eco-friendly" - ], - "strategy": "semantic_search", - "result_type": "specific_entities", - "confidence": 0.92, - "explanation": "The query seeks to identify specific products associated with the concept of environmental sustainability, suggesting a semantic search for entities (products) linked to this concept." 
- }, - "success": true - }, - { - "test": "nl_interpretation", - "query": "What are the relationships between customers and their recent purchases?", - "interpretation": { - "intent": "search_relationships", - "search_terms": [ - "customers", - "purchases", - "recent purchases", - "customer-purchase relationships" - ], - "strategy": "semantic_search", - "result_type": "relationship_patterns", - "confidence": 0.95, - "explanation": "The query seeks to understand how customers are connected to their recent purchases, focusing on the relationships between these entities rather than individual entities or broader community structures." - }, - "success": true - }, - { - "test": "nl_interpretation", - "query": "Analyze the network of suppliers and manufacturers", - "interpretation": { - "intent": "analyze_subgraphs", - "search_terms": [ - "suppliers", - "manufacturers", - "network" - ], - "strategy": "broad_exploration", - "result_type": "relationship_patterns", - "confidence": 0.92, - "explanation": "The query seeks to analyze how suppliers and manufacturers are connected, implying a need to explore and understand the structure and patterns of their relationships within the network." 
- }, - "success": true - }, - { - "test": "dynamic_query_generation", - "search_type": "exact match", - "term": "Microsoft", - "results_count": 3, - "success": true - }, - { - "test": "dynamic_query_generation", - "search_type": "partial match", - "term": "tech", - "results_count": 3, - "success": true - }, - { - "test": "dynamic_query_generation", - "search_type": "semantic search", - "term": "innovation", - "results_count": 3, - "success": true - }, - { - "test": "dynamic_query_generation", - "search_type": "broad exploration", - "term": "", - "results_count": 3, - "success": true - }, - { - "test": "llm_result_enhancement", - "query": "Find technology companies and their products", - "basic_count": 6, - "enhanced_count": 6, - "has_insights": true, - "success": true - } - ] -} \ No newline at end of file diff --git a/real_time_visualizations/enhanced_graph_visualization.html b/real_time_visualizations/enhanced_graph_visualization.html deleted file mode 100644 index eec365d..0000000 --- a/real_time_visualizations/enhanced_graph_visualization.html +++ /dev/null @@ -1,2523 +0,0 @@ - - - -Real-time Knowledge Graph Visualization
-