- Overview
- Core API
- Memory Management API
- Performance Optimization API
- Integration Patterns
- Performance Characteristics
- Best Practices
- Migration Guide
- Troubleshooting
The Enhanced AST Repository provides a comprehensive, production-ready solution for managing Abstract Syntax Trees (ASTs) in Elixir applications. It extends the basic repository with advanced features including memory management, performance optimization, caching, and comprehensive analysis capabilities.
- Memory Management: Intelligent cleanup, compression, and LRU caching
- Performance Optimization: Query caching, batch operations, lazy loading
- Scalability: Handles 1000+ modules with <500MB memory usage
- Analysis Integration: CFG, DFG, and CPG analysis support
- Production Ready: Comprehensive monitoring and error handling
Enhanced AST Repository
├── EnhancedRepository (Core API)
├── MemoryManager (Memory Management)
├── PerformanceOptimizer (Performance)
├── EnhancedModuleData (Data Structures)
└── EnhancedFunctionData (Function Analysis)
The main interface for storing and retrieving enhanced AST data.
# Start with default configuration
{:ok, pid} = EnhancedRepository.start_link([])
# Start with custom configuration
{:ok, pid} = EnhancedRepository.start_link([
memory_limit: 1024 * 1024 * 1024, # 1GB
cache_enabled: true,
monitoring_enabled: true
])

Stores an enhanced module with comprehensive analysis.
@spec store_enhanced_module(atom(), Macro.t()) :: :ok | {:error, term()}
# Basic usage
ast = quote do
defmodule MyModule do
def hello(name), do: "Hello, #{name}"
end
end
:ok = EnhancedRepository.store_enhanced_module(MyModule, ast)

Parameters:
- `module_name` - The module name (atom)
- `ast` - The complete module AST
Returns:
- `:ok` on success
- `{:error, reason}` on failure
Retrieves enhanced module data with all analysis results.
@spec get_enhanced_module(atom()) :: {:ok, EnhancedModuleData.t()} | {:error, :not_found}
# Retrieve module data
case EnhancedRepository.get_enhanced_module(MyModule) do
{:ok, module_data} ->
IO.inspect(module_data.functions)
IO.inspect(module_data.metadata)
{:error, :not_found} ->
IO.puts("Module not found")
end

Returns:
- `{:ok, module_data}` - Enhanced module data structure
- `{:error, :not_found}` - Module not in repository
Updates existing module data with new analysis results.
@spec update_enhanced_module(atom(), keyword()) :: :ok | {:error, term()}
# Update module metadata
:ok = EnhancedRepository.update_enhanced_module(MyModule, [
metadata: %{last_analyzed: DateTime.utc_now()},
complexity_score: 8.5
])

Removes module from repository and cleans up associated data.
@spec delete_enhanced_module(atom()) :: :ok
:ok = EnhancedRepository.delete_enhanced_module(MyModule)

Retrieves detailed function analysis data.
@spec get_enhanced_function(atom(), atom(), non_neg_integer()) ::
{:ok, EnhancedFunctionData.t()} | {:error, :not_found}
# Get function data
case EnhancedRepository.get_enhanced_function(MyModule, :hello, 1) do
{:ok, func_data} ->
IO.inspect(func_data.complexity_score)
IO.inspect(func_data.control_flow_graph)
{:error, :not_found} ->
IO.puts("Function not found")
end

Stores enhanced function data with analysis results.
@spec store_enhanced_function(atom(), atom(), non_neg_integer(), EnhancedFunctionData.t()) ::
:ok | {:error, term()}
function_data = %EnhancedFunctionData{
module_name: MyModule,
function_name: :hello,
arity: 1,
complexity_score: 2.5,
# ... other fields
}
:ok = EnhancedRepository.store_enhanced_function(MyModule, :hello, 1, function_data)

Lists all modules in the repository.
@spec list_modules() :: [atom()]
modules = EnhancedRepository.list_modules()
# => [:MyModule, :AnotherModule, ...]

Lists all functions for a given module.
@spec list_functions(atom()) :: [{atom(), non_neg_integer()}]
functions = EnhancedRepository.list_functions(MyModule)
# => [{:hello, 1}, {:goodbye, 2}, ...]

Searches modules by pattern or criteria.
@spec search_modules(keyword()) :: [atom()]
# Search by pattern
modules = EnhancedRepository.search_modules(pattern: "Test*")
# Search by complexity
modules = EnhancedRepository.search_modules(complexity: {:gt, 10})
# Search by metadata
modules = EnhancedRepository.search_modules(metadata: %{type: :controller})

Efficiently stores multiple modules in a single operation.
@spec store_modules_batch([{atom(), Macro.t()}]) :: :ok | {:error, term()}
modules = [
{Module1, ast1},
{Module2, ast2},
{Module3, ast3}
]
:ok = EnhancedRepository.store_modules_batch(modules)

Retrieves multiple modules efficiently.
@spec get_modules_batch([atom()]) :: %{atom() => EnhancedModuleData.t()}
module_names = [Module1, Module2, Module3]
modules_data = EnhancedRepository.get_modules_batch(module_names)
# => %{Module1 => data1, Module2 => data2, Module3 => data3}

Provides intelligent memory management for the repository.
# Start with monitoring enabled
{:ok, pid} = MemoryManager.start_link(monitoring_enabled: true)
# Start with custom configuration
{:ok, pid} = MemoryManager.start_link([
monitoring_enabled: true,
cleanup_interval: 300_000, # 5 minutes
compression_interval: 600_000, # 10 minutes
memory_check_interval: 30_000 # 30 seconds
])

Gets current memory usage statistics.
@spec monitor_memory_usage() :: {:ok, map()} | {:error, term()}
case MemoryManager.monitor_memory_usage() do
{:ok, stats} ->
IO.puts("Repository memory: #{stats.repository_memory} bytes")
IO.puts("System memory usage: #{stats.memory_usage_percent}%")
IO.puts("Cache hit ratio: #{stats.cache_hit_ratio}")
{:error, reason} ->
IO.puts("Failed to get memory stats: #{reason}")
end

Returns:
%{
repository_memory: 52428800, # bytes
system_memory_total: 8589934592, # bytes
system_memory_used: 4294967296, # bytes
memory_usage_percent: 50.0, # percentage
cache_hit_ratio: 0.85, # ratio
last_cleanup: ~U[2024-01-01 12:00:00Z],
last_compression: ~U[2024-01-01 11:30:00Z]
}

Controls memory monitoring.
@spec enable_monitoring() :: :ok
@spec disable_monitoring() :: :ok
:ok = MemoryManager.enable_monitoring()
:ok = MemoryManager.disable_monitoring()

Removes stale and unused data from the repository.
@spec cleanup_unused_data(keyword()) :: :ok | {:error, term()}
# Basic cleanup (removes data older than 1 hour)
:ok = MemoryManager.cleanup_unused_data([])
# Custom cleanup parameters
:ok = MemoryManager.cleanup_unused_data([
max_age: 3600, # 1 hour in seconds
force: false, # don't force cleanup of recently accessed data
dry_run: false # actually perform cleanup
])
# Dry run to see what would be cleaned
{:ok, stats} = MemoryManager.cleanup_unused_data([dry_run: true])
IO.puts("Would clean #{stats.modules_to_clean} modules")

Options:
- `max_age` - Maximum age in seconds (default: 3600)
- `force` - Force cleanup regardless of access patterns (default: false)
- `dry_run` - Don't actually clean, just return statistics (default: false)
Compresses infrequently accessed analysis data.
@spec compress_old_analysis(keyword()) :: {:ok, map()} | {:error, term()}
# Compress old analysis data
{:ok, stats} = MemoryManager.compress_old_analysis([
access_threshold: 5, # minimum access count
age_threshold: 1800, # 30 minutes
compression_level: 6 # zlib compression level
])
IO.puts("Compressed #{stats.modules_compressed} modules")
IO.puts("Compression ratio: #{stats.compression_ratio * 100}%")
IO.puts("Space saved: #{stats.space_saved_bytes} bytes")

Returns:
%{
modules_compressed: 25,
compression_ratio: 0.65,
space_saved_bytes: 1048576,
compression_time_ms: 150
}

Manual cache management for query results.
@spec cache_put(atom(), term(), term()) :: :ok
@spec cache_get(atom(), term()) :: {:ok, term()} | :miss
# Cache a query result
:ok = MemoryManager.cache_put(:query, {:module, MyModule}, module_data)
# Retrieve from cache
case MemoryManager.cache_get(:query, {:module, MyModule}) do
{:ok, data} -> data
:miss -> nil
end

Cache Types:
- `:query` - Query results (TTL: 60 seconds)
- `:analysis` - Analysis results (TTL: 300 seconds)
- `:cpg` - Code Property Graph data (TTL: 600 seconds)
Configures LRU cache for specific data types.
@spec implement_lru_cache(atom(), keyword()) :: :ok
:ok = MemoryManager.implement_lru_cache(:query, [
max_size: 1000,
ttl: 60_000 # 60 seconds
])

Clears specific cache type or all caches.
@spec cache_clear(atom() | :all) :: :ok
:ok = MemoryManager.cache_clear(:query) # Clear query cache
:ok = MemoryManager.cache_clear(:all)    # Clear all caches

Handles different levels of memory pressure.
@spec memory_pressure_handler(atom()) :: :ok
# Handle memory pressure levels
:ok = MemoryManager.memory_pressure_handler(:level_1) # 80% memory - clear query caches
:ok = MemoryManager.memory_pressure_handler(:level_2) # 90% memory - compress old data
:ok = MemoryManager.memory_pressure_handler(:level_3) # 95% memory - remove unused modules
:ok = MemoryManager.memory_pressure_handler(:level_4) # 98% memory - emergency cleanup + GC

Pressure Levels:
- `:level_1` (80% memory) - Clear query caches
- `:level_2` (90% memory) - Clear caches + compress old analysis data
- `:level_3` (95% memory) - Comprehensive cleanup + remove unused modules
- `:level_4` (98% memory) - Emergency cleanup + force garbage collection
Retrieves operation statistics.
@spec get_cleanup_stats() :: map()
@spec get_compression_stats() :: map()
cleanup_stats = MemoryManager.get_cleanup_stats()
# => %{modules_cleaned: 150, data_removed_bytes: 2097152, total_cleanups: 25, ...}
compression_stats = MemoryManager.get_compression_stats()
# => %{modules_compressed: 75, total_space_saved: 5242880, avg_compression_ratio: 0.68, ...}

Provides performance optimization features for the repository.
Optimizes query caching based on access patterns.
@spec optimize_query_cache(keyword()) :: :ok
:ok = PerformanceOptimizer.optimize_query_cache([
cache_size: 1000,
ttl: 300_000, # 5 minutes
preload_popular: true
])

Pre-loads frequently accessed data into cache.
@spec warm_cache(keyword()) :: :ok
:ok = PerformanceOptimizer.warm_cache([
modules: [:frequently_used_module],
functions: [{:MyModule, :hot_function, 2}],
analysis_types: [:cfg, :dfg]
])

Stores multiple modules with performance optimizations.
@spec store_modules_optimized([{atom(), Macro.t()}], keyword()) :: :ok
modules = [{Module1, ast1}, {Module2, ast2}]
:ok = PerformanceOptimizer.store_modules_optimized(modules, [
batch_size: 50,
parallel: true,
lazy_analysis: true
])

Configures lazy loading for large analysis data.
@spec enable_lazy_loading(keyword()) :: :ok
:ok = PerformanceOptimizer.enable_lazy_loading([
threshold_bytes: 1024, # Load on-demand if > 1KB
analysis_types: [:cpg, :dfg],
cache_loaded: true
])

# In your Phoenix application
defmodule MyAppWeb.AnalysisController do
use MyAppWeb, :controller
alias ElixirScope.ASTRepository.EnhancedRepository
def analyze_module(conn, %{"module" => module_name}) do
module_atom = String.to_existing_atom(module_name)
case EnhancedRepository.get_enhanced_module(module_atom) do
{:ok, module_data} ->
json(conn, %{
complexity: module_data.complexity_score,
functions: length(module_data.functions),
memory_usage: module_data.metadata.memory_usage
})
{:error, :not_found} ->
conn
|> put_status(:not_found)
|> json(%{error: "Module not found"})
end
end
end

defmodule MyApp.AnalysisWorker do
use GenServer
alias ElixirScope.ASTRepository.{EnhancedRepository, MemoryManager}
def start_link(opts) do
GenServer.start_link(__MODULE__, opts, name: __MODULE__)
end
def init(_opts) do
# Start repository and memory manager
{:ok, _} = EnhancedRepository.start_link([])
{:ok, _} = MemoryManager.start_link(monitoring_enabled: true)
# Schedule periodic cleanup
Process.send_after(self(), :cleanup, 300_000) # 5 minutes
{:ok, %{}}
end
def handle_info(:cleanup, state) do
MemoryManager.cleanup_unused_data([])
Process.send_after(self(), :cleanup, 300_000)
{:noreply, state}
end
end

defmodule MyApp.Application do
use Application
def start(_type, _args) do
children = [
# Start Enhanced Repository
{ElixirScope.ASTRepository.EnhancedRepository, []},
# Start Memory Manager with monitoring
{ElixirScope.ASTRepository.MemoryManager, [monitoring_enabled: true]},
# Your application workers
MyApp.AnalysisWorker
]
opts = [strategy: :one_for_one, name: MyApp.Supervisor]
Supervisor.start_link(children, opts)
end
end

defmodule MyApp.AnalysisTest do
use ExUnit.Case, async: false
alias ElixirScope.ASTRepository.{EnhancedRepository, MemoryManager}
setup do
# Start repository for testing
{:ok, repo} = EnhancedRepository.start_link([])
{:ok, memory_manager} = MemoryManager.start_link([])
on_exit(fn ->
if Process.alive?(repo), do: GenServer.stop(repo)
if Process.alive?(memory_manager), do: GenServer.stop(memory_manager)
end)
%{repo: repo}
end
test "analyzes module complexity", %{repo: _repo} do
ast = quote do
defmodule TestModule do
def simple_function, do: :ok
end
end
:ok = EnhancedRepository.store_enhanced_module(TestModule, ast)
{:ok, module_data} = EnhancedRepository.get_enhanced_module(TestModule)
assert module_data.complexity_score > 0
end
end

| Scale | Modules | Memory Usage | Per Module |
|---|---|---|---|
| Small | 10 | ~25KB | ~2.5KB |
| Medium | 100 | ~250KB | ~2.5KB |
| Large | 1000 | ~2.5MB | ~2.5KB |
| Operation | Target | Typical |
|---|---|---|
| Module lookup | <100ms | ~0.1ms |
| Function search | <100ms | ~0.05ms |
| Pattern matching | <100ms | ~0.02ms |
| Batch operations | <1s/100 modules | ~0.5s |
| Cache Type | TTL | Hit Ratio Target | Typical |
|---|---|---|---|
| Query | 60s | >80% | ~95% |
| Analysis | 5min | >70% | ~85% |
| CPG | 10min | >60% | ~75% |
- Maximum modules: 10,000+ (tested up to 1,000)
- Memory limit: Configurable (default: 500MB for 1,000 modules)
- Concurrent queries: 100+ concurrent operations
- Startup time: <30s for 1,000 modules
-
Enable monitoring in production:
MemoryManager.start_link(monitoring_enabled: true)
-
Configure appropriate cleanup intervals:
# For high-traffic applications
cleanup_interval: 180_000 # 3 minutes

# For low-traffic applications
cleanup_interval: 600_000 # 10 minutes
-
Use batch operations for bulk data:
# Instead of individual stores
EnhancedRepository.store_modules_batch(modules)
-
Monitor memory pressure:
{:ok, stats} = MemoryManager.monitor_memory_usage()

if stats.memory_usage_percent > 80 do
  MemoryManager.memory_pressure_handler(:level_1)
end
-
Use lazy loading for large modules:
PerformanceOptimizer.enable_lazy_loading(threshold_bytes: 1024)
-
Warm caches for frequently accessed data:
PerformanceOptimizer.warm_cache(modules: [:frequently_used])
-
Configure appropriate cache sizes:
MemoryManager.implement_lru_cache(:query, max_size: 1000)
-
Always handle repository errors:
case EnhancedRepository.get_enhanced_module(module) do
  {:ok, data} -> process_data(data)
  {:error, :not_found} -> handle_missing_module()
  {:error, reason} -> handle_error(reason)
end
-
Monitor memory manager health:
case MemoryManager.monitor_memory_usage() do
  {:ok, stats} -> check_memory_health(stats)
  {:error, reason} -> alert_memory_monitoring_failure(reason)
end
-
Use async: false for repository tests:
use ExUnit.Case, async: false
-
Clean up processes in tests:
on_exit(fn -> if Process.alive?(repo), do: GenServer.stop(repo) end)
-
Test memory management scenarios:
test "handles memory pressure" do
  # Generate memory pressure
  # Test cleanup behavior
  # Verify memory recovery
end
# mix.exs
def deps do
[
# Add enhanced repository dependencies
{:elixir_scope, "~> 0.1.0"}
]
end

Before (Basic Repository):
# Basic repository usage
ASTRepository.store_module(MyModule, ast)
{:ok, data} = ASTRepository.get_module(MyModule)

After (Enhanced Repository):
# Enhanced repository usage
EnhancedRepository.store_enhanced_module(MyModule, ast)
{:ok, enhanced_data} = EnhancedRepository.get_enhanced_module(MyModule)

# Start memory manager in your supervision tree
children = [
{ElixirScope.ASTRepository.EnhancedRepository, []},
{ElixirScope.ASTRepository.MemoryManager, [monitoring_enabled: true]}
]

Before:
# Direct data access
module_data = get_module_data(module)
functions = module_data.functions

After:
# Enhanced data access with error handling
case EnhancedRepository.get_enhanced_module(module) do
{:ok, module_data} ->
functions = module_data.functions
complexity = module_data.complexity_score
# Access enhanced analysis data
{:error, reason} ->
handle_error(reason)
end

# Use batch operations
modules = [{Module1, ast1}, {Module2, ast2}]
EnhancedRepository.store_modules_batch(modules)
# Enable performance optimizations
PerformanceOptimizer.enable_lazy_loading([])
PerformanceOptimizer.warm_cache(modules: frequently_used_modules)
# Monitor memory usage
{:ok, stats} = MemoryManager.monitor_memory_usage()

# Update test setup
setup do
{:ok, repo} = EnhancedRepository.start_link([])
{:ok, memory_manager} = MemoryManager.start_link([])
on_exit(fn ->
if Process.alive?(repo), do: GenServer.stop(repo)
if Process.alive?(memory_manager), do: GenServer.stop(memory_manager)
end)
%{repo: repo}
end

- Update dependencies
- Replace basic repository calls
- Add memory manager to supervision tree
- Update data access patterns with error handling
- Enable performance optimizations
- Add memory monitoring
- Update tests for enhanced repository
- Configure cleanup and compression intervals
- Set up cache warming for frequently accessed data
- Add memory pressure handling
- Return Values: Enhanced repository returns `{:ok, data}` tuples instead of direct data
- Data Structures: `EnhancedModuleData` has different fields than basic `ModuleData`
- Function Signatures: Some functions have additional optional parameters
- Process Management: Requires starting additional GenServer processes
For gradual migration, you can create a compatibility layer:
defmodule MyApp.RepositoryAdapter do
alias ElixirScope.ASTRepository.EnhancedRepository
# Compatibility wrapper for basic repository calls
def store_module(module, ast) do
case EnhancedRepository.store_enhanced_module(module, ast) do
:ok -> :ok
{:error, reason} -> {:error, reason}
end
end
def get_module(module) do
case EnhancedRepository.get_enhanced_module(module) do
{:ok, enhanced_data} -> {:ok, convert_to_basic_data(enhanced_data)}
{:error, reason} -> {:error, reason}
end
end
defp convert_to_basic_data(enhanced_data) do
# Convert enhanced data to basic format
%{
module_name: enhanced_data.module_name,
ast: enhanced_data.ast,
functions: enhanced_data.functions
# ... other basic fields
}
end
end

Symptoms:
- Memory usage exceeding expected limits
- Slow query performance
- Out of memory errors
Solutions:
-
Enable memory monitoring:
{:ok, stats} = MemoryManager.monitor_memory_usage()
-
Reduce cleanup intervals:
MemoryManager.cleanup_unused_data(max_age: 1800) # 30 minutes
-
Enable compression:
MemoryManager.compress_old_analysis([])
Symptoms:
- Low cache hit ratios
- Slow repeated queries
Solutions:
-
Check cache configuration:
MemoryManager.implement_lru_cache(:query, max_size: 2000)
-
Warm cache with frequently accessed data:
PerformanceOptimizer.warm_cache(modules: hot_modules)
-
Adjust TTL values:
# Increase TTL for stable data
cache_ttl: 600_000 # 10 minutes
Symptoms:
- Long application startup
- Timeout errors during initialization
Solutions:
-
Enable lazy loading:
PerformanceOptimizer.enable_lazy_loading(threshold_bytes: 512)
-
Use batch operations:
EnhancedRepository.store_modules_batch(modules)
-
Reduce initial analysis depth:
# Store basic data first, analyze later
EnhancedRepository.store_enhanced_module(module, ast, analyze: false)
{:ok, stats} = MemoryManager.monitor_memory_usage()
IO.inspect(stats, label: "Memory Stats")

cleanup_stats = MemoryManager.get_cleanup_stats()
compression_stats = MemoryManager.get_compression_stats()
IO.inspect({cleanup_stats, compression_stats}, label: "Operation Stats")

# Enable telemetry for detailed metrics
:telemetry.attach("repo-metrics", [:enhanced_repository, :query], fn event, measurements, metadata, _config ->
IO.puts("Query #{metadata.type} took #{measurements.duration}ms")
end, nil)

For additional support:
- Check the Integration Guide
- Review test files for usage examples
- Enable debug logging for detailed operation traces
- Use the built-in monitoring and statistics functions
This documentation covers Enhanced AST Repository v0.1.0. For the latest updates, please refer to the project repository.