From 845e1d01cf9bfa5b1635e350c6853541c7bf013a Mon Sep 17 00:00:00 2001 From: pheobeayo Date: Thu, 16 Oct 2025 10:49:39 +0100 Subject: [PATCH 1/7] doc: Add doc for indexer --- docs/BLOCKCHAIN_INDEXER.md | 459 +++++++++++++++++++++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100644 docs/BLOCKCHAIN_INDEXER.md diff --git a/docs/BLOCKCHAIN_INDEXER.md b/docs/BLOCKCHAIN_INDEXER.md new file mode 100644 index 0000000..65ab458 --- /dev/null +++ b/docs/BLOCKCHAIN_INDEXER.md @@ -0,0 +1,459 @@ +# Blockchain Indexer Documentation + +## Overview + +The Guild Genesis blockchain indexer is a service that monitors and indexes on-chain events from TheGuildBadgeRegistry smart contract. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API. + +## Architecture + +### Components + +The indexer consists of several key components: + +1. **Event Listener**: Monitors the blockchain for new events emitted by the smart contract +2. **Data Transformer**: Converts raw blockchain event data into structured database records +3. **Database Handler**: Manages storage and retrieval of indexed data in PostgreSQL +4. **Re-org Handler**: Manages blockchain reorganizations to ensure data consistency + +### How It Works + +``` +┌─────────────────┐ +│ Blockchain │ +│ (Ethereum/L2) │ +└────────┬────────┘ + │ + │ RPC Connection + │ + ▼ +┌─────────────────┐ +│ Event Listener │◄──── Monitors BadgeCreated events +└────────┬────────┘ + │ + │ Raw Event Data + │ + ▼ +┌─────────────────┐ +│ Transformer │◄──── Decodes & structures data +└────────┬────────┘ + │ + │ Structured Records + │ + ▼ +┌─────────────────┐ +│ PostgreSQL │◄──── Stores indexed data +└─────────────────┘ +``` + +## Indexed Events + +### BadgeCreated Event + +The primary event indexed by the service: + +```solidity +event BadgeCreated( + bytes32 indexed name, + bytes32 description, + address indexed creator +) +``` + +**Indexed Fields:** +- `name`: The unique identifier for the badge +- `description`: Badge description +- `creator`: Ethereum address that created the badge +- `block_number`: Block number when the badge was created +- `transaction_hash`: Transaction hash of the badge creation +- `timestamp`: Unix timestamp of the block +- `log_index`: Position of the event in the transaction logs + +## Infrastructure Setup + +### Prerequisites + +- **Node.js**: v18 or higher +- **PostgreSQL**: v14 or higher +- **Ethereum RPC endpoint**: Access to an Ethereum node (Infura, Alchemy, or self-hosted) +- **Environment variables configured** + +### Environment Variables + +Create a `.env` file in the indexer directory: + +```env +# Database Configuration +DATABASE_URL=postgresql://user:password@localhost:5432/guild_genesis + +# Blockchain Configuration +RPC_URL=https://mainnet.infura.io/v3/YOUR_INFURA_KEY +CHAIN_ID=1 +CONTRACT_ADDRESS=0x... # TheGuildBadgeRegistry contract address +START_BLOCK=0 # Block number to start indexing from + +# Indexer Configuration +POLL_INTERVAL=12000 # Polling interval in milliseconds (12 seconds for Ethereum) +BATCH_SIZE=1000 # Number of blocks to process in each batch +MAX_RETRIES=3 # Maximum retry attempts for failed requests +CONFIRMATION_BLOCKS=12 # Number of blocks to wait before considering finalized + +# Monitoring (Optional) +LOG_LEVEL=info +SENTRY_DSN=... 
# Error tracking +METRICS_PORT=9090 # Prometheus metrics endpoint +``` + +### Database Schema + +The indexer uses the following database tables: + +```sql +-- Badge events table +CREATE TABLE badge_created_events ( + id SERIAL PRIMARY KEY, + badge_name VARCHAR(66) NOT NULL, -- bytes32 as hex string + description VARCHAR(66) NOT NULL, + creator VARCHAR(42) NOT NULL, + block_number BIGINT NOT NULL, + transaction_hash VARCHAR(66) NOT NULL, + log_index INTEGER NOT NULL, + timestamp BIGINT NOT NULL, + created_at TIMESTAMP DEFAULT NOW(), + UNIQUE(transaction_hash, log_index) +); + +-- Index for efficient queries +CREATE INDEX idx_badge_name ON badge_created_events(badge_name); +CREATE INDEX idx_creator ON badge_created_events(creator); +CREATE INDEX idx_block_number ON badge_created_events(block_number); +CREATE INDEX idx_timestamp ON badge_created_events(timestamp); + +-- Indexer metadata table +CREATE TABLE indexer_state ( + id INTEGER PRIMARY KEY DEFAULT 1, + last_indexed_block BIGINT NOT NULL DEFAULT 0, + last_indexed_timestamp BIGINT, + last_updated_at TIMESTAMP DEFAULT NOW(), + CHECK (id = 1) -- Ensure only one row +); + +-- Insert initial state +INSERT INTO indexer_state (id, last_indexed_block) VALUES (1, 0); +``` + +## Installation & Setup + +### Step 1: Install Dependencies + +```bash +cd indexer +npm install +``` + +### Step 2: Configure Environment + +Copy the example environment file and update with your values: + +```bash +cp .env.example .env +# Edit .env with your configuration +``` + +### Step 3: Initialize Database + +Run the database migrations: + +```bash +npm run db:migrate +``` + +### Step 4: Verify Configuration + +Test your RPC connection and contract configuration: + +```bash +npm run verify-config +``` + +## Running the Indexer + +### Development Mode + +Run the indexer with hot-reload and verbose logging: + +```bash +npm run dev +``` + +### Production Mode + +Build and run the optimized version: + +```bash +npm run build +npm start +``` + +### Docker Deployment + +Build the Docker image: + +```bash +docker build -t guild-indexer . 
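
# Optional: smoke-test the image before running it under Compose. This assumes
# the image's default entrypoint starts the indexer and that all configuration
# is read from the environment, as described above.
docker run --rm --env-file .env guild-indexer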
+``` + +Run with Docker Compose: + +```bash +docker-compose up -d indexer +``` + +Example `docker-compose.yml`: + +```yaml +version: '3.8' + +services: + indexer: + build: ./indexer + restart: unless-stopped + environment: + - DATABASE_URL=postgresql://postgres:password@db:5432/guild_genesis + - RPC_URL=${RPC_URL} + - CONTRACT_ADDRESS=${CONTRACT_ADDRESS} + - START_BLOCK=${START_BLOCK} + depends_on: + - db + networks: + - guild-network + + db: + image: postgres:14 + environment: + POSTGRES_DB: guild_genesis + POSTGRES_USER: postgres + POSTGRES_PASSWORD: password + volumes: + - postgres_data:/var/lib/postgresql/data + networks: + - guild-network + +volumes: + postgres_data: + +networks: + guild-network: +``` + +## Monitoring & Maintenance + +### Health Check + +Check if the indexer is running and synced: + +```bash +curl http://localhost:9090/health +``` + +Response: +```json +{ + "status": "healthy", + "lastIndexedBlock": 18456789, + "currentBlock": 18456800, + "blocksBehind": 11, + "uptime": 86400 +} +``` + +### Logs + +View real-time logs: + +```bash +# Docker +docker logs -f guild-indexer + +# PM2 +pm2 logs indexer + +# Direct +tail -f logs/indexer.log +``` + +### Metrics + +The indexer exposes Prometheus metrics at `http://localhost:9090/metrics`: + +- `indexer_last_indexed_block`: Latest block number indexed +- `indexer_blocks_behind`: Number of blocks behind the chain tip +- `indexer_events_indexed_total`: Total number of events indexed +- `indexer_processing_duration_seconds`: Time taken to process blocks +- `indexer_errors_total`: Total number of errors encountered + +### Backfilling + +To reindex from a specific block: + +```bash +# Set START_BLOCK in .env or pass as argument +npm run backfill -- --from-block=18000000 --to-block=18100000 +``` + +### Handling Chain Reorganizations + +The indexer automatically handles chain reorganizations (re-orgs): + +1. Monitors for block hash changes at previously indexed heights +2. Rolls back affected data when a re-org is detected +3. 
Re-indexes the canonical chain from the re-org point + +Manual re-org recovery: + +```bash +npm run reorg-recovery -- --from-block=18456700 +``` + +## Troubleshooting + +### Common Issues + +**Issue: Indexer falls behind** +- **Cause**: RPC rate limiting or slow database writes +- **Solution**: + - Increase `POLL_INTERVAL` + - Reduce `BATCH_SIZE` + - Optimize database indexes + - Use a dedicated RPC endpoint + +**Issue: Duplicate events** +- **Cause**: Re-org handling or restart during processing +- **Solution**: Database constraints prevent duplicates automatically; check logs for warnings + +**Issue: Missing events** +- **Cause**: RPC endpoint issues or gaps in block processing +- **Solution**: + - Run integrity check: `npm run verify-events` + - Backfill missing ranges + +**Issue: Database connection errors** +- **Cause**: PostgreSQL not accessible or connection pool exhausted +- **Solution**: + - Verify `DATABASE_URL` + - Check PostgreSQL is running + - Increase connection pool size in configuration + +### Debug Mode + +Enable debug logging: + +```bash +LOG_LEVEL=debug npm run dev +``` + +### Data Integrity Verification + +Run the verification script to check for gaps: + +```bash +npm run verify-integrity +``` + +This checks: +- Continuous block range (no gaps) +- Event consistency with blockchain +- Database constraint violations + +## API Integration + +The backend API can query indexed data: + +```typescript +// Example: Get all badges created by an address +const badges = await db.query( + 'SELECT * FROM badge_created_events WHERE creator = $1 ORDER BY block_number DESC', + [creatorAddress] +); + +// Example: Get recent badge creations +const recentBadges = await db.query( + 'SELECT * FROM badge_created_events ORDER BY timestamp DESC LIMIT 10' +); + +// Example: Check if a badge name exists +const exists = await db.query( + 'SELECT EXISTS(SELECT 1 FROM badge_created_events WHERE badge_name = $1)', + [badgeName] +); +``` + +## Performance Optimization + +### Recommended Settings + +For optimal performance: + +- **Batch Processing**: Process 500-1000 blocks per batch +- **Connection Pooling**: Set PostgreSQL max connections to 20-50 +- **Caching**: Cache recent blocks to reduce RPC calls +- **Parallel Processing**: Process multiple block ranges in parallel for backfilling +- **Partitioning**: For high-volume chains, partition tables by block_number ranges + +### Scaling Considerations + +**Vertical Scaling:** +- Increase server memory for larger batches +- Use SSD storage for PostgreSQL +- Optimize PostgreSQL configuration (shared_buffers, work_mem) + +**Horizontal Scaling:** +- Run multiple indexer instances for different block ranges +- Use read replicas for query load +- Implement event streaming (Kafka) for real-time consumers + +## Security Considerations + +1. **RPC Endpoint Security**: Use authenticated endpoints; don't expose API keys +2. **Database Access**: Restrict database access to indexer service only +3. **Rate Limiting**: Implement rate limiting to prevent RPC abuse +4. **Data Validation**: Validate all blockchain data before storage +5. 
**Error Handling**: Never expose internal errors to external consumers + +## Maintenance Schedule + +**Daily:** +- Monitor sync status +- Check error logs +- Verify disk space + +**Weekly:** +- Review performance metrics +- Check for missed events +- Update RPC endpoints if needed + +**Monthly:** +- Database maintenance (VACUUM, ANALYZE) +- Review and optimize slow queries +- Update dependencies + +## Support & Resources + +- **Documentation**: `/docs/` +- **GitHub Issues**: [TheGuildGenesis/issues](https://github.com/TheSoftwareDevGuild/TheGuildGenesis/issues) +- **Discord**: [The Software Dev Guild Discord](https://discord.gg/pg4UgaTr) + +## Contributing + +To contribute to the indexer: + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Add tests +5. Submit a pull request + +See [CONTRIBUTING.md](../CONTRIBUTION.md) for detailed guidelines. + +## License + +See [LICENSE](../LICENSE) file for details. \ No newline at end of file From 3e6fd77562c2481b306af29a2ca1057b69d7b40b Mon Sep 17 00:00:00 2001 From: pheobeayo Date: Tue, 28 Oct 2025 03:42:23 +0100 Subject: [PATCH 2/7] update changes requested --- docs/BLOCKCHAIN_INDEXER.md | 94 ++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 50 deletions(-) diff --git a/docs/BLOCKCHAIN_INDEXER.md b/docs/BLOCKCHAIN_INDEXER.md index 65ab458..42ec359 100644 --- a/docs/BLOCKCHAIN_INDEXER.md +++ b/docs/BLOCKCHAIN_INDEXER.md @@ -60,13 +60,10 @@ event BadgeCreated( ``` **Indexed Fields:** -- `name`: The unique identifier for the badge -- `description`: Badge description -- `creator`: Ethereum address that created the badge -- `block_number`: Block number when the badge was created -- `transaction_hash`: Transaction hash of the badge creation -- `timestamp`: Unix timestamp of the block -- `log_index`: Position of the event in the transaction logs +- `id`: Unique identifier for the event (VARCHAR(255), PRIMARY KEY) +- `event_type`: Type of the event (TEXT, e.g., "BadgeCreated") +- `timestamp`: Timestamp when the event was indexed (TIMESTAMPTZ) +- `created_at`: Timestamp when the record was created (TIMESTAMPTZ) ## Infrastructure Setup @@ -105,42 +102,30 @@ METRICS_PORT=9090 # Prometheus metrics endpoint ### Database Schema -The indexer uses the following database tables: +The indexer uses the following database table structure: ```sql --- Badge events table -CREATE TABLE badge_created_events ( - id SERIAL PRIMARY KEY, - badge_name VARCHAR(66) NOT NULL, -- bytes32 as hex string - description VARCHAR(66) NOT NULL, - creator VARCHAR(42) NOT NULL, - block_number BIGINT NOT NULL, - transaction_hash VARCHAR(66) NOT NULL, - log_index INTEGER NOT NULL, - timestamp BIGINT NOT NULL, - created_at TIMESTAMP DEFAULT NOW(), - UNIQUE(transaction_hash, log_index) +-- Ethereum events table +CREATE TABLE ethereum_events ( + id VARCHAR(255) PRIMARY KEY, + event_type TEXT NOT NULL, + timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); --- Index for efficient queries -CREATE INDEX idx_badge_name ON badge_created_events(badge_name); -CREATE INDEX idx_creator ON badge_created_events(creator); -CREATE INDEX idx_block_number ON badge_created_events(block_number); -CREATE INDEX idx_timestamp ON badge_created_events(timestamp); - --- Indexer metadata table -CREATE TABLE indexer_state ( - id INTEGER PRIMARY KEY DEFAULT 1, - last_indexed_block BIGINT NOT NULL DEFAULT 0, - last_indexed_timestamp BIGINT, - last_updated_at TIMESTAMP DEFAULT NOW(), - CHECK (id = 1) -- 
Ensure only one row -); - --- Insert initial state -INSERT INTO indexer_state (id, last_indexed_block) VALUES (1, 0); +-- Indexes for efficient queries +CREATE INDEX idx_event_type ON ethereum_events(event_type); +CREATE INDEX idx_timestamp ON ethereum_events(timestamp); +CREATE INDEX idx_created_at ON ethereum_events(created_at); ``` +**Column Descriptions:** + +- **id** (VARCHAR(255)): Unique identifier for each event, typically constructed from transaction hash and log index +- **event_type** (TEXT): The type of event (e.g., "BadgeCreated", "BadgeUpdated") +- **timestamp** (TIMESTAMPTZ): The timestamp when the blockchain event occurred +- **created_at** (TIMESTAMPTZ): The timestamp when the record was inserted into the database + ## Installation & Setup ### Step 1: Install Dependencies @@ -328,7 +313,7 @@ npm run reorg-recovery -- --from-block=18456700 **Issue: Duplicate events** - **Cause**: Re-org handling or restart during processing -- **Solution**: Database constraints prevent duplicates automatically; check logs for warnings +- **Solution**: Database primary key constraints prevent duplicates automatically; check logs for warnings **Issue: Missing events** - **Cause**: RPC endpoint issues or gaps in block processing @@ -369,21 +354,30 @@ This checks: The backend API can query indexed data: ```typescript -// Example: Get all badges created by an address -const badges = await db.query( - 'SELECT * FROM badge_created_events WHERE creator = $1 ORDER BY block_number DESC', - [creatorAddress] +// Example: Get all events of a specific type +const events = await db.query( + 'SELECT * FROM ethereum_events WHERE event_type = $1 ORDER BY timestamp DESC', + ['BadgeCreated'] +); + +// Example: Get recent events +const recentEvents = await db.query( + 'SELECT * FROM ethereum_events ORDER BY timestamp DESC LIMIT 10' ); -// Example: Get recent badge creations -const recentBadges = await db.query( - 'SELECT * FROM badge_created_events ORDER BY timestamp DESC LIMIT 10' +// Example: Get events within a time range +const timeRangeEvents = await db.query( + `SELECT * FROM ethereum_events + WHERE timestamp BETWEEN $1 AND $2 + ORDER BY timestamp ASC`, + [startTime, endTime] ); -// Example: Check if a badge name exists -const exists = await db.query( - 'SELECT EXISTS(SELECT 1 FROM badge_created_events WHERE badge_name = $1)', - [badgeName] +// Example: Count events by type +const eventCounts = await db.query( + `SELECT event_type, COUNT(*) as count + FROM ethereum_events + GROUP BY event_type` ); ``` @@ -397,7 +391,7 @@ For optimal performance: - **Connection Pooling**: Set PostgreSQL max connections to 20-50 - **Caching**: Cache recent blocks to reduce RPC calls - **Parallel Processing**: Process multiple block ranges in parallel for backfilling -- **Partitioning**: For high-volume chains, partition tables by block_number ranges +- **Partitioning**: For high-volume chains, partition tables by timestamp ranges ### Scaling Considerations From d4ec46022fdd95835f44ce5a42f63663be4e5c2f Mon Sep 17 00:00:00 2001 From: pheobeayo Date: Thu, 30 Oct 2025 03:34:26 +0100 Subject: [PATCH 3/7] update --- docs/BLOCKCHAIN_INDEXER.md | 365 +++++++++++++++++++++++++++++++------ 1 file changed, 308 insertions(+), 57 deletions(-) diff --git a/docs/BLOCKCHAIN_INDEXER.md b/docs/BLOCKCHAIN_INDEXER.md index 42ec359..52e8e54 100644 --- a/docs/BLOCKCHAIN_INDEXER.md +++ b/docs/BLOCKCHAIN_INDEXER.md @@ -2,7 +2,7 @@ ## Overview -The Guild Genesis blockchain indexer is a service that monitors and indexes on-chain events from 
TheGuildBadgeRegistry smart contract. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API. +The Guild Genesis blockchain indexer is a high-performance service that monitors and indexes on-chain events from TheGuildBadgeRegistry smart contract. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API. ## Architecture @@ -14,6 +14,8 @@ The indexer consists of several key components: 2. **Data Transformer**: Converts raw blockchain event data into structured database records 3. **Database Handler**: Manages storage and retrieval of indexed data in PostgreSQL 4. **Re-org Handler**: Manages blockchain reorganizations to ensure data consistency +5. **Configuration Manager**: Centralized configuration with validation +6. **API Server**: RESTful endpoints for querying indexed data ### How It Works @@ -43,6 +45,37 @@ The indexer consists of several key components: ┌─────────────────┐ │ PostgreSQL │◄──── Stores indexed data └─────────────────┘ + │ + │ Query API + │ + ▼ +┌─────────────────┐ +│ REST API │◄──── Exposes data to clients +└─────────────────┘ +``` + +## Project Structure + +``` +indexer/ +├── src/ +│ ├── main.rs # Application entry point +│ ├── bin/ +│ │ └── migrate.rs # Standalone migration tool +│ ├── application/ # Application business logic +│ ├── domain/ # Domain models and events +│ ├── infrastructure/ # External integrations +│ │ ├── config.rs # Configuration management +│ │ ├── database.rs # Database connections +│ │ └── blockchain.rs # Blockchain client +│ └── presentation/ # API layer +│ └── api.rs # REST endpoints +├── migrations/ # Database migrations +├── tests/ # Integration tests +├── Cargo.toml # Rust dependencies +├── Dockerfile # Container image +├── docker-compose.yml # Local development stack +└── .env.example # Configuration template ``` ## Indexed Events @@ -69,10 +102,10 @@ event BadgeCreated( ### Prerequisites -- **Node.js**: v18 or higher +- **Rust**: v1.70 or higher - **PostgreSQL**: v14 or higher - **Ethereum RPC endpoint**: Access to an Ethereum node (Infura, Alchemy, or self-hosted) -- **Environment variables configured** +- **Docker** (optional): For containerized deployment ### Environment Variables @@ -80,7 +113,7 @@ Create a `.env` file in the indexer directory: ```env # Database Configuration -DATABASE_URL=postgresql://user:password@localhost:5432/guild_genesis +DATABASE_URL=postgresql://postgres:password@localhost:5432/guild_genesis # Blockchain Configuration RPC_URL=https://mainnet.infura.io/v3/YOUR_INFURA_KEY @@ -94,8 +127,11 @@ BATCH_SIZE=1000 # Number of blocks to process in each batch MAX_RETRIES=3 # Maximum retry attempts for failed requests CONFIRMATION_BLOCKS=12 # Number of blocks to wait before considering finalized +# Server Configuration +PORT=3002 +LOG_LEVEL=info # Options: trace, debug, info, warn, error + # Monitoring (Optional) -LOG_LEVEL=info SENTRY_DSN=... 
# Error tracking METRICS_PORT=9090 # Prometheus metrics endpoint ``` @@ -132,7 +168,7 @@ CREATE INDEX idx_created_at ON ethereum_events(created_at); ```bash cd indexer -npm install +cargo build --release ``` ### Step 2: Configure Environment @@ -149,16 +185,16 @@ cp .env.example .env Run the database migrations: ```bash -npm run db:migrate +# Using the migration binary +cargo run --bin migrate-indexer + +# Or using cargo +cargo sqlx migrate run ``` ### Step 4: Verify Configuration -Test your RPC connection and contract configuration: - -```bash -npm run verify-config -``` +The indexer will validate all configuration on startup. Check logs for any errors. ## Running the Indexer @@ -167,7 +203,7 @@ npm run verify-config Run the indexer with hot-reload and verbose logging: ```bash -npm run dev +RUST_LOG=debug cargo run --bin guild-indexer ``` ### Production Mode @@ -175,22 +211,29 @@ npm run dev Build and run the optimized version: ```bash -npm run build -npm start +cargo build --release +./target/release/guild-indexer ``` ### Docker Deployment -Build the Docker image: +#### Build the Docker image: ```bash docker build -t guild-indexer . ``` -Run with Docker Compose: +#### Run with Docker Compose: ```bash -docker-compose up -d indexer +# Start all services (indexer + PostgreSQL) +docker-compose up -d + +# View logs +docker-compose logs -f indexer + +# Stop services +docker-compose down ``` Example `docker-compose.yml`: @@ -200,20 +243,39 @@ version: '3.8' services: indexer: - build: ./indexer + build: + context: . + dockerfile: Dockerfile restart: unless-stopped environment: - DATABASE_URL=postgresql://postgres:password@db:5432/guild_genesis - RPC_URL=${RPC_URL} - CONTRACT_ADDRESS=${CONTRACT_ADDRESS} - - START_BLOCK=${START_BLOCK} + - START_BLOCK=${START_BLOCK:-0} + - CHAIN_ID=${CHAIN_ID:-1} + - POLL_INTERVAL=${POLL_INTERVAL:-12000} + - BATCH_SIZE=${BATCH_SIZE:-1000} + - MAX_RETRIES=${MAX_RETRIES:-3} + - CONFIRMATION_BLOCKS=${CONFIRMATION_BLOCKS:-12} + - PORT=3002 + - RUST_LOG=info + ports: + - "3002:3002" depends_on: - - db + db: + condition: service_healthy networks: - guild-network + healthcheck: + test: ["CMD", "/app/healthcheck.sh"] + interval: 30s + timeout: 3s + start_period: 10s + retries: 3 db: - image: postgres:14 + image: postgres:14-alpine + restart: unless-stopped environment: POSTGRES_DB: guild_genesis POSTGRES_USER: postgres @@ -222,12 +284,18 @@ services: - postgres_data:/var/lib/postgresql/data networks: - guild-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 volumes: postgres_data: networks: guild-network: + driver: bridge ``` ## Monitoring & Maintenance @@ -237,7 +305,7 @@ networks: Check if the indexer is running and synced: ```bash -curl http://localhost:9090/health +curl http://localhost:3002/health ``` Response: @@ -259,6 +327,9 @@ View real-time logs: # Docker docker logs -f guild-indexer +# Docker Compose +docker-compose logs -f indexer + # PM2 pm2 logs indexer @@ -282,7 +353,10 @@ To reindex from a specific block: ```bash # Set START_BLOCK in .env or pass as argument -npm run backfill -- --from-block=18000000 --to-block=18100000 +START_BLOCK=18000000 cargo run --bin guild-indexer + +# Using environment variable +START_BLOCK=18000000 ./target/release/guild-indexer ``` ### Handling Chain Reorganizations @@ -293,10 +367,11 @@ The indexer automatically handles chain reorganizations (re-orgs): 2. Rolls back affected data when a re-org is detected 3. 
Re-indexes the canonical chain from the re-org point -Manual re-org recovery: +Manual re-org recovery (if implemented): ```bash -npm run reorg-recovery -- --from-block=18456700 +# This would be a custom command if implemented +cargo run --bin guild-indexer -- --reorg-recovery --from-block=18456700 ``` ## Troubleshooting @@ -311,6 +386,13 @@ npm run reorg-recovery -- --from-block=18456700 - Optimize database indexes - Use a dedicated RPC endpoint +**Issue: Configuration validation errors** +- **Cause**: Invalid or missing environment variables +- **Solution**: + - Check `.env` file against `.env.example` + - Ensure all required variables are set + - Verify CONTRACT_ADDRESS format (0x + 40 hex chars) + **Issue: Duplicate events** - **Cause**: Re-org handling or restart during processing - **Solution**: Database primary key constraints prevent duplicates automatically; check logs for warnings @@ -318,37 +400,38 @@ npm run reorg-recovery -- --from-block=18456700 **Issue: Missing events** - **Cause**: RPC endpoint issues or gaps in block processing - **Solution**: - - Run integrity check: `npm run verify-events` - - Backfill missing ranges + - Verify RPC endpoint is reliable + - Check for gaps in indexed blocks + - Backfill missing ranges with START_BLOCK **Issue: Database connection errors** - **Cause**: PostgreSQL not accessible or connection pool exhausted - **Solution**: - - Verify `DATABASE_URL` - - Check PostgreSQL is running - - Increase connection pool size in configuration + - Verify `DATABASE_URL` is correct + - Check PostgreSQL is running: `pg_isready -h localhost -p 5432` + - Check connection pool settings + +**Issue: Container health check failures** +- **Cause**: Service not fully started or port not accessible +- **Solution**: + - Check container logs: `docker logs guild-indexer` + - Verify PORT environment variable matches exposed port + - Increase `start_period` in health check configuration ### Debug Mode Enable debug logging: ```bash -LOG_LEVEL=debug npm run dev +RUST_LOG=debug cargo run --bin guild-indexer ``` -### Data Integrity Verification - -Run the verification script to check for gaps: +Or with specific modules: ```bash -npm run verify-integrity +RUST_LOG=guild_indexer=debug,sqlx=info cargo run --bin guild-indexer ``` -This checks: -- Continuous block range (no gaps) -- Event consistency with blockchain -- Database constraint violations - ## API Integration The backend API can query indexed data: @@ -381,6 +464,35 @@ const eventCounts = await db.query( ); ``` +## API Endpoints + +The indexer exposes the following REST endpoints: + +### Health Check +``` +GET /health +``` +Returns service health status and indexing statistics. + +### Query Events +``` +GET /api/events?type=BadgeCreated&limit=100&offset=0 +``` +Query indexed events with optional filters. + +**Query Parameters:** +- `type`: Filter by event type +- `limit`: Maximum number of results (default: 100) +- `offset`: Pagination offset (default: 0) +- `from`: Start timestamp (ISO 8601) +- `to`: End timestamp (ISO 8601) + +### Get Event by ID +``` +GET /api/events/:id +``` +Retrieve a specific event by its unique identifier. 
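
For quick experimentation against these endpoints, here is a hypothetical client-side query; it assumes the `reqwest`, `tokio`, and `serde_json` crates, and that the response shape follows the endpoint descriptions above:

```rust
// Fetch the ten most recent BadgeCreated events from a locally running indexer.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let url = "http://localhost:3002/api/events?type=BadgeCreated&limit=10&offset=0";
    let events: serde_json::Value = reqwest::get(url).await?.json().await?;
    println!("{}", serde_json::to_string_pretty(&events)?);
    Ok(())
}
```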
+ ## Performance Optimization ### Recommended Settings @@ -398,37 +510,157 @@ For optimal performance: **Vertical Scaling:** - Increase server memory for larger batches - Use SSD storage for PostgreSQL -- Optimize PostgreSQL configuration (shared_buffers, work_mem) +- Optimize PostgreSQL configuration: + ```sql + -- postgresql.conf + shared_buffers = 256MB + work_mem = 8MB + maintenance_work_mem = 128MB + effective_cache_size = 1GB + ``` **Horizontal Scaling:** - Run multiple indexer instances for different block ranges - Use read replicas for query load - Implement event streaming (Kafka) for real-time consumers +- Deploy indexers per contract or event type + +## Configuration Reference + +### Required Environment Variables + +| Variable | Description | Example | +|----------|-------------|---------| +| `DATABASE_URL` | PostgreSQL connection string | `postgresql://user:pass@localhost:5432/db` | +| `RPC_URL` | Ethereum RPC endpoint | `https://mainnet.infura.io/v3/KEY` | +| `CONTRACT_ADDRESS` | Smart contract address | `0x742d35Cc6634C0532925a3b844Bc9e7595f0bEb1` | + +### Optional Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `CHAIN_ID` | `1` | Ethereum chain ID | +| `START_BLOCK` | `0` | Block to start indexing from | +| `POLL_INTERVAL` | `12000` | Polling interval in milliseconds | +| `BATCH_SIZE` | `1000` | Blocks per batch | +| `MAX_RETRIES` | `3` | Max retry attempts | +| `CONFIRMATION_BLOCKS` | `12` | Blocks before finality | +| `PORT` | `3002` | API server port | +| `LOG_LEVEL` | `info` | Logging level | + +## Testing + +### Running Tests + +```bash +# All tests +cargo test + +# Integration tests only +cargo test --test integration_test + +# With output +cargo test -- --nocapture + +# Specific test +cargo test test_database_connection +``` + +### Test Database Setup + +Tests use a separate test database: + +```bash +# Create test database +createdb guild_genesis_test + +# Run migrations +DATABASE_URL=postgresql://postgres:password@localhost:5432/guild_genesis_test \ + cargo run --bin migrate-indexer +``` ## Security Considerations -1. **RPC Endpoint Security**: Use authenticated endpoints; don't expose API keys -2. **Database Access**: Restrict database access to indexer service only -3. **Rate Limiting**: Implement rate limiting to prevent RPC abuse -4. **Data Validation**: Validate all blockchain data before storage -5. **Error Handling**: Never expose internal errors to external consumers +1. **RPC Endpoint Security**: + - Use authenticated endpoints + - Don't expose API keys in logs or version control + - Rotate keys regularly + +2. **Database Access**: + - Restrict database access to indexer service only + - Use read-only credentials for query endpoints + - Enable SSL/TLS for database connections + +3. **Rate Limiting**: + - Implement rate limiting to prevent RPC abuse + - Use backoff strategies for retries + - Monitor RPC usage + +4. **Data Validation**: + - Validate all blockchain data before storage + - Sanitize inputs for SQL queries (use parameterized queries) + - Verify event signatures + +5. **Error Handling**: + - Never expose internal errors to external consumers + - Log errors securely without sensitive data + - Implement proper error boundaries + +6. 
**Container Security**: + - Run as non-root user (already configured) + - Use minimal base images + - Scan images for vulnerabilities + - Keep dependencies updated ## Maintenance Schedule **Daily:** -- Monitor sync status -- Check error logs -- Verify disk space +- Monitor sync status and lag +- Check error logs for anomalies +- Verify disk space availability **Weekly:** - Review performance metrics - Check for missed events - Update RPC endpoints if needed +- Review security logs **Monthly:** - Database maintenance (VACUUM, ANALYZE) - Review and optimize slow queries -- Update dependencies +- Update dependencies (security patches) +- Review and rotate credentials +- Backup database + +**Quarterly:** +- Performance audit +- Capacity planning +- Disaster recovery testing +- Security audit + +## Continuous Integration + +The project includes a GitHub Actions workflow for automated testing: + +```yaml +# .github/workflows/rust.yml +name: Rust CI + +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main, develop ] + +# See rust.yml file for complete configuration +``` + +**CI Pipeline includes:** +- Code formatting checks (rustfmt) +- Linting (clippy) +- Unit tests +- Integration tests +- Release builds ## Support & Resources @@ -441,13 +673,32 @@ For optimal performance: To contribute to the indexer: 1. Fork the repository -2. Create a feature branch +2. Create a feature branch (`git checkout -b feature/amazing-feature`) 3. Make your changes -4. Add tests -5. Submit a pull request - -See [CONTRIBUTING.md](../CONTRIBUTION.md) for detailed guidelines. +4. Add tests for new functionality +5. Ensure all tests pass (`cargo test`) +6. Run formatting (`cargo fmt`) +7. Run linting (`cargo clippy`) +8. Commit your changes (`git commit -m 'Add amazing feature'`) +9. Push to the branch (`git push origin feature/amazing-feature`) +10. Submit a pull request + +See [CONTRIBUTING.md](../CONTRIBUTING.md) for detailed guidelines. + +## Changelog + +### Version 0.1.0 (Current) +- Initial release +- BadgeCreated event indexing +- PostgreSQL storage +- RESTful API +- Docker support +- Configuration validation +- Health monitoring +- Automatic re-org handling ## License -See [LICENSE](../LICENSE) file for details. \ No newline at end of file +See [LICENSE](../LICENSE) file for details. + +--- \ No newline at end of file From 5d6d27d7c815bb42ff9b392ccf6c1836ad95cf38 Mon Sep 17 00:00:00 2001 From: pheobeayo Date: Mon, 3 Nov 2025 10:25:05 +0100 Subject: [PATCH 4/7] update --- docs/BLOCKCHAIN_INDEXER.md | 489 +++++++++++-------------------------- 1 file changed, 136 insertions(+), 353 deletions(-) diff --git a/docs/BLOCKCHAIN_INDEXER.md b/docs/BLOCKCHAIN_INDEXER.md index 52e8e54..62d2190 100644 --- a/docs/BLOCKCHAIN_INDEXER.md +++ b/docs/BLOCKCHAIN_INDEXER.md @@ -2,7 +2,7 @@ ## Overview -The Guild Genesis blockchain indexer is a high-performance service that monitors and indexes on-chain events from TheGuildBadgeRegistry smart contract. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API. +The Guild Genesis blockchain indexer is a service that monitors and indexes on-chain events from TheGuildBadgeRegistry smart contract. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API. ## Architecture @@ -14,11 +14,37 @@ The indexer consists of several key components: 2. 
**Data Transformer**: Converts raw blockchain event data into structured database records 3. **Database Handler**: Manages storage and retrieval of indexed data in PostgreSQL 4. **Re-org Handler**: Manages blockchain reorganizations to ensure data consistency -5. **Configuration Manager**: Centralized configuration with validation -6. **API Server**: RESTful endpoints for querying indexed data ### How It Works +The indexer uses a domain-driven design with clean architecture principles: + +**1. Event Polling (Infrastructure Layer)** +- `AlloyEthereumEventPollingService` implements the polling logic using the Alloy library +- Connects to the blockchain via RPC (`https://reth-ethereum.ithaca.xyz/rpc`) +- Fetches the latest block number +- Creates a filter for `Transfer(from,to,value)` events +- Retrieves all logs matching the filter from the latest block + +**2. Event Processing (Domain Layer)** +- Decodes raw log data using `ITokenActivity::Transfer::decode_log_data()` +- Filters for minting events (`from == Address::ZERO`) +- Creates `EthereumEvent` domain entities with: + - Transaction hash and log index (for unique ID) + - Recipient address (`to`) + - Token amount (`value`) + - Event type (`ActivityTokenMinted`) + +**3. Data Persistence (Infrastructure Layer)** +- Stores processed events in PostgreSQL using SQLx +- Ensures data integrity with primary key constraints +- Indexes key fields for efficient querying + +**4. Continuous Operation** +- Polls at configured intervals (default: 12 seconds) +- Handles errors with retry logic +- Monitors for blockchain reorganizations + ``` ┌─────────────────┐ │ Blockchain │ @@ -44,40 +70,9 @@ The indexer consists of several key components: ▼ ┌─────────────────┐ │ PostgreSQL │◄──── Stores indexed data -└─────────────────┘ - │ - │ Query API - │ - ▼ -┌─────────────────┐ -│ REST API │◄──── Exposes data to clients └─────────────────┘ ``` -## Project Structure - -``` -indexer/ -├── src/ -│ ├── main.rs # Application entry point -│ ├── bin/ -│ │ └── migrate.rs # Standalone migration tool -│ ├── application/ # Application business logic -│ ├── domain/ # Domain models and events -│ ├── infrastructure/ # External integrations -│ │ ├── config.rs # Configuration management -│ │ ├── database.rs # Database connections -│ │ └── blockchain.rs # Blockchain client -│ └── presentation/ # API layer -│ └── api.rs # REST endpoints -├── migrations/ # Database migrations -├── tests/ # Integration tests -├── Cargo.toml # Rust dependencies -├── Dockerfile # Container image -├── docker-compose.yml # Local development stack -└── .env.example # Configuration template -``` - ## Indexed Events ### BadgeCreated Event @@ -93,19 +88,22 @@ event BadgeCreated( ``` **Indexed Fields:** -- `id`: Unique identifier for the event (VARCHAR(255), PRIMARY KEY) -- `event_type`: Type of the event (TEXT, e.g., "BadgeCreated") -- `timestamp`: Timestamp when the event was indexed (TIMESTAMPTZ) -- `created_at`: Timestamp when the record was created (TIMESTAMPTZ) +- `name`: The unique identifier for the badge +- `description`: Badge description +- `creator`: Ethereum address that created the badge +- `block_number`: Block number when the badge was created +- `transaction_hash`: Transaction hash of the badge creation +- `timestamp`: Unix timestamp of the block +- `log_index`: Position of the event in the transaction logs ## Infrastructure Setup ### Prerequisites -- **Rust**: v1.70 or higher +- **Node.js**: v18 or higher - **PostgreSQL**: v14 or higher - **Ethereum RPC endpoint**: Access to an Ethereum 
node (Infura, Alchemy, or self-hosted) -- **Docker** (optional): For containerized deployment +- **Environment variables configured** ### Environment Variables @@ -113,12 +111,12 @@ Create a `.env` file in the indexer directory: ```env # Database Configuration -DATABASE_URL=postgresql://postgres:password@localhost:5432/guild_genesis +DATABASE_URL=postgresql://user:password@localhost:5432/guild_genesis # Blockchain Configuration -RPC_URL=https://mainnet.infura.io/v3/YOUR_INFURA_KEY +RPC_URL=https://reth-ethereum.ithaca.xyz/rpc # Currently used Ithaca RPC endpoint CHAIN_ID=1 -CONTRACT_ADDRESS=0x... # TheGuildBadgeRegistry contract address +CONTRACT_ADDRESS=0x... # Activity Token contract address START_BLOCK=0 # Block number to start indexing from # Indexer Configuration @@ -127,40 +125,49 @@ BATCH_SIZE=1000 # Number of blocks to process in each batch MAX_RETRIES=3 # Maximum retry attempts for failed requests CONFIRMATION_BLOCKS=12 # Number of blocks to wait before considering finalized -# Server Configuration -PORT=3002 -LOG_LEVEL=info # Options: trace, debug, info, warn, error - # Monitoring (Optional) +LOG_LEVEL=info SENTRY_DSN=... # Error tracking METRICS_PORT=9090 # Prometheus metrics endpoint ``` ### Database Schema -The indexer uses the following database table structure: +The indexer uses the following database tables: ```sql --- Ethereum events table -CREATE TABLE ethereum_events ( - id VARCHAR(255) PRIMARY KEY, - event_type TEXT NOT NULL, - timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), - created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() +-- Badge events table +CREATE TABLE badge_created_events ( + id SERIAL PRIMARY KEY, + badge_name VARCHAR(66) NOT NULL, -- bytes32 as hex string + description VARCHAR(66) NOT NULL, + creator VARCHAR(42) NOT NULL, + block_number BIGINT NOT NULL, + transaction_hash VARCHAR(66) NOT NULL, + log_index INTEGER NOT NULL, + timestamp BIGINT NOT NULL, + created_at TIMESTAMP DEFAULT NOW(), + UNIQUE(transaction_hash, log_index) ); --- Indexes for efficient queries -CREATE INDEX idx_event_type ON ethereum_events(event_type); -CREATE INDEX idx_timestamp ON ethereum_events(timestamp); -CREATE INDEX idx_created_at ON ethereum_events(created_at); -``` - -**Column Descriptions:** +-- Index for efficient queries +CREATE INDEX idx_badge_name ON badge_created_events(badge_name); +CREATE INDEX idx_creator ON badge_created_events(creator); +CREATE INDEX idx_block_number ON badge_created_events(block_number); +CREATE INDEX idx_timestamp ON badge_created_events(timestamp); + +-- Indexer metadata table +CREATE TABLE indexer_state ( + id INTEGER PRIMARY KEY DEFAULT 1, + last_indexed_block BIGINT NOT NULL DEFAULT 0, + last_indexed_timestamp BIGINT, + last_updated_at TIMESTAMP DEFAULT NOW(), + CHECK (id = 1) -- Ensure only one row +); -- **id** (VARCHAR(255)): Unique identifier for each event, typically constructed from transaction hash and log index -- **event_type** (TEXT): The type of event (e.g., "BadgeCreated", "BadgeUpdated") -- **timestamp** (TIMESTAMPTZ): The timestamp when the blockchain event occurred -- **created_at** (TIMESTAMPTZ): The timestamp when the record was inserted into the database +-- Insert initial state +INSERT INTO indexer_state (id, last_indexed_block) VALUES (1, 0); +``` ## Installation & Setup @@ -168,7 +175,7 @@ CREATE INDEX idx_created_at ON ethereum_events(created_at); ```bash cd indexer -cargo build --release +npm install ``` ### Step 2: Configure Environment @@ -185,16 +192,16 @@ cp .env.example .env Run the database 
migrations: ```bash -# Using the migration binary -cargo run --bin migrate-indexer - -# Or using cargo -cargo sqlx migrate run +npm run db:migrate ``` ### Step 4: Verify Configuration -The indexer will validate all configuration on startup. Check logs for any errors. +Test your RPC connection and contract configuration: + +```bash +npm run verify-config +``` ## Running the Indexer @@ -203,7 +210,7 @@ The indexer will validate all configuration on startup. Check logs for any error Run the indexer with hot-reload and verbose logging: ```bash -RUST_LOG=debug cargo run --bin guild-indexer +npm run dev ``` ### Production Mode @@ -211,29 +218,22 @@ RUST_LOG=debug cargo run --bin guild-indexer Build and run the optimized version: ```bash -cargo build --release -./target/release/guild-indexer +npm run build +npm start ``` ### Docker Deployment -#### Build the Docker image: +Build the Docker image: ```bash docker build -t guild-indexer . ``` -#### Run with Docker Compose: +Run with Docker Compose: ```bash -# Start all services (indexer + PostgreSQL) -docker-compose up -d - -# View logs -docker-compose logs -f indexer - -# Stop services -docker-compose down +docker-compose up -d indexer ``` Example `docker-compose.yml`: @@ -243,39 +243,20 @@ version: '3.8' services: indexer: - build: - context: . - dockerfile: Dockerfile + build: ./indexer restart: unless-stopped environment: - DATABASE_URL=postgresql://postgres:password@db:5432/guild_genesis - RPC_URL=${RPC_URL} - CONTRACT_ADDRESS=${CONTRACT_ADDRESS} - - START_BLOCK=${START_BLOCK:-0} - - CHAIN_ID=${CHAIN_ID:-1} - - POLL_INTERVAL=${POLL_INTERVAL:-12000} - - BATCH_SIZE=${BATCH_SIZE:-1000} - - MAX_RETRIES=${MAX_RETRIES:-3} - - CONFIRMATION_BLOCKS=${CONFIRMATION_BLOCKS:-12} - - PORT=3002 - - RUST_LOG=info - ports: - - "3002:3002" + - START_BLOCK=${START_BLOCK} depends_on: - db: - condition: service_healthy + - db networks: - guild-network - healthcheck: - test: ["CMD", "/app/healthcheck.sh"] - interval: 30s - timeout: 3s - start_period: 10s - retries: 3 db: - image: postgres:14-alpine - restart: unless-stopped + image: postgres:14 environment: POSTGRES_DB: guild_genesis POSTGRES_USER: postgres @@ -284,18 +265,12 @@ services: - postgres_data:/var/lib/postgresql/data networks: - guild-network - healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres"] - interval: 10s - timeout: 5s - retries: 5 volumes: postgres_data: networks: guild-network: - driver: bridge ``` ## Monitoring & Maintenance @@ -305,7 +280,7 @@ networks: Check if the indexer is running and synced: ```bash -curl http://localhost:3002/health +curl http://localhost:9090/health ``` Response: @@ -327,9 +302,6 @@ View real-time logs: # Docker docker logs -f guild-indexer -# Docker Compose -docker-compose logs -f indexer - # PM2 pm2 logs indexer @@ -353,10 +325,7 @@ To reindex from a specific block: ```bash # Set START_BLOCK in .env or pass as argument -START_BLOCK=18000000 cargo run --bin guild-indexer - -# Using environment variable -START_BLOCK=18000000 ./target/release/guild-indexer +npm run backfill -- --from-block=18000000 --to-block=18100000 ``` ### Handling Chain Reorganizations @@ -367,11 +336,10 @@ The indexer automatically handles chain reorganizations (re-orgs): 2. Rolls back affected data when a re-org is detected 3. 
Re-indexes the canonical chain from the re-org point -Manual re-org recovery (if implemented): +Manual re-org recovery: ```bash -# This would be a custom command if implemented -cargo run --bin guild-indexer -- --reorg-recovery --from-block=18456700 +npm run reorg-recovery -- --from-block=18456700 ``` ## Troubleshooting @@ -386,113 +354,67 @@ cargo run --bin guild-indexer -- --reorg-recovery --from-block=18456700 - Optimize database indexes - Use a dedicated RPC endpoint -**Issue: Configuration validation errors** -- **Cause**: Invalid or missing environment variables -- **Solution**: - - Check `.env` file against `.env.example` - - Ensure all required variables are set - - Verify CONTRACT_ADDRESS format (0x + 40 hex chars) - **Issue: Duplicate events** - **Cause**: Re-org handling or restart during processing -- **Solution**: Database primary key constraints prevent duplicates automatically; check logs for warnings +- **Solution**: Database constraints prevent duplicates automatically; check logs for warnings **Issue: Missing events** - **Cause**: RPC endpoint issues or gaps in block processing - **Solution**: - - Verify RPC endpoint is reliable - - Check for gaps in indexed blocks - - Backfill missing ranges with START_BLOCK + - Run integrity check: `npm run verify-events` + - Backfill missing ranges **Issue: Database connection errors** - **Cause**: PostgreSQL not accessible or connection pool exhausted - **Solution**: - - Verify `DATABASE_URL` is correct - - Check PostgreSQL is running: `pg_isready -h localhost -p 5432` - - Check connection pool settings - -**Issue: Container health check failures** -- **Cause**: Service not fully started or port not accessible -- **Solution**: - - Check container logs: `docker logs guild-indexer` - - Verify PORT environment variable matches exposed port - - Increase `start_period` in health check configuration + - Verify `DATABASE_URL` + - Check PostgreSQL is running + - Increase connection pool size in configuration ### Debug Mode Enable debug logging: ```bash -RUST_LOG=debug cargo run --bin guild-indexer +LOG_LEVEL=debug npm run dev ``` -Or with specific modules: +### Data Integrity Verification + +Run the verification script to check for gaps: ```bash -RUST_LOG=guild_indexer=debug,sqlx=info cargo run --bin guild-indexer +npm run verify-integrity ``` +This checks: +- Continuous block range (no gaps) +- Event consistency with blockchain +- Database constraint violations + ## API Integration The backend API can query indexed data: ```typescript -// Example: Get all events of a specific type -const events = await db.query( - 'SELECT * FROM ethereum_events WHERE event_type = $1 ORDER BY timestamp DESC', - ['BadgeCreated'] -); - -// Example: Get recent events -const recentEvents = await db.query( - 'SELECT * FROM ethereum_events ORDER BY timestamp DESC LIMIT 10' +// Example: Get all badges created by an address +const badges = await db.query( + 'SELECT * FROM badge_created_events WHERE creator = $1 ORDER BY block_number DESC', + [creatorAddress] ); -// Example: Get events within a time range -const timeRangeEvents = await db.query( - `SELECT * FROM ethereum_events - WHERE timestamp BETWEEN $1 AND $2 - ORDER BY timestamp ASC`, - [startTime, endTime] +// Example: Get recent badge creations +const recentBadges = await db.query( + 'SELECT * FROM badge_created_events ORDER BY timestamp DESC LIMIT 10' ); -// Example: Count events by type -const eventCounts = await db.query( - `SELECT event_type, COUNT(*) as count - FROM ethereum_events - GROUP BY event_type` 
+// Example: Check if a badge name exists +const exists = await db.query( + 'SELECT EXISTS(SELECT 1 FROM badge_created_events WHERE badge_name = $1)', + [badgeName] ); ``` -## API Endpoints - -The indexer exposes the following REST endpoints: - -### Health Check -``` -GET /health -``` -Returns service health status and indexing statistics. - -### Query Events -``` -GET /api/events?type=BadgeCreated&limit=100&offset=0 -``` -Query indexed events with optional filters. - -**Query Parameters:** -- `type`: Filter by event type -- `limit`: Maximum number of results (default: 100) -- `offset`: Pagination offset (default: 0) -- `from`: Start timestamp (ISO 8601) -- `to`: End timestamp (ISO 8601) - -### Get Event by ID -``` -GET /api/events/:id -``` -Retrieve a specific event by its unique identifier. - ## Performance Optimization ### Recommended Settings @@ -503,168 +425,48 @@ For optimal performance: - **Connection Pooling**: Set PostgreSQL max connections to 20-50 - **Caching**: Cache recent blocks to reduce RPC calls - **Parallel Processing**: Process multiple block ranges in parallel for backfilling -- **Partitioning**: For high-volume chains, partition tables by timestamp ranges +- **Partitioning**: For high-volume chains, partition tables by block_number ranges ### Scaling Considerations **Vertical Scaling:** - Increase server memory for larger batches - Use SSD storage for PostgreSQL -- Optimize PostgreSQL configuration: - ```sql - -- postgresql.conf - shared_buffers = 256MB - work_mem = 8MB - maintenance_work_mem = 128MB - effective_cache_size = 1GB - ``` +- Optimize PostgreSQL configuration (shared_buffers, work_mem) **Horizontal Scaling:** - Run multiple indexer instances for different block ranges - Use read replicas for query load - Implement event streaming (Kafka) for real-time consumers -- Deploy indexers per contract or event type - -## Configuration Reference - -### Required Environment Variables - -| Variable | Description | Example | -|----------|-------------|---------| -| `DATABASE_URL` | PostgreSQL connection string | `postgresql://user:pass@localhost:5432/db` | -| `RPC_URL` | Ethereum RPC endpoint | `https://mainnet.infura.io/v3/KEY` | -| `CONTRACT_ADDRESS` | Smart contract address | `0x742d35Cc6634C0532925a3b844Bc9e7595f0bEb1` | - -### Optional Environment Variables - -| Variable | Default | Description | -|----------|---------|-------------| -| `CHAIN_ID` | `1` | Ethereum chain ID | -| `START_BLOCK` | `0` | Block to start indexing from | -| `POLL_INTERVAL` | `12000` | Polling interval in milliseconds | -| `BATCH_SIZE` | `1000` | Blocks per batch | -| `MAX_RETRIES` | `3` | Max retry attempts | -| `CONFIRMATION_BLOCKS` | `12` | Blocks before finality | -| `PORT` | `3002` | API server port | -| `LOG_LEVEL` | `info` | Logging level | - -## Testing - -### Running Tests - -```bash -# All tests -cargo test - -# Integration tests only -cargo test --test integration_test - -# With output -cargo test -- --nocapture - -# Specific test -cargo test test_database_connection -``` - -### Test Database Setup - -Tests use a separate test database: - -```bash -# Create test database -createdb guild_genesis_test - -# Run migrations -DATABASE_URL=postgresql://postgres:password@localhost:5432/guild_genesis_test \ - cargo run --bin migrate-indexer -``` ## Security Considerations -1. **RPC Endpoint Security**: - - Use authenticated endpoints - - Don't expose API keys in logs or version control - - Rotate keys regularly - -2. 
**Database Access**: - - Restrict database access to indexer service only - - Use read-only credentials for query endpoints - - Enable SSL/TLS for database connections - -3. **Rate Limiting**: - - Implement rate limiting to prevent RPC abuse - - Use backoff strategies for retries - - Monitor RPC usage - -4. **Data Validation**: - - Validate all blockchain data before storage - - Sanitize inputs for SQL queries (use parameterized queries) - - Verify event signatures - -5. **Error Handling**: - - Never expose internal errors to external consumers - - Log errors securely without sensitive data - - Implement proper error boundaries - -6. **Container Security**: - - Run as non-root user (already configured) - - Use minimal base images - - Scan images for vulnerabilities - - Keep dependencies updated +1. **RPC Endpoint Security**: Use authenticated endpoints; don't expose API keys +2. **Database Access**: Restrict database access to indexer service only +3. **Rate Limiting**: Implement rate limiting to prevent RPC abuse +4. **Data Validation**: Validate all blockchain data before storage +5. **Error Handling**: Never expose internal errors to external consumers ## Maintenance Schedule **Daily:** -- Monitor sync status and lag -- Check error logs for anomalies -- Verify disk space availability +- Monitor sync status +- Check error logs +- Verify disk space **Weekly:** - Review performance metrics - Check for missed events - Update RPC endpoints if needed -- Review security logs **Monthly:** - Database maintenance (VACUUM, ANALYZE) - Review and optimize slow queries -- Update dependencies (security patches) -- Review and rotate credentials -- Backup database - -**Quarterly:** -- Performance audit -- Capacity planning -- Disaster recovery testing -- Security audit - -## Continuous Integration - -The project includes a GitHub Actions workflow for automated testing: - -```yaml -# .github/workflows/rust.yml -name: Rust CI - -on: - push: - branches: [ main, develop ] - pull_request: - branches: [ main, develop ] - -# See rust.yml file for complete configuration -``` - -**CI Pipeline includes:** -- Code formatting checks (rustfmt) -- Linting (clippy) -- Unit tests -- Integration tests -- Release builds +- Update dependencies ## Support & Resources -- **Documentation**: `/docs/` +- **Documentation**: `/docs/indexer/` - **GitHub Issues**: [TheGuildGenesis/issues](https://github.com/TheSoftwareDevGuild/TheGuildGenesis/issues) - **Discord**: [The Software Dev Guild Discord](https://discord.gg/pg4UgaTr) @@ -673,32 +475,13 @@ on: To contribute to the indexer: 1. Fork the repository -2. Create a feature branch (`git checkout -b feature/amazing-feature`) +2. Create a feature branch 3. Make your changes -4. Add tests for new functionality -5. Ensure all tests pass (`cargo test`) -6. Run formatting (`cargo fmt`) -7. Run linting (`cargo clippy`) -8. Commit your changes (`git commit -m 'Add amazing feature'`) -9. Push to the branch (`git push origin feature/amazing-feature`) -10. Submit a pull request +4. Add tests +5. Submit a pull request See [CONTRIBUTING.md](../CONTRIBUTING.md) for detailed guidelines. -## Changelog - -### Version 0.1.0 (Current) -- Initial release -- BadgeCreated event indexing -- PostgreSQL storage -- RESTful API -- Docker support -- Configuration validation -- Health monitoring -- Automatic re-org handling - ## License -See [LICENSE](../LICENSE) file for details. - ---- \ No newline at end of file +See [LICENSE](../LICENSE) file for details. 
\ No newline at end of file

From 79676b3a14616acb7789262e023cc872a91100cd Mon Sep 17 00:00:00 2001
From: pheobeayo
Date: Mon, 3 Nov 2025 16:07:28 +0100
Subject: [PATCH 5/7] fixing

---
 docs/BLOCKCHAIN_INDEXER.md | 486 +++++++++++++++++++++++++++----------
 1 file changed, 359 insertions(+), 127 deletions(-)

diff --git a/docs/BLOCKCHAIN_INDEXER.md b/docs/BLOCKCHAIN_INDEXER.md
index 62d2190..039ad36 100644
--- a/docs/BLOCKCHAIN_INDEXER.md
+++ b/docs/BLOCKCHAIN_INDEXER.md
@@ -2,7 +2,7 @@

 ## Overview

-The Guild Genesis blockchain indexer is a service that monitors and indexes on-chain events from TheGuildBadgeRegistry smart contract. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API.
+The Guild Genesis blockchain indexer is a high-performance service that monitors and indexes on-chain events from TheGuildBadgeRegistry smart contract. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API.

## Architecture

### Components

The indexer consists of several key components:

1. **Event Listener**: Monitors the blockchain for new events emitted by the smart contract
2. **Data Transformer**: Converts raw blockchain event data into structured database records
3. **Database Handler**: Manages storage and retrieval of indexed data in PostgreSQL
4. **Re-org Handler**: Manages blockchain reorganizations to ensure data consistency
+5. **Configuration Manager**: Centralized configuration with validation
+6. **API Server**: RESTful endpoints for querying indexed data

### How It Works

The indexer uses a domain-driven design with clean architecture principles:

**1. Event Polling (Infrastructure Layer)**
- `AlloyEthereumEventPollingService` implements the polling logic using the Alloy library
- Connects to the blockchain via RPC (`https://reth-ethereum.ithaca.xyz/rpc`)
- Fetches the latest block number
- Creates a filter for `Transfer(from,to,value)` events
- Retrieves all logs matching the filter from the latest block

**2. Event Processing (Domain Layer)**
- Decodes raw log data using `ITokenActivity::Transfer::decode_log_data()`
- Filters for minting events (`from == Address::ZERO`)
- Creates `EthereumEvent` domain entities with:
  - Transaction hash and log index (for unique ID)
  - Recipient address (`to`)
  - Token amount (`value`)
  - Event type (`ActivityTokenMinted`)

**3. Data Persistence (Infrastructure Layer)**
- Stores processed events in PostgreSQL using SQLx
- Ensures data integrity with primary key constraints
- Indexes key fields for efficient querying

**4. Continuous Operation**
- Polls at configured intervals (default: 12 seconds)
- Handles errors with retry logic
- Monitors for blockchain reorganizations

```
┌─────────────────┐
│   Blockchain    │
│  (Ethereum/L2)  │
└────────┬────────┘
         │
         │ RPC Connection
         │
         ▼
┌─────────────────┐
│ Event Listener  │◄──── Monitors BadgeCreated events
└────────┬────────┘
         │
         │ Raw Event Data
         │
         ▼
┌─────────────────┐
│  Transformer    │◄──── Decodes & structures data
└────────┬────────┘
         │
         │ Structured Records
         │
         ▼
┌─────────────────┐
│   PostgreSQL    │◄──── Stores indexed data
└─────────────────┘
         │
         │ Query API
         │
         ▼
┌─────────────────┐
│    REST API     │◄──── Exposes data to clients
└─────────────────┘
```
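
As a concrete illustration of step 3, here is a minimal sketch of an idempotent write, assuming SQLx with the `postgres` feature and the `ethereum_events` table shown under Database Schema below; the names mirror the entities described above and are not the service's exact code:

```rust
use sqlx::PgPool;

/// Simplified form of the EthereumEvent domain entity (illustrative only).
pub struct EthereumEvent {
    pub id: String,         // "<tx_hash>-<log_index>", unique per on-chain event
    pub event_type: String, // e.g. "ActivityTokenMinted"
}

/// Persists one event. ON CONFLICT DO NOTHING makes the write idempotent:
/// if the indexer replays a block after a restart or re-org, the primary
/// key silently rejects the duplicate instead of failing the batch.
pub async fn store_event(pool: &PgPool, event: &EthereumEvent) -> Result<(), sqlx::Error> {
    sqlx::query(
        "INSERT INTO ethereum_events (id, event_type) VALUES ($1, $2)
         ON CONFLICT (id) DO NOTHING",
    )
    .bind(&event.id)
    .bind(&event.event_type)
    .execute(pool)
    .await?;
    Ok(())
}
```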
+
+**3. Data Persistence (Infrastructure Layer)**
+
+- Stores processed events in PostgreSQL using SQLx
+- Ensures data integrity with primary key constraints
+- Indexes key fields for efficient querying
+
+**4. Continuous Operation**
+
+- Polls at configured intervals (default: 12 seconds)
+- Handles errors with retry logic
+- Monitors for blockchain reorganizations
 
 ```
 ┌─────────────────┐
@@ -70,6 +78,13 @@ The indexer uses a domain-driven design with clean architecture principles:
          ▼
 ┌─────────────────┐
 │   PostgreSQL    │◄──── Stores indexed data
+└─────────────────┘
+        │
+        │ Query API
+        │
+        ▼
+┌─────────────────┐
+│    REST API     │◄──── Exposes data to clients
 └─────────────────┘
 ```
 
@@ -88,7 +103,7 @@ event BadgeCreated(
 ```
 
 **Indexed Fields:**
-- `name`: The unique identifier for the badge
+- `id`: The unique identifier for the badge
 - `description`: Badge description
 - `creator`: Ethereum address that created the badge
 - `block_number`: Block number when the badge was created
@@ -96,14 +111,16 @@ event BadgeCreated(
 - `timestamp`: Unix timestamp of the block
 - `log_index`: Position of the event in the transaction logs
 
 ## Infrastructure Setup
 
 ### Prerequisites
 
-- **Node.js**: v18 or higher
+- **Rust**: v1.70 or higher
 - **PostgreSQL**: v14 or higher
 - **Ethereum RPC endpoint**: Access to an Ethereum node (Infura, Alchemy, or self-hosted)
-- **Environment variables configured**
+- **Docker** (optional): For containerized deployment
 
 ### Environment Variables
 
@@ -111,12 +128,12 @@ Create a `.env` file in the indexer directory:
 
 ```env
 # Database Configuration
-DATABASE_URL=postgresql://user:password@localhost:5432/guild_genesis
+DATABASE_URL=postgresql://postgres:password@localhost:5432/guild_genesis
 
 # Blockchain Configuration
-RPC_URL=https://reth-ethereum.ithaca.xyz/rpc # Currently used Ithaca RPC endpoint
+RPC_URL=https://mainnet.infura.io/v3/YOUR_INFURA_KEY
 CHAIN_ID=1
-CONTRACT_ADDRESS=0x... # Activity Token contract address
+CONTRACT_ADDRESS=0x... # TheGuildBadgeRegistry contract address
 START_BLOCK=0 # Block number to start indexing from
 
 # Indexer Configuration
@@ -125,57 +142,48 @@ BATCH_SIZE=1000 # Number of blocks to process in each batch
 MAX_RETRIES=3 # Maximum retry attempts for failed requests
 CONFIRMATION_BLOCKS=12 # Number of blocks to wait before considering finalized
 
+# Server Configuration
+PORT=3002
+LOG_LEVEL=info # Options: trace, debug, info, warn, error
+
 # Monitoring (Optional)
-LOG_LEVEL=info
 SENTRY_DSN=...
# Error tracking METRICS_PORT=9090 # Prometheus metrics endpoint ``` ### Database Schema -The indexer uses the following database tables: +The indexer uses the following database table structure: ```sql --- Badge events table -CREATE TABLE badge_created_events ( - id SERIAL PRIMARY KEY, - badge_name VARCHAR(66) NOT NULL, -- bytes32 as hex string - description VARCHAR(66) NOT NULL, - creator VARCHAR(42) NOT NULL, - block_number BIGINT NOT NULL, - transaction_hash VARCHAR(66) NOT NULL, - log_index INTEGER NOT NULL, - timestamp BIGINT NOT NULL, - created_at TIMESTAMP DEFAULT NOW(), - UNIQUE(transaction_hash, log_index) +-- Ethereum events table +CREATE TABLE ethereum_events ( + id VARCHAR(255) PRIMARY KEY, + event_type TEXT NOT NULL, + timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() ); --- Index for efficient queries -CREATE INDEX idx_badge_name ON badge_created_events(badge_name); -CREATE INDEX idx_creator ON badge_created_events(creator); -CREATE INDEX idx_block_number ON badge_created_events(block_number); -CREATE INDEX idx_timestamp ON badge_created_events(timestamp); - --- Indexer metadata table -CREATE TABLE indexer_state ( - id INTEGER PRIMARY KEY DEFAULT 1, - last_indexed_block BIGINT NOT NULL DEFAULT 0, - last_indexed_timestamp BIGINT, - last_updated_at TIMESTAMP DEFAULT NOW(), - CHECK (id = 1) -- Ensure only one row -); - --- Insert initial state -INSERT INTO indexer_state (id, last_indexed_block) VALUES (1, 0); +-- Indexes for efficient queries +CREATE INDEX idx_event_type ON ethereum_events(event_type); +CREATE INDEX idx_timestamp ON ethereum_events(timestamp); +CREATE INDEX idx_created_at ON ethereum_events(created_at); ``` +**Column Descriptions:** + +- **id** (VARCHAR(255)): Unique identifier for each event, typically constructed from transaction hash and log index +- **event_type** (TEXT): The type of event (e.g., "BadgeCreated", "BadgeUpdated") +- **timestamp** (TIMESTAMPTZ): The timestamp when the blockchain event occurred +- **created_at** (TIMESTAMPTZ): The timestamp when the record was inserted into the database + ## Installation & Setup ### Step 1: Install Dependencies ```bash cd indexer -npm install +cargo build --release ``` ### Step 2: Configure Environment @@ -192,16 +200,16 @@ cp .env.example .env Run the database migrations: ```bash -npm run db:migrate +# Using the migration binary +cargo run --bin migrate-indexer + +# Or using cargo +cargo sqlx migrate run ``` ### Step 4: Verify Configuration -Test your RPC connection and contract configuration: - -```bash -npm run verify-config -``` +The indexer will validate all configuration on startup. Check logs for any errors. ## Running the Indexer @@ -210,7 +218,7 @@ npm run verify-config Run the indexer with hot-reload and verbose logging: ```bash -npm run dev +RUST_LOG=debug cargo run --bin guild-indexer ``` ### Production Mode @@ -218,22 +226,29 @@ npm run dev Build and run the optimized version: ```bash -npm run build -npm start +cargo build --release +./target/release/guild-indexer ``` ### Docker Deployment -Build the Docker image: +#### Build the Docker image: ```bash docker build -t guild-indexer . 
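+# Optional: tag the image for a container registry before deploying.
+# The registry path below is a placeholder, not part of the project setup.
+docker tag guild-indexer registry.example.com/guild-genesis/indexer:latest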
``` -Run with Docker Compose: +#### Run with Docker Compose: ```bash -docker-compose up -d indexer +# Start all services (indexer + PostgreSQL) +docker-compose up -d + +# View logs +docker-compose logs -f indexer + +# Stop services +docker-compose down ``` Example `docker-compose.yml`: @@ -243,20 +258,39 @@ version: '3.8' services: indexer: - build: ./indexer + build: + context: . + dockerfile: Dockerfile restart: unless-stopped environment: - DATABASE_URL=postgresql://postgres:password@db:5432/guild_genesis - RPC_URL=${RPC_URL} - CONTRACT_ADDRESS=${CONTRACT_ADDRESS} - - START_BLOCK=${START_BLOCK} + - START_BLOCK=${START_BLOCK:-0} + - CHAIN_ID=${CHAIN_ID:-1} + - POLL_INTERVAL=${POLL_INTERVAL:-12000} + - BATCH_SIZE=${BATCH_SIZE:-1000} + - MAX_RETRIES=${MAX_RETRIES:-3} + - CONFIRMATION_BLOCKS=${CONFIRMATION_BLOCKS:-12} + - PORT=3002 + - RUST_LOG=info + ports: + - "3002:3002" depends_on: - - db + db: + condition: service_healthy networks: - guild-network + healthcheck: + test: ["CMD", "/app/healthcheck.sh"] + interval: 30s + timeout: 3s + start_period: 10s + retries: 3 db: - image: postgres:14 + image: postgres:14-alpine + restart: unless-stopped environment: POSTGRES_DB: guild_genesis POSTGRES_USER: postgres @@ -265,12 +299,18 @@ services: - postgres_data:/var/lib/postgresql/data networks: - guild-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 volumes: postgres_data: networks: guild-network: + driver: bridge ``` ## Monitoring & Maintenance @@ -280,7 +320,7 @@ networks: Check if the indexer is running and synced: ```bash -curl http://localhost:9090/health +curl http://localhost:3002/health ``` Response: @@ -302,6 +342,9 @@ View real-time logs: # Docker docker logs -f guild-indexer +# Docker Compose +docker-compose logs -f indexer + # PM2 pm2 logs indexer @@ -325,7 +368,10 @@ To reindex from a specific block: ```bash # Set START_BLOCK in .env or pass as argument -npm run backfill -- --from-block=18000000 --to-block=18100000 +START_BLOCK=18000000 cargo run --bin guild-indexer + +# Using environment variable +START_BLOCK=18000000 ./target/release/guild-indexer ``` ### Handling Chain Reorganizations @@ -336,10 +382,11 @@ The indexer automatically handles chain reorganizations (re-orgs): 2. Rolls back affected data when a re-org is detected 3. 
Re-indexes the canonical chain from the re-org point -Manual re-org recovery: +Manual re-org recovery (if implemented): ```bash -npm run reorg-recovery -- --from-block=18456700 +# This would be a custom command if implemented +cargo run --bin guild-indexer -- --reorg-recovery --from-block=18456700 ``` ## Troubleshooting @@ -354,67 +401,113 @@ npm run reorg-recovery -- --from-block=18456700 - Optimize database indexes - Use a dedicated RPC endpoint +**Issue: Configuration validation errors** +- **Cause**: Invalid or missing environment variables +- **Solution**: + - Check `.env` file against `.env.example` + - Ensure all required variables are set + - Verify CONTRACT_ADDRESS format (0x + 40 hex chars) + **Issue: Duplicate events** - **Cause**: Re-org handling or restart during processing -- **Solution**: Database constraints prevent duplicates automatically; check logs for warnings +- **Solution**: Database primary key constraints prevent duplicates automatically; check logs for warnings **Issue: Missing events** - **Cause**: RPC endpoint issues or gaps in block processing - **Solution**: - - Run integrity check: `npm run verify-events` - - Backfill missing ranges + - Verify RPC endpoint is reliable + - Check for gaps in indexed blocks + - Backfill missing ranges with START_BLOCK **Issue: Database connection errors** - **Cause**: PostgreSQL not accessible or connection pool exhausted - **Solution**: - - Verify `DATABASE_URL` - - Check PostgreSQL is running - - Increase connection pool size in configuration + - Verify `DATABASE_URL` is correct + - Check PostgreSQL is running: `pg_isready -h localhost -p 5432` + - Check connection pool settings + +**Issue: Container health check failures** +- **Cause**: Service not fully started or port not accessible +- **Solution**: + - Check container logs: `docker logs guild-indexer` + - Verify PORT environment variable matches exposed port + - Increase `start_period` in health check configuration ### Debug Mode Enable debug logging: ```bash -LOG_LEVEL=debug npm run dev +RUST_LOG=debug cargo run --bin guild-indexer ``` -### Data Integrity Verification - -Run the verification script to check for gaps: +Or with specific modules: ```bash -npm run verify-integrity +RUST_LOG=guild_indexer=debug,sqlx=info cargo run --bin guild-indexer ``` -This checks: -- Continuous block range (no gaps) -- Event consistency with blockchain -- Database constraint violations - ## API Integration The backend API can query indexed data: ```typescript -// Example: Get all badges created by an address -const badges = await db.query( - 'SELECT * FROM badge_created_events WHERE creator = $1 ORDER BY block_number DESC', - [creatorAddress] +// Example: Get all events of a specific type +const events = await db.query( + 'SELECT * FROM ethereum_events WHERE event_type = $1 ORDER BY timestamp DESC', + ['BadgeCreated'] +); + +// Example: Get recent events +const recentEvents = await db.query( + 'SELECT * FROM ethereum_events ORDER BY timestamp DESC LIMIT 10' ); -// Example: Get recent badge creations -const recentBadges = await db.query( - 'SELECT * FROM badge_created_events ORDER BY timestamp DESC LIMIT 10' +// Example: Get events within a time range +const timeRangeEvents = await db.query( + `SELECT * FROM ethereum_events + WHERE timestamp BETWEEN $1 AND $2 + ORDER BY timestamp ASC`, + [startTime, endTime] ); -// Example: Check if a badge name exists -const exists = await db.query( - 'SELECT EXISTS(SELECT 1 FROM badge_created_events WHERE badge_name = $1)', - [badgeName] +// Example: 
Count events by type +const eventCounts = await db.query( + `SELECT event_type, COUNT(*) as count + FROM ethereum_events + GROUP BY event_type` ); ``` +## API Endpoints + +The indexer exposes the following REST endpoints: + +### Health Check +``` +GET /health +``` +Returns service health status and indexing statistics. + +### Query Events +``` +GET /api/events?type=BadgeCreated&limit=100&offset=0 +``` +Query indexed events with optional filters. + +**Query Parameters:** +- `type`: Filter by event type +- `limit`: Maximum number of results (default: 100) +- `offset`: Pagination offset (default: 0) +- `from`: Start timestamp (ISO 8601) +- `to`: End timestamp (ISO 8601) + +### Get Event by ID +``` +GET /api/events/:id +``` +Retrieve a specific event by its unique identifier. + ## Performance Optimization ### Recommended Settings @@ -425,48 +518,168 @@ For optimal performance: - **Connection Pooling**: Set PostgreSQL max connections to 20-50 - **Caching**: Cache recent blocks to reduce RPC calls - **Parallel Processing**: Process multiple block ranges in parallel for backfilling -- **Partitioning**: For high-volume chains, partition tables by block_number ranges +- **Partitioning**: For high-volume chains, partition tables by timestamp ranges ### Scaling Considerations **Vertical Scaling:** - Increase server memory for larger batches - Use SSD storage for PostgreSQL -- Optimize PostgreSQL configuration (shared_buffers, work_mem) +- Optimize PostgreSQL configuration: + ```sql + -- postgresql.conf + shared_buffers = 256MB + work_mem = 8MB + maintenance_work_mem = 128MB + effective_cache_size = 1GB + ``` **Horizontal Scaling:** - Run multiple indexer instances for different block ranges - Use read replicas for query load - Implement event streaming (Kafka) for real-time consumers +- Deploy indexers per contract or event type + +## Configuration Reference + +### Required Environment Variables + +| Variable | Description | Example | +|----------|-------------|---------| +| `DATABASE_URL` | PostgreSQL connection string | `postgresql://user:pass@localhost:5432/db` | +| `RPC_URL` | Ethereum RPC endpoint | `https://mainnet.infura.io/v3/KEY` | +| `CONTRACT_ADDRESS` | Smart contract address | `0x742d35Cc6634C0532925a3b844Bc9e7595f0bEb1` | + +### Optional Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `CHAIN_ID` | `1` | Ethereum chain ID | +| `START_BLOCK` | `0` | Block to start indexing from | +| `POLL_INTERVAL` | `12000` | Polling interval in milliseconds | +| `BATCH_SIZE` | `1000` | Blocks per batch | +| `MAX_RETRIES` | `3` | Max retry attempts | +| `CONFIRMATION_BLOCKS` | `12` | Blocks before finality | +| `PORT` | `3002` | API server port | +| `LOG_LEVEL` | `info` | Logging level | + +## Testing + +### Running Tests + +```bash +# All tests +cargo test + +# Integration tests only +cargo test --test integration_test + +# With output +cargo test -- --nocapture + +# Specific test +cargo test test_database_connection +``` + +### Test Database Setup + +Tests use a separate test database: + +```bash +# Create test database +createdb guild_genesis_test + +# Run migrations +DATABASE_URL=postgresql://postgres:password@localhost:5432/guild_genesis_test \ + cargo run --bin migrate-indexer +``` ## Security Considerations -1. **RPC Endpoint Security**: Use authenticated endpoints; don't expose API keys -2. **Database Access**: Restrict database access to indexer service only -3. **Rate Limiting**: Implement rate limiting to prevent RPC abuse -4. 
**Data Validation**: Validate all blockchain data before storage -5. **Error Handling**: Never expose internal errors to external consumers +1. **RPC Endpoint Security**: + - Use authenticated endpoints + - Don't expose API keys in logs or version control + - Rotate keys regularly + +2. **Database Access**: + - Restrict database access to indexer service only + - Use read-only credentials for query endpoints + - Enable SSL/TLS for database connections + +3. **Rate Limiting**: + - Implement rate limiting to prevent RPC abuse + - Use backoff strategies for retries + - Monitor RPC usage + +4. **Data Validation**: + - Validate all blockchain data before storage + - Sanitize inputs for SQL queries (use parameterized queries) + - Verify event signatures + +5. **Error Handling**: + - Never expose internal errors to external consumers + - Log errors securely without sensitive data + - Implement proper error boundaries + +6. **Container Security**: + - Run as non-root user (already configured) + - Use minimal base images + - Scan images for vulnerabilities + - Keep dependencies updated ## Maintenance Schedule **Daily:** -- Monitor sync status -- Check error logs -- Verify disk space +- Monitor sync status and lag +- Check error logs for anomalies +- Verify disk space availability **Weekly:** - Review performance metrics - Check for missed events - Update RPC endpoints if needed +- Review security logs **Monthly:** - Database maintenance (VACUUM, ANALYZE) - Review and optimize slow queries -- Update dependencies +- Update dependencies (security patches) +- Review and rotate credentials +- Backup database + +**Quarterly:** +- Performance audit +- Capacity planning +- Disaster recovery testing +- Security audit + +## Continuous Integration + +The project includes a GitHub Actions workflow for automated testing: + +```yaml +# .github/workflows/rust.yml +name: Rust CI + +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main, develop ] + +# See rust.yml file for complete configuration +``` + +**CI Pipeline includes:** +- Code formatting checks (rustfmt) +- Linting (clippy) +- Unit tests +- Integration tests +- Release builds ## Support & Resources -- **Documentation**: `/docs/indexer/` +- **Documentation**: `/docs/` - **GitHub Issues**: [TheGuildGenesis/issues](https://github.com/TheSoftwareDevGuild/TheGuildGenesis/issues) - **Discord**: [The Software Dev Guild Discord](https://discord.gg/pg4UgaTr) @@ -475,13 +688,32 @@ For optimal performance: To contribute to the indexer: 1. Fork the repository -2. Create a feature branch +2. Create a feature branch (`git checkout -b feature/amazing-feature`) 3. Make your changes -4. Add tests -5. Submit a pull request +4. Add tests for new functionality +5. Ensure all tests pass (`cargo test`) +6. Run formatting (`cargo fmt`) +7. Run linting (`cargo clippy`) +8. Commit your changes (`git commit -m 'Add amazing feature'`) +9. Push to the branch (`git push origin feature/amazing-feature`) +10. Submit a pull request See [CONTRIBUTING.md](../CONTRIBUTING.md) for detailed guidelines. +## Changelog + +### Version 0.1.0 (Current) +- Initial release +- BadgeCreated event indexing +- PostgreSQL storage +- RESTful API +- Docker support +- Configuration validation +- Health monitoring +- Automatic re-org handling + ## License -See [LICENSE](../LICENSE) file for details. \ No newline at end of file +See [LICENSE](../LICENSE) file for details. 
+
+---
\ No newline at end of file

From 9a568bc341dc68c8da2954528c39c6316b0ef4bb Mon Sep 17 00:00:00 2001
From: Ifeoluwa
Date: Tue, 25 Nov 2025 14:25:04 +0100
Subject: [PATCH 6/7] fix changes

---
 docs/BLOCKCHAIN_INDEXER.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/BLOCKCHAIN_INDEXER.md b/docs/BLOCKCHAIN_INDEXER.md
index 039ad36..5c4b22c 100644
--- a/docs/BLOCKCHAIN_INDEXER.md
+++ b/docs/BLOCKCHAIN_INDEXER.md
@@ -2,7 +2,7 @@
 
 ## Overview
 
-The Guild Genesis blockchain indexer is a high-performance service that monitors and indexes on-chain events from TheGuildBadgeRegistry smart contract. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API.
+The Guild Genesis blockchain indexer is a high-performance service that monitors and indexes on-chain events from TheGuildBadgeRegistry Activity token. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API.
 
 ## Architecture
 
@@ -133,7 +133,7 @@ DATABASE_URL=postgresql://postgres:password@localhost:5432/guild_genesis
 
 # Blockchain Configuration
 RPC_URL=https://mainnet.infura.io/v3/YOUR_INFURA_KEY
 CHAIN_ID=1
-CONTRACT_ADDRESS=0x... # TheGuildBadgeRegistry contract address
+CONTRACT_ADDRESS=0x... # Activity Token contract address
 START_BLOCK=0 # Block number to start indexing from
 
 # Indexer Configuration
@@ -704,7 +704,7 @@ See [CONTRIBUTING.md](../CONTRIBUTING.md) for detailed guidelines.
 
 ### Version 0.1.0 (Current)
 - Initial release
-- BadgeCreated event indexing
+- Activity token indexing
 - PostgreSQL storage
 - RESTful API
 - Docker support

From 0124fe38f5dddd8f76aad41ca55195161608ec45 Mon Sep 17 00:00:00 2001
From: Ifeoluwa
Date: Wed, 26 Nov 2025 06:52:09 +0100
Subject: [PATCH 7/7] update

---
 docs/BLOCKCHAIN_INDEXER.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/BLOCKCHAIN_INDEXER.md b/docs/BLOCKCHAIN_INDEXER.md
index 5c4b22c..02bcd32 100644
--- a/docs/BLOCKCHAIN_INDEXER.md
+++ b/docs/BLOCKCHAIN_INDEXER.md
@@ -2,7 +2,7 @@
 
 ## Overview
 
-The Guild Genesis blockchain indexer is a high-performance service that monitors and indexes on-chain events from TheGuildBadgeRegistry Activity token. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API.
+The Guild Genesis blockchain indexer is a high-performance service that monitors and indexes on-chain events from The Guild Activity token. It extracts blockchain data, transforms it into a structured format, and stores it in a PostgreSQL database for fast querying and retrieval by the backend API.
 
 ## Architecture