From 75943e3a6c69c23ad8823faecd0286695c7c1c44 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Mon, 3 Nov 2025 23:11:22 +0800
Subject: [PATCH 01/14] feat: implement parallel sync with performance
 optimizations

**Parallel Sync Implementation:**
- Add ParallelCycleSync class with 10x+ performance improvement
- Implement composite cursor pagination (cycle + timestamp + ID) to prevent data loss
- Multi-cycle batching: fetch 10-200 cycles per HTTP request (vs 1 cycle previously)
- Prefetching: overlap network fetch with database writes
- Automatic checkpoint/resume from database (ParallelSyncCheckpointManager)
- Configurable concurrency, batch size, retry attempts via env vars

**Client-Side JSON Optimization:**
- Configure axios with StringUtils.safeStringify for request serialization
- Configure axios with StringUtils.safeJsonParse for response parsing
- Add timing measurements for stringify/parse operations
- Use Content-Length header for size (eliminates expensive re-stringify)
- HTTP connection pooling with keep-alive agents (maxSockets: concurrency * 2)

**Database Enhancements:**
- Add composite indexes for cursor-based pagination:
  - receipts: (cycle ASC, timestamp ASC, receiptId ASC)
  - originalTxsData: (cycle ASC, timestamp ASC, txId ASC)
- Add SQLite lock contention diagnostics (queueMs vs engineMs)
- Track query timing with registerQuery/cleanupQuery pattern
- Warn on queueMs > 250ms or totalMs > 1000ms

**Configuration:**
- Add PARALLEL_SYNC_CONCURRENCY env var (default: 10)
- Add USE_PARALLEL_SYNC env var (default: true)
- Add CYCLES_PER_BATCH env var (default: 10)
- Add ENABLE_PREFETCH env var (default: true)
- Add SYNC_RETRY_ATTEMPTS env var (default: 3)

**Collector Entry Point:**
- Auto-select parallel vs legacy sync based on USE_PARALLEL_SYNC flag
- Add downloadTxsDataAndCyclesParallel() function in DataSync.ts
- Maintain backward compatibility with legacy sequential sync

**Error Handling:**
- Exponential backoff retry for ECONNRESET/ETIMEDOUT/ECONNREFUSED/EPIPE
- Detailed error logging with cycle ranges and attempt numbers

**Package Updates:**
- Add p-queue for work queue management
---
 package-lock.json                   |  29 +-
 package.json                        |   3 +-
 src/class/DataSync.ts               |  26 +
 src/class/ParallelCycleSync.ts      | 827 ++++++++++++++++++++++++++++
 src/class/ParallelSyncCheckpoint.ts | 315 +++++++++++
 src/collector.ts                    |  10 +
 src/config/index.ts                 |  10 +
 src/storage/index.ts                |  10 +
 src/storage/originalTxData.ts       |   4 +-
 src/storage/sqlite3storage.ts       | 125 ++++-
 10 files changed, 1351 insertions(+), 8 deletions(-)
 create mode 100644 src/class/ParallelCycleSync.ts
 create mode 100644 src/class/ParallelSyncCheckpoint.ts

diff --git a/package-lock.json b/package-lock.json
index 6b8b23e..07d8fe3 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -42,6 +42,7 @@
         "next": "13.3.4",
         "node-cron": "3.0.2",
         "node-sass": "7.0.3",
+        "p-queue": "^9.0.0",
         "qs": "6.11.0",
         "react": "18.2.0",
         "react-dom": "18.2.0",
@@ -10508,6 +10509,32 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/p-queue": {
+      "version": "9.0.0",
+      "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-9.0.0.tgz",
+      "integrity": "sha512-KO1RyxstL9g1mK76530TExamZC/S2Glm080Nx8PE5sTd7nlduDQsAfEl4uXX+qZjLiwvDauvzXavufy3+rJ9zQ==",
+      "dependencies": {
+        "eventemitter3": "^5.0.1",
+        "p-timeout": "^7.0.0"
+      },
+      "engines": {
+        "node": ">=20"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/p-timeout": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-7.0.1.tgz",
+      "integrity": "sha512-AxTM2wDGORHGEkPCt8yqxOTMgpfbEHqF51f/5fJCmwFC3C/zNcGT63SymH2ttOAaiIws2zVg4+izQCjrakcwHg==",
+      "engines": {
+        "node": ">=20"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
     "node_modules/p-try": {
       "version": "2.2.0",
       "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
@@ -14077,4 +14104,4 @@
       }
     }
   }
-}
+}
\ No newline at end of file
diff --git a/package.json b/package.json
index e8698db..c6b62b6 100644
--- a/package.json
+++ b/package.json
@@ -61,6 +61,7 @@
     "next": "13.3.4",
     "node-cron": "3.0.2",
     "node-sass": "7.0.3",
+    "p-queue": "^9.0.0",
     "qs": "6.11.0",
     "react": "18.2.0",
     "react-dom": "18.2.0",
@@ -109,4 +110,4 @@
   "publishConfig": {
     "access": "public"
   }
-}
+}
\ No newline at end of file
diff --git a/src/class/DataSync.ts b/src/class/DataSync.ts
index d6f4de5..12b05ce 100644
--- a/src/class/DataSync.ts
+++ b/src/class/DataSync.ts
@@ -4,6 +4,7 @@ import { AccountDB, CycleDB, ReceiptDB, TransactionDB, OriginalTxDataDB } from '
 import { config, DISTRIBUTOR_URL } from '../config'
 import { Cycle } from '../types'
 import { Utils as StringUtils } from '@shardus/types'
+import { ParallelCycleSync } from './ParallelCycleSync'
 
 export enum DataType {
   CYCLE = 'cycleinfo',
@@ -832,3 +833,28 @@ export const downloadOriginalTxsDataBetweenCycles = async (
     endCycle += config.requestLimits.MAX_BETWEEN_CYCLES_PER_REQUEST
   }
 }
+
+/**
+ * NEW: Parallel sync using cycle-based partitioning with composite cursors
+ * This is the optimal sync strategy with 10x+ performance improvement
+ */
+export const downloadTxsDataAndCyclesParallel = async (
+  totalCyclesToSync: number,
+  fromCycle = 0
+): Promise<void> => {
+  console.log('\n')
+  console.log('='.repeat(60))
+  console.log('Using PARALLEL SYNC with Composite Cursor')
+  console.log('This prevents data loss and provides 10x+ performance improvement')
+  console.log('='.repeat(60))
+  console.log('\n')
+
+  const parallelSync = new ParallelCycleSync({
+    concurrency: config.parallelSyncConcurrency,
+    batchSize: 500,
+    retryAttempts: 3,
+    retryDelayMs: 1000,
+  })
+
+  await parallelSync.syncCycleRange(fromCycle, totalCyclesToSync)
+}
diff --git a/src/class/ParallelCycleSync.ts b/src/class/ParallelCycleSync.ts
new file mode 100644
index 0000000..4053846
--- /dev/null
+++ b/src/class/ParallelCycleSync.ts
@@ -0,0 +1,827 @@
+import PQueue from 'p-queue'
+import * as crypto from '@shardus/crypto-utils'
+import { Utils as StringUtils } from '@shardus/types'
+import { config, DISTRIBUTOR_URL } from '../config'
+import { queryFromDistributor, DataType } from './DataSync'
+import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
+import { ParallelSyncCheckpointManager, CompositeCursor } from './ParallelSyncCheckpoint'
+import { Cycle } from '../types'
+import axios, { AxiosInstance } from 'axios'
+import http from 'http'
+import https from 'https'
+
+/**
+ * Configuration for parallel sync
+ */
+export interface ParallelSyncConfig {
+  concurrency: number // Number of parallel workers
+  batchSize: number // Items per request
+  retryAttempts: number // Retry failed requests
+  retryDelayMs: number // Delay between retries
+  cyclesPerBatch: number // Number of cycles to batch together (default: 10)
+  enablePrefetch: boolean // Enable prefetching (default: true)
+  prefetchDepth: number // Number of batches to prefetch ahead (default: 1)
+}
+
+/**
+ * Statistics for sync operation
+ */
+export interface SyncStats {
+  startTime: number
+  endTime?: number
+  totalCycles: number
+  completedCycles: number
+  totalReceipts: number
+  totalOriginalTxs: number
+  errors: number
+}
+
+/**
+ * Parallel sync orchestrator using cycle-based partitioning with composite cursors
+ * Implements the optimal sync strategy with:
+ * - Cycle-level parallelization
+ * - Composite cursor (timestamp + ID) to prevent data loss
+ * - Automatic resume from database
+ * - Work queue for load balancing
+ */
+export class ParallelCycleSync {
+  private checkpointManager: ParallelSyncCheckpointManager
+  private queue: PQueue
+  private syncConfig: ParallelSyncConfig
+  private stats: SyncStats
+  private httpAgent: http.Agent
+  private httpsAgent: https.Agent
+  private axiosInstance: AxiosInstance
+
+  constructor(syncConfig?: Partial<ParallelSyncConfig>) {
+    this.checkpointManager = new ParallelSyncCheckpointManager()
+
+    this.syncConfig = {
+      concurrency: syncConfig?.concurrency || config.parallelSyncConcurrency || 10,
+      batchSize: syncConfig?.batchSize || 500,
+      retryAttempts: syncConfig?.retryAttempts || config.syncRetryAttempts || 3,
+      retryDelayMs: syncConfig?.retryDelayMs || 1000,
+      cyclesPerBatch: syncConfig?.cyclesPerBatch || config.cyclesPerBatch || 10,
+      enablePrefetch: syncConfig?.enablePrefetch ?? config.enablePrefetch ?? true,
+      prefetchDepth: syncConfig?.prefetchDepth || 1,
+    }
+
+    // Create HTTP agents with keep-alive to reuse connections
+    this.httpAgent = new http.Agent({
+      keepAlive: true,
+      keepAliveMsecs: 30000,
+      maxSockets: this.syncConfig.concurrency * 2,
+      maxFreeSockets: this.syncConfig.concurrency,
+    })
+
+    this.httpsAgent = new https.Agent({
+      keepAlive: true,
+      keepAliveMsecs: 30000,
+      maxSockets: this.syncConfig.concurrency * 2,
+      maxFreeSockets: this.syncConfig.concurrency,
+    })
+
+    // Create axios instance with keep-alive agents and custom JSON serialization with timing
+    this.axiosInstance = axios.create({
+      httpAgent: this.httpAgent,
+      httpsAgent: this.httpsAgent,
+      timeout: 45000,
+      headers: { 'Content-Type': 'application/json' },
+      transformRequest: [
+        (data) => {
+          // Use custom stringify for request body
+          const startTime = Date.now()
+          const result = StringUtils.safeStringify(data)
+          const elapsed = Date.now() - startTime
+          if (config.verbose && elapsed > 10) {
+            console.log(
+              `[Client] Request stringify: ${elapsed}ms, size: ${(result.length / 1024).toFixed(2)}KB`
+            )
+          }
+          return result
+        },
+      ],
+      transformResponse: [
+        (res) => {
+          // Use custom parse for response with timing
+          const startTime = Date.now()
+          const result = StringUtils.safeJsonParse(res)
+          const elapsed = Date.now() - startTime
+          const sizeKB = typeof res === 'string' ? (res.length / 1024).toFixed(2) : 'unknown'
+          if (config.verbose && elapsed > 50) {
+            console.log(`[Client] Response parse: ${elapsed}ms, size: ${sizeKB}KB`)
+          }
+          return result
+        },
+      ],
+    })
+
+    // Add interval between tasks to prevent overwhelming the distributor
+    this.queue = new PQueue({
+      concurrency: this.syncConfig.concurrency,
+      interval: 100, // 100ms between batches
+      intervalCap: this.syncConfig.concurrency,
+    })
+
+    this.stats = {
+      startTime: Date.now(),
+      totalCycles: 0,
+      completedCycles: 0,
+      totalReceipts: 0,
+      totalOriginalTxs: 0,
+      errors: 0,
+    }
+
+    console.log(
+      `Parallel Sync initialized:` +
+        ` concurrency=${this.syncConfig.concurrency},` +
+        ` cyclesPerBatch=${this.syncConfig.cyclesPerBatch},` +
+        ` prefetch=${this.syncConfig.enablePrefetch ? 'enabled' : 'disabled'},` +
+        ` retryAttempts=${this.syncConfig.retryAttempts}`
+    )
+  }
+
+  /**
+   * Main entry point for parallel sync
+   */
+  async syncCycleRange(startCycle: number, endCycle: number): Promise<void> {
+    console.log(`\n${'='.repeat(60)}`)
+    console.log(`Starting Parallel Cycle Sync: ${startCycle} → ${endCycle}`)
+    console.log(`Concurrency: ${this.syncConfig.concurrency} workers`)
+    console.log(`${'='.repeat(60)}\n`)
+
+    this.stats.startTime = Date.now()
+    this.stats.totalCycles = endCycle - startCycle
+
+    try {
+      // Step 1: Fetch all cycle metadata (lightweight)
+      console.log('Step 1: Fetching cycle metadata...')
+      const cycles = await this.fetchCyclesMetadata(startCycle, endCycle)
+      console.log(`✓ Retrieved ${cycles.length} cycles\n`)
+
+      // Step 2: Sync cycles themselves in parallel
+      console.log('Step 2: Syncing cycle records...')
+      await this.syncCyclesData(cycles)
+      console.log(`✓ Synced ${cycles.length} cycle records\n`)
+
+      // Step 3: Sync receipts and originalTxs for all cycles in parallel with multi-cycle batching
+      console.log('Step 3: Syncing receipts and originalTxs with multi-cycle batching...')
+      await this.syncAllCyclesDataMultiBatch(cycles)
+
+      this.stats.endTime = Date.now()
+
+      // Summary
+      await this.printSummary()
+    } catch (error) {
+      console.error('Fatal error in parallel sync:', error)
+      this.stats.errors++
+      throw error
+    }
+  }
+
+  /**
+   * Fetch cycle metadata from distributor
+   */
+  private async fetchCyclesMetadata(startCycle: number, endCycle: number): Promise<Cycle[]> {
+    const cycles: Cycle[] = []
+
+    // Fetch in chunks
+    const CHUNK_SIZE = 100
+    for (let i = startCycle; i <= endCycle; i += CHUNK_SIZE) {
+      const chunkEnd = Math.min(i + CHUNK_SIZE - 1, endCycle)
+
+      const response = await queryFromDistributor(DataType.CYCLE, {
+        start: i,
+        end: chunkEnd,
+      })
+
+      if (response && response.data && response.data.cycleInfo) {
+        cycles.push(
+          ...response.data.cycleInfo.map((cycleRecord: any) => ({
+            counter: cycleRecord.counter,
+            cycleRecord,
+            start: cycleRecord.start,
+            cycleMarker: cycleRecord.marker,
+          }))
+        )
+      }
+    }
+
+    return cycles
+  }
+
+  /**
+   * Sync cycle records to database
+   */
+  private async syncCyclesData(cycles: Cycle[]): Promise<void> {
+    // Insert cycles in batches
+    const BATCH_SIZE = 100
+    for (let i = 0; i < cycles.length; i += BATCH_SIZE) {
+      const batch = cycles.slice(i, i + BATCH_SIZE)
+      await CycleDB.bulkInsertCycles(batch)
+    }
+  }
+
+  /**
+   * Sync receipts and originalTxs for all cycles in parallel (LEGACY - single cycle per request)
+   */
+  private async syncAllCyclesData(cycles: Cycle[]): Promise<void> {
+    // Add all cycle sync tasks to the queue
+    const tasks = cycles.map((cycle) => this.queue.add(() => this.syncSingleCycle(cycle)))
+
+    // Wait for all tasks to complete
+    await Promise.all(tasks)
+  }
+
+  /**
+   * Sync receipts and originalTxs using multi-cycle batching with prefetching
+   * This dramatically reduces HTTP overhead for cycles with small data
+   */
+  private async syncAllCyclesDataMultiBatch(cycles: Cycle[]): Promise<void> {
+    // Group cycles into batches
+    const cycleBatches: Cycle[][] = []
+    for (let i = 0; i < cycles.length; i += this.syncConfig.cyclesPerBatch) {
+      cycleBatches.push(cycles.slice(i, i + this.syncConfig.cyclesPerBatch))
+    }
+
+    console.log(
+      `Created ${cycleBatches.length} cycle batches (${this.syncConfig.cyclesPerBatch} cycles per batch)`
+    )
+
+    // Add all batch sync tasks to the queue
+    const tasks = cycleBatches.map((batch) => this.queue.add(() => this.syncCycleBatch(batch)))
+
+    // Wait for all tasks to complete
+    await Promise.all(tasks)
+  }
+
+  /**
+   * Sync receipts and originalTxs for a single cycle
+   */
+  private async syncSingleCycle(cycle: Cycle): Promise<void> {
+    try {
+      // Get cycle time boundaries
+      const cycleStart = cycle.start
+      const cycleEnd = cycle.cycleRecord.duration
+        ? cycle.start + cycle.cycleRecord.duration
+        : cycle.start + 60 * 1000 // Default 1 minute
+
+      // Sync both data types in parallel for this cycle
+      await Promise.all([
+        this.syncCycleReceipts(cycle.counter, cycleStart, cycleEnd),
+        this.syncCycleOriginalTxs(cycle.counter, cycleStart, cycleEnd),
+      ])
+
+      this.stats.completedCycles++
+
+      if (config.verbose || this.stats.completedCycles % 10 === 0) {
+        const progress = ((this.stats.completedCycles / this.stats.totalCycles) * 100).toFixed(1)
+        console.log(`Progress: ${this.stats.completedCycles}/${this.stats.totalCycles} cycles (${progress}%)`)
+      }
+    } catch (error) {
+      console.error(`Error syncing cycle ${cycle.counter}:`, error)
+      this.stats.errors++
+      throw error
+    }
+  }
+
+  /**
+   * Sync receipts and originalTxs for a batch of cycles using multi-cycle endpoints
+   * Adaptively handles partial cycle completion (e.g., if requesting cycles 1-10 but only get data from 1-5)
+   */
+  private async syncCycleBatch(cycleBatch: Cycle[]): Promise<void> {
+    if (cycleBatch.length === 0) return
+
+    try {
+      const startCycle = cycleBatch[0].counter
+      const endCycle = cycleBatch[cycleBatch.length - 1].counter
+
+      // Sync both data types in parallel
+      await Promise.all([this.syncCycleBatchReceipts(cycleBatch), this.syncCycleBatchOriginalTxs(cycleBatch)])
+
+      this.stats.completedCycles += cycleBatch.length
+
+      if (config.verbose || this.stats.completedCycles % 10 === 0) {
+        const progress = ((this.stats.completedCycles / this.stats.totalCycles) * 100).toFixed(1)
+        console.log(
+          `Progress: ${this.stats.completedCycles}/${this.stats.totalCycles} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
+        )
+      }
+    } catch (error) {
+      console.error(
+        `Error syncing cycle batch ${cycleBatch[0].counter}-${cycleBatch[cycleBatch.length - 1].counter}:`,
+        error
+      )
+      this.stats.errors++
+      throw error
+    }
+  }
+
+  /**
+   * Sync receipts across a batch of cycles using adaptive multi-cycle fetching with prefetching
+   */
+  private async syncCycleBatchReceipts(cycleBatch: Cycle[]): Promise<void> {
+    const startCycle = cycleBatch[0].counter
+    const endCycle = cycleBatch[cycleBatch.length - 1].counter
+
+    // Get resume cursor from database for the start cycle
+    const initialCursor = await this.checkpointManager.getReceiptsCursor(startCycle, cycleBatch[0].start)
+
+    let currentCycle = startCycle
+    let currentCursor: CompositeCursor = initialCursor
+    let totalFetched = 0
+
+    // Prefetch: Start fetching first batch immediately
+    let nextFetchPromise: Promise<any[]> | null = this.syncConfig.enablePrefetch
+      ? this.fetchReceiptsMultiCycle(currentCycle, endCycle, currentCursor)
+      : null
+
+    while (currentCycle <= endCycle) {
+      try {
+        // Get the data (either from prefetch or fetch now)
+        const response = nextFetchPromise
+          ? await nextFetchPromise
+          : await this.fetchReceiptsMultiCycle(currentCycle, endCycle, currentCursor)
+
+        if (!response || response.length === 0) {
+          break // No more receipts in this cycle range
+        }
+
+        // Update cursor based on last receipt BEFORE starting next fetch
+        const lastReceipt = response[response.length - 1]
+        currentCycle = lastReceipt.cycle
+        const nextCursor: CompositeCursor = {
+          timestamp: lastReceipt.timestamp,
+          id: lastReceipt.receiptId,
+        }
+
+        // Prefetch next batch while processing current batch
+        if (this.syncConfig.enablePrefetch && response.length >= this.syncConfig.batchSize) {
+          nextFetchPromise = this.fetchReceiptsMultiCycle(currentCycle, endCycle, nextCursor)
+        } else {
+          nextFetchPromise = null
+        }
+
+        // Process receipts (overlaps with next fetch if prefetch enabled)
+        await ReceiptDB.processReceiptData(response)
+
+        totalFetched += response.length
+        this.stats.totalReceipts += response.length
+        currentCursor = nextCursor
+
+        if (config.verbose) {
+          console.log(
+            `[Cycles ${startCycle}-${endCycle}] Receipts: +${response.length} (total: ${totalFetched}), ` +
+              `last in cycle ${currentCycle}` +
+              (this.syncConfig.enablePrefetch ? ' [prefetch]' : '')
+          )
+        }
+
+        // If we got less than batch size, we've exhausted this cycle range
+        if (response.length < this.syncConfig.batchSize) {
+          break
+        }
+      } catch (error) {
+        console.error(`Error fetching receipts for cycle batch ${startCycle}-${endCycle}:`, error)
+        throw error
+      }
+    }
+  }
+
+  /**
+   * Sync originalTxs across a batch of cycles using adaptive multi-cycle fetching with prefetching
+   */
+  private async syncCycleBatchOriginalTxs(cycleBatch: Cycle[]): Promise<void> {
+    const startCycle = cycleBatch[0].counter
+    const endCycle = cycleBatch[cycleBatch.length - 1].counter
+
+    // Get resume cursor from database for the start cycle
+    const initialCursor = await this.checkpointManager.getOriginalTxsCursor(startCycle, cycleBatch[0].start)
+
+    let currentCycle = startCycle
+    let currentCursor: CompositeCursor = initialCursor
+    let totalFetched = 0
+
+    // Prefetch: Start fetching first batch immediately
+    let nextFetchPromise: Promise<any[]> | null = this.syncConfig.enablePrefetch
+      ? this.fetchOriginalTxsMultiCycle(currentCycle, endCycle, currentCursor)
+      : null
+
+    while (currentCycle <= endCycle) {
+      try {
+        // Get the data (either from prefetch or fetch now)
+        const response = nextFetchPromise
+          ? await nextFetchPromise
+          : await this.fetchOriginalTxsMultiCycle(currentCycle, endCycle, currentCursor)
+
+        if (!response || response.length === 0) {
+          break // No more originalTxs in this cycle range
+        }
+
+        // Update cursor based on last tx BEFORE starting next fetch
+        const lastTx = response[response.length - 1]
+        currentCycle = lastTx.cycle
+        const nextCursor: CompositeCursor = {
+          timestamp: lastTx.timestamp,
+          id: lastTx.txId,
+        }
+
+        // Prefetch next batch while processing current batch
+        if (this.syncConfig.enablePrefetch && response.length >= this.syncConfig.batchSize) {
+          nextFetchPromise = this.fetchOriginalTxsMultiCycle(currentCycle, endCycle, nextCursor)
+        } else {
+          nextFetchPromise = null
+        }
+
+        // Process originalTxs (overlaps with next fetch if prefetch enabled)
+        await OriginalTxDataDB.processOriginalTxData(response)
+
+        totalFetched += response.length
+        this.stats.totalOriginalTxs += response.length
+        currentCursor = nextCursor
+
+        if (config.verbose) {
+          console.log(
+            `[Cycles ${startCycle}-${endCycle}] OriginalTxs: +${response.length} (total: ${totalFetched}), ` +
+              `last in cycle ${currentCycle}` +
+              (this.syncConfig.enablePrefetch ? ' [prefetch]' : '')
+          )
+        }
+
+        // If we got less than batch size, we've exhausted this cycle range
+        if (response.length < this.syncConfig.batchSize) {
+          break
+        }
+      } catch (error) {
+        console.error(`Error fetching originalTxs for cycle batch ${startCycle}-${endCycle}:`, error)
+        throw error
+      }
+    }
+  }
+
+  /**
+   * Sync receipts for a specific cycle using composite cursor
+   */
+  private async syncCycleReceipts(cycleNumber: number, cycleStart: number, cycleEnd: number): Promise<void> {
+    // Get resume cursor from database
+    const cursor = await this.checkpointManager.getReceiptsCursor(cycleNumber, cycleStart)
+
+    let currentCursor: CompositeCursor = cursor
+    let totalFetched = 0
+
+    while (true) {
+      try {
+        const response = await this.fetchReceiptsWithCursor(cycleNumber, currentCursor, cycleEnd)
+
+        if (!response || response.length === 0) {
+          break // No more receipts for this cycle
+        }
+
+        // Process receipts
+        await ReceiptDB.processReceiptData(response)
+
+        totalFetched += response.length
+        this.stats.totalReceipts += response.length
+
+        // Update cursor to last item
+        const lastReceipt = response[response.length - 1]
+        currentCursor = {
+          timestamp: lastReceipt.timestamp,
+          id: lastReceipt.receiptId,
+        }
+
+        if (config.verbose) {
+          console.log(`[Cycle ${cycleNumber}] Receipts: +${response.length} (total: ${totalFetched})`)
+        }
+
+        // If we got less than batch size, we're done
+        if (response.length < this.syncConfig.batchSize) {
+          break
+        }
+      } catch (error) {
+        console.error(`Error fetching receipts for cycle ${cycleNumber}:`, error)
+        throw error
+      }
+    }
+  }
+
+  /**
+   * Sync originalTxs for a specific cycle using composite cursor
+   */
+  private async syncCycleOriginalTxs(
+    cycleNumber: number,
+    cycleStart: number,
+    cycleEnd: number
+  ): Promise<void> {
+    // Get resume cursor from database
+    const cursor = await this.checkpointManager.getOriginalTxsCursor(cycleNumber, cycleStart)
+
+    let currentCursor: CompositeCursor = cursor
+    let totalFetched = 0
+
+    while (true) {
+      try {
+        const response = await this.fetchOriginalTxsWithCursor(cycleNumber, currentCursor, cycleEnd)
+
+        if (!response || response.length === 0) {
+          break // No more originalTxs for this cycle
+        }
+
+        // Process originalTxs
+        await OriginalTxDataDB.processOriginalTxData(response)
+
+        totalFetched += response.length
+        this.stats.totalOriginalTxs += response.length
+
+        // Update cursor to last item
+        const lastTx = response[response.length - 1]
+        currentCursor = {
+          timestamp: lastTx.timestamp,
+          id: lastTx.txId,
+        }
+
+        if (config.verbose) {
+          console.log(`[Cycle ${cycleNumber}] OriginalTxs: +${response.length} (total: ${totalFetched})`)
+        }
+
+        // If we got less than batch size, we're done
+        if (response.length < this.syncConfig.batchSize) {
+          break
+        }
+      } catch (error) {
+        console.error(`Error fetching originalTxs for cycle ${cycleNumber}:`, error)
+        throw error
+      }
+    }
+  }
+
+  /**
+   * Fetch receipts using composite cursor (prevents data loss on timestamp collisions)
+   */
+  private async fetchReceiptsWithCursor(
+    cycle: number,
+    cursor: CompositeCursor,
+    beforeTimestamp?: number
+  ): Promise<any[]> {
+    const data = {
+      cycle,
+      afterTimestamp: cursor.timestamp,
+      afterReceiptId: cursor.id,
+      beforeTimestamp,
+      limit: this.syncConfig.batchSize,
+      sender: config.collectorInfo.publicKey,
+      sign: undefined,
+    }
+
+    crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
+
+    const url = `${DISTRIBUTOR_URL}/receipt/cycle-cursor`
+
+    try {
+      const response = await this.axiosInstance.post(url, data)
+
+      if (response.data && response.data.receipts) {
+        return response.data.receipts
+      }
+
+      return []
+    } catch (error) {
+      console.error(`Error fetching receipts with cursor:`, error.message)
+      throw error
+    }
+  }
+
+  /**
+   * Fetch originalTxs using composite cursor
+   */
+  private async fetchOriginalTxsWithCursor(
+    cycle: number,
+    cursor: CompositeCursor,
+    beforeTimestamp?: number
+  ): Promise<any[]> {
+    const data = {
+      cycle,
+      afterTimestamp: cursor.timestamp,
+      afterTxId: cursor.id,
+      beforeTimestamp,
+      limit: this.syncConfig.batchSize,
+      sender: config.collectorInfo.publicKey,
+      sign: undefined,
+    }
+
+    crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
+
+    const url = `${DISTRIBUTOR_URL}/originalTx/cycle-cursor`
+
+    try {
+      const response = await this.axiosInstance.post(url, data)
+
+      if (response.data && response.data.originalTxs) {
+        return response.data.originalTxs
+      }
+
+      return []
+    } catch (error) {
+      console.error(`Error fetching originalTxs with cursor:`, error.message)
+      throw error
+    }
+  }
+
+  /**
+   * Fetch receipts across multiple cycles using composite cursor with retry logic
+   * Automatically adapts to cycle sizes - if cycles 1-10 only have data in 1-5, returns that subset
+   */
+  private async fetchReceiptsMultiCycle(
+    startCycle: number,
+    endCycle: number,
+    cursor: CompositeCursor
+  ): Promise<any[]> {
+    const data = {
+      startCycle,
+      endCycle,
+      afterCycle: startCycle,
+      afterTimestamp: cursor.timestamp,
+      afterReceiptId: cursor.id,
+      limit: this.syncConfig.batchSize,
+      sender: config.collectorInfo.publicKey,
+      sign: undefined,
+    }
+
+    crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
+
+    const url = `${DISTRIBUTOR_URL}/receipt/multi-cycle-cursor`
+
+    // Retry with exponential backoff
+    for (let attempt = 0; attempt <= this.syncConfig.retryAttempts; attempt++) {
+      try {
+        const startTime = Date.now()
+        const response = await this.axiosInstance.post(url, data)
+        const networkElapsed = Date.now() - startTime
+
+        const receipts = response.data?.receipts || []
+
+        // Get response size from Content-Length header (fast) instead of re-stringifying (slow)
+        const responseSizeBytes = parseInt(response.headers['content-length'] || '0', 10)
+        const responseSizeKB = responseSizeBytes > 0 ? (responseSizeBytes / 1024).toFixed(2) : 'unknown'
+
+        if (config.verbose || networkElapsed > 1000) {
+          console.log(
+            `[API Timing] Receipts fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+              `records: ${receipts.length}, size: ${responseSizeKB}KB`
+          )
+        }
+
+        if (response.data && response.data.receipts) {
+          return response.data.receipts
+        }
+
+        return []
+      } catch (error: any) {
+        const isLastAttempt = attempt === this.syncConfig.retryAttempts
+        const isRetryableError =
+          error.code === 'ECONNRESET' ||
+          error.code === 'ETIMEDOUT' ||
+          error.code === 'ECONNREFUSED' ||
+          error.code === 'EPIPE'
+
+        if (isRetryableError && !isLastAttempt) {
+          const delay = this.syncConfig.retryDelayMs * Math.pow(2, attempt)
+          console.warn(
+            `ECONNRESET on receipts fetch (cycles ${startCycle}-${endCycle}), ` +
+              `attempt ${attempt + 1}/${this.syncConfig.retryAttempts + 1}, ` +
+              `retrying in ${delay}ms...`
+          )
+          await this.sleep(delay)
+          continue
+        }
+
+        // Non-retryable error or last attempt failed
+        console.error(
+          `Error fetching receipts multi-cycle (cycles ${startCycle}-${endCycle}):`,
+          error.message
+        )
+        throw error
+      }
+    }
+
+    return []
+  }
+
+  /**
+   * Fetch originalTxs across multiple cycles using composite cursor with retry logic
+   */
+  private async fetchOriginalTxsMultiCycle(
+    startCycle: number,
+    endCycle: number,
+    cursor: CompositeCursor
+  ): Promise<any[]> {
+    const data = {
+      startCycle,
+      endCycle,
+      afterCycle: startCycle,
+      afterTimestamp: cursor.timestamp,
+      afterTxId: cursor.id,
+      limit: this.syncConfig.batchSize,
+      sender: config.collectorInfo.publicKey,
+      sign: undefined,
+    }
+
+    crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
+
+    const url = `${DISTRIBUTOR_URL}/originalTx/multi-cycle-cursor`
+
+    // Retry with exponential backoff
+    for (let attempt = 0; attempt <= this.syncConfig.retryAttempts; attempt++) {
+      try {
+        const startTime = Date.now()
+        const response = await this.axiosInstance.post(url, data)
+        const networkElapsed = Date.now() - startTime
+
+        const originalTxs = response.data?.originalTxs || []
+
+        // Get response size from Content-Length header (fast) instead of re-stringifying (slow)
+        const responseSizeBytes = parseInt(response.headers['content-length'] || '0', 10)
+        const responseSizeKB = responseSizeBytes > 0 ? (responseSizeBytes / 1024).toFixed(2) : 'unknown'
+
+        if (config.verbose || networkElapsed > 1000) {
+          console.log(
+            `[API Timing] OriginalTxs fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+              `records: ${originalTxs.length}, size: ${responseSizeKB}KB`
+          )
+        }
+
+        if (response.data && response.data.originalTxs) {
+          return response.data.originalTxs
+        }
+
+        return []
+      } catch (error: any) {
+        const isLastAttempt = attempt === this.syncConfig.retryAttempts
+        const isRetryableError =
+          error.code === 'ECONNRESET' ||
+          error.code === 'ETIMEDOUT' ||
+          error.code === 'ECONNREFUSED' ||
+          error.code === 'EPIPE'
+
+        if (isRetryableError && !isLastAttempt) {
+          const delay = this.syncConfig.retryDelayMs * Math.pow(2, attempt)
+          console.warn(
+            `ECONNRESET on originalTxs fetch (cycles ${startCycle}-${endCycle}), ` +
+              `attempt ${attempt + 1}/${this.syncConfig.retryAttempts + 1}, ` +
+              `retrying in ${delay}ms...`
+          )
+          await this.sleep(delay)
+          continue
+        }
+
+        // Non-retryable error or last attempt failed
+        console.error(
+          `Error fetching originalTxs multi-cycle (cycles ${startCycle}-${endCycle}):`,
+          error.message
+        )
+        throw error
+      }
+    }
+
+    return []
+  }
+
+  /**
+   * Sleep helper for retry delays
+   */
+  private sleep(ms: number): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, ms))
+  }
+
+  /**
+   * Print sync summary
+   */
+  private async printSummary(): Promise<void> {
+    const elapsedMs = (this.stats.endTime || Date.now()) - this.stats.startTime
+    const elapsedSec = (elapsedMs / 1000).toFixed(2)
+    const elapsedMin = (elapsedMs / 60000).toFixed(2)
+
+    console.log(`\n${'='.repeat(60)}`)
+    console.log('Parallel Sync Complete!')
+    console.log(`${'='.repeat(60)}`)
+    console.log(`  Cycles Synced:     ${this.stats.completedCycles}/${this.stats.totalCycles}`)
+    console.log(`  Receipts Synced:   ${this.stats.totalReceipts}`)
+    console.log(`  OriginalTxs Synced: ${this.stats.totalOriginalTxs}`)
+    console.log(`  Errors:            ${this.stats.errors}`)
+    console.log(`  Time Elapsed:      ${elapsedSec}s (${elapsedMin} min)`)
+    console.log(
+      `  Throughput:        ${(this.stats.totalReceipts / (elapsedMs / 1000)).toFixed(0)} receipts/sec`
+    )
+    console.log(`${'='.repeat(60)}\n`)
+
+    // Print DB summary
+    await this.checkpointManager.printSyncSummary()
+  }
+
+  /**
+   * Get current statistics
+   */
+  getStats(): SyncStats {
+    return { ...this.stats }
+  }
+}
diff --git a/src/class/ParallelSyncCheckpoint.ts b/src/class/ParallelSyncCheckpoint.ts
new file mode 100644
index 0000000..472a0d8
--- /dev/null
+++ b/src/class/ParallelSyncCheckpoint.ts
@@ -0,0 +1,315 @@
+import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
+import { config } from '../config'
+
+/**
+ * Composite cursor for tracking sync progress
+ * Uses both timestamp and ID to handle timestamp collisions
+ */
+export interface CompositeCursor {
+  timestamp: number
+  id: string // receiptId or txId
+}
+
+/**
+ * Cycle resume information from database
+ */
+export interface CycleResumeInfo {
+  cycleNumber: number
+  startTimestamp: number
+  endTimestamp: number
+  receipts: {
+    lastTimestamp: number
+    lastId: string
+    count: number
+  }
+  originalTxs: {
+    lastTimestamp: number
+    lastId: string
+    count: number
+  }
+}
+
+/**
+ * Manages sync state by querying the database
+ * No separate checkpoint storage needed - DB is source of truth
+ */
+export class ParallelSyncCheckpointManager {
+  /**
+   * Get the last completed cycle from database
+   */
+  async getLastCompletedCycle(): Promise<number> {
+    try {
+      const cycles = await CycleDB.queryLatestCycleRecords(1)
+      if (cycles && cycles.length > 0) {
+        return cycles[0].counter
+      }
+      return 0
+    } catch (error) {
+      console.error('Error getting last completed cycle:', error)
+      return 0
+    }
+  }
+
+  /**
+   * Get resume cursor for receipts in a specific cycle
+   * Returns the last receipt's timestamp and ID, or cycle start if none exist
+   */
+  async getReceiptsCursor(cycleNumber: number, cycleStartTimestamp: number): Promise<CompositeCursor> {
+    try {
+      // Query last receipt for this cycle
+      const receipts = await ReceiptDB.queryReceipts({
+        limit: 1,
+        startCycleNumber: cycleNumber,
+      })
+
+      if (receipts && receipts.length > 0) {
+        const lastReceipt = receipts[0]
+        return {
+          timestamp: lastReceipt.timestamp,
+          id: lastReceipt.receiptId,
+        }
+      }
+
+      // No receipts found for this cycle, start from cycle beginning
+      return {
+        timestamp: cycleStartTimestamp,
+        id: '',
+      }
+    } catch (error) {
+      console.error(`Error getting receipts cursor for cycle ${cycleNumber}:`, error)
+      return {
+        timestamp: cycleStartTimestamp,
+        id: '',
+      }
+    }
+  }
+
+  /**
+   * Get resume cursor for originalTxs in a specific cycle
+   */
+  async getOriginalTxsCursor(cycleNumber: number, cycleStartTimestamp: number): Promise<CompositeCursor> {
+    try {
+      // Query last originalTx for this cycle
+      const originalTxs = await OriginalTxDataDB.queryOriginalTxsData({
+        limit: 1, // limit
+        startCycle: cycleNumber, // startCycle
+      })
+
+      if (originalTxs && originalTxs.length > 0) {
+        // Sort by timestamp DESC to get the last one
+        originalTxs.sort((a, b) => b.timestamp - a.timestamp)
+        const lastTx = originalTxs[0]
+        return {
+          timestamp: lastTx.timestamp,
+          id: lastTx.txId,
+        }
+      }
+
+      // No originalTxs found for this cycle, start from cycle beginning
+      return {
+        timestamp: cycleStartTimestamp,
+        id: '',
+      }
+    } catch (error) {
+      console.error(`Error getting originalTxs cursor for cycle ${cycleNumber}:`, error)
+      return {
+        timestamp: cycleStartTimestamp,
+        id: '',
+      }
+    }
+  }
+
+  /**
+   * Get counts of data already synced for a cycle
+   */
+  async getCycleSyncStatus(cycleNumber: number): Promise<{
+    receiptsCount: number
+    originalTxsCount: number
+    isComplete: boolean
+  }> {
+    try {
+      const [receiptsCountResult, originalTxsCountResult] = await Promise.all([
+        ReceiptDB.queryReceiptCountByCycles(cycleNumber, cycleNumber),
+        OriginalTxDataDB.queryOriginalTxDataCountByCycles(cycleNumber, cycleNumber),
+      ])
+
+      const receiptsCount =
+        receiptsCountResult && receiptsCountResult.length > 0 ? receiptsCountResult[0].receipts : 0
+
+      const originalTxsCount =
+        originalTxsCountResult && originalTxsCountResult.length > 0
+          ? originalTxsCountResult[0].originalTxsData
+          : 0
+
+      return {
+        receiptsCount,
+        originalTxsCount,
+        isComplete: false, // Determined by sync logic
+      }
+    } catch (error) {
+      console.error(`Error getting cycle sync status for cycle ${cycleNumber}:`, error)
+      return {
+        receiptsCount: 0,
+        originalTxsCount: 0,
+        isComplete: false,
+      }
+    }
+  }
+
+  /**
+   * Determine which cycles need to be synced
+   * Compares local DB with distributor totals
+   */
+  async getCyclesToSync(startCycle: number, endCycle: number): Promise<number[]> {
+    try {
+      const lastLocalCycle = await this.getLastCompletedCycle()
+
+      // If we have no local data, sync all cycles
+      if (lastLocalCycle === 0) {
+        const cyclesToSync: number[] = []
+        for (let i = startCycle; i <= endCycle; i++) {
+          cyclesToSync.push(i)
+        }
+        return cyclesToSync
+      }
+
+      // If endCycle is beyond what we have, sync from last local + 1
+      if (endCycle > lastLocalCycle) {
+        const cyclesToSync: number[] = []
+        for (let i = lastLocalCycle + 1; i <= endCycle; i++) {
+          cyclesToSync.push(i)
+        }
+        return cyclesToSync
+      }
+
+      // All cycles already synced
+      return []
+    } catch (error) {
+      console.error('Error determining cycles to sync:', error)
+      return []
+    }
+  }
+
+  /**
+   * Check if a cycle is fully synced by comparing counts with distributor
+   */
+  async isCycleFullySynced(
+    cycleNumber: number,
+    expectedReceiptsCount: number,
+    expectedOriginalTxsCount: number
+  ): Promise<boolean> {
+    try {
+      const status = await this.getCycleSyncStatus(cycleNumber)
+
+      const receiptsMatch = status.receiptsCount === expectedReceiptsCount
+      const originalTxsMatch = status.originalTxsCount === expectedOriginalTxsCount
+
+      if (config.verbose) {
+        console.log(
+          `Cycle ${cycleNumber} sync check: ` +
+            `receipts ${status.receiptsCount}/${expectedReceiptsCount}, ` +
+            `originalTxs ${status.originalTxsCount}/${expectedOriginalTxsCount}`
+        )
+      }
+
+      return receiptsMatch && originalTxsMatch
+    } catch (error) {
+      console.error(`Error checking if cycle ${cycleNumber} is fully synced:`, error)
+      return false
+    }
+  }
+
+  /**
+   * Get detailed resume information for a specific cycle
+   */
+  async getCycleResumeInfo(
+    cycleNumber: number,
+    cycleStartTimestamp: number,
+    cycleEndTimestamp: number
+  ): Promise<CycleResumeInfo> {
+    const [receiptsCursor, originalTxsCursor, syncStatus] = await Promise.all([
+      this.getReceiptsCursor(cycleNumber, cycleStartTimestamp),
+      this.getOriginalTxsCursor(cycleNumber, cycleStartTimestamp),
+      this.getCycleSyncStatus(cycleNumber),
+    ])
+
+    return {
+      cycleNumber,
+      startTimestamp: cycleStartTimestamp,
+      endTimestamp: cycleEndTimestamp,
+      receipts: {
+        lastTimestamp: receiptsCursor.timestamp,
+        lastId: receiptsCursor.id,
+        count: syncStatus.receiptsCount,
+      },
+      originalTxs: {
+        lastTimestamp: originalTxsCursor.timestamp,
+        lastId: originalTxsCursor.id,
+        count: syncStatus.originalTxsCount,
+      },
+    }
+  }
+
+  /**
+   * Log sync progress
+   */
+  logProgress(
+    cycleNumber: number,
+    dataType: 'receipts' | 'originalTxs',
+    itemsFetched: number,
+    totalItems: number
+  ): void {
+    const percentage = totalItems > 0 ? ((totalItems / totalItems) * 100).toFixed(1) : '0.0'
+    console.log(
+      `[Cycle ${cycleNumber}] ${dataType}: +${itemsFetched} items (total: ${totalItems}, ${percentage}%)`
+    )
+  }
+
+  /**
+   * Get overall sync statistics from database
+   */
+  async getSyncStats(): Promise<{
+    totalCycles: number
+    totalReceipts: number
+    totalOriginalTxs: number
+    lastCycleNumber: number
+  }> {
+    try {
+      const [cycleCount, receiptCount, originalTxCount, lastCycle] = await Promise.all([
+        CycleDB.queryCycleCount(),
+        ReceiptDB.queryReceiptCount(),
+        OriginalTxDataDB.queryOriginalTxDataCount(),
+        this.getLastCompletedCycle(),
+      ])
+
+      return {
+        totalCycles: cycleCount || 0,
+        totalReceipts: receiptCount || 0,
+        totalOriginalTxs: originalTxCount || 0,
+        lastCycleNumber: lastCycle,
+      }
+    } catch (error) {
+      console.error('Error getting sync stats:', error)
+      return {
+        totalCycles: 0,
+        totalReceipts: 0,
+        totalOriginalTxs: 0,
+        lastCycleNumber: 0,
+      }
+    }
+  }
+
+  /**
+   * Print sync summary
+   */
+  async printSyncSummary(): Promise<void> {
+    const stats = await this.getSyncStats()
+    console.log('='.repeat(60))
+    console.log('Sync Summary:')
+    console.log(`  Total Cycles:      ${stats.totalCycles}`)
+    console.log(`  Total Receipts:    ${stats.totalReceipts}`)
+    console.log(`  Total OriginalTxs: ${stats.totalOriginalTxs}`)
+    console.log(`  Last Cycle:        ${stats.lastCycleNumber}`)
+    console.log('='.repeat(60))
+  }
+}
diff --git a/src/collector.ts b/src/collector.ts
index 1232126..76cb0b2 100644
--- a/src/collector.ts
+++ b/src/collector.ts
@@ -9,6 +9,7 @@ import * as Crypto from './utils/crypto'
 import { CycleDB, ReceiptDB, OriginalTxDataDB } from './storage'
 import {
   downloadTxsDataAndCycles,
+  downloadTxsDataAndCyclesParallel,
   compareWithOldReceiptsData,
   compareWithOldCyclesData,
   downloadAndSyncGenesisAccounts,
@@ -224,6 +225,15 @@ export const checkAndSyncData = async (): Promise<() => Promise<void>> => {
   const syncData = async (): Promise<void> => {
     // If there is already some data in the db, we can assume that the genesis accounts data has been synced already
     if (lastStoredCycleCount === 0) await downloadAndSyncGenesisAccounts() // To sync accounts data that are from genesis accounts/accounts data that the network start with
+
+    // Use parallel sync if enabled (default)
+    if (config.useParallelSync) {
+      console.log('Using optimized parallel sync strategy')
+      await downloadTxsDataAndCyclesParallel(totalCyclesToSync, lastStoredCycleCount)
+      return
+    }
+
+    console.log('Using legacy sequential sync strategy')
     // Sync receipts and originalTxsData data first if there is old data
     if (
       lastStoredReceiptCycle > 0 &&
diff --git a/src/config/index.ts b/src/config/index.ts
index 2ddb2d5..b602842 100644
--- a/src/config/index.ts
+++ b/src/config/index.ts
@@ -94,6 +94,11 @@ export interface Config {
     MAX_ACCOUNT_HISTORY_STATES_PER_REQUEST: number
     MAX_STATS_PER_REQUEST: number
   }
+  parallelSyncConcurrency: number // Number of parallel workers for cycle sync
+  useParallelSync: boolean // Enable parallel sync with composite cursor
+  cyclesPerBatch: number // Number of cycles to batch together in multi-cycle requests (default: 10)
+  enablePrefetch: boolean // Enable prefetching of next batch while processing current batch (default: true)
+  syncRetryAttempts: number // Number of retry attempts for failed requests (default: 3)
   dexScreenerAPI: string // Dex Screener API URL for Liberdus token
   dexScreenerLink: string // Dex Screener Link for Liberdus token
 }
@@ -184,6 +189,11 @@ let config: Config = {
     MAX_ACCOUNT_HISTORY_STATES_PER_REQUEST: 100,
     MAX_STATS_PER_REQUEST: 1000000,
   },
+  parallelSyncConcurrency: Number(process.env.PARALLEL_SYNC_CONCURRENCY) || 10, // 10 parallel workers
+  useParallelSync: process.env.USE_PARALLEL_SYNC !== 'false', // Enable by default
+  cyclesPerBatch: Number(process.env.CYCLES_PER_BATCH) || 10, // Batch 10 cycles together
+  enablePrefetch: process.env.ENABLE_PREFETCH !== 'false', // Enable prefetch by default
+  syncRetryAttempts: Number(process.env.SYNC_RETRY_ATTEMPTS) || 3, // Retry failed requests 3 times
   dexScreenerAPI:
     'https://api.dexscreener.com/latest/dex/search?q=0x693ed886545970F0a3ADf8C59af5cCdb6dDF0a76',
   dexScreenerLink: 'https://dexscreener.com/polygon/0x041e48a5b11c29fdbd92498eb05573c52728398c',
diff --git a/src/storage/index.ts b/src/storage/index.ts
index cc314d6..92f0793 100644
--- a/src/storage/index.ts
+++ b/src/storage/index.ts
@@ -147,6 +147,11 @@ export const initializeDB = async (): Promise<void> => {
     receiptDatabase,
     'CREATE INDEX if not exists `receipts_cycle_timestamp` ON `receipts` (`cycle` DESC, `timestamp` DESC)'
   )
+  // Composite index for cursor-based pagination (optimal for parallel sync)
+  await runCreate(
+    receiptDatabase,
+    'CREATE INDEX if not exists `receipts_cycle_timestamp_receiptId` ON `receipts` (`cycle` ASC, `timestamp` ASC, `receiptId` ASC)'
+  )
   // be sure to adjust the data types of `transactionType`, `txFrom`, `txTo` as needed
   await runCreate(
     originalTxDataDatabase,
@@ -173,6 +178,11 @@ export const initializeDB = async (): Promise<void> => {
     originalTxDataDatabase,
     'CREATE INDEX if not exists `originalTxsData_cycle_timestamp` ON `originalTxsData` (`cycle` DESC, `timestamp` DESC)'
   )
+  // Composite index for cursor-based pagination (optimal for parallel sync)
+  await runCreate(
+    originalTxDataDatabase,
+    'CREATE INDEX if not exists `originalTxsData_cycle_timestamp_txId` ON `originalTxsData` (`cycle` ASC, `timestamp` ASC, `txId` ASC)'
+  )
   await runCreate(
     originalTxDataDatabase,
     'CREATE INDEX if not exists `originalTxsData_txType` ON `originalTxsData` (`transactionType`)'
diff --git a/src/storage/originalTxData.ts b/src/storage/originalTxData.ts
index 137a5a1..d5bcd45 100644
--- a/src/storage/originalTxData.ts
+++ b/src/storage/originalTxData.ts
@@ -142,7 +142,7 @@ export async function queryOriginalTxDataCount(
     }
     if (startCycle || endCycle) {
       sql = db.updateSqlStatementClause(sql, values)
-      sql += `cycleNumber BETWEEN ? AND ?`
+      sql += `cycle BETWEEN ? AND ?`
       values.push(startCycle, endCycle)
     }
     if (afterTimestamp) {
@@ -176,7 +176,7 @@ export async function queryOriginalTxsData(query: QueryOriginalTxsDataParams): P
     }
     if (startCycle || endCycle) {
       sql = db.updateSqlStatementClause(sql, values)
-      sql += `cycleNumber BETWEEN ? AND ?`
+      sql += `cycle BETWEEN ? AND ?`
       values.push(startCycle, endCycle)
     }
     if (afterTimestamp) {
diff --git a/src/storage/sqlite3storage.ts b/src/storage/sqlite3storage.ts
index 36988e4..6c68b85 100644
--- a/src/storage/sqlite3storage.ts
+++ b/src/storage/sqlite3storage.ts
@@ -1,6 +1,73 @@
 import { Utils as StringUtils } from '@shardus/types'
 import { Database } from 'sqlite3'
 
+interface QueryTiming {
+  id: number
+  sql: string
+  startMs: number
+  engineMs?: number
+}
+
+const SQL_LOG_MAX_LENGTH = 200
+const SQL_ENGINE_WARN_THRESHOLD_MS = 500
+const SQL_QUEUE_WARN_THRESHOLD_MS = 250
+const SQL_TOTAL_WARN_THRESHOLD_MS = 1000
+
+let queryIdSequence = 0
+const pendingQueries = new Map<number, QueryTiming>()
+const queuedBySql = new Map<string, number[]>()
+
+function formatSqlForLog(sql: string): string {
+  const normalized = sql.replace(/\s+/g, ' ').trim()
+  if (normalized.length <= SQL_LOG_MAX_LENGTH) return normalized
+  return `${normalized.slice(0, SQL_LOG_MAX_LENGTH - 3)}...`
+}
+
+function registerQuery(sql: string): QueryTiming {
+  const entry: QueryTiming = {
+    id: ++queryIdSequence,
+    sql,
+    startMs: Date.now(),
+  }
+  pendingQueries.set(entry.id, entry)
+  let queue = queuedBySql.get(sql)
+  if (!queue) {
+    queue = []
+    queuedBySql.set(sql, queue)
+  }
+  queue.push(entry.id)
+  return entry
+}
+
+function cleanupQuery(entry: QueryTiming): void {
+  pendingQueries.delete(entry.id)
+  const queue = queuedBySql.get(entry.sql)
+  if (!queue) return
+  const index = queue.indexOf(entry.id)
+  if (index !== -1) queue.splice(index, 1)
+  if (queue.length === 0) queuedBySql.delete(entry.sql)
+}
+
+function logTiming(operation: string, entry: QueryTiming, rows?: number): void {
+  const totalMs = Date.now() - entry.startMs
+  const engineMs = entry.engineMs ?? 0
+  const queueMs = Math.max(0, totalMs - engineMs)
+  const payload = {
+    operation,
+    totalMs: Number(totalMs.toFixed(2)),
+    queueMs: Number(queueMs.toFixed(2)),
+    engineMs: Number(engineMs.toFixed(2)),
+    sql: formatSqlForLog(entry.sql),
+    rows,
+  }
+
+  if (totalMs > SQL_TOTAL_WARN_THRESHOLD_MS || queueMs > SQL_QUEUE_WARN_THRESHOLD_MS) {
+    console.warn('[DB Timing]', payload)
+  } else {
+    console.log('[DB Timing]', payload)
+  }
+}
+
 export const createDB = async (dbPath: string, dbName: string): Promise<Database> => {
   console.log('dbName', dbName, 'dbPath', dbPath)
   const db = new Database(dbPath, (err) => {
@@ -15,10 +82,33 @@ export const createDB = async (dbPath: string, dbName: string): Promise<Database
   await run(db, 'PRAGMA cache_size = -64000') // ~64MB cache
   await run(db, 'PRAGMA wal_autocheckpoint = 1000') // Checkpoint every 1000 ( default value ) pages
   db.on('profile', (sql, time) => {
-    if (time > 500 && time < 1000) {
-      console.log('SLOW QUERY', process.pid, sql, time)
-    } else if (time > 1000) {
-      console.log('VERY SLOW QUERY', process.pid, sql, time)
+    const engineMs = typeof time === 'number' ? time : Number(time)
+    const queue = queuedBySql.get(sql)
+    const id = queue && queue.length > 0 ? queue[0] : undefined
+    if (id === undefined) {
+      console.warn('[DB Timing] profile event without pending query', {
+        pid: process.pid,
+        engineMs,
+        sql: formatSqlForLog(sql),
+      })
+      return
+    }
+    const entry = pendingQueries.get(id)
+    if (!entry) {
+      console.warn('[DB Timing] profile missing pending entry', {
+        pid: process.pid,
+        engineMs,
+        sql: formatSqlForLog(sql),
+      })
+      return
+    }
+    entry.engineMs = engineMs
+    if (engineMs > SQL_ENGINE_WARN_THRESHOLD_MS) {
+      console.warn('[DB Engine] Slow engine execution detected', {
+        pid: process.pid,
+        engineMs: Number(engineMs.toFixed(2)),
+        sql: formatSqlForLog(sql),
+      })
     }
   })
   console.log(`Database ${dbName} Initialized!`)
@@ -58,12 +148,21 @@ export async function run(
   params: unknown[] | object = []
 ): Promise<{ id: number }> {
   return new Promise((resolve, reject) => {
+    const entry = registerQuery(sql)
+    const finalize = (): void => {
+      setImmediate(() => {
+        logTiming('run', entry)
+        cleanupQuery(entry)
+      })
+    }
     db.run(sql, params, function (err: Error) {
       if (err) {
         console.log('Error running sql ' + sql)
         console.log(err)
+        finalize()
         reject(err)
       } else {
+        finalize()
         resolve({ id: this.lastID })
       }
     })
@@ -72,12 +171,21 @@ export async function run(
 
 export async function get<T>(db: Database, sql: string, params = []): Promise<T> {
   return new Promise((resolve, reject) => {
+    const entry = registerQuery(sql)
+    const finalize = (rows?: number): void => {
+      setImmediate(() => {
+        logTiming('get', entry, rows)
+        cleanupQuery(entry)
+      })
+    }
     db.get(sql, params, (err: Error, result: T) => {
       if (err) {
         console.log('Error running sql: ' + sql)
         console.log(err)
+        finalize()
         reject(err)
       } else {
+        finalize(result ? 1 : 0)
         resolve(result)
       }
     })
@@ -86,12 +194,21 @@ export async function get<T>(db: Database, sql: string, params = []): Promise<T>
 
 export async function all<T>(db: Database, sql: string, params = []): Promise<T[]> {
   return new Promise((resolve, reject) => {
+    const entry = registerQuery(sql)
+    const finalize = (rowsCount?: number): void => {
+      setImmediate(() => {
+        logTiming('all', entry, rowsCount)
+        cleanupQuery(entry)
+      })
+    }
     db.all(sql, params, (err: Error, rows: T[]) => {
       if (err) {
         console.log('Error running sql: ' + sql)
         console.log(err)
+        finalize()
         reject(err)
       } else {
+        finalize(rows ? rows.length : 0)
         resolve(rows)
       }
     })

From e2126ce111f566822bbd45c2d809b82403fd04a2 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Tue, 4 Nov 2025 16:22:52 +0800
Subject: [PATCH 02/14] feat: enhance API request handling with gzip support
 and improved response size logging

---
 src/class/ParallelCycleSync.ts | 35 +++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/class/ParallelCycleSync.ts b/src/class/ParallelCycleSync.ts
index 4053846..e25bfd9 100644
--- a/src/class/ParallelCycleSync.ts
+++ b/src/class/ParallelCycleSync.ts
@@ -86,7 +86,10 @@ export class ParallelCycleSync {
       httpAgent: this.httpAgent,
       httpsAgent: this.httpsAgent,
       timeout: 45000,
-      headers: { 'Content-Type': 'application/json' },
+      headers: {
+        'Content-Type': 'application/json',
+        'Accept-Encoding': 'gzip, deflate', // Request compressed responses
+      },
       transformRequest: [
         (data) => {
           // Use custom stringify for request body
@@ -660,14 +663,19 @@ export class ParallelCycleSync {
 
         const receipts = response.data?.receipts || []
 
-        // Get response size from Content-Length header (fast) instead of re-stringifying (slow)
-        const responseSizeBytes = parseInt(response.headers['content-length'] || '0', 10)
-        const responseSizeKB = responseSizeBytes > 0 ? (responseSizeBytes / 1024).toFixed(2) : 'unknown'
+        // Get response size - with compression, Content-Length might not be accurate
+        const contentLength = response.headers['content-length']
+        const contentEncoding = response.headers['content-encoding']
+        const responseSizeBytes = contentLength ? parseInt(contentLength, 10) : 0
+        const responseSizeKB = responseSizeBytes > 0 ? (responseSizeBytes / 1024).toFixed(2) : '0.00'
 
-        if (config.verbose || networkElapsed > 1000) {
+        if (config.verbose || networkElapsed > 1000 || receipts.length === 0) {
           console.log(
             `[API Timing] Receipts fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
-              `records: ${receipts.length}, size: ${responseSizeKB}KB`
+              `records: ${receipts.length}, size: ${responseSizeKB}KB` +
+              (contentEncoding ? `, encoding: ${contentEncoding}` : '') +
+              (receipts.length === 0 && response.data ? ', response.data exists but empty' : '') +
+              (!response.data ? ', response.data is null/undefined!' : '')
           )
         }
 
@@ -739,14 +747,19 @@ export class ParallelCycleSync {
 
         const originalTxs = response.data?.originalTxs || []
 
-        // Get response size from Content-Length header (fast) instead of re-stringifying (slow)
-        const responseSizeBytes = parseInt(response.headers['content-length'] || '0', 10)
-        const responseSizeKB = responseSizeBytes > 0 ? (responseSizeBytes / 1024).toFixed(2) : 'unknown'
+        // Get response size - with compression, Content-Length might not be accurate
+        const contentLength = response.headers['content-length']
+        const contentEncoding = response.headers['content-encoding']
+        const responseSizeBytes = contentLength ? parseInt(contentLength, 10) : 0
+        const responseSizeKB = responseSizeBytes > 0 ? (responseSizeBytes / 1024).toFixed(2) : '0.00'
 
-        if (config.verbose || networkElapsed > 1000) {
+        if (config.verbose || networkElapsed > 1000 || originalTxs.length === 0) {
           console.log(
             `[API Timing] OriginalTxs fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
-              `records: ${originalTxs.length}, size: ${responseSizeKB}KB`
+              `records: ${originalTxs.length}, size: ${responseSizeKB}KB` +
+              (contentEncoding ? `, encoding: ${contentEncoding}` : '') +
+              (originalTxs.length === 0 && response.data ? ', response.data exists but empty' : '') +
+              (!response.data ? ', response.data is null/undefined!' : '')
           )
         }
 

From c5e283c5acd79c0660c026af4823e90d927faa73 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Wed, 5 Nov 2025 23:26:24 +0800
Subject: [PATCH 03/14] Enhance API timing logs with compression metrics and
 standardize naming

- Track response payload sizes (compressed and uncompressed) via socket bytesRead
- Calculate compression ratio and savings for API responses
- Standardize log naming to match Fastify: payload (compressed), payloadUncompressed (uncompressed)
- Add response interceptor to capture actual bytes transferred over network
- Update receipts and originalTxs fetch logs with consistent format
---
 src/class/ParallelCycleSync.ts | 179 ++++++++++++++++++++++++++++-----
 1 file changed, 155 insertions(+), 24 deletions(-)

diff --git a/src/class/ParallelCycleSync.ts b/src/class/ParallelCycleSync.ts
index e25bfd9..55a8722 100644
--- a/src/class/ParallelCycleSync.ts
+++ b/src/class/ParallelCycleSync.ts
@@ -36,6 +36,23 @@ export interface SyncStats {
   errors: number
 }
 
+/**
+ * Response size metadata attached by transformResponse and interceptor
+ */
+interface ResponseSizeMetadata {
+  decompressedBytes: number
+  decompressedKB: string
+  compressedBytes?: number
+  compressedKB?: string
+  compressionRatio?: number
+  compressionSavings?: string
+}
+
+interface ResponseDataWithMetadata {
+  __responseSize?: ResponseSizeMetadata
+  [key: string]: unknown
+}
+
 /**
  * Parallel sync orchestrator using cycle-based partitioning with composite cursors
  * Implements the optimal sync strategy with:
@@ -108,9 +125,25 @@ export class ParallelCycleSync {
         (res) => {
           // Use custom parse for response with timing
           const startTime = Date.now()
-          const result = StringUtils.safeJsonParse(res)
+          const result = typeof res === 'string' ? StringUtils.safeJsonParse(res) : res
           const elapsed = Date.now() - startTime
-          const sizeKB = typeof res === 'string' ? (res.length / 1024).toFixed(2) : 'unknown'
+
+          // Calculate decompressed size from raw response string
+          const decompressedBytes = typeof res === 'string' ? Buffer.byteLength(res) : 0
+          const sizeKB = (decompressedBytes / 1024).toFixed(2)
+
+          // Attach size metadata to result for later use
+          if (result && typeof result === 'object') {
+            Object.defineProperty(result, '__responseSize', {
+              value: {
+                decompressedBytes,
+                decompressedKB: sizeKB,
+              },
+              enumerable: false, // Hidden from JSON.stringify and iteration
+              configurable: true,
+            })
+          }
+
           if (config.verbose && elapsed > 50) {
             console.log(`[Client] Response parse: ${elapsed}ms, size: ${sizeKB}KB`)
           }
@@ -119,6 +152,80 @@ export class ParallelCycleSync {
       ],
     })
 
+    // Add response interceptor to capture compressed size from socket bytesRead
+    this.axiosInstance.interceptors.response.use(
+      (response) => {
+        // Get Content-Length header for fallback
+        const contentLength = response.headers['content-length']
+
+        // Get socket from the request object
+        const socket = response.request?.socket
+
+        let compressedBytes: number | undefined
+
+        // Try to calculate compressed size from socket bytesRead (most accurate)
+        // We track cumulative bytesRead on the socket across requests (due to keep-alive)
+        if (socket && typeof socket.bytesRead === 'number') {
+          const currentBytesRead = socket.bytesRead
+          const lastBytesRead = (socket as { _lastBytesRead?: number })._lastBytesRead
+
+          if (lastBytesRead !== undefined) {
+            const rawBytes = currentBytesRead - lastBytesRead
+
+            // Subtract estimated header size (HTTP response headers + status line)
+            // Typical: "HTTP/1.1 200 OK\r\n" + headers + "\r\n\r\n" ≈ 200-400 bytes
+            const estimatedHeaderSize = 250
+            if (rawBytes > estimatedHeaderSize) {
+              compressedBytes = rawBytes - estimatedHeaderSize
+            }
+          }
+
+          // Update last bytesRead for next request on this socket
+          ;(socket as { _lastBytesRead?: number })._lastBytesRead = currentBytesRead
+        }
+
+        // Fallback: Use Content-Length header if socket method didn't work
+        if (!compressedBytes && contentLength) {
+          compressedBytes = parseInt(contentLength, 10)
+        }
+
+        // Get existing metadata from transformResponse
+        const existingMetadata = (response.data as ResponseDataWithMetadata)?.__responseSize
+
+        // Merge compressed size with existing decompressed size metadata
+        if (existingMetadata && response.data && typeof response.data === 'object') {
+          const decompressedBytes = existingMetadata.decompressedBytes
+
+          // Calculate compression metrics if both sizes are available
+          const compressionRatio =
+            compressedBytes && decompressedBytes > 0
+              ? +(compressedBytes / decompressedBytes).toFixed(3)
+              : undefined
+
+          const compressionSavings =
+            compressionRatio && compressionRatio < 1
+              ? `${((1 - compressionRatio) * 100).toFixed(1)}%`
+              : undefined
+
+          // Update the metadata with compressed size info
+          Object.defineProperty(response.data, '__responseSize', {
+            value: {
+              ...existingMetadata,
+              compressedBytes,
+              compressedKB: compressedBytes ? (compressedBytes / 1024).toFixed(2) : undefined,
+              compressionRatio,
+              compressionSavings,
+            },
+            enumerable: false,
+            configurable: true,
+          })
+        }
+
+        return response
+      },
+      (error) => Promise.reject(error)
+    )
+
     // Add interval between tasks to prevent overwhelming the distributor
     this.queue = new PQueue({
       concurrency: this.syncConfig.concurrency,
@@ -663,20 +770,32 @@ export class ParallelCycleSync {
 
         const receipts = response.data?.receipts || []
 
-        // Get response size - with compression, Content-Length might not be accurate
-        const contentLength = response.headers['content-length']
-        const contentEncoding = response.headers['content-encoding']
-        const responseSizeBytes = contentLength ? parseInt(contentLength, 10) : 0
-        const responseSizeKB = responseSizeBytes > 0 ? (responseSizeBytes / 1024).toFixed(2) : '0.00'
+        // Get size metadata from transformResponse and interceptor
+        const sizeMetadata = (response.data as ResponseDataWithMetadata)?.__responseSize
+        const decompressedKB = sizeMetadata?.decompressedKB || '0.00'
+        const compressedKB = sizeMetadata?.compressedKB
+        const compressionRatio = sizeMetadata?.compressionRatio
+        const compressionSavings = sizeMetadata?.compressionSavings
 
         if (config.verbose || networkElapsed > 1000 || receipts.length === 0) {
-          console.log(
+          // Build log message with compression info if available
+          let logMessage =
             `[API Timing] Receipts fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
-              `records: ${receipts.length}, size: ${responseSizeKB}KB` +
-              (contentEncoding ? `, encoding: ${contentEncoding}` : '') +
-              (receipts.length === 0 && response.data ? ', response.data exists but empty' : '') +
-              (!response.data ? ', response.data is null/undefined!' : '')
-          )
+            `records: ${receipts.length}`
+
+          // Only show compression metrics if compression actually reduced the size (ratio < 1)
+          if (compressedKB !== undefined && compressionRatio !== undefined && compressionRatio < 1) {
+            logMessage += `, payload: ${compressedKB}KB, payloadUncompressed: ${decompressedKB}KB, ratio: ${compressionRatio}, savings: ${compressionSavings}`
+          } else {
+            // No compression or not effective, just show uncompressed size
+            logMessage += `, payload: ${decompressedKB}KB`
+          }
+
+          logMessage +=
+            (receipts.length === 0 && response.data ? ', response.data exists but empty' : '') +
+            (!response.data ? ', response.data is null/undefined!' : '')
+
+          console.log(logMessage)
         }
 
         if (response.data && response.data.receipts) {
@@ -747,20 +866,32 @@ export class ParallelCycleSync {
 
         const originalTxs = response.data?.originalTxs || []
 
-        // Get response size - with compression, Content-Length might not be accurate
-        const contentLength = response.headers['content-length']
-        const contentEncoding = response.headers['content-encoding']
-        const responseSizeBytes = contentLength ? parseInt(contentLength, 10) : 0
-        const responseSizeKB = responseSizeBytes > 0 ? (responseSizeBytes / 1024).toFixed(2) : '0.00'
+        // Get size metadata from transformResponse and interceptor
+        const sizeMetadata = (response.data as ResponseDataWithMetadata)?.__responseSize
+        const decompressedKB = sizeMetadata?.decompressedKB || '0.00'
+        const compressedKB = sizeMetadata?.compressedKB
+        const compressionRatio = sizeMetadata?.compressionRatio
+        const compressionSavings = sizeMetadata?.compressionSavings
 
         if (config.verbose || networkElapsed > 1000 || originalTxs.length === 0) {
-          console.log(
+          // Build log message with compression info if available
+          let logMessage =
             `[API Timing] OriginalTxs fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
-              `records: ${originalTxs.length}, size: ${responseSizeKB}KB` +
-              (contentEncoding ? `, encoding: ${contentEncoding}` : '') +
-              (originalTxs.length === 0 && response.data ? ', response.data exists but empty' : '') +
-              (!response.data ? ', response.data is null/undefined!' : '')
-          )
+            `records: ${originalTxs.length}`
+
+          // Only show compression metrics if compression actually reduced the size (ratio < 1)
+          if (compressedKB !== undefined && compressionRatio !== undefined && compressionRatio < 1) {
+            logMessage += `, payload: ${compressedKB}KB, payloadUncompressed: ${decompressedKB}KB, ratio: ${compressionRatio}, savings: ${compressionSavings}`
+          } else {
+            // No compression or not effective, just show uncompressed size
+            logMessage += `, payload: ${decompressedKB}KB`
+          }
+
+          logMessage +=
+            (originalTxs.length === 0 && response.data ? ', response.data exists but empty' : '') +
+            (!response.data ? ', response.data is null/undefined!' : '')
+
+          console.log(logMessage)
         }
 
         if (response.data && response.data.originalTxs) {

From 87e7c6054d43be225eca4aef171b035095859459 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Fri, 7 Nov 2025 00:46:19 +0800
Subject: [PATCH 04/14] refactor: rename ParallelCycleSync to ParallelDataSync
 and simplify sync architecture
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename ParallelCycleSync class to ParallelDataSync for better clarity
- Remove batchSize from ParallelSyncConfig, use config limits instead
- Replace composite cursor approach with simpler timestamp + txId pagination
- Simplify sync flow by removing separate cycle metadata fetching step
- Update method names: syncCycleRange → startSyncing for better semantics
- Remove legacy downloadTxsDataAndCyclesParallel function from DataSync
- Streamline API endpoints to use /cycle instead of /multi-cycle-cursor
- Add SyncTxDataByCycleRange interface for cleaner parameter passing
---
 src/class/DataSync.ts                         |  26 -
 ...rallelCycleSync.ts => ParallelDataSync.ts} | 527 ++++++------------
 src/collector.ts                              |  18 +-
 3 files changed, 198 insertions(+), 373 deletions(-)
 rename src/class/{ParallelCycleSync.ts => ParallelDataSync.ts} (62%)

diff --git a/src/class/DataSync.ts b/src/class/DataSync.ts
index 12b05ce..d6f4de5 100644
--- a/src/class/DataSync.ts
+++ b/src/class/DataSync.ts
@@ -4,7 +4,6 @@ import { AccountDB, CycleDB, ReceiptDB, TransactionDB, OriginalTxDataDB } from '
 import { config, DISTRIBUTOR_URL } from '../config'
 import { Cycle } from '../types'
 import { Utils as StringUtils } from '@shardus/types'
-import { ParallelCycleSync } from './ParallelCycleSync'
 
 export enum DataType {
   CYCLE = 'cycleinfo',
@@ -833,28 +832,3 @@ export const downloadOriginalTxsDataBetweenCycles = async (
     endCycle += config.requestLimits.MAX_BETWEEN_CYCLES_PER_REQUEST
   }
 }
-
-/**
- * NEW: Parallel sync using cycle-based partitioning with composite cursors
- * This is the optimal sync strategy with 10x+ performance improvement
- */
-export const downloadTxsDataAndCyclesParallel = async (
-  totalCyclesToSync: number,
-  fromCycle = 0
-): Promise<void> => {
-  console.log('\n')
-  console.log('='.repeat(60))
-  console.log('Using PARALLEL SYNC with Composite Cursor')
-  console.log('This prevents data loss and provides 10x+ performance improvement')
-  console.log('='.repeat(60))
-  console.log('\n')
-
-  const parallelSync = new ParallelCycleSync({
-    concurrency: config.parallelSyncConcurrency,
-    batchSize: 500,
-    retryAttempts: 3,
-    retryDelayMs: 1000,
-  })
-
-  await parallelSync.syncCycleRange(fromCycle, totalCyclesToSync)
-}
diff --git a/src/class/ParallelCycleSync.ts b/src/class/ParallelDataSync.ts
similarity index 62%
rename from src/class/ParallelCycleSync.ts
rename to src/class/ParallelDataSync.ts
index 55a8722..4f90513 100644
--- a/src/class/ParallelCycleSync.ts
+++ b/src/class/ParallelDataSync.ts
@@ -4,7 +4,7 @@ import { Utils as StringUtils } from '@shardus/types'
 import { config, DISTRIBUTOR_URL } from '../config'
 import { queryFromDistributor, DataType } from './DataSync'
 import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
-import { ParallelSyncCheckpointManager, CompositeCursor } from './ParallelSyncCheckpoint'
+import { ParallelSyncCheckpointManager } from './ParallelSyncCheckpoint'
 import { Cycle } from '../types'
 import axios, { AxiosInstance } from 'axios'
 import http from 'http'
@@ -15,7 +15,6 @@ import https from 'https'
  */
 export interface ParallelSyncConfig {
   concurrency: number // Number of parallel workers
-  batchSize: number // Items per request
   retryAttempts: number // Retry failed requests
   retryDelayMs: number // Delay between retries
   cyclesPerBatch: number // Number of cycles to batch together (default: 10)
@@ -54,14 +53,26 @@ interface ResponseDataWithMetadata {
 }
 
 /**
- * Parallel sync orchestrator using cycle-based partitioning with composite cursors
+ * Sync receipts and originalTxs data by cycle range with timestamp pagination
+ * Uses both timestamp and ID to handle timestamp collisions and prevent data loss
+ */
+export interface SyncTxDataByCycleRange {
+  startCycle: number
+  endCycle: number
+  afterTimestamp?: number
+  afterTxId?: string // receiptId or txId
+  limit?: number
+}
+
+/**
+ * Parallel sync orchestrator using cycle-based partitioning with timestamp + txId pagination
  * Implements the optimal sync strategy with:
  * - Cycle-level parallelization
- * - Composite cursor (timestamp + ID) to prevent data loss
+ * - Composite cursor (timestamp + txId ) to prevent data loss
  * - Automatic resume from database
  * - Work queue for load balancing
  */
-export class ParallelCycleSync {
+export class ParallelDataSync {
   private checkpointManager: ParallelSyncCheckpointManager
   private queue: PQueue
   private syncConfig: ParallelSyncConfig
@@ -72,10 +83,8 @@ export class ParallelCycleSync {
 
   constructor(syncConfig?: Partial<ParallelSyncConfig>) {
     this.checkpointManager = new ParallelSyncCheckpointManager()
-
     this.syncConfig = {
       concurrency: syncConfig?.concurrency || config.parallelSyncConcurrency || 10,
-      batchSize: syncConfig?.batchSize || 500,
       retryAttempts: syncConfig?.retryAttempts || config.syncRetryAttempts || 3,
       retryDelayMs: syncConfig?.retryDelayMs || 1000,
       cyclesPerBatch: syncConfig?.cyclesPerBatch || config.cyclesPerBatch || 10,
@@ -254,7 +263,7 @@ export class ParallelCycleSync {
   /**
    * Main entry point for parallel sync
    */
-  async syncCycleRange(startCycle: number, endCycle: number): Promise<void> {
+  async startSyncing(startCycle: number, endCycle: number): Promise<void> {
     console.log(`\n${'='.repeat(60)}`)
     console.log(`Starting Parallel Cycle Sync: ${startCycle} → ${endCycle}`)
     console.log(`Concurrency: ${this.syncConfig.concurrency} workers`)
@@ -264,20 +273,29 @@ export class ParallelCycleSync {
     this.stats.totalCycles = endCycle - startCycle
 
     try {
-      // Step 1: Fetch all cycle metadata (lightweight)
-      console.log('Step 1: Fetching cycle metadata...')
-      const cycles = await this.fetchCyclesMetadata(startCycle, endCycle)
-      console.log(`✓ Retrieved ${cycles.length} cycles\n`)
+      // Split cycles into batches
+      const cycleBatches: { startCycle: number; endCycle: number }[] = []
+
+      for (let i = startCycle; i <= endCycle; ) {
+        let batchEnd = i + this.syncConfig.cyclesPerBatch
+        if (batchEnd > endCycle) {
+          batchEnd = endCycle
+        }
+        cycleBatches.push({ startCycle: i, endCycle: batchEnd })
+        i = batchEnd + 1
+      }
 
-      // Step 2: Sync cycles themselves in parallel
-      console.log('Step 2: Syncing cycle records...')
-      await this.syncCyclesData(cycles)
-      console.log(`✓ Synced ${cycles.length} cycle records\n`)
+      console.log(
+        `Created ${cycleBatches.length} cycle batches (${this.syncConfig.cyclesPerBatch} cycles per batch)`
+      )
 
-      // Step 3: Sync receipts and originalTxs for all cycles in parallel with multi-cycle batching
-      console.log('Step 3: Syncing receipts and originalTxs with multi-cycle batching...')
-      await this.syncAllCyclesDataMultiBatch(cycles)
+      // Add all batch sync tasks to the queue
+      const tasks = cycleBatches.map((batch) =>
+        this.queue.add(() => this.syncDataByCycleRange(batch.startCycle, batch.endCycle))
+      )
 
+      // Wait for all tasks to complete
+      await Promise.all(tasks)
       this.stats.endTime = Date.now()
 
       // Summary
@@ -290,139 +308,55 @@ export class ParallelCycleSync {
   }
 
   /**
-   * Fetch cycle metadata from distributor
-   */
-  private async fetchCyclesMetadata(startCycle: number, endCycle: number): Promise<Cycle[]> {
-    const cycles: Cycle[] = []
-
-    // Fetch in chunks
-    const CHUNK_SIZE = 100
-    for (let i = startCycle; i <= endCycle; i += CHUNK_SIZE) {
-      const chunkEnd = Math.min(i + CHUNK_SIZE - 1, endCycle)
-
-      const response = await queryFromDistributor(DataType.CYCLE, {
-        start: i,
-        end: chunkEnd,
-      })
-
-      if (response && response.data && response.data.cycleInfo) {
-        cycles.push(
-          ...response.data.cycleInfo.map((cycleRecord: any) => ({
-            counter: cycleRecord.counter,
-            cycleRecord,
-            start: cycleRecord.start,
-            cycleMarker: cycleRecord.marker,
-          }))
-        )
-      }
-    }
-
-    return cycles
-  }
-
-  /**
-   * Sync cycle records to database
-   */
-  private async syncCyclesData(cycles: Cycle[]): Promise<void> {
-    // Insert cycles in batches
-    const BATCH_SIZE = 100
-    for (let i = 0; i < cycles.length; i += BATCH_SIZE) {
-      const batch = cycles.slice(i, i + BATCH_SIZE)
-      await CycleDB.bulkInsertCycles(batch)
-    }
-  }
-
-  /**
-   * Sync receipts and originalTxs for all cycles in parallel (LEGACY - single cycle per request)
-   */
-  private async syncAllCyclesData(cycles: Cycle[]): Promise<void> {
-    // Add all cycle sync tasks to the queue
-    const tasks = cycles.map((cycle) => this.queue.add(() => this.syncSingleCycle(cycle)))
-
-    // Wait for all tasks to complete
-    await Promise.all(tasks)
-  }
-
-  /**
-   * Sync receipts and originalTxs using multi-cycle batching with prefetching
-   * This dramatically reduces HTTP overhead for cycles with small data
-   */
-  private async syncAllCyclesDataMultiBatch(cycles: Cycle[]): Promise<void> {
-    // Group cycles into batches
-    const cycleBatches: Cycle[][] = []
-    for (let i = 0; i < cycles.length; i += this.syncConfig.cyclesPerBatch) {
-      cycleBatches.push(cycles.slice(i, i + this.syncConfig.cyclesPerBatch))
-    }
-
-    console.log(
-      `Created ${cycleBatches.length} cycle batches (${this.syncConfig.cyclesPerBatch} cycles per batch)`
-    )
-
-    // Add all batch sync tasks to the queue
-    const tasks = cycleBatches.map((batch) => this.queue.add(() => this.syncCycleBatch(batch)))
-
-    // Wait for all tasks to complete
-    await Promise.all(tasks)
-  }
-
-  /**
-   * Sync receipts and originalTxs for a single cycle
+   * Sync data in parallel using adaptive multi-cycle fetching with prefetching on endpoints
+   * Adaptively handles partial cycle completion (e.g., if requesting cycles 1-10 but only get data from 1-5)
    */
-  private async syncSingleCycle(cycle: Cycle): Promise<void> {
+  private async syncDataByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     try {
-      // Get cycle time boundaries
-      const cycleStart = cycle.start
-      const cycleEnd = cycle.cycleRecord.duration
-        ? cycle.start + cycle.cycleRecord.duration
-        : cycle.start + 60 * 1000 // Default 1 minute
-
-      // Sync both data types in parallel for this cycle
+      // Sync all data types in parallel
       await Promise.all([
-        this.syncCycleReceipts(cycle.counter, cycleStart, cycleEnd),
-        this.syncCycleOriginalTxs(cycle.counter, cycleStart, cycleEnd),
+        this.syncCyclesByCycleRange(startCycle, endCycle),
+        this.syncReceiptsByCycleRange(startCycle, endCycle),
+        this.syncOriginalTxsByCycleRange(startCycle, endCycle),
       ])
 
-      this.stats.completedCycles++
+      this.stats.completedCycles += endCycle - startCycle + 1
 
       if (config.verbose || this.stats.completedCycles % 10 === 0) {
         const progress = ((this.stats.completedCycles / this.stats.totalCycles) * 100).toFixed(1)
-        console.log(`Progress: ${this.stats.completedCycles}/${this.stats.totalCycles} cycles (${progress}%)`)
+        console.log(
+          `Progress: ${this.stats.completedCycles}/${this.stats.totalCycles} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
+        )
       }
     } catch (error) {
-      console.error(`Error syncing cycle ${cycle.counter}:`, error)
+      console.error(`Error syncing cycle batch ${startCycle}-${endCycle}:`, error)
       this.stats.errors++
       throw error
     }
   }
 
   /**
-   * Sync receipts and originalTxs for a batch of cycles using multi-cycle endpoints
-   * Adaptively handles partial cycle completion (e.g., if requesting cycles 1-10 but only get data from 1-5)
+   * Sync cycles across a batch of cycles using multi-cycle fetching
    */
-  private async syncCycleBatch(cycleBatch: Cycle[]): Promise<void> {
-    if (cycleBatch.length === 0) return
-
+  private async syncCyclesByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     try {
-      const startCycle = cycleBatch[0].counter
-      const endCycle = cycleBatch[cycleBatch.length - 1].counter
+      const response = await this.fetchCyclesByCycleRange(startCycle, endCycle)
 
-      // Sync both data types in parallel
-      await Promise.all([this.syncCycleBatchReceipts(cycleBatch), this.syncCycleBatchOriginalTxs(cycleBatch)])
+      if (!response || response.length === 0) {
+        if (config.verbose) {
+          console.log(`[Cycles ${startCycle}-${endCycle}] No cycle data returned`)
+        }
+        return
+      }
 
-      this.stats.completedCycles += cycleBatch.length
+      // Process cycles using bulkInsertCycles
+      await CycleDB.bulkInsertCycles(response)
 
-      if (config.verbose || this.stats.completedCycles % 10 === 0) {
-        const progress = ((this.stats.completedCycles / this.stats.totalCycles) * 100).toFixed(1)
-        console.log(
-          `Progress: ${this.stats.completedCycles}/${this.stats.totalCycles} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
-        )
+      if (config.verbose) {
+        console.log(`[Cycles ${startCycle}-${endCycle}] Cycles: +${response.length}`)
       }
     } catch (error) {
-      console.error(
-        `Error syncing cycle batch ${cycleBatch[0].counter}-${cycleBatch[cycleBatch.length - 1].counter}:`,
-        error
-      )
-      this.stats.errors++
+      console.error(`Error fetching cycles for cycle batch ${startCycle}-${endCycle}:`, error)
       throw error
     }
   }
@@ -430,20 +364,15 @@ export class ParallelCycleSync {
   /**
    * Sync receipts across a batch of cycles using adaptive multi-cycle fetching with prefetching
    */
-  private async syncCycleBatchReceipts(cycleBatch: Cycle[]): Promise<void> {
-    const startCycle = cycleBatch[0].counter
-    const endCycle = cycleBatch[cycleBatch.length - 1].counter
-
-    // Get resume cursor from database for the start cycle
-    const initialCursor = await this.checkpointManager.getReceiptsCursor(startCycle, cycleBatch[0].start)
-
+  private async syncReceiptsByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     let currentCycle = startCycle
-    let currentCursor: CompositeCursor = initialCursor
+    let afterTimestamp = 0
+    let afterTxId = ''
     let totalFetched = 0
 
     // Prefetch: Start fetching first batch immediately
     let nextFetchPromise: Promise<any[]> | null = this.syncConfig.enablePrefetch
-      ? this.fetchReceiptsMultiCycle(currentCycle, endCycle, currentCursor)
+      ? this.fetchReceiptsByCycleRange({ startCycle: currentCycle, endCycle, afterTimestamp, afterTxId })
       : null
 
     while (currentCycle <= endCycle) {
@@ -451,23 +380,34 @@ export class ParallelCycleSync {
         // Get the data (either from prefetch or fetch now)
         const response = nextFetchPromise
           ? await nextFetchPromise
-          : await this.fetchReceiptsMultiCycle(currentCycle, endCycle, currentCursor)
+          : await this.fetchReceiptsByCycleRange({
+              startCycle: currentCycle,
+              endCycle,
+              afterTimestamp,
+              afterTxId,
+            })
 
         if (!response || response.length === 0) {
           break // No more receipts in this cycle range
         }
 
-        // Update cursor based on last receipt BEFORE starting next fetch
+        // Update after timestamp and txId based on last receipt BEFORE starting next fetch
         const lastReceipt = response[response.length - 1]
         currentCycle = lastReceipt.cycle
-        const nextCursor: CompositeCursor = {
-          timestamp: lastReceipt.timestamp,
-          id: lastReceipt.receiptId,
-        }
+        afterTimestamp = lastReceipt.timestamp
+        afterTxId = lastReceipt.receiptId
 
         // Prefetch next batch while processing current batch
-        if (this.syncConfig.enablePrefetch && response.length >= this.syncConfig.batchSize) {
-          nextFetchPromise = this.fetchReceiptsMultiCycle(currentCycle, endCycle, nextCursor)
+        if (
+          this.syncConfig.enablePrefetch &&
+          response.length >= config.requestLimits.MAX_RECEIPTS_PER_REQUEST
+        ) {
+          nextFetchPromise = this.fetchReceiptsByCycleRange({
+            startCycle: currentCycle,
+            endCycle,
+            afterTimestamp,
+            afterTxId,
+          })
         } else {
           nextFetchPromise = null
         }
@@ -477,7 +417,6 @@ export class ParallelCycleSync {
 
         totalFetched += response.length
         this.stats.totalReceipts += response.length
-        currentCursor = nextCursor
 
         if (config.verbose) {
           console.log(
@@ -487,8 +426,8 @@ export class ParallelCycleSync {
           )
         }
 
-        // If we got less than batch size, we've exhausted this cycle range
-        if (response.length < this.syncConfig.batchSize) {
+        // If we got less than the max response size, we've exhausted this cycle range
+        if (response.length < config.requestLimits.MAX_RECEIPTS_PER_REQUEST) {
           break
         }
       } catch (error) {
@@ -501,20 +440,20 @@ export class ParallelCycleSync {
   /**
    * Sync originalTxs across a batch of cycles using adaptive multi-cycle fetching with prefetching
    */
-  private async syncCycleBatchOriginalTxs(cycleBatch: Cycle[]): Promise<void> {
-    const startCycle = cycleBatch[0].counter
-    const endCycle = cycleBatch[cycleBatch.length - 1].counter
-
-    // Get resume cursor from database for the start cycle
-    const initialCursor = await this.checkpointManager.getOriginalTxsCursor(startCycle, cycleBatch[0].start)
-
+  private async syncOriginalTxsByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     let currentCycle = startCycle
-    let currentCursor: CompositeCursor = initialCursor
+    let afterTimestamp = 0
+    let afterTxId = ''
     let totalFetched = 0
 
     // Prefetch: Start fetching first batch immediately
     let nextFetchPromise: Promise<any[]> | null = this.syncConfig.enablePrefetch
-      ? this.fetchOriginalTxsMultiCycle(currentCycle, endCycle, currentCursor)
+      ? this.fetchOriginalTxsByCycleRange({
+          startCycle: currentCycle,
+          endCycle,
+          afterTimestamp,
+          afterTxId,
+        })
       : null
 
     while (currentCycle <= endCycle) {
@@ -522,23 +461,34 @@ export class ParallelCycleSync {
         // Get the data (either from prefetch or fetch now)
         const response = nextFetchPromise
           ? await nextFetchPromise
-          : await this.fetchOriginalTxsMultiCycle(currentCycle, endCycle, currentCursor)
+          : await this.fetchOriginalTxsByCycleRange({
+              startCycle: currentCycle,
+              endCycle,
+              afterTimestamp,
+              afterTxId,
+            })
 
         if (!response || response.length === 0) {
           break // No more originalTxs in this cycle range
         }
 
-        // Update cursor based on last tx BEFORE starting next fetch
+        // Update after timestamp and txId based on last tx BEFORE starting next fetch
         const lastTx = response[response.length - 1]
         currentCycle = lastTx.cycle
-        const nextCursor: CompositeCursor = {
-          timestamp: lastTx.timestamp,
-          id: lastTx.txId,
-        }
+        afterTimestamp = lastTx.timestamp
+        afterTxId = lastTx.txId
 
         // Prefetch next batch while processing current batch
-        if (this.syncConfig.enablePrefetch && response.length >= this.syncConfig.batchSize) {
-          nextFetchPromise = this.fetchOriginalTxsMultiCycle(currentCycle, endCycle, nextCursor)
+        if (
+          this.syncConfig.enablePrefetch &&
+          response.length >= config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST
+        ) {
+          nextFetchPromise = this.fetchOriginalTxsByCycleRange({
+            startCycle: currentCycle,
+            endCycle,
+            afterTimestamp,
+            afterTxId,
+          })
         } else {
           nextFetchPromise = null
         }
@@ -548,7 +498,6 @@ export class ParallelCycleSync {
 
         totalFetched += response.length
         this.stats.totalOriginalTxs += response.length
-        currentCursor = nextCursor
 
         if (config.verbose) {
           console.log(
@@ -558,8 +507,8 @@ export class ParallelCycleSync {
           )
         }
 
-        // If we got less than batch size, we've exhausted this cycle range
-        if (response.length < this.syncConfig.batchSize) {
+        // If we got less than the max response size, we've exhausted this cycle range
+        if (response.length < config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST) {
           break
         }
       } catch (error) {
@@ -570,196 +519,86 @@ export class ParallelCycleSync {
   }
 
   /**
-   * Sync receipts for a specific cycle using composite cursor
+   * Fetch cycles by cycle range with retry logic
    */
-  private async syncCycleReceipts(cycleNumber: number, cycleStart: number, cycleEnd: number): Promise<void> {
-    // Get resume cursor from database
-    const cursor = await this.checkpointManager.getReceiptsCursor(cycleNumber, cycleStart)
-
-    let currentCursor: CompositeCursor = cursor
-    let totalFetched = 0
-
-    while (true) {
-      try {
-        const response = await this.fetchReceiptsWithCursor(cycleNumber, currentCursor, cycleEnd)
-
-        if (!response || response.length === 0) {
-          break // No more receipts for this cycle
-        }
-
-        // Process receipts
-        await ReceiptDB.processReceiptData(response)
-
-        totalFetched += response.length
-        this.stats.totalReceipts += response.length
-
-        // Update cursor to last item
-        const lastReceipt = response[response.length - 1]
-        currentCursor = {
-          timestamp: lastReceipt.timestamp,
-          id: lastReceipt.receiptId,
-        }
-
-        if (config.verbose) {
-          console.log(`[Cycle ${cycleNumber}] Receipts: +${response.length} (total: ${totalFetched})`)
-        }
-
-        // If we got less than batch size, we're done
-        if (response.length < this.syncConfig.batchSize) {
-          break
-        }
-      } catch (error) {
-        console.error(`Error fetching receipts for cycle ${cycleNumber}:`, error)
-        throw error
-      }
-    }
-  }
-
-  /**
-   * Sync originalTxs for a specific cycle using composite cursor
-   */
-  private async syncCycleOriginalTxs(
-    cycleNumber: number,
-    cycleStart: number,
-    cycleEnd: number
-  ): Promise<void> {
-    // Get resume cursor from database
-    const cursor = await this.checkpointManager.getOriginalTxsCursor(cycleNumber, cycleStart)
-
-    let currentCursor: CompositeCursor = cursor
-    let totalFetched = 0
-
-    while (true) {
+  private async fetchCyclesByCycleRange(startCycle: number, endCycle: number): Promise<Cycle[]> {
+    // Retry with exponential backoff
+    for (let attempt = 0; attempt <= this.syncConfig.retryAttempts; attempt++) {
       try {
-        const response = await this.fetchOriginalTxsWithCursor(cycleNumber, currentCursor, cycleEnd)
-
-        if (!response || response.length === 0) {
-          break // No more originalTxs for this cycle
-        }
-
-        // Process originalTxs
-        await OriginalTxDataDB.processOriginalTxData(response)
+        const startTime = Date.now()
+        const response = await queryFromDistributor(DataType.CYCLE, {
+          start: startCycle,
+          end: endCycle,
+        })
+        const networkElapsed = Date.now() - startTime
 
-        totalFetched += response.length
-        this.stats.totalOriginalTxs += response.length
+        if (response && response.data && response.data.cycleInfo) {
+          const cycleRecords = response.data.cycleInfo.map((cycleRecord: any) => ({
+            counter: cycleRecord.counter,
+            cycleRecord,
+            start: cycleRecord.start,
+            cycleMarker: cycleRecord.marker,
+          }))
 
-        // Update cursor to last item
-        const lastTx = response[response.length - 1]
-        currentCursor = {
-          timestamp: lastTx.timestamp,
-          id: lastTx.txId,
+          if (config.verbose) {
+            console.log(
+              `[API Timing] Cycles fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+                `records: ${cycleRecords.length}`
+            )
+          }
+          return cycleRecords
         }
+      } catch (error: any) {
+        const isLastAttempt = attempt === this.syncConfig.retryAttempts
+        const isRetryableError =
+          error.code === 'ECONNRESET' ||
+          error.code === 'ETIMEDOUT' ||
+          error.code === 'ECONNREFUSED' ||
+          error.code === 'EPIPE'
 
-        if (config.verbose) {
-          console.log(`[Cycle ${cycleNumber}] OriginalTxs: +${response.length} (total: ${totalFetched})`)
+        if (isRetryableError && !isLastAttempt) {
+          const delay = this.syncConfig.retryDelayMs * Math.pow(2, attempt)
+          console.warn(
+            `Error on cycles fetch (cycles ${startCycle}-${endCycle}), ` +
+              `attempt ${attempt + 1}/${this.syncConfig.retryAttempts + 1}, ` +
+              `retrying in ${delay}ms...`
+          )
+          await this.sleep(delay)
+          continue
         }
 
-        // If we got less than batch size, we're done
-        if (response.length < this.syncConfig.batchSize) {
-          break
-        }
-      } catch (error) {
-        console.error(`Error fetching originalTxs for cycle ${cycleNumber}:`, error)
+        // Non-retryable error or last attempt failed
+        console.error(`Error fetching cycles (cycles ${startCycle}-${endCycle}):`, error.message)
         throw error
       }
     }
-  }
-
-  /**
-   * Fetch receipts using composite cursor (prevents data loss on timestamp collisions)
-   */
-  private async fetchReceiptsWithCursor(
-    cycle: number,
-    cursor: CompositeCursor,
-    beforeTimestamp?: number
-  ): Promise<any[]> {
-    const data = {
-      cycle,
-      afterTimestamp: cursor.timestamp,
-      afterReceiptId: cursor.id,
-      beforeTimestamp,
-      limit: this.syncConfig.batchSize,
-      sender: config.collectorInfo.publicKey,
-      sign: undefined,
-    }
-
-    crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
-
-    const url = `${DISTRIBUTOR_URL}/receipt/cycle-cursor`
-
-    try {
-      const response = await this.axiosInstance.post(url, data)
-
-      if (response.data && response.data.receipts) {
-        return response.data.receipts
-      }
-
-      return []
-    } catch (error) {
-      console.error(`Error fetching receipts with cursor:`, error.message)
-      throw error
-    }
-  }
-
-  /**
-   * Fetch originalTxs using composite cursor
-   */
-  private async fetchOriginalTxsWithCursor(
-    cycle: number,
-    cursor: CompositeCursor,
-    beforeTimestamp?: number
-  ): Promise<any[]> {
-    const data = {
-      cycle,
-      afterTimestamp: cursor.timestamp,
-      afterTxId: cursor.id,
-      beforeTimestamp,
-      limit: this.syncConfig.batchSize,
-      sender: config.collectorInfo.publicKey,
-      sign: undefined,
-    }
 
-    crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
-
-    const url = `${DISTRIBUTOR_URL}/originalTx/cycle-cursor`
-
-    try {
-      const response = await this.axiosInstance.post(url, data)
-
-      if (response.data && response.data.originalTxs) {
-        return response.data.originalTxs
-      }
-
-      return []
-    } catch (error) {
-      console.error(`Error fetching originalTxs with cursor:`, error.message)
-      throw error
-    }
+    return []
   }
 
   /**
-   * Fetch receipts across multiple cycles using composite cursor with retry logic
+   * Fetch receipts by multi-cycle  range with retry logic
    * Automatically adapts to cycle sizes - if cycles 1-10 only have data in 1-5, returns that subset
    */
-  private async fetchReceiptsMultiCycle(
-    startCycle: number,
-    endCycle: number,
-    cursor: CompositeCursor
-  ): Promise<any[]> {
+  private async fetchReceiptsByCycleRange({
+    startCycle,
+    endCycle,
+    afterTimestamp,
+    afterTxId,
+  }: SyncTxDataByCycleRange): Promise<any[]> {
     const data = {
       startCycle,
       endCycle,
-      afterCycle: startCycle,
-      afterTimestamp: cursor.timestamp,
-      afterReceiptId: cursor.id,
-      limit: this.syncConfig.batchSize,
+      afterTimestamp,
+      afterTxId,
+      limit: config.requestLimits.MAX_RECEIPTS_PER_REQUEST,
       sender: config.collectorInfo.publicKey,
       sign: undefined,
     }
 
     crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
 
-    const url = `${DISTRIBUTOR_URL}/receipt/multi-cycle-cursor`
+    const url = `${DISTRIBUTOR_URL}/receipt/cycle`
 
     // Retry with exponential backoff
     for (let attempt = 0; attempt <= this.syncConfig.retryAttempts; attempt++) {
@@ -835,27 +674,27 @@ export class ParallelCycleSync {
   }
 
   /**
-   * Fetch originalTxs across multiple cycles using composite cursor with retry logic
+   * Fetch originalTxs by multi-cycle range with retry logic
    */
-  private async fetchOriginalTxsMultiCycle(
-    startCycle: number,
-    endCycle: number,
-    cursor: CompositeCursor
-  ): Promise<any[]> {
+  private async fetchOriginalTxsByCycleRange({
+    startCycle,
+    endCycle,
+    afterTimestamp,
+    afterTxId,
+  }: SyncTxDataByCycleRange): Promise<any[]> {
     const data = {
       startCycle,
       endCycle,
-      afterCycle: startCycle,
-      afterTimestamp: cursor.timestamp,
-      afterTxId: cursor.id,
-      limit: this.syncConfig.batchSize,
+      afterTimestamp,
+      afterTxId,
+      limit: config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST,
       sender: config.collectorInfo.publicKey,
       sign: undefined,
     }
 
     crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
 
-    const url = `${DISTRIBUTOR_URL}/originalTx/multi-cycle-cursor`
+    const url = `${DISTRIBUTOR_URL}/originalTx/cycle`
 
     // Retry with exponential backoff
     for (let attempt = 0; attempt <= this.syncConfig.retryAttempts; attempt++) {
diff --git a/src/collector.ts b/src/collector.ts
index 76cb0b2..dfdb04d 100644
--- a/src/collector.ts
+++ b/src/collector.ts
@@ -9,7 +9,6 @@ import * as Crypto from './utils/crypto'
 import { CycleDB, ReceiptDB, OriginalTxDataDB } from './storage'
 import {
   downloadTxsDataAndCycles,
-  downloadTxsDataAndCyclesParallel,
   compareWithOldReceiptsData,
   compareWithOldCyclesData,
   downloadAndSyncGenesisAccounts,
@@ -29,6 +28,7 @@ import RMQCyclesConsumer from './collectors/rmq/cycles'
 import RMQOriginalTxsConsumer from './collectors/rmq/original_txs'
 import RMQReceiptsConsumer from './collectors/rmq/receipts'
 import { setupCollectorSocketServer } from './collectorServer'
+import { ParallelDataSync } from './class/ParallelDataSync'
 
 const DistributorFirehoseEvent = 'FIREHOSE'
 let ws: WebSocket
@@ -228,8 +228,20 @@ export const checkAndSyncData = async (): Promise<() => Promise<void>> => {
 
     // Use parallel sync if enabled (default)
     if (config.useParallelSync) {
-      console.log('Using optimized parallel sync strategy')
-      await downloadTxsDataAndCyclesParallel(totalCyclesToSync, lastStoredCycleCount)
+      console.log('\n')
+      console.log('='.repeat(60))
+      console.log('Using NEW EFFICIENT PARALLEL SYNC STRATEGY based on cycle batches!')
+      console.log('This strategy is more robust and provides 10x+ performance improvement')
+      console.log('='.repeat(60))
+      console.log('\n')
+
+      const parallelDataSync = new ParallelDataSync({
+        concurrency: config.parallelSyncConcurrency,
+        retryAttempts: 3,
+        retryDelayMs: 1000,
+      })
+
+      await parallelDataSync.startSyncing(lastStoredCycleCount, totalCyclesToSync)
       return
     }
 

From a9e2fb9c944c0b20d61527bb5e62b177ec2da63b Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Fri, 7 Nov 2025 17:51:19 +0800
Subject: [PATCH 05/14] Add DataSyncManager for intelligent cycle-based data
 synchronization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements DataSyncManager to handle missing data identification and recovery
for parallel sync operations with comprehensive gap detection and verification.

Key features:
- Automatic gap detection across cycle ranges
- Data anomaly detection (validates last 15 cycles before sync)
- Lookback verification window (cyclesPerBatch × parallelSyncConcurrency)
- Recovery orchestration using ParallelDataSync for all scenarios
- Fail-fast validation before websocket connection
- Fresh start vs resume from interruption routing

Handles complex scenarios:
- Multiple interruption points during parallel sync
- Incremental data ( received through websocket ) gaps  during process restarts
- Data integrity verification and mismatch detection
- Unified recovery strategy using cycle-batch parallel sync
---
 src/class/DataSyncManager.ts | 727 +++++++++++++++++++++++++++++++++++
 src/collector.ts             |  49 +--
 2 files changed, 754 insertions(+), 22 deletions(-)
 create mode 100644 src/class/DataSyncManager.ts

diff --git a/src/class/DataSyncManager.ts b/src/class/DataSyncManager.ts
new file mode 100644
index 0000000..b91b27a
--- /dev/null
+++ b/src/class/DataSyncManager.ts
@@ -0,0 +1,727 @@
+import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
+import { config } from '../config'
+import { queryFromDistributor, DataType, downloadAndSyncGenesisAccounts } from './DataSync'
+import { ParallelDataSync } from './ParallelDataSync'
+
+/**
+ * Represents a gap in cycle sequence
+ */
+export interface CycleGap {
+  startCycle: number
+  endCycle: number
+  gapSize: number
+}
+
+/**
+ * Represents a cycle with mismatched transaction data
+ */
+export interface MismatchedCycle {
+  cycle: number
+  localReceipts: number
+  distributorReceipts: number
+  localOriginalTxs: number
+  distributorOriginalTxs: number
+  receiptsMismatch: boolean
+  originalTxsMismatch: boolean
+}
+
+/**
+ * Comprehensive recovery plan for data synchronization
+ */
+export interface DataSyncRecoveryPlan {
+  currentDistributorCycle: number
+  lastLocalCycle: number
+  missingCycleRanges: CycleGap[]
+  mismatchedCycles: MismatchedCycle[]
+  lookbackVerificationRanges: CycleGap[]
+  totalMissingCycles: number
+  totalMismatchedCycles: number
+  recoveryNeeded: boolean
+}
+
+/**
+ * DataSyncManager
+ *
+ * Orchestrates intelligent data synchronization with automatic gap detection and recovery.
+ *
+ * Key Features:
+ * - Anomaly detection: Validates data integrity before sync
+ * - Gap identification: Detects missing cycle ranges in local database
+ * - Data reconciliation: Compares local vs distributor data
+ * - Recovery orchestration: Patches gaps and mismatched cycles
+ * - Intelligent routing: Fresh start vs resume from interruption
+ *
+ * Example Scenario:
+ * 1. Parallel sync stops at cycle 150000 (target was 300000)
+ * 2. WebSocket saves incremental data from 300001 to 300100
+ * 3. Process restarts at cycle 300105
+ *
+ * Manager identifies and recovers:
+ * - Missing range: 150000 to 300001 (parallel sync interruption)
+ * - Missing range: 300100 to 300105 (websocket gap during restart)
+ * - Mismatched data in lookback window (e.g., 149900-150000)
+ *
+ * Handles multiple interruption points automatically.
+ */
+export class DataSyncManager {
+  private lookbackCycles: number
+
+  constructor() {
+    // Calculate lookback window: cyclesPerBatch * parallelSyncConcurrency
+    const cyclesPerBatch = config.cyclesPerBatch || 10
+    const concurrency = config.parallelSyncConcurrency || 10
+    this.lookbackCycles = cyclesPerBatch * concurrency
+
+    console.log(`DataSyncManager initialized with lookback window: ${this.lookbackCycles} cycles`)
+  }
+
+  /**
+   * Main entry point for intelligent data synchronization
+   * Handles both fresh start and recovery from interruptions
+   */
+  async syncData(): Promise<void> {
+    const response = await this.getTotalDataFromDistributor()
+    if (!response) {
+      throw new Error('Failed to fetch total data from distributor')
+    }
+    const { totalCycles } = response
+    const lastLocalCycles = await CycleDB.queryLatestCycleRecords(1)
+    const lastLocalCycle = lastLocalCycles.length > 0 ? lastLocalCycles[0].counter : -1
+
+    // Always sync genesis accounts first
+    if (lastLocalCycle === 0) {
+      console.log('Syncing genesis accounts...')
+      await downloadAndSyncGenesisAccounts()
+    }
+
+    // Check if this is a fresh start
+    const isFreshStart = lastLocalCycle === -1 || lastLocalCycle === 0
+
+    if (isFreshStart) {
+      // Fresh start - no checkpoint needed, just sync from beginning
+      console.log('🆕 Fresh start detected - syncing from cycle 0')
+      const parallelDataSync = new ParallelDataSync({
+        concurrency: config.parallelSyncConcurrency,
+        retryAttempts: 3,
+        retryDelayMs: 1000,
+      })
+
+      await parallelDataSync.startSyncing(0, totalCycles - 1)
+    } else {
+      // Existing data - use DataSyncManager to identify and patch gaps/mismatches
+      console.log('📊 Existing data detected - running recovery analysis')
+      const recoveryPlan = await this.generateRecoveryPlan(totalCycles)
+
+      // Execute the complete sync (recovery + normal sync)
+      await this.executeSyncWithRecovery(recoveryPlan)
+    }
+  }
+
+  /**
+   * Detect data anomalies by verifying last 10-15 cycles against distributor
+   * Throws error if critical anomalies are found
+   * Fetches local cycle data internally
+   */
+  async detectDataAnomalies(): Promise<{ lastLocalCycle: number; currentDistributorCycle: number }> {
+    console.log('\n📊 Running data anomaly detection...')
+
+    // Fetch local and distributor cycle info
+    const lastLocalCycles = await CycleDB.queryLatestCycleRecords(1)
+    const lastLocalCycle = lastLocalCycles.length > 0 ? lastLocalCycles[0].counter : -1
+
+    const response = await this.getTotalDataFromDistributor()
+    if (!response) {
+      throw new Error('Failed to fetch distributor cycle info')
+    }
+    const currentDistributorCycle = response.totalCycles
+
+    console.log(`Last local cycle: ${lastLocalCycle}`)
+    console.log(`Current distributor cycle: ${currentDistributorCycle}`)
+
+    const anomalies: string[] = []
+
+    // Anomaly 1: Local DB has more cycles than distributor
+    if (lastLocalCycle > currentDistributorCycle) {
+      anomalies.push(
+        `Local DB has newer cycle than distributor (Local: ${lastLocalCycle}, Distributor: ${currentDistributorCycle})`
+      )
+    }
+
+    // Anomaly 2: Verify last 10-15 cycles match with distributor
+    if (lastLocalCycle >= 15) {
+      const verificationCycles = 15
+      const startCycle = lastLocalCycle - verificationCycles + 1
+      const endCycle = lastLocalCycle
+
+      console.log(
+        `Verifying last ${verificationCycles} cycles (${startCycle} to ${endCycle}) against distributor...`
+      )
+
+      try {
+        // Compare cycles data
+        const localCycles = await CycleDB.queryCycleRecordsBetween(startCycle, endCycle)
+        const distributorResponse = await queryFromDistributor(DataType.CYCLE, {
+          start: startCycle,
+          end: endCycle,
+        })
+
+        if (distributorResponse?.data?.cycleInfo) {
+          const distributorCycles = distributorResponse.data.cycleInfo
+
+          // Check if cycle counts match
+          if (localCycles.length !== distributorCycles.length) {
+            anomalies.push(
+              `Cycle count mismatch in range ${startCycle}-${endCycle}: ` +
+                `Local has ${localCycles.length}, Distributor has ${distributorCycles.length}`
+            )
+          } else {
+            // Verify each cycle's marker matches
+            for (let i = 0; i < localCycles.length; i++) {
+              /* eslint-disable security/detect-object-injection */
+              const localCycle = localCycles[i]
+              /* eslint-enable security/detect-object-injection */
+              const distributorCycle = distributorCycles.find(
+                (c: { counter: number; marker: string }) => c.counter === localCycle.counter
+              )
+
+              if (!distributorCycle) {
+                anomalies.push(`Cycle ${localCycle.counter} exists locally but not in distributor`)
+              } else if (localCycle.cycleMarker !== distributorCycle.marker) {
+                anomalies.push(
+                  `Cycle ${localCycle.counter} marker mismatch: ` +
+                    `Local ${localCycle.cycleMarker} vs Distributor ${distributorCycle.marker}`
+                )
+              }
+            }
+          }
+        }
+
+        // Compare receipts count
+        const receiptsResponse = await queryFromDistributor(DataType.RECEIPT, {
+          startCycle,
+          endCycle,
+          type: 'tally',
+        })
+
+        if (receiptsResponse?.data?.receipts) {
+          const distributorReceipts: { cycle: number; receipts: number }[] = receiptsResponse.data.receipts
+          const localReceiptsCount = await ReceiptDB.queryReceiptCountByCycles(startCycle, endCycle)
+
+          for (const distReceipt of distributorReceipts) {
+            const localReceipt = localReceiptsCount.find((r) => r.cycle === distReceipt.cycle)
+            if (localReceipt && localReceipt.receipts !== distReceipt.receipts) {
+              anomalies.push(
+                `Receipts count mismatch in cycle ${distReceipt.cycle}: ` +
+                  `Local has ${localReceipt.receipts}, Distributor has ${distReceipt.receipts}`
+              )
+            }
+          }
+        }
+
+        // Compare originalTxs count
+        const originalTxsResponse = await queryFromDistributor(DataType.ORIGINALTX, {
+          startCycle,
+          endCycle,
+          type: 'tally',
+        })
+
+        if (originalTxsResponse?.data?.originalTxs) {
+          const distributorOriginalTxs: { cycle: number; originalTxsData: number }[] =
+            originalTxsResponse.data.originalTxs
+          const localOriginalTxsCount = await OriginalTxDataDB.queryOriginalTxDataCountByCycles(
+            startCycle,
+            endCycle
+          )
+
+          for (const distTx of distributorOriginalTxs) {
+            const localTx = localOriginalTxsCount.find((t) => t.cycle === distTx.cycle)
+            if (localTx && localTx.originalTxsData !== distTx.originalTxsData) {
+              anomalies.push(
+                `OriginalTxs count mismatch in cycle ${distTx.cycle}: ` +
+                  `Local has ${localTx.originalTxsData}, Distributor has ${distTx.originalTxsData}`
+              )
+            }
+          }
+        }
+      } catch (error) {
+        console.warn('Warning: Could not complete anomaly verification:', error)
+        // Don't fail on verification errors, just warn
+      }
+    }
+
+    if (anomalies.length > 0) {
+      console.error('\n❌ DATA ANOMALIES DETECTED:')
+      anomalies.forEach((anomaly) => console.error(`  - ${anomaly}`))
+      throw new Error(
+        'Data anomalies detected! Local database may be corrupted or out of sync. ' +
+          'Please clear the database and restart the server.'
+      )
+    }
+
+    console.log('✅ No data anomalies detected')
+
+    return { lastLocalCycle, currentDistributorCycle }
+  }
+
+  /**
+   * Fetch total data count from distributor
+   */
+  private async getTotalDataFromDistributor(): Promise<{
+    totalCycles: number
+    totalAccounts: number
+    totalReceipts: number
+    totalOriginalTxs: number
+  } | null> {
+    const response = await queryFromDistributor(DataType.TOTALDATA, {})
+    if (!response?.data || response.data.totalCycles === undefined) {
+      return null
+    }
+    return response.data
+  }
+
+  /**
+   * Identify all missing cycle ranges by finding gaps in the cycles DB
+   *
+   * Example:
+   * - DB has cycles: 0-149999, 300001-300099, 300106-300200
+   * - Returns gaps: [{150000, 300000}, {300100, 300105}]
+   */
+  private async identifyMissingCycleRanges(targetCycle: number): Promise<CycleGap[]> {
+    try {
+      console.log(`\n${'='.repeat(60)}`)
+      console.log(`Identifying missing cycle ranges up to cycle ${targetCycle}`)
+      console.log(`${'='.repeat(60)}`)
+
+      // Get all cycles from DB ordered by counter
+      const allCycles = await CycleDB.queryCycleRecordsBetween(0, targetCycle)
+
+      if (!allCycles || allCycles.length === 0) {
+        // No cycles in DB, everything from 0 to targetCycle is missing
+        console.log('No cycles found in DB, entire range is missing')
+        return [
+          {
+            startCycle: 0,
+            endCycle: targetCycle,
+            gapSize: targetCycle + 1,
+          },
+        ]
+      }
+
+      const gaps: CycleGap[] = []
+      const cycleNumbers = allCycles.map((c) => c.counter).sort((a, b) => a - b)
+
+      console.log(`Found ${cycleNumbers.length} cycles in DB`)
+      console.log(`First cycle: ${cycleNumbers[0]}, Last cycle: ${cycleNumbers[cycleNumbers.length - 1]}`)
+
+      // Check if there's a gap at the beginning
+      if (cycleNumbers[0] > 0) {
+        gaps.push({
+          startCycle: 0,
+          endCycle: cycleNumbers[0] - 1,
+          gapSize: cycleNumbers[0],
+        })
+        console.log(`Gap found at beginning: 0 to ${cycleNumbers[0] - 1}`)
+      }
+
+      // Find gaps in the middle
+      for (let i = 0; i < cycleNumbers.length - 1; i++) {
+        const currentCycle = cycleNumbers[i]
+        const nextCycle = cycleNumbers[i + 1]
+
+        // If next cycle is not immediately after current, there's a gap
+        if (nextCycle - currentCycle > 1) {
+          const gapStart = currentCycle + 1
+          const gapEnd = nextCycle - 1
+          gaps.push({
+            startCycle: gapStart,
+            endCycle: gapEnd,
+            gapSize: gapEnd - gapStart + 1,
+          })
+          console.log(`Gap found: ${gapStart} to ${gapEnd} (${gapEnd - gapStart + 1} cycles)`)
+        }
+      }
+
+      // Check if there's a gap at the end
+      const lastLocalCycle = cycleNumbers[cycleNumbers.length - 1]
+      if (lastLocalCycle < targetCycle) {
+        gaps.push({
+          startCycle: lastLocalCycle + 1,
+          endCycle: targetCycle,
+          gapSize: targetCycle - lastLocalCycle,
+        })
+        console.log(`Gap found at end: ${lastLocalCycle + 1} to ${targetCycle}`)
+      }
+
+      console.log(`\nTotal gaps found: ${gaps.length}`)
+      const totalMissing = gaps.reduce((sum, gap) => sum + gap.gapSize, 0)
+      console.log(`Total missing cycles: ${totalMissing}`)
+
+      return gaps
+    } catch (error) {
+      console.error('Error identifying missing cycle ranges:', error)
+      throw error
+    }
+  }
+
+  /**
+   * Verify data integrity with lookback window before each gap
+   *
+   * For each gap, check cyclesPerBatch * parallelSyncConcurrency cycles before the gap
+   * to ensure transaction data matches the distributor.
+   *
+   * Example: Gap at 150000, lookback 100 cycles -> verify 149900-150000
+   */
+  private async verifyDataIntegrityWithLookback(gaps: CycleGap[]): Promise<MismatchedCycle[]> {
+    try {
+      console.log(`\n${'='.repeat(60)}`)
+      console.log(`Verifying data integrity with ${this.lookbackCycles}-cycle lookback window`)
+      console.log(`${'='.repeat(60)}`)
+
+      const allMismatchedCycles: MismatchedCycle[] = []
+      const verificationRanges: CycleGap[] = []
+
+      // Build verification ranges for each gap
+      for (const gap of gaps) {
+        const lookbackStart = Math.max(0, gap.startCycle - this.lookbackCycles)
+        const lookbackEnd = gap.startCycle - 1
+
+        // Only verify if there's a valid lookback range
+        if (lookbackEnd >= lookbackStart && lookbackEnd >= 0) {
+          verificationRanges.push({
+            startCycle: lookbackStart,
+            endCycle: lookbackEnd,
+            gapSize: lookbackEnd - lookbackStart + 1,
+          })
+          console.log(
+            `Verification range for gap at ${gap.startCycle}: cycles ${lookbackStart}-${lookbackEnd}`
+          )
+        }
+      }
+
+      // Deduplicate overlapping verification ranges
+      const mergedRanges = this.mergeOverlappingRanges(verificationRanges)
+      console.log(`Merged into ${mergedRanges.length} verification ranges`)
+
+      // Verify each range
+      for (const range of mergedRanges) {
+        console.log(`\nVerifying cycles ${range.startCycle} to ${range.endCycle}...`)
+
+        const mismatched = await this.compareCycleDataWithDistributor(range.startCycle, range.endCycle)
+        allMismatchedCycles.push(...mismatched)
+      }
+
+      if (allMismatchedCycles.length > 0) {
+        console.log(`\n⚠️  Found ${allMismatchedCycles.length} cycles with mismatched data:`)
+        for (const mismatch of allMismatchedCycles) {
+          console.log(
+            `  Cycle ${mismatch.cycle}: ` +
+              `Receipts (local: ${mismatch.localReceipts}, distributor: ${mismatch.distributorReceipts}), ` +
+              `OriginalTxs (local: ${mismatch.localOriginalTxs}, distributor: ${mismatch.distributorOriginalTxs})`
+          )
+        }
+      } else {
+        console.log(`\n✅ All verified cycles have matching data`)
+      }
+
+      return allMismatchedCycles
+    } catch (error) {
+      console.error('Error verifying data integrity:', error)
+      throw error
+    }
+  }
+
+  /**
+   * Compare cycle data counts between local DB and distributor
+   */
+  private async compareCycleDataWithDistributor(
+    startCycle: number,
+    endCycle: number
+  ): Promise<MismatchedCycle[]> {
+    const mismatched: MismatchedCycle[] = []
+
+    try {
+      // Fetch counts from distributor
+      const [receiptsResponse, originalTxsResponse] = await Promise.all([
+        queryFromDistributor(DataType.RECEIPT, { startCycle, endCycle, type: 'tally' }),
+        queryFromDistributor(DataType.ORIGINALTX, { startCycle, endCycle, type: 'tally' }),
+      ])
+
+      if (!receiptsResponse?.data?.receipts || !originalTxsResponse?.data?.originalTxs) {
+        console.warn(`Failed to fetch distributor data for cycles ${startCycle}-${endCycle}`)
+        return mismatched
+      }
+
+      const distributorReceipts: { cycle: number; receipts: number }[] = receiptsResponse.data.receipts
+      const distributorOriginalTxs: { cycle: number; originalTxsData: number }[] =
+        originalTxsResponse.data.originalTxs
+
+      // Fetch counts from local DB
+      const [localReceipts, localOriginalTxs] = await Promise.all([
+        ReceiptDB.queryReceiptCountByCycles(startCycle, endCycle),
+        OriginalTxDataDB.queryOriginalTxDataCountByCycles(startCycle, endCycle),
+      ])
+
+      // Create maps for easier lookup
+      const localReceiptsMap = new Map(localReceipts.map((r) => [r.cycle, r.receipts]))
+      const localOriginalTxsMap = new Map(localOriginalTxs.map((t) => [t.cycle, t.originalTxsData]))
+
+      // Compare each cycle
+      const allCycles = new Set([
+        ...distributorReceipts.map((r) => r.cycle),
+        ...distributorOriginalTxs.map((t) => t.cycle),
+      ])
+
+      for (const cycle of allCycles) {
+        const distReceipts = distributorReceipts.find((r) => r.cycle === cycle)?.receipts || 0
+        const distOriginalTxs = distributorOriginalTxs.find((t) => t.cycle === cycle)?.originalTxsData || 0
+
+        const localReceiptsCount = localReceiptsMap.get(cycle) || 0
+        const localOriginalTxsCount = localOriginalTxsMap.get(cycle) || 0
+
+        const receiptsMismatch = localReceiptsCount !== distReceipts
+        const originalTxsMismatch = localOriginalTxsCount !== distOriginalTxs
+
+        if (receiptsMismatch || originalTxsMismatch) {
+          mismatched.push({
+            cycle,
+            localReceipts: localReceiptsCount,
+            distributorReceipts: distReceipts,
+            localOriginalTxs: localOriginalTxsCount,
+            distributorOriginalTxs: distOriginalTxs,
+            receiptsMismatch,
+            originalTxsMismatch,
+          })
+        }
+      }
+
+      return mismatched
+    } catch (error) {
+      console.error(`Error comparing data for cycles ${startCycle}-${endCycle}:`, error)
+      return mismatched
+    }
+  }
+
+  /**
+   * Merge overlapping or adjacent ranges to minimize API calls
+   */
+  private mergeOverlappingRanges(ranges: CycleGap[]): CycleGap[] {
+    if (ranges.length === 0) return []
+
+    // Sort by start cycle
+    const sorted = [...ranges].sort((a, b) => a.startCycle - b.startCycle)
+    const merged: CycleGap[] = [sorted[0]]
+
+    for (let i = 1; i < sorted.length; i++) {
+      const current = sorted[i]
+      const last = merged[merged.length - 1]
+
+      // If current range overlaps or is adjacent to last range, merge them
+      if (current.startCycle <= last.endCycle + 1) {
+        last.endCycle = Math.max(last.endCycle, current.endCycle)
+        last.gapSize = last.endCycle - last.startCycle + 1
+      } else {
+        merged.push(current)
+      }
+    }
+
+    return merged
+  }
+
+  /**
+   * Generate comprehensive recovery plan
+   *
+   * Orchestrates gap detection and data verification to create a complete recovery strategy.
+   * NOTE: This should only be called when there's existing data in DB (not fresh start)
+   */
+  async generateRecoveryPlan(currentDistributorCycle: number): Promise<DataSyncRecoveryPlan> {
+    try {
+      const lastLocalCycles = await CycleDB.queryLatestCycleRecords(1)
+      const lastLocalCycle = lastLocalCycles.length > 0 ? lastLocalCycles[0].counter : -1
+
+      console.log(`\n${'='.repeat(70)}`)
+      console.log(`GENERATING DATA SYNC RECOVERY PLAN`)
+      console.log(`${'='.repeat(70)}`)
+      console.log(`Current distributor cycle: ${currentDistributorCycle}`)
+      console.log(`Last local cycle: ${lastLocalCycle}`)
+
+      // Step 1: Identify missing cycle ranges
+      const missingCycleRanges = await this.identifyMissingCycleRanges(currentDistributorCycle)
+
+      // Step 2: Verify data integrity with lookback (only if there are gaps)
+      const mismatchedCycles =
+        missingCycleRanges.length > 0 ? await this.verifyDataIntegrityWithLookback(missingCycleRanges) : []
+
+      // Calculate lookback ranges for reporting
+      const lookbackVerificationRanges: CycleGap[] = []
+      for (const gap of missingCycleRanges) {
+        const lookbackStart = Math.max(0, gap.startCycle - this.lookbackCycles)
+        const lookbackEnd = gap.startCycle - 1
+        if (lookbackEnd >= lookbackStart && lookbackEnd >= 0) {
+          lookbackVerificationRanges.push({
+            startCycle: lookbackStart,
+            endCycle: lookbackEnd,
+            gapSize: lookbackEnd - lookbackStart + 1,
+          })
+        }
+      }
+
+      const totalMissingCycles = missingCycleRanges.reduce((sum, gap) => sum + gap.gapSize, 0)
+      const recoveryNeeded = missingCycleRanges.length > 0 || mismatchedCycles.length > 0
+
+      const plan: DataSyncRecoveryPlan = {
+        currentDistributorCycle,
+        lastLocalCycle,
+        missingCycleRanges,
+        mismatchedCycles,
+        lookbackVerificationRanges,
+        totalMissingCycles,
+        totalMismatchedCycles: mismatchedCycles.length,
+        recoveryNeeded,
+      }
+
+      this.printRecoveryPlan(plan)
+
+      return plan
+    } catch (error) {
+      console.error('Error generating recovery plan:', error)
+      throw error
+    }
+  }
+
+  /**
+   * Execute comprehensive sync with recovery
+   *
+   * Combines all sync needs (mismatched cycles + missing ranges) and uses ParallelDataSync
+   * for everything. No distinction between "patching" and "syncing" - both use the same mechanism.
+   */
+  async executeSyncWithRecovery(recoveryPlan: DataSyncRecoveryPlan): Promise<void> {
+    console.log(`\n${'='.repeat(70)}`)
+    console.log(`EXECUTING DATA SYNC WITH RECOVERY`)
+    console.log(`${'='.repeat(70)}`)
+
+    try {
+      // Combine mismatched cycles and missing ranges into unified sync plan
+      const allRangesToSync: CycleGap[] = []
+
+      // Step 1: Add mismatched cycles (convert to ranges)
+      if (recoveryPlan.mismatchedCycles.length > 0) {
+        console.log(`\n📝 Identified ${recoveryPlan.mismatchedCycles.length} mismatched cycles to patch`)
+        const patchRanges = this.groupCyclesIntoRanges(recoveryPlan.mismatchedCycles.map((m) => m.cycle))
+        allRangesToSync.push(...patchRanges)
+      }
+
+      // Step 2: Add missing cycle ranges
+      if (recoveryPlan.missingCycleRanges.length > 0) {
+        console.log(`\n📥 Identified ${recoveryPlan.missingCycleRanges.length} missing cycle ranges to sync`)
+        allRangesToSync.push(...recoveryPlan.missingCycleRanges)
+      }
+
+      // Step 3: Merge and deduplicate ranges
+      const mergedRanges = this.mergeOverlappingRanges(allRangesToSync)
+      console.log(`\nMerged into ${mergedRanges.length} sync ranges`)
+
+      // Step 4: Execute ParallelDataSync for all ranges
+      if (mergedRanges.length > 0) {
+        for (const range of mergedRanges) {
+          console.log(`\nSyncing range: ${range.startCycle} to ${range.endCycle} (${range.gapSize} cycles)`)
+
+          const parallelSync = new ParallelDataSync({
+            concurrency: config.parallelSyncConcurrency || 10,
+            retryAttempts: 3,
+            retryDelayMs: 1000,
+          })
+
+          await parallelSync.startSyncing(range.startCycle, range.endCycle)
+          console.log(`✅ Completed range ${range.startCycle} to ${range.endCycle}`)
+        }
+      } else {
+        console.log('\n✅ No data to sync, database is up to date')
+      }
+
+      console.log(`\n${'='.repeat(70)}`)
+      console.log(`✅ DATA SYNC COMPLETED SUCCESSFULLY`)
+      console.log(`${'='.repeat(70)}\n`)
+    } catch (error) {
+      console.error('Error executing sync with recovery:', error)
+      throw error
+    }
+  }
+
+  /**
+   * Group individual cycles into consecutive ranges
+   */
+  private groupCyclesIntoRanges(cycles: number[]): CycleGap[] {
+    if (cycles.length === 0) return []
+
+    const sorted = [...cycles].sort((a, b) => a - b)
+    const ranges: CycleGap[] = []
+    let rangeStart = sorted[0]
+    let rangeEnd = sorted[0]
+
+    for (let i = 1; i < sorted.length; i++) {
+      if (sorted[i] === rangeEnd + 1) {
+        // Consecutive cycle, extend range
+        rangeEnd = sorted[i]
+      } else {
+        // Gap found, save current range and start new one
+        ranges.push({
+          startCycle: rangeStart,
+          endCycle: rangeEnd,
+          gapSize: rangeEnd - rangeStart + 1,
+        })
+        rangeStart = sorted[i]
+        rangeEnd = sorted[i]
+      }
+    }
+
+    // Add last range
+    ranges.push({
+      startCycle: rangeStart,
+      endCycle: rangeEnd,
+      gapSize: rangeEnd - rangeStart + 1,
+    })
+
+    return ranges
+  }
+
+  /**
+   * Print recovery plan summary
+   */
+  private printRecoveryPlan(plan: DataSyncRecoveryPlan): void {
+    console.log(`\n${'='.repeat(70)}`)
+    console.log(`RECOVERY PLAN SUMMARY`)
+    console.log(`${'='.repeat(70)}`)
+    console.log(`Current distributor cycle: ${plan.currentDistributorCycle}`)
+    console.log(`Last local cycle:          ${plan.lastLocalCycle}`)
+    console.log(`Recovery needed:           ${plan.recoveryNeeded ? '⚠️  YES' : '✅ NO'}`)
+    console.log(``)
+    console.log(`Missing Cycle Ranges:      ${plan.missingCycleRanges.length}`)
+    console.log(`Total missing cycles:      ${plan.totalMissingCycles}`)
+    if (plan.missingCycleRanges.length > 0) {
+      for (const gap of plan.missingCycleRanges) {
+        console.log(`  - Cycles ${gap.startCycle} to ${gap.endCycle} (${gap.gapSize} cycles)`)
+      }
+    }
+    console.log(``)
+    console.log(`Mismatched Cycles:         ${plan.totalMismatchedCycles}`)
+    if (plan.mismatchedCycles.length > 0) {
+      for (const mismatch of plan.mismatchedCycles.slice(0, 10)) {
+        // Show first 10
+        console.log(
+          `  - Cycle ${mismatch.cycle}: ` +
+            `Receipts ${mismatch.localReceipts}→${mismatch.distributorReceipts}, ` +
+            `OriginalTxs ${mismatch.localOriginalTxs}→${mismatch.distributorOriginalTxs}`
+        )
+      }
+      if (plan.mismatchedCycles.length > 10) {
+        console.log(`  ... and ${plan.mismatchedCycles.length - 10} more`)
+      }
+    }
+    console.log(``)
+    console.log(`Lookback Verification:`)
+    for (const range of plan.lookbackVerificationRanges) {
+      console.log(`  - Verified cycles ${range.startCycle} to ${range.endCycle}`)
+    }
+    console.log(`${'='.repeat(70)}\n`)
+  }
+}
diff --git a/src/collector.ts b/src/collector.ts
index dfdb04d..0cf573e 100644
--- a/src/collector.ts
+++ b/src/collector.ts
@@ -28,7 +28,7 @@ import RMQCyclesConsumer from './collectors/rmq/cycles'
 import RMQOriginalTxsConsumer from './collectors/rmq/original_txs'
 import RMQReceiptsConsumer from './collectors/rmq/receipts'
 import { setupCollectorSocketServer } from './collectorServer'
-import { ParallelDataSync } from './class/ParallelDataSync'
+import { DataSyncManager } from './class/DataSyncManager'
 
 const DistributorFirehoseEvent = 'FIREHOSE'
 let ws: WebSocket
@@ -79,6 +79,7 @@ if (config.env == envEnum.DEV) {
 }
 
 export const checkAndSyncData = async (): Promise<() => Promise<void>> => {
+  console.log('Using legacy sequential sync strategy')
   // Check if there is any existing data in the db
   let lastStoredReceiptCount = await ReceiptDB.queryReceiptCount()
   let lastStoredOriginalTxDataCount = await OriginalTxDataDB.queryOriginalTxDataCount()
@@ -226,26 +227,6 @@ export const checkAndSyncData = async (): Promise<() => Promise<void>> => {
     // If there is already some data in the db, we can assume that the genesis accounts data has been synced already
     if (lastStoredCycleCount === 0) await downloadAndSyncGenesisAccounts() // To sync accounts data that are from genesis accounts/accounts data that the network start with
 
-    // Use parallel sync if enabled (default)
-    if (config.useParallelSync) {
-      console.log('\n')
-      console.log('='.repeat(60))
-      console.log('Using NEW EFFICIENT PARALLEL SYNC STRATEGY based on cycle batches!')
-      console.log('This strategy is more robust and provides 10x+ performance improvement')
-      console.log('='.repeat(60))
-      console.log('\n')
-
-      const parallelDataSync = new ParallelDataSync({
-        concurrency: config.parallelSyncConcurrency,
-        retryAttempts: 3,
-        retryDelayMs: 1000,
-      })
-
-      await parallelDataSync.startSyncing(lastStoredCycleCount, totalCyclesToSync)
-      return
-    }
-
-    console.log('Using legacy sequential sync strategy')
     // Sync receipts and originalTxsData data first if there is old data
     if (
       lastStoredReceiptCycle > 0 &&
@@ -275,6 +256,30 @@ export const checkAndSyncData = async (): Promise<() => Promise<void>> => {
   return syncData
 }
 
+export const startDataSyncManager = async (): Promise<() => Promise<void>> => {
+  console.log('\n')
+  console.log('='.repeat(60))
+  console.log('INITIALIZING DATA SYNC MANAGER')
+  console.log('='.repeat(60))
+  console.log('DataSyncManager provides intelligent data synchronization with:')
+  console.log('  • Early data anomaly detection before sync operations')
+  console.log('  • Automatic gap detection and recovery')
+  console.log('  • Lookback verification window for data integrity')
+  console.log('  • Parallel batch-cycle-based sync (10x+ performance improvement)')
+  console.log('='.repeat(60))
+  console.log('\n')
+
+  // Run anomaly detection BEFORE connecting to websocket
+  // This fails fast if there are data corruption issues
+  const syncManager = new DataSyncManager()
+  await syncManager.detectDataAnomalies()
+
+  console.log('✅ Data anomaly check passed - proceeding with sync')
+
+  // Return the sync function to be executed after WS connection
+  return syncManager.syncData
+}
+
 const attemptReconnection = (): void => {
   console.log(`Re-connecting Distributor in ${config.DISTRIBUTOR_RECONNECT_INTERVAL / 1000}s...`)
   reconnecting = true
@@ -394,7 +399,7 @@ const startServer = async (): Promise<void> => {
   await Storage.initializeDB()
   addExitListeners()
 
-  const syncData = await checkAndSyncData()
+  const syncData = config.useParallelSync ? await startDataSyncManager() : await checkAndSyncData()
   if (config.dataLogWrite) await initDataLogWriter()
 
   addSigListeners()

From 176febf323d2d9144bd466bd0eab533b202ed234 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Fri, 7 Nov 2025 21:24:27 +0800
Subject: [PATCH 06/14] refactor: optimize data sync with improved anomaly
 detection and gap finding

- Move CycleGap interface to storage/cycle.ts and implement efficient SQL-based gap detection
- Enhance anomaly detection with better error handling and validation logic
- Add sync summary functionality to DataSyncManager with database statistics
- Remove ParallelSyncCheckpoint dependency and simplify ParallelDataSync
- Improve logging and error messages throughout sync process
---
 src/class/DataSyncManager.ts        | 330 ++++++++++++++--------------
 src/class/ParallelDataSync.ts       |  12 +-
 src/class/ParallelSyncCheckpoint.ts | 315 --------------------------
 src/collector.ts                    |   6 +-
 src/storage/cycle.ts                |  92 ++++++++
 5 files changed, 262 insertions(+), 493 deletions(-)
 delete mode 100644 src/class/ParallelSyncCheckpoint.ts

diff --git a/src/class/DataSyncManager.ts b/src/class/DataSyncManager.ts
index b91b27a..f62ba52 100644
--- a/src/class/DataSyncManager.ts
+++ b/src/class/DataSyncManager.ts
@@ -1,17 +1,9 @@
 import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
+import { CycleGap } from '../storage/cycle'
 import { config } from '../config'
 import { queryFromDistributor, DataType, downloadAndSyncGenesisAccounts } from './DataSync'
 import { ParallelDataSync } from './ParallelDataSync'
 
-/**
- * Represents a gap in cycle sequence
- */
-export interface CycleGap {
-  startCycle: number
-  endCycle: number
-  gapSize: number
-}
-
 /**
  * Represents a cycle with mismatched transaction data
  */
@@ -107,6 +99,9 @@ export class DataSyncManager {
       })
 
       await parallelDataSync.startSyncing(0, totalCycles - 1)
+
+      // Print final database summary
+      await this.printSyncSummary()
     } else {
       // Existing data - use DataSyncManager to identify and patch gaps/mismatches
       console.log('📊 Existing data detected - running recovery analysis')
@@ -122,12 +117,16 @@ export class DataSyncManager {
    * Throws error if critical anomalies are found
    * Fetches local cycle data internally
    */
-  async detectDataAnomalies(): Promise<{ lastLocalCycle: number; currentDistributorCycle: number }> {
-    console.log('\n📊 Running data anomaly detection...')
-
-    // Fetch local and distributor cycle info
+  async detectDataAnomalies(): Promise<void> {
+    // Fetch local cycle data
     const lastLocalCycles = await CycleDB.queryLatestCycleRecords(1)
     const lastLocalCycle = lastLocalCycles.length > 0 ? lastLocalCycles[0].counter : -1
+    if (lastLocalCycle === -1) {
+      console.log('No local data found, skipping anomaly detection')
+      return
+    }
+
+    console.log('\n📊 Running data anomaly detection...')
 
     const response = await this.getTotalDataFromDistributor()
     if (!response) {
@@ -138,129 +137,113 @@ export class DataSyncManager {
     console.log(`Last local cycle: ${lastLocalCycle}`)
     console.log(`Current distributor cycle: ${currentDistributorCycle}`)
 
-    const anomalies: string[] = []
-
     // Anomaly 1: Local DB has more cycles than distributor
     if (lastLocalCycle > currentDistributorCycle) {
-      anomalies.push(
+      throw new Error(
         `Local DB has newer cycle than distributor (Local: ${lastLocalCycle}, Distributor: ${currentDistributorCycle})`
       )
     }
 
-    // Anomaly 2: Verify last 10-15 cycles match with distributor
-    if (lastLocalCycle >= 15) {
-      const verificationCycles = 15
-      const startCycle = lastLocalCycle - verificationCycles + 1
-      const endCycle = lastLocalCycle
+    const verificationCycles = 15
 
-      console.log(
-        `Verifying last ${verificationCycles} cycles (${startCycle} to ${endCycle}) against distributor...`
-      )
+    // Anomaly 2: Verify last 15 cycles match with distributor
+    let startCycle = lastLocalCycle - verificationCycles + 1
+    if (startCycle < 0) {
+      startCycle = 0
+    }
+    const endCycle = lastLocalCycle
 
-      try {
-        // Compare cycles data
-        const localCycles = await CycleDB.queryCycleRecordsBetween(startCycle, endCycle)
-        const distributorResponse = await queryFromDistributor(DataType.CYCLE, {
-          start: startCycle,
-          end: endCycle,
-        })
+    console.log(
+      `Verifying last ${verificationCycles} cycles (${startCycle} to ${endCycle}) against distributor...`
+    )
+
+    try {
+      // Compare cycles data
+      const localCycles = await CycleDB.queryCycleRecordsBetween(startCycle, endCycle)
+      const distributorResponse = await queryFromDistributor(DataType.CYCLE, {
+        start: startCycle,
+        end: endCycle,
+      })
 
-        if (distributorResponse?.data?.cycleInfo) {
-          const distributorCycles = distributorResponse.data.cycleInfo
+      if (distributorResponse?.data?.cycleInfo) {
+        const distributorCycles = distributorResponse.data.cycleInfo
 
-          // Check if cycle counts match
-          if (localCycles.length !== distributorCycles.length) {
-            anomalies.push(
-              `Cycle count mismatch in range ${startCycle}-${endCycle}: ` +
-                `Local has ${localCycles.length}, Distributor has ${distributorCycles.length}`
+        // Verify each cycle's marker matches
+        for (let i = 0; i < localCycles.length; i++) {
+          /* eslint-disable security/detect-object-injection */
+          const localCycle = localCycles[i]
+          /* eslint-enable security/detect-object-injection */
+          const distributorCycle = distributorCycles.find(
+            (c: { counter: number; marker: string }) => c.counter === localCycle.counter
+          )
+
+          if (!distributorCycle) {
+            throw new Error(`Cycle ${localCycle.counter} exists locally but not in distributor`)
+          } else if (localCycle.cycleMarker !== distributorCycle.marker) {
+            throw new Error(
+              `Cycle ${localCycle.counter} marker mismatch: ` +
+                `Local ${localCycle.cycleMarker} vs Distributor ${distributorCycle.marker}`
             )
-          } else {
-            // Verify each cycle's marker matches
-            for (let i = 0; i < localCycles.length; i++) {
-              /* eslint-disable security/detect-object-injection */
-              const localCycle = localCycles[i]
-              /* eslint-enable security/detect-object-injection */
-              const distributorCycle = distributorCycles.find(
-                (c: { counter: number; marker: string }) => c.counter === localCycle.counter
-              )
-
-              if (!distributorCycle) {
-                anomalies.push(`Cycle ${localCycle.counter} exists locally but not in distributor`)
-              } else if (localCycle.cycleMarker !== distributorCycle.marker) {
-                anomalies.push(
-                  `Cycle ${localCycle.counter} marker mismatch: ` +
-                    `Local ${localCycle.cycleMarker} vs Distributor ${distributorCycle.marker}`
-                )
-              }
-            }
           }
         }
+      }
 
-        // Compare receipts count
-        const receiptsResponse = await queryFromDistributor(DataType.RECEIPT, {
-          startCycle,
-          endCycle,
-          type: 'tally',
-        })
+      // Compare receipts count
+      const receiptsResponse = await queryFromDistributor(DataType.RECEIPT, {
+        startCycle,
+        endCycle,
+        type: 'tally',
+      })
+
+      if (receiptsResponse?.data?.receipts) {
+        const distributorReceipts: { cycle: number; receipts: number }[] = receiptsResponse.data.receipts
+        const localReceiptsCount = await ReceiptDB.queryReceiptCountByCycles(startCycle, endCycle)
 
-        if (receiptsResponse?.data?.receipts) {
-          const distributorReceipts: { cycle: number; receipts: number }[] = receiptsResponse.data.receipts
-          const localReceiptsCount = await ReceiptDB.queryReceiptCountByCycles(startCycle, endCycle)
-
-          for (const distReceipt of distributorReceipts) {
-            const localReceipt = localReceiptsCount.find((r) => r.cycle === distReceipt.cycle)
-            if (localReceipt && localReceipt.receipts !== distReceipt.receipts) {
-              anomalies.push(
-                `Receipts count mismatch in cycle ${distReceipt.cycle}: ` +
-                  `Local has ${localReceipt.receipts}, Distributor has ${distReceipt.receipts}`
-              )
-            }
+        for (const distReceipt of distributorReceipts) {
+          const localReceipt = localReceiptsCount.find((r) => r.cycle === distReceipt.cycle)
+          if (localReceipt && localReceipt.receipts > distReceipt.receipts) {
+            throw new Error(
+              `Receipts count in local DB has more in cycle ${distReceipt.cycle}: ` +
+                `Local has ${localReceipt.receipts}, Distributor has ${distReceipt.receipts}`
+            )
           }
         }
+      }
 
-        // Compare originalTxs count
-        const originalTxsResponse = await queryFromDistributor(DataType.ORIGINALTX, {
-          startCycle,
-          endCycle,
-          type: 'tally',
-        })
+      // Compare originalTxs count
+      const originalTxsResponse = await queryFromDistributor(DataType.ORIGINALTX, {
+        startCycle,
+        endCycle,
+        type: 'tally',
+      })
 
-        if (originalTxsResponse?.data?.originalTxs) {
-          const distributorOriginalTxs: { cycle: number; originalTxsData: number }[] =
-            originalTxsResponse.data.originalTxs
-          const localOriginalTxsCount = await OriginalTxDataDB.queryOriginalTxDataCountByCycles(
-            startCycle,
-            endCycle
-          )
+      if (originalTxsResponse?.data?.originalTxs) {
+        const distributorOriginalTxs: { cycle: number; originalTxsData: number }[] =
+          originalTxsResponse.data.originalTxs
+        const localOriginalTxsCount = await OriginalTxDataDB.queryOriginalTxDataCountByCycles(
+          startCycle,
+          endCycle
+        )
 
-          for (const distTx of distributorOriginalTxs) {
-            const localTx = localOriginalTxsCount.find((t) => t.cycle === distTx.cycle)
-            if (localTx && localTx.originalTxsData !== distTx.originalTxsData) {
-              anomalies.push(
-                `OriginalTxs count mismatch in cycle ${distTx.cycle}: ` +
-                  `Local has ${localTx.originalTxsData}, Distributor has ${distTx.originalTxsData}`
-              )
-            }
+        for (const distTx of distributorOriginalTxs) {
+          const localTx = localOriginalTxsCount.find((t) => t.cycle === distTx.cycle)
+          if (localTx && localTx.originalTxsData > distTx.originalTxsData) {
+            throw new Error(
+              `OriginalTxs count mismatch in cycle ${distTx.cycle}: ` +
+                `Local has ${localTx.originalTxsData}, Distributor has ${distTx.originalTxsData}`
+            )
           }
         }
-      } catch (error) {
-        console.warn('Warning: Could not complete anomaly verification:', error)
-        // Don't fail on verification errors, just warn
       }
-    }
-
-    if (anomalies.length > 0) {
-      console.error('\n❌ DATA ANOMALIES DETECTED:')
-      anomalies.forEach((anomaly) => console.error(`  - ${anomaly}`))
-      throw new Error(
-        'Data anomalies detected! Local database may be corrupted or out of sync. ' +
-          'Please clear the database and restart the server.'
+    } catch (error) {
+      throw Error(
+        `Data anomalies detected! Local database may be corrupted or out of sync. ` +
+          `Please patch the database or clear the database and restart the server. ` +
+          `Error: ${error}`
       )
     }
 
     console.log('✅ No data anomalies detected')
-
-    return { lastLocalCycle, currentDistributorCycle }
   }
 
   /**
@@ -281,9 +264,11 @@ export class DataSyncManager {
 
   /**
    * Identify all missing cycle ranges by finding gaps in the cycles DB
+   * Uses efficient LEFT JOIN-based SQL query to find ranges directly - O(N) complexity
    *
    * Example:
    * - DB has cycles: 0-149999, 300001-300099, 300106-300200
+   * - Missing ranges: 150000-300000, 300100-300105
    * - Returns gaps: [{150000, 300000}, {300100, 300105}]
    */
   private async identifyMissingCycleRanges(targetCycle: number): Promise<CycleGap[]> {
@@ -292,67 +277,34 @@ export class DataSyncManager {
       console.log(`Identifying missing cycle ranges up to cycle ${targetCycle}`)
       console.log(`${'='.repeat(60)}`)
 
-      // Get all cycles from DB ordered by counter
-      const allCycles = await CycleDB.queryCycleRecordsBetween(0, targetCycle)
-
-      if (!allCycles || allCycles.length === 0) {
-        // No cycles in DB, everything from 0 to targetCycle is missing
-        console.log('No cycles found in DB, entire range is missing')
-        return [
-          {
-            startCycle: 0,
-            endCycle: targetCycle,
-            gapSize: targetCycle + 1,
-          },
-        ]
-      }
-
-      const gaps: CycleGap[] = []
-      const cycleNumbers = allCycles.map((c) => c.counter).sort((a, b) => a - b)
-
-      console.log(`Found ${cycleNumbers.length} cycles in DB`)
-      console.log(`First cycle: ${cycleNumbers[0]}, Last cycle: ${cycleNumbers[cycleNumbers.length - 1]}`)
-
-      // Check if there's a gap at the beginning
-      if (cycleNumbers[0] > 0) {
-        gaps.push({
-          startCycle: 0,
-          endCycle: cycleNumbers[0] - 1,
-          gapSize: cycleNumbers[0],
-        })
-        console.log(`Gap found at beginning: 0 to ${cycleNumbers[0] - 1}`)
-      }
-
-      // Find gaps in the middle
-      for (let i = 0; i < cycleNumbers.length - 1; i++) {
-        const currentCycle = cycleNumbers[i]
-        const nextCycle = cycleNumbers[i + 1]
-
-        // If next cycle is not immediately after current, there's a gap
-        if (nextCycle - currentCycle > 1) {
-          const gapStart = currentCycle + 1
-          const gapEnd = nextCycle - 1
-          gaps.push({
-            startCycle: gapStart,
-            endCycle: gapEnd,
-            gapSize: gapEnd - gapStart + 1,
-          })
-          console.log(`Gap found: ${gapStart} to ${gapEnd} (${gapEnd - gapStart + 1} cycles)`)
+      // Get missing cycle ranges directly from SQL using LEFT JOIN
+      const gaps = await CycleDB.queryMissingCycleRanges(targetCycle)
+
+      // Handle case where no cycles exist in DB
+      if (gaps.length === 0) {
+        const cycleCount = await CycleDB.queryCycleCount()
+        if (cycleCount === 0) {
+          // No cycles in DB, entire range is missing
+          console.log('No cycles found in DB, entire range is missing')
+          return [
+            {
+              startCycle: 0,
+              endCycle: targetCycle,
+              gapSize: targetCycle + 1,
+            },
+          ]
+        } else {
+          // All cycles present
+          console.log('✅ No missing cycles - database is complete up to target cycle')
+          return []
         }
       }
 
-      // Check if there's a gap at the end
-      const lastLocalCycle = cycleNumbers[cycleNumbers.length - 1]
-      if (lastLocalCycle < targetCycle) {
-        gaps.push({
-          startCycle: lastLocalCycle + 1,
-          endCycle: targetCycle,
-          gapSize: targetCycle - lastLocalCycle,
-        })
-        console.log(`Gap found at end: ${lastLocalCycle + 1} to ${targetCycle}`)
-      }
-
+      // Log results
       console.log(`\nTotal gaps found: ${gaps.length}`)
+      for (const gap of gaps) {
+        console.log(`  Gap: ${gap.startCycle} to ${gap.endCycle} (${gap.gapSize} cycles)`)
+      }
       const totalMissing = gaps.reduce((sum, gap) => sum + gap.gapSize, 0)
       console.log(`Total missing cycles: ${totalMissing}`)
 
@@ -641,6 +593,9 @@ export class DataSyncManager {
       console.log(`\n${'='.repeat(70)}`)
       console.log(`✅ DATA SYNC COMPLETED SUCCESSFULLY`)
       console.log(`${'='.repeat(70)}\n`)
+
+      // Print final database summary
+      await this.printSyncSummary()
     } catch (error) {
       console.error('Error executing sync with recovery:', error)
       throw error
@@ -724,4 +679,47 @@ export class DataSyncManager {
     }
     console.log(`${'='.repeat(70)}\n`)
   }
+
+  /**
+   * Get overall sync statistics from database
+   */
+  async getSyncStats(): Promise<{
+    totalCycles: number
+    totalReceipts: number
+    totalOriginalTxs: number
+  }> {
+    try {
+      const [cycleCount, receiptCount, originalTxCount] = await Promise.all([
+        CycleDB.queryCycleCount(),
+        ReceiptDB.queryReceiptCount(),
+        OriginalTxDataDB.queryOriginalTxDataCount(),
+      ])
+
+      return {
+        totalCycles: cycleCount || 0,
+        totalReceipts: receiptCount || 0,
+        totalOriginalTxs: originalTxCount || 0,
+      }
+    } catch (error) {
+      console.error('Error getting sync stats:', error)
+      return {
+        totalCycles: 0,
+        totalReceipts: 0,
+        totalOriginalTxs: 0,
+      }
+    }
+  }
+
+  /**
+   * Print sync summary
+   */
+  async printSyncSummary(): Promise<void> {
+    const stats = await this.getSyncStats()
+    console.log('='.repeat(60))
+    console.log('Sync Summary:')
+    console.log(`  Total Cycles:      ${stats.totalCycles}`)
+    console.log(`  Total Receipts:    ${stats.totalReceipts}`)
+    console.log(`  Total OriginalTxs: ${stats.totalOriginalTxs}`)
+    console.log('='.repeat(60))
+  }
 }
diff --git a/src/class/ParallelDataSync.ts b/src/class/ParallelDataSync.ts
index 4f90513..f012aa3 100644
--- a/src/class/ParallelDataSync.ts
+++ b/src/class/ParallelDataSync.ts
@@ -4,7 +4,6 @@ import { Utils as StringUtils } from '@shardus/types'
 import { config, DISTRIBUTOR_URL } from '../config'
 import { queryFromDistributor, DataType } from './DataSync'
 import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
-import { ParallelSyncCheckpointManager } from './ParallelSyncCheckpoint'
 import { Cycle } from '../types'
 import axios, { AxiosInstance } from 'axios'
 import http from 'http'
@@ -69,11 +68,9 @@ export interface SyncTxDataByCycleRange {
  * Implements the optimal sync strategy with:
  * - Cycle-level parallelization
  * - Composite cursor (timestamp + txId ) to prevent data loss
- * - Automatic resume from database
  * - Work queue for load balancing
  */
 export class ParallelDataSync {
-  private checkpointManager: ParallelSyncCheckpointManager
   private queue: PQueue
   private syncConfig: ParallelSyncConfig
   private stats: SyncStats
@@ -82,7 +79,6 @@ export class ParallelDataSync {
   private axiosInstance: AxiosInstance
 
   constructor(syncConfig?: Partial<ParallelSyncConfig>) {
-    this.checkpointManager = new ParallelSyncCheckpointManager()
     this.syncConfig = {
       concurrency: syncConfig?.concurrency || config.parallelSyncConcurrency || 10,
       retryAttempts: syncConfig?.retryAttempts || config.syncRetryAttempts || 3,
@@ -299,7 +295,7 @@ export class ParallelDataSync {
       this.stats.endTime = Date.now()
 
       // Summary
-      await this.printSummary()
+      await this.printSummary(startCycle, endCycle)
     } catch (error) {
       console.error('Fatal error in parallel sync:', error)
       this.stats.errors++
@@ -779,7 +775,7 @@ export class ParallelDataSync {
   /**
    * Print sync summary
    */
-  private async printSummary(): Promise<void> {
+  private async printSummary(startCycle: number, endCycle: number): Promise<void> {
     const elapsedMs = (this.stats.endTime || Date.now()) - this.stats.startTime
     const elapsedSec = (elapsedMs / 1000).toFixed(2)
     const elapsedMin = (elapsedMs / 60000).toFixed(2)
@@ -787,6 +783,7 @@ export class ParallelDataSync {
     console.log(`\n${'='.repeat(60)}`)
     console.log('Parallel Sync Complete!')
     console.log(`${'='.repeat(60)}`)
+    console.log(`  Cycle Range:       ${startCycle} → ${endCycle}`)
     console.log(`  Cycles Synced:     ${this.stats.completedCycles}/${this.stats.totalCycles}`)
     console.log(`  Receipts Synced:   ${this.stats.totalReceipts}`)
     console.log(`  OriginalTxs Synced: ${this.stats.totalOriginalTxs}`)
@@ -796,9 +793,6 @@ export class ParallelDataSync {
       `  Throughput:        ${(this.stats.totalReceipts / (elapsedMs / 1000)).toFixed(0)} receipts/sec`
     )
     console.log(`${'='.repeat(60)}\n`)
-
-    // Print DB summary
-    await this.checkpointManager.printSyncSummary()
   }
 
   /**
diff --git a/src/class/ParallelSyncCheckpoint.ts b/src/class/ParallelSyncCheckpoint.ts
deleted file mode 100644
index 472a0d8..0000000
--- a/src/class/ParallelSyncCheckpoint.ts
+++ /dev/null
@@ -1,315 +0,0 @@
-import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
-import { config } from '../config'
-
-/**
- * Composite cursor for tracking sync progress
- * Uses both timestamp and ID to handle timestamp collisions
- */
-export interface CompositeCursor {
-  timestamp: number
-  id: string // receiptId or txId
-}
-
-/**
- * Cycle resume information from database
- */
-export interface CycleResumeInfo {
-  cycleNumber: number
-  startTimestamp: number
-  endTimestamp: number
-  receipts: {
-    lastTimestamp: number
-    lastId: string
-    count: number
-  }
-  originalTxs: {
-    lastTimestamp: number
-    lastId: string
-    count: number
-  }
-}
-
-/**
- * Manages sync state by querying the database
- * No separate checkpoint storage needed - DB is source of truth
- */
-export class ParallelSyncCheckpointManager {
-  /**
-   * Get the last completed cycle from database
-   */
-  async getLastCompletedCycle(): Promise<number> {
-    try {
-      const cycles = await CycleDB.queryLatestCycleRecords(1)
-      if (cycles && cycles.length > 0) {
-        return cycles[0].counter
-      }
-      return 0
-    } catch (error) {
-      console.error('Error getting last completed cycle:', error)
-      return 0
-    }
-  }
-
-  /**
-   * Get resume cursor for receipts in a specific cycle
-   * Returns the last receipt's timestamp and ID, or cycle start if none exist
-   */
-  async getReceiptsCursor(cycleNumber: number, cycleStartTimestamp: number): Promise<CompositeCursor> {
-    try {
-      // Query last receipt for this cycle
-      const receipts = await ReceiptDB.queryReceipts({
-        limit: 1,
-        startCycleNumber: cycleNumber,
-      })
-
-      if (receipts && receipts.length > 0) {
-        const lastReceipt = receipts[0]
-        return {
-          timestamp: lastReceipt.timestamp,
-          id: lastReceipt.receiptId,
-        }
-      }
-
-      // No receipts found for this cycle, start from cycle beginning
-      return {
-        timestamp: cycleStartTimestamp,
-        id: '',
-      }
-    } catch (error) {
-      console.error(`Error getting receipts cursor for cycle ${cycleNumber}:`, error)
-      return {
-        timestamp: cycleStartTimestamp,
-        id: '',
-      }
-    }
-  }
-
-  /**
-   * Get resume cursor for originalTxs in a specific cycle
-   */
-  async getOriginalTxsCursor(cycleNumber: number, cycleStartTimestamp: number): Promise<CompositeCursor> {
-    try {
-      // Query last originalTx for this cycle
-      const originalTxs = await OriginalTxDataDB.queryOriginalTxsData({
-        limit: 1, // limit
-        startCycle: cycleNumber, // startCycle
-      })
-
-      if (originalTxs && originalTxs.length > 0) {
-        // Sort by timestamp DESC to get the last one
-        originalTxs.sort((a, b) => b.timestamp - a.timestamp)
-        const lastTx = originalTxs[0]
-        return {
-          timestamp: lastTx.timestamp,
-          id: lastTx.txId,
-        }
-      }
-
-      // No originalTxs found for this cycle, start from cycle beginning
-      return {
-        timestamp: cycleStartTimestamp,
-        id: '',
-      }
-    } catch (error) {
-      console.error(`Error getting originalTxs cursor for cycle ${cycleNumber}:`, error)
-      return {
-        timestamp: cycleStartTimestamp,
-        id: '',
-      }
-    }
-  }
-
-  /**
-   * Get counts of data already synced for a cycle
-   */
-  async getCycleSyncStatus(cycleNumber: number): Promise<{
-    receiptsCount: number
-    originalTxsCount: number
-    isComplete: boolean
-  }> {
-    try {
-      const [receiptsCountResult, originalTxsCountResult] = await Promise.all([
-        ReceiptDB.queryReceiptCountByCycles(cycleNumber, cycleNumber),
-        OriginalTxDataDB.queryOriginalTxDataCountByCycles(cycleNumber, cycleNumber),
-      ])
-
-      const receiptsCount =
-        receiptsCountResult && receiptsCountResult.length > 0 ? receiptsCountResult[0].receipts : 0
-
-      const originalTxsCount =
-        originalTxsCountResult && originalTxsCountResult.length > 0
-          ? originalTxsCountResult[0].originalTxsData
-          : 0
-
-      return {
-        receiptsCount,
-        originalTxsCount,
-        isComplete: false, // Determined by sync logic
-      }
-    } catch (error) {
-      console.error(`Error getting cycle sync status for cycle ${cycleNumber}:`, error)
-      return {
-        receiptsCount: 0,
-        originalTxsCount: 0,
-        isComplete: false,
-      }
-    }
-  }
-
-  /**
-   * Determine which cycles need to be synced
-   * Compares local DB with distributor totals
-   */
-  async getCyclesToSync(startCycle: number, endCycle: number): Promise<number[]> {
-    try {
-      const lastLocalCycle = await this.getLastCompletedCycle()
-
-      // If we have no local data, sync all cycles
-      if (lastLocalCycle === 0) {
-        const cyclesToSync: number[] = []
-        for (let i = startCycle; i <= endCycle; i++) {
-          cyclesToSync.push(i)
-        }
-        return cyclesToSync
-      }
-
-      // If endCycle is beyond what we have, sync from last local + 1
-      if (endCycle > lastLocalCycle) {
-        const cyclesToSync: number[] = []
-        for (let i = lastLocalCycle + 1; i <= endCycle; i++) {
-          cyclesToSync.push(i)
-        }
-        return cyclesToSync
-      }
-
-      // All cycles already synced
-      return []
-    } catch (error) {
-      console.error('Error determining cycles to sync:', error)
-      return []
-    }
-  }
-
-  /**
-   * Check if a cycle is fully synced by comparing counts with distributor
-   */
-  async isCycleFullySynced(
-    cycleNumber: number,
-    expectedReceiptsCount: number,
-    expectedOriginalTxsCount: number
-  ): Promise<boolean> {
-    try {
-      const status = await this.getCycleSyncStatus(cycleNumber)
-
-      const receiptsMatch = status.receiptsCount === expectedReceiptsCount
-      const originalTxsMatch = status.originalTxsCount === expectedOriginalTxsCount
-
-      if (config.verbose) {
-        console.log(
-          `Cycle ${cycleNumber} sync check: ` +
-            `receipts ${status.receiptsCount}/${expectedReceiptsCount}, ` +
-            `originalTxs ${status.originalTxsCount}/${expectedOriginalTxsCount}`
-        )
-      }
-
-      return receiptsMatch && originalTxsMatch
-    } catch (error) {
-      console.error(`Error checking if cycle ${cycleNumber} is fully synced:`, error)
-      return false
-    }
-  }
-
-  /**
-   * Get detailed resume information for a specific cycle
-   */
-  async getCycleResumeInfo(
-    cycleNumber: number,
-    cycleStartTimestamp: number,
-    cycleEndTimestamp: number
-  ): Promise<CycleResumeInfo> {
-    const [receiptsCursor, originalTxsCursor, syncStatus] = await Promise.all([
-      this.getReceiptsCursor(cycleNumber, cycleStartTimestamp),
-      this.getOriginalTxsCursor(cycleNumber, cycleStartTimestamp),
-      this.getCycleSyncStatus(cycleNumber),
-    ])
-
-    return {
-      cycleNumber,
-      startTimestamp: cycleStartTimestamp,
-      endTimestamp: cycleEndTimestamp,
-      receipts: {
-        lastTimestamp: receiptsCursor.timestamp,
-        lastId: receiptsCursor.id,
-        count: syncStatus.receiptsCount,
-      },
-      originalTxs: {
-        lastTimestamp: originalTxsCursor.timestamp,
-        lastId: originalTxsCursor.id,
-        count: syncStatus.originalTxsCount,
-      },
-    }
-  }
-
-  /**
-   * Log sync progress
-   */
-  logProgress(
-    cycleNumber: number,
-    dataType: 'receipts' | 'originalTxs',
-    itemsFetched: number,
-    totalItems: number
-  ): void {
-    const percentage = totalItems > 0 ? ((totalItems / totalItems) * 100).toFixed(1) : '0.0'
-    console.log(
-      `[Cycle ${cycleNumber}] ${dataType}: +${itemsFetched} items (total: ${totalItems}, ${percentage}%)`
-    )
-  }
-
-  /**
-   * Get overall sync statistics from database
-   */
-  async getSyncStats(): Promise<{
-    totalCycles: number
-    totalReceipts: number
-    totalOriginalTxs: number
-    lastCycleNumber: number
-  }> {
-    try {
-      const [cycleCount, receiptCount, originalTxCount, lastCycle] = await Promise.all([
-        CycleDB.queryCycleCount(),
-        ReceiptDB.queryReceiptCount(),
-        OriginalTxDataDB.queryOriginalTxDataCount(),
-        this.getLastCompletedCycle(),
-      ])
-
-      return {
-        totalCycles: cycleCount || 0,
-        totalReceipts: receiptCount || 0,
-        totalOriginalTxs: originalTxCount || 0,
-        lastCycleNumber: lastCycle,
-      }
-    } catch (error) {
-      console.error('Error getting sync stats:', error)
-      return {
-        totalCycles: 0,
-        totalReceipts: 0,
-        totalOriginalTxs: 0,
-        lastCycleNumber: 0,
-      }
-    }
-  }
-
-  /**
-   * Print sync summary
-   */
-  async printSyncSummary(): Promise<void> {
-    const stats = await this.getSyncStats()
-    console.log('='.repeat(60))
-    console.log('Sync Summary:')
-    console.log(`  Total Cycles:      ${stats.totalCycles}`)
-    console.log(`  Total Receipts:    ${stats.totalReceipts}`)
-    console.log(`  Total OriginalTxs: ${stats.totalOriginalTxs}`)
-    console.log(`  Last Cycle:        ${stats.lastCycleNumber}`)
-    console.log('='.repeat(60))
-  }
-}
diff --git a/src/collector.ts b/src/collector.ts
index 0cf573e..836d737 100644
--- a/src/collector.ts
+++ b/src/collector.ts
@@ -271,13 +271,13 @@ export const startDataSyncManager = async (): Promise<() => Promise<void>> => {
 
   // Run anomaly detection BEFORE connecting to websocket
   // This fails fast if there are data corruption issues
-  const syncManager = new DataSyncManager()
-  await syncManager.detectDataAnomalies()
+  const dataSyncManager = new DataSyncManager()
+  await dataSyncManager.detectDataAnomalies()
 
   console.log('✅ Data anomaly check passed - proceeding with sync')
 
   // Return the sync function to be executed after WS connection
-  return syncManager.syncData
+  return dataSyncManager.syncData
 }
 
 const attemptReconnection = (): void => {
diff --git a/src/storage/cycle.ts b/src/storage/cycle.ts
index 6ddc964..d295f7e 100644
--- a/src/storage/cycle.ts
+++ b/src/storage/cycle.ts
@@ -234,3 +234,95 @@ export async function queryCycleRecordsByTimestamp(
     return []
   }
 }
+
+export interface CycleGap {
+  startCycle: number
+  endCycle: number
+  gapSize: number
+}
+
+/**
+ * Efficiently query for missing cycle ranges
+ * Returns ranges of missing cycles from 0 to targetCycle
+ * Uses LEFT JOIN to find gaps between consecutive cycles - O(N) complexity
+ */
+export async function queryMissingCycleRanges(targetCycle: number): Promise<CycleGap[]> {
+  try {
+
+    // Get first and last cycle for edge gap detection
+    const firstCycleResult = (await db.get(
+      cycleDatabase,
+      'SELECT MIN(counter) as first_cycle FROM cycles',
+      []
+    )) as { first_cycle: number } | undefined
+
+    const lastCycleResult = (await db.get(
+      cycleDatabase,
+      'SELECT MAX(counter) as last_cycle FROM cycles WHERE counter <= ?',
+      [targetCycle]
+    )) as { last_cycle: number } | undefined
+
+    const firstCycle = firstCycleResult?.first_cycle ?? 0
+    const lastCycle = lastCycleResult?.last_cycle ?? -1
+
+    const ranges: CycleGap[] = []
+
+    // Check for gap at the beginning (0 to firstCycle - 1)
+    if (firstCycle > 0) {
+      ranges.push({
+        startCycle: 0,
+        endCycle: firstCycle - 1,
+        gapSize: firstCycle,
+      })
+    }
+
+    // Find gaps in the middle using LEFT JOIN
+    // For each cycle c1, check if the next cycle (c1.counter + 1) exists
+    // If not, find where the gap ends by looking for the next existing cycle
+    const sql = `
+      SELECT
+        c1.counter + 1 AS startCycle,
+        (SELECT MIN(c2.counter) - 1
+         FROM cycles c2
+         WHERE c2.counter > c1.counter AND c2.counter <= ?) AS endCycle
+      FROM cycles c1
+      WHERE NOT EXISTS (
+        SELECT 1 FROM cycles c3
+        WHERE c3.counter = c1.counter + 1
+      )
+      AND c1.counter < ?
+      ORDER BY c1.counter
+    `
+
+    const middleGaps = (await db.all(cycleDatabase, sql, [targetCycle, targetCycle])) as {
+      startCycle: number
+      endCycle: number
+    }[]
+
+    // Add middle gaps with calculated gapSize (filter out null endCycle values)
+    for (const gap of middleGaps) {
+      if (gap.endCycle !== null && gap.endCycle >= gap.startCycle) {
+        ranges.push({
+          startCycle: gap.startCycle,
+          endCycle: gap.endCycle,
+          gapSize: gap.endCycle - gap.startCycle + 1,
+        })
+      }
+    }
+
+    // Check for gap at the end (lastCycle + 1 to targetCycle)
+    if (lastCycle >= 0 && lastCycle < targetCycle) {
+      ranges.push({
+        startCycle: lastCycle + 1,
+        endCycle: targetCycle,
+        gapSize: targetCycle - lastCycle,
+      })
+    }
+
+    if (config.verbose) console.log(`Found ${ranges.length} missing cycle ranges`)
+    return ranges
+  } catch (e) {
+    console.log('Error querying missing cycle ranges:', e)
+    throw e
+  }
+}

From eb68f9c9178b5e57382053a70801cf7691ffcb76 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Tue, 11 Nov 2025 00:34:33 +0800
Subject: [PATCH 07/14] Refactor ParallelDataSync to use unified data fetching
 and improve batch processing

- Replace separate fetch methods with unified fetchDataFromDistributor method
- Refactor startSyncing to accept pre-created cycle batches instead of range parameters
- Add createCycleBatches method for better batch management separation
- Optimize account/transaction processing by removing individual existence checks
- Add batch querying for account timestamps to reduce database calls
- Improve SQLite performance with increased cache size and memory-mapped I/O
- Add network timing and compression metrics logging for better observability
- Update DataSyncManager to use new batch-based sync approach
- Increase default cyclesPerBatch from 10 to 100 for better throughput
---
 src/class/DataSync.ts         |   2 +
 src/class/DataSyncManager.ts  | 184 ++++++++----
 src/class/ParallelDataSync.ts | 550 +++++++++++++++++-----------------
 src/collector.ts              |  41 +--
 src/config/index.ts           |   2 +-
 src/storage/account.ts        |  41 +++
 src/storage/receipt.ts        |  65 ++--
 src/storage/sqlite3storage.ts |   7 +-
 8 files changed, 509 insertions(+), 383 deletions(-)

diff --git a/src/class/DataSync.ts b/src/class/DataSync.ts
index d6f4de5..4ad76cf 100644
--- a/src/class/DataSync.ts
+++ b/src/class/DataSync.ts
@@ -15,6 +15,7 @@ export enum DataType {
 }
 
 interface queryFromDistributorParameters {
+  count?: number
   start?: number
   end?: number
   page?: number
@@ -64,6 +65,7 @@ export const queryFromDistributor = async (
     const response = await axios.post(url, data, {
       headers: {
         'Content-Type': 'application/json',
+        'Accept-Encoding': 'gzip, deflate', // Request compressed responses
       },
       timeout: 45000,
       transformResponse: (res) => {
diff --git a/src/class/DataSyncManager.ts b/src/class/DataSyncManager.ts
index f62ba52..eea0504 100644
--- a/src/class/DataSyncManager.ts
+++ b/src/class/DataSyncManager.ts
@@ -1,4 +1,5 @@
-import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
+import { P2P } from '@shardus/types'
+import { CycleDB, ReceiptDB, OriginalTxDataDB, AccountDB, TransactionDB } from '../storage'
 import { CycleGap } from '../storage/cycle'
 import { config } from '../config'
 import { queryFromDistributor, DataType, downloadAndSyncGenesisAccounts } from './DataSync'
@@ -59,6 +60,18 @@ export class DataSyncManager {
   private lookbackCycles: number
 
   constructor() {
+    console.log('\n')
+    console.log('='.repeat(60))
+    console.log('INITIALIZING DATA SYNC MANAGER')
+    console.log('='.repeat(60))
+    console.log('DataSyncManager provides intelligent data synchronization with:')
+    console.log('  • Early data anomaly detection before sync operations')
+    console.log('  • Automatic gap detection and recovery')
+    console.log('  • Lookback verification window for data integrity')
+    console.log('  • Parallel multi-cycle-based sync (10x+ performance improvement)')
+    console.log('='.repeat(60))
+    console.log('\n')
+
     // Calculate lookback window: cyclesPerBatch * parallelSyncConcurrency
     const cyclesPerBatch = config.cyclesPerBatch || 10
     const concurrency = config.parallelSyncConcurrency || 10
@@ -72,40 +85,39 @@ export class DataSyncManager {
    * Handles both fresh start and recovery from interruptions
    */
   async syncData(): Promise<void> {
-    const response = await this.getTotalDataFromDistributor()
-    if (!response) {
-      throw new Error('Failed to fetch total data from distributor')
+    const latestDistributorCycle = await this.getLatestCycleFromDistributor()
+    if (!latestDistributorCycle) {
+      throw new Error('Failed to fetch latest cycle from distributor')
     }
-    const { totalCycles } = response
     const lastLocalCycles = await CycleDB.queryLatestCycleRecords(1)
     const lastLocalCycle = lastLocalCycles.length > 0 ? lastLocalCycles[0].counter : -1
 
-    // Always sync genesis accounts first
-    if (lastLocalCycle === 0) {
-      console.log('Syncing genesis accounts...')
-      await downloadAndSyncGenesisAccounts()
-    }
-
     // Check if this is a fresh start
     const isFreshStart = lastLocalCycle === -1 || lastLocalCycle === 0
 
     if (isFreshStart) {
       // Fresh start - no checkpoint needed, just sync from beginning
       console.log('🆕 Fresh start detected - syncing from cycle 0')
+      // Always sync genesis accounts first
+      console.log('Syncing genesis accounts...')
+      await downloadAndSyncGenesisAccounts()
+
       const parallelDataSync = new ParallelDataSync({
         concurrency: config.parallelSyncConcurrency,
         retryAttempts: 3,
         retryDelayMs: 1000,
       })
 
-      await parallelDataSync.startSyncing(0, totalCycles - 1)
+      const cycleBatches = await parallelDataSync.createCycleBatches(0, latestDistributorCycle)
+
+      await parallelDataSync.startSyncing(cycleBatches)
 
       // Print final database summary
       await this.printSyncSummary()
     } else {
       // Existing data - use DataSyncManager to identify and patch gaps/mismatches
       console.log('📊 Existing data detected - running recovery analysis')
-      const recoveryPlan = await this.generateRecoveryPlan(totalCycles)
+      const recoveryPlan = await this.generateRecoveryPlan(latestDistributorCycle)
 
       // Execute the complete sync (recovery + normal sync)
       await this.executeSyncWithRecovery(recoveryPlan)
@@ -128,11 +140,10 @@ export class DataSyncManager {
 
     console.log('\n📊 Running data anomaly detection...')
 
-    const response = await this.getTotalDataFromDistributor()
-    if (!response) {
-      throw new Error('Failed to fetch distributor cycle info')
+    const currentDistributorCycle = await this.getLatestCycleFromDistributor()
+    if (!currentDistributorCycle) {
+      throw new Error('Failed to fetch latest cycle from distributor')
     }
-    const currentDistributorCycle = response.totalCycles
 
     console.log(`Last local cycle: ${lastLocalCycle}`)
     console.log(`Current distributor cycle: ${currentDistributorCycle}`)
@@ -235,6 +246,7 @@ export class DataSyncManager {
           }
         }
       }
+      console.log('✅ No data anomalies detected')
     } catch (error) {
       throw Error(
         `Data anomalies detected! Local database may be corrupted or out of sync. ` +
@@ -243,7 +255,21 @@ export class DataSyncManager {
       )
     }
 
-    console.log('✅ No data anomalies detected')
+    console.log('✅ Data anomaly check passed - proceeding with sync')
+  }
+
+  /**
+   * Fetch latest cycle from distributor
+   */
+  private async getLatestCycleFromDistributor(): Promise<number | null> {
+    const response: { data: { cycleInfo: P2P.CycleCreatorTypes.CycleRecord[] } } = await queryFromDistributor(
+      DataType.CYCLE,
+      { count: 1 }
+    )
+    if (!response?.data || response.data?.cycleInfo?.[0]?.counter === undefined) {
+      return null
+    }
+    return response.data.cycleInfo[0].counter
   }
 
   /**
@@ -384,6 +410,7 @@ export class DataSyncManager {
 
   /**
    * Compare cycle data counts between local DB and distributor
+   * Queries in batches to respect MAX_CYCLES_PER_REQUEST limit
    */
   private async compareCycleDataWithDistributor(
     startCycle: number,
@@ -392,43 +419,61 @@ export class DataSyncManager {
     const mismatched: MismatchedCycle[] = []
 
     try {
-      // Fetch counts from distributor
-      const [receiptsResponse, originalTxsResponse] = await Promise.all([
-        queryFromDistributor(DataType.RECEIPT, { startCycle, endCycle, type: 'tally' }),
-        queryFromDistributor(DataType.ORIGINALTX, { startCycle, endCycle, type: 'tally' }),
-      ])
+      // Split into batches if range is larger than max allowed
+      const batches: { start: number; end: number }[] = []
+      for (let i = startCycle; i <= endCycle; i += config.requestLimits.MAX_CYCLES_PER_REQUEST) {
+        const batchEnd = Math.min(i + config.requestLimits.MAX_CYCLES_PER_REQUEST, endCycle)
+        batches.push({ start: i, end: batchEnd })
+      }
 
-      if (!receiptsResponse?.data?.receipts || !originalTxsResponse?.data?.originalTxs) {
-        console.warn(`Failed to fetch distributor data for cycles ${startCycle}-${endCycle}`)
-        return mismatched
+      // Fetch all distributor data in batches
+      const allDistributorReceipts: { cycle: number; receipts: number }[] = []
+      const allDistributorOriginalTxs: { cycle: number; originalTxsData: number }[] = []
+
+      for (const batch of batches) {
+        const [receiptsResponse, originalTxsResponse] = await Promise.all([
+          queryFromDistributor(DataType.RECEIPT, {
+            startCycle: batch.start,
+            endCycle: batch.end,
+            type: 'tally',
+          }),
+          queryFromDistributor(DataType.ORIGINALTX, {
+            startCycle: batch.start,
+            endCycle: batch.end,
+            type: 'tally',
+          }),
+        ])
+
+        if (receiptsResponse?.data?.receipts) {
+          allDistributorReceipts.push(...receiptsResponse.data.receipts)
+        }
+        if (originalTxsResponse?.data?.originalTxs) {
+          allDistributorOriginalTxs.push(...originalTxsResponse.data.originalTxs)
+        }
       }
 
-      const distributorReceipts: { cycle: number; receipts: number }[] = receiptsResponse.data.receipts
-      const distributorOriginalTxs: { cycle: number; originalTxsData: number }[] =
-        originalTxsResponse.data.originalTxs
+      // Sort distributor data by cycle
+      allDistributorReceipts.sort((a, b) => a.cycle - b.cycle)
+      allDistributorOriginalTxs.sort((a, b) => a.cycle - b.cycle)
 
-      // Fetch counts from local DB
+      // Fetch counts from local DB (single query for entire range)
       const [localReceipts, localOriginalTxs] = await Promise.all([
         ReceiptDB.queryReceiptCountByCycles(startCycle, endCycle),
         OriginalTxDataDB.queryOriginalTxDataCountByCycles(startCycle, endCycle),
       ])
 
-      // Create maps for easier lookup
-      const localReceiptsMap = new Map(localReceipts.map((r) => [r.cycle, r.receipts]))
-      const localOriginalTxsMap = new Map(localOriginalTxs.map((t) => [t.cycle, t.originalTxsData]))
-
-      // Compare each cycle
-      const allCycles = new Set([
-        ...distributorReceipts.map((r) => r.cycle),
-        ...distributorOriginalTxs.map((t) => t.cycle),
-      ])
+      console.log(
+        `Comparing cycles ${startCycle} to ${endCycle} with ${allDistributorReceipts.length} distributor receipts and ${allDistributorOriginalTxs.length} distributor originalTxs`
+      )
+      console.log(allDistributorReceipts, localReceipts)
+      console.log(allDistributorOriginalTxs, localOriginalTxs)
 
-      for (const cycle of allCycles) {
-        const distReceipts = distributorReceipts.find((r) => r.cycle === cycle)?.receipts || 0
-        const distOriginalTxs = distributorOriginalTxs.find((t) => t.cycle === cycle)?.originalTxsData || 0
+      for (let cycle = startCycle; cycle <= endCycle; cycle++) {
+        const distReceipts = allDistributorReceipts.find((r) => r.cycle === cycle)?.receipts || 0
+        const distOriginalTxs = allDistributorOriginalTxs.find((t) => t.cycle === cycle)?.originalTxsData || 0
 
-        const localReceiptsCount = localReceiptsMap.get(cycle) || 0
-        const localOriginalTxsCount = localOriginalTxsMap.get(cycle) || 0
+        const localReceiptsCount = localReceipts.find((r) => r.cycle === cycle)?.receipts || 0
+        const localOriginalTxsCount = localOriginalTxs.find((t) => t.cycle === cycle)?.originalTxsData || 0
 
         const receiptsMismatch = localReceiptsCount !== distReceipts
         const originalTxsMismatch = localOriginalTxsCount !== distOriginalTxs
@@ -574,18 +619,25 @@ export class DataSyncManager {
 
       // Step 4: Execute ParallelDataSync for all ranges
       if (mergedRanges.length > 0) {
-        for (const range of mergedRanges) {
-          console.log(`\nSyncing range: ${range.startCycle} to ${range.endCycle} (${range.gapSize} cycles)`)
+        console.log('\n📡 Starting data sync with recovery plan')
 
-          const parallelSync = new ParallelDataSync({
-            concurrency: config.parallelSyncConcurrency || 10,
-            retryAttempts: 3,
-            retryDelayMs: 1000,
-          })
+        const parallelDataSync = new ParallelDataSync({
+          concurrency: config.parallelSyncConcurrency,
+          retryAttempts: 3,
+          retryDelayMs: 1000,
+        })
 
-          await parallelSync.startSyncing(range.startCycle, range.endCycle)
-          console.log(`✅ Completed range ${range.startCycle} to ${range.endCycle}`)
+        const cycleBatches = []
+        // For each range, create cycle batches and merge them into one
+        for (const range of mergedRanges) {
+          console.log(`\nFor range: ${range.startCycle} to ${range.endCycle} (${range.gapSize} cycles)`)
+          const cycleBatch = parallelDataSync.createCycleBatches(range.startCycle, range.endCycle)
+          cycleBatches.push(...cycleBatch)
         }
+
+        await parallelDataSync.startSyncing(cycleBatches)
+
+        console.log('\n✅ Data sync with recovery completed successfully')
       } else {
         console.log('\n✅ No data to sync, database is up to date')
       }
@@ -685,27 +737,35 @@ export class DataSyncManager {
    */
   async getSyncStats(): Promise<{
     totalCycles: number
+    totalAccounts: number
     totalReceipts: number
     totalOriginalTxs: number
+    totalTransactions: number
   }> {
     try {
-      const [cycleCount, receiptCount, originalTxCount] = await Promise.all([
+      const [cycleCount, accountCount, receiptCount, originalTxCount, transactionCount] = await Promise.all([
         CycleDB.queryCycleCount(),
+        AccountDB.queryAccountCount(),
         ReceiptDB.queryReceiptCount(),
         OriginalTxDataDB.queryOriginalTxDataCount(),
+        TransactionDB.queryTransactionCount(),
       ])
 
       return {
         totalCycles: cycleCount || 0,
+        totalAccounts: accountCount || 0,
         totalReceipts: receiptCount || 0,
         totalOriginalTxs: originalTxCount || 0,
+        totalTransactions: transactionCount || 0,
       }
     } catch (error) {
       console.error('Error getting sync stats:', error)
       return {
         totalCycles: 0,
+        totalAccounts: 0,
         totalReceipts: 0,
         totalOriginalTxs: 0,
+        totalTransactions: 0,
       }
     }
   }
@@ -715,11 +775,27 @@ export class DataSyncManager {
    */
   async printSyncSummary(): Promise<void> {
     const stats = await this.getSyncStats()
+    const distributorData = await this.getTotalDataFromDistributor()
+
     console.log('='.repeat(60))
     console.log('Sync Summary:')
+    console.log('\nLocal Database:')
     console.log(`  Total Cycles:      ${stats.totalCycles}`)
+    console.log(`  Total Accounts:    ${stats.totalAccounts}`)
     console.log(`  Total Receipts:    ${stats.totalReceipts}`)
     console.log(`  Total OriginalTxs: ${stats.totalOriginalTxs}`)
+    console.log(`  Total Transactions: ${stats.totalTransactions}`)
+
+    if (distributorData) {
+      console.log('\nDistributor:')
+      console.log(`  Total Cycles:      ${distributorData.totalCycles}`)
+      console.log(`  Total Accounts:    ${distributorData.totalAccounts}`)
+      console.log(`  Total Receipts:    ${distributorData.totalReceipts}`)
+      console.log(`  Total OriginalTxs: ${distributorData.totalOriginalTxs}`)
+    } else {
+      console.log('\nDistributor: Failed to fetch data')
+    }
+
     console.log('='.repeat(60))
   }
 }
diff --git a/src/class/ParallelDataSync.ts b/src/class/ParallelDataSync.ts
index f012aa3..510260c 100644
--- a/src/class/ParallelDataSync.ts
+++ b/src/class/ParallelDataSync.ts
@@ -1,8 +1,8 @@
 import PQueue from 'p-queue'
 import * as crypto from '@shardus/crypto-utils'
-import { Utils as StringUtils } from '@shardus/types'
+import { P2P, Utils as StringUtils } from '@shardus/types'
 import { config, DISTRIBUTOR_URL } from '../config'
-import { queryFromDistributor, DataType } from './DataSync'
+import { DataType } from './DataSync'
 import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
 import { Cycle } from '../types'
 import axios, { AxiosInstance } from 'axios'
@@ -48,6 +48,7 @@ interface ResponseSizeMetadata {
 
 interface ResponseDataWithMetadata {
   __responseSize?: ResponseSizeMetadata
+  __networkElapsed?: number
   [key: string]: unknown
 }
 
@@ -256,10 +257,39 @@ export class ParallelDataSync {
     )
   }
 
+  /**
+   * Creates batches of cycles for parallel processing.
+   * This is a preparatory step before calling startSyncing, which expects these batches.
+   * @param startCycle The starting cycle number.
+   * @param endCycle The ending cycle number.
+   * @returns An array of cycle batches, each with a start and end cycle.
+   */
+  public createCycleBatches(
+    startCycle: number,
+    endCycle: number
+  ): { startCycle: number; endCycle: number }[] {
+    const cycleBatches: { startCycle: number; endCycle: number }[] = []
+
+    for (let i = startCycle; i <= endCycle; i += this.syncConfig.cyclesPerBatch) {
+      const batchEndCycle = Math.min(i + this.syncConfig.cyclesPerBatch - 1, endCycle)
+      cycleBatches.push({ startCycle: i, endCycle: batchEndCycle })
+    }
+
+    return cycleBatches
+  }
+
   /**
    * Main entry point for parallel sync
    */
-  async startSyncing(startCycle: number, endCycle: number): Promise<void> {
+  async startSyncing(cycleBatches: { startCycle: number; endCycle: number }[]): Promise<void> {
+    if (!cycleBatches || cycleBatches.length === 0) {
+      console.log('No cycle batches provided for syncing.')
+      return
+    }
+
+    const startCycle = cycleBatches[0].startCycle
+    const endCycle = cycleBatches[cycleBatches.length - 1].endCycle
+
     console.log(`\n${'='.repeat(60)}`)
     console.log(`Starting Parallel Cycle Sync: ${startCycle} → ${endCycle}`)
     console.log(`Concurrency: ${this.syncConfig.concurrency} workers`)
@@ -269,20 +299,8 @@ export class ParallelDataSync {
     this.stats.totalCycles = endCycle - startCycle
 
     try {
-      // Split cycles into batches
-      const cycleBatches: { startCycle: number; endCycle: number }[] = []
-
-      for (let i = startCycle; i <= endCycle; ) {
-        let batchEnd = i + this.syncConfig.cyclesPerBatch
-        if (batchEnd > endCycle) {
-          batchEnd = endCycle
-        }
-        cycleBatches.push({ startCycle: i, endCycle: batchEnd })
-        i = batchEnd + 1
-      }
-
       console.log(
-        `Created ${cycleBatches.length} cycle batches (${this.syncConfig.cyclesPerBatch} cycles per batch)`
+        `Syncing ${cycleBatches.length} cycle batches created with ${this.syncConfig.cyclesPerBatch} cycles per batch`
       )
 
       // Add all batch sync tasks to the queue
@@ -305,7 +323,7 @@ export class ParallelDataSync {
 
   /**
    * Sync data in parallel using adaptive multi-cycle fetching with prefetching on endpoints
-   * Adaptively handles partial cycle completion (e.g., if requesting cycles 1-10 but only get data from 1-5)
+   * Adaptively handles partial cycle completion (e.g., if requesting cycles 1-10 but only get data from 1-5, then sends next request for 5-10)
    */
   private async syncDataByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     try {
@@ -318,12 +336,10 @@ export class ParallelDataSync {
 
       this.stats.completedCycles += endCycle - startCycle + 1
 
-      if (config.verbose || this.stats.completedCycles % 10 === 0) {
-        const progress = ((this.stats.completedCycles / this.stats.totalCycles) * 100).toFixed(1)
-        console.log(
-          `Progress: ${this.stats.completedCycles}/${this.stats.totalCycles} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
-        )
-      }
+      const progress = ((this.stats.completedCycles / this.stats.totalCycles) * 100).toFixed(1)
+      console.log(
+        `Progress: ${this.stats.completedCycles}/${this.stats.totalCycles} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
+      )
     } catch (error) {
       console.error(`Error syncing cycle batch ${startCycle}-${endCycle}:`, error)
       this.stats.errors++
@@ -336,17 +352,61 @@ export class ParallelDataSync {
    */
   private async syncCyclesByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     try {
-      const response = await this.fetchCyclesByCycleRange(startCycle, endCycle)
+      const response = await this.fetchDataFromDistributor(
+        DataType.CYCLE,
+        startCycle,
+        endCycle,
+        this.signData({ start: startCycle, end: endCycle })
+      )
 
-      if (!response || response.length === 0) {
-        if (config.verbose) {
-          console.log(`[Cycles ${startCycle}-${endCycle}] No cycle data returned`)
+      const cycles = response?.data?.cycleInfo || []
+
+      // Get size metadata from transformResponse and interceptor
+      const sizeMetadata = (response.data as ResponseDataWithMetadata)?.__responseSize
+      const decompressedKB = sizeMetadata?.decompressedKB || '0.00'
+      const compressedKB = sizeMetadata?.compressedKB
+      const compressionRatio = sizeMetadata?.compressionRatio
+      const compressionSavings = sizeMetadata?.compressionSavings
+      const networkElapsed = (response.data as ResponseDataWithMetadata)?.__networkElapsed || 0
+
+      if (config.verbose || networkElapsed > 1000) {
+        // Build log message with compression info if available
+        let logMessage =
+          `[API Timing] Cycles fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+          `records: ${cycles.length}`
+
+        // Only show compression metrics if compression actually reduced the size (ratio < 1)
+        if (compressedKB !== undefined && compressionRatio !== undefined && compressionRatio < 1) {
+          logMessage += `, payload: ${compressedKB}KB, payloadUncompressed: ${decompressedKB}KB, ratio: ${compressionRatio}, savings: ${compressionSavings}`
+        } else {
+          // No compression or not effective, just show uncompressed size
+          logMessage += `, payload: ${decompressedKB}KB`
         }
-        return
+
+        logMessage +=
+          (cycles.length === 0 && response.data ? ', response.data exists but empty' : '') +
+          (!response.data ? ', response.data is null/undefined!' : '')
+
+        console.log(logMessage)
       }
 
+      if (!response || !response.data || !response.data.cycleInfo) {
+        console.error(`Error fetching cycles for cycle batch ${startCycle}-${endCycle}:`, response)
+        return // Couldn't fetch any cycles
+      }
+
+      if (cycles.length === 0) {
+        return // No more originalTxs in this cycle range
+      }
+      const cycleRecords = cycles.map((cycleRecord: Cycle['cycleRecord']) => ({
+        counter: cycleRecord.counter,
+        cycleRecord,
+        start: cycleRecord.start,
+        cycleMarker: cycleRecord.marker,
+      }))
+
       // Process cycles using bulkInsertCycles
-      await CycleDB.bulkInsertCycles(response)
+      await CycleDB.bulkInsertCycles(cycleRecords)
 
       if (config.verbose) {
         console.log(`[Cycles ${startCycle}-${endCycle}] Cycles: +${response.length}`)
@@ -359,6 +419,7 @@ export class ParallelDataSync {
 
   /**
    * Sync receipts across a batch of cycles using adaptive multi-cycle fetching with prefetching
+   * Adaptively handles partial cycle completion (e.g., if requesting cycles 1-10 but only get data from 1-5, then sends next request for 5-10)
    */
   private async syncReceiptsByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     let currentCycle = startCycle
@@ -366,9 +427,22 @@ export class ParallelDataSync {
     let afterTxId = ''
     let totalFetched = 0
 
+    const route = `receipt/cycle`
+
     // Prefetch: Start fetching first batch immediately
     let nextFetchPromise: Promise<any[]> | null = this.syncConfig.enablePrefetch
-      ? this.fetchReceiptsByCycleRange({ startCycle: currentCycle, endCycle, afterTimestamp, afterTxId })
+      ? this.fetchDataFromDistributor(
+          route,
+          currentCycle,
+          endCycle,
+          this.signData({
+            startCycle: currentCycle,
+            endCycle,
+            afterTimestamp,
+            afterTxId,
+            limit: config.requestLimits.MAX_RECEIPTS_PER_REQUEST,
+          })
+        )
       : null
 
     while (currentCycle <= endCycle) {
@@ -376,19 +450,61 @@ export class ParallelDataSync {
         // Get the data (either from prefetch or fetch now)
         const response = nextFetchPromise
           ? await nextFetchPromise
-          : await this.fetchReceiptsByCycleRange({
-              startCycle: currentCycle,
+          : await this.fetchDataFromDistributor(
+              route,
+              currentCycle,
               endCycle,
-              afterTimestamp,
-              afterTxId,
-            })
+              this.signData({
+                startCycle: currentCycle,
+                endCycle,
+                afterTimestamp,
+                afterTxId,
+                limit: config.requestLimits.MAX_RECEIPTS_PER_REQUEST,
+              })
+            )
+
+        const receipts = response?.data?.receipts || []
+
+        // Get size metadata from transformResponse and interceptor
+        const sizeMetadata = (response.data as ResponseDataWithMetadata)?.__responseSize
+        const decompressedKB = sizeMetadata?.decompressedKB || '0.00'
+        const compressedKB = sizeMetadata?.compressedKB
+        const compressionRatio = sizeMetadata?.compressionRatio
+        const compressionSavings = sizeMetadata?.compressionSavings
+        const networkElapsed = (response.data as ResponseDataWithMetadata)?.__networkElapsed || 0
+
+        if (config.verbose || networkElapsed > 1000) {
+          // Build log message with compression info if available
+          let logMessage =
+            `[API Timing] Receipts fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+            `records: ${receipts.length}`
 
-        if (!response || response.length === 0) {
-          break // No more receipts in this cycle range
+          // Only show compression metrics if compression actually reduced the size (ratio < 1)
+          if (compressedKB !== undefined && compressionRatio !== undefined && compressionRatio < 1) {
+            logMessage += `, payload: ${compressedKB}KB, payloadUncompressed: ${decompressedKB}KB, ratio: ${compressionRatio}, savings: ${compressionSavings}`
+          } else {
+            // No compression or not effective, just show uncompressed size
+            logMessage += `, payload: ${decompressedKB}KB`
+          }
+
+          logMessage +=
+            (receipts.length === 0 && response.data ? ', response.data exists but empty' : '') +
+            (!response.data ? ', response.data is null/undefined!' : '')
+
+          console.log(logMessage)
+        }
+
+        if (!response || !response.data || !response.data.receipts) {
+          console.error(`Error fetching receipts for cycle batch ${startCycle}-${endCycle}:`, response)
+          break // Couldn't fetch any receipts
+        }
+
+        if (receipts.length === 0) {
+          break // No more originalTxs in this cycle range
         }
 
         // Update after timestamp and txId based on last receipt BEFORE starting next fetch
-        const lastReceipt = response[response.length - 1]
+        const lastReceipt = receipts[receipts.length - 1]
         currentCycle = lastReceipt.cycle
         afterTimestamp = lastReceipt.timestamp
         afterTxId = lastReceipt.receiptId
@@ -396,34 +512,40 @@ export class ParallelDataSync {
         // Prefetch next batch while processing current batch
         if (
           this.syncConfig.enablePrefetch &&
-          response.length >= config.requestLimits.MAX_RECEIPTS_PER_REQUEST
+          receipts.length >= config.requestLimits.MAX_RECEIPTS_PER_REQUEST
         ) {
-          nextFetchPromise = this.fetchReceiptsByCycleRange({
-            startCycle: currentCycle,
+          nextFetchPromise = this.fetchDataFromDistributor(
+            route,
+            currentCycle,
             endCycle,
-            afterTimestamp,
-            afterTxId,
-          })
+            this.signData({
+              startCycle: currentCycle,
+              endCycle,
+              afterTimestamp,
+              afterTxId,
+              limit: config.requestLimits.MAX_RECEIPTS_PER_REQUEST,
+            })
+          )
         } else {
           nextFetchPromise = null
         }
 
         // Process receipts (overlaps with next fetch if prefetch enabled)
-        await ReceiptDB.processReceiptData(response)
+        await ReceiptDB.processReceiptData(receipts)
 
-        totalFetched += response.length
-        this.stats.totalReceipts += response.length
+        totalFetched += receipts.length
+        this.stats.totalReceipts += receipts.length
 
         if (config.verbose) {
           console.log(
-            `[Cycles ${startCycle}-${endCycle}] Receipts: +${response.length} (total: ${totalFetched}), ` +
+            `[Cycles ${startCycle}-${endCycle}] Receipts: +${receipts.length} (total: ${totalFetched}), ` +
               `last in cycle ${currentCycle}` +
               (this.syncConfig.enablePrefetch ? ' [prefetch]' : '')
           )
         }
 
-        // If we got less than the max response size, we've exhausted this cycle range
-        if (response.length < config.requestLimits.MAX_RECEIPTS_PER_REQUEST) {
+        // If we got less than the max receipts size, we've exhausted this cycle range
+        if (receipts.length < config.requestLimits.MAX_RECEIPTS_PER_REQUEST) {
           break
         }
       } catch (error) {
@@ -435,6 +557,7 @@ export class ParallelDataSync {
 
   /**
    * Sync originalTxs across a batch of cycles using adaptive multi-cycle fetching with prefetching
+   * Adaptively handles partial cycle completion (e.g., if requesting cycles 1-10 but only get data from 1-5, then sends next request for 5-10)
    */
   private async syncOriginalTxsByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     let currentCycle = startCycle
@@ -442,14 +565,22 @@ export class ParallelDataSync {
     let afterTxId = ''
     let totalFetched = 0
 
+    const route = `originalTx/cycle`
+
     // Prefetch: Start fetching first batch immediately
     let nextFetchPromise: Promise<any[]> | null = this.syncConfig.enablePrefetch
-      ? this.fetchOriginalTxsByCycleRange({
-          startCycle: currentCycle,
+      ? this.fetchDataFromDistributor(
+          route,
+          currentCycle,
           endCycle,
-          afterTimestamp,
-          afterTxId,
-        })
+          this.signData({
+            startCycle: currentCycle,
+            endCycle,
+            afterTimestamp,
+            afterTxId,
+            limit: config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST,
+          })
+        )
       : null
 
     while (currentCycle <= endCycle) {
@@ -457,19 +588,61 @@ export class ParallelDataSync {
         // Get the data (either from prefetch or fetch now)
         const response = nextFetchPromise
           ? await nextFetchPromise
-          : await this.fetchOriginalTxsByCycleRange({
-              startCycle: currentCycle,
+          : await this.fetchDataFromDistributor(
+              route,
+              currentCycle,
               endCycle,
-              afterTimestamp,
-              afterTxId,
-            })
+              this.signData({
+                startCycle: currentCycle,
+                endCycle,
+                afterTimestamp,
+                afterTxId,
+                limit: config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST,
+              })
+            )
+
+        const originalTxs = response?.data?.originalTxs || []
+
+        // Get size metadata from transformResponse and interceptor
+        const sizeMetadata = (response.data as ResponseDataWithMetadata)?.__responseSize
+        const decompressedKB = sizeMetadata?.decompressedKB || '0.00'
+        const compressedKB = sizeMetadata?.compressedKB
+        const compressionRatio = sizeMetadata?.compressionRatio
+        const compressionSavings = sizeMetadata?.compressionSavings
+        const networkElapsed = (response.data as ResponseDataWithMetadata)?.__networkElapsed || 0
+
+        if (config.verbose || networkElapsed > 1000) {
+          // Build log message with compression info if available
+          let logMessage =
+            `[API Timing] OriginalTxs fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+            `records: ${originalTxs.length}`
+
+          // Only show compression metrics if compression actually reduced the size (ratio < 1)
+          if (compressedKB !== undefined && compressionRatio !== undefined && compressionRatio < 1) {
+            logMessage += `, payload: ${compressedKB}KB, payloadUncompressed: ${decompressedKB}KB, ratio: ${compressionRatio}, savings: ${compressionSavings}`
+          } else {
+            // No compression or not effective, just show uncompressed size
+            logMessage += `, payload: ${decompressedKB}KB`
+          }
+
+          logMessage +=
+            (originalTxs.length === 0 && response.data ? ', response.data exists but empty' : '') +
+            (!response.data ? ', response.data is null/undefined!' : '')
+
+          console.log(logMessage)
+        }
 
-        if (!response || response.length === 0) {
+        if (!response || !response.data || !response.data.originalTxs) {
+          console.error(`Error fetching originalTxs for cycle batch ${startCycle}-${endCycle}:`, response)
+          break // Couldn't fetch any originalTxs
+        }
+
+        if (originalTxs.length === 0) {
           break // No more originalTxs in this cycle range
         }
 
         // Update after timestamp and txId based on last tx BEFORE starting next fetch
-        const lastTx = response[response.length - 1]
+        const lastTx = originalTxs[originalTxs.length - 1]
         currentCycle = lastTx.cycle
         afterTimestamp = lastTx.timestamp
         afterTxId = lastTx.txId
@@ -479,32 +652,38 @@ export class ParallelDataSync {
           this.syncConfig.enablePrefetch &&
           response.length >= config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST
         ) {
-          nextFetchPromise = this.fetchOriginalTxsByCycleRange({
-            startCycle: currentCycle,
+          nextFetchPromise = this.fetchDataFromDistributor(
+            route,
+            currentCycle,
             endCycle,
-            afterTimestamp,
-            afterTxId,
-          })
+            this.signData({
+              startCycle: currentCycle,
+              endCycle,
+              afterTimestamp,
+              afterTxId,
+              limit: config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST,
+            })
+          )
         } else {
           nextFetchPromise = null
         }
 
         // Process originalTxs (overlaps with next fetch if prefetch enabled)
-        await OriginalTxDataDB.processOriginalTxData(response)
+        await OriginalTxDataDB.processOriginalTxData(originalTxs)
 
-        totalFetched += response.length
-        this.stats.totalOriginalTxs += response.length
+        totalFetched += originalTxs.length
+        this.stats.totalOriginalTxs += originalTxs.length
 
         if (config.verbose) {
           console.log(
-            `[Cycles ${startCycle}-${endCycle}] OriginalTxs: +${response.length} (total: ${totalFetched}), ` +
+            `[Cycles ${startCycle}-${endCycle}] OriginalTxs: +${originalTxs.length} (total: ${totalFetched}), ` +
               `last in cycle ${currentCycle}` +
               (this.syncConfig.enablePrefetch ? ' [prefetch]' : '')
           )
         }
 
-        // If we got less than the max response size, we've exhausted this cycle range
-        if (response.length < config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST) {
+        // If we got less than the max originalTxs size, we've exhausted this cycle range
+        if (originalTxs.length < config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST) {
           break
         }
       } catch (error) {
@@ -515,86 +694,15 @@ export class ParallelDataSync {
   }
 
   /**
-   * Fetch cycles by cycle range with retry logic
+   * Fetch data by multi-cycle  range with retry logic
    */
-  private async fetchCyclesByCycleRange(startCycle: number, endCycle: number): Promise<Cycle[]> {
-    // Retry with exponential backoff
-    for (let attempt = 0; attempt <= this.syncConfig.retryAttempts; attempt++) {
-      try {
-        const startTime = Date.now()
-        const response = await queryFromDistributor(DataType.CYCLE, {
-          start: startCycle,
-          end: endCycle,
-        })
-        const networkElapsed = Date.now() - startTime
-
-        if (response && response.data && response.data.cycleInfo) {
-          const cycleRecords = response.data.cycleInfo.map((cycleRecord: any) => ({
-            counter: cycleRecord.counter,
-            cycleRecord,
-            start: cycleRecord.start,
-            cycleMarker: cycleRecord.marker,
-          }))
-
-          if (config.verbose) {
-            console.log(
-              `[API Timing] Cycles fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
-                `records: ${cycleRecords.length}`
-            )
-          }
-          return cycleRecords
-        }
-      } catch (error: any) {
-        const isLastAttempt = attempt === this.syncConfig.retryAttempts
-        const isRetryableError =
-          error.code === 'ECONNRESET' ||
-          error.code === 'ETIMEDOUT' ||
-          error.code === 'ECONNREFUSED' ||
-          error.code === 'EPIPE'
-
-        if (isRetryableError && !isLastAttempt) {
-          const delay = this.syncConfig.retryDelayMs * Math.pow(2, attempt)
-          console.warn(
-            `Error on cycles fetch (cycles ${startCycle}-${endCycle}), ` +
-              `attempt ${attempt + 1}/${this.syncConfig.retryAttempts + 1}, ` +
-              `retrying in ${delay}ms...`
-          )
-          await this.sleep(delay)
-          continue
-        }
-
-        // Non-retryable error or last attempt failed
-        console.error(`Error fetching cycles (cycles ${startCycle}-${endCycle}):`, error.message)
-        throw error
-      }
-    }
-
-    return []
-  }
-
-  /**
-   * Fetch receipts by multi-cycle  range with retry logic
-   * Automatically adapts to cycle sizes - if cycles 1-10 only have data in 1-5, returns that subset
-   */
-  private async fetchReceiptsByCycleRange({
-    startCycle,
-    endCycle,
-    afterTimestamp,
-    afterTxId,
-  }: SyncTxDataByCycleRange): Promise<any[]> {
-    const data = {
-      startCycle,
-      endCycle,
-      afterTimestamp,
-      afterTxId,
-      limit: config.requestLimits.MAX_RECEIPTS_PER_REQUEST,
-      sender: config.collectorInfo.publicKey,
-      sign: undefined,
-    }
-
-    crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
-
-    const url = `${DISTRIBUTOR_URL}/receipt/cycle`
+  private async fetchDataFromDistributor(
+    route: string,
+    startCycle: number,
+    endCycle: number,
+    data: any
+  ): Promise<any> {
+    const url = `${DISTRIBUTOR_URL}/${route}`
 
     // Retry with exponential backoff
     for (let attempt = 0; attempt <= this.syncConfig.retryAttempts; attempt++) {
@@ -602,42 +710,10 @@ export class ParallelDataSync {
         const startTime = Date.now()
         const response = await this.axiosInstance.post(url, data)
         const networkElapsed = Date.now() - startTime
-
-        const receipts = response.data?.receipts || []
-
-        // Get size metadata from transformResponse and interceptor
-        const sizeMetadata = (response.data as ResponseDataWithMetadata)?.__responseSize
-        const decompressedKB = sizeMetadata?.decompressedKB || '0.00'
-        const compressedKB = sizeMetadata?.compressedKB
-        const compressionRatio = sizeMetadata?.compressionRatio
-        const compressionSavings = sizeMetadata?.compressionSavings
-
-        if (config.verbose || networkElapsed > 1000 || receipts.length === 0) {
-          // Build log message with compression info if available
-          let logMessage =
-            `[API Timing] Receipts fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
-            `records: ${receipts.length}`
-
-          // Only show compression metrics if compression actually reduced the size (ratio < 1)
-          if (compressedKB !== undefined && compressionRatio !== undefined && compressionRatio < 1) {
-            logMessage += `, payload: ${compressedKB}KB, payloadUncompressed: ${decompressedKB}KB, ratio: ${compressionRatio}, savings: ${compressionSavings}`
-          } else {
-            // No compression or not effective, just show uncompressed size
-            logMessage += `, payload: ${decompressedKB}KB`
-          }
-
-          logMessage +=
-            (receipts.length === 0 && response.data ? ', response.data exists but empty' : '') +
-            (!response.data ? ', response.data is null/undefined!' : '')
-
-          console.log(logMessage)
-        }
-
-        if (response.data && response.data.receipts) {
-          return response.data.receipts
+        if (response && response.data) {
+          ;(response.data as ResponseDataWithMetadata).__networkElapsed = networkElapsed
         }
-
-        return []
+        return response
       } catch (error: any) {
         const isLastAttempt = attempt === this.syncConfig.retryAttempts
         const isRetryableError =
@@ -649,7 +725,7 @@ export class ParallelDataSync {
         if (isRetryableError && !isLastAttempt) {
           const delay = this.syncConfig.retryDelayMs * Math.pow(2, attempt)
           console.warn(
-            `ECONNRESET on receipts fetch (cycles ${startCycle}-${endCycle}), ` +
+            `ECONNRESET on ${route} fetch (cycles ${startCycle}-${endCycle}), ` +
               `attempt ${attempt + 1}/${this.syncConfig.retryAttempts + 1}, ` +
               `retrying in ${delay}ms...`
           )
@@ -658,111 +734,25 @@ export class ParallelDataSync {
         }
 
         // Non-retryable error or last attempt failed
-        console.error(
-          `Error fetching receipts multi-cycle (cycles ${startCycle}-${endCycle}):`,
-          error.message
-        )
+        console.error(`Error fetching ${route} for (cycles ${startCycle}-${endCycle}):`, error.message)
         throw error
       }
     }
 
-    return []
+    return null
   }
 
   /**
-   * Fetch originalTxs by multi-cycle range with retry logic
+   * Sign data
    */
-  private async fetchOriginalTxsByCycleRange({
-    startCycle,
-    endCycle,
-    afterTimestamp,
-    afterTxId,
-  }: SyncTxDataByCycleRange): Promise<any[]> {
+  private signData(obj: SyncTxDataByCycleRange | { start: number; end: number }): P2P.P2PTypes.SignedObject {
     const data = {
-      startCycle,
-      endCycle,
-      afterTimestamp,
-      afterTxId,
-      limit: config.requestLimits.MAX_ORIGINAL_TXS_PER_REQUEST,
+      ...obj,
       sender: config.collectorInfo.publicKey,
       sign: undefined,
     }
-
     crypto.signObj(data, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
-
-    const url = `${DISTRIBUTOR_URL}/originalTx/cycle`
-
-    // Retry with exponential backoff
-    for (let attempt = 0; attempt <= this.syncConfig.retryAttempts; attempt++) {
-      try {
-        const startTime = Date.now()
-        const response = await this.axiosInstance.post(url, data)
-        const networkElapsed = Date.now() - startTime
-
-        const originalTxs = response.data?.originalTxs || []
-
-        // Get size metadata from transformResponse and interceptor
-        const sizeMetadata = (response.data as ResponseDataWithMetadata)?.__responseSize
-        const decompressedKB = sizeMetadata?.decompressedKB || '0.00'
-        const compressedKB = sizeMetadata?.compressedKB
-        const compressionRatio = sizeMetadata?.compressionRatio
-        const compressionSavings = sizeMetadata?.compressionSavings
-
-        if (config.verbose || networkElapsed > 1000 || originalTxs.length === 0) {
-          // Build log message with compression info if available
-          let logMessage =
-            `[API Timing] OriginalTxs fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
-            `records: ${originalTxs.length}`
-
-          // Only show compression metrics if compression actually reduced the size (ratio < 1)
-          if (compressedKB !== undefined && compressionRatio !== undefined && compressionRatio < 1) {
-            logMessage += `, payload: ${compressedKB}KB, payloadUncompressed: ${decompressedKB}KB, ratio: ${compressionRatio}, savings: ${compressionSavings}`
-          } else {
-            // No compression or not effective, just show uncompressed size
-            logMessage += `, payload: ${decompressedKB}KB`
-          }
-
-          logMessage +=
-            (originalTxs.length === 0 && response.data ? ', response.data exists but empty' : '') +
-            (!response.data ? ', response.data is null/undefined!' : '')
-
-          console.log(logMessage)
-        }
-
-        if (response.data && response.data.originalTxs) {
-          return response.data.originalTxs
-        }
-
-        return []
-      } catch (error: any) {
-        const isLastAttempt = attempt === this.syncConfig.retryAttempts
-        const isRetryableError =
-          error.code === 'ECONNRESET' ||
-          error.code === 'ETIMEDOUT' ||
-          error.code === 'ECONNREFUSED' ||
-          error.code === 'EPIPE'
-
-        if (isRetryableError && !isLastAttempt) {
-          const delay = this.syncConfig.retryDelayMs * Math.pow(2, attempt)
-          console.warn(
-            `ECONNRESET on originalTxs fetch (cycles ${startCycle}-${endCycle}), ` +
-              `attempt ${attempt + 1}/${this.syncConfig.retryAttempts + 1}, ` +
-              `retrying in ${delay}ms...`
-          )
-          await this.sleep(delay)
-          continue
-        }
-
-        // Non-retryable error or last attempt failed
-        console.error(
-          `Error fetching originalTxs multi-cycle (cycles ${startCycle}-${endCycle}):`,
-          error.message
-        )
-        throw error
-      }
-    }
-
-    return []
+    return data
   }
 
   /**
diff --git a/src/collector.ts b/src/collector.ts
index 836d737..59b29fd 100644
--- a/src/collector.ts
+++ b/src/collector.ts
@@ -256,30 +256,6 @@ export const checkAndSyncData = async (): Promise<() => Promise<void>> => {
   return syncData
 }
 
-export const startDataSyncManager = async (): Promise<() => Promise<void>> => {
-  console.log('\n')
-  console.log('='.repeat(60))
-  console.log('INITIALIZING DATA SYNC MANAGER')
-  console.log('='.repeat(60))
-  console.log('DataSyncManager provides intelligent data synchronization with:')
-  console.log('  • Early data anomaly detection before sync operations')
-  console.log('  • Automatic gap detection and recovery')
-  console.log('  • Lookback verification window for data integrity')
-  console.log('  • Parallel batch-cycle-based sync (10x+ performance improvement)')
-  console.log('='.repeat(60))
-  console.log('\n')
-
-  // Run anomaly detection BEFORE connecting to websocket
-  // This fails fast if there are data corruption issues
-  const dataSyncManager = new DataSyncManager()
-  await dataSyncManager.detectDataAnomalies()
-
-  console.log('✅ Data anomaly check passed - proceeding with sync')
-
-  // Return the sync function to be executed after WS connection
-  return dataSyncManager.syncData
-}
-
 const attemptReconnection = (): void => {
   console.log(`Re-connecting Distributor in ${config.DISTRIBUTOR_RECONNECT_INTERVAL / 1000}s...`)
   reconnecting = true
@@ -298,7 +274,7 @@ const connectToDistributor = (): void => {
   ws = new WebSocket(URL)
   ws.onopen = () => {
     console.log(
-      `✅ Socket connected to the Distributor @ ${config.distributorInfo.ip}:${config.distributorInfo.port}}`
+      `✅ Socket connected to the Distributor @ ${config.distributorInfo.ip}:${config.distributorInfo.port}`
     )
     connected = true
     reconnecting = false
@@ -399,7 +375,15 @@ const startServer = async (): Promise<void> => {
   await Storage.initializeDB()
   addExitListeners()
 
-  const syncData = config.useParallelSync ? await startDataSyncManager() : await checkAndSyncData()
+  let dataSyncManager = null
+
+  if (config.useParallelSync) {
+    // Run anomaly detection BEFORE connecting to websocket
+    // This fails fast if there are data corruption issues
+    dataSyncManager = new DataSyncManager()
+    await dataSyncManager.detectDataAnomalies()
+  }
+  const syncData = !config.useParallelSync && (await checkAndSyncData())
   if (config.dataLogWrite) await initDataLogWriter()
 
   addSigListeners()
@@ -421,6 +405,11 @@ const startServer = async (): Promise<void> => {
     }
   }
 
+  if (config.useParallelSync) {
+    await dataSyncManager.syncData()
+    return
+  }
+
   await syncData()
 }
 
diff --git a/src/config/index.ts b/src/config/index.ts
index b602842..0e1eb50 100644
--- a/src/config/index.ts
+++ b/src/config/index.ts
@@ -191,7 +191,7 @@ let config: Config = {
   },
   parallelSyncConcurrency: Number(process.env.PARALLEL_SYNC_CONCURRENCY) || 10, // 10 parallel workers
   useParallelSync: process.env.USE_PARALLEL_SYNC !== 'false', // Enable by default
-  cyclesPerBatch: Number(process.env.CYCLES_PER_BATCH) || 10, // Batch 10 cycles together
+  cyclesPerBatch: Number(process.env.CYCLES_PER_BATCH) || 100, // Batch 100 cycles together ( matching MAX_BETWEEN_CYCLES_PER_REQUEST, can be lower if needed )
   enablePrefetch: process.env.ENABLE_PREFETCH !== 'false', // Enable prefetch by default
   syncRetryAttempts: Number(process.env.SYNC_RETRY_ATTEMPTS) || 3, // Retry failed requests 3 times
   dexScreenerAPI:
diff --git a/src/storage/account.ts b/src/storage/account.ts
index d189fa9..02d2cb9 100644
--- a/src/storage/account.ts
+++ b/src/storage/account.ts
@@ -204,6 +204,47 @@ export async function queryAccountByAccountId(accountId: string): Promise<Accoun
   return null
 }
 
+export async function queryAccountTimestamp(
+  accountId: string
+): Promise<{ timestamp: number; createdTimestamp: number } | null> {
+  try {
+    const sql = `SELECT timestamp, createdTimestamp FROM accounts WHERE accountId=?`
+    const dbAccount = (await db.get(accountDatabase, sql, [accountId])) as DbAccount
+    if (dbAccount) return { timestamp: dbAccount.timestamp, createdTimestamp: dbAccount.createdTimestamp }
+    return null
+  } catch (e) {
+    console.log(e)
+    return null
+  }
+}
+
+export async function queryAccountTimestampsBatch(
+  accountIds: string[]
+): Promise<Map<string, { timestamp: number; createdTimestamp: number }>> {
+  const resultMap = new Map<string, { timestamp: number; createdTimestamp: number }>()
+  if (accountIds.length === 0) return resultMap
+
+  try {
+    // Create placeholders for IN clause
+    const placeholders = accountIds.map(() => '?').join(', ')
+    const sql = `SELECT accountId, timestamp, createdTimestamp FROM accounts WHERE accountId IN (${placeholders})`
+    const accounts = (await db.all(accountDatabase, sql, accountIds)) as DbAccount[]
+
+    for (const account of accounts) {
+      resultMap.set(account.accountId, {
+        timestamp: account.timestamp,
+        createdTimestamp: account.createdTimestamp,
+      })
+    }
+
+    if (config.verbose) console.log('Batch queried accounts', accounts.length, 'of', accountIds.length)
+  } catch (e) {
+    console.log('Error in queryAccountTimestampsBatch', e)
+  }
+
+  return resultMap
+}
+
 export async function processAccountData(accounts: AccountsCopy[]): Promise<Account[]> {
   console.log('accounts size', accounts.length)
   if (accounts && accounts.length <= 0) return []
diff --git a/src/storage/receipt.ts b/src/storage/receipt.ts
index f3425eb..8e6bbfb 100644
--- a/src/storage/receipt.ts
+++ b/src/storage/receipt.ts
@@ -158,18 +158,19 @@ export async function processReceiptData(
           combineAccounts.push(accObj)
         }
       } else {
-        const accountExist = await AccountDB.queryAccountByAccountId(accObj.accountId)
-        if (config.verbose) console.log('accountExist', accountExist)
-        if (!accountExist) {
-          combineAccounts.push(accObj)
-        } else {
-          if (accountExist.timestamp < accObj.timestamp) {
-            await AccountDB.updateAccount(accObj)
-          }
-          if (accObj.createdTimestamp < accountExist.createdTimestamp) {
-            await AccountDB.updateCreatedTimestamp(accObj.accountId, accObj.createdTimestamp)
-          }
-        }
+        // const accountExist = await AccountDB.queryAccountTimestamp(accObj.accountId)
+        // if (config.verbose) console.log('accountExist', accountExist)
+        // if (accountExist) {
+        //     if (accountExist.timestamp < accObj.timestamp) {
+        //     await AccountDB.updateAccount(accObj)
+        //     // combineAccounts.push(accObj)
+        //   }
+        //   if (accObj.createdTimestamp < accountExist.createdTimestamp) {
+        //     await AccountDB.updateCreatedTimestamp(accObj.accountId, accObj.createdTimestamp)
+        //   }
+        // } else {
+        combineAccounts.push(accObj)
+        // }
       }
 
       // if tx receipt is saved as an account, create tx object from the account and save it
@@ -229,13 +230,13 @@ export async function processReceiptData(
       }
       txObj.data = {}
     }
-    const transactionExist = await TransactionDB.queryTransactionByTxId(tx.txId)
-    if (config.verbose) console.log('transactionExist', transactionExist)
-    if (!transactionExist) {
-      combineTransactions.push(txObj)
-    } else if (transactionExist.timestamp < txObj.timestamp) {
-      await TransactionDB.insertTransaction(txObj)
-    }
+    // const transactionExist = await TransactionDB.queryTransactionByTxId(tx.txId)
+    // if (config.verbose) console.log('transactionExist', transactionExist)
+    // if (!transactionExist) {
+    combineTransactions.push(txObj)
+    // } else if (transactionExist.timestamp < txObj.timestamp) {
+    //   await TransactionDB.insertTransaction(txObj)
+    // }
     if (config.saveAccountHistoryState) {
       // Note: This has to be changed once we change the way the global modification tx consensus is updated
       if (
@@ -286,8 +287,32 @@ export async function processReceiptData(
       accountHistoryStateList = []
     }
   }
+
+  // Batch query all collected account IDs once
+  const accountIdsToQuery = combineAccounts.map((acc) => acc.accountId)
+  const existingAccounts = await AccountDB.queryAccountTimestampsBatch(accountIdsToQuery)
+  for (const accObj of combineAccounts) {
+    const accountExist = existingAccounts.get(accObj.accountId)
+    if (accountExist) {
+      if (accountExist.timestamp > accObj.timestamp) {
+        // await AccountDB.updateAccount(accObj)
+        // Remove the account from the list
+        combineAccounts = combineAccounts.filter((acc) => acc.accountId !== accObj.accountId)
+      }
+      if (accountExist.createdTimestamp > accObj.createdTimestamp) {
+        await AccountDB.updateCreatedTimestamp(accObj.accountId, accObj.createdTimestamp)
+      }
+    }
+  }
+  // Insert the combined accounts in bucketSize
+  if (combineAccounts.length > 0) {
+    for (let i = 0; i < combineAccounts.length; i += bucketSize) {
+      const accounts = combineAccounts.slice(i, i + bucketSize)
+      await AccountDB.bulkInsertAccounts(accounts)
+    }
+  }
+
   if (combineReceipts.length > 0) await bulkInsertReceipts(combineReceipts)
-  if (combineAccounts.length > 0) await AccountDB.bulkInsertAccounts(combineAccounts)
   if (combineTransactions.length > 0) await TransactionDB.bulkInsertTransactions(combineTransactions)
   if (accountHistoryStateList.length > 0)
     await AccountHistoryStateDB.bulkInsertAccountHistoryStates(accountHistoryStateList)
diff --git a/src/storage/sqlite3storage.ts b/src/storage/sqlite3storage.ts
index 6c68b85..27acc6c 100644
--- a/src/storage/sqlite3storage.ts
+++ b/src/storage/sqlite3storage.ts
@@ -79,8 +79,11 @@ export const createDB = async (dbPath: string, dbName: string): Promise<Database
   await run(db, 'PRAGMA journal_mode=WAL')
   await run(db, 'PRAGMA synchronous = NORMAL')
   await run(db, 'PRAGMA temp_store = MEMORY')
-  await run(db, 'PRAGMA cache_size = -64000') // ~64MB cache
-  await run(db, 'PRAGMA wal_autocheckpoint = 1000') // Checkpoint every 1000 ( default value ) pages
+  await run(db, 'PRAGMA cache_size = -256000') // Increased to ~256MB cache for better performance
+  await run(db, 'PRAGMA wal_autocheckpoint = 5000') // Checkpoint every 5000 pages (less frequent = less lock contention)
+  await run(db, 'PRAGMA mmap_size = 536870912') // 512MB memory-mapped I/O for faster reads (reduced disk I/O)
+  await run(db, 'PRAGMA busy_timeout = 30000') // Wait up to 30s if database is locked
+  await run(db, 'PRAGMA threads = 4') // Use up to 4 threads for parallel operations
   db.on('profile', (sql, time) => {
     const engineMs = typeof time === 'number' ? time : Number(time)
     const queue = queuedBySql.get(sql)

From bb8717d2894df5cae1d9c102df4e3854a865069f Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Wed, 12 Nov 2025 14:05:02 +0800
Subject: [PATCH 08/14] fix: improve sync statistics and throughput calculation

- Separate totalCyclesToSync from totalCycles in stats
- Calculate throughput based on all record types (cycles + receipts + originalTxs)
- Track actual cycle records inserted, not just cycle ranges processed
- Update progress reporting to show data cycles vs total records
- Change throughput label from "receipts/sec" to "records/sec"
---
 src/class/ParallelDataSync.ts | 28 ++++++++++++++++++----------
 src/collector.ts              |  2 +-
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/class/ParallelDataSync.ts b/src/class/ParallelDataSync.ts
index 510260c..e7fe05a 100644
--- a/src/class/ParallelDataSync.ts
+++ b/src/class/ParallelDataSync.ts
@@ -27,8 +27,9 @@ export interface ParallelSyncConfig {
 export interface SyncStats {
   startTime: number
   endTime?: number
-  totalCycles: number
+  totalCyclesToSync: number
   completedCycles: number
+  totalCycles: number
   totalReceipts: number
   totalOriginalTxs: number
   errors: number
@@ -241,8 +242,9 @@ export class ParallelDataSync {
 
     this.stats = {
       startTime: Date.now(),
-      totalCycles: 0,
+      totalCyclesToSync: 0,
       completedCycles: 0,
+      totalCycles: 0,
       totalReceipts: 0,
       totalOriginalTxs: 0,
       errors: 0,
@@ -296,7 +298,7 @@ export class ParallelDataSync {
     console.log(`${'='.repeat(60)}\n`)
 
     this.stats.startTime = Date.now()
-    this.stats.totalCycles = endCycle - startCycle
+    this.stats.totalCyclesToSync = endCycle - startCycle
 
     try {
       console.log(
@@ -336,9 +338,9 @@ export class ParallelDataSync {
 
       this.stats.completedCycles += endCycle - startCycle + 1
 
-      const progress = ((this.stats.completedCycles / this.stats.totalCycles) * 100).toFixed(1)
+      const progress = ((this.stats.completedCycles / this.stats.totalCyclesToSync) * 100).toFixed(1)
       console.log(
-        `Progress: ${this.stats.completedCycles}/${this.stats.totalCycles} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
+        `Progress: ${this.stats.completedCycles}/${this.stats.totalCyclesToSync} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
       )
     } catch (error) {
       console.error(`Error syncing cycle batch ${startCycle}-${endCycle}:`, error)
@@ -405,9 +407,12 @@ export class ParallelDataSync {
         cycleMarker: cycleRecord.marker,
       }))
 
-      // Process cycles using bulkInsertCycles
+      // Bulk insert cycles
       await CycleDB.bulkInsertCycles(cycleRecords)
 
+      // Update stats
+      this.stats.totalCycles += cycleRecords.length
+
       if (config.verbose) {
         console.log(`[Cycles ${startCycle}-${endCycle}] Cycles: +${response.length}`)
       }
@@ -770,18 +775,21 @@ export class ParallelDataSync {
     const elapsedSec = (elapsedMs / 1000).toFixed(2)
     const elapsedMin = (elapsedMs / 60000).toFixed(2)
 
+    const totalRecords = this.stats.totalCycles + this.stats.totalReceipts + this.stats.totalOriginalTxs
+    const throughput = (totalRecords / (elapsedMs / 1000)).toFixed(0)
+
     console.log(`\n${'='.repeat(60)}`)
     console.log('Parallel Sync Complete!')
     console.log(`${'='.repeat(60)}`)
     console.log(`  Cycle Range:       ${startCycle} → ${endCycle}`)
-    console.log(`  Cycles Synced:     ${this.stats.completedCycles}/${this.stats.totalCycles}`)
+    console.log(`  Data Cycles Synced:     ${this.stats.completedCycles}/${this.stats.totalCyclesToSync}`)
+    console.log(`  Cycles Synced: ${this.stats.totalCycles}`)
     console.log(`  Receipts Synced:   ${this.stats.totalReceipts}`)
     console.log(`  OriginalTxs Synced: ${this.stats.totalOriginalTxs}`)
+    console.log(`  Total Records:     ${totalRecords}`)
     console.log(`  Errors:            ${this.stats.errors}`)
     console.log(`  Time Elapsed:      ${elapsedSec}s (${elapsedMin} min)`)
-    console.log(
-      `  Throughput:        ${(this.stats.totalReceipts / (elapsedMs / 1000)).toFixed(0)} receipts/sec`
-    )
+    console.log(`  Throughput:        ${throughput} records/sec`)
     console.log(`${'='.repeat(60)}\n`)
   }
 
diff --git a/src/collector.ts b/src/collector.ts
index 59b29fd..c27f139 100644
--- a/src/collector.ts
+++ b/src/collector.ts
@@ -285,7 +285,7 @@ const connectToDistributor = (): void => {
     try {
       validateData(StringUtils.safeJsonParse(data))
     } catch (e) {
-      console.log('Error in processing received data!', e)
+      console.log('Error in processing received data!', data, e)
     }
   })
   ws.onerror = (error) => {

From 54f6fd9cab2e44dc57059304d918b6cfe09eeff8 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Wed, 12 Nov 2025 14:06:58 +0800
Subject: [PATCH 09/14] feat: add parallel fetching to data_sync_checker script

- Implement parallel batch fetching with configurable concurrency (up to 100)
- Fix batch boundaries to prevent overlapping cycles and double-counting
- Add detailed mismatch reporting with formatted table output
- Improve type safety with TallyItem and MismatchResult interfaces

feat: add distributor_tally_verifier script

Create comprehensive verification tool to compare distributor endpoints:
- Tally mode: Compare tally endpoint vs cycle-based pagination
- Full mode: Compare full data endpoint vs cycle-based pagination
- Track transaction IDs to debug count discrepancies
- Support page-based pagination for full data endpoint
- Display detailed mismatch analysis with ID-level comparison
---
 scripts/data_sync_checker.ts          | 235 ++++++++++---
 scripts/distributor_tally_verifier.ts | 470 ++++++++++++++++++++++++++
 2 files changed, 662 insertions(+), 43 deletions(-)
 create mode 100644 scripts/distributor_tally_verifier.ts

diff --git a/scripts/data_sync_checker.ts b/scripts/data_sync_checker.ts
index 8d21656..78a471d 100644
--- a/scripts/data_sync_checker.ts
+++ b/scripts/data_sync_checker.ts
@@ -13,53 +13,201 @@ const endCycle = 0
 const saveToFile = false
 
 const data_type: any = DataType.RECEIPT // DataType.RECEIPT // DataType.CYCLE // DataType.ORIGINALTX
-const api_url = data_type === DataType.RECEIPT ? 'receipt' : data_type === DataType.CYCLE ? 'cycleinfo' : 'originalTx'
+const api_url =
+  data_type === DataType.RECEIPT ? 'receipt' : data_type === DataType.CYCLE ? 'cycleinfo' : 'originalTx'
 
-const runProgram = async (): Promise<void> => {
-    const limit = 100
-    let distributor_responses: any = []
-    let api_responses: any = []
-    let nextEnd = startCycle + limit
-    for (let i = startCycle; i < endCycle;) {
-        console.log(`Start Cycle ${i} End Cycle ${nextEnd}`)
-        const distributor_data = data_type === DataType.CYCLE ? {
-            start: i,
-            end: nextEnd
-        } : {
-            startCycle: i,
-            endCycle: nextEnd,
-            type: 'tally'
+interface MismatchResult {
+  cycle: number
+  distributorCount: number
+  collectorCount: number
+}
+
+interface TallyItem {
+  cycle: number
+  receipts?: number
+  originalTxsData?: number
+  originalTxs?: number
+}
+
+const fetchBatch = async (
+  cycleStart: number,
+  cycleEnd: number
+): Promise<{ distributor: TallyItem[]; api: TallyItem[] }> => {
+  const distributor_data =
+    data_type === DataType.CYCLE
+      ? {
+          start: cycleStart,
+          end: cycleEnd,
         }
-        const api_data = data_type === DataType.CYCLE ? `?start=${i}&end=${nextEnd}` : `?startCycle=${i}&endCycle=${nextEnd}&tally=true`
-
-        const res1 = await queryFromDistributor(data_type, distributor_data)
-        // console.log(res1.data)
-
-        const res2 = await axios.get(`${API_SERVER_URL}/api/${api_url}${api_data}`)
-        // console.log(res2.data)
-
-        switch (data_type) {
-            case DataType.RECEIPT:
-                distributor_responses = [...distributor_responses, ...res1.data.receipts]
-                api_responses = [...api_responses, ...res2.data.totalReceipts]
-                break
-            case DataType.CYCLE:
-                distributor_responses = [...distributor_responses, ...res1.data.cycleInfo]
-                api_responses = [...api_responses, ...res2.data.cycles]
-                break
-            case DataType.ORIGINALTX:
-                distributor_responses = [...distributor_responses, ...res1.data.originalTxs]
-                api_responses = [...api_responses, ...res2.data.totalOriginalTxs]
-                break
+      : {
+          startCycle: cycleStart,
+          endCycle: cycleEnd,
+          type: 'tally',
         }
-        i = nextEnd + 1
-        nextEnd += limit
+  const api_data =
+    data_type === DataType.CYCLE
+      ? `?start=${cycleStart}&end=${cycleEnd}`
+      : `?startCycle=${cycleStart}&endCycle=${cycleEnd}&tally=true`
+
+  const [res1, res2] = await Promise.all([
+    queryFromDistributor(data_type, distributor_data),
+    axios.get(`${API_SERVER_URL}/api/${api_url}${api_data}`),
+  ])
+
+  let distributorData: TallyItem[] = []
+  let apiData: TallyItem[] = []
+
+  switch (data_type) {
+    case DataType.RECEIPT:
+      distributorData = res1.data.receipts || []
+      apiData = res2.data.totalReceipts || []
+      break
+    case DataType.CYCLE:
+      distributorData = res1.data.cycleInfo || []
+      apiData = res2.data.cycles || []
+      break
+    case DataType.ORIGINALTX:
+      distributorData = res1.data.originalTxs || []
+      apiData = res2.data.totalOriginalTxs || []
+      break
+  }
+
+  return { distributor: distributorData, api: apiData }
+}
+
+const chunkArray = <T>(array: T[], chunkSize: number): T[][] => {
+  const chunks: T[][] = []
+  for (let i = 0; i < array.length; i += chunkSize) {
+    chunks.push(array.slice(i, i + chunkSize))
+  }
+  return chunks
+}
+
+const runProgram = async (): Promise<void> => {
+  const limit = 100
+  const concurrency = 100
+
+  const batches: Array<{ start: number; end: number }> = []
+
+  // Create batches without overlapping boundaries
+  let currentStart = startCycle
+  while (currentStart <= endCycle) {
+    const batchEnd = Math.min(currentStart + limit - 1, endCycle)
+    batches.push({ start: currentStart, end: batchEnd })
+    currentStart = batchEnd + 1
+  }
+
+  console.log(`Fetching ${batches.length} batches in parallel (concurrency: ${concurrency})...`)
+
+  // Process batches in chunks to limit concurrency
+  const batchChunks = chunkArray(batches, concurrency)
+  const allResults: Array<{ distributor: TallyItem[]; api: TallyItem[] }> = []
+
+  for (const chunk of batchChunks) {
+    console.log(`Processing ${chunk.length} batches in parallel...`)
+    const chunkResults = await Promise.all(
+      chunk.map((batch) => {
+        console.log(`Fetching cycles ${batch.start} to ${batch.end}`)
+        return fetchBatch(batch.start, batch.end)
+      })
+    )
+    allResults.push(...chunkResults)
+  }
+
+  // Combine results
+  let distributor_responses: TallyItem[] = []
+  let api_responses: TallyItem[] = []
+
+  for (const result of allResults) {
+    distributor_responses = [...distributor_responses, ...result.distributor]
+    api_responses = [...api_responses, ...result.api]
+  }
+
+  console.log(
+    '\nDISTRIBUTOR RESPONSES:',
+    distributor_responses.length,
+    'API SERVER RESPONSES:',
+    api_responses.length
+  )
+
+  // Compare and find mismatches
+  const mismatches: MismatchResult[] = []
+
+  if (data_type === DataType.RECEIPT || data_type === DataType.ORIGINALTX) {
+    // Create maps for easy lookup
+    const distributorMap = new Map<number, number>()
+    const apiMap = new Map<number, number>()
+
+    for (const item of distributor_responses) {
+      const count =
+        data_type === DataType.RECEIPT
+          ? item.receipts ?? 0
+          : data_type === DataType.ORIGINALTX
+          ? item.originalTxsData ?? item.originalTxs ?? 0
+          : 0
+      distributorMap.set(item.cycle, count)
+    }
+
+    for (const item of api_responses) {
+      const count =
+        data_type === DataType.RECEIPT
+          ? item.receipts ?? 0
+          : data_type === DataType.ORIGINALTX
+          ? item.originalTxsData ?? item.originalTxs ?? 0
+          : 0
+      apiMap.set(item.cycle, count)
     }
-    console.log('DISTRIBUTOR RESPONSES', distributor_responses.length, 'API SERVER RESPONSES', api_responses.length)
-    console.log(isDeepStrictEqual(distributor_responses, api_responses))
-    // console.dir(distributor_responses, { depth: null })
-    // console.dir(api_responses, { depth: null })
-    // save to file
+
+    // Find all unique cycles
+    const allCycles = new Set([...distributorMap.keys(), ...apiMap.keys()])
+
+    for (const cycle of allCycles) {
+      const distributorCount = distributorMap.get(cycle) || 0
+      const apiCount = apiMap.get(cycle) || 0
+
+      if (distributorCount !== apiCount) {
+        mismatches.push({
+          cycle,
+          distributorCount,
+          collectorCount: apiCount,
+        })
+      }
+    }
+
+    // Sort mismatches by cycle
+    mismatches.sort((a, b) => a.cycle - b.cycle)
+  }
+
+  // Print mismatches
+  if (mismatches.length > 0) {
+    console.log(`\n${'='.repeat(70)}`)
+    console.log(`Found ${mismatches.length} mismatched cycles:`)
+    console.log(`${'='.repeat(70)}`)
+    console.log(
+      `${'Cycle'.padEnd(10)} | ${'Distributor'.padEnd(15)} | ${'Collector'.padEnd(15)} | ${'Difference'}`
+    )
+    console.log(`${'-'.repeat(70)}`)
+
+    for (const mismatch of mismatches) {
+      const diff = mismatch.collectorCount - mismatch.distributorCount
+      console.log(
+        `${String(mismatch.cycle).padEnd(10)} | ${String(mismatch.distributorCount).padEnd(15)} | ${String(
+          mismatch.collectorCount
+        ).padEnd(15)} | ${diff > 0 ? '+' : ''}${diff}`
+      )
+    }
+    console.log(`${'='.repeat(70)}\n`)
+  } else {
+    console.log('\n✅ No mismatches found! All cycles match.')
+  }
+
+  // Deep comparison for cycles
+  if (data_type === DataType.CYCLE) {
+    const isEqual = isDeepStrictEqual(distributor_responses, api_responses)
+    console.log('\nDeep comparison result:', isEqual ? '✅ MATCH' : '❌ MISMATCH')
+  }
+
+  // Save to file
   if (saveToFile) {
     writeFileSync(
       `distributor_${data_type}_${startCycle}_${endCycle}.json`,
@@ -69,6 +217,7 @@ const runProgram = async (): Promise<void> => {
       `api_server_${data_type}_${startCycle}_${endCycle}.json`,
       JSON.stringify(api_responses, null, 4)
     )
+    console.log('\n📁 Results saved to files')
   }
 }
 runProgram()
diff --git a/scripts/distributor_tally_verifier.ts b/scripts/distributor_tally_verifier.ts
new file mode 100644
index 0000000..be0f2a5
--- /dev/null
+++ b/scripts/distributor_tally_verifier.ts
@@ -0,0 +1,470 @@
+import axios from 'axios'
+import * as crypto from '@shardus/crypto-utils'
+import { config, DISTRIBUTOR_URL } from '../src/config'
+import { queryFromDistributor, DataType } from '../src/class/DataSync'
+crypto.init(config.hashKey)
+
+const startCycle = 0
+const endCycle = 0
+
+// Choose data type to verify
+const data_type: DataType = DataType.RECEIPT // DataType.RECEIPT or DataType.ORIGINALTX
+
+// Choose comparison mode:
+// 'tally' - Compare tally endpoint vs cycle-based pagination
+// 'full' - Compare full data endpoint vs cycle-based pagination
+const comparisonMode: 'tally' | 'full' = 'tally'
+
+interface TallyItem {
+  cycle: number
+  receipts?: number
+  originalTxsData?: number
+  originalTxs?: number
+}
+
+interface MismatchResult {
+  cycle: number
+  tallyCount: number
+  actualCount: number
+}
+
+interface TransactionIdDetails {
+  cycle: number
+  fullDataIds: string[]
+  cycleBasedIds: string[]
+}
+
+/**
+ * Fetch tally counts from distributor (aggregated counts per cycle)
+ */
+const fetchTallyCounts = async (
+  cycleStart: number,
+  cycleEnd: number
+): Promise<Map<number, number>> => {
+  const tallyMap = new Map<number, number>()
+
+  const response = await queryFromDistributor(data_type, {
+    startCycle: cycleStart,
+    endCycle: cycleEnd,
+    type: 'tally',
+  })
+
+  if (!response?.data) {
+    console.warn(`No tally data returned for cycles ${cycleStart}-${cycleEnd}`)
+    return tallyMap
+  }
+
+  const tallyData: TallyItem[] =
+    data_type === DataType.RECEIPT ? response.data.receipts || [] : response.data.originalTxs || []
+
+  for (const item of tallyData) {
+    const count =
+      data_type === DataType.RECEIPT
+        ? item.receipts ?? 0
+        : item.originalTxsData ?? item.originalTxs ?? 0
+    tallyMap.set(item.cycle, count)
+  }
+
+  return tallyMap
+}
+
+/**
+ * Fetch full data from distributor without tally (fetches actual records and counts them)
+ * Uses pagination to fetch all data across multiple pages
+ */
+const fetchFullDataCounts = async (
+  cycleStart: number,
+  cycleEnd: number
+): Promise<{ counts: Map<number, number>; ids: Map<number, string[]> }> => {
+  const countsMap = new Map<number, number>()
+  const idsMap = new Map<number, string[]>()
+
+  let page = 1
+  let hasMorePages = true
+  const maxLimit = config.requestLimits.MAX_RECEIPTS_PER_REQUEST
+
+  while (hasMorePages) {
+    const response = await queryFromDistributor(data_type, {
+      startCycle: cycleStart,
+      endCycle: cycleEnd,
+      page: page,
+      // No 'type: tally' - fetch actual data
+    })
+
+    if (!response?.data) {
+      console.warn(`No data returned for cycles ${cycleStart}-${cycleEnd} page ${page}`)
+      break
+    }
+
+    const items =
+      data_type === DataType.RECEIPT ? response.data.receipts || [] : response.data.originalTxs || []
+
+    if (items.length === 0) {
+      break // No more data
+    }
+
+    // Count items per cycle and collect IDs
+    for (const item of items) {
+      const cycle = item.cycle
+      const txId = data_type === DataType.RECEIPT ? item.receiptId : item.txId
+
+      countsMap.set(cycle, (countsMap.get(cycle) || 0) + 1)
+
+      if (!idsMap.has(cycle)) {
+        idsMap.set(cycle, [])
+      }
+      idsMap.get(cycle)!.push(txId)
+    }
+
+    console.log(
+      `Fetched page ${page} for cycles ${cycleStart}-${cycleEnd}: ${items.length} items, total cycles tracked: ${countsMap.size}`
+    )
+
+    // Check if we need to fetch more pages
+    if (items.length < maxLimit) {
+      hasMorePages = false
+    } else {
+      page++
+    }
+  }
+
+  return { counts: countsMap, ids: idsMap }
+}
+
+/**
+ * Fetch actual data from distributor using cycle-based pagination
+ * (Same method used in ParallelDataSync)
+ */
+const fetchActualDataCounts = async (
+  cycleStart: number,
+  cycleEnd: number
+): Promise<{ counts: Map<number, number>; ids: Map<number, string[]> }> => {
+  const actualCountsMap = new Map<number, number>()
+  const actualIdsMap = new Map<number, string[]>()
+
+  let currentCycle = cycleStart
+  let afterTimestamp = 0
+  let afterTxId = ''
+  const limit = config.requestLimits.MAX_RECEIPTS_PER_REQUEST
+
+  const url =
+    data_type === DataType.RECEIPT
+      ? `${DISTRIBUTOR_URL}/receipt/cycle`
+      : `${DISTRIBUTOR_URL}/originalTx/cycle`
+
+  while (currentCycle <= cycleEnd) {
+    const requestData = {
+      startCycle: currentCycle,
+      endCycle: cycleEnd,
+      afterTimestamp,
+      afterTxId,
+      limit,
+      sender: config.collectorInfo.publicKey,
+      sign: undefined,
+    }
+
+    crypto.signObj(requestData, config.collectorInfo.secretKey, config.collectorInfo.publicKey)
+
+    const response = await axios.post(url, requestData)
+
+    const items =
+      data_type === DataType.RECEIPT
+        ? response.data?.receipts || []
+        : response.data?.originalTxs || []
+
+    if (items.length === 0) {
+      break // No more data
+    }
+
+    // Count items per cycle and collect IDs
+    for (const item of items) {
+      const cycle = item.cycle
+      const txId = data_type === DataType.RECEIPT ? item.receiptId : item.txId
+
+      actualCountsMap.set(cycle, (actualCountsMap.get(cycle) || 0) + 1)
+
+      if (!actualIdsMap.has(cycle)) {
+        actualIdsMap.set(cycle, [])
+      }
+      actualIdsMap.get(cycle)!.push(txId)
+    }
+
+    // Update pagination cursors
+    const lastItem = items[items.length - 1]
+    currentCycle = lastItem.cycle
+    afterTimestamp = lastItem.timestamp
+    afterTxId = data_type === DataType.RECEIPT ? lastItem.receiptId : lastItem.txId
+
+    console.log(
+      `Fetched ${items.length} items, last in cycle ${currentCycle}, total cycles tracked: ${actualCountsMap.size}`
+    )
+
+    // If we got less than limit, we've exhausted the range
+    if (items.length < limit) {
+      break
+    }
+  }
+
+  return { counts: actualCountsMap, ids: actualIdsMap }
+}
+
+const chunkArray = <T>(array: T[], chunkSize: number): T[][] => {
+  const chunks: T[][] = []
+  for (let i = 0; i < array.length; i += chunkSize) {
+    chunks.push(array.slice(i, i + chunkSize))
+  }
+  return chunks
+}
+
+const runProgram = async (): Promise<void> => {
+  const limit = 100
+  const concurrency = 10
+
+  const batches: Array<{ start: number; end: number }> = []
+
+  // Create batches without overlapping boundaries
+  let currentStart = startCycle
+  while (currentStart <= endCycle) {
+    const batchEnd = Math.min(currentStart + limit - 1, endCycle)
+    batches.push({ start: currentStart, end: batchEnd })
+    currentStart = batchEnd + 1
+  }
+
+  const dataTypeName = data_type === DataType.RECEIPT ? 'Receipts' : 'OriginalTxs'
+  const modeName =
+    comparisonMode === 'tally'
+      ? 'Tally vs Cycle-Based Pagination'
+      : 'Full Data vs Cycle-Based Pagination'
+
+  console.log(`\n${'='.repeat(70)}`)
+  console.log(`Distributor Verifier - ${dataTypeName}`)
+  console.log(`${'='.repeat(70)}`)
+  console.log(`Comparison Mode: ${modeName}`)
+  console.log(`Cycle Range: ${startCycle} to ${endCycle}`)
+  console.log(`Batches: ${batches.length}`)
+  console.log(`Concurrency: ${concurrency}`)
+  console.log(`${'='.repeat(70)}\n`)
+
+  const batchChunks = chunkArray(batches, concurrency)
+
+  // Step 1: Fetch first dataset (tally or full data)
+  const firstDataLabel = comparisonMode === 'tally' ? 'tally' : 'full data'
+  console.log(`Fetching ${firstDataLabel} counts from distributor...`)
+
+  const firstDataCountsMap = new Map<number, number>()
+  const firstDataIdsMap = new Map<number, string[]>()
+
+  if (comparisonMode === 'tally') {
+    // Tally mode: only fetch counts (no IDs available)
+    const tallyMaps: Map<number, number>[] = []
+    for (const chunk of batchChunks) {
+      const chunkResults = await Promise.all(
+        chunk.map((batch) => {
+          console.log(`Fetching ${firstDataLabel} for cycles ${batch.start} to ${batch.end}`)
+          return fetchTallyCounts(batch.start, batch.end)
+        })
+      )
+      tallyMaps.push(...chunkResults)
+    }
+    // Merge tally counts
+    for (const map of tallyMaps) {
+      for (const [cycle, count] of map.entries()) {
+        firstDataCountsMap.set(cycle, (firstDataCountsMap.get(cycle) || 0) + count)
+      }
+    }
+  } else {
+    // Full data mode: fetch counts and IDs
+    const fullDataResults: Array<{ counts: Map<number, number>; ids: Map<number, string[]> }> = []
+    for (const chunk of batchChunks) {
+      const chunkResults = await Promise.all(
+        chunk.map((batch) => {
+          console.log(`Fetching ${firstDataLabel} for cycles ${batch.start} to ${batch.end}`)
+          return fetchFullDataCounts(batch.start, batch.end)
+        })
+      )
+      fullDataResults.push(...chunkResults)
+    }
+    // Merge full data counts and IDs
+    for (const result of fullDataResults) {
+      for (const [cycle, count] of result.counts.entries()) {
+        firstDataCountsMap.set(cycle, (firstDataCountsMap.get(cycle) || 0) + count)
+      }
+      for (const [cycle, ids] of result.ids.entries()) {
+        if (!firstDataIdsMap.has(cycle)) {
+          firstDataIdsMap.set(cycle, [])
+        }
+        firstDataIdsMap.get(cycle)!.push(...ids)
+      }
+    }
+  }
+
+  console.log(`\n${firstDataLabel} counts fetched: ${firstDataCountsMap.size} cycles\n`)
+
+  // Step 2: Fetch cycle-based pagination data (always with IDs)
+  console.log('Fetching data using cycle-based pagination...')
+  const cycleBasedResults: Array<{ counts: Map<number, number>; ids: Map<number, string[]> }> = []
+
+  for (const chunk of batchChunks) {
+    const chunkResults = await Promise.all(
+      chunk.map((batch) => {
+        console.log(`Fetching cycle-based data for cycles ${batch.start} to ${batch.end}`)
+        return fetchActualDataCounts(batch.start, batch.end)
+      })
+    )
+    cycleBasedResults.push(...chunkResults)
+  }
+
+  // Merge cycle-based counts and IDs
+  const cycleBasedCountsMap = new Map<number, number>()
+  const cycleBasedIdsMap = new Map<number, string[]>()
+
+  for (const result of cycleBasedResults) {
+    for (const [cycle, count] of result.counts.entries()) {
+      cycleBasedCountsMap.set(cycle, (cycleBasedCountsMap.get(cycle) || 0) + count)
+    }
+    for (const [cycle, ids] of result.ids.entries()) {
+      if (!cycleBasedIdsMap.has(cycle)) {
+        cycleBasedIdsMap.set(cycle, [])
+      }
+      cycleBasedIdsMap.get(cycle)!.push(...ids)
+    }
+  }
+
+  console.log(`\nCycle-based pagination counts fetched: ${cycleBasedCountsMap.size} cycles\n`)
+
+  // Compare first dataset vs cycle-based counts
+  const mismatches: MismatchResult[] = []
+  const allCycles = new Set([...firstDataCountsMap.keys(), ...cycleBasedCountsMap.keys()])
+
+  for (const cycle of allCycles) {
+    const firstDataCount = firstDataCountsMap.get(cycle) || 0
+    const cycleBasedCount = cycleBasedCountsMap.get(cycle) || 0
+
+    if (firstDataCount !== cycleBasedCount) {
+      mismatches.push({
+        cycle,
+        tallyCount: firstDataCount,
+        actualCount: cycleBasedCount,
+      })
+    }
+  }
+
+  // Sort mismatches by cycle
+  mismatches.sort((a, b) => a.cycle - b.cycle)
+
+  // Print results
+  const firstColumnLabel = comparisonMode === 'tally' ? 'Tally Count' : 'Full Data Count'
+  const secondColumnLabel = 'Cycle-Based Count'
+
+  console.log(`\n${'='.repeat(70)}`)
+  console.log(`Verification Results - ${dataTypeName}`)
+  console.log(`${'='.repeat(70)}`)
+  console.log(`Comparison Mode: ${modeName}`)
+  console.log(`Total cycles checked: ${allCycles.size}`)
+  console.log(`Cycles with ${firstDataLabel} data: ${firstDataCountsMap.size}`)
+  console.log(`Cycles with cycle-based data: ${cycleBasedCountsMap.size}`)
+  console.log(`Mismatches found: ${mismatches.length}`)
+  console.log(`${'='.repeat(70)}\n`)
+
+  if (mismatches.length > 0) {
+    console.log(`\n${'='.repeat(70)}`)
+    console.log(`Mismatched Cycles:`)
+    console.log(`${'='.repeat(70)}`)
+    console.log(
+      `${'Cycle'.padEnd(10)} | ${firstColumnLabel.padEnd(18)} | ${secondColumnLabel.padEnd(18)} | ${'Difference'}`
+    )
+    console.log(`${'-'.repeat(70)}`)
+
+    for (const mismatch of mismatches) {
+      const diff = mismatch.actualCount - mismatch.tallyCount
+      console.log(
+        `${String(mismatch.cycle).padEnd(10)} | ${String(mismatch.tallyCount).padEnd(18)} | ${String(
+          mismatch.actualCount
+        ).padEnd(18)} | ${diff > 0 ? '+' : ''}${diff}`
+      )
+    }
+    console.log(`${'='.repeat(70)}\n`)
+  } else {
+    console.log(`✅ All cycles match! ${firstDataLabel} and cycle-based data are consistent.\n`)
+  }
+
+  // Calculate total counts
+  let totalFirstData = 0
+  let totalCycleBased = 0
+  for (const count of firstDataCountsMap.values()) {
+    totalFirstData += count
+  }
+  for (const count of cycleBasedCountsMap.values()) {
+    totalCycleBased += count
+  }
+
+  console.log(`Total ${dataTypeName} from ${firstDataLabel}: ${totalFirstData}`)
+  console.log(`Total ${dataTypeName} from cycle-based: ${totalCycleBased}`)
+  console.log(`Difference: ${totalCycleBased - totalFirstData}`)
+
+  // Display transaction IDs for mismatched cycles (if available in full mode)
+  if (mismatches.length > 0 && firstDataIdsMap.size > 0) {
+    console.log(`\n${'='.repeat(70)}`)
+    console.log(`Transaction IDs for Mismatched Cycles:`)
+    console.log(`${'='.repeat(70)}\n`)
+
+    for (const mismatch of mismatches.slice(0, 10)) {
+      // Show first 10 mismatches
+      console.log(`Cycle ${mismatch.cycle}:`)
+
+      const fullDataIds = firstDataIdsMap.get(mismatch.cycle) || []
+      const cycleBasedIds = cycleBasedIdsMap.get(mismatch.cycle) || []
+
+      console.log(`  Full Data IDs (${fullDataIds.length}):`)
+      if (fullDataIds.length > 0) {
+        fullDataIds.slice(0, 5).forEach((id) => console.log(`    - ${id}`))
+        if (fullDataIds.length > 5) {
+          console.log(`    ... and ${fullDataIds.length - 5} more`)
+        }
+      } else {
+        console.log(`    (none)`)
+      }
+
+      console.log(`  Cycle-Based IDs (${cycleBasedIds.length}):`)
+      if (cycleBasedIds.length > 0) {
+        cycleBasedIds.slice(0, 5).forEach((id) => console.log(`    - ${id}`))
+        if (cycleBasedIds.length > 5) {
+          console.log(`    ... and ${cycleBasedIds.length - 5} more`)
+        }
+      } else {
+        console.log(`    (none)`)
+      }
+
+      // Find IDs that are in one set but not the other
+      const fullDataSet = new Set(fullDataIds)
+      const cycleBasedSet = new Set(cycleBasedIds)
+
+      const onlyInFullData = fullDataIds.filter((id) => !cycleBasedSet.has(id))
+      const onlyInCycleBased = cycleBasedIds.filter((id) => !fullDataSet.has(id))
+
+      if (onlyInFullData.length > 0) {
+        console.log(`  Only in Full Data (${onlyInFullData.length}):`)
+        onlyInFullData.slice(0, 3).forEach((id) => console.log(`    - ${id}`))
+        if (onlyInFullData.length > 3) {
+          console.log(`    ... and ${onlyInFullData.length - 3} more`)
+        }
+      }
+
+      if (onlyInCycleBased.length > 0) {
+        console.log(`  Only in Cycle-Based (${onlyInCycleBased.length}):`)
+        onlyInCycleBased.slice(0, 3).forEach((id) => console.log(`    - ${id}`))
+        if (onlyInCycleBased.length > 3) {
+          console.log(`    ... and ${onlyInCycleBased.length - 3} more`)
+        }
+      }
+
+      console.log()
+    }
+
+    if (mismatches.length > 10) {
+      console.log(`... and ${mismatches.length - 10} more mismatched cycles\n`)
+    }
+  }
+}
+
+runProgram()

From 797a53eedd8ef9764e43270af5b2e7a29510ede0 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Wed, 12 Nov 2025 18:39:28 +0800
Subject: [PATCH 10/14] Add deserialization timing tracking and improve error
 handling in parallel sync

- Track deserialization time for API responses and log when > 50ms
- Add explicit deserialization timing for receipts and originalTxs processing
- Improve parallel sync error handling with Promise.allSettled for better failure reporting
- Fix cycle count calculation (add +1 for inclusive range)
- Rename syncCyclesByCycleRange to syncCycleRecordsByCycleRange for clarity
- Refactor database timing functions and move to bottom of file
- Export deserializeDbReceipt and deserializeDbOriginalTxData functions
- Optimize deserialization calls using forEach instead of for loops
---
 src/class/ParallelDataSync.ts | 137 +++++++++++++++++++++++++++-------
 src/storage/originalTxData.ts |  12 +--
 src/storage/receipt.ts        |   2 +-
 src/storage/sqlite3storage.ts | 122 +++++++++++++++---------------
 4 files changed, 176 insertions(+), 97 deletions(-)

diff --git a/src/class/ParallelDataSync.ts b/src/class/ParallelDataSync.ts
index e7fe05a..7825818 100644
--- a/src/class/ParallelDataSync.ts
+++ b/src/class/ParallelDataSync.ts
@@ -50,6 +50,7 @@ interface ResponseSizeMetadata {
 interface ResponseDataWithMetadata {
   __responseSize?: ResponseSizeMetadata
   __networkElapsed?: number
+  _deserializedTime?: number
   [key: string]: unknown
 }
 
@@ -133,7 +134,7 @@ export class ParallelDataSync {
           // Use custom parse for response with timing
           const startTime = Date.now()
           const result = typeof res === 'string' ? StringUtils.safeJsonParse(res) : res
-          const elapsed = Date.now() - startTime
+          const deserializedTime = Date.now() - startTime
 
           // Calculate decompressed size from raw response string
           const decompressedBytes = typeof res === 'string' ? Buffer.byteLength(res) : 0
@@ -149,10 +150,12 @@ export class ParallelDataSync {
               enumerable: false, // Hidden from JSON.stringify and iteration
               configurable: true,
             })
+            // Attach deserialization time
+            ;(result as ResponseDataWithMetadata)._deserializedTime = deserializedTime
           }
 
-          if (config.verbose && elapsed > 50) {
-            console.log(`[Client] Response parse: ${elapsed}ms, size: ${sizeKB}KB`)
+          if (config.verbose && deserializedTime > 50) {
+            console.log(`[Client] Response deserialization: ${deserializedTime}ms, size: ${sizeKB}KB`)
           }
           return result
         },
@@ -298,7 +301,7 @@ export class ParallelDataSync {
     console.log(`${'='.repeat(60)}\n`)
 
     this.stats.startTime = Date.now()
-    this.stats.totalCyclesToSync = endCycle - startCycle
+    this.stats.totalCyclesToSync = endCycle - startCycle + 1
 
     try {
       console.log(
@@ -310,12 +313,45 @@ export class ParallelDataSync {
         this.queue.add(() => this.syncDataByCycleRange(batch.startCycle, batch.endCycle))
       )
 
-      // Wait for all tasks to complete
-      await Promise.all(tasks)
+      console.log(`Waiting for ${tasks.length} tasks to complete...`)
+
+      // Wait for all tasks to complete (even if some fail)
+      const results = await Promise.allSettled(tasks)
+
+      console.log('All tasks completed, setting end time...')
       this.stats.endTime = Date.now()
 
+      // Count successful and failed tasks
+      const successful = results.filter((r) => r.status === 'fulfilled').length
+      const failed = results.filter((r) => r.status === 'rejected').length
+
+      console.log(`Tasks completed: ${successful} successful, ${failed} failed`)
+
+      // Log failed task errors
+      if (failed > 0) {
+        console.error(`\n${failed} tasks failed with errors:`)
+        results.forEach((result, index) => {
+          if (result.status === 'rejected') {
+            const batch = cycleBatches[index]
+            console.error(
+              `  Batch ${index} (cycles ${batch.startCycle}-${batch.endCycle}): ${
+                result.reason?.message || result.reason
+              }`
+            )
+          }
+        })
+      }
+
+      console.log('Printing summary...')
       // Summary
       await this.printSummary(startCycle, endCycle)
+
+      console.log('Summary printed successfully')
+
+      // Throw if there were any failures so the caller knows sync was incomplete
+      if (failed > 0) {
+        throw new Error(`Parallel sync completed with ${failed} failed batches out of ${tasks.length} total`)
+      }
     } catch (error) {
       console.error('Fatal error in parallel sync:', error)
       this.stats.errors++
@@ -328,31 +364,52 @@ export class ParallelDataSync {
    * Adaptively handles partial cycle completion (e.g., if requesting cycles 1-10 but only get data from 1-5, then sends next request for 5-10)
    */
   private async syncDataByCycleRange(startCycle: number, endCycle: number): Promise<void> {
-    try {
-      // Sync all data types in parallel
-      await Promise.all([
-        this.syncCyclesByCycleRange(startCycle, endCycle),
-        this.syncReceiptsByCycleRange(startCycle, endCycle),
-        this.syncOriginalTxsByCycleRange(startCycle, endCycle),
-      ])
-
-      this.stats.completedCycles += endCycle - startCycle + 1
+    // Sync all data types in parallel with individual error tracking
+    const results = await Promise.allSettled([
+      this.syncCycleRecordsByCycleRange(startCycle, endCycle),
+      this.syncReceiptsByCycleRange(startCycle, endCycle),
+      this.syncOriginalTxsByCycleRange(startCycle, endCycle),
+    ])
+
+    const dataTypes = ['Cycle Records', 'Receipts', 'OriginalTxs']
+    const failedTypes: string[] = []
+    const errors: unknown[] = []
+
+    results.forEach((result, index) => {
+      if (result.status === 'rejected') {
+        failedTypes.push(dataTypes[index])
+        errors.push(result.reason)
+      }
+    })
 
-      const progress = ((this.stats.completedCycles / this.stats.totalCyclesToSync) * 100).toFixed(1)
-      console.log(
-        `Progress: ${this.stats.completedCycles}/${this.stats.totalCyclesToSync} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
+    if (failedTypes.length > 0) {
+      console.error(
+        `Error syncing cycle batch ${startCycle}-${endCycle}: Failed data types: ${failedTypes.join(', ')}`
       )
-    } catch (error) {
-      console.error(`Error syncing cycle batch ${startCycle}-${endCycle}:`, error)
+      errors.forEach((error, index) => {
+        const errorMessage = error instanceof Error ? error.message : String(error)
+        console.error(`  ${failedTypes[index]}: ${errorMessage}`)
+      })
       this.stats.errors++
-      throw error
+      throw new Error(
+        `Failed to sync ${
+          failedTypes.length
+        } data type(s) for batch ${startCycle}-${endCycle}: ${failedTypes.join(', ')}`
+      )
     }
+
+    this.stats.completedCycles += endCycle - startCycle + 1
+
+    const progress = ((this.stats.completedCycles / this.stats.totalCyclesToSync) * 100).toFixed(1)
+    console.log(
+      `Progress: ${this.stats.completedCycles}/${this.stats.totalCyclesToSync} cycles (${progress}%) [batch: ${startCycle}-${endCycle}]`
+    )
   }
 
   /**
-   * Sync cycles across a batch of cycles using multi-cycle fetching
+   * Sync cycle records across a batch of cycles using multi-cycle fetching
    */
-  private async syncCyclesByCycleRange(startCycle: number, endCycle: number): Promise<void> {
+  private async syncCycleRecordsByCycleRange(startCycle: number, endCycle: number): Promise<void> {
     try {
       const response = await this.fetchDataFromDistributor(
         DataType.CYCLE,
@@ -370,11 +427,13 @@ export class ParallelDataSync {
       const compressionRatio = sizeMetadata?.compressionRatio
       const compressionSavings = sizeMetadata?.compressionSavings
       const networkElapsed = (response.data as ResponseDataWithMetadata)?.__networkElapsed || 0
+      const deserializedTime = (response.data as ResponseDataWithMetadata)?._deserializedTime || 0
 
       if (config.verbose || networkElapsed > 1000) {
         // Build log message with compression info if available
         let logMessage =
-          `[API Timing] Cycles fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+          `[API Timing] Cycle Records fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+          `deserialization: ${deserializedTime}ms, ` +
           `records: ${cycles.length}`
 
         // Only show compression metrics if compression actually reduced the size (ratio < 1)
@@ -393,7 +452,7 @@ export class ParallelDataSync {
       }
 
       if (!response || !response.data || !response.data.cycleInfo) {
-        console.error(`Error fetching cycles for cycle batch ${startCycle}-${endCycle}:`, response)
+        console.error(`Error fetching cycle records for cycle batch ${startCycle}-${endCycle}:`, response)
         return // Couldn't fetch any cycles
       }
 
@@ -414,10 +473,10 @@ export class ParallelDataSync {
       this.stats.totalCycles += cycleRecords.length
 
       if (config.verbose) {
-        console.log(`[Cycles ${startCycle}-${endCycle}] Cycles: +${response.length}`)
+        console.log(`[Cycles ${startCycle}-${endCycle}] Cycle Records: +${cycleRecords.length}`)
       }
     } catch (error) {
-      console.error(`Error fetching cycles for cycle batch ${startCycle}-${endCycle}:`, error)
+      console.error(`Error fetching cycle records for cycle batch ${startCycle}-${endCycle}:`, error)
       throw error
     }
   }
@@ -477,11 +536,13 @@ export class ParallelDataSync {
         const compressionRatio = sizeMetadata?.compressionRatio
         const compressionSavings = sizeMetadata?.compressionSavings
         const networkElapsed = (response.data as ResponseDataWithMetadata)?.__networkElapsed || 0
+        const deserializedTime = (response.data as ResponseDataWithMetadata)?._deserializedTime || 0
 
         if (config.verbose || networkElapsed > 1000) {
           // Build log message with compression info if available
           let logMessage =
             `[API Timing] Receipts fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+            `deserialization: ${deserializedTime}ms, ` +
             `records: ${receipts.length}`
 
           // Only show compression metrics if compression actually reduced the size (ratio < 1)
@@ -535,6 +596,16 @@ export class ParallelDataSync {
           nextFetchPromise = null
         }
 
+        const startTime = Date.now()
+        // Deserialize receipts
+        receipts.forEach((receipt) => {
+          ReceiptDB.deserializeDbReceipt(receipt)
+        })
+        const elapsed = Date.now() - startTime
+        if (elapsed > 100) {
+          console.log(`Deserializing ${receipts.length} receipts took: ${elapsed}ms`)
+        }
+
         // Process receipts (overlaps with next fetch if prefetch enabled)
         await ReceiptDB.processReceiptData(receipts)
 
@@ -615,11 +686,13 @@ export class ParallelDataSync {
         const compressionRatio = sizeMetadata?.compressionRatio
         const compressionSavings = sizeMetadata?.compressionSavings
         const networkElapsed = (response.data as ResponseDataWithMetadata)?.__networkElapsed || 0
+        const deserializedTime = (response.data as ResponseDataWithMetadata)?._deserializedTime || 0
 
         if (config.verbose || networkElapsed > 1000) {
           // Build log message with compression info if available
           let logMessage =
             `[API Timing] OriginalTxs fetch (cycles ${startCycle}-${endCycle}): ${networkElapsed}ms, ` +
+            `deserialization: ${deserializedTime}ms, ` +
             `records: ${originalTxs.length}`
 
           // Only show compression metrics if compression actually reduced the size (ratio < 1)
@@ -673,6 +746,16 @@ export class ParallelDataSync {
           nextFetchPromise = null
         }
 
+        const startTime = Date.now()
+        // Deserialize originalTxs
+        originalTxs.forEach((originalTx) => {
+          OriginalTxDataDB.deserializeDbOriginalTxData(originalTx)
+        })
+        const elapsed = Date.now() - startTime
+        if (elapsed > 100) {
+          console.log(`Deserializing ${originalTxs.length} originalTxs took ${elapsed}ms`)
+        }
+
         // Process originalTxs (overlaps with next fetch if prefetch enabled)
         await OriginalTxDataDB.processOriginalTxData(originalTxs)
 
diff --git a/src/storage/originalTxData.ts b/src/storage/originalTxData.ts
index d5bcd45..8134e23 100644
--- a/src/storage/originalTxData.ts
+++ b/src/storage/originalTxData.ts
@@ -196,9 +196,7 @@ export async function queryOriginalTxsData(query: QueryOriginalTxsDataParams): P
       sql += ` OFFSET ${skip}`
     }
     originalTxsData = (await db.all(originalTxDataDatabase, sql, values)) as DbOriginalTxData[]
-    for (const originalTxData of originalTxsData) {
-      originalTxData.originalTxData = StringUtils.safeJsonParse(originalTxData.originalTxData)
-    }
+    originalTxsData.forEach((originalTxData: DbOriginalTxData) => deserializeDbOriginalTxData(originalTxData))
   } catch (e) {
     console.log(e)
   }
@@ -210,9 +208,7 @@ export async function queryOriginalTxDataByTxId(txId: string): Promise<OriginalT
   try {
     const sql = `SELECT * FROM originalTxsData WHERE txId=?`
     const originalTxData = (await db.get(originalTxDataDatabase, sql, [txId])) as DbOriginalTxData
-    if (originalTxData && originalTxData.originalTxData) {
-      originalTxData.originalTxData = StringUtils.safeJsonParse(originalTxData.originalTxData)
-    }
+    if (originalTxData) deserializeDbOriginalTxData(originalTxData)
     if (config.verbose) console.log('OriginalTxData txId', originalTxData)
     return originalTxData as unknown as OriginalTxData
   } catch (e) {
@@ -245,6 +241,10 @@ export async function queryOriginalTxDataCountByCycles(
   })
 }
 
+export function deserializeDbOriginalTxData(originalTxData: DbOriginalTxData): void {
+  originalTxData.originalTxData &&= StringUtils.safeJsonParse(originalTxData.originalTxData)
+}
+
 export function cleanOldOriginalTxsMap(timestamp: number): void {
   for (const [key, value] of originalTxsMap) {
     if (value < timestamp) {
diff --git a/src/storage/receipt.ts b/src/storage/receipt.ts
index 8e6bbfb..796fa48 100644
--- a/src/storage/receipt.ts
+++ b/src/storage/receipt.ts
@@ -413,7 +413,7 @@ export async function queryReceiptCountByCycles(
   })
 }
 
-function deserializeDbReceipt(receipt: DbReceipt): void {
+export function deserializeDbReceipt(receipt: DbReceipt): void {
   receipt.tx &&= StringUtils.safeJsonParse(receipt.tx)
   receipt.beforeStates &&= StringUtils.safeJsonParse(receipt.beforeStates)
   receipt.afterStates &&= StringUtils.safeJsonParse(receipt.afterStates)
diff --git a/src/storage/sqlite3storage.ts b/src/storage/sqlite3storage.ts
index 27acc6c..4cd94ee 100644
--- a/src/storage/sqlite3storage.ts
+++ b/src/storage/sqlite3storage.ts
@@ -17,57 +17,6 @@ let queryIdSequence = 0
 const pendingQueries = new Map<number, QueryTiming>()
 const queuedBySql = new Map<string, number[]>()
 
-function formatSqlForLog(sql: string): string {
-  const normalized = sql.replace(/\s+/g, ' ').trim()
-  if (normalized.length <= SQL_LOG_MAX_LENGTH) return normalized
-  return `${normalized.slice(0, SQL_LOG_MAX_LENGTH - 3)}...`
-}
-
-function registerQuery(sql: string): QueryTiming {
-  const entry: QueryTiming = {
-    id: ++queryIdSequence,
-    sql,
-    startMs: Date.now(),
-  }
-  pendingQueries.set(entry.id, entry)
-  let queue = queuedBySql.get(sql)
-  if (!queue) {
-    queue = []
-    queuedBySql.set(sql, queue)
-  }
-  queue.push(entry.id)
-  return entry
-}
-
-function cleanupQuery(entry: QueryTiming): void {
-  pendingQueries.delete(entry.id)
-  const queue = queuedBySql.get(entry.sql)
-  if (!queue) return
-  const index = queue.indexOf(entry.id)
-  if (index !== -1) queue.splice(index, 1)
-  if (queue.length === 0) queuedBySql.delete(entry.sql)
-}
-
-function logTiming(operation: string, entry: QueryTiming, rows?: number): void {
-  const totalMs = Date.now() - entry.startMs
-  const engineMs = entry.engineMs ?? 0
-  const queueMs = Math.max(0, totalMs - engineMs)
-  const payload = {
-    operation,
-    totalMs: Number(totalMs.toFixed(2)),
-    queueMs: Number(queueMs.toFixed(2)),
-    engineMs: Number(engineMs.toFixed(2)),
-    sql: formatSqlForLog(entry.sql),
-    rows,
-  }
-
-  if (totalMs > SQL_TOTAL_WARN_THRESHOLD_MS || queueMs > SQL_QUEUE_WARN_THRESHOLD_MS) {
-    console.warn('[DB Timing]', payload)
-  } else {
-    console.log('[DB Timing]', payload)
-  }
-}
-
 export const createDB = async (dbPath: string, dbName: string): Promise<Database> => {
   console.log('dbName', dbName, 'dbPath', dbPath)
   const db = new Database(dbPath, (err) => {
@@ -89,8 +38,7 @@ export const createDB = async (dbPath: string, dbName: string): Promise<Database
     const queue = queuedBySql.get(sql)
     const id = queue && queue.length > 0 ? queue[0] : undefined
     if (id === undefined) {
-      console.warn('[DB Timing] profile event without pending query', {
-        pid: process.pid,
+      printQueryTimingLog('profile event without pending query', {
         engineMs,
         sql: formatSqlForLog(sql),
       })
@@ -98,8 +46,7 @@ export const createDB = async (dbPath: string, dbName: string): Promise<Database
     }
     const entry = pendingQueries.get(id)
     if (!entry) {
-      console.warn('[DB Timing] profile missing pending entry', {
-        pid: process.pid,
+      printQueryTimingLog('profile missing pending entry', {
         engineMs,
         sql: formatSqlForLog(sql),
       })
@@ -107,11 +54,7 @@ export const createDB = async (dbPath: string, dbName: string): Promise<Database
     }
     entry.engineMs = engineMs
     if (engineMs > SQL_ENGINE_WARN_THRESHOLD_MS) {
-      console.warn('[DB Engine] Slow engine execution detected', {
-        pid: process.pid,
-        engineMs: Number(engineMs.toFixed(2)),
-        sql: formatSqlForLog(sql),
-      })
+      console.warn(`[DB Engine] Slow Query: ${engineMs} ms for SQL: ${formatSqlForLog(sql)}`)
     }
   })
   console.log(`Database ${dbName} Initialized!`)
@@ -150,8 +93,8 @@ export async function run(
   sql: string,
   params: unknown[] | object = []
 ): Promise<{ id: number }> {
+  const entry = registerQuery(sql)
   return new Promise((resolve, reject) => {
-    const entry = registerQuery(sql)
     const finalize = (): void => {
       setImmediate(() => {
         logTiming('run', entry)
@@ -173,8 +116,8 @@ export async function run(
 }
 
 export async function get<T>(db: Database, sql: string, params = []): Promise<T> {
+  const entry = registerQuery(sql)
   return new Promise((resolve, reject) => {
-    const entry = registerQuery(sql)
     const finalize = (rows?: number): void => {
       setImmediate(() => {
         logTiming('get', entry, rows)
@@ -196,8 +139,8 @@ export async function get<T>(db: Database, sql: string, params = []): Promise<T>
 }
 
 export async function all<T>(db: Database, sql: string, params = []): Promise<T[]> {
+  const entry = registerQuery(sql)
   return new Promise((resolve, reject) => {
-    const entry = registerQuery(sql)
     const finalize = (rowsCount?: number): void => {
       setImmediate(() => {
         logTiming('all', entry, rowsCount)
@@ -254,3 +197,56 @@ export function updateSqlStatementClause(sql: string, inputs: any[]): string {
   else sql += ' WHERE '
   return sql
 }
+
+function registerQuery(sql: string): QueryTiming {
+  const entry: QueryTiming = {
+    id: ++queryIdSequence,
+    sql,
+    startMs: Date.now(),
+  }
+  pendingQueries.set(entry.id, entry)
+  let queue = queuedBySql.get(sql)
+  if (!queue) {
+    queue = []
+    queuedBySql.set(sql, queue)
+  }
+  queue.push(entry.id)
+  return entry
+}
+
+function cleanupQuery(entry: QueryTiming): void {
+  pendingQueries.delete(entry.id)
+  const queue = queuedBySql.get(entry.sql)
+  if (!queue) return
+  const index = queue.indexOf(entry.id)
+  if (index !== -1) queue.splice(index, 1)
+  if (queue.length === 0) queuedBySql.delete(entry.sql)
+}
+
+function printQueryTimingLog(message: string, payload: object): void {
+  console.warn(`[DB Timing] ${message}`, JSON.stringify(payload))
+}
+
+function logTiming(operation: string, entry: QueryTiming, rows?: number): void {
+  const totalMs = Date.now() - entry.startMs
+  const engineMs = entry.engineMs ?? 0
+  const queueMs = Math.max(0, totalMs - engineMs)
+  const payload = {
+    operation,
+    totalMs: Number(totalMs.toFixed(2)),
+    queueMs: Number(queueMs.toFixed(2)),
+    engineMs: Number(engineMs.toFixed(2)),
+    sql: formatSqlForLog(entry.sql),
+    rows,
+  }
+
+  if (totalMs > SQL_TOTAL_WARN_THRESHOLD_MS || queueMs > SQL_QUEUE_WARN_THRESHOLD_MS) {
+    printQueryTimingLog('', payload)
+  }
+}
+
+function formatSqlForLog(sql: string): string {
+  const normalized = sql.replace(/\s+/g, ' ').trim()
+  if (normalized.length <= SQL_LOG_MAX_LENGTH) return normalized
+  return `${normalized.slice(0, SQL_LOG_MAX_LENGTH - 3)}...`
+}

From 1e218211b0021ace5549f99bc27c5aba10e5dd20 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Fri, 14 Nov 2025 00:27:44 +0800
Subject: [PATCH 11/14] feat: implement buffered database writes with
 serialized queue for parallel sync

- Add accumulation buffers (1000 record threshold) to batch DB writes and reduce contention
- Implement serialized write queue with transaction support to prevent concurrent write conflicts
- Optimize receipt processing by pre-fetching existing IDs to avoid N+1 query problem
- Increase retry attempts to 5 with exponential backoff for better collector recovery
- Add mutex locks to prevent race conditions during buffer flushes
- Configure WAL checkpoint frequency and database pragmas for high-throughput operations
---
 src/class/DataSyncManager.ts       |  10 +-
 src/class/ParallelDataSync.ts      | 327 +++++++++++++++++++++++++++--
 src/config/index.ts                |   2 +-
 src/storage/account.ts             |   3 +-
 src/storage/accountHistoryState.ts |   3 +-
 src/storage/cycle.ts               |   4 +-
 src/storage/originalTxData.ts      |   3 +-
 src/storage/receipt.ts             |  67 ++++--
 src/storage/sqlite3storage.ts      | 149 ++++++++++++-
 src/storage/transaction.ts         |   7 +-
 10 files changed, 519 insertions(+), 56 deletions(-)

diff --git a/src/class/DataSyncManager.ts b/src/class/DataSyncManager.ts
index eea0504..565c534 100644
--- a/src/class/DataSyncManager.ts
+++ b/src/class/DataSyncManager.ts
@@ -104,8 +104,9 @@ export class DataSyncManager {
 
       const parallelDataSync = new ParallelDataSync({
         concurrency: config.parallelSyncConcurrency,
-        retryAttempts: 3,
-        retryDelayMs: 1000,
+        cyclesPerBatch: config.cyclesPerBatch,
+        retryAttempts: config.syncRetryAttempts,
+        enablePrefetch: config.enablePrefetch,
       })
 
       const cycleBatches = await parallelDataSync.createCycleBatches(0, latestDistributorCycle)
@@ -623,8 +624,9 @@ export class DataSyncManager {
 
         const parallelDataSync = new ParallelDataSync({
           concurrency: config.parallelSyncConcurrency,
-          retryAttempts: 3,
-          retryDelayMs: 1000,
+          cyclesPerBatch: config.cyclesPerBatch,
+          retryAttempts: config.syncRetryAttempts,
+          enablePrefetch: config.enablePrefetch,
         })
 
         const cycleBatches = []
diff --git a/src/class/ParallelDataSync.ts b/src/class/ParallelDataSync.ts
index 7825818..ae812ff 100644
--- a/src/class/ParallelDataSync.ts
+++ b/src/class/ParallelDataSync.ts
@@ -3,11 +3,22 @@ import * as crypto from '@shardus/crypto-utils'
 import { P2P, Utils as StringUtils } from '@shardus/types'
 import { config, DISTRIBUTOR_URL } from '../config'
 import { DataType } from './DataSync'
-import { CycleDB, ReceiptDB, OriginalTxDataDB } from '../storage'
-import { Cycle } from '../types'
+import {
+  CycleDB,
+  ReceiptDB,
+  OriginalTxDataDB,
+  // receiptDatabase,
+  // originalTxDataDatabase,
+  // cycleDatabase,
+} from '../storage'
+import { Cycle, Receipt, OriginalTxData } from '../types'
 import axios, { AxiosInstance } from 'axios'
 import http from 'http'
 import https from 'https'
+// import { checkpointWAL } from '../storage/sqlite3storage'
+
+// For Debugging Purpose - Set to false to skip processing data and saving to DB
+const processData = true
 
 /**
  * Configuration for parallel sync
@@ -81,12 +92,39 @@ export class ParallelDataSync {
   private httpsAgent: https.Agent
   private axiosInstance: AxiosInstance
 
+  // Accumulation buffers for batching DB writes - only write when threshold is reached
+  private receiptBuffer: Receipt[] = []
+  private originalTxBuffer: OriginalTxData[] = []
+  private cycleBuffer: Cycle[] = []
+  private readonly ACCUMULATION_THRESHOLD = 1000 // Write to DB when buffer reaches this size
+
+  // Mutex locks to prevent concurrent buffer access (race conditions)
+  private receiptBufferLock = false
+  private originalTxBufferLock = false
+  private cycleBufferLock = false
+
+  // // WAL checkpoint tracking
+  // private flushCount = 0 // Total number of buffer flushes
+  // private readonly CHECKPOINT_FREQUENCY = 10 // Run WAL checkpoint every N flushes to prevent WAL from growing too large
+
+  // // Flush pending flag to prevent multiple workers from waiting to flush
+  // private receiptFlushPending = false
+
+  // // Adaptive flush delay system - adds delays before DB writes to prevent overload
+  // private flushTimestamps: number[] = [] // Timestamps of recent flushes
+  // private readonly FLUSH_WINDOW_MS = 10000 // Track flushes in last 10 seconds
+  // private readonly FAST_FLUSH_THRESHOLD = 5 // If 5+ flushes in window, system is overloaded
+  // private minFlushDelay = 200 // Min delay before flush (ms)
+  // private maxFlushDelay = 1000 // Max delay before flush (ms)
+  // private readonly OVERLOAD_MIN_DELAY = 3000 // When overloaded, min delay increases to 3s
+  // private readonly OVERLOAD_MAX_DELAY = 5000 // When overloaded, max delay increases to 5s
+
   constructor(syncConfig?: Partial<ParallelSyncConfig>) {
     this.syncConfig = {
       concurrency: syncConfig?.concurrency || config.parallelSyncConcurrency || 10,
-      retryAttempts: syncConfig?.retryAttempts || config.syncRetryAttempts || 3,
+      cyclesPerBatch: syncConfig?.cyclesPerBatch || config.cyclesPerBatch || 100,
+      retryAttempts: syncConfig?.retryAttempts || config.syncRetryAttempts || 5,
       retryDelayMs: syncConfig?.retryDelayMs || 1000,
-      cyclesPerBatch: syncConfig?.cyclesPerBatch || config.cyclesPerBatch || 10,
       enablePrefetch: syncConfig?.enablePrefetch ?? config.enablePrefetch ?? true,
       prefetchDepth: syncConfig?.prefetchDepth || 1,
     }
@@ -308,7 +346,11 @@ export class ParallelDataSync {
         `Syncing ${cycleBatches.length} cycle batches created with ${this.syncConfig.cyclesPerBatch} cycles per batch`
       )
 
-      // Add all batch sync tasks to the queue
+      // Three-phase approach for optimal performance:
+      // Phase 1: Use main queue (concurrency: 5) for parallel API fetching
+      // Phase 2: Buffer data in memory until ACCUMULATION_THRESHOLD (1000) is reached
+      // Phase 3: DB writes are batched and serialized via storage-level queue
+      // This combines parallel I/O with batched, serialized DB writes to minimize contention
       const tasks = cycleBatches.map((batch) =>
         this.queue.add(() => this.syncDataByCycleRange(batch.startCycle, batch.endCycle))
       )
@@ -318,7 +360,11 @@ export class ParallelDataSync {
       // Wait for all tasks to complete (even if some fail)
       const results = await Promise.allSettled(tasks)
 
-      console.log('All tasks completed, setting end time...')
+      console.log('All tasks completed, flushing remaining buffers...')
+
+      // Flush any remaining buffered data to database
+      await this.flushAllBuffers()
+
       this.stats.endTime = Date.now()
 
       // Count successful and failed tasks
@@ -355,6 +401,12 @@ export class ParallelDataSync {
     } catch (error) {
       console.error('Fatal error in parallel sync:', error)
       this.stats.errors++
+      // Try to flush buffers even on error to preserve data
+      try {
+        await this.flushAllBuffers()
+      } catch (flushError) {
+        console.error('Error flushing buffers during error handling:', flushError)
+      }
       throw error
     }
   }
@@ -466,8 +518,8 @@ export class ParallelDataSync {
         cycleMarker: cycleRecord.marker,
       }))
 
-      // Bulk insert cycles
-      await CycleDB.bulkInsertCycles(cycleRecords)
+      // Add cycles to buffer - will flush to DB when buffer reaches threshold
+      await this.addToBuffer('cycle', cycleRecords)
 
       // Update stats
       this.stats.totalCycles += cycleRecords.length
@@ -606,8 +658,8 @@ export class ParallelDataSync {
           console.log(`Deserializing ${receipts.length} receipts took: ${elapsed}ms`)
         }
 
-        // Process receipts (overlaps with next fetch if prefetch enabled)
-        await ReceiptDB.processReceiptData(receipts)
+        // Add receipts to buffer - will flush to DB when buffer reaches threshold
+        await this.addToBuffer('receipt', receipts)
 
         totalFetched += receipts.length
         this.stats.totalReceipts += receipts.length
@@ -756,8 +808,8 @@ export class ParallelDataSync {
           console.log(`Deserializing ${originalTxs.length} originalTxs took ${elapsed}ms`)
         }
 
-        // Process originalTxs (overlaps with next fetch if prefetch enabled)
-        await OriginalTxDataDB.processOriginalTxData(originalTxs)
+        // Add originalTxs to buffer - will flush to DB when buffer reaches threshold
+        await this.addToBuffer('originalTx', originalTxs)
 
         totalFetched += originalTxs.length
         this.stats.totalOriginalTxs += originalTxs.length
@@ -804,25 +856,30 @@ export class ParallelDataSync {
         return response
       } catch (error: any) {
         const isLastAttempt = attempt === this.syncConfig.retryAttempts
-        const isRetryableError =
-          error.code === 'ECONNRESET' ||
-          error.code === 'ETIMEDOUT' ||
-          error.code === 'ECONNREFUSED' ||
-          error.code === 'EPIPE'
 
-        if (isRetryableError && !isLastAttempt) {
+        // Retry ALL errors (network errors, socket hang up, timeouts, etc.)
+        // This gives the collector time to recover when overloaded
+        if (!isLastAttempt) {
+          // Exponential backoff with longer delays to give collector time to recover
           const delay = this.syncConfig.retryDelayMs * Math.pow(2, attempt)
+          const errorCode = error.code || error.cause?.code || 'UNKNOWN'
+          const errorMsg = error.message || 'Unknown error'
           console.warn(
-            `ECONNRESET on ${route} fetch (cycles ${startCycle}-${endCycle}), ` +
-              `attempt ${attempt + 1}/${this.syncConfig.retryAttempts + 1}, ` +
-              `retrying in ${delay}ms...`
+            `Error (${errorCode}: ${errorMsg}) on ${route} fetch (cycles ${startCycle}-${endCycle}), ` +
+              `attempt ${attempt + 1}/${this.syncConfig.retryAttempts}, ` +
+              `retrying in ${delay}ms... (Giving collector time to process DB writes)`
           )
           await this.sleep(delay)
           continue
         }
 
-        // Non-retryable error or last attempt failed
-        console.error(`Error fetching ${route} for (cycles ${startCycle}-${endCycle}):`, error.message)
+        // Last attempt failed - throw error
+        console.error(
+          `Error fetching ${route} for (cycles ${startCycle}-${endCycle}) after ${
+            this.syncConfig.retryAttempts + 1
+          } attempts:`,
+          error.message
+        )
         throw error
       }
     }
@@ -876,6 +933,230 @@ export class ParallelDataSync {
     console.log(`${'='.repeat(60)}\n`)
   }
 
+  /**
+   * Generic function to add data to buffer and flush if threshold reached
+   * Handles all buffer types (receipts, originalTxs, cycles)
+   */
+  private async addToBuffer(
+    type: 'receipt' | 'originalTx' | 'cycle',
+    data: Receipt[] | OriginalTxData[] | Cycle[]
+  ): Promise<void> {
+    if (type === 'receipt') {
+      // Wait for lock to be released (prevents concurrent modification during flush)
+      while (this.receiptBufferLock) {
+        await new Promise((resolve) => setTimeout(resolve, 10))
+      }
+
+      // Add data to buffer
+      this.receiptBuffer.push(...(data as Receipt[]))
+
+      // Check if buffer reached threshold
+      if (this.receiptBuffer.length >= this.ACCUMULATION_THRESHOLD) {
+        await this.flushBuffer('receipt')
+      }
+    } else if (type === 'originalTx') {
+      // Wait for lock to be released (prevents concurrent modification during flush)
+      while (this.originalTxBufferLock) {
+        await new Promise((resolve) => setTimeout(resolve, 10))
+      }
+
+      // Add data to buffer
+      this.originalTxBuffer.push(...(data as OriginalTxData[]))
+
+      // Check if buffer reached threshold
+      if (this.originalTxBuffer.length >= this.ACCUMULATION_THRESHOLD) {
+        await this.flushBuffer('originalTx')
+      }
+    } else {
+      // Wait for lock to be released (prevents concurrent modification during flush)
+      while (this.cycleBufferLock) {
+        await new Promise((resolve) => setTimeout(resolve, 10))
+      }
+
+      // Add data to buffer
+      this.cycleBuffer.push(...(data as Cycle[]))
+
+      // Check if buffer reached threshold
+      if (this.cycleBuffer.length >= this.ACCUMULATION_THRESHOLD) {
+        await this.flushBuffer('cycle')
+      }
+    }
+  }
+
+  /**
+   * Generic function to flush buffer to database
+   * Handles all buffer types with adaptive delay and locking (adaptive cooling only for receipts)
+   */
+  private async flushBuffer(type: 'receipt' | 'originalTx' | 'cycle'): Promise<void> {
+    if (type === 'receipt') {
+      if (this.receiptBuffer.length === 0) return
+
+      // // If another worker is already flushing, return immediately (it will flush our data too)
+      // if (this.receiptFlushPending) {
+      //   return
+      // }
+
+      // // Mark flush as pending
+      // this.receiptFlushPending = true
+
+      // // Apply adaptive delay BEFORE acquiring lock to spread out DB writes (receipts only)
+      // const delay = this.getAdaptiveFlushDelay()
+      // if (delay > 0) {
+      //   const recentFlushCount = this.flushTimestamps.length
+      //   const delayRange = `${this.minFlushDelay}-${this.maxFlushDelay}ms`
+      //   console.log(
+      //     `[Adaptive Cooling] Receipts - Waiting ${delay}ms before flush ` +
+      //       `(recent flushes: ${recentFlushCount}, range: ${delayRange})`
+      //   )
+      //   await new Promise((resolve) => setTimeout(resolve, delay))
+      // }
+
+      // // If another worker is already locking, return immediately (it will flush our data too)
+      // if (this.receiptBufferLock) {
+      //   return
+      // }
+
+      this.receiptBufferLock = true
+      try {
+        const toFlush = [...this.receiptBuffer]
+        this.receiptBuffer = []
+        console.log(`[Buffer Flush] Flushing ${toFlush.length} receipts to database`)
+        if (processData) await ReceiptDB.processReceiptData(toFlush, false, false)
+
+        // // Track flush timestamp for adaptive delay system (receipts only)
+        // this.recordFlushTimestamp()
+      } finally {
+        this.receiptBufferLock = false
+
+        // // Clear flush pending flag
+        // this.receiptFlushPending = false
+      }
+    } else if (type === 'originalTx') {
+      if (this.originalTxBuffer.length === 0) return
+
+      // If another worker is already locking, return immediately (it will flush our data too)
+      if (this.originalTxBufferLock) {
+        return
+      }
+
+      this.originalTxBufferLock = true
+      try {
+        const toFlush = [...this.originalTxBuffer]
+        this.originalTxBuffer = []
+        console.log(`[Buffer Flush] Flushing ${toFlush.length} originaltxs to database`)
+        if (processData) await OriginalTxDataDB.processOriginalTxData(toFlush)
+      } finally {
+        this.originalTxBufferLock = false
+      }
+    } else {
+      if (this.cycleBuffer.length === 0) return
+
+      // If another worker is already locking, return immediately (it will flush our data too)
+      if (this.cycleBufferLock) {
+        return
+      }
+
+      this.cycleBufferLock = true
+      try {
+        const toFlush = [...this.cycleBuffer]
+        this.cycleBuffer = []
+        console.log(`[Buffer Flush] Flushing ${toFlush.length} cycles to database`)
+        if (processData) await CycleDB.bulkInsertCycles(toFlush)
+      } finally {
+        this.cycleBufferLock = false
+      }
+    }
+  }
+
+  /**
+   * Flush all buffers (call at end of sync)
+   */
+  private async flushAllBuffers(): Promise<void> {
+    await this.flushBuffer('receipt')
+    await this.flushBuffer('originalTx')
+    await this.flushBuffer('cycle')
+  }
+
+  // /**
+  //  * Conditionally checkpoint WAL files if enough flushes have occurred
+  //  * This prevents WAL files from growing too large during long sync operations
+  //  */
+  // private async maybeCheckpointWAL(): Promise<void> {
+  //   if (this.flushCount % this.CHECKPOINT_FREQUENCY === 0) {
+  //     console.log(
+  //       `[WAL Checkpoint] Running periodic checkpoint after ${this.flushCount} buffer flushes (~${
+  //         this.flushCount * this.ACCUMULATION_THRESHOLD
+  //       } records)`
+  //     )
+  //     // Run checkpoints on all three databases in parallel
+  //     // Use PASSIVE mode to avoid blocking readers
+  //     await Promise.all([
+  //       checkpointWAL(receiptDatabase, 'PASSIVE'),
+  //       checkpointWAL(originalTxDataDatabase, 'PASSIVE'),
+  //       checkpointWAL(cycleDatabase, 'PASSIVE'),
+  //     ])
+  //   }
+  // }
+
+  // /**
+  //  * Record flush timestamp and clean up old timestamps
+  //  * Used to track flush frequency and detect system overload
+  //  */
+  // private recordFlushTimestamp(): void {
+  //   const now = Date.now()
+  //   this.flushTimestamps.push(now)
+
+  //   // Clean up old timestamps outside the tracking window
+  //   this.flushTimestamps = this.flushTimestamps.filter((timestamp) => now - timestamp < this.FLUSH_WINDOW_MS)
+  // }
+
+  // /**
+  //  * Calculate adaptive flush delay based on recent flush frequency
+  //  * Returns a random delay within a range that adapts to system load
+  //  */
+  // private getAdaptiveFlushDelay(): number {
+  //   // Clean up old timestamps first
+  //   const now = Date.now()
+  //   this.flushTimestamps = this.flushTimestamps.filter((timestamp) => now - timestamp < this.FLUSH_WINDOW_MS)
+
+  //   // Check if system is overloaded (too many flushes in recent window)
+  //   const recentFlushCount = this.flushTimestamps.length
+  //   const isOverloaded = recentFlushCount >= this.FAST_FLUSH_THRESHOLD
+  //   const wasOverloaded = this.minFlushDelay === this.OVERLOAD_MIN_DELAY
+
+  //   // Adjust delay range based on system load
+  //   if (isOverloaded) {
+  //     // System overloaded - use longer delays
+  //     const wasNormal = this.minFlushDelay === 200
+  //     this.minFlushDelay = this.OVERLOAD_MIN_DELAY
+  //     this.maxFlushDelay = this.OVERLOAD_MAX_DELAY
+  //     if (wasNormal) {
+  //       // Log when transitioning from normal to overloaded
+  //       console.log(
+  //         `[Adaptive Cooling] ⚠️  OVERLOAD DETECTED! ${recentFlushCount} flushes in last ${
+  //           this.FLUSH_WINDOW_MS / 1000
+  //         }s. ` + `Increasing cooling delay: ${this.minFlushDelay}-${this.maxFlushDelay}ms`
+  //       )
+  //     }
+  //   } else if (recentFlushCount < this.FAST_FLUSH_THRESHOLD / 2) {
+  //     // System healthy - reduce delays back to normal
+  //     if (wasOverloaded) {
+  //       // Log when recovering from overload
+  //       console.log(
+  //         `[Adaptive Cooling] ✓ System recovered! ${recentFlushCount} flushes in last ${
+  //           this.FLUSH_WINDOW_MS / 1000
+  //         }s. ` + `Reducing cooling delay: 200-1000ms`
+  //       )
+  //     }
+  //     this.minFlushDelay = 200
+  //     this.maxFlushDelay = 1000
+  //   }
+
+  //   // Return random delay within current range to stagger DB writes
+  //   const delay = this.minFlushDelay + Math.floor(Math.random() * (this.maxFlushDelay - this.minFlushDelay))
+  //   return delay
+  // }
+
   /**
    * Get current statistics
    */
diff --git a/src/config/index.ts b/src/config/index.ts
index 0e1eb50..fae05f3 100644
--- a/src/config/index.ts
+++ b/src/config/index.ts
@@ -193,7 +193,7 @@ let config: Config = {
   useParallelSync: process.env.USE_PARALLEL_SYNC !== 'false', // Enable by default
   cyclesPerBatch: Number(process.env.CYCLES_PER_BATCH) || 100, // Batch 100 cycles together ( matching MAX_BETWEEN_CYCLES_PER_REQUEST, can be lower if needed )
   enablePrefetch: process.env.ENABLE_PREFETCH !== 'false', // Enable prefetch by default
-  syncRetryAttempts: Number(process.env.SYNC_RETRY_ATTEMPTS) || 3, // Retry failed requests 3 times
+  syncRetryAttempts: Number(process.env.SYNC_RETRY_ATTEMPTS) || 5, // Retry failed requests 5 times
   dexScreenerAPI:
     'https://api.dexscreener.com/latest/dex/search?q=0x693ed886545970F0a3ADf8C59af5cCdb6dDF0a76',
   dexScreenerLink: 'https://dexscreener.com/polygon/0x041e48a5b11c29fdbd92498eb05573c52728398c',
diff --git a/src/storage/account.ts b/src/storage/account.ts
index 02d2cb9..2d1fbef 100644
--- a/src/storage/account.ts
+++ b/src/storage/account.ts
@@ -79,7 +79,8 @@ export async function bulkInsertAccounts(accounts: Account[]): Promise<void> {
         ${keepNewerData('accountType')},
         ${keepNewerData('isGlobal')},
       createdTimestamp = MIN(accounts.createdTimestamp, excluded.createdTimestamp)`
-    await db.run(accountDatabase, sql, values)
+    // Serialize write through storage-level queue + transaction for atomicity
+    await db.executeDbWriteWithTransaction(accountDatabase, sql, values)
     console.log('Successfully bulk inserted Accounts', accounts.length)
   } catch (e) {
     console.log(e)
diff --git a/src/storage/accountHistoryState.ts b/src/storage/accountHistoryState.ts
index c517687..b0e1723 100644
--- a/src/storage/accountHistoryState.ts
+++ b/src/storage/accountHistoryState.ts
@@ -70,7 +70,8 @@ export async function bulkInsertAccountHistoryStates(
     )
 
     const sql = `INSERT OR REPLACE INTO accountHistoryState ${fields} VALUES ${allPlaceholders}`
-    await db.run(accountHistoryStateDatabase, sql, values)
+    // Serialize write through storage-level queue + transaction for atomicity
+    await db.executeDbWriteWithTransaction(accountHistoryStateDatabase, sql, values)
     console.log('Successfully bulk inserted AccountHistoryStates', accountHistoryStates.length)
   } catch (e) {
     console.log(e)
diff --git a/src/storage/cycle.ts b/src/storage/cycle.ts
index d295f7e..1bb1f43 100644
--- a/src/storage/cycle.ts
+++ b/src/storage/cycle.ts
@@ -62,7 +62,8 @@ export async function bulkInsertCycles(cycles: Cycle[]): Promise<void> {
     )
 
     const sql = `INSERT OR REPLACE INTO cycles ${fields} VALUES ${allPlaceholders}`
-    await db.run(cycleDatabase, sql, values)
+    // Serialize write through storage-level queue + transaction for atomicity
+    await db.executeDbWriteWithTransaction(cycleDatabase, sql, values)
     console.log('Successfully bulk inserted Cycles', cycles.length)
   } catch (e) {
     console.log(e)
@@ -248,7 +249,6 @@ export interface CycleGap {
  */
 export async function queryMissingCycleRanges(targetCycle: number): Promise<CycleGap[]> {
   try {
-
     // Get first and last cycle for edge gap detection
     const firstCycleResult = (await db.get(
       cycleDatabase,
diff --git a/src/storage/originalTxData.ts b/src/storage/originalTxData.ts
index 8134e23..50f37b2 100644
--- a/src/storage/originalTxData.ts
+++ b/src/storage/originalTxData.ts
@@ -60,7 +60,8 @@ export async function bulkInsertOriginalTxsData(originalTxsData: OriginalTxData[
     )
 
     const sql = `INSERT OR REPLACE INTO originalTxsData ${fields} VALUES ${allPlaceholders}`
-    await db.run(originalTxDataDatabase, sql, values)
+    // Serialize write through storage-level queue + transaction for atomicity
+    await db.executeDbWriteWithTransaction(originalTxDataDatabase, sql, values)
     console.log(`Successfully bulk inserted OriginalTxsData`, originalTxsData.length)
   } catch (e) {
     console.log(e)
diff --git a/src/storage/receipt.ts b/src/storage/receipt.ts
index 796fa48..b05d103 100644
--- a/src/storage/receipt.ts
+++ b/src/storage/receipt.ts
@@ -72,7 +72,8 @@ export async function bulkInsertReceipts(receipts: Receipt[]): Promise<void> {
     )
 
     const sql = `INSERT OR REPLACE INTO receipts ${fields} VALUES ${allPlaceholders}`
-    await db.run(receiptDatabase, sql, values)
+    // Serialize write through storage-level queue + transaction for atomicity
+    await db.executeDbWriteWithTransaction(receiptDatabase, sql, values)
     console.log('Successfully bulk inserted receipts', receipts.length)
   } catch (e) {
     console.log(e)
@@ -83,14 +84,28 @@ export async function bulkInsertReceipts(receipts: Receipt[]): Promise<void> {
 export async function processReceiptData(
   receipts: Receipt[],
   saveOnlyNewData = false,
+  filterExistingAccounts = true, // When true, queries DB to filter out older account data before insert
   forwardToSubscribers = false
 ): Promise<void> {
   if (receipts && receipts.length <= 0) return
-  const bucketSize = 1000
+  const bucketSize = 2000
+  const bucketSizeForReceipts = 1000 // Receipts size can be big, better to save less than the bucket size
   let combineReceipts: Receipt[] = []
   let combineAccounts: Account[] = [] // For new accounts to bulk insert; Not for accounts that are already stored in database
   let combineTransactions: Transaction[] = []
   let accountHistoryStateList: AccountHistoryStateDB.AccountHistoryState[] = []
+
+  // Optimization: If saveOnlyNewData is true, batch query existing receipt IDs BEFORE the loop
+  // to avoid N+1 query problem (individual SELECTs for each receipt)
+  let existingReceiptIds: Set<string> = new Set()
+  if (saveOnlyNewData && receipts.length > 0) {
+    const receiptIds = receipts.map((r) => r.tx.txId)
+    const placeholders = receiptIds.map(() => '?').join(', ')
+    const sql = `SELECT receiptId FROM receipts WHERE receiptId IN (${placeholders})`
+    const existingReceipts = (await db.all(receiptDatabase, sql, receiptIds)) as { receiptId: string }[]
+    existingReceiptIds = new Set(existingReceipts.map((r) => r.receiptId))
+  }
+
   for (const receiptObj of receipts) {
     const {
       afterStates,
@@ -118,8 +133,10 @@ export async function processReceiptData(
       applyTimestamp: applyTimestamp ?? calculatedApplyTimestamp,
     }
     if (saveOnlyNewData) {
-      const receiptExist = await queryReceiptByReceiptId(tx.txId)
-      if (!receiptExist) combineReceipts.push(modifiedReceiptObj as unknown as Receipt)
+      // Check against pre-fetched set instead of querying database for each receipt
+      if (!existingReceiptIds.has(tx.txId)) {
+        combineReceipts.push(modifiedReceiptObj as unknown as Receipt)
+      }
     } else combineReceipts.push(modifiedReceiptObj as unknown as Receipt)
     const txReceipt = appReceiptData
     receiptsMap.set(tx.txId, tx.timestamp)
@@ -128,8 +145,7 @@ export async function processReceiptData(
       forwardData(receiptObj)
     }
 
-    // Receipts size can be big, better to save per 100
-    if (combineReceipts.length >= 100) {
+    if (combineReceipts.length >= bucketSizeForReceipts) {
       await bulkInsertReceipts(combineReceipts)
       combineReceipts = []
     }
@@ -288,22 +304,30 @@ export async function processReceiptData(
     }
   }
 
-  // Batch query all collected account IDs once
-  const accountIdsToQuery = combineAccounts.map((acc) => acc.accountId)
-  const existingAccounts = await AccountDB.queryAccountTimestampsBatch(accountIdsToQuery)
-  for (const accObj of combineAccounts) {
-    const accountExist = existingAccounts.get(accObj.accountId)
-    if (accountExist) {
-      if (accountExist.timestamp > accObj.timestamp) {
-        // await AccountDB.updateAccount(accObj)
-        // Remove the account from the list
-        combineAccounts = combineAccounts.filter((acc) => acc.accountId !== accObj.accountId)
-      }
-      if (accountExist.createdTimestamp > accObj.createdTimestamp) {
-        await AccountDB.updateCreatedTimestamp(accObj.accountId, accObj.createdTimestamp)
+  // Optimization: The bulkInsertAccounts SQL already handles:
+  // 1. Keeping newer data via CASE WHEN excluded.timestamp > accounts.timestamp
+  // 2. Preserving oldest createdTimestamp via MIN(accounts.createdTimestamp, excluded.createdTimestamp)
+  // By default (filterExistingAccounts=false), we skip the batch query and individual updates - just bulk insert everything
+
+  if (filterExistingAccounts) {
+    // Legacy path: Batch query all collected account IDs once and filter before insert
+    const accountIdsToQuery = combineAccounts.map((acc) => acc.accountId)
+    const existingAccounts = await AccountDB.queryAccountTimestampsBatch(accountIdsToQuery)
+    for (const accObj of combineAccounts) {
+      const accountExist = existingAccounts.get(accObj.accountId)
+      if (accountExist) {
+        if (accountExist.timestamp > accObj.timestamp) {
+          // await AccountDB.updateAccount(accObj)
+          // Remove the account from the list
+          combineAccounts = combineAccounts.filter((acc) => acc.accountId !== accObj.accountId)
+        }
+        if (accountExist.createdTimestamp > accObj.createdTimestamp) {
+          await AccountDB.updateCreatedTimestamp(accObj.accountId, accObj.createdTimestamp)
+        }
       }
     }
   }
+
   // Insert the combined accounts in bucketSize
   if (combineAccounts.length > 0) {
     for (let i = 0; i < combineAccounts.length; i += bucketSize) {
@@ -399,7 +423,10 @@ export async function queryReceiptCountByCycles(
   let receipts: { cycle: number; 'COUNT(*)': number }[] = []
   try {
     const sql = `SELECT cycle, COUNT(*) FROM receipts GROUP BY cycle HAVING cycle BETWEEN ? AND ? ORDER BY cycle ASC`
-    receipts = (await db.all(receiptDatabase, sql, [start, end])) as { cycle: number; 'COUNT(*)': number }[]
+    receipts = (await db.all(receiptDatabase, sql, [start, end])) as {
+      cycle: number
+      'COUNT(*)': number
+    }[]
   } catch (e) {
     console.log(e)
   }
diff --git a/src/storage/sqlite3storage.ts b/src/storage/sqlite3storage.ts
index 4cd94ee..68ba51a 100644
--- a/src/storage/sqlite3storage.ts
+++ b/src/storage/sqlite3storage.ts
@@ -1,6 +1,11 @@
 import { Utils as StringUtils } from '@shardus/types'
 import { Database } from 'sqlite3'
 
+// Simple write queue using Promise chain - serializes all database writes
+// This prevents write contention while allowing parallel reads (SELECTs)
+// Only INSERT/UPDATE/DELETE operations should use this queue
+let writeQueueTail: Promise<unknown> = Promise.resolve()
+
 interface QueryTiming {
   id: number
   sql: string
@@ -29,7 +34,7 @@ export const createDB = async (dbPath: string, dbName: string): Promise<Database
   await run(db, 'PRAGMA synchronous = NORMAL')
   await run(db, 'PRAGMA temp_store = MEMORY')
   await run(db, 'PRAGMA cache_size = -256000') // Increased to ~256MB cache for better performance
-  await run(db, 'PRAGMA wal_autocheckpoint = 5000') // Checkpoint every 5000 pages (less frequent = less lock contention)
+  await run(db, 'PRAGMA wal_autocheckpoint = 10000') // Checkpoint every 10000 pages (less frequent = less lock contention)
   await run(db, 'PRAGMA mmap_size = 536870912') // 512MB memory-mapped I/O for faster reads (reduced disk I/O)
   await run(db, 'PRAGMA busy_timeout = 30000') // Wait up to 30s if database is locked
   await run(db, 'PRAGMA threads = 4') // Use up to 4 threads for parallel operations
@@ -61,6 +66,73 @@ export const createDB = async (dbPath: string, dbName: string): Promise<Database
   return db
 }
 
+/**
+ * Create read-only database connection optimized for SELECT queries
+ * - Shorter busy_timeout (reads shouldn't block in WAL mode)
+ * - No synchronous writes (read-only)
+ * - Large cache and mmap for fast reads
+ */
+export const createReadDB = async (dbPath: string, dbName: string): Promise<Database> => {
+  console.log('dbName (Read)', dbName, 'dbPath', dbPath)
+  const db = new Database(dbPath, (err) => {
+    if (err) {
+      console.log('Error opening read database:', err)
+      throw err
+    }
+  })
+  await run(db, 'PRAGMA journal_mode=WAL') // WAL mode allows concurrent reads with writes
+  await run(db, 'PRAGMA synchronous = OFF') // Read-only connection doesn't need sync
+  await run(db, 'PRAGMA temp_store = MEMORY')
+  await run(db, 'PRAGMA cache_size = -128000') // 128MB cache (smaller than write connection)
+  await run(db, 'PRAGMA mmap_size = 536870912') // 512MB memory-mapped I/O for faster reads
+  await run(db, 'PRAGMA busy_timeout = 5000') // Shorter timeout - reads shouldn't block in WAL mode
+  await run(db, 'PRAGMA threads = 4') // Use up to 4 threads for parallel operations
+  await run(db, 'PRAGMA query_only = ON') // Enforce read-only mode at SQLite level
+  db.on('profile', (sql, time) => {
+    const engineMs = typeof time === 'number' ? time : Number(time)
+    const queue = queuedBySql.get(sql)
+    const id = queue && queue.length > 0 ? queue[0] : undefined
+    if (id === undefined) {
+      printQueryTimingLog('profile event without pending query (read)', {
+        engineMs,
+        sql: formatSqlForLog(sql),
+      })
+      return
+    }
+    const entry = pendingQueries.get(id)
+    if (!entry) {
+      printQueryTimingLog('profile missing pending entry (read)', {
+        engineMs,
+        sql: formatSqlForLog(sql),
+      })
+      return
+    }
+    entry.engineMs = engineMs
+    if (engineMs > SQL_ENGINE_WARN_THRESHOLD_MS) {
+      console.warn(`[DB Engine Read] Slow Query: ${engineMs} ms for SQL: ${formatSqlForLog(sql)}`)
+    }
+  })
+  console.log(`Read Database ${dbName} Initialized!`)
+  return db
+}
+
+/**
+ * Manually checkpoint the WAL file to prevent it from growing too large
+ * Uses PASSIVE mode which won't block readers
+ * Call this periodically during long-running sync operations
+ */
+export async function checkpointWAL(
+  db: Database,
+  mode: 'PASSIVE' | 'FULL' | 'RESTART' = 'PASSIVE'
+): Promise<void> {
+  try {
+    await run(db, `PRAGMA wal_checkpoint(${mode})`)
+    console.log(`[WAL Checkpoint] Executed ${mode} checkpoint`)
+  } catch (error) {
+    console.error('[WAL Checkpoint] Failed to checkpoint WAL:', error)
+  }
+}
+
 /**
  * Close Database Connections Gracefully
  */
@@ -161,6 +233,81 @@ export async function all<T>(db: Database, sql: string, params = []): Promise<T[
   })
 }
 
+/**
+ * Executes a database write operation through the shared write queue
+ * Use this for INSERT/UPDATE/DELETE operations to prevent write contention
+ * Do NOT use for SELECT queries - they can run in parallel
+ */
+export async function executeDbWrite<T>(writeOperation: () => Promise<T>): Promise<T> {
+  const enqueuedAt = Date.now()
+
+  // Wait for previous write to finish, ignoring errors to prevent propagation
+  const myTurn = writeQueueTail.catch(() => undefined)
+
+  // Create and chain the new write operation
+  const currentWrite = myTurn.then(async () => {
+    const startedAt = Date.now()
+    const promiseQueueMs = startedAt - enqueuedAt
+
+    // Log if we waited a long time in the Promise queue
+    if (promiseQueueMs > 100) {
+      console.log(`[Promise Queue] Waited ${promiseQueueMs}ms in Promise queue before starting DB operation`)
+    }
+
+    const value = await writeOperation()
+    const completedAt = Date.now()
+    const executionMs = completedAt - startedAt
+
+    // Log slow DB operations (includes transaction + SQLite busy_timeout)
+    if (executionMs > 500) {
+      console.log(
+        `[DB Operation] Total: ${executionMs}ms (Promise queue: ${promiseQueueMs}ms, DB execution+waiting: ${executionMs}ms)`
+      )
+    }
+
+    return value
+  })
+
+  // Update queue tail to current write (for next operation to wait on)
+  writeQueueTail = currentWrite.catch(() => undefined)
+
+  // Return the actual operation result
+  return currentWrite
+}
+
+/**
+ * Execute work within a database transaction
+ * Uses BEGIN (deferred) since our write queue already serializes writes
+ * This reduces lock contention compared to BEGIN IMMEDIATE
+ * @param db Database instance
+ * @param work Async function containing the work to execute within the transaction
+ * @returns Result of the work function
+ */
+export async function executeInTransaction<T>(db: Database, work: () => Promise<T>): Promise<T> {
+  await run(db, 'BEGIN') // Deferred transaction - acquires RESERVED lock on first write, not at BEGIN
+  try {
+    const result = await work()
+    await run(db, 'COMMIT')
+    return result
+  } catch (error) {
+    await run(db, 'ROLLBACK')
+    throw error
+  }
+}
+
+export async function executeDbWriteWithTransaction(
+  db: Database,
+  sql: string,
+  params: unknown[] | object = []
+): Promise<void> {
+  // Serialize write through storage-level queue + transaction for atomicity
+  await executeDbWrite(() =>
+    executeInTransaction(db, async () => {
+      await run(db, sql, params)
+    })
+  )
+}
+
 export function extractValues(object: object): string[] {
   try {
     const inputs: string[] = []
diff --git a/src/storage/transaction.ts b/src/storage/transaction.ts
index f46238b..5779cfa 100644
--- a/src/storage/transaction.ts
+++ b/src/storage/transaction.ts
@@ -61,7 +61,8 @@ export async function bulkInsertTransactions(transactions: Transaction[]): Promi
     )
 
     const sql = `INSERT OR REPLACE INTO transactions ${fields} VALUES ${allPlaceholders}`
-    await db.run(transactionDatabase, sql, values)
+    // Serialize write through storage-level queue + transaction for atomicity
+    await db.executeDbWriteWithTransaction(transactionDatabase, sql, values)
     console.log('Successfully bulk inserted transactions', transactions.length)
   } catch (e) {
     console.log(e)
@@ -378,7 +379,9 @@ export async function queryActiveAccountsCountByTxFee(
       WHERE timestamp < ? AND timestamp > ? ${excludeZeroFeeTxs ? ' AND txFee > 0' : ''}
     `
     const values = [beforeTimestamp, afterTimestamp]
-    activeAccounts = (await db.get(transactionDatabase, sql, values)) as { 'COUNT(DISTINCT txFrom)': number }
+    activeAccounts = (await db.get(transactionDatabase, sql, values)) as {
+      'COUNT(DISTINCT txFrom)': number
+    }
   } catch (e) {
     console.log('Error querying active accounts by txFee:', e)
   }

From 0f4c91acbd963f054ac6108191d97857a5cd4a9b Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Sat, 15 Nov 2025 00:31:48 +0800
Subject: [PATCH 12/14] feat: optimize receipt deserialization by processing in
 chunks to prevent event loop blocking

---
 src/class/ParallelDataSync.ts | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/class/ParallelDataSync.ts b/src/class/ParallelDataSync.ts
index ae812ff..5332a12 100644
--- a/src/class/ParallelDataSync.ts
+++ b/src/class/ParallelDataSync.ts
@@ -648,16 +648,6 @@ export class ParallelDataSync {
           nextFetchPromise = null
         }
 
-        const startTime = Date.now()
-        // Deserialize receipts
-        receipts.forEach((receipt) => {
-          ReceiptDB.deserializeDbReceipt(receipt)
-        })
-        const elapsed = Date.now() - startTime
-        if (elapsed > 100) {
-          console.log(`Deserializing ${receipts.length} receipts took: ${elapsed}ms`)
-        }
-
         // Add receipts to buffer - will flush to DB when buffer reaches threshold
         await this.addToBuffer('receipt', receipts)
 
@@ -1018,8 +1008,28 @@ export class ParallelDataSync {
 
       this.receiptBufferLock = true
       try {
-        const toFlush = [...this.receiptBuffer]
+        const toFlush = [...this.receiptBuffer] as any
         this.receiptBuffer = []
+
+        const startTime = Date.now()
+        // Deserialize receipts in chunks to prevent event loop blocking
+        const CHUNK_SIZE = 20
+        for (let i = 0; i < toFlush.length; i += CHUNK_SIZE) {
+          const end = Math.min(i + CHUNK_SIZE, toFlush.length)
+          // Deserialize chunk of receipts
+          for (let j = i; j < end; j++) {
+            // eslint-disable-next-line security/detect-object-injection
+            ReceiptDB.deserializeDbReceipt(toFlush[j])
+          }
+          // Yield to event loop after each chunk (except the last one)
+          if (end < toFlush.length) {
+            await new Promise((resolve) => setImmediate(resolve))
+          }
+        }
+        const elapsed = Date.now() - startTime
+        if (elapsed > 100) {
+          console.log(`Deserializing ${toFlush.length} receipts took: ${elapsed}ms`)
+        }
         console.log(`[Buffer Flush] Flushing ${toFlush.length} receipts to database`)
         if (processData) await ReceiptDB.processReceiptData(toFlush, false, false)
 

From 0e88eea9287bd5e717c92177de969c83e7d69e64 Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Mon, 17 Nov 2025 21:16:17 +0800
Subject: [PATCH 13/14] refactor: Adjust Parallel Sync configurations for
 optimal performance

- Set as a config for manual WAL checkpointing every 10 buffer flushes to prevent WAL growth
- Reduce concurrency from 10 to 5 workers to balance throughput with DB pressure
- Add chunked receipt deserialization (20 per chunk) to prevent event loop blocking
- Put as a config for write queue infrastructure for serialized database operations
- Add processData flag for debugging and performance testing
---
 src/class/ParallelDataSync.ts | 176 ++++++++--------------------------
 src/config/index.ts           |   2 +-
 src/storage/sqlite3storage.ts |  32 +++++--
 3 files changed, 67 insertions(+), 143 deletions(-)

diff --git a/src/class/ParallelDataSync.ts b/src/class/ParallelDataSync.ts
index 5332a12..1af311f 100644
--- a/src/class/ParallelDataSync.ts
+++ b/src/class/ParallelDataSync.ts
@@ -7,19 +7,21 @@ import {
   CycleDB,
   ReceiptDB,
   OriginalTxDataDB,
-  // receiptDatabase,
-  // originalTxDataDatabase,
-  // cycleDatabase,
+  receiptDatabase,
+  originalTxDataDatabase,
+  cycleDatabase,
 } from '../storage'
 import { Cycle, Receipt, OriginalTxData } from '../types'
 import axios, { AxiosInstance } from 'axios'
 import http from 'http'
 import https from 'https'
-// import { checkpointWAL } from '../storage/sqlite3storage'
+import { useManualCheckPoint, checkpointWAL } from '../storage/sqlite3storage'
 
 // For Debugging Purpose - Set to false to skip processing data and saving to DB
 const processData = true
 
+const DESERIALIZE_RECEIPTS_CHUNK_SIZE = 20 // Number of receipts to deserialize at a time
+
 /**
  * Configuration for parallel sync
  */
@@ -103,25 +105,13 @@ export class ParallelDataSync {
   private originalTxBufferLock = false
   private cycleBufferLock = false
 
-  // // WAL checkpoint tracking
-  // private flushCount = 0 // Total number of buffer flushes
-  // private readonly CHECKPOINT_FREQUENCY = 10 // Run WAL checkpoint every N flushes to prevent WAL from growing too large
-
-  // // Flush pending flag to prevent multiple workers from waiting to flush
-  // private receiptFlushPending = false
-
-  // // Adaptive flush delay system - adds delays before DB writes to prevent overload
-  // private flushTimestamps: number[] = [] // Timestamps of recent flushes
-  // private readonly FLUSH_WINDOW_MS = 10000 // Track flushes in last 10 seconds
-  // private readonly FAST_FLUSH_THRESHOLD = 5 // If 5+ flushes in window, system is overloaded
-  // private minFlushDelay = 200 // Min delay before flush (ms)
-  // private maxFlushDelay = 1000 // Max delay before flush (ms)
-  // private readonly OVERLOAD_MIN_DELAY = 3000 // When overloaded, min delay increases to 3s
-  // private readonly OVERLOAD_MAX_DELAY = 5000 // When overloaded, max delay increases to 5s
+  // WAL checkpoint tracking
+  private flushCount = 0 // Total number of buffer flushes
+  private readonly CHECKPOINT_FREQUENCY = 10 // Run WAL checkpoint every N flushes to prevent WAL from growing too large
 
   constructor(syncConfig?: Partial<ParallelSyncConfig>) {
     this.syncConfig = {
-      concurrency: syncConfig?.concurrency || config.parallelSyncConcurrency || 10,
+      concurrency: syncConfig?.concurrency || config.parallelSyncConcurrency || 5,
       cyclesPerBatch: syncConfig?.cyclesPerBatch || config.cyclesPerBatch || 100,
       retryAttempts: syncConfig?.retryAttempts || config.syncRetryAttempts || 5,
       retryDelayMs: syncConfig?.retryDelayMs || 1000,
@@ -349,7 +339,7 @@ export class ParallelDataSync {
       // Three-phase approach for optimal performance:
       // Phase 1: Use main queue (concurrency: 5) for parallel API fetching
       // Phase 2: Buffer data in memory until ACCUMULATION_THRESHOLD (1000) is reached
-      // Phase 3: DB writes are batched and serialized via storage-level queue
+      // Phase 3: DB writes are batched and serialized via write queue
       // This combines parallel I/O with batched, serialized DB writes to minimize contention
       const tasks = cycleBatches.map((batch) =>
         this.queue.add(() => this.syncDataByCycleRange(batch.startCycle, batch.endCycle))
@@ -981,45 +971,19 @@ export class ParallelDataSync {
     if (type === 'receipt') {
       if (this.receiptBuffer.length === 0) return
 
-      // // If another worker is already flushing, return immediately (it will flush our data too)
-      // if (this.receiptFlushPending) {
-      //   return
-      // }
-
-      // // Mark flush as pending
-      // this.receiptFlushPending = true
-
-      // // Apply adaptive delay BEFORE acquiring lock to spread out DB writes (receipts only)
-      // const delay = this.getAdaptiveFlushDelay()
-      // if (delay > 0) {
-      //   const recentFlushCount = this.flushTimestamps.length
-      //   const delayRange = `${this.minFlushDelay}-${this.maxFlushDelay}ms`
-      //   console.log(
-      //     `[Adaptive Cooling] Receipts - Waiting ${delay}ms before flush ` +
-      //       `(recent flushes: ${recentFlushCount}, range: ${delayRange})`
-      //   )
-      //   await new Promise((resolve) => setTimeout(resolve, delay))
-      // }
-
-      // // If another worker is already locking, return immediately (it will flush our data too)
-      // if (this.receiptBufferLock) {
-      //   return
-      // }
-
       this.receiptBufferLock = true
       try {
-        const toFlush = [...this.receiptBuffer] as any
+        const toFlush = [...this.receiptBuffer]
         this.receiptBuffer = []
 
         const startTime = Date.now()
         // Deserialize receipts in chunks to prevent event loop blocking
-        const CHUNK_SIZE = 20
-        for (let i = 0; i < toFlush.length; i += CHUNK_SIZE) {
-          const end = Math.min(i + CHUNK_SIZE, toFlush.length)
+        for (let i = 0; i < toFlush.length; i += DESERIALIZE_RECEIPTS_CHUNK_SIZE) {
+          const end = Math.min(i + DESERIALIZE_RECEIPTS_CHUNK_SIZE, toFlush.length)
           // Deserialize chunk of receipts
           for (let j = i; j < end; j++) {
             // eslint-disable-next-line security/detect-object-injection
-            ReceiptDB.deserializeDbReceipt(toFlush[j])
+            ReceiptDB.deserializeDbReceipt(toFlush[j] as any)
           }
           // Yield to event loop after each chunk (except the last one)
           if (end < toFlush.length) {
@@ -1033,13 +997,13 @@ export class ParallelDataSync {
         console.log(`[Buffer Flush] Flushing ${toFlush.length} receipts to database`)
         if (processData) await ReceiptDB.processReceiptData(toFlush, false, false)
 
-        // // Track flush timestamp for adaptive delay system (receipts only)
-        // this.recordFlushTimestamp()
+        if (useManualCheckPoint) {
+          // Increment flush count and potentially checkpoint WAL
+          this.flushCount++
+          await this.maybeCheckpointWAL()
+        }
       } finally {
         this.receiptBufferLock = false
-
-        // // Clear flush pending flag
-        // this.receiptFlushPending = false
       }
     } else if (type === 'originalTx') {
       if (this.originalTxBuffer.length === 0) return
@@ -1087,85 +1051,27 @@ export class ParallelDataSync {
     await this.flushBuffer('cycle')
   }
 
-  // /**
-  //  * Conditionally checkpoint WAL files if enough flushes have occurred
-  //  * This prevents WAL files from growing too large during long sync operations
-  //  */
-  // private async maybeCheckpointWAL(): Promise<void> {
-  //   if (this.flushCount % this.CHECKPOINT_FREQUENCY === 0) {
-  //     console.log(
-  //       `[WAL Checkpoint] Running periodic checkpoint after ${this.flushCount} buffer flushes (~${
-  //         this.flushCount * this.ACCUMULATION_THRESHOLD
-  //       } records)`
-  //     )
-  //     // Run checkpoints on all three databases in parallel
-  //     // Use PASSIVE mode to avoid blocking readers
-  //     await Promise.all([
-  //       checkpointWAL(receiptDatabase, 'PASSIVE'),
-  //       checkpointWAL(originalTxDataDatabase, 'PASSIVE'),
-  //       checkpointWAL(cycleDatabase, 'PASSIVE'),
-  //     ])
-  //   }
-  // }
-
-  // /**
-  //  * Record flush timestamp and clean up old timestamps
-  //  * Used to track flush frequency and detect system overload
-  //  */
-  // private recordFlushTimestamp(): void {
-  //   const now = Date.now()
-  //   this.flushTimestamps.push(now)
-
-  //   // Clean up old timestamps outside the tracking window
-  //   this.flushTimestamps = this.flushTimestamps.filter((timestamp) => now - timestamp < this.FLUSH_WINDOW_MS)
-  // }
-
-  // /**
-  //  * Calculate adaptive flush delay based on recent flush frequency
-  //  * Returns a random delay within a range that adapts to system load
-  //  */
-  // private getAdaptiveFlushDelay(): number {
-  //   // Clean up old timestamps first
-  //   const now = Date.now()
-  //   this.flushTimestamps = this.flushTimestamps.filter((timestamp) => now - timestamp < this.FLUSH_WINDOW_MS)
-
-  //   // Check if system is overloaded (too many flushes in recent window)
-  //   const recentFlushCount = this.flushTimestamps.length
-  //   const isOverloaded = recentFlushCount >= this.FAST_FLUSH_THRESHOLD
-  //   const wasOverloaded = this.minFlushDelay === this.OVERLOAD_MIN_DELAY
-
-  //   // Adjust delay range based on system load
-  //   if (isOverloaded) {
-  //     // System overloaded - use longer delays
-  //     const wasNormal = this.minFlushDelay === 200
-  //     this.minFlushDelay = this.OVERLOAD_MIN_DELAY
-  //     this.maxFlushDelay = this.OVERLOAD_MAX_DELAY
-  //     if (wasNormal) {
-  //       // Log when transitioning from normal to overloaded
-  //       console.log(
-  //         `[Adaptive Cooling] ⚠️  OVERLOAD DETECTED! ${recentFlushCount} flushes in last ${
-  //           this.FLUSH_WINDOW_MS / 1000
-  //         }s. ` + `Increasing cooling delay: ${this.minFlushDelay}-${this.maxFlushDelay}ms`
-  //       )
-  //     }
-  //   } else if (recentFlushCount < this.FAST_FLUSH_THRESHOLD / 2) {
-  //     // System healthy - reduce delays back to normal
-  //     if (wasOverloaded) {
-  //       // Log when recovering from overload
-  //       console.log(
-  //         `[Adaptive Cooling] ✓ System recovered! ${recentFlushCount} flushes in last ${
-  //           this.FLUSH_WINDOW_MS / 1000
-  //         }s. ` + `Reducing cooling delay: 200-1000ms`
-  //       )
-  //     }
-  //     this.minFlushDelay = 200
-  //     this.maxFlushDelay = 1000
-  //   }
-
-  //   // Return random delay within current range to stagger DB writes
-  //   const delay = this.minFlushDelay + Math.floor(Math.random() * (this.maxFlushDelay - this.minFlushDelay))
-  //   return delay
-  // }
+  /**
+   * Conditionally checkpoint WAL files if enough flushes have occurred
+   * This prevents WAL files from growing too large during long sync operations
+   */
+  private async maybeCheckpointWAL(): Promise<void> {
+    if (!useManualCheckPoint) return
+    if (this.flushCount % this.CHECKPOINT_FREQUENCY === 0) {
+      console.log(
+        `[WAL Checkpoint] Running periodic checkpoint after ${this.flushCount} buffer flushes (~${
+          this.flushCount * this.ACCUMULATION_THRESHOLD
+        } records)`
+      )
+      // Run checkpoints on all three databases in parallel
+      // Use PASSIVE mode to avoid blocking readers
+      await Promise.all([
+        checkpointWAL(receiptDatabase, 'PASSIVE'),
+        checkpointWAL(originalTxDataDatabase, 'PASSIVE'),
+        checkpointWAL(cycleDatabase, 'PASSIVE'),
+      ])
+    }
+  }
 
   /**
    * Get current statistics
diff --git a/src/config/index.ts b/src/config/index.ts
index fae05f3..76cb9ab 100644
--- a/src/config/index.ts
+++ b/src/config/index.ts
@@ -189,7 +189,7 @@ let config: Config = {
     MAX_ACCOUNT_HISTORY_STATES_PER_REQUEST: 100,
     MAX_STATS_PER_REQUEST: 1000000,
   },
-  parallelSyncConcurrency: Number(process.env.PARALLEL_SYNC_CONCURRENCY) || 10, // 10 parallel workers
+  parallelSyncConcurrency: Number(process.env.PARALLEL_SYNC_CONCURRENCY) || 5, // 5 parallel sync fetches
   useParallelSync: process.env.USE_PARALLEL_SYNC !== 'false', // Enable by default
   cyclesPerBatch: Number(process.env.CYCLES_PER_BATCH) || 100, // Batch 100 cycles together ( matching MAX_BETWEEN_CYCLES_PER_REQUEST, can be lower if needed )
   enablePrefetch: process.env.ENABLE_PREFETCH !== 'false', // Enable prefetch by default
diff --git a/src/storage/sqlite3storage.ts b/src/storage/sqlite3storage.ts
index 68ba51a..2b673de 100644
--- a/src/storage/sqlite3storage.ts
+++ b/src/storage/sqlite3storage.ts
@@ -1,11 +1,16 @@
 import { Utils as StringUtils } from '@shardus/types'
 import { Database } from 'sqlite3'
 
+const enableWritingQueue = false
+
 // Simple write queue using Promise chain - serializes all database writes
 // This prevents write contention while allowing parallel reads (SELECTs)
 // Only INSERT/UPDATE/DELETE operations should use this queue
 let writeQueueTail: Promise<unknown> = Promise.resolve()
 
+// Control whether to use manual WAL checkpoints
+export const useManualCheckPoint = false
+
 interface QueryTiming {
   id: number
   sql: string
@@ -34,7 +39,11 @@ export const createDB = async (dbPath: string, dbName: string): Promise<Database
   await run(db, 'PRAGMA synchronous = NORMAL')
   await run(db, 'PRAGMA temp_store = MEMORY')
   await run(db, 'PRAGMA cache_size = -256000') // Increased to ~256MB cache for better performance
-  await run(db, 'PRAGMA wal_autocheckpoint = 10000') // Checkpoint every 10000 pages (less frequent = less lock contention)
+  let checkPointPageCount = 10000
+  if (useManualCheckPoint) {
+    checkPointPageCount = 0 // Disable automatic checkpoints
+  }
+  await run(db, `PRAGMA wal_autocheckpoint = ${checkPointPageCount}`) // Checkpoint every 10000 pages (less frequent = less lock contention)
   await run(db, 'PRAGMA mmap_size = 536870912') // 512MB memory-mapped I/O for faster reads (reduced disk I/O)
   await run(db, 'PRAGMA busy_timeout = 30000') // Wait up to 30s if database is locked
   await run(db, 'PRAGMA threads = 4') // Use up to 4 threads for parallel operations
@@ -300,12 +309,21 @@ export async function executeDbWriteWithTransaction(
   sql: string,
   params: unknown[] | object = []
 ): Promise<void> {
-  // Serialize write through storage-level queue + transaction for atomicity
-  await executeDbWrite(() =>
-    executeInTransaction(db, async () => {
-      await run(db, sql, params)
-    })
-  )
+  // Use write queue if enabled
+  if (enableWritingQueue) {
+    // Serialize write throuh promise queue
+    await executeDbWrite(() =>
+      executeInTransaction(db, async () => {
+        await run(db, sql, params)
+      })
+    )
+    return
+  }
+
+  // Use transaction directly
+  await executeInTransaction(db, async () => {
+    await run(db, sql, params)
+  })
 }
 
 export function extractValues(object: object): string[] {

From cfa8da847afd7af398f9517b3e8888c4a4d9d0ec Mon Sep 17 00:00:00 2001
From: jairajdev <jairajranamagar.dev@gmail.com>
Date: Mon, 17 Nov 2025 22:03:53 +0800
Subject: [PATCH 14/14] feat: enhance logging for cycle and receipt comparisons
 in DataSyncManager

---
 src/class/DataSyncManager.ts | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/class/DataSyncManager.ts b/src/class/DataSyncManager.ts
index 565c534..e8077f3 100644
--- a/src/class/DataSyncManager.ts
+++ b/src/class/DataSyncManager.ts
@@ -171,6 +171,8 @@ export class DataSyncManager {
 
     try {
       // Compare cycles data
+      console.log('\nComparing cycles data...')
+      console.log('CycleNumber', 'Local-Marker', ' Distributor-Marker')
       const localCycles = await CycleDB.queryCycleRecordsBetween(startCycle, endCycle)
       const distributorResponse = await queryFromDistributor(DataType.CYCLE, {
         start: startCycle,
@@ -188,7 +190,7 @@ export class DataSyncManager {
           const distributorCycle = distributorCycles.find(
             (c: { counter: number; marker: string }) => c.counter === localCycle.counter
           )
-
+          console.log(localCycle.counter, localCycle.cycleMarker, distributorCycle?.marker)
           if (!distributorCycle) {
             throw new Error(`Cycle ${localCycle.counter} exists locally but not in distributor`)
           } else if (localCycle.cycleMarker !== distributorCycle.marker) {
@@ -201,6 +203,8 @@ export class DataSyncManager {
       }
 
       // Compare receipts count
+      console.log('\nComparing receipts count...')
+      console.log('CycleNumber', 'Local-Receipts', 'Distributor-Receipts')
       const receiptsResponse = await queryFromDistributor(DataType.RECEIPT, {
         startCycle,
         endCycle,
@@ -210,9 +214,9 @@ export class DataSyncManager {
       if (receiptsResponse?.data?.receipts) {
         const distributorReceipts: { cycle: number; receipts: number }[] = receiptsResponse.data.receipts
         const localReceiptsCount = await ReceiptDB.queryReceiptCountByCycles(startCycle, endCycle)
-
         for (const distReceipt of distributorReceipts) {
           const localReceipt = localReceiptsCount.find((r) => r.cycle === distReceipt.cycle)
+          console.log(distReceipt.cycle, localReceipt?.receipts, distReceipt.receipts)
           if (localReceipt && localReceipt.receipts > distReceipt.receipts) {
             throw new Error(
               `Receipts count in local DB has more in cycle ${distReceipt.cycle}: ` +
@@ -223,6 +227,8 @@ export class DataSyncManager {
       }
 
       // Compare originalTxs count
+      console.log('\nComparing originalTxs count...')
+      console.log('CycleNumber', 'Local-OriginalTxs', 'Distributor-OriginalTxs')
       const originalTxsResponse = await queryFromDistributor(DataType.ORIGINALTX, {
         startCycle,
         endCycle,
@@ -239,6 +245,7 @@ export class DataSyncManager {
 
         for (const distTx of distributorOriginalTxs) {
           const localTx = localOriginalTxsCount.find((t) => t.cycle === distTx.cycle)
+          console.log(distTx.cycle, localTx?.originalTxsData, distTx.originalTxsData)
           if (localTx && localTx.originalTxsData > distTx.originalTxsData) {
             throw new Error(
               `OriginalTxs count mismatch in cycle ${distTx.cycle}: ` +
@@ -466,8 +473,6 @@ export class DataSyncManager {
       console.log(
         `Comparing cycles ${startCycle} to ${endCycle} with ${allDistributorReceipts.length} distributor receipts and ${allDistributorOriginalTxs.length} distributor originalTxs`
       )
-      console.log(allDistributorReceipts, localReceipts)
-      console.log(allDistributorOriginalTxs, localOriginalTxs)
 
       for (let cycle = startCycle; cycle <= endCycle; cycle++) {
         const distReceipts = allDistributorReceipts.find((r) => r.cycle === cycle)?.receipts || 0
@@ -631,8 +636,9 @@ export class DataSyncManager {
 
         const cycleBatches = []
         // For each range, create cycle batches and merge them into one
+        console.log('\nPreparing cycle batches for the following ranges:')
         for (const range of mergedRanges) {
-          console.log(`\nFor range: ${range.startCycle} to ${range.endCycle} (${range.gapSize} cycles)`)
+          console.log(` - range: ${range.startCycle} to ${range.endCycle} (${range.gapSize} cycles)`)
           const cycleBatch = parallelDataSync.createCycleBatches(range.startCycle, range.endCycle)
           cycleBatches.push(...cycleBatch)
         }