From f704a85e783f61911604f4a4af5ddc843e9ed707 Mon Sep 17 00:00:00 2001 From: Savio Dias <91362589+Savio629@users.noreply.github.com> Date: Fri, 24 May 2024 13:16:16 +0530 Subject: [PATCH] Update test.js --- test.js | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/test.js b/test.js index 8da04b8..50851fe 100644 --- a/test.js +++ b/test.js @@ -3,6 +3,10 @@ const fs = require('fs'); const { stringify } = require('csv-stringify'); const path = require('path'); +const args = process.argv.slice(2); +const startIndex = parseInt(args[0], 10); +const endIndex = parseInt(args[1], 10); + async function getOptions(page, selector) { return await page.evaluate((selector) => { let options = Array.from(document.querySelector(selector).options); @@ -26,22 +30,36 @@ async function scrapeData() { fs.mkdirSync(dataDir); } - - - for (let i = 0; i <= 29; i++) { + for (let i = startIndex; i <= endIndex; i++) { await page.select('#CPHPage_ddFinyear', '2024-2025'); - await new Promise(resolve => setTimeout(resolve, 1000)); + await new Promise(resolve => setTimeout(resolve, 1000)); await page.select('#CPHPage_ddState', '24'); - await new Promise(resolve => setTimeout(resolve, 1000)); + await new Promise(resolve => setTimeout(resolve, 1000)); await page.click('#CPHPage_btnShow'); - await new Promise(resolve => setTimeout(resolve, 7000)); + await new Promise(resolve => setTimeout(resolve, 12000)); + + const district = await page.evaluate((index) => { + const selector = `#CPHPage_rpt_lnkbtn_${index}`; + const selectedOption = document.querySelector(selector); + return selectedOption ? selectedOption.text : 'Unknown'; + }, i); + + console.log(`Processing data for district: ${district}`); + + await new Promise(resolve => setTimeout(resolve, 500)); await page.click(`#CPHPage_rpt_Village_${i}`); - await new Promise(resolve => setTimeout(resolve, 7000)); + await new Promise(resolve => setTimeout(resolve, 7000)); - await page.select('select[name="ctl00$CPHPage$ddPagrno"]', '0'); - await new Promise(resolve => setTimeout(resolve, 60000)); + try { + await page.select('select[name="ctl00$CPHPage$ddPagrno"]', '0'); + } catch (error) { + console.error(`Error: No element found for selector: select[name="ctl00$CPHPage$ddPagrno"] for village ID ${i}`); + continue; + } + await new Promise(resolve => setTimeout(resolve, 60000)); await page.waitForSelector('#tableReportTable'); + const data = await page.evaluate(() => { const rows = Array.from(document.querySelectorAll('#tableReportTable tr')); return rows.map(row => { @@ -50,17 +68,12 @@ async function scrapeData() { }); }); - const district = await page.evaluate(() => { - const selectedOption = document.querySelector('#CPHPage_ddDistrict option:checked'); - return selectedOption ? selectedOption.text : 'Unknown'; - }); - - const districtFolder = path.join(dataDir, '2024-2025', 'odisha', district.replace(/[\\/:*?"<>|]/g, '-')); + const districtFolder = path.join(dataDir, '2024-2025', 'odisha'); if (!fs.existsSync(districtFolder)) { fs.mkdirSync(districtFolder, { recursive: true }); } - const csvFilePath = path.join(districtFolder, `village_${i}.csv`); + const csvFilePath = path.join(districtFolder, `${district}.csv`); stringify(data, (err, output) => { if (err) throw err; fs.writeFile(csvFilePath, output, (err) => { @@ -69,7 +82,7 @@ async function scrapeData() { }); await page.goBack({ waitUntil: 'networkidle0' }); - await new Promise(resolve => setTimeout(resolve, 500)); + await new Promise(resolve => setTimeout(resolve, 500)); } await browser.close();