Skip to content

Commit

Permalink
Update test.js
Browse files (browse the repository at this point in the history)
  • Loading branch information
Savio629 authored May 24, 2024
1 parent 23cb722 commit f704a85
Showing 1 changed file with 30 additions and 17 deletions.
47 changes: 30 additions & 17 deletions test.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,10 @@ const fs = require('fs');
const { stringify } = require('csv-stringify');
const path = require('path');

/**
 * Parses one optional command-line index argument as a base-10 integer.
 *
 * @param {string | undefined} value - Raw argument text; undefined when omitted.
 * @param {number} fallback - Value to use when `value` is missing or not numeric.
 * @returns {number} The parsed integer, or `fallback` when parsing yields NaN.
 */
function parseIndexArg(value, fallback) {
const parsed = Number.parseInt(value, 10);
return Number.isNaN(parsed) ? fallback : parsed;
}

// Optional CLI arguments: <startIndex> <endIndex> give the inclusive range of
// indices the scrape loop iterates over, e.g. `node test.js 5 12`.
const args = process.argv.slice(2);

// A NaN bound makes the loop condition `i <= endIndex` false immediately, so a
// missing or malformed argument would silently scrape nothing. Fall back to the
// 0..29 range that the loop used before these arguments were introduced.
const startIndex = parseIndexArg(args[0], 0);
const endIndex = parseIndexArg(args[1], 29);

async function getOptions(page, selector) {
return await page.evaluate((selector) => {
let options = Array.from(document.querySelector(selector).options);
Expand All @@ -26,22 +30,36 @@ async function scrapeData() {
fs.mkdirSync(dataDir);
}



for (let i = 0; i <= 29; i++) {
for (let i = startIndex; i <= endIndex; i++) {
await page.select('#CPHPage_ddFinyear', '2024-2025');
await new Promise(resolve => setTimeout(resolve, 1000));
await new Promise(resolve => setTimeout(resolve, 1000));
await page.select('#CPHPage_ddState', '24');
await new Promise(resolve => setTimeout(resolve, 1000));
await new Promise(resolve => setTimeout(resolve, 1000));
await page.click('#CPHPage_btnShow');
await new Promise(resolve => setTimeout(resolve, 7000));
await new Promise(resolve => setTimeout(resolve, 12000));

const district = await page.evaluate((index) => {
const selector = `#CPHPage_rpt_lnkbtn_${index}`;
const selectedOption = document.querySelector(selector);
return selectedOption ? selectedOption.text : 'Unknown';
}, i);

console.log(`Processing data for district: ${district}`);

await new Promise(resolve => setTimeout(resolve, 500));
await page.click(`#CPHPage_rpt_Village_${i}`);
await new Promise(resolve => setTimeout(resolve, 7000));
await new Promise(resolve => setTimeout(resolve, 7000));

await page.select('select[name="ctl00$CPHPage$ddPagrno"]', '0');
await new Promise(resolve => setTimeout(resolve, 60000));
try {
await page.select('select[name="ctl00$CPHPage$ddPagrno"]', '0');
} catch (error) {
console.error(`Error: No element found for selector: select[name="ctl00$CPHPage$ddPagrno"] for village ID ${i}`);
continue;
}

await new Promise(resolve => setTimeout(resolve, 60000));
await page.waitForSelector('#tableReportTable');

const data = await page.evaluate(() => {
const rows = Array.from(document.querySelectorAll('#tableReportTable tr'));
return rows.map(row => {
Expand All @@ -50,17 +68,12 @@ async function scrapeData() {
});
});

const district = await page.evaluate(() => {
const selectedOption = document.querySelector('#CPHPage_ddDistrict option:checked');
return selectedOption ? selectedOption.text : 'Unknown';
});

const districtFolder = path.join(dataDir, '2024-2025', 'odisha', district.replace(/[\\/:*?"<>|]/g, '-'));
const districtFolder = path.join(dataDir, '2024-2025', 'odisha');
if (!fs.existsSync(districtFolder)) {
fs.mkdirSync(districtFolder, { recursive: true });
}

const csvFilePath = path.join(districtFolder, `village_${i}.csv`);
const csvFilePath = path.join(districtFolder, `${district}.csv`);
stringify(data, (err, output) => {
if (err) throw err;
fs.writeFile(csvFilePath, output, (err) => {
Expand All @@ -69,7 +82,7 @@ async function scrapeData() {
});

await page.goBack({ waitUntil: 'networkidle0' });
await new Promise(resolve => setTimeout(resolve, 500));
await new Promise(resolve => setTimeout(resolve, 500));
}

await browser.close();
Expand Down

0 comments on commit f704a85

Please sign in to comment.