From 38ddf3bcb37f92189df3cefd034958a58ed8e27e Mon Sep 17 00:00:00 2001 From: Yuta Nagamiya Date: Sun, 17 Dec 2023 12:53:52 +0900 Subject: [PATCH] Add a command to scrape the logbook --- CHANGELOG.md | 4 + README.md | 8 ++ bin/moonboard-web-scraper | 14 +++ src/Commands/ScrapeLogbookCommand.php | 32 +++++ src/Models/BoardAngle.php | 10 ++ src/UseCases/ScrapeLogbookAction.php | 164 ++++++++++++++++++++++++++ 6 files changed, 232 insertions(+) create mode 100644 src/Commands/ScrapeLogbookCommand.php create mode 100644 src/UseCases/ScrapeLogbookAction.php diff --git a/CHANGELOG.md b/CHANGELOG.md index 3088c89..77130da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Release Notes +## [0.6.0](https://github.com/ngmy/moonboard-web-search/compare/0.5.0...0.6.0) - 2023-12-17 + +- Add a command to scrape the logbook. + ## [0.5.0](https://github.com/ngmy/moonboard-web-search/compare/0.4.0...0.5.0) - 2023-12-06 - Add a user ID to scraped data. diff --git a/README.md b/README.md index ad13d8a..9382d8e 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,14 @@ You can also specify the user IDs to scrape by passing the `--user-ids-file` opt bin/moonboard-web-scraper scrape-user-profiles --user-ids-file=user_ids.txt ``` +### Scrape the logbook + +Run the `scrape-logbook` command and wait for JSON files of the logbook to appear: + +```bash +bin/moonboard-web-scraper scrape-logbook +``` + ## Changelog Please see the [changelog](CHANGELOG.md). diff --git a/bin/moonboard-web-scraper b/bin/moonboard-web-scraper index 9b10e13..c9eb9e0 100755 --- a/bin/moonboard-web-scraper +++ b/bin/moonboard-web-scraper @@ -4,10 +4,12 @@ require __DIR__ . 
'/../vendor/autoload.php'; use App\Commands\ScrapeBenchmarksCommand; +use App\Commands\ScrapeLogbookCommand; use App\Commands\ScrapeUserProfilesCommand; use App\Models\HoldSetups; use App\Services\Authenticator; use App\UseCases\ScrapeBenchmarksAction; +use App\UseCases\ScrapeLogbookAction; use App\UseCases\ScrapeUserProfilesAction; use Monolog\Handler\StreamHandler; use Monolog\Level; @@ -46,4 +48,16 @@ $application->add(new ScrapeUserProfilesCommand( client: $client, logger: $logger, )); +$application->add(new ScrapeLogbookCommand( /* Register the logbook command on $application, wired to a new ScrapeLogbookAction. */ + new ScrapeLogbookAction( + client: $client, + authenticator: new Authenticator( + username: getenv('MOONBOARD_USERNAME'), /* NOTE(review): getenv() yields false when the variable is unset; Authenticator's parameter types are not visible here, confirm it tolerates that. */ + password: getenv('MOONBOARD_PASSWORD'), + logger: $logger, + ), + holdSetups: HoldSetups::all(), + logger: $logger, + ), +)); $application->run(); diff --git a/src/Commands/ScrapeLogbookCommand.php b/src/Commands/ScrapeLogbookCommand.php new file mode 100644 index 0000000..1445559 --- /dev/null +++ b/src/Commands/ScrapeLogbookCommand.php @@ -0,0 +1,32 @@ +action)(); + + return Command::SUCCESS; + } +} diff --git a/src/Models/BoardAngle.php b/src/Models/BoardAngle.php index 466decd..b1cf12f 100644 --- a/src/Models/BoardAngle.php +++ b/src/Models/BoardAngle.php @@ -18,4 +18,14 @@ public function getLabel(): string self::Degree40MoonBoard2016 => '', }; } + + public function isDegree40(): bool + { + return self::Degree40 === $this || self::Degree40MoonBoard2016 === $this; /* True for either 40-degree case: Degree40 or Degree40MoonBoard2016. */ + } + + public function isDegree25(): bool + { + return self::Degree25 === $this; /* True only for the Degree25 case. */ + } } diff --git a/src/UseCases/ScrapeLogbookAction.php b/src/UseCases/ScrapeLogbookAction.php new file mode 100644 index 0000000..7f3b4ba --- /dev/null +++ b/src/UseCases/ScrapeLogbookAction.php @@ -0,0 +1,164 @@ +authenticator->authenticate($this->client); + + foreach ($this->holdSetups as $holdSetup) { + // Skip the 25 degree MoonBoard logbook as it is not available on the website. 
+ if ($holdSetup->getBoardAngle()->isDegree25()) { + continue; + } + + $logbookData = $this->scrapeLogbookData($holdSetup); + + $this->saveLogbookData($logbookData, $holdSetup); + } + } + + /** + * @return \stdClass[] One object per `.logbookentry` element found while paging through the logbook for the given hold setup; empty when no `.k-i-expand` buttons are present. + */ + private function scrapeLogbookData(HoldSetup $holdSetup): array + { + $this->logger->info('Scraping logbook data...', [ + 'boardType' => $holdSetup->getBoardType()->getLabel(), + 'boardAngle' => $holdSetup->getBoardAngle()->getLabel(), + ]); + + $this->client->request('GET', '/Logbook/Index'); + $this->client->executeScript("$('#Holdsetup').val('".$holdSetup->getBoardType()->value."').change()"); /* Set the #Holdsetup value to this board type and trigger change(). */ + + // From the DOM, it is impossible to determine that the hold setup has changed, + // so we have to wait a certain amount of time. + sleep(3); + + $crawler = $this->client->getCrawler(); + + $logbook = []; + $page = 1; + + $logbookExpandButtons = $crawler->filter('.k-i-expand'); + + if (0 === $logbookExpandButtons->count()) { + return $logbook; + } + + while (true) { + $this->client->executeScript("$('.k-i-expand').click()"); + + // From the DOM, it is impossible to determine that the rows have been expanded, + // so we have to wait a certain amount of time. + sleep(5); + + $masterRows = $crawler->filter('.k-master-row'); + $masterRows->each(static function (Crawler $masterRow) use (&$logbook): void { + $dateAdded = $masterRow->filter('.logbook-grid-header')->text(); + $dateAdded = explode(PHP_EOL, $dateAdded)[0]; /* Keep only the part before the first PHP_EOL. */ + + $detailRow = $masterRow->nextAll()->eq(0); /* First element returned by nextAll(), used as this master row's detail row. */ + + $logbookEntries = $detailRow->filter('.logbookentry'); + $logbookEntries->each(static function (Crawler $logbookEntry) use (&$logbook, $dateAdded): void { + $id = $logbookEntry->filter('h3 a')->attr('href'); + \assert(null !== $id); + $id = (int) explode('/', $id)[3]; /* Fourth '/'-separated segment of the href, cast to int. */ + + $name = $logbookEntry->filter('h3 a')->text(); + + $setBy = $logbookEntry->filter('p')->eq(0)->text(); + + $grade = $logbookEntry->filter('p')->eq(1)->text(); + $grade = preg_match('/(.+)\. 
You graded this problem (.+)\./', $grade, $matches); + $grade = $matches[1]; /* NOTE(review): preg_match()'s return value is overwritten without being checked; if the text does not match, $matches[1] is undefined. */ + + $yourGrade = $logbookEntry->filter('p')->eq(1)->text(); + $yourGrade = preg_match('/(.+)\. You graded this problem (.+)\./', $yourGrade, $matches); + $yourGrade = $matches[2]; /* Second capture group of the same pattern, applied again to the same paragraph. */ + + $method = $logbookEntry->filter('p')->eq(2)->text(); + + $rating = $logbookEntry->filter('ul')->eq(0)->filter('img[src="/Content/images/star.png"]')->count(); /* Number of star.png images in the first <ul>. */ + + $yourRating = $logbookEntry->filter('ul')->eq(1)->filter('img[src="/Content/images/star.png"]')->count(); /* Number of star.png images in the second <ul>. */ + + $numberOfTries = $logbookEntry->filter('p')->eq(3)->text(); + + $comment = null; + if ($logbookEntry->filter('p')->count() > 5) { /* The <p> at index 5 is read as the comment only when more than five <p> elements exist. */ + $comment = $logbookEntry->filter('p')->eq(5)->text(); + } + + $isBenchmark = 0 !== $logbookEntry->filter('.benchmark')->count(); + + $logbook[] = (object) [ + 'Id' => $id, + 'Name' => $name, + 'SetBy' => $setBy, + 'Grade' => $grade, + 'YourGrade' => $yourGrade, + 'Method' => $method, + 'Rating' => $rating, + 'YourRating' => $yourRating, + 'NumberOfTries' => $numberOfTries, + 'Comment' => $comment, + 'IsBenchmark' => $isBenchmark, + 'DateAdded' => $dateAdded, + ]; + }); + }); + + ++$page; + + $pageElement = $crawler->filter('[data-page="'.$page.'"]'); + + if (0 === $pageElement->count()) { /* Stop when no element has data-page equal to the next page number. */ + break; + } + + $this->client->executeScript("$('[data-page=\"{$page}\"]').click()"); + + // From the DOM, it is impossible to determine that the page has changed, + // so we have to wait a certain amount of time. 
+ sleep(5); + } + + return $logbook; + } + + /** + * @param \stdClass[] $logbookData Entries to write as pretty-printed JSON to a file named from the board type and board angle labels. + */ + private function saveLogbookData(array $logbookData, HoldSetup $holdSetup): void + { + $this->logger->info('Saving logbook data...', [ + 'boardType' => $holdSetup->getBoardType()->getLabel(), + 'boardAngle' => $holdSetup->getBoardAngle()->getLabel(), + ]); + + file_put_contents( /* NOTE(review): the file name carries no directory, and the return value is not checked. */ + sprintf('logbook %s %s.json', $holdSetup->getBoardType()->getLabel(), $holdSetup->getBoardAngle()->getLabel()), + json_encode($logbookData, JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE), + ); + } +}