Skip to content

Commit

Permalink
Add a command to scrape the logbook
Browse files Browse the repository at this point in the history
  • Loading branch information
ngmy committed Dec 17, 2023
1 parent bb5ae6d commit 38ddf3b
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Release Notes

## [0.6.0](https://github.com/ngmy/moonboard-web-search/compare/0.5.0...0.6.0) - 2023-12-17

- Add a command to scrape the logbook.

## [0.5.0](https://github.com/ngmy/moonboard-web-search/compare/0.4.0...0.5.0) - 2023-12-06

- Add a user ID to scraped data.
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ You can also specify the user IDs to scrape by passing the `--user-ids-file` opt
bin/moonboard-web-scraper scrape-user-profiles --user-ids-file=user_ids.txt
```

### Scrape the logbook

Run the `scrape-logbook` command and wait for JSON files of the logbook to appear:

```bash
bin/moonboard-web-scraper scrape-logbook
```

## Changelog

Please see the [changelog](CHANGELOG.md).
Expand Down
14 changes: 14 additions & 0 deletions bin/moonboard-web-scraper
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
require __DIR__ . '/../vendor/autoload.php';

use App\Commands\ScrapeBenchmarksCommand;
use App\Commands\ScrapeLogbookCommand;
use App\Commands\ScrapeUserProfilesCommand;
use App\Models\HoldSetups;
use App\Services\Authenticator;
use App\UseCases\ScrapeBenchmarksAction;
use App\UseCases\ScrapeLogbookAction;
use App\UseCases\ScrapeUserProfilesAction;
use Monolog\Handler\StreamHandler;
use Monolog\Level;
Expand Down Expand Up @@ -46,4 +48,16 @@ $application->add(new ScrapeUserProfilesCommand(
client: $client,
logger: $logger,
));
// Assemble the logbook scraper step by step: the authenticator is built from the
// MOONBOARD_USERNAME / MOONBOARD_PASSWORD environment variables, handed to the
// action together with the shared client, hold setups and logger, and the action
// is finally wrapped in its console command.
$logbookAuthenticator = new Authenticator(
    username: getenv('MOONBOARD_USERNAME'),
    password: getenv('MOONBOARD_PASSWORD'),
    logger: $logger,
);
$scrapeLogbookAction = new ScrapeLogbookAction(
    client: $client,
    authenticator: $logbookAuthenticator,
    holdSetups: HoldSetups::all(),
    logger: $logger,
);
$application->add(new ScrapeLogbookCommand($scrapeLogbookAction));

// Hand control to the console application.
$application->run();
32 changes: 32 additions & 0 deletions src/Commands/ScrapeLogbookCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

declare(strict_types=1);

namespace App\Commands;

use App\UseCases\ScrapeLogbookAction;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command that runs the logbook scraping use case.
 */
class ScrapeLogbookCommand extends Command
{
    /**
     * Command name as typed on the command line.
     *
     * @var string
     */
    protected static $defaultName = 'scrape-logbook';

    /**
     * Short description of the command.
     *
     * @var string
     */
    protected static $defaultDescription = 'Scrape logbook';

    /**
     * @param ScrapeLogbookAction $action use case invoked when the command is executed
     */
    public function __construct(
        private readonly ScrapeLogbookAction $action,
    ) {
        parent::__construct();
    }

    /**
     * Invokes the injected action and reports success.
     *
     * Neither the input nor the output is used by this command.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $this->action->__invoke();

        return self::SUCCESS;
    }
}
10 changes: 10 additions & 0 deletions src/Models/BoardAngle.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,14 @@ public function getLabel(): string
self::Degree40MoonBoard2016 => '',
};
}

/**
 * Tells whether this case is one of the two 40 degree cases
 * (Degree40 or Degree40MoonBoard2016).
 */
public function isDegree40(): bool
{
    return match ($this) {
        self::Degree40, self::Degree40MoonBoard2016 => true,
        default => false,
    };
}

/**
 * Tells whether this case is the 25 degree case.
 */
public function isDegree25(): bool
{
    $isTwentyFive = $this === self::Degree25;

    return $isTwentyFive;
}
}
164 changes: 164 additions & 0 deletions src/UseCases/ScrapeLogbookAction.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
<?php

declare(strict_types=1);

namespace App\UseCases;

use App\Models\HoldSetup;
use App\Models\HoldSetups;
use App\Services\Authenticator;
use Psr\Log\LoggerInterface;
use Symfony\Component\Panther\Client;
use Symfony\Component\Panther\DomCrawler\Crawler;

/**
 * Scrapes the authenticated user's logbook for every hold setup (except the
 * 25 degree ones) and writes one JSON file per hold setup to the current
 * working directory.
 */
class ScrapeLogbookAction
{
    /**
     * Splits the grade paragraph into the problem's grade (group 1) and the
     * grade given by the user (group 2).
     */
    private const GRADE_PATTERN = '/(.+)\. You graded this problem (.+)\./';

    /** Selector of a single star image inside a rating list. */
    private const STAR_SELECTOR = 'img[src="/Content/images/star.png"]';

    public function __construct(
        private readonly Client $client,
        private readonly Authenticator $authenticator,
        private readonly HoldSetups $holdSetups,
        private readonly LoggerInterface $logger,
    ) {}

    /**
     * Authenticates once, then scrapes and saves the logbook of each hold setup.
     */
    public function __invoke(): void
    {
        $this->authenticator->authenticate($this->client);

        foreach ($this->holdSetups as $holdSetup) {
            // Skip the 25 degree MoonBoard logbook as it is not available on the website.
            if ($holdSetup->getBoardAngle()->isDegree25()) {
                continue;
            }

            $logbookData = $this->scrapeLogbookData($holdSetup);

            $this->saveLogbookData($logbookData, $holdSetup);
        }
    }

    /**
     * Opens the logbook page, selects the given hold setup and walks through
     * every page of the grid, collecting all logbook entries.
     *
     * @return \stdClass[] one object per logbook entry; empty when the grid has no expandable rows
     */
    private function scrapeLogbookData(HoldSetup $holdSetup): array
    {
        $this->logger->info('Scraping logbook data...', [
            'boardType' => $holdSetup->getBoardType()->getLabel(),
            'boardAngle' => $holdSetup->getBoardAngle()->getLabel(),
        ]);

        $this->client->request('GET', '/Logbook/Index');
        $this->client->executeScript("$('#Holdsetup').val('".$holdSetup->getBoardType()->value."').change()");

        // From the DOM, it is impossible to determine that the hold setup has changed,
        // so we have to wait a certain amount of time.
        sleep(3);

        $crawler = $this->client->getCrawler();

        // Nothing to expand means there is nothing logged for this hold setup.
        if (0 === $crawler->filter('.k-i-expand')->count()) {
            return [];
        }

        $logbook = [];
        $page = 1;

        while (true) {
            $this->client->executeScript("$('.k-i-expand').click()");

            // From the DOM, it is impossible to determine that the expanded detail rows
            // have finished rendering, so we have to wait a certain amount of time.
            sleep(5);

            $logbook = array_merge($logbook, ...self::scrapeCurrentPage($crawler));

            ++$page;

            // Stop as soon as the pager offers no link to the next page number.
            if (0 === $crawler->filter('[data-page="'.$page.'"]')->count()) {
                break;
            }

            $this->client->executeScript("$('[data-page=\"{$page}\"]').click()");

            // From the DOM, it is impossible to determine that the page has changed,
            // so we have to wait a certain amount of time.
            sleep(5);
        }

        return $logbook;
    }

    /**
     * Collects the entries of every master row currently shown in the grid.
     *
     * @return list<list<\stdClass>> entries grouped by master row, in document order
     */
    private static function scrapeCurrentPage(Crawler $crawler): array
    {
        return $crawler->filter('.k-master-row')->each(
            static function (Crawler $masterRow): array {
                // Only the first line of the header is kept as the date. The element text
                // obtained through WebDriver is split on "\n" rather than PHP_EOL so the
                // result does not depend on the platform PHP runs on.
                $headerText = $masterRow->filter('.logbook-grid-header')->text();
                $dateAdded = rtrim(explode("\n", $headerText, 2)[0], "\r");

                // The entries live in the row that directly follows the master row.
                $detailRow = $masterRow->nextAll()->eq(0);

                return $detailRow->filter('.logbookentry')->each(
                    static fn (Crawler $logbookEntry): \stdClass => self::parseLogbookEntry($logbookEntry, $dateAdded),
                );
            },
        );
    }

    /**
     * Converts a single ".logbookentry" element into a plain data object.
     *
     * Grade and YourGrade are null when the grade paragraph does not match the
     * expected "<grade>. You graded this problem <grade>." wording.
     */
    private static function parseLogbookEntry(Crawler $logbookEntry, string $dateAdded): \stdClass
    {
        // Query each element group once; every filter() call is a round trip to the browser.
        $titleLink = $logbookEntry->filter('h3 a');
        $paragraphs = $logbookEntry->filter('p');
        $ratingLists = $logbookEntry->filter('ul');

        $href = $titleLink->attr('href');
        \assert(null !== $href);
        // The problem ID is the fourth "/"-separated segment of the link.
        $id = (int) explode('/', $href)[3];

        $grade = null;
        $yourGrade = null;
        if (1 === preg_match(self::GRADE_PATTERN, $paragraphs->eq(1)->text(), $matches)) {
            [, $grade, $yourGrade] = $matches;
        }

        // The comment paragraph is optional; it is only read when a sixth paragraph exists.
        $comment = $paragraphs->count() > 5 ? $paragraphs->eq(5)->text() : null;

        return (object) [
            'Id' => $id,
            'Name' => $titleLink->text(),
            'SetBy' => $paragraphs->eq(0)->text(),
            'Grade' => $grade,
            'YourGrade' => $yourGrade,
            'Method' => $paragraphs->eq(2)->text(),
            'Rating' => $ratingLists->eq(0)->filter(self::STAR_SELECTOR)->count(),
            'YourRating' => $ratingLists->eq(1)->filter(self::STAR_SELECTOR)->count(),
            'NumberOfTries' => $paragraphs->eq(3)->text(),
            'Comment' => $comment,
            'IsBenchmark' => 0 !== $logbookEntry->filter('.benchmark')->count(),
            'DateAdded' => $dateAdded,
        ];
    }

    /**
     * Writes the scraped entries as pretty-printed JSON to
     * "logbook <board type label> <board angle label>.json".
     *
     * A failed write is logged as an error instead of being silently ignored.
     *
     * @param \stdClass[] $logbookData
     *
     * @throws \JsonException when the data cannot be encoded
     */
    private function saveLogbookData(array $logbookData, HoldSetup $holdSetup): void
    {
        $boardTypeLabel = $holdSetup->getBoardType()->getLabel();
        $boardAngleLabel = $holdSetup->getBoardAngle()->getLabel();

        $this->logger->info('Saving logbook data...', [
            'boardType' => $boardTypeLabel,
            'boardAngle' => $boardAngleLabel,
        ]);

        $filename = sprintf('logbook %s %s.json', $boardTypeLabel, $boardAngleLabel);

        $bytesWritten = file_put_contents(
            $filename,
            json_encode($logbookData, JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE),
        );

        if (false === $bytesWritten) {
            $this->logger->error('Failed to save logbook data.', [
                'boardType' => $boardTypeLabel,
                'boardAngle' => $boardAngleLabel,
                'filename' => $filename,
            ]);
        }
    }
}

0 comments on commit 38ddf3b

Please sign in to comment.