Skip to content

Commit

Permalink
Merge pull request #43 from resohead/main
Browse files Browse the repository at this point in the history
Optionally allow partial user-agent matches and ignoring the global (*) group
  • Loading branch information
freekmurze authored Apr 22, 2024
2 parents 84a4532 + a1457ef commit 1e81ddc
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 1 deletion.
62 changes: 61 additions & 1 deletion src/RobotsTxt.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,38 @@ class RobotsTxt

/**
 * Disallow rules grouped by user-agent key.
 *
 * allows() looks groups up by the lower-cased, trimmed user agent and treats
 * the `*` key as the global group. Each value is iterated as a list of
 * disallow rules. Presumably filled by the parser, which is not visible
 * here — confirm.
 */
protected array $disallowsPerUserAgent = [];

/**
 * When true (the default), allows() selects the group whose key equals the
 * user agent; when false, it selects groups whose key is contained in it.
 */
protected bool $matchExactly = true;

/**
 * When false, allows() removes the `*` group before selecting rules.
 */
protected bool $includeGlobalGroup = true;

/**
 * Stop applying the global (`*`) group when checking URLs.
 *
 * After this call, allows() filters the `*` key out of the rule groups
 * before matching the user agent.
 *
 * @return self The same instance, for method chaining.
 */
public function ignoreGlobalGroup(): self
{
$this->includeGlobalGroup = false;

return $this;
}

/**
 * Apply the global (`*`) group again when checking URLs.
 *
 * This restores the default that ignoreGlobalGroup() switches off.
 *
 * @return self The same instance, for method chaining.
 */
public function includeGlobalGroup(): self
{
$this->includeGlobalGroup = true;

return $this;
}

/**
 * Match user agents by substring instead of by exact name.
 *
 * After this call, allows() gathers the rules of every group whose key is
 * contained in the given user agent (via getDisallowsContaining()).
 *
 * @return self The same instance, for method chaining.
 */
public function withPartialMatches(): self
{
$this->matchExactly = false;

return $this;
}

/**
 * Match user agents by exact name only (the default).
 *
 * After this call, allows() uses the group whose key equals the given user
 * agent (via getDisallowsExactly()).
 *
 * @return self The same instance, for method chaining.
 */
public function exactMatchesOnly(): self
{
$this->matchExactly = true;

return $this;
}

public static function readFrom(string $source): self
{
$content = @file_get_contents($source);
Expand Down Expand Up @@ -50,11 +82,39 @@ public function allows(string $url, string | null $userAgent = '*'): bool
}
}

$disallows = $this->disallowsPerUserAgent[strtolower(trim($userAgent ?? ''))] ?? $this->disallowsPerUserAgent['*'] ?? [];
$disallowsPerUserAgent = $this->includeGlobalGroup
? $this->disallowsPerUserAgent
: array_filter($this->disallowsPerUserAgent, fn ($key) => $key !== '*', ARRAY_FILTER_USE_KEY);

$normalizedUserAgent = strtolower(trim($userAgent ?? ''));

$disallows = $this->matchExactly
? $this->getDisallowsExactly($normalizedUserAgent, $disallowsPerUserAgent)
: $this->getDisallowsContaining($normalizedUserAgent, $disallowsPerUserAgent);

return ! $this->pathIsDenied($requestUri, $disallows);
}

/**
 * Pick the disallow rules for a user agent using exact group names.
 *
 * The group named after the user agent wins; if it is missing, the `*`
 * group is used; if that is missing as well, no rules apply.
 *
 * @param string $userAgent             User agent to look up.
 * @param array  $disallowsPerUserAgent Rule groups keyed by user-agent name.
 *
 * @return array The disallow rules of the first group found, or an empty array.
 */
protected function getDisallowsExactly(string $userAgent, array $disallowsPerUserAgent): array
{
    // Candidates in order of precedence: the specific group, then the wildcard.
    foreach ([$userAgent, '*'] as $groupName) {
        if (isset($disallowsPerUserAgent[$groupName])) {
            return $disallowsPerUserAgent[$groupName];
        }
    }

    return [];
}

/**
 * Collect the disallow rules of every group that applies to a user agent
 * when partial matching is enabled.
 *
 * A group applies when its key is the wildcard `*`, equals the user agent,
 * or is a non-empty substring of the user agent. Rules of all applicable
 * groups are concatenated in the order the groups appear.
 *
 * @param string $userAgent             User agent to test the group keys against.
 * @param array  $disallowsPerUserAgent Rule groups keyed by user-agent name.
 *
 * @return array The combined disallow rules, or an empty array when no group applies.
 */
protected function getDisallowsContaining(string $userAgent, array $disallowsPerUserAgent): array
{
    $matchingGroups = [];

    foreach ($disallowsPerUserAgent as $userAgentKey => $groupDisallows) {
        // PHP turns numeric-string array keys into ints; compare as strings.
        $userAgentKey = (string) $userAgentKey;

        // An empty key is a substring of every string, so it must not count
        // as a partial match; it only applies to an identical (empty) user agent.
        $applies = $userAgentKey === '*'
            || $userAgentKey === $userAgent
            || ($userAgentKey !== '' && str_contains($userAgent, $userAgentKey));

        if ($applies) {
            $matchingGroups[] = $groupDisallows;
        }
    }

    // Merge once at the end instead of re-spreading the accumulator per group.
    return array_merge([], ...$matchingGroups);
}

protected function pathIsDenied(string $requestUri, array $disallows): bool
{
foreach ($disallows as $disallow) {
Expand Down
20 changes: 20 additions & 0 deletions tests/RobotsTxtTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,26 @@ public function test_allowed_link_for_custom_user_agent()
$robots = RobotsTxt::readFrom(__DIR__.'/data/robots.txt');

$this->assertFalse($robots->allows('/test', 'google'));

$this->assertTrue($robots
->exactMatchesOnly()
->allows('/no-agents', 'Mozilla/5.0 (compatible; UserAgent007/1.1)')
);
$this->assertFalse($robots
->withPartialMatches()
->allows('/no-agents', 'Mozilla/5.0 (compatible; UserAgent007/1.1)')
);

$this->assertTrue($robots
->ignoreGlobalGroup()
->withPartialMatches()
->allows('/nl/admin/', 'Mozilla/5.0 (compatible; UserAgent007/1.1)')
);
$this->assertFalse($robots
->includeGlobalGroup()
->withPartialMatches()
->allows('/nl/admin/', 'Mozilla/5.0 (compatible; UserAgent007/1.1)')
);
}

/** @test */
Expand Down

0 comments on commit 1e81ddc

Please sign in to comment.