Skip to content

Commit

Permalink
Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
Igor Timoshenkov committed Dec 8, 2014
1 parent ab5167a commit a15b32b
Show file tree
Hide file tree
Showing 11 changed files with 117 additions and 152 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
vendor/*
composer.lock
composer.phar
build/logs/coverage.xml
codeclimate.json
build/logs/*
11 changes: 3 additions & 8 deletions phpunit.xml
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<phpunit>
<phpunit backupGlobals="disabled"
bootstrap="test/bootstrap.php">
<logging>
<log type="coverage-clover" target="build/logs/clover.xml"/>
</logging>
<testsuites>
<testsuite name="">
<file>test/AtSymbol.php</file>
<file>test/CrawlDelay.php</file>
<file>test/EmptyDisallow.php</file>
<file>test/Host.php</file>
<file>test/CleanParam.php</file>
<file>test/Whitespaces.php</file>
<file>test/Comments.php</file>
<directory suffix=".php" phpVersion="5.3.0" phpVersionOperator=">=">test/cases</directory>
</testsuite>
</testsuites>
</phpunit>
170 changes: 109 additions & 61 deletions robotstxtparser.php → source/robotstxtparser.php
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public function __construct($content, $encoding = self::DEFAULT_ENCODING)

// Ensure that there's a newline at the end of the file, otherwise the
// last line is ignored
$this->content .= "\n";
$this->content .= PHP_EOL;

// set default state
$this->state = self::STATE_ZERO_POINT;
Expand All @@ -81,27 +81,6 @@ public function __construct($content, $encoding = self::DEFAULT_ENCODING)
$this->prepareRules();
}

public function getRules($userAgent = NULL)
{
if (is_null($userAgent)) {
//return all rules
return $this->rules;
}
else {
if (isset($this->rules[$userAgent])) {
return $this->rules[$userAgent];
}
else {
return array();
}
}
}

public function getContent()
{
return $this->content;
}

// signals

/**
Expand Down Expand Up @@ -151,10 +130,10 @@ protected function lineSeparator() {
*/
protected function newLine()
{
    // Match explicit line-break sequences rather than PHP_EOL: PHP_EOL
    // describes the platform PHP is running on, not the line endings of
    // the content being parsed, so relying on it would make the result
    // differ between operating systems for the same input.
    // A lone "\n" is caught through the current character; the two-byte
    // sequences are caught through the current word.
    return $this->current_char === "\n"
        || in_array($this->current_word, array("\r\n", "\n\r"), true);
}

/**
Expand Down Expand Up @@ -248,6 +227,7 @@ protected function zeroPoint()

/**
* Read directive
*
* @return RobotsTxtParser
*/
protected function readDirective()
Expand Down Expand Up @@ -275,6 +255,7 @@ protected function readDirective()

/**
* Skip space
*
* @return RobotsTxtParser
*/
protected function skipSpace()
Expand Down Expand Up @@ -314,41 +295,79 @@ protected function readValue()
return $this;
}

/**
 * Add value to directive based on the directive type
 *
 * Dispatches on $this->current_directive:
 *  - user-agent: makes the current word the active user agent;
 *  - crawl-delay: stores the current word converted with floatval(),
 *    overwriting any previous value;
 *  - sitemap / clean-param: appends the current word unchanged;
 *  - host: stores the trimmed current word, overwriting any previous value;
 *  - allow / disallow: appends the current word converted by
 *    prepareRegexRule(), skipping empty values.
 *
 * Afterwards the current word is cleared and the state machine is switched
 * back to STATE_ZERO_POINT.
 *
 * @return void
 */
private function addValueToDirective()
{
    switch ($this->current_directive) {
        case self::DIRECTIVE_USERAGENT:
            $this->setUserAgent($this->current_word);
            break;

        case self::DIRECTIVE_CRAWL_DELAY:
            $this->addRule("floatval", false);
            break;

        case self::DIRECTIVE_SITEMAP:
        case self::DIRECTIVE_CLEAN_PARAM:
            $this->addRule();
            break;

        case self::DIRECTIVE_HOST:
            $this->addRule("trim", false);
            break;

        case self::DIRECTIVE_ALLOW:
        case self::DIRECTIVE_DISALLOW:
            if (!empty($this->current_word)) {
                // prepareRegexRule() is an instance method, so hand it over
                // as an object callable; the "self::method" string form is a
                // deprecated way to reference a callable.
                $this->addRule(array($this, 'prepareRegexRule'));
            }
            break;
    }

    // clean-up
    $this->current_word = "";
    $this->switchState(self::STATE_ZERO_POINT);
}

/**
 * Make the given user agent the current one.
 *
 * Also guarantees that the rules list has an entry for that agent: when the
 * entry is missing or empty it is (re)initialised to an empty array.
 *
 * @param string $newAgent
 * @return void
 */
private function setUserAgent($newAgent = "*")
{
    $this->userAgent = $newAgent;

    // nothing to initialise when the agent already has rules
    if (!empty($this->rules[$this->userAgent])) {
        return;
    }

    $this->rules[$this->userAgent] = array();
}

/**
 * Store the current word as a rule value for the current user agent and
 * the current directive.
 *
 * @param callable|null $convert optional converter applied to the current
 *                               word before it is stored
 * @param bool          $append  true to append the value to the directive's
 *                               list, anything else to overwrite the
 *                               directive's value
 * @return void
 */
private function addRule($convert = null, $append = true)
{
    // start from the raw word and convert it only when a converter is given
    $value = $this->current_word;
    if ($convert !== null) {
        $value = call_user_func($convert, $value);
    }

    // overwrite mode: the directive holds a single value
    if ($append !== true) {
        $this->rules[$this->userAgent][$this->current_directive] = $value;
        return;
    }

    // append mode: the directive holds a list of values
    $this->rules[$this->userAgent][$this->current_directive][] = $value;
}

/**
* Machine step
Expand Down Expand Up @@ -383,13 +402,14 @@ protected function step()

/**
* Convert robots.txt rules to php regex
*
*
* @link https://developers.google.com/webmasters/control-crawl-index/docs/robots_txt
* @param string $value
* @return string
*/
protected function prepareRegexRule($value)
{
$value = "/" . ltrim($value, '/');
$value = str_replace('$', '\$', $value);
$value = str_replace('?', '\?', $value);
$value = str_replace('.', '\.', $value);
Expand Down Expand Up @@ -509,4 +529,32 @@ public function getSitemaps($userAgent = '*')

return $this->rules[$userAgent][self::DIRECTIVE_SITEMAP];
}

/**
 * Get rules based on user agent
 *
 * @param string|null $userAgent user agent to look up, or null for every
 *                               user agent
 * @return array the complete rules list when $userAgent is null, the rules
 *               stored for $userAgent when present, an empty array otherwise
 */
public function getRules($userAgent = null)
{
    // no agent requested: hand back everything
    if ($userAgent === null) {
        return $this->rules;
    }

    return isset($this->rules[$userAgent])
        ? $this->rules[$userAgent]
        : array();
}

/**
 * Get the content held by the parser.
 *
 * Returns the content property as it is currently stored; the constructor
 * appends a trailing line break to it before parsing.
 *
 * @return string
 */
public function getContent()
{
return $this->content;
}
}
2 changes: 2 additions & 0 deletions test/bootstrap.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?php
// Load the library under test once for the whole test run.
// The path is passed to require_once directly instead of through realpath():
// realpath() returns false when the file is missing, which would hide the
// offending path from the resulting error message.
require_once __DIR__ . '/../source/robotstxtparser.php';
11 changes: 0 additions & 11 deletions test/AtSymbol.php → test/cases/AtSymbol.php
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
<?php
/**
* @backupGlobals disabled
*/
class AtSymbolTest extends \PHPUnit_Framework_TestCase
{
/**
* Load library
*/
public static function setUpBeforeClass()
{
require_once(realpath(__DIR__.'/../robotstxtparser.php'));
}

/**
* @dataProvider generateDataForTest
* @covers RobotsTxtParser::isDisallowed
Expand Down
11 changes: 0 additions & 11 deletions test/CleanParam.php → test/cases/CleanParam.php
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
<?php
/**
* @backupGlobals disabled
*/
class CleanParamTest extends \PHPUnit_Framework_TestCase
{
/**
* Load library
*/
public static function setUpBeforeClass()
{
require_once(realpath(__DIR__.'/../robotstxtparser.php'));
}

/**
* @link https://help.yandex.ru/webmaster/controlling-robot/robots-txt.xml#clean-param
*
Expand Down
17 changes: 1 addition & 16 deletions test/Comments.php → test/cases/Comments.php
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
<?php
/**
* @backupGlobals disabled
*/
class CommentsTest extends \PHPUnit_Framework_TestCase
{
/**
* Load library
*/
public static function setUpBeforeClass()
{
require_once(realpath(__DIR__.'/../robotstxtparser.php'));
}

/**
* @dataProvider generateDataForTest
* @covers RobotsTxtParser::isDisallowed
Expand All @@ -22,9 +11,7 @@ public function testRemoveComments($robotsTxtContent)
{
$parser = new RobotsTxtParser($robotsTxtContent);
$this->assertInstanceOf('RobotsTxtParser', $parser);

$rules = $parser->getRules('*');

$this->assertEmpty($rules, 'expected remove comments');
}

Expand All @@ -38,9 +25,7 @@ public function testRemoveCommentsFromValue($robotsTxtContent, $expectedDisallow
{
$parser = new RobotsTxtParser($robotsTxtContent);
$this->assertInstanceOf('RobotsTxtParser', $parser);

$rules = $parser->getRules('*');

$this->assertNotEmpty($rules, 'expected data');
$this->assertArrayHasKey('disallow', $rules);
$this->assertNotEmpty($rules['disallow'], 'disallow expected');
Expand Down Expand Up @@ -82,7 +67,7 @@ public function generateDataFor2Test()
return array(
array(
"User-agent: *
Disallow: /tech #comment",
Disallow: /tech #comment",
'disallowValue' => '/tech',
),
);
Expand Down
11 changes: 0 additions & 11 deletions test/CrawlDelay.php → test/cases/CrawlDelay.php
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
<?php
/**
* @backupGlobals disabled
*/
class CrawlDelayTest extends \PHPUnit_Framework_TestCase
{
/**
* Load library
*/
public static function setUpBeforeClass()
{
require_once(realpath(__DIR__.'/../robotstxtparser.php'));
}

/**
* @dataProvider generateDataForTest
* @covers RobotsTxtParser::isDisallowed
Expand Down
11 changes: 0 additions & 11 deletions test/EmptyDisallow.php → test/cases/EmptyDisallow.php
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
<?php
/**
* @backupGlobals disabled
*/
class EmptyDisallowTest extends \PHPUnit_Framework_TestCase
{
/**
* Load library
*/
public static function setUpBeforeClass()
{
require_once(realpath(__DIR__.'/../robotstxtparser.php'));
}

/**
* @dataProvider generateDataForTest
* @covers RobotsTxtParser::isDisallowed
Expand Down
11 changes: 0 additions & 11 deletions test/Host.php → test/cases/Host.php
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
<?php
/**
* @backupGlobals disabled
*/
class HostTest extends \PHPUnit_Framework_TestCase
{
/**
* Load library
*/
public static function setUpBeforeClass()
{
require_once(realpath(__DIR__.'/../robotstxtparser.php'));
}

/**
* @dataProvider generateDataForTest
* @covers RobotsTxtParser::isDisallowed
Expand Down
Loading

0 comments on commit a15b32b

Please sign in to comment.