|
1 |
| -# php-scrape [](https://travis-ci.org/rajanrx/php-scrape) |
| 1 | +# PHP Scrape [](https://travis-ci.org/rajanrx/php-scrape) |
2 | 2 | A scraping framework written in PHP
|
3 | 3 |
|
4 |
| -## About PHP-scrape |
| 4 | +## About PHP Scrape |
5 | 5 | PHP Scrape is a basic scraping framework for PHP built on a configuration-first
|
6 | 6 | concept, i.e. once it is implemented, changes should be made in the configuration file as far
|
7 | 7 | as possible, avoiding the need for code updates or additions.
|
8 | 8 |
|
9 | 9 | ## Getting Started
|
10 |
| -The easiest way to use Php-Scrape is via Composer. |
| 10 | +The easiest way to use PHP Scrape is via Composer. |
11 | 11 | ```
|
12 | 12 | composer require --dev rajanrx/php-scrape
|
13 | 13 | ```
|
|
25 | 25 | use Scraper\Scrape\Crawler\Types\GeneralCrawler;
|
26 | 26 | use Scraper\Scrape\Extractor\Types\MultipleRowExtractor;
|
27 | 27 | require_once(__DIR__ . '/../vendor/autoload.php');
|
28 |
| -date_default_timezone_set('UTC'); |
| 28 | + |
| 29 | +// Grab the crawler |
29 | 30 | $crawler = new GeneralCrawler('https://github.com/trending');
|
| 31 | + |
| 32 | +// Get config using configuration manager |
30 | 33 | $path = __DIR__ . "/Data/git-repo.json";
|
31 | 34 | $configurationManager =
|
32 | 35 | \Scraper\Scrape\ConfigurationManager::getInstance($path);
|
| 36 | + |
| 37 | +// Use the multiple-row extractor because the GitHub trending page lists |
| 38 | +// several repositories |
33 | 39 | $extractor = new MultipleRowExtractor(
|
34 | 40 | $crawler, $configurationManager->getConfiguration()
|
35 | 41 | );
|
36 | 42 | $data = $extractor->extract();
|
| 43 | + |
| 44 | +// Print retrieved data |
37 | 45 | print_r($data);
|
38 | 46 | ```
|
39 | 47 |
|
|
69 | 77 | )
|
70 | 78 | ...
|
71 | 79 | ```
|
72 |
| -As easy as that. Docs in detail will be updated soon. |
| 80 | +As easy as that. Detailed documentation will be added soon. |
73 | 81 | Interested contributors are heartily welcome.
|
74 | 82 |
|
75 | 83 | ## Security Vulnerabilities
|
|
0 commit comments