diff --git a/.github/.gitkeep b/.github/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f668d39..4f90723 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,10 +13,10 @@ jobs: runs-on: ${{ matrix.os }} continue-on-error: ${{ matrix.experimental }} strategy: - max-parallel: 2 + max-parallel: 3 matrix: os: [ubuntu-latest] - php: [7.2, 7.4, 8.0, 8.1, 8.2, 8.3] + php: [8.1, 8.2, 8.3, 8.4, 8.5] experimental: [false] name: PHP ${{ matrix.php }} test on ${{ matrix.os }} diff --git a/.gitignore b/.gitignore index cf6509b..a603ced 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /vendor /composer.lock -/.idea \ No newline at end of file +/.idea +/.phpunit.cache diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..79d65a9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# CHANGELOG + +## v3 - Jan/2026 + +### Breaking Changes: +* [ Robots ] [CHANGED] `bot()` method changed to `addRule()` - params structure is different +* [ Robots ] [CHANGED] `sitemap()` method renamed to `addSitemap()` +* [ Robots ] [CHANGED] `delay` array key renamed to `crawlDelay` +* [ Robots ] [ NEW ] `addComment()` - adds comments to robots.txt +* [ Robots ] [ NEW ] `saveTo()` - saves to file +* [ Robots ] [ NEW ] Implements `Stringable` interface +* [ Robots ] [REMOVED] Robots copyrights header removed +* [ Indexing ] [REMOVED] Sitemap Ping class, /ping?sitemap deprecated/removed by major search engines. +* [ Indexing ] [CHANGED] Refactored indexer classes! 
+* [ MetaTags ] [CHANGED] Return type changed from `MetaTags` to `self` for all methods +* [ MetaTags ] [CHANGED] Constructor supports array syntax for `og`, `twitter`, `link`, `meta` +* [ MetaTags ] [CHANGED] `robots()` now accepts string|array for options with associative array support +* [ MetaTags ] [CHANGED] `build()` now sanitizes attribute names to prevent XSS +* [ MetaTags ] [ NEW ] `verification()` - adds search engine verification meta tags +* [ MetaTags ] [ NEW ] `feed()` - adds RSS/Atom feed links +* [ MetaTags ] [ NEW ] `pagination()` - adds prev/next/first/last pagination links +* [ MetaTags ] [ NEW ] `hreflangs()` - batch method for multiple hreflang links with x-default support +* [ MetaTags ] [ NEW ] `articleMeta()` - adds article published/modified time and author +* [ MetaTags ] [ NEW ] `schema()` - adds schema object to be rendered with meta tags \ No newline at end of file diff --git a/README.md b/README.md index a3d4070..bee11d4 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,22 @@ -# PHP SEO -[![Build Status](https://github.com/melbahja/seo/workflows/Test/badge.svg)](https://github.com/melbahja/seo/actions?query=workflow%3ATest) [![GitHub license](https://img.shields.io/github/license/melbahja/seo)](https://github.com/melbahja/seo/blob/master/LICENSE) ![Packagist PHP Version Support](https://img.shields.io/packagist/php-v/melbahja/seo) ![Packagist Version](https://img.shields.io/packagist/v/melbahja/seo) [![Twitter](https://img.shields.io/twitter/url/https/github.com/melbahja/seo.svg?style=social)](https://twitter.com/intent/tweet?url=https%3A%2F%2Fgithub.com%2Fmelbahja%2Fseo) +# PHP SEO + +The SEO library for PHP is a simple and powerful PHP library to help developers ๐Ÿป do better on-page SEO optimizations. 
+ +[![Build Status](https://github.com/melbahja/seo/workflows/Test/badge.svg)](https://github.com/melbahja/seo/actions?query=workflow%3ATest) +[![GitHub license](https://img.shields.io/github/license/melbahja/seo)](https://github.com/melbahja/seo/blob/master/LICENSE) +![Packagist PHP Version Support](https://img.shields.io/packagist/php-v/melbahja/seo) +![Packagist Version](https://img.shields.io/packagist/v/melbahja/seo) +[![Twitter](https://img.shields.io/twitter/url/https/github.com/melbahja/seo.svg?style=social)](https://twitter.com/intent/tweet?url=https%3A%2F%2Fgithub.com%2Fmelbahja%2Fseo) -Simple PHP library to help developers 🍻 do better on-page SEO optimization ### PHP SEO features: -- [[👷]](#-generate-schemaorg) **Generate schema.org ld+json** -- [[🛀]](#-meta-tags) **Generate meta tags with twitter and open graph support** -- [[🌐]](#-sitemaps) **Generate sitemaps xml and indexes (supports: 🖺 news, 🖼 images, 📽 videos)** -- [[📤]](#-send-sitemaps-to-search-engines) **Submit new sitemaps to search engines** -- [[📤]](#-indexing-api) **Indexing API** -- [[🙈]](https://github.com/melbahja/seo/blob/master/composer.json) **No dependencies** +- [[👷]](#-generate-schemaorg) **Generate Rich Results schema.org ld+json** +- [[🛀]](#-meta-tags) **Generate Meta Tags with Twitter and Open Graph Support** +- [[🌐]](#-sitemaps) **Generate XML Sitemaps (supports: 🖺 News Sitemaps, 🖼 Images Sitemaps, 📽 Video Sitemaps, Index Sitemaps)** +- [[📤]](#-indexing-api) **IndexNow and Google Indexing API** +- [✅] **Schema Rich Results Validator** +- [[🧩]](https://github.com/melbahja/seo/blob/master/composer.json) **Zero Dependencies** ## Installation: ```bash @@ -18,19 +24,20 @@ composer require melbahja/seo ``` ## Usage: -Check this simple examples. (of course the composer autoload.php file is required) +Check these simple examples.
#### ๐Ÿ‘ท Generate schema.org ```php use Melbahja\Seo\Schema; use Melbahja\Seo\Schema\Thing; +use Melbahja\Seo\Schema\Organization; $schema = new Schema( - new Thing('Organization', [ + new Organization([ 'url' => 'https://example.com', 'logo' => 'https://example.com/logo.png', - 'contactPoint' => new Thing('ContactPoint', [ + 'contactPoint' => new Thing(type: 'ContactPoint', props: [ 'telephone' => '+1-000-555-1212', 'contactType' => 'customer service' ]) @@ -44,21 +51,16 @@ echo $schema; ```html ``` @@ -66,20 +68,21 @@ echo $schema; ```php use Melbahja\Seo\Schema; use Melbahja\Seo\Schema\Thing; +use Melbahja\Seo\Schema\CreativeWork\WebPage; -$product = new Thing('Product'); +$product = new Thing(type: 'Product'); $product->name = "Foo Bar"; $product->sku = "sk12"; $product->image = "/image.jpeg"; $product->description = "testing"; -$product->offers = new Thing('Offer', [ +$product->offers = new Thing(type: 'Offer', props: [ 'availability' => 'https://schema.org/InStock', 'priceCurrency' => 'USD', "price" => "119.99", 'url' => 'https://gool.com', ]); -$webpage = new Thing("WebPage", [ +$webpage = new WebPage([ '@id' => "https://example.com/product/#webpage", 'url' => "https://example.com/product", 'name' => 'Foo Bar', @@ -93,35 +96,36 @@ $schema = new Schema( echo json_encode($schema, JSON_PRETTY_PRINT); ``` + **Results:** ```json { - "@context": "https://schema.org", - "@graph": [ - { - "name": "Foo Bar", - "sku": "sk12", - "image": "/image.jpeg", - "description": "testing", - "offers": { - "availability": "https://schema.org/InStock", - "priceCurrency": "USD", - "price": "119.99", - "url": "https://gool.com", - "@type": "Offer", - "@context": "https://schema.org/" - }, - "@type": "Product", - "@context": "https://schema.org/" - }, - { - "@id": "https://example.com/product/#webpage", - "url": "https://example.com/product", - "name": "Foo Bar", - "@type": "WebPage", - "@context": "https://schema.org/" - } - ] + "@context": "https:\/\/schema.org", + "@graph": [ + { 
+ "@type": "Product", + "@context": "https:\/\/schema.org", + "name": "Foo Bar", + "sku": "sk12", + "image": "\/image.jpeg", + "description": "testing", + "offers": { + "@type": "Offer", + "@context": "https:\/\/schema.org", + "availability": "https:\/\/schema.org\/InStock", + "priceCurrency": "USD", + "price": "119.99", + "url": "https:\/\/gool.com" + } + }, + { + "@type": "WebPage", + "@context": "https:\/\/schema.org", + "@id": "https:\/\/example.com\/product\/#webpage", + "url": "https:\/\/example.com\/product", + "name": "Foo Bar" + } + ] } ``` @@ -135,12 +139,21 @@ $metatags = new MetaTags(); $metatags ->title('PHP SEO') ->description('This is my description') - ->meta('author', 'Mohamed Elabhja') + ->meta('author', 'Mohamed Elbahja') ->image('https://avatars3.githubusercontent.com/u/8259014') ->mobile('https://m.example.com') ->canonical('https://example.com') ->shortlink('https://git.io/phpseo') - ->amp('https://apm.example.com'); + ->amp('https://apm.example.com') + ->robots(['index', 'follow', 'max-snippet' => -1]) + ->robots(botName: 'bingbot', options: ['index', 'nofollow']) + ->feed("https://example.com/feed.rss") + ->verification("google", "token_value") + ->verification("yandex", "token_value") + ->hreflang("de", "https://de.example.com") + ->og("type", "website") + ->twitter("creator", "Mohamed Elbahja"); + // ->schema($schema) echo $metatags; @@ -151,401 +164,266 @@ echo $metatags; PHP SEO - + + + + + + + + + + + - - - + ``` -#### ๐Ÿ—บ Sitemaps -```php -$yourmap = new Sitemap(string $url, array $options = []): SitemapIndexInterface -``` -| Option name | Description | Required ? | Default | -| ------------- | ------------- | --------- | -------- | -| save_path | Generated sitemaps storage path | YES | | -| sitemaps_url | Sitemap index custom url for generated sitemaps | NO | $url | -| index_name | Custom sitemap index name | NO | sitemap.xml | +# ๐Ÿ—บ Sitemaps + +Generate XML sitemaps with support for images, videos, news, and localized URLs. 
+ +## Basic Usage -##### Simple Example ```php use Melbahja\Seo\Sitemap; -$sitemap = new Sitemap('https://example.com', ['save_path' => '/path/to_save/files']); +$sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: '/path/to_save/files', +); $sitemap->links('blog.xml', function($map) { - $map->loc('/blog')->freq('daily')->priority('0.8') - ->loc('/blog/my-new-article')->freq('weekly')->lastMod('2019-03-01') - ->loc('/ุงู‡ู„ุง-ุจุงู„ุนุงู„ู…')->freq('weekly'); - $map->loc('/blog/hello')->freq('monthly'); + $map->loc('/blog') + ->changeFreq('daily') + ->priority(0.8) + ->loc('/blog/my-new-article') + ->changeFreq('weekly') + ->lastMod('2024-01-15') + ->loc('/ุงู‡ู„ุง-ุจุงู„ุนุงู„ู…') + ->changeFreq('weekly'); + + $map->loc('/blog/hello')->changeFreq('monthly'); }); -// return bool -// throws SitemapException if save_path options not exists -$sitemap->save(); +$sitemap->render(); ``` -**Results:** (๐Ÿ“‚ in: /path/to_save/files/) - -๐Ÿ“: sitemap.xml (formatted) -```xml - - - - - https://example.com/blog.xml - 2019-03-01T14:38:02+01:00 - - -``` +## Options -๐Ÿ“: blog.xml (formatted) -```xml - - - - - https://example.com/blog - daily - 0.8 - - - https://example.com/blog/my-new-article - weekly - 2019-03-01T00:00:00+01:00 - - - https://example.com/%D8%A7%D9%87%D9%84%D8%A7-%D8%A8%D8%A7%D9%84%D8%B9%D8%A7%D9%84%D9%85 - weekly - - - https://example.com/blog/hello - monthly - - -``` +| Option | Description | Required | Default | +| --- | --- | --- | --- | +| `saveDir` | Generated sitemaps storage path | Yes | \- | +| `sitemapBaseUrl` | Custom URL for generated sitemaps | No | Base URL | +| `indexName` | Custom sitemap index name | No | sitemap.xml | +| `mode` | Output mode (FILE, MEMORY, STREAM, TEMP) | No | TEMP | -##### Multipe Sitemaps && Images -```php -use Melbahja\Seo\Sitemap; +## URL Methods -$sitemap = new Sitemap('https://example.com'); +```php +$builder->loc('/page') // URL path relative or absolute + ->priority(0.8) // Priority 0.0-1.0 + 
->changeFreq('weekly') // always, hourly, daily, weekly, monthly, yearly, never + ->lastMod('2024-01-15') // Last modified date in string or unix ts + ->image('/image.jpg') // Add image (requires 'images' => true) + ->video('Title', [...]) // Add video (requires 'videos' => true) + ->alternate('/es/page', 'es'); // Add hreflang alternate +``` -// Instead of passing save_path to the factory you can set it later via setSavePath -// also $sitemap->getSavePath() method to get the current save_path -$sitemap->setSavePath('your_save/path'); +## Advanced Features -// changing sitemap index name -$sitemap->setIndexName('index.xml'); +### Image Sitemaps -// For images you need to pass a option images => true -$sitemap->links(['name' => 'blog.xml', 'images' => true], function($map) +```php +$sitemap->links(['name' => 'gallery.xml', 'images' => true], function($builder) { - $map->loc('/blog')->freq('daily')->priority('0.8') - ->loc('/blog/my-new-article') - ->freq('weekly') - ->lastMod('2019-03-01') - ->image('/uploads/image.jpeg', ['caption' => 'My caption']) - ->loc('/ุงู‡ู„ุง-ุจุงู„ุนุงู„ู…')->freq('weekly'); - - // image(string $url, array $options = []), image options: caption, geo_location, title, license - // see References -> images - $map->loc('/blog/hello')->freq('monthly')->image('https://cdn.example.com/image.jpeg'); + $builder->loc('/gallery/1') + ->image('/images/photo1.jpg', [ + 'title' => 'Photo Title', + 'caption' => 'Photo caption' + ]); }); +``` + +### Video Sitemaps -// another file -$sitemap->links('blog_2.xml', function($map) +```php +$sitemap->links(['name' => 'videos.xml', 'videos' => true], function($builder) { - // Mabye you need to loop through posts form your database ? 
- foreach (range(0, 4) as $i) - { - $map->loc("/posts/{$i}")->freq('weekly')->priority('0.7'); - } + $builder->loc('/video/page') + ->video('Video Title', [ + 'thumbnail' => '/thumb.jpg', + 'description' => 'Video description', + 'content_loc' => '/video.mp4' + ]); }); - -$sitemap->save(); - ``` -**Results** - -๐Ÿ“: index.xml -```xml - - - - - https://example.com/blog.xml - 2019-03-01T15:13:22+01:00 - - - https://example.com/blog_2.xml - 2019-03-01T15:13:22+01:00 - - +### News Sitemaps -``` +```php +use Melbahja\Seo\Sitemap\NewsBuilder; -๐Ÿ“: blog.xml -```xml - - - - - https://example.com/blog - daily - 0.8 - - - https://example.com/blog/my-new-article - weekly - 2019-03-01T00:00:00+01:00 - - My caption - https://example.com/uploads/image.jpeg - - - - https://example.com/%D8%A7%D9%87%D9%84%D8%A7-%D8%A8%D8%A7%D9%84%D8%B9%D8%A7%D9%84%D9%85 - weekly - - - https://example.com/blog/hello - monthly - - https://cdn.example.com/image.jpeg - - - +$sitemap->news('news.xml', function(NewsBuilder $builder) +{ + $builder->setPublication('Your News', 'en'); + + $builder->loc('/article/1') + ->news([ + 'title' => 'Article Title', + 'publication_date' => '2024-01-15T10:00:00Z', + 'keywords' => 'news, breaking' + ]); +}); ``` -๐Ÿ“: blog_2.xml -```xml - - - - - https://example.com/posts/0 - weekly - 0.7 - - - https://example.com/posts/1 - weekly - 0.7 - - - https://example.com/posts/2 - weekly - 0.7 - - - https://example.com/posts/3 - weekly - 0.7 - - - https://example.com/posts/4 - weekly - 0.7 - - -``` +### Multilingual Sitemaps -##### Sitemap with videos ```php -$sitemap = (new Sitemap('https://example.com')) - ->setSavePath('./storage/sitemaps') - ->setSitemapsUrl('https://example.com/sitemaps') - ->setIndexName('index.xml'); - -$sitemap->links(['name' => 'posts.xml', 'videos' => true], function($map) +$sitemap->links(['name' => 'multilang.xml', 'localized' => true], function($builder) { - $map->loc('/posts/clickbait-video')->video('My Clickbait Video title', - [ - // or 
thumbnail_loc - 'thumbnail' => 'https://example.com/thumbnail.jpeg', - 'description' => 'My description', - // player_loc or content_loc one of them is required - 'player_loc' => 'https://example.com/embed/81287127' - - // for all available options see References -> videos - ]); - - $map->loc('posts/bla-bla'); + $builder->loc('/page') + ->alternate('/es/page', 'es') + ->alternate('/fr/page', 'fr'); }); - -$sitemap->save(); -``` -**Results** - -๐Ÿ“: index.xml -```xml - - - - - https://example.com/sitemaps/posts.xml - 2019-03-01T15:30:02+01:00 - - -``` -**Note:** lastmod in sitemap index files are generated automatically - -๐Ÿ“: posts.xml -```xml - - - - - https://example.com/posts/clickbait-video - - My description - https://example.com/embed/81287127 - My Clickbait Video title - https://example.com/thumbnail.jpeg - - - - https://example.com/posts/bla-bla - - ``` -##### News Sitemaps +## Output Modes -```php -use Melbahja\Seo\Factory; +### TEMP Mode (Default) -$sitemap = Factory::sitemap('https://example.com', +```php +$sitemap = new Sitemap('https://example.com', [ - // You can also customize your options by passing array to the factory like this - 'save_path' => './path', - 'sitemaps_url' => 'https://example.com/maps', - 'index_name' => 'news_index.xml' + 'saveDir' => './storage', + 'mode' => OutputMode::TEMP ]); +$sitemap->render(); // Saves to temp dir and save to disk only on generation success. 
+``` -$sitemap->news('my_news.xml', function($map) -{ - // publication: name, language - // Google quote about the name: "It must exactly match the name as - // it appears on your articles on news.google.com" - $map->setPublication('PHP NEWS', 'en'); - - $map->loc('/news/12')->news( - [ - 'title' => 'PHP 8 Released', - 'publication_date' => '2019-03-01T15:30:02+01:00', - ]); - - $map->loc('/news/13')->news( - [ - 'title' => 'PHP 8 And High Performance', - 'publication_date' => '2019-04-01T15:30:02+01:00' - ]); -}); +### File Mode -$sitemap->save(); +```php +$sitemap = new Sitemap('https://example.com', +[ + 'saveDir' => './storage', + 'mode' => OutputMode::FILE +]); +$sitemap->render(); // Saves to disk ``` -**Results** - -๐Ÿ“: news_index.xml -```xml - - - - - https://example.com/maps/my_news.xml - 2019-03-01T15:57:10+01:00 - - -``` +### Memory Mode -๐Ÿ“: my_news.xml -```xml - - - - - https://example.com/news/12 - - - PHP NEWS - en - - PHP 8 Released - 2019-03-01T15:30:02+01:00 - - - - https://example.com/news/13 - - - PHP NEWS - en - - PHP 8 And High Performance - 2019-04-01T15:30:02+01:00 - - - +```php +$sitemap = new Sitemap('https://example.com', [ + 'mode' => OutputMode::MEMORY +]); +$xml = $sitemap->render(); // Returns XML string ``` -**Google quote:** โš  "If you submit your News sitemap before your site has been reviewed and approved by our team, you may receive errors." โš  +### Stream Mode +```php +$stream = fopen('sitemap.xml', 'w'); +$builder = new LinksBuilder( + baseUrl: 'https://example.com', + stream: $stream, // defaults to stdout + mode: OutputMode::STREAM, +); +$builder->loc('/page')->render(); +fclose($stream); +``` -#### ๐Ÿค– Send Sitemaps To Search Engines - -According to the sitemaps protocol, search engines should have a url that allow you to inform them about your new sitemap files. 
like: /ping?sitemap=sitemap_url +## Complete Example ```php -use Melbahja\Seo\Ping; +$sitemap = new Sitemap(baseUrl: 'https://example.com', options: [ + 'saveDir' => './sitemaps', + 'indexName' => 'sitemap-index.xml' +]); -$ping = new Ping; +// Regular pages: you can just pass an array of links +$sitemap->links('pages.xml', ['/', '/about', '/contact']); -// the void method send() will inform via CURL: google, bing and yandex about your new file -$ping->send('https://example.com/sitemap_file.xml'); +// Products with images +$sitemap->links(['name' => 'products.xml', 'images' => true], function($builder) +{ + $builder->loc('/product/123') + ->priority(0.9) + ->image('/product-main.jpg', ['title' => 'Product Image']); +}); +// News section +$sitemap->news('news.xml', function($builder) +{ + $builder->setPublication('Tech News', 'en'); + $builder->loc('/article/1') + ->news(['title' => 'New Article', 'publication_date' => date('c')]); +}); + +// Generate everything +$sitemap->render(); +// Creates: sitemap-index.xml, pages.xml, products.xml, news.xml ``` ### Indexing API -This is the first PHP library to support the new search engines indexing API (aka indexnow.org). +Submit URLs to search engines for instant indexing using Google Indexing API and IndexNow protocol.
+ +#### Google Indexing API ```php -use Melbahja\Seo\Indexing; +use Melbahja\Seo\Indexing\GoogleIndexer; +use Melbahja\Seo\Indexing\URLIndexingType; + +$indexer = new GoogleIndexer('your-google-access-token'); + +// Index single URL +$indexer->submitUrl('https://www.example.com/page'); -$indexer = new Indexing('www.example.cpm', [ - 'bing.com' => 'your_api_key_here', - 'yandex.com' => 'your_api_key_here', +// Index multiple URLs +$indexer->submitUrls([ + 'https://www.example.com/page1', + 'https://www.example.com/page2' ]); +// Delete URL from index +$indexer->submitUrl('https://www.example.com/deleted-page', URLIndexingType::DELETE); +``` + +#### IndexNow Protocol + +```php +use Melbahja\Seo\Indexing\IndexNowIndexer; -// index single url. -$indexer->indexUrl('https://www.example.com/page'); +$indexer = new IndexNowIndexer('your-indexnow-api-key'); -// index multi urls. -$indexer->indexUrls(['https://www.example.com/page']); +// Submit to all supported engines +$indexer->submitUrl('https://www.example.com/page'); +// Submit multiple URLs +$indexer->submitUrls([ + 'https://www.example.com/page1', + 'https://www.example.com/page2' +]); ``` +## AI LLMs.txt Support + +LLMs.txt isn't an established industry standard (IMO a training honeypot); it's a newer format designed mainly to help big tech companies train their AI models. From an SEO perspective, I don't see clear benefits for webmasters at this time. If you find LLMs.txt valuable for your use case, contributions are welcome! Feel free to submit a PR. + +## Documentation +The docs are coming soon with more features and complete examples.
+ ## Sponsors Special thanks to friends who support this work financially: @@ -563,4 +441,4 @@ Special thanks to friends who support this work financially: ## License -[MIT](https://github.com/melbahja/seo/blob/master/LICENSE) Copyright (c) 2019-present Mohamed Elbahja +[MIT](https://github.com/melbahja/seo/blob/master/LICENSE) Copyright (c) Mohamed Elbahja diff --git a/composer.json b/composer.json index 639af56..be19429 100644 --- a/composer.json +++ b/composer.json @@ -1,18 +1,23 @@ { "name": "melbahja/seo", "type": "library", - "description": "Simple PHP library to help developers ๐Ÿป do better on-page SEO optimization", + "description": "SEO library for PHP is a simple PHP library to help developers ๐Ÿป do better on-page SEO optimizations.", "keywords": [ "seo", "search engine optimization", - "php7", + "php8", "schema.org", "sitemaps", "sitemap.xml", "sitemap index", "meta tags", "open graph", - "twitter tags" + "twitter tags", + "rich results", + "images sitemaps", + "video sitemaps", + "index sitemaps", + "news sitemaps" ], "license": "MIT", "authors": [ @@ -24,7 +29,7 @@ } ], "require": { - "php": ">=7.2", + "php": ">=8.1", "ext-xml": "*", "ext-curl": "*", "ext-json": "*" @@ -34,13 +39,8 @@ "Melbahja\\Seo\\": "src/" } }, - "autoload-dev": { - "psr-4":{ - "Tests\\Melbahja\\Seo\\": "tests/" - } - }, "require-dev": { - "phpunit/phpunit": "^8.5" + "phpunit/phpunit": "^10.0" }, "minimum-stability": "dev", "prefer-stable": true diff --git a/phpunit.xml b/phpunit.xml index c08c88c..49792e4 100644 --- a/phpunit.xml +++ b/phpunit.xml @@ -1,25 +1,20 @@ - - - src - - - - - ./tests/ - - + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.5/phpunit.xsd" + bootstrap="vendor/autoload.php" + colors="true" + cacheResult="false" + cacheDirectory=".phpunit.cache" +> + + + tests + + + + + src + + diff --git a/src/Exceptions/SeoException.php b/src/Exceptions/SeoException.php index 0ce2e68..0207aad 100644 --- 
a/src/Exceptions/SeoException.php +++ b/src/Exceptions/SeoException.php @@ -1,13 +1,10 @@ ', '<'], - ['&', ''', '"', '>', '<'], - $url['scheme'] . "://{$url['host']}{$url['path']}{$url['query']}" - ); - } -} - diff --git a/src/Indexing.php b/src/Indexing.php deleted file mode 100644 index 2d265f8..0000000 --- a/src/Indexing.php +++ /dev/null @@ -1,88 +0,0 @@ -host = $host; - $this->keys = $keys; - } - - /** - * Instant index single url. - * - * @param string $url - * @return array - */ - public function indexUrl(string $url): array - { - return $this->indexUrls([$url]); - } - - /** - * Instant index multiple urls. - * - * @param array $urls - * @return array - */ - public function indexUrls(array $urls): array - { - $accepted = []; - - foreach($this->keys as $engine => $key) - { - $accepted[$engine] = $this->index($engine, $key, $urls); - } - - return $accepted; - } - - /** - * Send index request to search engine. - * - * @param string $engine - * @param string $apiKey - * @param array $urls - * @return bool - */ - protected function index(string $engine, string $apiKey, array $urls): bool - { - $ch = curl_init("https://{$engine}/indexnow"); - curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode(['host' => $this->host, 'key' => $apiKey, 'urlList' => $urls])); - curl_setopt($ch, CURLOPT_HTTPHEADER, ['content-type: application/json']); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_exec($ch); - $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - curl_close($ch); - - return $code >= 200 && $code < 300; - } -} diff --git a/src/Indexing/GoogleIndexer.php b/src/Indexing/GoogleIndexer.php new file mode 100644 index 0000000..42b8975 --- /dev/null +++ b/src/Indexing/GoogleIndexer.php @@ -0,0 +1,101 @@ +accessToken = $accessToken; + $this->httpClient = $httpClient ?? 
new HttpClient(headers: [ + 'Authorization' => "Bearer {$this->accessToken}", + 'Content-Type' => 'application/json' + ]); + } + + /** + * Submit multiple URLs to google indexing API + * + * @todo Nice to support BATCH operation later. + * @param array $urls list of URLs to submit for indexing + * @param URLIndexingType $type The type of indexing operation UPDATE or DELETE + * @return array associative, URLs as keys and values as bool success state + */ + public function submitUrls(array $urls, URLIndexingType $type = URLIndexingType::UPDATE): array + { + $results = []; + foreach ($urls as $url) + { + $results[$url] = $this->submitUrl($url, $type); + } + + return $results; + } + + /** + * Submit a single URL to google indexing API + * + * @param string $url The URL to submit + * @param URLIndexingType $type The type of indexing operation UPDATE or DELETE + * @return bool true if HTTP status is 200, false otherwise + */ + public function submitUrl(string $url, URLIndexingType $type = URLIndexingType::UPDATE): bool + { + $payload = [ + 'url' => $url, + 'type' => $type === URLIndexingType::UPDATE ? 'URL_UPDATED' : 'URL_DELETED' + ]; + + $this->httpClient->request('POST', self::API_URL, $payload); + return $this->httpClient->getStatusCode() === 200; + } + + /** + * Not supported - this is just for IndexNow compatibility! + * + * @return never This method always throws an exception + * @throws SeoException Always throws as Google doesn't use this verification method + */ + public function serveKeyFile(): never + { + throw new SeoException('Google Indexing API does not use key.txt verification'); + } + + /** + * Create an GoogleIndexer instance from environment variable + * + * @param string $envVar The name of the env var of the API key, GOOGLE_INDEXING_ACCESS_TOKEN by default. 
+ * @return self New GoogleIndexer instance + * @throws SeoException If the environment variable is not set or empty + */ + public static function fromEnvironment(string $envVar = 'GOOGLE_INDEXING_ACCESS_TOKEN'): self + { + if (!($token = $_ENV[$envVar] ?? getenv($envVar))) { + throw new SeoException("Google Indexing API access token not found in env var: {$envVar}"); + } + + return new self($token); + } +} diff --git a/src/Indexing/IndexNowEngine.php b/src/Indexing/IndexNowEngine.php new file mode 100644 index 0000000..2b633b3 --- /dev/null +++ b/src/Indexing/IndexNowEngine.php @@ -0,0 +1,30 @@ +value, urlencode($url), urlencode($key)); + } +} diff --git a/src/Indexing/IndexNowIndexer.php b/src/Indexing/IndexNowIndexer.php new file mode 100644 index 0000000..158768c --- /dev/null +++ b/src/Indexing/IndexNowIndexer.php @@ -0,0 +1,108 @@ +apiKey = $apiKey; + $this->httpClient = $httpClient ?? new HttpClient(); + } + + /** + * Submit a single URL to $engine for indexing + * + * @param string $url The URL to submit for indexing + * @param IndexNowEngine|null $engine The search engine to notify defaults to indexnow all supported engines + * @param URLIndexingType $type The type of indexing operation not needed now y can just send new or 404 urls + * @return bool true on successful, false on failure. 
+ */ + public function submitUrl(string $url, IndexNowEngine $eng = IndexNowEngine::INDEXNOW, URLIndexingType $type = URLIndexingType::UPDATE): bool + { + $this->httpClient->request('GET', $eng->toUrl($url, $this->apiKey)); + return $this->httpClient->getStatusCode() < 400; + } + + /** + * Submit multiple URLs to $engine for indexing + * + * @param array $urls Array of URLs to submit for indexing + * @param IndexNowEngine|null $engine The search engine to notify defaults to indexnow all supported engines + * @param URLIndexingType $type The type of indexing operation not needed now y can just send new or 404 urls + * @return array associative, URLs as keys and values as bool success state + */ + public function submitUrls(array $urls, IndexNowEngine $eng = IndexNowEngine::INDEXNOW, URLIndexingType $type = URLIndexingType::UPDATE): array + { + $results = []; + foreach ($urls as $url) + { + $results[$url] = $this->submitUrl($url, $eng, $type); + } + + return $results; + } + + /** + * Serve the IndexNow verification key file + * + * This method handles requests to /{api-key}.txt and returns the key for domain verification. + * Returns 404 if the requested path doesn't match the expected key file path. + * + * @return never This method always exits and terminates runtime. + */ + public function serveKeyFile(): never + { + $uri = strtok($_SERVER['REQUEST_URI'] ?? '', '?'); + $exp = '/' . $this->apiKey . '.txt'; + + // Ignore/404 invalid attempts or revoked keys. + if ($uri !== $exp) { + + http_response_code(404); + header('Content-Type: text/plain'); + echo 'Page not found'; + exit; + } + + header('Content-Type: text/plain'); + header('Cache-Control: no-cache, no-store, must-revalidate'); + echo $this->apiKey; + exit; + } + + /** + * Create an IndexNowIndexer instance from environment variable + * + * @param string $envVar The name of the env var of the API key, INDEXNOW_API_KEY by default. 
+ * @return self New IndexNowIndexer instance + * @throws RuntimeException If the environment variable is not set or empty + */ + public static function fromEnvironment(string $envVar = 'INDEXNOW_API_KEY'): self + { + if (!($key = $_ENV[$envVar] ?? getenv($envVar))) { + throw new RuntimeException("IndexNow API key not found in env var: {$envVar}"); + } + + return new self($key); + } +} diff --git a/src/Indexing/URLIndexingType.php b/src/Indexing/URLIndexingType.php new file mode 100644 index 0000000..be2c59c --- /dev/null +++ b/src/Indexing/URLIndexingType.php @@ -0,0 +1,14 @@ + $v) + foreach ($meta as $k => $v) { - if (method_exists(static::class, $k)) { + if (is_array($v)) { + + foreach ($v as $kk => $vv) + { + if (method_exists($this, $k)) { + $this->$k($kk, $vv); + continue; + } + + $this->push($k, $vv); + } + + continue; + } + + if (method_exists($this, $k)) { $this->$k($v); continue; } - $this->meta($k, $v); + + $this->push('meta', ['name' => $k, 'content' => $v]); + } + + foreach ($og as $k => $v) + { + $this->og($k, $v); + } + + foreach ($twitter as $k => $v) + { + $this->twitter($k, $v); } } @@ -59,20 +79,20 @@ public function __construct(array $tags = []) * Set page and meta title * * @param string $title - * @return MetaTags + * @return self */ - public function title(string $title): MetaTags + public function title(string $title): self { - $this->title = Helper::escape($title); + $this->title = $title; return $this->meta('title', $title)->og('title', $title)->twitter('title', $title); } /** * Set page description. 
* @param string $desc - * @return MetaTags + * @return self */ - public function description(string $desc): MetaTags + public function description(string $desc): self { return $this->meta('description', $desc)->og('description', $desc)->twitter('description', $desc); } @@ -81,9 +101,9 @@ public function description(string $desc): MetaTags * Set a mobile link (Http header "Vary: User-Agent" is required) * * @param string $url - * @return MetaTags + * @return self */ - public function mobile(string $url): MetaTags + public function mobile(string $url): self { return $this->push('link', [ 'href' => $url, @@ -93,38 +113,75 @@ public function mobile(string $url): MetaTags } /** - * Set robots meta tags. + * Set robots meta tags * - * @param string $options For example: follow, index, max-snippet:-1, max-video-preview:-1, max-image-preview:large - * @param string $botName bot name or robots for all. - * @return MetaTags + * @param string|array $options index,follow OR ['index', 'follow', 'max-snippet' => -1] + * @param string $botName robots|googlebot|bingbot|etc + * @return self */ - public function robots(string $options, string $botName = 'robots'): MetaTags + public function robots(string|array $options, string $botName = 'robots'): self { + if (is_array($options)) { + + $parts = []; + foreach ($options as $k => $v) + { + $parts[] = is_int($k) ? 
$v : "{$k}:{$v}"; + } + $options = implode(', ', $parts); + } + return $this->meta($botName, $options); } /** - * Set AMP link + * Set RSS or Atom feed link * - * @param string $url - * @return MetaTags + * @param string $url feed URL + * @param string $type application/rss+xml|application/atom+xml + * @param string|null $title feed title + * @return self */ - public function amp(string $url): MetaTags + public function feed(string $url, string $type = 'application/rss+xml', ?string $title = null): self { return $this->push('link', [ - 'rel' => 'amphtml', - 'href' => $url + 'rel' => 'alternate', + 'title' => $title, + 'type' => $type, + 'href' => $url, ]); } + /** + * Set search engine verification meta tag + * + * @param string $engine google|bing|yandex|pinterest|etc + * @param string $code verification code + * @return self + */ + public function verification(string $engine, string $code): self + { + return $this->meta("{$engine}-site-verification", $code); + } + + /** + * Set AMP link + * + * @param string $url + * @return self + */ + public function amp(string $url): self + { + return $this->push('link', ['rel' => 'amphtml', 'href' => $url]); + } + /** * Set canonical url * * @param string $url - * @return MetaTags + * @return self */ - public function canonical(string $url): MetaTags + public function canonical(string $url): self { return $this->push('link', [ 'rel' => 'canonical', @@ -137,9 +194,9 @@ public function canonical(string $url): MetaTags * Set social media url. * * @param string $url - * @return MetaTags + * @return self */ - public function url(string $url): MetaTags + public function url(string $url): self { return $this->og('url', $url)->twitter('url', $url); } @@ -149,9 +206,9 @@ public function url(string $url): MetaTags * * @param string $lang for eg: en * @param string $url alternate language page url. 
- * @return MetaTags + * @return self */ - public function hreflang(string $lang, string $url): MetaTags + public function hreflang(string $lang, string $url): self { return $this->push('link', [ 'rel' => 'alternate', @@ -160,19 +217,37 @@ public function hreflang(string $lang, string $url): MetaTags ]); } + /** + * Set multiple alternate language URLs at once + * + * @param array $langUrls Associative array of lang => url pairs (e.g., ['en' => 'url', 'fr' => 'url']) + * @param string|null $default Optional x-default URL for language fallback + * @return self + */ + public function hreflangs(array $langUrls, ?string $default = null): self + { + if ($default !== null) { + $langUrls['x-default'] = $default; + } + + foreach ($langUrls as $lang => $url) + { + $this->hreflang($lang, $url); + } + + return $this; + } + /** * Set a meta tag * * @param string $name * @param string $value - * @return MetaTags + * @return self */ - public function meta(string $name, string $value): MetaTags + public function meta(string $name, string $value): self { - return $this->push('meta', [ - 'name' => $name, - 'content' => $value, - ]); + return $this->push('meta', ['name' => $name,'content' => $value]); } /** @@ -180,9 +255,9 @@ public function meta(string $name, string $value): MetaTags * * @param string $name * @param array $attrs - * @return MetaTags + * @return self */ - public function push(string $name, array $attrs): MetaTags + public function push(string $name, array $attrs): self { foreach ($attrs as $k => $v) { @@ -198,11 +273,11 @@ public function push(string $name, array $attrs): MetaTags * * @param string $name * @param string $value - * @return MetaTags + * @return self */ - public function og(string $name, string $value): MetaTags + public function og(string $name, string $value): self { - $this->openGraphTags[] = ['meta', ['property' => "og:{$name}", 'content' => $value]]; + $this->tags[] = ['meta', ['property' => "og:{$name}", 'content' => $value]]; return $this; } @@ 
-212,26 +287,23 @@ public function og(string $name, string $value): MetaTags * * @param string $name * @param string $value - * @return MetaTags + * @return self */ - public function twitter(string $name, string $value): MetaTags + public function twitter(string $name, string $value): self { - $this->twitterTags[] = ['meta', ['property' => "twitter:{$name}", 'content' => $value]]; + $this->tags[] = ['meta', ['property' => "twitter:{$name}", 'content' => $value]]; return $this; } /** * Set short link tag - * + * * @param string $url - * @return MetaTags + * @return self */ - public function shortlink(string $url): MetaTags + public function shortlink(string $url): self { - return $this->push('link', [ - 'rel' => 'shortlink', - 'href' => $url - ]); + return $this->push('link', ['rel' => 'shortlink', 'href' => $url]); } /** @@ -239,13 +311,78 @@ public function shortlink(string $url): MetaTags * * @param string $url * @param string $card Twitter card - * @return MetaTags + * @return self */ - public function image(string $url, string $card = 'summary_large_image'): MetaTags + public function image(string $url, string $card = 'summary_large_image'): self { return $this->og('image', $url)->twitter('card', $card)->twitter('image', $url); } + /** + * Set article metadata + * + * @param string $published Article published time + * @param string|null $modified Article modified time + * @param string|null $author Article author + * @return self + */ + public function articleMeta(string $published, ?string $modified = null, ?string $author = null): self + { + $this->og('article:published_time', $published); + + if ($modified) { + $this->og('article:modified_time', $modified); + } + + if ($author) { + $this->og('article:author', $author); + } + + return $this; + } + + /** + * Set pagination links + * + * @param string|null $prev previous page URL + * @param string|null $next next page URL + * @param string|null $first first page URL (optional) + * @param string|null $last last 
page URL (optional) + * @return self + */ + public function pagination(?string $prev = null, ?string $next = null, ?string $first = null, ?string $last = null): self + { + if ($prev) { + $this->push('link', ['rel' => 'prev', 'href' => $prev]); + } + + if ($next) { + $this->push('link', ['rel' => 'next', 'href' => $next]); + } + + if ($first) { + $this->push('link', ['rel' => 'first', 'href' => $first]); + } + + if ($last) { + $this->push('link', ['rel' => 'last', 'href' => $last]); + } + + return $this; + } + + /** + * Add Schema objects to be rendered with metatags + * + * @param SchemaInterface $schema Any Schema object + * @return self + */ + public function schema(SchemaInterface $schema): self + { + $this->schema = $schema; + return $this; + } + /** * Build meta tags * @@ -254,20 +391,64 @@ public function image(string $url, string $card = 'summary_large_image'): MetaTa */ public function build(array $tags): string { - $out = ''; + // Sort tags for nice readability + usort($tags, function($a, $b) + { + $getType = function($tag) + { + if (isset($tag[1]['property'])) { + + if (str_starts_with($tag[1]['property'], 'og:')) { + return 3; + } + + if (str_starts_with($tag[1]['property'], 'twitter:')) { + return 4; + } + } + + if ($tag[0] === 'meta') { + return 1; + } + if ($tag[0] === 'link') { + return 2; + } + + return 5; + }; + + return $getType($a) <=> $getType($b); + }); + + $out = ''; foreach ($tags as $tag) { - $out .= "\n<{$tag[0]} "; + $out .= PHP_EOL . "<{$tag[0]} "; foreach ($tag[1] as $a => $v) { - $out .= $a .'="'. Helper::escape($v) .'" '; + // empty values will be skipped. + if (!$v) { + continue; + } + + // attrs values are escaped to avoid XSS attacks, but attrs names MUST be trusted! + // if you trust your users to set arbitary meta attr names that a STUPID idea, but + // anyway I did a small replace to avid common XSS chars that may hack you! 
+ $a = str_replace(['"', "'", '<', '>', ' ', "\t", "\n", "\r"], '', $a); + + // Set attr=value + $out .= $a .'="'. Utils::escape($v) .'" '; } $out .= "/>"; } + if ($this->schema !== null) { + $out .= (string) $this->schema; + } + return $out; } @@ -281,9 +462,10 @@ public function __toString(): string { $title = ''; if ($this->title !== null) { - $title = "{$this->title}"; + $title = Utils::escape($this->title); + $title = "{$title}"; } - return $title . $this->build($this->tags) . $this->build($this->twitterTags) . $this->build($this->openGraphTags) ; + return $title . $this->build($this->tags); } } diff --git a/src/Ping.php b/src/Ping.php deleted file mode 100644 index d5b0eb9..0000000 --- a/src/Ping.php +++ /dev/null @@ -1,69 +0,0 @@ -engines = array_unique(array_merge($this->engines, $append)); - } - } - - /** - * Send sitemap url to registred engines - * - * @param string $sitemapUrl - * @return void - */ - public function send(string $sitemapUrl): void - { - foreach ($this->engines as $engine) - { - $this->inform($engine, $sitemapUrl); - } - } - - /** - * Inform search engine - * - * @param string $engine - * @param string $url - * @return void - */ - public function inform(string $engine, string $url): void - { - $req = curl_init("{$engine}/ping?sitemap={$url}"); - curl_setopt($req, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($req, CURLOPT_SSL_VERIFYPEER, false); - curl_setopt($req, CURLOPT_RETURNTRANSFER, 1); - curl_exec($req); - curl_close($req); - } -} diff --git a/src/Robots.php b/src/Robots.php index d57bc63..1dfdb5e 100644 --- a/src/Robots.php +++ b/src/Robots.php @@ -5,97 +5,123 @@ /** * @package Melbahja\Seo - * @since v2.0 * @see https://git.io/phpseo * @license MIT - * @copyright 2019-present Mohamed Elabhja + * @copyright Mohamed Elbahja + * */ -class Robots implements SeoInterface +class Robots implements SeoInterface, \Stringable { - /** - * Robots rules. 
+ * All robot.txt rules * @var array */ - protected $rules = []; + protected array $rules = []; /** - * Sitemap urls. - * @var array + * Add a comment line. + * + * @param string $text Comment text + * @return self */ - protected $sitemaps = []; - + public function addComment(string $text): self + { + $this->rules[] = ['type' => 'comment', 'text' => $text]; + return $this; + } /** - * Add rules for bot by user agent name. + * Add a sitemap URL. * - * @param string $userAgent bot user agent name - * @param array $rules - * @return Robots + * @param string $url Sitemap URL + * @return self */ - public function bot(string $userAgent, array $rules): Robots + public function addSitemap(string $url): self { - $this->rules[$userAgent] = $rules; + $this->rules[] = ['type' => 'sitemap', 'url' => $url]; return $this; } /** - * Set sitemap url. + * Add rules for a bot by user agent name. * - * @param string $url - * @return Robots + * @param string $userAgent Bot user agent name + * @param array $disallow Array of paths to disallow + * @param array $allow Array of paths to allow + * @param int|null $crawlDelay Crawl delay in seconds + * @return self */ - public function sitemap(string $url): Robots + public function addRule(string $userAgent = '*', array $disallow = [], array $allow = [], ?int $crawlDelay = null): self { - $this->sitemaps[] = $url; + $this->rules[] = [ + 'type' => 'rule', + 'userAgent' => $userAgent, + 'disallow' => $disallow, + 'allow' => $allow, + 'crawlDelay' => $crawlDelay + ]; + return $this; } + /** + * Save robots txt to a file. + * + * @param string $path + * @return bool + */ + public function saveTo(string $path): bool + { + return file_put_contents($path, (string) $this) !== false; + } /** - * Build robots rules. + * Build robots txt content. 
* * @return string */ public function __toString(): string { - $out = "# Autogenerated by melbahja/seo\r\n"; - - foreach ($this->sitemaps as $url) + $out = ""; + foreach ($this->rules as $rule) { - $out .= "Sitemap: {$url}\r\n"; - } + switch ($rule['type']) + { + case 'comment': - if ($out !== "") { - $out .= "\r\n"; - } + foreach (explode("\n", $rule['text']) as $line) + { + $out .= "# {$line}". PHP_EOL; + } + break; - foreach($this->rules as $agent => $rules) - { - $out .= "User-agent: {$agent}\r\n"; + case 'sitemap': - if (isset($rules['allow'])) { - foreach ($rules['allow'] as $v) - { - $out .= "Allow: {$v}\r\n"; - } - } + $out .= "Sitemap: {$rule['url']}". PHP_EOL; + break; - if (isset($rules['disallow'])) { - foreach($rules['disallow'] as $v) - { - $out .= "Disallow: {$v}\r\n"; - } - } + case 'rule': - if (isset($rules['delay'])) { - $out .= "Crawl-delay: {$rules['delay']}\r\n"; - } + $out .= "User-agent: {$rule['userAgent']}". PHP_EOL; + foreach ($rule['disallow'] as $path) + { + $out .= "Disallow: {$path}". PHP_EOL; + } + + foreach ($rule['allow'] as $path) + { + $out .= "Allow: {$path}". PHP_EOL; + } - $out .= "\r\n"; + if ($rule['crawlDelay'] !== null) { + $out .= "Crawl-delay: {$rule['crawlDelay']}". PHP_EOL; + } + + $out .= PHP_EOL; + break; + } } return $out; } - } diff --git a/src/Schema.php b/src/Schema.php index 8f211c4..a6525ae 100644 --- a/src/Schema.php +++ b/src/Schema.php @@ -2,13 +2,13 @@ namespace Melbahja\Seo; use Melbahja\Seo\Interfaces\SchemaInterface; +use Melbahja\Seo\Schema\Thing; /** * @package Melbahja\Seo - * @since v2.0 * @see https://git.io/phpseo * @license MIT - * @copyright 2019-present Mohamed Elabhja + * @copyright Mohamed Elbahja */ class Schema implements SchemaInterface { @@ -30,14 +30,24 @@ public function __construct(SchemaInterface ...$things) /** * Add schema item to the graph. 
* - * @param SchemaInterface $thing + * @param Thing $thing */ - public function add(SchemaInterface $thing): SchemaInterface + public function add(Thing $thing): SchemaInterface { $this->things[] = $thing; return $this; } + /** + * Get schema items + * + * @return Thing[] + */ + public function all(): array + { + return $this->things; + } + /** * Get data as array * @@ -45,9 +55,13 @@ public function add(SchemaInterface $thing): SchemaInterface */ public function jsonSerialize(): array { + if (count($this->things) === 1) { + return $this->things[0]->jsonSerialize(); + } + return [ '@context' => 'https://schema.org', - '@graph' => $this->things + '@graph' => $this->things, ]; } diff --git a/src/Schema/CreativeWork.php b/src/Schema/CreativeWork.php new file mode 100644 index 0000000..2a8bf53 --- /dev/null +++ b/src/Schema/CreativeWork.php @@ -0,0 +1,12 @@ +data = $data; - $this->type = $type; + /** + * @param string|array|null $type Schema type(s), defaults to class name if null + */ + protected string|array|null $type = null, + + /** + * @param string|null $id The @id identifier for this object + */ + protected ?string $id = null, + + /** + * @param string $context The @context URL + */ + protected string $context = "https://schema.org" + ) { + + if ($this->id !== null) { + $this->props['@id'] = $this->id; + } + + if (empty($this->type)) { + $parts = explode("\\", static::class); + $this->type = end($parts); + } } + /** + * Get a prop value by name + * + * @param string $name + * @return mixed The prop value or null if not set + */ public function __get(string $name) { - return $this->data[$name] ?? null; + return $this->props[$name] ?? 
null; } - + /** + * Set a prop value by name + * + * @param string $name + * @param mixed $value + */ public function __set(string $name, $value) { - $this->data[$name] = $value; + $this->props[$name] = $value; } - public function jsonSerialize(): array + /** + * Dynamically set props via method calls with chainable syntax + * + * @param string $name + * @param array $args single value or array or Thing object + * @return self + */ + public function __call(string $name, array $args): self { - $data = [ - '@type' => $this->type, - '@context' => $this->context ?? "https://schema.org/", - ]; + $this->props[$name] = count($args) === 1 ? $args[0] : $args; + return $this; + } - return array_merge($this->data, $data); + /** + * Serialize to JSON-LD format + * + * @return array + */ + public function jsonSerialize(): array + { + return array_merge(['@type' => $this->type, '@context' => $this->context], $this->props); } + /** + * Render as JSON-LD script tag for HTML output + * + * @return string + */ public function __toString(): string { return ''; diff --git a/src/Schema/Things/ContactPoint.php b/src/Schema/Things/ContactPoint.php deleted file mode 100644 index c0d684f..0000000 --- a/src/Schema/Things/ContactPoint.php +++ /dev/null @@ -1,25 +0,0 @@ -data['telephone']=$value; - return $this; - } - - public function setContactType(string $value) :self - { - $this->data['contactType']=$value; - return $this; - } -} \ No newline at end of file diff --git a/src/Schema/Things/Offer.php b/src/Schema/Things/Offer.php deleted file mode 100644 index 59b1eb0..0000000 --- a/src/Schema/Things/Offer.php +++ /dev/null @@ -1,37 +0,0 @@ -data['availability']=$value; - return $this; - } - - public function setPriceCurrency(string $value) :self - { - $this->data['priceCurrency']=$value; - return $this; - } - - public function setPrice(float $value) :self - { - $this->data['price']=$value; - return $this; - } - - public function setUrl(string $value) :self - { - $this->data['url']=$value; - 
return $this; - } -} \ No newline at end of file diff --git a/src/Schema/Things/Organization.php b/src/Schema/Things/Organization.php deleted file mode 100644 index 05a7ebf..0000000 --- a/src/Schema/Things/Organization.php +++ /dev/null @@ -1,31 +0,0 @@ -data['url']=$value; - return $this; - } - - public function setLogo(string $value) :self - { - $this->data['logo']=$value; - return $this; - } - - public function setContactPoint(ContactPoint $value) :self - { - $this->data['contactPoint']=$value; - return $this; - } -} \ No newline at end of file diff --git a/src/Schema/Things/Product.php b/src/Schema/Things/Product.php deleted file mode 100644 index d2660d8..0000000 --- a/src/Schema/Things/Product.php +++ /dev/null @@ -1,43 +0,0 @@ -data['name']=$value; - return $this; - } - - public function setSku(string $value) :self - { - $this->data['sku']=$value; - return $this; - } - - public function setImage(string $value) :self - { - $this->data['image']=$value; - return $this; - } - - public function setDescription(string $value) :self - { - $this->data['description']=$value; - return $this; - } - - public function setOffers(Offer $value) :self - { - $this->data['offers']=$value; - return $this; - } -} \ No newline at end of file diff --git a/src/Schema/Things/WebPage.php b/src/Schema/Things/WebPage.php deleted file mode 100644 index 2b27d61..0000000 --- a/src/Schema/Things/WebPage.php +++ /dev/null @@ -1,31 +0,0 @@ -data['@id']=$value; - return $this; - } - - public function setUrl(string $value) :self - { - $this->data['url']=$value; - return $this; - } - - public function setName(string $value) :self - { - $this->data['name']=$value; - return $this; - } -} \ No newline at end of file diff --git a/src/Sitemap.php b/src/Sitemap.php index 2dc061f..f191360 100644 --- a/src/Sitemap.php +++ b/src/Sitemap.php @@ -1,252 +1,240 @@ null, - 'index_name' => 'sitemap.xml', - 'sitemaps_url' => null, - ] + protected array $sitemaps = []; /** - * Sitemap files + * Sitemaps data 
generators. + * * @var array */ - , $sitemaps = [] - - /** - * Sitemaps domain name - * @var string - */ - , $domain; + protected array $dSources = []; /** * Initialize new sitemap builder + */ + public function __construct( + + /** + * @param string $baseUrl The base URL for sitemap urls. + */ + public readonly string $baseUrl, + + /** + * @param string|null $saveDir Local directory where generated XML files will be written + */ + public readonly ?string $saveDir = null, + + /** + * @param string $indexName Filename for the root sitemap index file + */ + public readonly string $indexName = 'sitemap.xml', + + /** + * @param string|null $sitemapBaseUrl The generated sitemaps base url in the index sitemap defaults to $baseUrl + */ + private readonly ?string $sitemapBaseUrl = null, + + /** + * Sitemap builders map + * + * @var array + */ + private array $builders = [ + 'news' => NewsBuilder::class, + 'links' => LinksBuilder::class, + 'index' => IndexBuilder::class + ], + + /** + * The output mode of generated sitemaps. + * + * @var OutputMode + */ + protected OutputMode $mode = OutputMode::TEMP, + + /** + * Pretty print indent + * @param string|null + */ + protected readonly ?string $indent = ' ' + ) {} + + + /** + * Register a builder alias * - * @param string $domain The domain name only - * @param array $options + * @param string $alias the builder alias name + * @param string $builder The actual builder class namespace. 
+ * @return self */ - public function __construct(string $domain, array $options = null) + public function register(string $alias, string $builder): self { - $this->domain = $domain; - - if ($options !== null) { - $this->setOptions($options); + if (is_subclass_of($builder, SitemapBuilderInterface::class) === false) { + throw new \SitemapException('The builder must implement SitemapBuilderInterface'); } - } - /** - * Set builer options - * - * @param array $options - * @return SitemapIndexInterface - */ - public function setOptions(array $options): SitemapIndexInterface - { - $this->options = array_merge($this->options, $options); + $this->builders[$alias] = $builder; return $this; } - /** - * Get all sitemap options - * - * @return array - */ - public function getOptions(): array - { - return $this->options; - } /** - * Set save path + * Get sitemaps base url * - * @param string $path - * @return SitemapIndexInterface + * @return string */ - public function setSavePath(string $path): SitemapIndexInterface + public function getSitemapBaseUrl(): string { - $this->options['save_path'] = $path; - return $this; + return $this->sitemapBaseUrl ?? $this->baseUrl; } /** - * Get save path + * Write sitemaps to a file/stream or return as index string in case of memory mode. * - * @return null|string + * @param string|null $uriPath URI path to render the sitemap into, or null will return the xml + * @return bool|string boolean when uri oath passed or a generated xml as string */ - public function getSavePath(): ?string + public function render(?string $uriPath = null): bool|string { - return $this->options['save_path']; - } - - /** - * Set index name - * - * @param string $name - * @return SitemapIndexInterface - */ - public function setIndexName(string $name): SitemapIndexInterface - { - $this->options['index_name'] = $name; - return $this; - } + $index = new IndexBuilder( + mode: $this->mode, + baseUrl: $this->getSitemapBaseUrl(), + filePath: $this->saveDir . 
DIRECTORY_SEPARATOR . $this->indexName, + options: ['indent' => $this->indent], + ); - /** - * Get Index name - * - * @return string - */ - public function getIndexName(): string - { - return $this->options['index_name']; - } + foreach ($this->sitemaps as $k => $sitemap) + { + if ($sitemap->mode !== OutputMode::MEMORY) { + $this->generate($k)->render(); + } - /** - * Set sitemaps url - * - * @param string $url - * @return SitemapIndexInterface - */ - public function setSitemapsUrl(string $url): SitemapIndexInterface - { - $this->options['sitemaps_url'] = $url; - return $this; - } + $index->addSitemap($k); + } - /** - * Get sitemaps url - * - * @return null|string - */ - public function getSitemapsUrl(): ?string - { - return $this->options['sitemaps_url'] ?? $this->domain; + return $index->render($uriPath); } /** - * Get sitemaps domain + * return the sitemap index as string in case of memory mode, or write to targets. * * @return string */ - public function getDomain(): string + public function __toString(): string { - return $this->domain; + return $this->render(); } /** - * Set sitemaps to a path + * Generate sitemaps * - * @param string $path - * @return bool + * @param array $name the name of registred sitemap. + * @return SitemapBuilderInterface retuns the generated sitemap object. */ - public function saveTo(string $path): bool + public function generate(string $name): SitemapBuilderInterface { - return SitemapIndex::build( - $this->getIndexName(), $path, $this->getSitemapsUrl(), $this->sitemaps - ); - } + $dataSource = $this->dSources[$name] ?? 
null; + if ($dataSource === null || isset($this->sitemaps[$name]) === false) { + throw new SitemapException("There is no data source or registred sitemap for {$name}!"); + } - /** - * {@method saveTo} by pre defined save_path option - * - * @param string $path - * @return bool - */ - public function save(): bool - { - if (is_string($this->options['save_path']) === false) { + $builder = $this->sitemaps[$name]; - throw new SitemapException('Invalid or missing save_path option'); + if (is_callable($dataSource)) { + call_user_func_array($dataSource, [$builder]); + return $builder; } - return $this->saveTo($this->options['save_path']); - } + foreach ($dataSource as $item) + { + // in case of array or even Traversable yeild as string + if (is_string($item)) { + $builder->loc($item); + continue; + } + if (($item instanceof SitemapUrl) === false) { + throw new SitemapException("Traversable yeilds can be strings or SitemapUrl object only"); + } - /** - * Generate sitemaps - * - * @param SitemapBuilderInterface $builder - * @param array $options - * @param callable $func - * @return SitemapIndexInterface - */ - public function build(SitemapBuilderInterface $builder, array $options, callable $func): SitemapIndexInterface - { - if (isset($this->sitemaps[$options['name']])) { - throw new SitemapException("The sitemap {$name} already registred!"); + $builder->addItem($item); } - // Generate urls. - call_user_func_array($func, [$builder]); - - return $this->buildTemp($options['name'], $builder); + return $builder; } /** - * Sitemaps generator + * Initialize sitemaps generator from alias * - * @param string $builder - * @param array $args + * @param string $alias The builder alias (e.g. 
'links', 'news', 'index', 'yourBuilderAlias') + * @param array $args [$config, $dataSource] where $config is string|array * @return SitemapIndexInterface */ - public function __call(string $builder, array $args): SitemapIndexInterface + public function __call(string $alias, array $args): self { - if (class_exists($builder = '\Melbahja\Seo\Sitemap\\' . ucfirst($builder) . 'Builder')) { - - if (count($args) !== 2) { + $builder = $this->builders[$alias] ?? null; - throw new SitemapException("Invalid {$builder} arguments"); - - } elseif (is_string($args[0])) { + if ($builder === null) { + throw new SitemapException("The builder alias: '{$alias}' not found."); + } - $args[0] = ['name' => $args[0]]; - } + if (count($args) !== 2) { + throw new SitemapException("{$alias}() expects 2 arguments: [string|array \$options, callable|Traversable|array \$dataSource]"); + } - if (isset($args[0]['name']) === false) { + $options = is_string($args[0]) ? [ 'name' => $args[0] ] : $args[0]; + if (isset($options['name']) === false) { + throw new SitemapException("The {$alias} name is missing!"); + } - throw new SitemapException("Sitemap name is required for {$builder}"); - } + if (isset($this->sitemaps[$options['name']])) { + throw new SitemapException("The sitemap {$options['name']} already registred!"); + } - return $this->build(new $builder($this->domain, $args[0]), ...$args); + if (is_array($args[1]) === false && is_callable($args[1]) === false && ($args[1] instanceof \Traversable) === false) { + throw new SitemapException("{$alias}() Argument[1] must be array, callable, or Traversable"); } - throw new SitemapException("Sitemap builder {$builder} not exists"); - } + $name = $options['name']; + unset($options['name']); + + $options['indent'] = $options['indent'] ?? $this->indent; + + $this->dSources[$name] = $args[1]; + $this->sitemaps[$name] = new $builder( + mode: $this->mode, + baseUrl: $this->baseUrl, + filePath: $this->saveDir . DIRECTORY_SEPARATOR . 
$name, + options: $options, + ); - /** - * Build registred sitemap and save it on temp - * - * @param string $name - * @param SitemapBuilderInterface $builder - * @return SitemapIndexInterface - */ - protected function buildTemp(string $name, SitemapBuilderInterface $builder): SitemapIndexInterface - { - $this->sitemaps[$name] = $builder->saveTemp(); return $this; } } diff --git a/src/Sitemap/IndexBuilder.php b/src/Sitemap/IndexBuilder.php new file mode 100644 index 0000000..8e003de --- /dev/null +++ b/src/Sitemap/IndexBuilder.php @@ -0,0 +1,246 @@ + null, + ]; + + /** + * Initialize sitemap index builder + */ + public function __construct( + + /** + * @param string $baseUrl The base URL for sitemap URLs. + */ + public readonly string $baseUrl, + + /** + * @param string|null $filePath The sitemap index file name. + */ + public readonly ?string $filePath = null, + + /** + * @param int Maximum sitemaps in index + */ + public int $maxUrls = 25000, + + /** + * @param array Sitemap config options + */ + protected array $options = [], + + /** + * The output mode of generated sitemaps. + * + * @param OutputMode + */ + protected OutputMode $mode = OutputMode::TEMP, + + /** + * The resource in case of STREAM mode, defaults to stdout. + * + * @param resource + */ + private $stream = STDOUT, + ) { + + switch ($mode) + { + case OutputMode::TEMP: + + if ($filePath === null) { + throw new SitemapException("Output \$filePath can not be empty!"); + } + + $this->tempPath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . md5(uniqid()) . 
'.xml'; + $this->writer = new XMLWriter(); + $this->writer->openUri($this->tempPath); + break; + + case OutputMode::FILE: + + if ($filePath === null) { + throw new SitemapException("Output \$filePath can not be empty!"); + } + + $this->writer = new XMLWriter(); + $this->writer->openUri($filePath); + break; + + case OutputMode::STREAM: + + if (method_exists(XMLWriter::class, 'toStream')) { + + $this->writer = XMLWriter::toStream($stream); + + } else { // php < 8.4 workaround + + $this->tempPath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . md5(uniqid()) . '.xml'; + $this->writer = new XMLWriter(); + $this->writer->openUri($this->tempPath); + } + + break; + + default: + $this->writer = new XMLWriter(); + $this->writer->openMemory(); + break; + } + + $this->options = array_merge($this->defaultOptions, $options); + if ($this->options['indent'] !== null) { + $this->writer->setIndent(true); + $this->writer->setIndentString($this->options['indent']); + } + + // Start xml doc + $this->writer->startDocument(version: '1.0', encoding: 'UTF-8'); + $this->writer->writeComment(' Generated by https://git.io/phpseo '); + $this->writer->startElement('sitemapindex'); + $this->writer->writeAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9'); + } + + /** + * addSitemap will commit last sitemap and start new one. 
+ */ + public function addSitemap(string $url): self + { + return $this->commit()->url($url); + } + + /** + * Register new sitemap URL + */ + public function url(string $url): self + { + if ($this->maxUrls <= 0) { + throw new SitemapException("The maximum sitemaps has been exhausted"); + } + + $this->sitemap['loc'] = Utils::encodeSitemapUrl(Utils::resolveRelativeUrl($this->baseUrl, $url)); + return $this; + } + + /** + * Set last modification date + */ + public function lastMod(string|int $date): self + { + $this->sitemap['lastmod'] = Utils::formatDate($date); + return $this; + } + + /** + * Commit the pending sitemap to index + */ + public function commit(): self + { + if (empty($this->sitemap)) { + return $this; + } + + $this->writer->startElement('sitemap'); + foreach ($this->sitemap as $name => $value) + { + $this->writer->writeElement($name, (string) $value); + } + $this->writer->endElement(); + + $this->maxUrls--; + $this->sitemap = []; + + return $this; + } + + /** + * Save/Render generated sitemap xml + * + * @param string|null $uriPath can only be passed on OutputMode::MEMORY + * @return bool|string bool in case of mode is not memory, and string if writing to memory. 
+ */ + public function render(?string $uriPath = null): bool|string + { + $this->commit(); + $this->writer->endElement(); + $this->writer->endDocument(); + + if ($this->mode === OutputMode::MEMORY) { + + if ($uriPath !== null) { + return file_put_contents($uriPath, $this->writer->outputMemory()) !== false; + } + + return $this->writer->outputMemory(); + } + + $this->writer->flush(); + if ($this->mode === OutputMode::TEMP) { + return rename($this->tempPath, $this->filePath); + } + + // php < 8.4 workaround + if ($this->mode === OutputMode::STREAM && method_exists(XMLWriter::class, 'toStream') === false) { + + $tempFd = fopen($this->tempPath, 'r'); + $stcopy = stream_copy_to_stream($tempFd, $this->stream); + fclose($tempFd); + + return (bool) $stcopy; + } + + return true; + } + + /** + * Get XML as string in case of memory mode, other modes will write to target. + */ + public function __toString(): string + { + return $this->render(); + } + + /** + * Cleanup + */ + public function __destruct() + { + if (isset($this->tempPath) && file_exists($this->tempPath)) { + @unlink($this->tempPath); + } + } +} \ No newline at end of file diff --git a/src/Sitemap/LinksBuilder.php b/src/Sitemap/LinksBuilder.php index 2c13524..47c924a 100644 --- a/src/Sitemap/LinksBuilder.php +++ b/src/Sitemap/LinksBuilder.php @@ -1,11 +1,573 @@ ['thumbnail_loc', 'title', 'description'], + 'freqs' => ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'] + ]; + + /** + * Current URL being built + */ + protected array $url = []; + + /** + * XMLWriter instance + */ + protected readonly XMLWriter $writer; + + /** + * the tmp file path in case of saving to a file or OutputMode is TEMP. 
+ */ + protected readonly string $tempPath; + + /** + * Sitemap options + */ + protected array $defaultOptions = [ + 'images' => false, + 'videos' => false, + 'news' => false, + 'localized' => false, + 'cdata' => [], // fields to wrap in CDATA for eg: ['video:title', 'video:description', 'image:caption'] + 'indent' => null, // pretty print indent character + ]; + + /** + * Initialize sitemap builder + */ + public function __construct( + + /** + * @param string $baseUrl The base URL for sitemap urls. + */ + public readonly string $baseUrl, + + /** + * @param string|null $filePath The sitemap file name. + */ + public readonly ?string $filePath = null, + + /** + * @param int Maximum urls in to generate in current sitemap + */ + public int $maxUrls = 25000, + + /** + * The output mode of generated sitemaps. + * + * @param OutputMode + */ + public readonly OutputMode $mode = OutputMode::TEMP, + + /** + * @param array Sitemap config options + */ + protected array $options = [], + + /** + * The resource in case of STREAM mode, defaults to stdout. + * + * @param resource + */ + private $stream = STDOUT, + ){ + + // init xml writer + switch ($mode) + { + case OutputMode::TEMP: + + if ($filePath === null) { + throw new SitemapException("Output \$filePath can not be empty!"); + } + + $this->tempPath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . md5(uniqid()) . '.xml'; + $this->writer = new XMLWriter(); + $this->writer->openUri($this->tempPath); + break; + + case OutputMode::FILE: + + if ($filePath === null) { + throw new SitemapException("Output \$filePath can not be empty!"); + } + + $this->writer = new XMLWriter(); + $this->writer->openUri($filePath); + break; + + case OutputMode::STREAM: + + if (method_exists(XMLWriter::class, 'toStream')) { + + $this->writer = XMLWriter::toStream($stream); + + } else { // php < 8.4 workaround + + $this->tempPath = sys_get_temp_dir() . DIRECTORY_SEPARATOR . md5(uniqid()) . 
'.xml'; + $this->writer = new XMLWriter(); + $this->writer->openUri($this->tempPath); + } + + break; + + default: + $this->writer = new XMLWriter(); + $this->writer->openMemory(); + break; + } + + $this->options = array_merge($this->defaultOptions, $options); + if ($this instanceof SitemapSetupableInterface) { + $this->options = $this->preSetup($this->options); + } + + // output fmt + if ($this->options['indent'] !== null) { + $this->writer->setIndent(true); + $this->writer->setIndentString($this->options['indent']); + } + + $this->writer->startDocument(version: '1.0', encoding: 'UTF-8'); + $this->writer->writeComment(' Generated by https://git.io/phpseo '); + $this->writer->startElement('urlset'); + $this->writer->writeAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9'); + + if ($this->options['images']) { + $this->writer->writeAttribute('xmlns:image', static::IMAGE_NS); + } + + if ($this->options['news']) { + $this->writer->writeAttribute('xmlns:news', static::NEWS_NS); + } + + if ($this->options['videos']) { + $this->writer->writeAttribute('xmlns:video', static::VIDEO_NS); + } + + if ($this->options['localized']) { + $this->writer->writeAttribute('xmlns:xhtml', static::XHTML_NS); + } + } + + /** + * Loc will commit last url and start new one. + */ + public function loc(string $url): self + { + return $this->commit()->url($url); + } + + /** + * Register new url + */ + public function url(string $url): self + { + if ($this->maxUrls <= 0) { + throw new SitemapException("The maximum urls has been exhausted"); + } + + $this->url['loc'] = Utils::encodeSitemapUrl(Utils::resolveRelativeUrl($this->baseUrl, $url)); + return $this; + } + + /** + * Set alternative language url for multi lang support. 
+ * + * @param string $url + * @param string $lang ISO 639-1 or ISO 3166-1 alpha-2 + */ + public function alternate(string $url, string $lang): self + { + $this->url['alternate'][] = [$url, $lang]; // encoded and escaped in commit + return $this; + } + + /** + * Add a SitemapUrl object to the sitemap + * + * @param SitemapUrl $url + * @return self + */ + public function addItem(SitemapUrl $url): self + { + $this->loc($url->url); + + if ($url->lastmod !== null) { + $this->lastMod($url->lastmod); + } + + if ($url->changefreq !== null) { + $this->changeFreq($url->changefreq); + } + + if ($url->priority !== null) { + $this->priority($url->priority); + } + + // I assume you're using a news builder! + if ($url->news !== null) { + $this->news($url->news); + } + + foreach ($url->images as $img) + { + $this->image($img['loc'], $img); + } + + foreach ($url->videos as $vid) + { + $this->video($vid['title'], $vid); + } + + foreach ($url->alternates as $alt) + { + $this->alternate($alt['href'], $alt['lang']); + } + + return $this; + } + + /** + * Commit the pending url to sitemap + */ + public function commit(): self + { + if (empty($this->url)) { + return $this; + } + + $this->writer->startElement('url'); + + foreach ($this->url as $name => $value) + { + // images handler + if ($name === 'image') { + + foreach ($value as $img) + { + $this->writer->startElementNs('image', 'image', null); + foreach ($img as $key => $val) + { + $this->writeElement('image', $key, $val); + } + $this->writer->endElement(); + } + continue; + } + + // videos handler (supports multiple videos per URL) + if ($name === 'video') { + + foreach ($value as $vid) + { + $this->writer->startElementNs('video', 'video', null); + + foreach ($vid as $key => $val) + { + // multiple video nodes + // like multi + if (is_array($val) && array_is_list($val)) { + + foreach ($val as $item) + { + $item = is_array($item) ? 
$item : ['value' => $item]; + + $this->writer->startElementNs('video', $key, null); + foreach (($item['attrs'] ?? []) as $attr => $aVal) + { + $this->writer->writeAttribute($attr, $aVal); + } + $this->writeText($item['value'], 'video:' . $key); + $this->writer->endElement(); + } + continue; + } + + $val = is_array($val) ? $val : ['value' => $val]; + + $this->writer->startElementNs('video', $key, null); + + foreach (($val['attrs'] ?? []) as $attr => $aVal) + { + $this->writer->writeAttribute($attr, $aVal); + } + + $this->writeText($val['value'], 'video:'. $key); + $this->writer->endElement(); + + // $this->writeCDataElement('video', $key, $val); + } + + $this->writer->endElement(); + } + continue; + } + + // news + if ($name === 'news') { + + $this->writer->startElementNs('news', 'news', null); + $this->writer->startElementNs('news', 'publication', null); + $this->writeElement('news', 'name', $value['name']); + $this->writer->writeElementNs('news', 'language', null, $value['language']); + $this->writer->endElement(); + + foreach ($value as $key => $val) + { + if ($key !== 'name' && $key !== 'language') { + $this->writeElement('news', $key, $val); + } + } + + $this->writer->endElement(); + continue; + } + + // alternate hreflangs + if ($name === 'alternate') { + + foreach ($value as $alt) + { + $this->writer->startElementNs('xhtml', 'link', null); + $this->writer->writeAttribute('rel', 'alternate'); + $this->writer->writeAttribute('href', Utils::encodeSitemapUrl(Utils::resolveRelativeUrl($this->baseUrl, $alt[0]))); + $this->writer->writeAttribute('hreflang', $alt[1]); + $this->writer->endElement(); + } + continue; + } + + // std elements loc, lastmod, changefreq, priority + $this->writer->writeElement($name, (string) $value); + } + + // End url + $this->writer->endElement(); + + $this->maxUrls--; + $this->url = []; + + return $this; + } + + /** + * Last modification date + */ + public function lastMod(string|int $date): self + { + $this->url['lastmod'] = 
Utils::formatDate($date); + return $this; + } + + /** + * Set a image + */ + public function image(string $imageUrl, array $options = []): self + { + if ($this->options['images'] === false) { + throw new SitemapException("Before set a image, enable images option"); + } + + $this->url['image'][] = array_merge($options, [ + 'loc' => Utils::resolveRelativeUrl($this->baseUrl, $imageUrl), + ]); + + return $this; + } + + /** + * Add a video to current url. + * + * @param string $title The title of the video. + * @param array $options Video options. + * For elem with attributes use: ['player_loc' => ['value' => 'url', 'attrs' => ['allow_embed' => 'yes']]] + */ + public function video(string $title, array $options = []): self + { + if ($this->options['videos'] === false) { + throw new SitemapException("Before set a video, enable videos option first"); + } + + $options['title'] = $title; + + // resolve alias + if (isset($options['thumbnail'])) { + $options['thumbnail_loc'] = $options['thumbnail']; + unset($options['thumbnail']); + } + + foreach ($this->validation['video'] as $v) + { + if (!isset($options[$v])) { + throw new SitemapException("video {$v} option is required"); + } + } + + if (!isset($options['content_loc']) && !isset($options['player_loc'])) { + throw new SitemapException("Raw video url content_loc or player_loc embed is required"); + } + + $options['thumbnail_loc'] = Utils::resolveRelativeUrl($this->baseUrl, $options['thumbnail_loc']); + $this->url['video'][] = $options; + + return $this; + } + + /** + * Set change frequency + */ + public function changeFreq(string $freq): self + { + if (!in_array($freq, $this->validation['freqs'], true)) { + throw new SitemapException("changefreq value not valid"); + } + + $this->url['changefreq'] = $freq; + return $this; + } + + /** + * Url priority + */ + public function priority(float|int|string $priority): self + { + // priority is 0.0 and 1.0 + $priority = (float) $priority; + if ($priority < 0.0 || $priority > 1.0) { + 
throw new SitemapException("Priority must be between 0.0 and 1.0"); + } + + $this->url['priority'] = number_format($priority, 1, '.', ''); + return $this; + } + + /** + * Save generated sitemap as file + * + * @param string|null $uriPath can only be passed on OutputMode::MEMORY, if null will just follow current mode. + * @return bool|string bool in case of mode is not memory, and string if it a writing to memory. + */ + public function render(?string $uriPath = null): bool|string + { + $this->commit(); + $this->writer->endElement(); + $this->writer->endDocument(); + + if ($this->mode === OutputMode::MEMORY) { + + if ($uriPath !== null) { + return file_put_contents($uriPath, $this->writer->outputMemory()) !== false; + } + + return $this->writer->outputMemory(); + } + + $this->writer->flush(); + if ($this->mode === OutputMode::TEMP) { + return rename($this->tempPath, $this->filePath); + } + + // php < 8.4 workaround + if ($this->mode === OutputMode::STREAM && method_exists(XMLWriter::class, 'toStream') === false) { + + $tempFd = fopen($this->tempPath, 'r'); + $stcopy = stream_copy_to_stream($tempFd, $this->stream); + fclose($tempFd); + + return (bool) $stcopy; + } + + return true; + } + + + /** + * Get XML as string in case of memory mode, or write to target. 
+ */ + public function __toString(): string + { + return $this->render(); + } + + /** + * Write element with optional CDATA wrapping + * + * @param string $namespace namespace prefix (e.g., 'video', 'image', 'news') + * @param string $key element name + * @param mixed $value element value + */ + protected function writeElement(string $namespace, string $key, mixed $value): bool + { + + if ($this->shouldUseCData("{$namespace}:{$key}", $value)) { + + $this->writer->startElementNs($namespace, $key, null); + $this->writer->writeCData((string) $value); + return $this->writer->endElement(); + } + + return $this->writer->writeElementNs($namespace, $key, null, (string) $value); + } + + /** + * Write text/CDATA auto detceted or based on manully set fields + * + * @param mixed $value + * @param string $context Optional context for CDATA detection + */ + protected function writeText(mixed $value, string $field = ''): bool + { + if ($field !== '' && $this->shouldUseCData($field, $value)) { + return $this->writer->writeCData((string) $value); + } + + return $this->writer->text((string) $value); + } + + /** + * Check if field should use CDATA + */ + protected function shouldUseCData(string $field, mixed $value = null): bool + { + if (in_array($field, $this->options['cdata'], true)) { + return true; + } + + if ($value === null || ($value = (string) $value) === '') { + return false; + } + + return str_contains($value, '<') || str_contains($value, '>') || str_contains($value, '&'); + } + + /** + * Cleanup + */ + public function __destruct() + { + if (isset($this->tempPath) && file_exists($this->tempPath)) { + @unlink($this->tempPath); + } + } +} diff --git a/src/Sitemap/NewsBuilder.php b/src/Sitemap/NewsBuilder.php index da5d5b2..8cf95d6 100644 --- a/src/Sitemap/NewsBuilder.php +++ b/src/Sitemap/NewsBuilder.php @@ -3,17 +3,16 @@ use Melbahja\Seo\{ Exceptions\SitemapException, - Interfaces\SitemapBuilderInterface + Interfaces\SitemapSetupableInterface }; /** * @package Melbahja\Seo 
- * @since v2.0 * @see https://git.io/phpseo * @license MIT - * @copyright 2019-present Mohamed Elabhja + * @copyright Mohamed Elbahja */ -class NewsBuilder extends SitemapBuilder +class NewsBuilder extends LinksBuilder implements SitemapSetupableInterface { /** @@ -25,16 +24,15 @@ class NewsBuilder extends SitemapBuilder /** * Initialize NewsBuilder * - * @param string $domain - * @param array|null $options - * @param string $ns + * @param string $options + * @return array */ - public function __construct(string $domain, ?array $options = null, string $ns = '') + public function preSetup(array $options): array { - parent::__construct($domain, $options, $ns .' xmlns:news="'. static::NEWS_NS . '"'); + $options['news'] = true; + return $options; } - /** * Set dafault publication * @@ -44,12 +42,7 @@ public function __construct(string $domain, ?array $options = null, string $ns = */ public function setPublication(string $name, string $lang): SitemapBuilderInterface { - $this->publication = - [ - 'name' => $name, - 'lang' => $lang - ]; - + $this->publication = ['name' => $name, 'lang' => $lang]; return $this; } @@ -65,14 +58,14 @@ public function getPublication(): array /** - * Set a news (Fake news not allowed ^_~) + * Add news elem to the current url. * * @param array $options - * @return SitemapBuilderInterface + * @return self */ - public function news(array $options): SitemapBuilderInterface + public function news(array $options): self { - $options['name'] = $options['name'] ?? $this->publication['name']; + $options['name'] = $options['name'] ?? $this->publication['name']; $options['language'] = $options['language'] ?? 
$this->publication['lang']; if (isset($options['name'], $options['language'], $options['publication_date'], $options['title']) === false) { diff --git a/src/Sitemap/OutputMode.php b/src/Sitemap/OutputMode.php new file mode 100644 index 0000000..09cd09d --- /dev/null +++ b/src/Sitemap/OutputMode.php @@ -0,0 +1,17 @@ + ['thumbnail_loc', 'title', 'description'], - 'freq' => ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'] - ] - - /** - * Url tag - * @var array - */ - , $url = [] - - /** - * Sitemap domain name - * @var string - */ - , $domain - - /** - * Sitemap name - * @var string - */ - , $name - - /** - * Maximum urls in single sitemap (The maximum by google is 50000 urls but not 50MB in size) - * @var integer - */ - , $max = 30000 - - /** - * @var SimpleXMLElement - */ - , $doc - - /** - * Sitemap options - */ - , $options = - [ - 'images' => false, - 'videos' => false, - 'localized' => false, - ]; - - - /** - * Initialize sitemap builder - * - * @param string $domain - * @param array|null $options - * @param string $ns Additional namespaces. - */ - public function __construct(string $domain, ?array $options = null, string $ns = '') - { - $this->domain = $domain; - $this->options = array_merge($this->options, $options ?? []); - - $urlset = 'options['images']) { - $urlset .= ' xmlns:image="'. static::IMAGE_NS .'"'; - } - - if ($this->options['videos']) { - $urlset .= ' xmlns:video="'. static::VIDEO_NS .'"'; - } - - if ($this->options['localized']) { - $urlset .= 'xmlns:xhtml="'. static::XHTML_NS .'"'; - } - - $this->doc = new SimpleXMLElement('' . $urlset . "{$ns}/>"); - } - - /** - * Append last url and start new one - * - * @param string $path - * @return SitemapBuilderInterface - */ - public function loc(string $path): SitemapBuilderInterface - { - if ($path[0] !== '/') { - $path = "/{$path}"; - } - - return $this->append()->url($this->domain . 
$path); - } - - /** - * Register new url - * - * @param string $url - * @return SitemapBuilderInterface - */ - public function url(string $url): SitemapBuilderInterface - { - if ($this->max <= 0) { - throw new SitemapException("The maximum urls has been exhausted"); - } - - $this->url['loc'] = Helper::escapeUrl($url); - return $this; - } - - /** - * Set alternative language url for multi lang support. - * - * @param string $url - * @param string $lang ISO 639-1 or ISO 3166-1 alpha-2 - * @return SitemapBuilderInterface - */ - public function alternate(string $path, string $lang) - { - if ($path[0] !== '/') { - $path = "/{$path}"; - } - - $this->url['alternate'][] = [Helper::escapeUrl($this->domain . $path), $lang]; - return $this; - } - - /** - * Append url - * - * @return SitemapBuilderInterface - */ - public function append(): SitemapBuilderInterface - { - if (empty($this->url) === false) { - - $url = $this->doc->addChild('url'); - - foreach ($this->url as $n => $v) - { - if ($n === 'image' || $n === 'video') { - - foreach ($v as $options) - { - $child = $url->addChild( - "{$n}:{$n}", null, ($n === 'image' ? 
static::IMAGE_NS : static::VIDEO_NS) - ); - - foreach ($options as $x => $o) - { - $child->addChild("{$n}:{$x}", $o); - } - } - - continue; - - } elseif ($n === 'news') { - - $child = $url->addChild('news:news', null, static::NEWS_NS); - - $pub = $child->addChild('news:publication'); - $pub->addChild('news:name', $v['name']); - $pub->addChild('news:language', $v['language']); - unset($v['name'], $v['language']); - - foreach ($v as $k => $p) - { - $child->addChild("{$n}:{$k}", $p); - } - - continue; - - } elseif ($n === 'alternate') { - - - foreach ($v as $k => $alt) - { - $child = $url->addChild('xhtml:link', null, static::XHTML_NS); - $child->addAttribute('rel', 'alternate'); - $child->addAttribute('href', $alt[0]); - $child->addAttribute('hreflang', $alt[1]); - } - - continue; - } - - $url->addChild($n, $v); - } - - $this->max--; - $this->url = []; - } - - return $this; - } - - /** - * Last modification date - * - * @return SitemapBuilderInterface - */ - public function lastMod($date): SitemapBuilderInterface - { - $this->url['lastmod'] = $this->pasreDate($date); - - return $this; - } - - /** - * Set image - * - * @todo Validate image options - * @param string $imageUrl - * @param array $options - * @return SitemapBuilderInterface - */ - public function image(string $imageUrl, array $options = []): SitemapBuilderInterface - { - if ($this->options['images'] === false) { - throw new SitemapException("Before set a image, enable images option"); - } - - $options['loc'] = $this->getByRelativeUrl($imageUrl); - $this->url['image'][] = $options; - - return $this; - } - - /** - * Set a video - * - * @param string $title - * @param array $options - * @return SitemapBuilderInterface - */ - public function video(string $title, array $options = []): SitemapBuilderInterface - { - if ($this->options['videos'] === false) { - throw new SitemapException("Before set a video, enable videos option first"); - } - - $options['title'] = $title; - - if (isset($options['thumbnail'])) { - 
- $options['thumbnail_loc'] = $options['thumbnail']; - unset($options['thumbnail']); - } - - foreach ($this->validation['video'] as $v) - { - if (isset($options[$v]) === false) { - throw new SitemapException("video {$v} options is required"); - } - } - - if (isset($options['content_loc']) === false && isset($options['player_loc']) === false) { - throw new SitemapException("Raw video url content_loc or player_loc is required"); - } - - $this->url['video'][] = $options; - - return $this; - } - - /** - * @param string $freq - * @return SitemapBuilderInterface - */ - public function changeFreq(string $freq): SitemapBuilderInterface - { - if (in_array($freq, $this->validation['freq']) === false) { - throw new SitemapException("changefreq value not valid"); - } - - $this->url['changefreq'] = $freq; - - return $this; - } - - /** - * changefreq alias - * - * @param string $freq - * @return SitemapBuilderInterface - */ - public function freq(string $freq): SitemapBuilderInterface - { - return $this->changefreq($freq); - } - - /** - * Url priority - * - * @param string $priority - * @return SitemapBuilderInterface - */ - public function priority(string $priority): SitemapBuilderInterface - { - $this->url['priority'] = $priority; - return $this; - } - - /** - * Get domain name - * - * @return string - */ - public function getDomain(): string - { - return $this->domain; - } - - /** - * Save generated sitemap as file - * - * @param string $path - * @return bool - */ - public function saveTo(string $path): bool - { - return $this->append()->getDoc()->asXML($path); - } - - /** - * Save to temp - * - * @return string - */ - public function saveTemp(): string - { - if ($this->saveTo($temp = sys_get_temp_dir() . DIRECTORY_SEPARATOR . 
md5(uniqid()))) { - return $temp; - } - - throw new SitemapException("Saving {$this->name} to temp failed"); - } - - /** - * Get XML object - * - * @return SimpleXMLElement - */ - public function getDoc(): SimpleXMLElement - { - return $this->doc; - } - - /** - * Fix relative urls - * - * @param string $url - * @return string - */ - protected function getByRelativeUrl(string $url): string - { - if (strpos($url, '://') === false) { - $url = $this->domain . ($url[0] !== '/' ? "{$url}/" : $url); - } - - return $url; - } - - /** - * Convert date to ISO8601 format - * - * @param int|string $date - * @return string - */ - protected function pasreDate($date): string - { - if (is_int($date) === false) { - $date = strtotime($date); - } - - return date('c', $date); - } - -} diff --git a/src/Sitemap/SitemapIndex.php b/src/Sitemap/SitemapIndex.php deleted file mode 100644 index f802435..0000000 --- a/src/Sitemap/SitemapIndex.php +++ /dev/null @@ -1,52 +0,0 @@ -'); - - foreach ($maps as $name => $file) - { - if (rename($file, ($dest = $path . $name)) === false) { - throw new SitemapException("Moving the file {$dest} failed!"); - } - - $sitemap = $dom->addChild('sitemap'); - $sitemap->addChild('loc', $url . $name); - $sitemap->addChild('lastmod', date('c')); - } - - return $dom->asXML($path . $index); - } -} diff --git a/src/Sitemap/SitemapUrl.php b/src/Sitemap/SitemapUrl.php new file mode 100644 index 0000000..8ea4509 --- /dev/null +++ b/src/Sitemap/SitemapUrl.php @@ -0,0 +1,68 @@ +images[] = $options; + return $this; + } + + /** + * Add a video to this URL + * + * @param string $title Video title + * @param array $options Required: thumbnail_loc, description. Optional: content_loc, player_loc, etc. + * @return self + */ + public function video(string $title, array $options): self + { + $options['title'] = $title; + $this->videos[] = $options; + return $this; + } + + /** + * Set alternative url. 
+ * + * @param string $url + * @param string $lang ISO 639-1 or ISO 3166-1 alpha-2 + */ + public function alternate(string $url, string $lang): self + { + $this->alternates[] = ['href' => $url, 'lang' => $lang]; + return $this; + } +} diff --git a/src/Utils/HttpClient.php b/src/Utils/HttpClient.php new file mode 100644 index 0000000..fa5d9e4 --- /dev/null +++ b/src/Utils/HttpClient.php @@ -0,0 +1,107 @@ +baseUrl = $baseUrl ? rtrim($baseUrl, '/') : null; + $this->headers = $headers ?? []; + } + + /** + * Execute HTTP request + * + * @param string $method HTTP method GET, POST... + * @param string $url Full URL or path to append to base URL + * @param mixed $body Request body if array will auto JSON encoded + * @param array $headers Additional headers + * @return string|null Response body or null on failure + */ + public function request(string $method, string $url, $body = null, array $headers = []): ?string + { + $ch = curl_init(); + + curl_setopt($ch, CURLOPT_URL, $this->buildUrl($url)); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($ch, CURLOPT_CUSTOMREQUEST, $method); + + $isJson = false; + if ($body !== null) { + if (is_array($body)) { + $body = json_encode($body); + $isJson = true; + } + curl_setopt($ch, CURLOPT_POSTFIELDS, $body); + } + + $headers = array_merge($this->headers, $headers); + $headersList = []; + $hasUAgent = false; + + foreach ($headers as $key => $value) + { + $key = strtolower($key); + if ($isJson && $key === 'content-type') { + continue; // if we have json payload we must have json ctype! 
+ } else if ($key === 'user-agent') { + $hasUAgent = true; + } + $headersList[] = "$key: $value"; + } + + if ($isJson) { + $headersList[] = "content-type: application/json"; + } + + if ($hasUAgent === false) { + $headersList[] = "user-agent: phpseo/v3 (+http://git.io/phpseo)"; + } + + curl_setopt($ch, CURLOPT_HTTPHEADER, $headersList); + + $response = curl_exec($ch); + $this->lastStatus = curl_getinfo($ch, CURLINFO_HTTP_CODE); + + curl_close($ch); + + return $response !== false ? $response : null; + } + + /** + * Get last response status code + * + * @return int HTTP status code + */ + public function getStatusCode(): int + { + return $this->lastStatus; + } + + /** + * Build full URL + */ + private function buildUrl(string $url): string + { + if (filter_var($url, FILTER_VALIDATE_URL)) { + return $url; + } + + return $this->baseUrl . '/' . ltrim($url, '/'); + } +} diff --git a/src/Utils/Utils.php b/src/Utils/Utils.php new file mode 100644 index 0000000..bf58ae9 --- /dev/null +++ b/src/Utils/Utils.php @@ -0,0 +1,130 @@ +', '<'], + ['&', ''', '"', '>', '<'], + $url['scheme'] . "://{$url['host']}{$url['path']}{$url['query']}" + ); + } + + /** + * Encode url for sitemaps, other qoute/senstive chars are already escaped by xml writer! + * + * @param string $url + * @return string + */ + public static function encodeSitemapUrl(string $url): string + { + $url = parse_url($url); + $url['path'] = $url['path'] ?? ''; + $url['query'] = $url['query'] ?? ''; + + if ($url['path'] !== '') { + $url['path'] = implode('/', array_map('rawurlencode', explode('/', $url['path']))); + } + + if ($url['query'] !== '') { + $url['query'] = "?{$url['query']}"; + } + + return "{$url['scheme']}://{$url['host']}{$url['path']}{$url['query']}"; + } + + /** + * Wrap callable func into a Traversable generator. 
+ * + * @param callable $func must have yields + * @return Traversable + */ + public static function generator(callable $func): \Traversable + { + return new class($func) implements \IteratorAggregate + { + public function __construct(private readonly mixed $callable){} + + public function getIterator(): \Traversable + { + return ($this->callable)(); + } + }; + } + + /** + * Resolve a relative URL against a base URL. + * + * @param string $baseUrl + * @param string $url relative or absolute URL. + * @return string absolute URL. + */ + public static function resolveRelativeUrl(string $baseUrl, string $url): string + { + if (str_contains($url, '://') === false) { + return rtrim($baseUrl, '/') . ($url[0] !== '/' ? "/{$url}" : $url); + } + + return $url; + } + + /** + * Normalize a date value to ISO 8601 format. + * + * @param string|int $date + * @return string ISO 8601 date. + * @throws SeoException if the format is invalid. + */ + public static function formatDate(string|int $date): string + { + if (($timestamp = is_int($date) ? 
$date : strtotime($date)) !== false) { + return date('c', $timestamp); + } + + throw new SeoException("Invalid date format: {$date}"); + } +} diff --git a/src/Validation/RobotsValidator.php b/src/Validation/RobotsValidator.php new file mode 100644 index 0000000..fc2dd52 --- /dev/null +++ b/src/Validation/RobotsValidator.php @@ -0,0 +1,110 @@ + $line) + { + $line = trim($line); + $realLine = $lineNum + 1; + + // Skip empty lines and comments + if ($line === '' || str_starts_with($line, '#')) { + continue; + } + + // Split by first colon + $parts = explode(':', $line, 2); + + if (count($parts) !== 2) { + $errors[] = "Line $realLine: Invalid format, missing colon"; + continue; + } + + $direc = trim(strtolower($parts[0])); + $value = trim($parts[1]); + + switch ($direc) + { + case 'user-agent': + + if (empty($value)) { + $errors[] = "Line $realLine: User-agent cannot be empty"; + } + + $currentAgent = $value; + $hasUserAgent = true; + break; + + case 'disallow': + case 'allow': + + if ($currentAgent === null) { + $errors[] = "Line $realLine: $direc must come after User-agent"; + } + if ($value !== '' && !str_starts_with($value, '/')) { + $errors[] = "Line $realLine: Path must start with / or be empty"; + } + break; + + case 'crawl-delay': + + if ($currentAgent === null) { + $errors[] = "Line $realLine: Crawl-delay must come after User-agent"; + } + if (!is_numeric($value) || $value < 0) { + $errors[] = "Line $realLine: Crawl-delay must be non-negative number"; + } + break; + + case 'sitemap': + + if (!filter_var($value, FILTER_VALIDATE_URL)) { + $errors[] = "Line $realLine: Invalid sitemap URL"; + } + break; + + default: + $errors[] = "Line $realLine: Unknown directive '$direc'"; + break; + } + } + + if (!$hasUserAgent && !empty($errors)) { + $errors[] = "No User-agent directive found"; + } + + return empty($errors) ? 
null : $errors; + } +} diff --git a/src/Validation/SchemaRules/AggregateRating.php b/src/Validation/SchemaRules/AggregateRating.php new file mode 100644 index 0000000..4ee6527 --- /dev/null +++ b/src/Validation/SchemaRules/AggregateRating.php @@ -0,0 +1,16 @@ + [ + 'type' => '@Thing', + 'required' => true, + ], + 'ratingValue' => [ + 'type' => 'string|int|float', + 'required' => true, + ], + 'ratingCount' => 'int', + 'reviewCount' => 'int', + 'bestRating' => 'string|int|float', + 'worstRating' => 'string|int|float', +]; diff --git a/src/Validation/SchemaRules/Article.php b/src/Validation/SchemaRules/Article.php new file mode 100644 index 0000000..5960bda --- /dev/null +++ b/src/Validation/SchemaRules/Article.php @@ -0,0 +1,15 @@ + 'string', + 'image' => [ + 'type' => 'array|url|@ImageObject', + 'item_type' => 'url|@ImageObject', + ], + 'datePublished' => 'iso_date', + 'dateModified' => 'iso_date', + 'author' => [ + 'type' => 'array|@Person|@Organization', + 'item_type' => '@Person|@Organization', + ], +]; diff --git a/src/Validation/SchemaRules/BlogPosting.php b/src/Validation/SchemaRules/BlogPosting.php new file mode 100644 index 0000000..7f1beee --- /dev/null +++ b/src/Validation/SchemaRules/BlogPosting.php @@ -0,0 +1,3 @@ + [ + 'type' => '@Person|@Organization|array', + 'item_type' => '@Person|@Organization', + 'required' => true, + ], + 'name' => [ + 'type' => 'string', + 'required' => true, + ], + 'url' => [ + 'type' => 'url', + 'required' => true, + ], + 'workExample' => [ + 'type' => 'array|@Book', + 'item_type' => '@Book', + ], + 'sameAs' => 'url', + 'bookFormat' => 'string', + 'inLanguage' => 'string', + 'isbn' => 'string', + 'datePublished' => 'iso_date', + 'identifier' => 'string', +]; diff --git a/src/Validation/SchemaRules/BreadcrumbList.php b/src/Validation/SchemaRules/BreadcrumbList.php new file mode 100644 index 0000000..3380187 --- /dev/null +++ b/src/Validation/SchemaRules/BreadcrumbList.php @@ -0,0 +1,9 @@ + [ + 'type' => 'array|@ListItem', + 
'item_type' => '@ListItem', + 'required' => true, + ], +]; diff --git a/src/Validation/SchemaRules/ClaimReview.php b/src/Validation/SchemaRules/ClaimReview.php new file mode 100644 index 0000000..d8e9379 --- /dev/null +++ b/src/Validation/SchemaRules/ClaimReview.php @@ -0,0 +1,18 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'reviewRating' => [ + 'type' => '@Rating', + 'required' => true, + ], + 'url' => [ + 'type' => 'url', + 'required' => true, + ], + 'author' => '@Organization|@Person', + 'itemReviewed' => '@Thing', +]; diff --git a/src/Validation/SchemaRules/Course.php b/src/Validation/SchemaRules/Course.php new file mode 100644 index 0000000..250019c --- /dev/null +++ b/src/Validation/SchemaRules/Course.php @@ -0,0 +1,28 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'url' => [ + 'type' => 'url', + 'required' => true, + ], + 'description' => 'string', + 'provider' => [ + 'type' => 'array|@Organization|@Person', + 'item_type' => '@Organization|@Person', + ], + 'courseMode' => [ + 'type' => 'array|string', + 'item_type' => 'string', + ], + 'coursePrerequisites' => 'string', + 'educationalLevel' => 'string', + 'teaches' => [ + 'type' => 'array|string', + 'item_type' => 'string', + ], + 'timeRequired' => 'string', +]; diff --git a/src/Validation/SchemaRules/DataFeed.php b/src/Validation/SchemaRules/DataFeed.php new file mode 100644 index 0000000..9b98bea --- /dev/null +++ b/src/Validation/SchemaRules/DataFeed.php @@ -0,0 +1,14 @@ + [ + 'type' => 'array|@DataFeedItem', + 'item_type' => '@DataFeedItem', + 'rules' => [ + 'dateModified' => 'iso_date', + 'item' => '@Thing', + ], + ], + 'dateModified' => 'iso_date', + 'name' => 'string', +]; diff --git a/src/Validation/SchemaRules/Dataset.php b/src/Validation/SchemaRules/Dataset.php new file mode 100644 index 0000000..715768c --- /dev/null +++ b/src/Validation/SchemaRules/Dataset.php @@ -0,0 +1,42 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'name' => [ + 'type' => 'string', + 
'required' => true, + ], + 'alternateName' => 'string', + 'creator' => [ + 'type' => '@Person|@Organization|array', + 'item_type' => '@Person|@Organization', + ], + 'citation' => 'string|@CreativeWork', + 'funder' => [ + 'type' => '@Person|@Organization|array', + 'item_type' => '@Person|@Organization', + ], + 'hasPart' => [ + 'type' => 'array|@Dataset', + 'item_type' => '@Dataset', + ], + 'isPartOf' => 'url|@Dataset', + 'identifier' => 'url|string|@Thing', + 'isAccessibleForFree' => 'bool', + 'keywords' => 'string', + 'license' => 'url|@CreativeWork', + 'measurementTechnique' => 'string|url', + 'sameAs' => 'url', + 'spatialCoverage' => 'string|@Place', + 'temporalCoverage' => 'string', + 'variableMeasured' => 'string|@Thing', + 'version' => 'string|int|float', + 'url' => 'url', + 'distribution' => [ + 'type' => 'array|@Thing', + 'item_type' => '@Thing', + ], +]; diff --git a/src/Validation/SchemaRules/DiscussionForumPosting.php b/src/Validation/SchemaRules/DiscussionForumPosting.php new file mode 100644 index 0000000..5a0f380 --- /dev/null +++ b/src/Validation/SchemaRules/DiscussionForumPosting.php @@ -0,0 +1,26 @@ + [ + 'type' => '@Person|@Organization', + 'required' => true, + ], + 'datePublished' => [ + 'type' => 'string', + 'required' => true, + ], + 'text' => 'string', + 'image' => 'url|@ImageObject', + 'video' => '@VideoObject', + 'headline' => 'string', + 'comment' => [ + 'type' => 'array|@Thing', + 'item_type' => '@Thing', + ], + 'interactionStatistic' => '@Thing', + 'url' => 'url', + 'dateModified' => 'string', + 'creativeWorkStatus' => 'string', + 'isPartOf' => 'url|@Thing', + 'sharedContent' => '@Thing', +]; diff --git a/src/Validation/SchemaRules/EmployerAggregateRating.php b/src/Validation/SchemaRules/EmployerAggregateRating.php new file mode 100644 index 0000000..77d53b0 --- /dev/null +++ b/src/Validation/SchemaRules/EmployerAggregateRating.php @@ -0,0 +1,19 @@ + [ + 'type' => '@Organization', + 'required' => true, + ], + 'ratingValue' => [ + 'type' 
=> 'string|int|float', + 'required' => true, + ], + 'ratingCount' => [ + 'type' => 'int', + 'required' => true, + ], + 'reviewCount' => 'int', + 'bestRating' => 'int', + 'worstRating' => 'int', +]; diff --git a/src/Validation/SchemaRules/Event.php b/src/Validation/SchemaRules/Event.php new file mode 100644 index 0000000..f81f03c --- /dev/null +++ b/src/Validation/SchemaRules/Event.php @@ -0,0 +1,27 @@ + [ + 'type' => '@Place|@Thing', + 'required' => true, + ], + 'name' => [ + 'type' => 'string', + 'required' => true, + ], + 'startDate' => [ + 'type' => 'string', + 'required' => true, + ], + 'description' => 'string', + 'endDate' => 'string', + 'eventStatus' => 'string', + 'image' => [ + 'type' => 'array|url|@ImageObject', + 'item_type' => 'url|@ImageObject', + ], + 'offers' => '@Thing', + 'organizer' => '@Organization|@Person', + 'performer' => '@Person|@Organization', + 'previousStartDate' => 'string', +]; diff --git a/src/Validation/SchemaRules/FAQPage.php b/src/Validation/SchemaRules/FAQPage.php new file mode 100644 index 0000000..8fc4482 --- /dev/null +++ b/src/Validation/SchemaRules/FAQPage.php @@ -0,0 +1,9 @@ + [ + 'type' => 'array|@Thing', + 'item_type' => '@Thing', + 'required' => true, + ], +]; diff --git a/src/Validation/SchemaRules/ImageObject.php b/src/Validation/SchemaRules/ImageObject.php new file mode 100644 index 0000000..353e323 --- /dev/null +++ b/src/Validation/SchemaRules/ImageObject.php @@ -0,0 +1,10 @@ + 'url', + 'caption' => 'string', + 'contentUrl' => 'url', + 'thumbnail' => 'url', + 'width' => 'int', + 'height' => 'int', +]; diff --git a/src/Validation/SchemaRules/ItemList.php b/src/Validation/SchemaRules/ItemList.php new file mode 100644 index 0000000..3380187 --- /dev/null +++ b/src/Validation/SchemaRules/ItemList.php @@ -0,0 +1,9 @@ + [ + 'type' => 'array|@ListItem', + 'item_type' => '@ListItem', + 'required' => true, + ], +]; diff --git a/src/Validation/SchemaRules/JobPosting.php b/src/Validation/SchemaRules/JobPosting.php new file mode 
100644 index 0000000..5a80792 --- /dev/null +++ b/src/Validation/SchemaRules/JobPosting.php @@ -0,0 +1,14 @@ + 'string', + 'datePosted' => 'iso_date', + 'description' => 'string', + 'hiringOrganization' => '@Organization', + 'jobLocation' => '@Place', + 'baseSalary' => '@Thing', + 'directApply' => 'bool', + 'identifier' => '@Thing', + 'jobLocationType' => 'string', + 'validThrough' => 'iso_date', +]; diff --git a/src/Validation/SchemaRules/ListItem.php b/src/Validation/SchemaRules/ListItem.php new file mode 100644 index 0000000..37d5b6f --- /dev/null +++ b/src/Validation/SchemaRules/ListItem.php @@ -0,0 +1,7 @@ + 'url|@Thing', + 'name' => 'string', + 'position' => 'int', +]; diff --git a/src/Validation/SchemaRules/LocalBusiness.php b/src/Validation/SchemaRules/LocalBusiness.php new file mode 100644 index 0000000..f6d0276 --- /dev/null +++ b/src/Validation/SchemaRules/LocalBusiness.php @@ -0,0 +1,39 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'address' => [ + 'type' => '@PostalAddress', + 'required' => true, + ], + 'telephone' => 'string', + 'priceRange' => 'string', + 'openingHoursSpecification' => [ + 'type' => 'array|@Thing', + 'item_type' => '@Thing', + 'rules' => [ + 'dayOfWeek' => [ + 'type' => 'string|array', + 'item_type' => 'string', + ], + 'opens' => 'string', + 'closes' => 'string', + ], + ], + 'geo' => [ + 'type' => '@Thing', + 'rules' => [ + 'latitude' => 'float|int', + 'longitude' => 'float|int', + ], + ], + 'url' => 'url', + 'sameAs' => [ + 'type' => 'array|url', + 'item_type' => 'url', + ], + 'image' => 'url|@ImageObject', +]; diff --git a/src/Validation/SchemaRules/MathSolver.php b/src/Validation/SchemaRules/MathSolver.php new file mode 100644 index 0000000..f3eba4e --- /dev/null +++ b/src/Validation/SchemaRules/MathSolver.php @@ -0,0 +1,13 @@ + [ + 'type' => 'array|@Thing', + 'item_type' => '@Thing', + ], + 'url' => 'url', + 'usageInfo' => 'url', + 'inLanguage' => 'string|array', + 'assesses' => 'string|array', + 
'learningResourceType' => 'string', +]; diff --git a/src/Validation/SchemaRules/Movie.php b/src/Validation/SchemaRules/Movie.php new file mode 100644 index 0000000..fe8e4e0 --- /dev/null +++ b/src/Validation/SchemaRules/Movie.php @@ -0,0 +1,23 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'url' => [ + 'type' => 'url', + 'required' => true, + ], + 'image' => [ + 'type' => 'array|url|@ImageObject', + 'item_type' => 'url|@ImageObject', + ], + 'dateCreated' => 'iso_date', + 'director' => [ + 'type' => 'array|@Person', + 'item_type' => '@Person', + ], + 'review' => '@Review', + 'aggregateRating' => '@AggregateRating', +]; diff --git a/src/Validation/SchemaRules/NewsArticle.php b/src/Validation/SchemaRules/NewsArticle.php new file mode 100644 index 0000000..7f1beee --- /dev/null +++ b/src/Validation/SchemaRules/NewsArticle.php @@ -0,0 +1,3 @@ + 'string', + 'url' => 'url', + 'logo' => 'string', + 'address' => '@PostalAddress', + 'contactPoint' => [ + 'type' => 'array|@Thing', + 'item_type' => '@Thing', + ], + 'sameAs' => [ + 'type' => 'string|array', + 'item_type' => 'string', + ], + 'description' => 'string', + 'email' => 'email', + 'telephone' => 'string', + 'foundingDate' => 'iso_date', + 'numberOfEmployees' => '@Thing', +]; diff --git a/src/Validation/SchemaRules/Person.php b/src/Validation/SchemaRules/Person.php new file mode 100644 index 0000000..b7f13a5 --- /dev/null +++ b/src/Validation/SchemaRules/Person.php @@ -0,0 +1,16 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'url' => 'url', + 'image' => 'string|@ImageObject', + 'sameAs' => [ + 'type' => 'string|array', + 'item_type' => 'string', + ], + 'description' => 'string', + 'jobTitle' => 'string', +]; diff --git a/src/Validation/SchemaRules/Place.php b/src/Validation/SchemaRules/Place.php new file mode 100644 index 0000000..ad0b270 --- /dev/null +++ b/src/Validation/SchemaRules/Place.php @@ -0,0 +1,22 @@ + 'string', + 'address' => [ + 'type' => '@Thing', + 'rules' => [ + 'streetAddress' => 
'string', + 'addressLocality' => 'string', + 'addressRegion' => 'string', + 'postalCode' => 'string', + 'addressCountry' => 'string', + ], + ], + 'geo' => [ + 'type' => '@Thing', + 'rules' => [ + 'latitude' => 'float|int', + 'longitude' => 'float|int', + ], + ], +]; diff --git a/src/Validation/SchemaRules/PostalAddress.php b/src/Validation/SchemaRules/PostalAddress.php new file mode 100644 index 0000000..b4817be --- /dev/null +++ b/src/Validation/SchemaRules/PostalAddress.php @@ -0,0 +1,9 @@ + 'string', + 'addressLocality' => 'string', + 'addressRegion' => 'string', + 'postalCode' => 'string', + 'addressCountry' => 'string', +]; diff --git a/src/Validation/SchemaRules/Product.php b/src/Validation/SchemaRules/Product.php new file mode 100644 index 0000000..7d71ac2 --- /dev/null +++ b/src/Validation/SchemaRules/Product.php @@ -0,0 +1,32 @@ + 'url', + 'name' => [ + 'type' => 'string', + 'required' => true, + ], + 'aggregateRating' => '@AggregateRating', + 'offers' => '@Thing', + 'review' => '@Review', + 'image' => [ + 'type' => 'array|url|@ImageObject', + 'item_type' => 'url|@ImageObject', + ], + 'description' => 'string', + 'sku' => 'string', + 'mpn' => 'string', + 'brand' => '@Organization', + 'color' => 'string', + 'size' => 'string', + 'material' => 'string', + 'pattern' => 'string', + 'gtin14' => 'string', + 'itemCondition' => 'string', + 'availability' => 'string', + 'price' => 'float|int', + 'priceCurrency' => 'string', + 'priceValidUntil' => 'iso_date', + 'isVariantOf' => '@ProductGroup', + 'inProductGroupWithID' => 'string', +]; diff --git a/src/Validation/SchemaRules/ProductGroup.php b/src/Validation/SchemaRules/ProductGroup.php new file mode 100644 index 0000000..c5e9a1b --- /dev/null +++ b/src/Validation/SchemaRules/ProductGroup.php @@ -0,0 +1,22 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'productGroupID' => 'string', + 'variesBy' => [ + 'type' => 'array|string', + 'item_type' => 'string', + ], + 'hasVariant' => [ + 'type' => 
'array|@Product', + 'item_type' => '@Product', + ], + 'aggregateRating' => '@AggregateRating', + 'brand' => '@Organization', + 'description' => 'string', + 'review' => '@Review', + 'url' => 'url', +]; diff --git a/src/Validation/SchemaRules/ProfilePage.php b/src/Validation/SchemaRules/ProfilePage.php new file mode 100644 index 0000000..0afb7ee --- /dev/null +++ b/src/Validation/SchemaRules/ProfilePage.php @@ -0,0 +1,10 @@ + [ + 'type' => '@Person|@Organization', + 'required' => true, + ], + 'dateCreated' => 'iso_date', + 'dateModified' => 'iso_date', +]; diff --git a/src/Validation/SchemaRules/QAPage.php b/src/Validation/SchemaRules/QAPage.php new file mode 100644 index 0000000..70d75b5 --- /dev/null +++ b/src/Validation/SchemaRules/QAPage.php @@ -0,0 +1,8 @@ + [ + 'type' => '@Thing', + 'required' => true, + ], +]; diff --git a/src/Validation/SchemaRules/Quiz.php b/src/Validation/SchemaRules/Quiz.php new file mode 100644 index 0000000..69e2c5d --- /dev/null +++ b/src/Validation/SchemaRules/Quiz.php @@ -0,0 +1,14 @@ + [ + 'type' => 'array|@Thing', + 'item_type' => '@Thing', + 'required' => true, + ], + 'about' => '@Thing', + 'educationalAlignment' => [ + 'type' => 'array|@Thing', + 'item_type' => '@Thing', + ], +]; diff --git a/src/Validation/SchemaRules/Rating.php b/src/Validation/SchemaRules/Rating.php new file mode 100644 index 0000000..c3947e9 --- /dev/null +++ b/src/Validation/SchemaRules/Rating.php @@ -0,0 +1,9 @@ + 'string', + 'bestRating' => 'int', + 'name' => 'string', + 'ratingValue' => 'int', + 'worstRating' => 'int', +]; diff --git a/src/Validation/SchemaRules/Recipe.php b/src/Validation/SchemaRules/Recipe.php new file mode 100644 index 0000000..ca896cb --- /dev/null +++ b/src/Validation/SchemaRules/Recipe.php @@ -0,0 +1,29 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'image' => [ + 'type' => 'array|url|@ImageObject', + 'item_type' => 'url|@ImageObject', + 'required' => true, + ], + 'recipeIngredient' => [ + 'type' => 'array|string', + 
'item_type' => 'string', + 'required' => true, + ], + 'recipeInstructions' => [ + 'type' => 'array|string|@ItemList', + 'item_type' => 'string|@ItemList', + 'required' => true, + ], + 'author' => '@Person|@Organization', + 'datePublished' => 'iso_date', + 'description' => 'string', + 'recipeYield' => 'string|int', + 'aggregateRating' => '@AggregateRating', + 'video' => '@VideoObject', +]; diff --git a/src/Validation/SchemaRules/Restaurant.php b/src/Validation/SchemaRules/Restaurant.php new file mode 100644 index 0000000..085db4b --- /dev/null +++ b/src/Validation/SchemaRules/Restaurant.php @@ -0,0 +1,20 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'url' => [ + 'type' => 'url', + 'required' => true, + ], + 'image' => [ + 'type' => 'array|url|@ImageObject', + 'item_type' => 'url|@ImageObject', + ], + 'telephone' => 'string', + 'priceRange' => 'string', + 'address' => '@PostalAddress', + 'aggregateRating' => '@AggregateRating', +]; diff --git a/src/Validation/SchemaRules/Review.php b/src/Validation/SchemaRules/Review.php new file mode 100644 index 0000000..f8695a1 --- /dev/null +++ b/src/Validation/SchemaRules/Review.php @@ -0,0 +1,16 @@ + [ + 'type' => '@Person|@Organization', + 'required' => true, + ], + 'reviewRating' => [ + 'type' => '\Melbahja\Seo\Schema\Thing', + 'required' => true, + ], + 'itemReviewed' => '\Melbahja\Seo\Schema\Thing', + 'datePublished' => 'iso_date', + 'reviewBody' => 'string', +]; diff --git a/src/Validation/SchemaRules/SoftwareApplication.php b/src/Validation/SchemaRules/SoftwareApplication.php new file mode 100644 index 0000000..9049356 --- /dev/null +++ b/src/Validation/SchemaRules/SoftwareApplication.php @@ -0,0 +1,20 @@ + [ + 'type' => 'string', + 'required' => true, + ], + 'offers' => [ + 'type' => '@Thing|array', + 'item_type' => '@Thing', + 'required' => true, + ], + 'aggregateRating' => '@AggregateRating', + 'review' => [ + 'type' => '@Review|array', + 'item_type' => '@Review', + ], + 'applicationCategory' => 'string', + 
'operatingSystem' => 'string', +]; diff --git a/src/Validation/SchemaRules/VacationRental.php b/src/Validation/SchemaRules/VacationRental.php new file mode 100644 index 0000000..4c55eab --- /dev/null +++ b/src/Validation/SchemaRules/VacationRental.php @@ -0,0 +1,43 @@ + [ + 'type' => '@Thing', + 'required' => true, + ], + 'identifier' => [ + 'type' => 'string', + 'required' => true, + ], + 'image' => [ + 'type' => 'array|url|@ImageObject', + 'item_type' => 'url|@ImageObject', + 'required' => true, + ], + 'latitude' => [ + 'type' => 'float', + 'required' => true, + ], + 'longitude' => [ + 'type' => 'float', + 'required' => true, + ], + 'name' => [ + 'type' => 'string', + 'required' => true, + ], + 'address' => '@PostalAddress', + 'aggregateRating' => '@AggregateRating', + 'brand' => '@Organization', + 'checkinTime' => 'string', + 'checkoutTime' => 'string', + 'description' => 'string', + 'knowsLanguage' => [ + 'type' => 'array|string', + 'item_type' => 'string', + ], + 'review' => [ + 'type' => 'array|@Review', + 'item_type' => '@Review', + ], +]; diff --git a/src/Validation/SchemaRules/VideoObject.php b/src/Validation/SchemaRules/VideoObject.php new file mode 100644 index 0000000..0c31499 --- /dev/null +++ b/src/Validation/SchemaRules/VideoObject.php @@ -0,0 +1,15 @@ + 'string', + 'description' => 'string', + 'thumbnailUrl' => [ + 'type' => 'array|url', + 'item_type' => 'url', + ], + 'contentUrl' => 'url', + 'embedUrl' => 'url', + 'uploadDate' => 'iso_date', + 'duration' => 'string', + 'expires' => 'iso_date', +]; diff --git a/src/Validation/SchemaValidator.php b/src/Validation/SchemaValidator.php new file mode 100644 index 0000000..70dab04 --- /dev/null +++ b/src/Validation/SchemaValidator.php @@ -0,0 +1,425 @@ +jsonSerialize(); + } elseif ($schema instanceof Schema) { + $graph = []; + foreach ($schema->all() as $thing) { + $graph[] = $thing->jsonSerialize(); + } + $schema = ['@graph' => $graph]; + } + + if (is_array($schema)) { + + // single entity + if 
(isset($schema['@type'])) { + return static::validateType($schema['@type'], $schema); + } + + // @graph structure + if (isset($schema['@graph'])) { + + $errors = []; + if (!is_array($schema['@graph'])) { + return ['@graph must be an array']; + } + + $idRefs = []; + foreach ($schema['@graph'] as $k => $node) + { + if ($node instanceof Thing) { + $node = $node->jsonSerialize(); + $schema['@graph'][$k] = $node; + } + + // sub types refs + if (isset($node['@id']) && is_string($node['@id']) && count($node) > 1) { + $idRefs[$node['@id']] = $node; + } + } + + foreach ($schema['@graph'] as $index => $item) + { + if (!is_array($item)) { + $errors[] = "@graph[{$index}] must be an array"; + continue; + } + + // handle @id refs, allowing some that refs ids that are not defined like page refs. + if (isset($item['@id']) && count($item) === 1) { + // if (!isset($idRefs[$item['@id']])) { + // $errors[] = "@graph[{$index}].@id references an unknown node: {$item['@id']}"; + // } + continue; + } + + // merge props if @id ref exists + if (isset($item['@id']) && isset($idRefs[$item['@id']])) { + $item = array_merge($idRefs[$item['@id']], $item); + } + + if (!isset($item['@type'])) { + $errors[] = "@graph[{$index}] missing @type property"; + continue; + } + + if (($itemErrors = static::validateType($item['@type'], $item)) !== null) { + + foreach ($itemErrors as $error) + { + $errors[] = "@graph[{$index}].{$error}"; + } + } + } + + return empty($errors) ? null : $errors; + } + + // array without @type or @graph + return ['Schema array must have @type property or @graph structure']; + } + + return ['Input must be a Schema, Thing, or array']; + } + + public static function validateType(string|array $schemaType, array $data): ?array + { + // handle one/multiple types like: ['Article', 'NewsArticle'] + $types = is_array($schemaType) ? 
$schemaType : [$schemaType]; + + $rules = []; + foreach ($types as $type) + { + $typeRules = self::loadRules($type); + $rules = array_merge($rules, $typeRules); + } + + // validate existing rules. + $errors = []; + foreach ($rules as $prop => $rule) + { + $value = $data[$prop] ?? null; + $rule = is_string($rule) ? ['type' => $rule] : $rule; + + if (!empty($rule['required']) && self::isEmpty($value)) { + $errors[] = "{$prop} is required"; + continue; + } + + // skip if not required and empty + if (empty($rule['required']) && self::isEmpty($value)) { + continue; + } + + // validate prop + if (isset($rule['type']) && ($propErrors = self::validateProp($prop, $value, $rule['type'], $rule)) !== null ) { + $errors = array_merge($errors, $propErrors); + } + } + + // validate nested props/objects that not in rules + foreach ($data as $prop => $value) + { + + if (in_array($prop, ['@type', '@context', '@id']) || isset($rules[$prop])) { + continue; + } + + // skip @id only refs + if (is_array($value) && isset($value['@id']) && count($value) === 1) { + continue; + } + + // recursively validate + if (is_array($value) && isset($value['@type'])) { + + if (($nErrors = self::validateType($value['@type'], $value)) !== null) { + foreach ($nErrors as $error) { + $errors[] = "{$prop}.{$error}"; + } + } + + } elseif (is_array($value)) { + + // in case of array of objects + foreach ($value as $index => $item) + { + if (is_array($item) && isset($item['@type'])) { + + if ( ($nErrors = self::validateType($item['@type'], $item)) !== null) { + foreach ($nErrors as $error) + { + $errors[] = "{$prop}[{$index}].{$error}"; + } + } + } + } + } + } + + return empty($errors) ? 
null : $errors; + } + + private static function validateProp(string $prop, $value, string $type, array $rule): ?array + { + $errors = []; + + // union types (string|array|type thing) + if (str_contains($type, '|')) { + + $typeMatched = false; + $matchedType = null; + foreach (explode('|', $type) as $singleType) + { + if (self::checkType($value, trim($singleType), $rule) === null) { + $typeMatched = true; + $matchedType = trim($singleType); + break; + } + } + + if ($typeMatched === false) { + return ["{$prop} must be one of: {$type}"]; + } + + // validate single item type from array + if ($matchedType === 'array' && isset($rule['item_type']) && is_array($value)) { + + foreach ($value as $index => $item) + { + if ( ($nErrors = self::checkType($item, $rule['item_type'], [])) !== null) { + foreach ($nErrors as $error) + { + $errors[] = "{$prop}[{$index}] {$error}"; + } + } + } + } + + return empty($errors) ? null : $errors; + } + + // if no union it's a single type! + if ( ($nErrors = self::checkType($value, $type, $rule)) !== null) { + + foreach ($nErrors as $error) + { + $errors[] = "{$prop}: {$error}"; + } + + return $errors; + } + + // array items if type is array + if ($type === 'array' && isset($rule['item_type']) && is_array($value)) { + + foreach ($value as $index => $item) + { + if ( ($nErrors = self::checkType($item, $rule['item_type'], [])) !== null) { + foreach ($nErrors as $error) + { + $errors[] = "{$prop}[{$index}] {$error}"; + } + } + } + } + + return empty($errors) ? null : $errors; + } + + private static function checkType($value, string $type, array $rule): ?array + { + $errors = []; + + // @ prefixed types, rule file references + if (str_starts_with($type, '@')) { + + $ruleName = substr($type, 1); + $baseRules = self::loadRules($ruleName); + + // merge rules with inline rules, inline rules > file rules. 
+ if (isset($rule['rules']) && is_array($rule['rules'])) { + $baseRules = array_merge($baseRules, $rule['rules']); + } + + // TODO: maybe here we need just to return null when no rules are defined? + if (empty($baseRules)) { + return ["no rules found for @{$ruleName}"]; + } + + // value must be an array or Thing instance + if ($value instanceof Thing) { + + $value = $value->jsonSerialize(); + if ($value['@type'] !== $ruleName && $value['@type'] !== "Thing") { + return ["expected @type '{$ruleName}', got '{$value['@type']}'"]; + } + + } elseif (!is_array($value)) { + + return ["must be an array or Thing instance"]; + } + + // skip @id only references + if (isset($value['@id']) && count($value) === 1) { + return null; + } + + // check @type matches + if (isset($value['@type']) && $value['@type'] !== $ruleName && $ruleName !== 'Thing') { + return ["expected @type '{$ruleName}', got '{$value['@type']}'"]; + } + + // recursive rules validation + $errors = array_merge($errors, self::validateType($ruleName, $value) ?? []); + + return empty($errors) ? 
null : $errors; + } + + // Built-in types + switch ($type) + { + case 'string': + + if (!is_string($value)) { + $errors[] = "must be a string"; + } + break; + + case 'int': + case 'integer': + + if (!is_int($value) && !is_numeric($value)) { + $errors[] = "must be an integer"; + } + break; + + case 'float': + + if (!is_float($value) && !is_int($value) && !is_numeric($value)) { + $errors[] = "must be a float"; + } + break; + + case 'bool': + case 'boolean': + + if (!is_bool($value) && $value != 'true' && $value != 'false') { + $errors[] = "must be a boolean"; + } + break; + + case 'array': + + if (!is_array($value)) { + $errors[] = "must be an array"; + } + break; + + case 'iso_date': + + if (!is_string($value) || !preg_match('/^\d{4}-\d{2}-\d{2}/', $value)) { + $errors[] = "must be a valid ISO date (YYYY-MM-DD)"; + } + break; + + case 'url': + + if (!is_string($value) || !filter_var($value, FILTER_VALIDATE_URL)) { + $errors[] = "must be a valid URL"; + } + break; + + case 'email': + + if (!is_string($value) || !filter_var($value, FILTER_VALIDATE_EMAIL)) { + $errors[] = "must be a valid email"; + } + break; + + default: + + // class type + if (!class_exists($type)) { + $errors[] = "class {$type} does not exist"; + break; + } + + // handle Thing instances + if ($value instanceof Thing) { + $value = $value->jsonSerialize(); + } + + // If value is array, validate against class rules + if (is_array($value)) { + + if (isset($value['@id']) && count($value) === 1) { + break; + } + + $typeName = self::getClassNameFromType($type); + + // check if @type matches, only if it it's not a generic Thing + if (isset($value['@type']) && $value['@type'] !== $typeName && $value['@type'] !== 'Thing' && $typeName !== 'Thing') { + $errors[] = "expected @type '{$typeName}', got '{$value['@type']}'"; + break; + } + + // recursive type validation + $errors = array_merge($errors, self::validateType($value['@type'] ?? $typeName, $value) ?? 
[]); + break; + } + + $errors[] = "must be an instance of {$type} or array representing {$type}"; + break; + } + + return empty($errors) ? null : $errors; + } + + private static function getClassNameFromType(string|Thing $type): string + { + if (is_object($type)) { + return static::getClassNameFromType($type::class); + } + + $parts = explode('\\', $type); + return end($parts); + } + + private static function loadRules(string $schemaType): array + { + if (!file_exists($ruleFile = __DIR__ . "/SchemaRules/{$schemaType}.php")) { + return []; + } + + return include $ruleFile; + } + + private static function isEmpty($value): bool + { + if (is_array($value)) { + return empty($value); + } else if (is_string($value)) { + return trim($value) === ''; + } + + return $value === null; + } +} diff --git a/tests/IndexingTest.php b/tests/IndexingTest.php new file mode 100644 index 0000000..ba4c80d --- /dev/null +++ b/tests/IndexingTest.php @@ -0,0 +1,229 @@ +createMock(HttpClient::class); + $mockClient->method('request')->willReturn('{"status":"ok"}'); + $mockClient->method('getStatusCode')->willReturn(200); + + $indexer = new GoogleIndexer('valid_token', $mockClient); + $result = $indexer->submitUrl('https://example.com'); + + $this->assertTrue($result); + } + + public function testGoogleSubmitUrlFailure() + { + $mockClient = $this->createMock(HttpClient::class); + $mockClient->method('request')->willReturn(null); + $mockClient->method('getStatusCode')->willReturn(401); + + $indexer = new GoogleIndexer('invalid_token', $mockClient); + $result = $indexer->submitUrl('https://example.com'); + + $this->assertFalse($result); + } + + public function testGoogleSubmitUrls() + { + $mockClient = $this->createMock(HttpClient::class); + $mockClient->method('request')->willReturn('{"status":"ok"}'); + $mockClient->method('getStatusCode')->willReturn(200); + + $indexer = new GoogleIndexer('valid_token', $mockClient); + $urls = ['https://example.com/page1', 'https://example.com/page2']; + + 
$results = $indexer->submitUrls($urls); + + $this->assertIsArray($results); + $this->assertCount(2, $results); + $this->assertTrue($results['https://example.com/page1']); + $this->assertTrue($results['https://example.com/page2']); + } + + public function testGoogleSubmitUrlWithDeleteType() + { + $mockClient = $this->createMock(HttpClient::class); + $mockClient->method('request')->willReturn('{"status":"ok"}'); + $mockClient->method('getStatusCode')->willReturn(200); + + $indexer = new GoogleIndexer('valid_token', $mockClient); + $result = $indexer->submitUrl('https://example.com', URLIndexingType::DELETE); + + $this->assertTrue($result); + } + + public function testGoogleEmptyAccessToken() + { + $this->expectException(SeoException::class); + $this->expectExceptionMessage('Access token cannot be empty'); + + new GoogleIndexer(''); + } + + public function testGoogleServeKeyFileThrowsException() + { + $indexer = new GoogleIndexer('test_token'); + + $this->expectException(SeoException::class); + $this->expectExceptionMessage('Google Indexing API does not use key.txt verification'); + + $indexer->serveKeyFile(); + } + + public function testGoogleFromEnvironment() + { + $_ENV['GOOGLE_INDEXING_ACCESS_TOKEN'] = 'test_token_from_env'; + + $indexer = GoogleIndexer::fromEnvironment(); + + $this->assertInstanceOf(GoogleIndexer::class, $indexer); + + unset($_ENV['GOOGLE_INDEXING_ACCESS_TOKEN']); + } + + public function testGoogleFromEnvironmentThrowsException() + { + $this->expectException(SeoException::class); + $this->expectExceptionMessage('Google Indexing API access token not found in env var: CUSTOM_VAR'); + + GoogleIndexer::fromEnvironment('CUSTOM_VAR'); + } + + public function testIndexNowSubmitUrlSuccess() + { + $mockClient = $this->createMock(HttpClient::class); + $mockClient->method('request')->willReturn(''); + $mockClient->method('getStatusCode')->willReturn(200); + + $indexer = new IndexNowIndexer('valid_key', $mockClient); + $result = 
$indexer->submitUrl('https://example.com'); + + $this->assertTrue($result); + } + + public function testIndexNowSubmitUrlFailure() + { + $mockClient = $this->createMock(HttpClient::class); + $mockClient->method('request')->willReturn(null); + $mockClient->method('getStatusCode')->willReturn(403); + + $indexer = new IndexNowIndexer('invalid_key', $mockClient); + $result = $indexer->submitUrl('https://example.com'); + + $this->assertFalse($result); + } + + public function testIndexNowSubmitUrls() + { + $mockClient = $this->createMock(HttpClient::class); + $mockClient->method('request')->willReturn(''); + $mockClient->method('getStatusCode')->willReturn(200); + + $indexer = new IndexNowIndexer('valid_key', $mockClient); + $urls = ['https://example.com/page1', 'https://example.com/page2', 'https://example.com/page3']; + + $results = $indexer->submitUrls($urls); + + $this->assertIsArray($results); + $this->assertCount(3, $results); + $this->assertTrue($results['https://example.com/page1']); + $this->assertTrue($results['https://example.com/page2']); + $this->assertTrue($results['https://example.com/page3']); + } + + public function testIndexNowSubmitUrlWithDifferentEngines() + { + $mockClient = $this->createMock(HttpClient::class); + $mockClient->method('request')->willReturn(''); + $mockClient->method('getStatusCode')->willReturn(200); + + $indexer = new IndexNowIndexer('valid_key', $mockClient); + + $resultBing = $indexer->submitUrl('https://example.com', IndexNowEngine::BING); + $resultYandex = $indexer->submitUrl('https://example.com', IndexNowEngine::YANDEX); + + $this->assertTrue($resultBing); + $this->assertTrue($resultYandex); + } + + public function testIndexNowSubmitUrlWithDeleteType() + { + $mockClient = $this->createMock(HttpClient::class); + $mockClient->method('request')->willReturn(''); + $mockClient->method('getStatusCode')->willReturn(200); + + $indexer = new IndexNowIndexer('valid_key', $mockClient); + $result = 
$indexer->submitUrl('https://example.com', IndexNowEngine::INDEXNOW, URLIndexingType::DELETE); + + $this->assertTrue($result); + } + + public function testIndexNowEmptyApiKey() + { + $this->expectException(SeoException::class); + $this->expectExceptionMessage('API key cannot be empty'); + + new IndexNowIndexer(''); + } + + public function testIndexNowFromEnvironment() + { + $_ENV['INDEXNOW_API_KEY'] = 'test_key_from_env'; + + $indexer = IndexNowIndexer::fromEnvironment(); + + $this->assertInstanceOf(IndexNowIndexer::class, $indexer); + + unset($_ENV['INDEXNOW_API_KEY']); + } + + public function testIndexNowEngineToUrl() + { + $url = IndexNowEngine::INDEXNOW->toUrl('https://example.com/page', 'mykey123'); + + $this->assertStringContainsString('api.indexnow.org', $url); + $this->assertStringContainsString('url=', $url); + $this->assertStringContainsString('key=', $url); + $this->assertStringContainsString('mykey123', $url); + } + + public function testIndexNowAllEngines() + { + $engines = [ + IndexNowEngine::INDEXNOW, + IndexNowEngine::BING, + IndexNowEngine::YANDEX, + IndexNowEngine::AMAZON, + IndexNowEngine::NAVER, + IndexNowEngine::SEZNAM, + IndexNowEngine::YEP + ]; + + foreach ($engines as $engine) { + $url = $engine->toUrl('https://example.com', 'key'); + $this->assertIsString($url); + $this->assertStringContainsString('https://', $url); + } + } + + public function testURLIndexingTypeValues() + { + $this->assertEquals('update', URLIndexingType::UPDATE->value); + $this->assertEquals('delete', URLIndexingType::DELETE->value); + } +} diff --git a/tests/MetaTagsTest.php b/tests/MetaTagsTest.php index 9ea56fd..f5d418f 100644 --- a/tests/MetaTagsTest.php +++ b/tests/MetaTagsTest.php @@ -1,7 +1,10 @@ 'Mohamed Elbahja' ]); - $this->assertEquals('My new article', + $this->assertEquals('My new article', str_replace("\n", '', (string) $metatags) ); @@ -26,7 +29,7 @@ public function testMetaTags() $metatags ->title('PHP SEO') ->description('This is my description') - 
->meta('author', 'Mohamed Elabhja') + ->meta('author', 'Mohamed Elbahja') ->image('https://avatars3.githubusercontent.com/u/8259014') ->mobile('https://m.example.com') ->canonical('https://example.com') @@ -36,9 +39,131 @@ public function testMetaTags() $this->assertNotEmpty((string) $metatags); - $this->assertEquals('PHP SEO', + $this->assertEquals('PHP SEO', str_replace("\n", '', (string)$metatags) ); } + + public function testConstructorProps() + { + $metatags = new MetaTags( + meta: [ + 'title' => 'Test Page', + 'description' => 'Test description', + 'keywords' => 'php, test', + 'author' => 'Mohamed Elbahja', + 'theme-color' => '#ffffff', + 'robots' => 'index, follow', + 'canonical' => 'https://example.com', + + // two args methods like verification: + // methdName(arg, value), in this case verification(google, abc123) + // verification, robots, feed, hreflang, image... + 'verification' => [ + 'google' => 'abc123', + ], + + // Set multi tags + 'link' => [ + ['rel' => 'alternate', 'href' => 'https://example.com/fr', 'hreflang' => 'fr'], + ['rel' => 'alternate', 'href' => 'https://example.com/es', 'hreflang' => 'es'], + ], + + ], + og: [ + 'type' => 'article', + 'locale' => 'en_US', + 'site_name' => 'My Site', + ], + twitter: [ + 'card' => 'summary_large_image', + 'author' => '@dev0x0', + ], + ); + + $output = (string) $metatags; + + // var_export($output); + + $this->assertStringContainsString('Test Page', $output); + $this->assertStringContainsString('name="description" content="Test description"', $output); + $this->assertStringContainsString('name="keywords" content="php, test"', $output); + $this->assertStringContainsString('name="author" content="Mohamed Elbahja"', $output); + $this->assertStringContainsString('name="theme-color" content="#ffffff"', $output); + $this->assertStringContainsString('name="robots" content="index, follow"', $output); + $this->assertStringContainsString('name="google-site-verification" content="abc123"', $output); + 
$this->assertStringContainsString('rel="canonical" href="https://example.com"', $output); + $this->assertStringContainsString('hreflang="fr"', $output); + $this->assertStringContainsString('hreflang="es"', $output); + $this->assertStringContainsString('property="og:type" content="article"', $output); + $this->assertStringContainsString('property="og:locale" content="en_US"', $output); + $this->assertStringContainsString('property="og:site_name" content="My Site"', $output); + $this->assertStringContainsString('property="twitter:card" content="summary_large_image"', $output); + $this->assertStringContainsString('property="twitter:author" content="@dev0x0"', $output); + } + + + public function testMetaTagsWithSchema() + { + $metatags = new MetaTags([ + 'title' => 'Test Page', + 'description' => 'Test description', + 'keywords' => 'php, test', + 'author' => 'Mohamed Elbahja', + 'theme-color' => '#ffffff', + 'robots' => 'index, follow', + 'canonical' => 'https://example.com', + ]); + + $metatags->schema(new Schema( + new Thing(type: 'Organization', props: [ + 'url' => 'https://example.com', + 'logo' => 'https://example.com/logo.png', + 'name' => 'Example Org', + 'contactPoint' => new Thing(type: 'ContactPoint', props: [ + 'telephone' => '+1-000-555-1212', + 'contactType' => 'customer service' + ]) + ]) + )); + + $output = (string) $metatags; + + + $this->assertEquals($output, (string) new MetaTags([ + 'title' => 'Test Page', + 'description' => 'Test description', + 'keywords' => 'php, test', + 'author' => 'Mohamed Elbahja', + 'theme-color' => '#ffffff', + 'robots' => 'index, follow', + 'canonical' => 'https://example.com', + 'schema' => new Schema( + new Thing(type: 'Organization', props: [ + 'url' => 'https://example.com', + 'logo' => 'https://example.com/logo.png', + 'name' => 'Example Org', + 'contactPoint' => new Thing(type: 'ContactPoint', props: [ + 'telephone' => '+1-000-555-1212', + 'contactType' => 'customer service' + ]) + ]) + ), + ])); + + + 
$this->assertStringContainsString('Test Page', $output); + $this->assertStringContainsString('name="description" content="Test description"', $output); + $this->assertStringContainsString('name="keywords" content="php, test"', $output); + $this->assertStringContainsString('name="author" content="Mohamed Elbahja"', $output); + $this->assertStringContainsString('name="theme-color" content="#ffffff"', $output); + $this->assertStringContainsString('name="robots" content="index, follow"', $output); + $this->assertStringContainsString('rel="canonical" href="https://example.com"', $output); + $this->assertStringContainsString('"@type":"ContactPoint"', $output); + $this->assertStringContainsString('"@type":"Organization"', $output); + $this->assertStringContainsString('"@context":"https:\/\/schema.org"', $output); + $this->assertStringContainsString('', (string) $schema); + $this->assertEquals('', (string) $schema); } } diff --git a/tests/SchemaValidationTest.php b/tests/SchemaValidationTest.php new file mode 100644 index 0000000..6c696c4 --- /dev/null +++ b/tests/SchemaValidationTest.php @@ -0,0 +1,405 @@ + 'Casablanca Cafe', + 'address' => [ + '@type' => 'PostalAddress', + 'streetAddress' => '123 Avenue Mohammed V', + 'addressLocality' => 'Casablanca', + 'addressRegion' => 'Casablanca-Settat', + 'postalCode' => '20000', + 'addressCountry' => 'MA' + ], + 'url' => 'https://example.com', + 'telephone' => '+212524111111', + 'priceRange' => '$$', + 'servesCuisine' => ['Moroccan', 'Mediterranean'] + ]); + + + $errors = SchemaValidator::validate($biz); + $this->assertNull($errors, 'LocalBusiness validation should pass with valid data'); + } + + public function testSchemaWithMultipleTypes() + { + $data = [ + '@type' => ['Article', 'NewsArticle'], + 'headline' => 'Breaking News', + 'datePublished' => '2024-01-15' + ]; + + $errors = SchemaValidator::validateType(['Article', 'NewsArticle'], $data); + $this->assertNull($errors); + + $schema = new CreativeWork(type: ['Article', 
'NewsArticle'], props: [ + 'headline' => 'Breaking News', + 'datePublished' => '2024-01-15' + ]); + + $errors = SchemaValidator::validate($schema); + $this->assertNull($errors); + } + + public function testThingWithWrongType() + { + $data = [ + 'name' => 'Test', + 'address' => [ + '@type' => 'Rating', // Wrong! Should be PostalAddress + 'streetAddress' => '123 Main St' + ], + 'url' => 'https://example.com', + 'telephone' => '+212524111111' + ]; + + $errors = SchemaValidator::validateType('LocalBusiness', $data); + $this->assertIsArray($errors); + $this->assertStringContainsString("expected @type 'PostalAddress'", implode(' ', $errors)); + } + + public function testLocalBusinessWithInvalidOpeningHours() + { + $data = [ + 'name' => 'Test Restaurant', + 'address' => [ + '@type' => 'PostalAddress', + 'streetAddress' => '123 Main St', + 'addressLocality' => 'Casablanca', + 'addressRegion' => 'Casablanca-Settat', + 'postalCode' => '20000', + 'addressCountry' => 'MA' + ], + 'url' => 'https://example.com', + 'telephone' => '+212524111111', + 'openingHoursSpecification' => [ + [ + '@type' => 'OpeningHoursSpecification', + 'dayOfWeek' => 'Monday', + 'opens' => '09:00', + 'closes' => '18:00' + ], + [ + '@type' => 'OpeningHoursSpecification', + 'dayOfWeek' => 123, // Wrong! Should be string + 'opens' => 900, // Wrong! Should be string + 'closes' => true // Wrong! Should be string + ], + 'not an array' // Wrong! 
Should be array/object + ] + ]; + + $errors = SchemaValidator::validateType('LocalBusiness', $data); + $this->assertIsArray($errors); + $this->assertStringContainsString('openingHoursSpecification', implode(' ', $errors)); + } + + public function testThingObjectWithCorrectType() + { + $rating = new Thing(type: 'Rating', props: ['ratingValue' => 4.5]); + + $review = [ + '@type' => 'Review', + 'reviewRating' => $rating, // Thing object instead of Rating class + 'reviewBody' => 'Great!', + 'author' => ['@id' => 'https://example.com/authors/mohamed'], // ref only + ]; + + // Should pass since Thing has correct @type + $errors = SchemaValidator::validate($review); + $this->assertNull($errors); + } + + public function testNumericTypeFlexibility() + { + $rating1 = new Thing(type: 'Rating', props: ['ratingValue' => 5]); // int + $rating2 = new Thing(type: 'Rating', props: ['ratingValue' => 5.0]); // float + $rating3 = new Thing(type: 'Rating', props: ['ratingValue' => '5']); // string + + $this->assertNull(SchemaValidator::validate($rating1)); + $this->assertNull(SchemaValidator::validate($rating2)); + $this->assertNull(SchemaValidator::validate($rating3)); + } + + public function testArrayItemsValidation() + { + $data = [ + 'name' => 'Test Restaurant', + 'url' => 'https://example.com', + 'sameAs' => ['https://reddit.com/example', 123, 'https://github.com/example'], // 123 is invalid + ]; + + $errors = SchemaValidator::validateType('LocalBusiness', $data); + $this->assertIsArray($errors); + $this->assertStringContainsString('must be a valid URL', implode(' ', $errors)); + } + + public function testDeeplyNestedValidation() + { + $data = [ + '@type' => 'Review', + 'author' => [ + '@type' => 'Person', + 'name' => 'John', + 'address' => [ + '@type' => 'PostalAddress', + 'streetAddress' => 123 // Invalid: should be string + ] + ], + 'reviewBody' => 'Great!' 
+ ]; + + $errors = SchemaValidator::validate($data); + $this->assertIsArray($errors); + + $thing = new Thing([ + '@type' => 'Review', + 'author' => [ + '@type' => 'Person', + 'name' => 'John', + 'address' => [ + '@type' => 'PostalAddress', + 'streetAddress' => 123 // Invalid: should be string + ] + ], + 'reviewBody' => 'Great!' + ]); + + $errors = SchemaValidator::validate($thing); + $this->assertIsArray($errors); + } + + public function testEmptyStringValues() + { + $data = [ + 'name' => ' ', // Empty after trim + 'url' => 'https://example.com' + ]; + + $errors = SchemaValidator::validateType('LocalBusiness', $data); + $this->assertIsArray($errors); + $this->assertContains('name is required', $errors); + } + + public function testInvalidSchemaStructure() + { + $data = ['name' => 'Test']; // No @type, no @graph + + $errors = SchemaValidator::validate($data); + $this->assertIsArray($errors); + $this->assertContains('Schema array must have @type property or @graph structure', $errors); + } + + public function testGraphWithInvalidStaff() + { + $data = ['@graph' => 'not an array']; + + $errors = SchemaValidator::validate($data); + $this->assertIsArray($errors); + $this->assertContains('@graph must be an array', $errors); + + $data = [ + '@graph' => [ + ['name' => 'Test'] // Missing @type + ] + ]; + + $errors = SchemaValidator::validate($data); + $this->assertIsArray($errors); + $this->assertStringContainsString('missing @type property', implode(' ', $errors)); + + $data = [ + '@graph' => [ + 'not an assoc array' + ] + ]; + + $errors = SchemaValidator::validate($data); + $this->assertIsArray($errors); + $this->assertStringContainsString('must be an array', implode(' ', $errors)); + + + $data = [ + 'name' => 'Test', + 'url' => 'https://example.com', + 'image' => 123 // Should be string|array + ]; + + $errors = SchemaValidator::validateType('LocalBusiness', $data); + $this->assertIsArray($errors); + $this->assertStringContainsString('must be one of: url|@ImageObject', 
implode(' ', $errors)); + } + + public function testSchemaValidatorWithIdReferences() + { + // Create Thing with @id + $restaurant = new LocalBusiness([ + '@id' => 'https://example.com/#restaurant', + 'name' => 'Marrakech Palace', + 'address' => [ + '@type' => 'PostalAddress', + 'streetAddress' => '456 Rue de la Koutoubia', + 'addressLocality' => 'Marrakech', + 'addressRegion' => 'Marrakech-Safi', + 'postalCode' => '40000', + 'addressCountry' => 'MA' + ], + 'telephone' => '+212524445566', + 'url' => 'https://example.com/restaurant' + ]); + + // Create Organization with reference to restaurant + $organization = new Organization([ + '@id' => 'https://example.com/#organization', + 'name' => 'Marrakech Hospitality Group', + 'owns' => ['@id' => 'https://example.com/#restaurant'] + ]); + + // Create Review that references the restaurant + $review = new Thing(type: 'Review', props: [ + 'author' => new Thing(type: 'Person', props: ['name' => 'Ahmed Benali']), + 'reviewRating' => new Intangible(type: 'Rating', props: ['ratingValue' => 5]), + 'itemReviewed' => ['@id' => 'https://example.com/#restaurant'], + 'reviewBody' => 'Amazing traditional food!', + 'datePublished' => '2024-01-15' + ]); + + // Create Schema with all three things + $schema = new Schema($restaurant, $organization, $review); + + // Validate the schema + $errors = SchemaValidator::validate($schema); + $this->assertNull($errors, 'Schema with @id references should pass validation'); + + // Also test with array format + $schemaArray = $schema->jsonSerialize(); + $errors = SchemaValidator::validate($schemaArray); + $this->assertNull($errors, 'Schema array with @graph should pass validation'); + + // Verify the structure contains @id references + $this->assertArrayHasKey('@graph', $schemaArray); + $this->assertCount(3, $schemaArray['@graph']); + } + + public function testSchemaValidatorWithGraphStructure() + { + // Simulate schema with @graph structure + $graphData = [ + '@graph' => [ + [ + '@type' => 
'LocalBusiness', + 'name' => 'Restaurant', + 'address' => [ + 'streetAddress' => '789 Boulevard Pasteur', + 'addressLocality' => 'Tangier', + 'addressRegion' => 'Tanger-Tรฉtouan-Al Hoceรฏma', + 'postalCode' => '90000', + 'addressCountry' => 'MA' + ], + 'url' => 'https://example.com', + 'telephone' => '+212511111111' + ], + [ + '@type' => 'Organization', + 'name' => 'Restaurant Group', + 'url' => 'https://example.com/group', + 'sameAs' => ['https://facebook.com/example'] + ], + [ + '@type' => 'WebPage', + 'name' => 'Home Page', + 'url' => 'https://example.com', + 'description' => 'Welcome to our restaurant' + ] + ] + ]; + + // Note: The validator doesn't handle @graph structure directly + // We would validate each entity separately + $errors = []; + + // Validate each item in @graph + foreach ($graphData['@graph'] as $entity) + { + $type = $entity['@type']; + $entityErrors = SchemaValidator::validateType($type, $entity); + if ($entityErrors !== null) { + foreach ($entityErrors as $error) { + $errors[] = "{$type}: {$error}"; + } + } + } + + $this->assertEmpty($errors, 'All entities in @graph should pass validation'); + $this->assertNull(SchemaValidator::validateType('LocalBusiness', $graphData['@graph'][0])); + $this->assertNull(SchemaValidator::validateType('Organization', $graphData['@graph'][1])); + $this->assertNull(SchemaValidator::validateType('WebPage', $graphData['@graph'][2])); + } + + public function testLocalBusinessValidatorWithMissingRequiredFields() + { + // Test with missing required fields + $data = [ + 'url' => 'https://example.com', + 'telephone' => '+212524111111' + // Missing required 'name' and 'address' + ]; + + $errors = SchemaValidator::validateType('LocalBusiness', $data); + $this->assertIsArray($errors); + $this->assertContains('name is required', $errors); + $this->assertContains('address is required', $errors); + } + + public function testLocalBusinessValidatorWithInvalidTypes() + { + // Test with invalid data types + $data = [ + 'name' => 
123, // Should be string + 'address' => 'not an array', // Should be array or PostalAddress + 'url' => 'invalid-url', // Invalid URL + 'telephone' => 212524111111, // Should be string + 'priceRange' => 100, // Should be string + 'servesCuisine' => 123, // Should be string or array + 'acceptsReservations' => 'yes' // Should be bool + ]; + + $errors = SchemaValidator::validateType('LocalBusiness', $data); + $this->assertIsArray($errors); + } + + public function testLocalBusinessValidatorWithValidNestedAddress() + { + // Test with valid nested PostalAddress as array + $data = [ + 'name' => 'Marrakech Restaurant', + 'address' => [ + 'streetAddress' => '456 Rue de la Koutoubia', + 'addressLocality' => 'Marrakech', + 'addressRegion' => 'Marrakech-Safi', + 'postalCode' => '40000', + 'addressCountry' => 'MA' + ], + 'url' => 'https://example.com', + 'telephone' => '+212524111111' + ]; + + $errors = SchemaValidator::validateType('LocalBusiness', $data); + $this->assertNull($errors, 'Should pass with valid nested address array'); + } +} diff --git a/tests/SitemapsTest.php b/tests/SitemapsTest.php index c93bb01..b775782 100644 --- a/tests/SitemapsTest.php +++ b/tests/SitemapsTest.php @@ -1,354 +1,1340 @@ testDir = sys_get_temp_dir() . '/sitemap_seo_tests'; - $posts = $sitemap->links(['name' => 'posts.xml'], function(SitemapBuilder $builder) - { - $builder->loc('/posts/12')->priority("0.9"); - $builder->loc('/posts/13')->priority("0.9")->lastMod(date('c')); - }); + if (!is_dir($this->testDir)) { + mkdir($this->testDir, 0755, true); + } + } - $this->expectException(SitemapException::class); + protected function tearDown(): void + { + if (is_dir($this->testDir)) { + $this->removeDirectory($this->testDir); + } + } - $sitemap->save(); + private function removeDirectory(string $dir): void + { + if (!is_dir($dir)) { + return; + } + + $items = array_diff(scandir($dir), ['.', '..']); + + foreach ($items as $item) + { + $path = $dir . DIRECTORY_SEPARATOR . $item; + is_dir($path) ? 
$this->removeDirectory($path) : unlink($path); + } + + rmdir($dir); } - public function testNonWritableDir() + public function testSitemapBuilderBasic() { - $this->expectException(SitemapException::class); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $sitemap = new Sitemap('https://example.com', ['save_path' => '/']); - $sitemap->links(['name' => 't.xml'], function($builder) + $sitemap->links(['name' => 'posts.xml'], function(LinksBuilder $builder) { - $builder->loc('/about'); + $builder->loc('/posts/12')->priority(0.9); + $builder->loc('/posts/13')->priority(0.9)->lastMod(date('c')); + }); - })->save(); + $this->assertTrue($sitemap->render()); + $this->assertFileExists($this->testDir . '/posts.xml'); + $this->assertFileExists($this->testDir . '/sitemap.xml'); } + public function testSitemapMemoryMode() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + mode: OutputMode::MEMORY + ); - public function testSitemapSave() + $sitemap->links('blog.xml', function(LinksBuilder $builder) + { + // + // by design the memory mode in this func will never be called + // for heavy db queries and rare use cases that you may want + // to serve only the sitemap index. 
+ // + $builder->loc('/blog/post-1')->priority(0.8); + $builder->loc('/blog/post-2')->priority(0.7); + + exit("ERROR: This function/generator MUST not be called"); + }); + + $xml = $sitemap->render(); + + $this->assertIsString($xml); + $this->assertStringContainsString('assertStringContainsString('blog.xml', $xml); + } + + public function testSitemapSaveWithPriorities() { - $sitemap = $this->sitemapProvider(); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $dest = '_blog.xml'; + $sitemap->links(['name' => 'blog.xml'], function(LinksBuilder $builder) + { + $builder->loc('/blog/this_is_php')->priority(0.9); + $builder->loc('/blog/nx')->priority(0.0); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . '/blog.xml'); + + $this->assertStringContainsString('https://example.com/blog/this_is_php', $content); + $this->assertStringContainsString('0.9', $content); + $this->assertStringContainsString('https://example.com/blog/nx', $content); + $this->assertStringContainsString('0.0', $content); + } - $articles = $sitemap->links(['name' => $dest], function($builder) + public function testSitemapWithImages() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'images.xml', 'images' => true], function(LinksBuilder $builder) { - $builder->loc('/blog/god_bless_php')->priority("0.9"); + $builder->loc('/php') + ->image('http://php.net/images/logos/php-logo.png', ['title' => 'PHP logo']) + ->image('https://pear.php.net/gifs/pearsmall.gif', ['caption' => 'php pear']); - $builder->loc('/blog/nx')->priority("0.0"); + $builder->loc('/the-place') + ->image('/uploads/image.jpeg', ['geo_location' => '40.7590,-73.9845']); }); - $this->assertTrue($sitemap->save()); + $this->assertTrue($sitemap->render()); - $buildedFile = $sitemap->getSavePath() .'/'. 
$dest; + $content = file_get_contents($this->testDir . '/images.xml'); - $this->assertFileExists($buildedFile); + $this->assertStringContainsString('xmlns:image=', $content); + $this->assertStringContainsString('PHP logo', $content); + $this->assertStringContainsString('php pear', $content); + $this->assertStringContainsString('40.7590,-73.9845', $content); + } - $this->assertEquals(' - -https://example.com/blog/god_bless_php0.9https://example.com/blog/nx0.0', - trim(file_get_contents($buildedFile)) + public function testEscapedUrls() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE ); - } + $sitemap->links(['name' => 'escaped.xml'], function(LinksBuilder $builder) + { + $builder->loc('/รผmlat/test?item=12&desc=vacation_hawaii'); + $builder->loc('ุงู‡ู„ุง-ุจุงู„ุนุงู„ู…'); + }); + + $this->assertTrue($sitemap->render()); + $content = file_get_contents($this->testDir . '/escaped.xml'); - public function testSitemapSaveWithImages() + $this->assertStringContainsString('%C3%BCmlat/test?item=12&desc=vacation_hawaii', $content); + $this->assertStringContainsString('%D8%A7%D9%87%D9%84%D8%A7-%D8%A8%D8%A7%D9%84%D8%B9%D8%A7%D9%84%D9%85', $content); + } + + public function testVideoRequiredOptionsException() { - $sitemap = $this->sitemapProvider(); + $this->expectException(SitemapException::class); - $dest = '_blog.xml'; + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $articles = $sitemap->links(['name' => $dest, 'images' => true], function($builder) + $sitemap->links(['name' => 'videos.xml', 'videos' => true], function(LinksBuilder $builder) { - $builder->loc('/php') - ->image('http://php.net/images/logos/php-logo.png', ['title' => 'PHP logo']) - ->image('https://pear.php.net/gifs/pearsmall.gif', ['caption' => 'php pear']); + $builder->loc('/videos/12')->video('Watch my new video', [ + 'description' => 'Test' + ]); + }); - 
$builder->loc('/the-place') - ->image('/uploads/image.jpeg', ['geo_location' => '40.7590,-73.9845']); + $sitemap->render(); + } + + public function testVideoNoContentOrPlayerLocException() + { + $this->expectException(SitemapException::class); + $this->expectExceptionMessage('Raw video url content_loc or player_loc embed is required'); + + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + $sitemap->links(['name' => 'videos.xml', 'videos' => true], function(LinksBuilder $builder) + { + $builder->loc('/videos/12')->video('My new video', [ + 'thumbnail' => 'https://example.com/th.jpeg', + 'description' => 'My descriptions' + ]); }); - $this->assertTrue($sitemap->save()); + $sitemap->render(); + } - $this->assertEquals(' - -https://example.com/phpPHP logohttp://php.net/images/logos/php-logo.pngphp pearhttps://pear.php.net/gifs/pearsmall.gifhttps://example.com/the-place40.7590,-73.9845https://example.com/uploads/image.jpeg', - trim(file_get_contents($sitemap->getSavePath() . '/' .$dest)) + public function testBuildedSitemapWithVideos() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE ); + $sitemap->links(['name' => 'videos.xml', 'videos' => true], function(LinksBuilder $builder) + { + $builder->loc('/blog/12')->changeFreq('weekly')->priority(0.7); + $builder->loc('/blog/13')->changeFreq('monthly')->priority(0.8)->video('My new video', [ + 'thumbnail' => 'https://example.com/th.jpeg', + 'description' => 'My descriptions', + 'content_loc' => 'https://example.com/video.mp4' + ]); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/videos.xml'); + + $this->assertStringContainsString('xmlns:video=', $content); + $this->assertStringContainsString('My new video', $content); + $this->assertStringContainsString('My descriptions', $content); + $this->assertStringContainsString('https://example.com/video.mp4', $content); + $this->assertStringContainsString('https://example.com/th.jpeg', $content); } + public function testBuildedSitemapWithLocalizedUrls() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - public function testEscapedUrls() + $sitemap->links(['name' => 'localized.xml', 'localized' => true], function(LinksBuilder $builder) + { + $builder->loc('/blog/12')->changeFreq('weekly')->priority(0.7); + $builder->loc('/blog/13')->changeFreq('monthly')->priority(0.8) + ->alternate('/ar/blog/13', 'ar') + ->alternate('/de/blog/13', 'de'); + $builder->loc('/blog/14') + ->alternate('/ar/blog/14', 'ar') + ->alternate('/de/blog/14', 'de'); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/localized.xml'); + + $this->assertStringContainsString('xmlns:xhtml=', $content); + $this->assertStringContainsString('hreflang="ar"', $content); + $this->assertStringContainsString('hreflang="de"', $content); + $this->assertStringContainsString('rel="alternate"', $content); + } + + public function testSitemapIndexGeneration() { - $sitemap = $this->sitemapProvider(); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $name = '_maps.xml'; + $sitemap->links(['name' => 'posts.xml'], function(LinksBuilder $builder) + { + $builder->loc('/post/1')->priority(0.8); + }); - $sitemap->links(['name' => $name], function($builder) + $sitemap->links(['name' => 'pages.xml'], function(LinksBuilder $builder) { - $builder->loc('/รผmlat/test?item=12&desc=vacation_hawaii') - ->loc('ุงู‡ู„ุง-ุจุงู„ุนุงู„ู…'); + $builder->loc('/about')->priority(0.5); }); - $this->assertTrue($sitemap->save()); + $this->assertTrue($sitemap->render()); - $this->assertEquals(' - -https://example.com/%C3%BCmlat/test?item=12&desc=vacation_hawaiihttps://example.com/%D8%A7%D9%87%D9%84%D8%A7-%D8%A8%D8%A7%D9%84%D8%B9%D8%A7%D9%84%D9%85', - trim(file_get_contents($sitemap->getSavePath() . '/' . $name)) - ); + $this->assertFileExists($this->testDir . '/sitemap.xml'); + $this->assertFileExists($this->testDir . '/posts.xml'); + $this->assertFileExists($this->testDir . '/pages.xml'); + + $indexContent = file_get_contents($this->testDir . 
'/sitemap.xml'); + + $this->assertStringContainsString('assertStringContainsString('posts.xml', $indexContent); + $this->assertStringContainsString('pages.xml', $indexContent); } + public function testCustomIndexName() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + indexName: 'custom_index.xml', + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'blog.xml'], function(LinksBuilder $builder) + { + $builder->loc('/blog')->priority(0.5); + }); + + $this->assertTrue($sitemap->render()); + $this->assertFileExists($this->testDir . '/custom_index.xml'); + } - public function testVideoRequiredOptionsExceptions() + public function testCustomSitemapBaseUrl() { - $sitemap = $this->sitemapProvider(); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + sitemapBaseUrl: 'https://cdn.example.com/sitemaps', + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'blog.xml'], function(LinksBuilder $builder) + { + $builder->loc('/blog')->priority(0.5); + }); + + $this->assertTrue($sitemap->render()); + $indexContent = file_get_contents($this->testDir . 
'/sitemap.xml'); + + $this->assertStringContainsString('https://cdn.example.com/sitemaps/blog.xml', $indexContent); + } + + public function testMaxSitemapUrls() + { $this->expectException(SitemapException::class); + $this->expectExceptionMessage('The maximum urls has been exhausted'); - $sitemap->links(['name' => 'map.xml', 'videos' => true], function($builder) + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'max.xml'], function(LinksBuilder $builder) { - $builder->loc('/videos/12') - ->video('Watch my new video'); + $builder->maxUrls = 10; + + for ($i = 0; $i < 20; $i++) + { + $builder->loc("/post/{$i}"); + } }); + + $sitemap->render(); } + public function testNewsSitemap() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->news(['name' => 'news.xml'], function(NewsBuilder $builder) + { + $builder->loc('/news/12')->news([ + 'name' => 'DogNews', + 'language' => 'en', + 'publication_date' => '1997-07-16T19:20:30+01:00', + 'title' => 'Breaking Cat Flying A Plane' + ]); + + $builder->loc('/news/13')->news([ + 'name' => 'DogNews', + 'language' => 'en', + 'publication_date' => '2000-07-16T19:22:30+01:00', + 'title' => 'Breaking Cat Flying A Private Jet' + ]); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/news.xml'); - public function testVideoNoContentOrPlayerLocExceptions() + $this->assertStringContainsString('xmlns:news=', $content); + $this->assertStringContainsString('DogNews', $content); + $this->assertStringContainsString('en', $content); + $this->assertStringContainsString('Breaking Cat Flying A Plane', $content); + } + + public function testSitemapUrlObject() { - $sitemap = $this->sitemapProvider(); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $this->expectException(SitemapException::class); + $sitemap->links(['name' => 'objects.xml', 'images' => true], function(LinksBuilder $builder) + { + + $url = new SitemapUrl( + url: '/test-page', + lastmod: time(), + priority: 0.9, + changefreq: 'weekly' + ); - $sitemap->links(['name' => 'map.xml', 'videos' => true], function($builder) + $url->image('/image.jpg', ['title' => 'Test Image']); + + $builder->addItem($url); + + $builder->addItem(new SitemapUrl(url: "https://world.example.com/another_ref.xml")); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/objects.xml'); + + $this->assertStringContainsString('https://example.com/test-page', $content); + $this->assertStringContainsString('0.9', $content); + $this->assertStringContainsString('weekly', $content); + $this->assertStringContainsString('Test Image', $content); + $this->assertStringContainsString('https://world.example.com/another_ref.xml', $content); + } + + public function testCDataAutoDetection() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'cdata.xml', 'videos' => true], function(LinksBuilder $builder) { - $builder->loc('/videos/12')->video('My new video', + $builder->loc('/video')->video('Title with & chars', [ - 'thumbnail' => 'https://example.com/th.jpeg', - 'description' => 'My descriptions' + 'thumbnail' => 'https://example.com/thumb.jpg', + 'description' => 'Description with & ampersands', + 'content_loc' => 'https://example.com/video.mp4' ]); }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . '/cdata.xml'); + + $this->assertStringContainsString(' & chars]]>', $content); + $this->assertStringContainsString(' & ampersands]]>', $content); + } + + public function testIndentation() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'indented.xml', 'indent' => "\t"], function(LinksBuilder $builder) + { + $builder->loc('/page-1')->priority(0.8); + $builder->loc('/page-2')->priority(0.7); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/indented.xml'); + + $this->assertStringContainsString("\t", $content); + $this->assertStringContainsString("\t\t", $content); } + public function testVideoWithAttributes() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - public function testBuildedSitemapWithVideos() + $sitemap->links(['name' => 'video_attrs.xml', 'videos' => true], function(LinksBuilder $builder) + { + $builder->loc('/video')->video('Test Video', [ + 'thumbnail' => 'https://example.com/thumb.jpg', + 'description' => 'Test description', + 'player_loc' => [ + 'value' => 'https://example.com/player', + 'attrs' => ['allow_embed' => 'yes', 'autoplay' => 'ap=1'] + ] + ]); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . '/video_attrs.xml'); + + $this->assertStringContainsString('allow_embed="yes"', $content); + $this->assertStringContainsString('autoplay="ap=1"', $content); + } + + public function testMultipleVideosPerUrl() { - $sitemap = $this->sitemapProvider(); - $name = 'm.xml'; + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $sitemap->links(['name' => $name, 'videos' => true], function($map) + $sitemap->links(['name' => 'multi_videos.xml', 'videos' => true], function(LinksBuilder $builder) { - $map->loc('/blog/12')->freq('weekly')->priority('0.7') - ->loc('/blog/13')->freq('monthly')->priority('0.8')->video('My new video', - [ - 'thumbnail' => 'https://example.com/th.jpeg', - 'description' => 'My descriptions', - 'content_loc' => 'https://example.com/video.mp4' + $builder->loc('/page') + ->video('First Video', [ + 'thumbnail' => 'https://example.com/thumb1.jpg', + 'description' => 'First description', + 'content_loc' => 'https://example.com/video1.mp4' + ]) + ->video('Second Video', [ + 'thumbnail' => 'https://example.com/thumb2.jpg', + 'description' => 'Second description', + 'content_loc' => 
'https://example.com/video2.mp4' ]); }); - $this->assertTrue($sitemap->save()); + $this->assertTrue($sitemap->render()); - $this->assertEquals(' - -https://example.com/blog/12weekly0.7https://example.com/blog/13monthly0.8My descriptionshttps://example.com/video.mp4My new videohttps://example.com/th.jpeg', - trim(file_get_contents($sitemap->getSavePath() . "/{$name}")) - ); + $content = file_get_contents($this->testDir . '/multi_videos.xml'); + $this->assertStringContainsString('First Video', $content); + $this->assertStringContainsString('Second Video', $content); } + public function testArabicContent() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - public function testBuildedSitemapWithLocalizedUrls() + $sitemap->links(['name' => 'arabic.xml', 'videos' => true], function(LinksBuilder $builder) + { + $builder->loc('/arabic-page')->video('ุนู†ูˆุงู† ุงู„ููŠุฏูŠูˆ', [ + 'thumbnail' => 'https://cdn.example.com/thumb.jpg', + 'description' => 'ูˆุตู ุงู„ููŠุฏูŠูˆ ุจุงู„ุนุฑุจูŠุฉ', + 'content_loc' => 'https://example.com/video.mp4' + ]); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/arabic.xml'); + + $this->assertStringContainsString('https://cdn.example.com/thumb.jpg', $content); + $this->assertStringContainsString('ุนู†ูˆุงู† ุงู„ููŠุฏูŠูˆ', $content); + $this->assertStringContainsString('ูˆุตู ุงู„ููŠุฏูŠูˆ ุจุงู„ุนุฑุจูŠุฉ', $content); + } + + public function testLargeNumberOfUrls() { - $sitemap = $this->sitemapProvider(); - $name = 'm.xml'; + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $sitemap->links(['name' => $name, 'localized' => true], function($map) + $sitemap->links(['name' => 'large.xml'], function(LinksBuilder $builder) { - $map->loc('/blog/12')->freq('weekly')->priority('0.7'); + for ($i = 0; $i < 20000; $i++) + { + $builder->loc("/page-{$i}")->priority(0.5); + } + }); + + $this->assertTrue($sitemap->render()); + $this->assertFileExists($this->testDir . '/large.xml'); + + $content = file_get_contents($this->testDir . '/large.xml'); + $this->assertStringContainsString('/page-0', $content); + $this->assertStringContainsString('/page-19999', $content); + } + + public function testGeneratorDataSource() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $map->loc('/blog/13')->freq('monthly')->priority('0.8')->alternate("/ar/blog/13", "ar")->alternate("/de/blog/13", "de"); + // y can do this, but u can use lazy Utils::generator() for heavy db queries for eg! + $generator = function() { + for ($i = 0; $i < 100; $i++) + { + yield new SitemapUrl( + url: "/post-{$i}", + lastmod: time(), + priority: 0.8, + changefreq: 'daily' + ); + } + }; + + $sitemap->links(['name' => 'generator.xml'], $generator()); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/generator.xml'); + $this->assertStringContainsString('/post-0', $content); + $this->assertStringContainsString('/post-99', $content); + $this->assertStringContainsString('daily', $content); + } + + public function testArrayDataSource() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $urls = ['/page-1', '/page-2', '/page-3']; + + $sitemap->links(['name' => 'array.xml'], $urls); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . '/array.xml'); + $this->assertStringContainsString('/page-1', $content); + $this->assertStringContainsString('/page-2', $content); + $this->assertStringContainsString('/page-3', $content); + } + + public function testMixedSitemapUrlObjects() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'objects.xml'], [ + new SitemapUrl(url: '/page-1', lastmod: time(), priority: 0.9, changefreq: 'weekly'), + new SitemapUrl(url: '/page-2', lastmod: time(), priority: 0.7, changefreq: 'monthly'), + "/test-url", // [1] + ]); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/objects.xml'); + $this->assertStringContainsString('0.9', $content); + $this->assertStringContainsString('weekly', $content); + $this->assertStringContainsString('/page-2', $content); + $this->assertStringContainsString('https://example.com/test-url', $content); // [1] str + } + + public function testInvalidPriorityException() + { + $this->expectException(SitemapException::class); + $this->expectExceptionMessage('Priority must be between 0.0 and 1.0'); + + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $map->loc('/blog/14')->alternate("/ar/blog/14", "ar")->alternate("/de/blog/14", "de"); + $sitemap->links(['name' => 'invalid.xml'], function(LinksBuilder $builder) + { + $builder->loc('/page')->priority(1.5); }); - $this->assertTrue($sitemap->save()); + $sitemap->render(); + } + + public function testInvalidChangeFreqException() + { + $this->expectException(SitemapException::class); + $this->expectExceptionMessage('changefreq value not valid'); - $this->assertEquals(' - -https://example.com/blog/12weekly0.7https://example.com/blog/13monthly0.8https://example.com/blog/14', - trim(file_get_contents($sitemap->getSavePath() . 
"/{$name}")) + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE ); + $sitemap->links(['name' => 'invalid.xml'], function(LinksBuilder $builder) + { + $builder->loc('/page')->changeFreq('sometimes'); + }); + + $sitemap->render(); } + public function testInvalidDateException() + { + $this->expectException(SeoException::class); + $this->expectExceptionMessage('Invalid date format'); + + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - public function testSitemapExistent() + $sitemap->links(['name' => 'invalid.xml'], function(LinksBuilder $builder) + { + $builder->loc('/page')->lastMod('not-a-date'); + }); + + $sitemap->render(); + } + + public function testImageWithoutEnablingException() { - $sitemap = $this->sitemapProvider(); + $this->expectException(SitemapException::class); + $this->expectExceptionMessage('enable images option'); - $sitemap->links('posts.xml', function($builder) + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'test.xml'], function(LinksBuilder $builder) { - $builder->loc('/post/122'); + $builder->loc('/page')->image('/image.jpg'); }); - $this->assertTrue($sitemap->save()); + $sitemap->render(); + } + + public function testVideoWithoutEnablingException() + { + $this->expectException(SitemapException::class); + $this->expectExceptionMessage('enable videos option'); + + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'test.xml'], function(LinksBuilder $builder) + { + $builder->loc('/page')->video('Test', + [ + 'thumbnail' => 'thumb.jpg', + 'description' => 'desc', + 'content_loc' => 'video.mp4' + ]); + }); - $this->assertFileExists($sitemap->getSavePath() . 
'/sitemap.xml'); + $sitemap->render(); } - public function testSitemapsWithCustomIndexName() + public function testComplexMixedSitemap() { - $sitemap = $this->sitemapProvider()->setIndexName('sitemap_index.xml'); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $sitemap->links(['name' => 'blog.xml', 'videos' => true, 'images' => true], function($map) + $sitemap->links(['name' => 'complex.xml', 'images' => true, 'videos' => true, 'localized' => true], function(LinksBuilder $builder) { - $map->loc('/videos/ุงู‡ู„ุง-ุจุงู„ุนุงู„ู…') - ->video('ุงู‡ู„ุง ุจุงู„ุนุงู„ู…', - [ - 'thumbnail' => 'https://example.com/th.jpeg', - 'description' => 'My descriptions', - 'player_loc' => 'https://example.com/embed/video/1212' + $builder->loc('/product/123') + ->priority(0.9) + ->changeFreq('daily') + ->lastMod(time()) + ->image('/product-main.jpg', ['title' => 'Product Main Image']) + ->image('/product-thumb.jpg', ['caption' => 'Thumbnail']) + ->video('Product Demo', [ + 'thumbnail' => '/video-thumb.jpg', + 'description' => 'Watch our product demo', + 'content_loc' => '/videos/demo.mp4', + 'duration' => 120 ]) - ->image('https://example.com/bla_bla.jpeg') - ->freq('yearly'); + ->alternate('/fr/product/123', 'fr') + ->alternate('https://de.example.com/product/123', 'de'); // y can do another host not based on default base url. + }); - $map->loc('/blog/post/94') - ->image('https://example.com/bla_bla.jpeg', ['caption' => 'bla bla']); + $this->assertTrue($sitemap->render()); - $map->loc('/categories/php'); - }); + $content = file_get_contents($this->testDir . 
'/complex.xml'); + + $this->assertStringContainsString('xmlns:image=', $content); + $this->assertStringContainsString('xmlns:video=', $content); + $this->assertStringContainsString('xmlns:xhtml=', $content); + $this->assertStringContainsString('Product Main Image', $content); + $this->assertStringContainsString('Product Demo', $content); + $this->assertStringContainsString('hreflang="fr"', $content); + $this->assertStringContainsString('href="https://de.example.com/product/123"', $content); + } - $sitemap->links('blog_2.xml', function($map) + public function testMultipleSitemapsWithDifferentOptions() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'posts.xml', 'indent' => ' '], function(LinksBuilder $builder) { - $map->loc('/blog')->priority('0.5'); - $map->loc('/blog/my_first_post')->priority('0.7'); + $builder->loc('/blog/post-1')->priority(0.8); }); - $this->assertTrue($sitemap->save()); + $sitemap->links(['name' => 'videos.xml', 'videos' => true, 'indent' => "\t"], function(LinksBuilder $builder) + { + $builder->loc('/videos/1')->video('Video 1', [ + 'thumbnail' => 'thumb.jpg', + 'description' => 'desc', + 'content_loc' => 'video.mp4' + ]); + }); - $this->assertFileExists($sitemap->getSavePath() . '/sitemap_index.xml'); - $this->assertFileExists($sitemap->getSavePath() . '/blog_2.xml'); - $this->assertFileExists($sitemap->getSavePath() . '/blog.xml'); + $sitemap->links(['name' => 'gallery.xml', 'images' => true], function(LinksBuilder $builder) + { + $builder->loc('/gallery/1')->image('image.jpg'); + }); - $xml = simplexml_load_file($sitemap->getSavePath() . '/sitemap_index.xml'); + $this->assertTrue($sitemap->render()); - $urls = ['https://example.com/blog_2.xml', 'https://example.com/blog.xml']; + $this->assertFileExists($this->testDir . '/posts.xml'); + $this->assertFileExists($this->testDir . '/videos.xml'); + $this->assertFileExists($this->testDir . 
'/gallery.xml'); + $this->assertFileExists($this->testDir . '/sitemap.xml'); - $this->assertSame(2, $xml->count()); - $this->assertTrue(in_array($xml->sitemap[0]->loc, $urls)); - $this->assertTrue(in_array($xml->sitemap[1]->loc, $urls)); + $indexContent = file_get_contents($this->testDir . '/sitemap.xml'); + $this->assertStringContainsString('posts.xml', $indexContent); + $this->assertStringContainsString('videos.xml', $indexContent); + $this->assertStringContainsString('gallery.xml', $indexContent); } + public function testNewsWithAllFields() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->news(['name' => 'news-full.xml'], function(NewsBuilder $builder) + { + $builder->loc('/news/breaking')->news([ + 'name' => 'Example News', + 'language' => 'en', + 'publication_date' => '2024-01-15T10:00:00+00:00', + 'title' => 'Breaking News Title', + 'keywords' => 'breaking, news, important', + 'stock_tickers' => 'NASDAQ:ACOM' + ]); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/news-full.xml'); + + $this->assertStringContainsString('Breaking News Title', $content); + $this->assertStringContainsString('breaking, news, important', $content); + $this->assertStringContainsString('NASDAQ:ACOM', $content); + } - public function testStitemapsWithCustomUrl() + public function testRelativeAndAbsoluteUrls() { - $sitemap = new Sitemap('https://example.con', - [ - 'save_path' => sys_get_temp_dir(), - 'sitemaps_url' => 'https://example.com/sitemaps' - ]); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $sitemap->links('blog.xml', function($map) + $sitemap->links(['name' => 'urls.xml', 'images' => true], function(LinksBuilder $builder) { - $map->loc('/blog')->priority('0.5'); - $map->loc('/blog/my_first_post')->priority('0.7'); + $builder->loc('/relative-url')->image('/relative-image.jpg'); + $builder->loc('https://example.com/absolute-url')->image('https://cdn.example.com/absolute-image.jpg'); }); - $sitemap->links('blog_2.xml', function($map) + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/urls.xml'); + + $this->assertStringContainsString('https://example.com/relative-url', $content); + $this->assertStringContainsString('https://example.com/relative-image.jpg', $content); + $this->assertStringContainsString('https://example.com/absolute-url', $content); + $this->assertStringContainsString('https://cdn.example.com/absolute-image.jpg', $content); + } + + public function testSpecialCharactersInUrls() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'special.xml'], function(LinksBuilder $builder) { - $map->loc('/blog')->priority('0.5'); - $map->loc('/blog/my_first_post')->priority('0.7'); + $builder->loc('/path?param1=value¶m2=value2'); + $builder->loc('/cafรฉ/naรฏve'); + $builder->loc('/ไธญๆ–‡/่ทฏๅพ„'); }); - $this->assertTrue($sitemap->save()); + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . '/special.xml'); - $this->assertFileExists($sitemap->getSavePath() . '/sitemap.xml'); - $this->assertFileExists($sitemap->getSavePath() . '/blog_2.xml'); - $this->assertFileExists($sitemap->getSavePath() . '/blog.xml'); + $this->assertStringContainsString('&', $content); + $this->assertStringContainsString('%C3%A9', $content); // cafรฉ encoded + $this->assertStringContainsString('%C3%AF', $content); // naรฏve encoded + } - $xml = simplexml_load_file($sitemap->getSavePath() . 
'/sitemap.xml'); + public function testEmptySitemapStillGeneratesIndex() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'empty.xml'], function(LinksBuilder $builder) + { + // No URLs added + }); - $urls = ['https://example.com/sitemaps/blog_2.xml', 'https://example.com/sitemaps/blog.xml']; + $this->assertTrue($sitemap->render()); - $this->assertSame(2, $xml->count()); - $this->assertTrue(in_array($xml->sitemap[0]->loc, $urls)); - $this->assertTrue(in_array($xml->sitemap[1]->loc, $urls)); + $this->assertFileExists($this->testDir . '/empty.xml'); + $this->assertFileExists($this->testDir . '/sitemap.xml'); } + public function testManualCDataFields() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - public function testMaxSitemapUrls() + $sitemap->links(['name' => 'cdata-manual.xml', 'videos' => true, 'cdata' => ['video:content_loc']], function(LinksBuilder $builder) + { + $builder->loc('/video')->video('Normal Title', [ + 'thumbnail' => 'thumb.jpg', + 'description' => 'Normal description', + 'content_loc' => 'This will be wrapped in CDATA' + ]); + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . '/cdata-manual.xml'); + + // content_loc should be in CDATA because we specified it + $this->assertStringContainsString('', $content); + } + + public function testTempModeCleanup() { - $sitemap = $this->sitemapProvider(); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::TEMP + ); - $this->expectException(SitemapException::class); + $sitemap->links(['name' => 'temp.xml'], function(LinksBuilder $builder) + { + $builder->loc('/page')->priority(0.5); + }); - $sitemap->links('max.xml', function($map) + $this->assertTrue($sitemap->render()); + $this->assertFileExists($this->testDir . 
'/temp.xml'); + + // tmp file should not exist after cleaned up + $tempFiles = glob(sys_get_temp_dir() . '/*.xml'); + $this->assertEmpty( + array_filter($tempFiles, fn($f) => str_contains(basename($f), md5(''))) + ); + } + + public function testMemoryModeToFile() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + mode: OutputMode::MEMORY + ); + + $sitemap->links('mem:2', function(LinksBuilder $builder) { - $i = 30001; + $builder->loc('/page-1')->priority(0.8); + $builder->loc('/page-2')->priority(0.6); + }); - do + $success = $sitemap->render($this->testDir . '/output.xml'); + + $this->assertTrue($success); + $this->assertFileExists($this->testDir . '/output.xml'); + + $content = file_get_contents($this->testDir . '/output.xml'); + $this->assertStringContainsString('testDir, + mode: OutputMode::FILE + ); + + $validFreqs = ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never']; + + $sitemap->links(['name' => 'freqs.xml'], function(LinksBuilder $builder) use ($validFreqs) + { + foreach ($validFreqs as $freq) { - $map->loc("/post/{$i}"); - $i--; + $builder->loc("/page-{$freq}")->changeFreq($freq); + } + }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . '/freqs.xml'); - } while($i !== 0); + foreach ($validFreqs as $freq) + { + $this->assertStringContainsString("{$freq}", $content); + } + } + + public function testDateFormats() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'dates.xml'], function(LinksBuilder $builder) + { + $builder->loc('/page-1')->lastMod(time()); + $builder->loc('/page-2')->lastMod('2024-01-15'); + $builder->loc('/page-3')->lastMod('2024-01-15 10:30:00'); }); + + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/dates.xml'); + + $this->assertStringContainsString('', $content); + + // ISO 8601 format + $this->assertMatchesRegularExpression('/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/', $content); } + public function testVideoThumbnailAlias() + { + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->links(['name' => 'video-alias.xml', 'videos' => true], function($builder) + { + $builder->loc('/video')->video('Test Video', [ + 'thumbnail' => 'thumb.jpg', // Using alias + 'description' => 'Test', + 'content_loc' => 'video.mp4' + ]); + }); - public function testNewsMaps() + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . '/video-alias.xml'); + + $this->assertStringContainsString('https://example.com/thumb.jpg', $content); + } + + public function testPriorityFormatting() { - $sitemap = $this->sitemapProvider(); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); - $sitemap->news('breaking.xml', function($map) + $sitemap->links(['name' => 'priority.xml'], function(LinksBuilder $builder) { - $map->loc('/news/12')->news( - [ - 'name' => 'DogNews', - 'language' => 'en', - 'publication_date' => '1997-07-16T19:20:30+01:00', - 'title' => 'Breaking Cat Flying A Plane' - ]) - ->loc('/news/13')->news( - [ - 'name' => 'DogNews', - 'language' => 'en', - 'publication_date' => '2000-07-16T19:22:30+01:00', - 'title' => 'Breaking Cat Flying A Private Jet With Girls' - ]); + $builder->loc('/page-1')->priority(1.0); + $builder->loc('/page-2')->priority(0.5); + $builder->loc('/page-3')->priority(0); + $builder->loc('/page-4')->priority('0.7'); }); - $this->assertTrue($sitemap->save()); + $this->assertTrue($sitemap->render()); + + $content = file_get_contents($this->testDir . 
'/priority.xml'); + + // float format check + $this->assertStringContainsString('1.0', $content); + $this->assertStringContainsString('0.5', $content); + $this->assertStringContainsString('0.0', $content); + $this->assertStringContainsString('0.7', $content); + } + + + public function testManualBuildersWithoutSitemapHelper() + { + // manual LinksBuilder + $linksBuilder = new LinksBuilder( + baseUrl: 'https://example.com', + filePath: $this->testDir . '/manual-links.xml', + mode: OutputMode::FILE, + options: ['indent' => "\t", 'images' => true] + ); + + $linksBuilder + ->loc('/page-1')->priority(0.9)->lastMod(time()) + ->loc('/page-2')->priority(0.7)->changeFreq('weekly') + ->loc('/gallery')->image('/img1.jpg', ['title' => 'Image 1']) + ->image('/img2.jpg', ['caption' => 'Image 2']); + + $this->assertTrue($linksBuilder->render()); + $this->assertFileExists($this->testDir . '/manual-links.xml'); + + // manual IndexBuilder + $indexBuilder = new IndexBuilder( + baseUrl: 'https://example.com', + filePath: $this->testDir . '/manual-index.xml', + mode: OutputMode::FILE, + options: ['indent' => ' '] + ); + + $indexBuilder + ->addSitemap('https://example.com/manual-links.xml')->lastMod(time()) + ->addSitemap('https://example.com/sitemap-posts.xml')->lastMod('2024-01-15'); - $this->assertFileExists($sitemap->getSavePath() . '/breaking.xml'); + $this->assertTrue($indexBuilder->render()); + $this->assertFileExists($this->testDir . '/manual-index.xml'); - $this->assertEquals(' - -https://example.com/news/12DogNewsen1997-07-16T19:20:30+01:00Breaking Cat Flying A Planehttps://example.com/news/13DogNewsen2000-07-16T19:22:30+01:00Breaking Cat Flying A Private Jet With Girls', - trim(file_get_contents($sitemap->getSavePath() . '/breaking.xml')) + $indexContent = file_get_contents($this->testDir . 
'/manual-index.xml'); + + $this->assertStringContainsString('assertStringContainsString(' assertStringContainsString('manual-links.xml', $indexContent); + $this->assertStringContainsString('sitemap-posts.xml', $indexContent); + $this->assertStringContainsString('', $indexContent); + + $linksContent = file_get_contents($this->testDir . '/manual-links.xml'); + + $this->assertStringContainsString("assertStringContainsString("\t", $linksContent); + $this->assertStringContainsString('xmlns:image=', $linksContent); + $this->assertStringContainsString('Image 1', $linksContent); + } + + + public function testStreamOutputMode() + { + $streamFile = $this->testDir . '/stream-output.xml'; + $stream = fopen($streamFile, 'w'); + + $this->assertIsResource($stream); + + $builder = new LinksBuilder( + baseUrl: 'https://example.com', + filePath: null, + mode: OutputMode::STREAM, + stream: $stream, + options: ['indent' => "\t"] ); + $builder + ->loc('/stream-page-1')->priority(0.8) + ->loc('/stream-page-2')->priority(0.6)->changeFreq('daily') + ->loc('/stream-page-3')->lastMod(time()); + + $this->assertTrue($builder->render()); + + fclose($stream); + + $this->assertFileExists($streamFile); + + $content = file_get_contents($streamFile); + + $this->assertStringContainsString('', $content); + $this->assertStringContainsString('assertStringContainsString('', $content); + $this->assertStringContainsString('/stream-page-1', $content); + $this->assertStringContainsString('/stream-page-2', $content); + $this->assertStringContainsString('daily', $content); + $this->assertStringContainsString('0.8', $content); } - public function sitemapProvider() + + public function testTraversableWithUtilsGenerator() { - return new Sitemap('https://example.com', ['save_path' => sys_get_temp_dir()]); + $sitemap = new Sitemap( + baseUrl: 'https://example.com', + saveDir: $this->testDir, + mode: OutputMode::FILE + ); + + $sitemap->news(['name' => 'traversable-news.xml', 'videos' => true, 'localized' => true], 
Utils::generator(function() + { + for ($i = 0; $i < 50; $i++) + { + $item = new SitemapUrl( + url: "/article-ุจุงู„ุนุงู„ู…-{$i}?param=value&lang=ar", + lastmod: time(), + priority: 0.9, + changefreq: 'daily', + news: [ + 'title' => "News Title ุจุงู„ุนุงู„ู… > Article {$i}", + 'genres' => 'PressRelease', + 'name' => 'ExampleNews', + 'language' => 'ar', + 'publication_date' => date('c'), + 'keywords' => 'keyword1, keyword2' + ] + ); + + yield $item->alternate("/fr/article-ุจุงู„ุนุงู„ู…-{$i}?param=value&lang=fr", "fr") + ->alternate("/en/article-ุจุงู„ุนุงู„ู…-{$i}?param=value&lang=en", "en") + ->video("Video Title {$i}", [ + 'thumbnail' => "/thumb-{$i}.jpg?size=large&quality=high", + 'description' => "Video description with special chars <>&", + 'content_loc' => "/videos/video-{$i}.mp4?quality=hd", + 'restriction' => [ + [ + 'attrs' => ['relationship' => 'deny'], + 'value' => 'US GB' + ], + [ + 'attrs' => ['relationship' => 'deny'], + 'value' => 'MA EG' + ], + ] + ]) + ->video("Second Video {$i}", [ + 'thumbnail' => "/thumb2-ุจุงู„ุนุงู„ู…-{$i}.jpg", + 'description' => 'Second video description', + 'uploader' => [ + 'value' => 'YVideos', + 'attrs' => ['info' => 'https://example.com/@yvideos'], + ], + 'player_loc' => [ + 'value' => "/player/{$i}", + 'attrs' => ['allow_embed' => 'yes', 'autoplay' => 'ap=1'] + ], + 'view_count' => 1000 + $i, + 'restriction' => [ + 'attrs' => ['relationship' => 'allow'], + 'value' => 'US CA GB' + ] + ]); + } + }) + ); + + $sitemap->links(['name' => 'traversable-links.xml', 'images' => true], Utils::generator(function() + { + for ($i = 0; $i < 30; $i++) + { + yield (new SitemapUrl( + url: "/page-{$i}", + lastmod: time() - ($i * 86400), + priority: 0.5 + ($i * 0.01), + changefreq: 'weekly' + ))->image("/images/img-{$i}.jpg", ['title' => "Image {$i}", 'caption' => "Caption {$i}"]); + } + + for ($i=0; $i < 10; $i++) + { + yield "str-page-{$i}"; + } + }) + ); + + $this->assertTrue($sitemap->render()); + + // news sitemap + 
$this->assertFileExists($this->testDir . '/traversable-news.xml'); + $newsContent = file_get_contents($this->testDir . '/traversable-news.xml'); + + $this->assertStringContainsString('xmlns:news=', $newsContent); + $this->assertStringContainsString('xmlns:video=', $newsContent); + $this->assertStringContainsString('xmlns:xhtml=', $newsContent); + $this->assertStringContainsString('/article-%D8%A8%D8%A7%D9%84%D8%B9%D8%A7%D9%84%D9%85-0', $newsContent); + $this->assertStringContainsString('https://example.com/article-%D8%A8%D8%A7%D9%84%D8%B9%D8%A7%D9%84%D9%85-49', $newsContent); + $this->assertStringContainsString('', $newsContent); + $this->assertStringContainsString('ุจุงู„ุนุงู„ู…', $newsContent); + $this->assertStringContainsString('Video Title', $newsContent); + $this->assertStringContainsString('Second Video', $newsContent); + $this->assertStringContainsString('allow_embed="yes"', $newsContent); + $this->assertStringContainsString('hreflang="fr"', $newsContent); + $this->assertStringContainsString('hreflang="en"', $newsContent); + $this->assertStringContainsString('US CA GB', $newsContent); + $this->assertStringContainsString('MA EG', $newsContent); + $this->assertStringContainsString('US GB', $newsContent); + $this->assertStringContainsString('YVideos', $newsContent); + $this->assertStringContainsString('&]]>', $newsContent); + + + + // links sitemap + $this->assertFileExists($this->testDir . '/traversable-links.xml'); + $linksContent = file_get_contents($this->testDir . 
'/traversable-links.xml'); + + $this->assertStringContainsString('xmlns:image=', $linksContent); + $this->assertStringContainsString('/page-0', $linksContent); + $this->assertStringContainsString('/page-29', $linksContent); + $this->assertStringContainsString('Image', $linksContent); + $this->assertStringContainsString('Caption', $linksContent); + + // yield "str-page-{$i}"; + $this->assertStringContainsString('/str-page', $linksContent); + $this->assertStringContainsString('https://example.com/str-page-9', $linksContent); + + // verify index + $this->assertFileExists($this->testDir . '/sitemap.xml'); + $indexContent = file_get_contents($this->testDir . '/sitemap.xml'); + + $this->assertStringContainsString('traversable-news.xml', $indexContent); + $this->assertStringContainsString('traversable-links.xml', $indexContent); } } diff --git a/tests/bootstrap.php b/tests/bootstrap.php deleted file mode 100644 index 7087688..0000000 --- a/tests/bootstrap.php +++ /dev/null @@ -1,5 +0,0 @@ -