diff --git a/composer.json b/composer.json index 0facf85..ff04127 100644 --- a/composer.json +++ b/composer.json @@ -1,25 +1,34 @@ { - "name": "anzenkodo/small-rss-php", - "description": "RSS & Atom feed parser for PHP. Very small and easy-to-use library for parsing your feeds.", - "keywords": ["rss", "atom", "feed"], - "homepage": "https://github.com/AnzenKodo/small-rss-php", - "license": ["unlicense"], - "authors": [ - { - "name": "AnzenKodo", - "email": "AnzenKodo@altmails.com", - "homepage": "https://AnzenKodo.github.io/AnzenKodo", - "role": "Developer" - } - ], - "support": { - "issues": "https://github.com/AnzenKodo/small-rss-php/issues", - "source": "https://github.com/AnzenKodo/small-rss-php" - }, - "require": { - "php": ">=8.0" - }, - "autoload": { - "classmap": ["src/"] - } + "name": "anzenkodo/rss-atom-parser", + "description": "RSS & Atom feed parser for PHP. Very small and easy-to-use library for parsing your feeds.", + "type": "library", + "keywords": [ "rss", "atom", "feed" ], + "homepage": "https://github.com/AnzenKodo/rss-atom-parser", + "readme": "https://github.com/AnzenKodo/rss-atom-parser/blob/main/README.md", + "license": "MIT", + "authors": [ + { + "name": "AnzenKodo", + "email": "AnzenKodo@altmails.com", + "homepage": "https://AnzenKodo.github.io/AnzenKodo", + "role": "Developer" + } + ], + "support": { + "issues": "https://github.com/AnzenKodo/rss-atom-parser/issues", + "source": "https://github.com/AnzenKodo/rss-atom-parser", + "docs": "https://github.com/AnzenKodo/rss-atom-parser/blob/main/README.md" + }, + "funding": [ + { + "type": "website", + "url": "https://AnzenKodo.github.io/AnzenKodo#support" + } + ], + "require": { + "php": ">=8.0" + }, + "autoload": { + "classmap": [ "src/" ] + } } diff --git a/src/rss.php b/src/rss.php index 98efcba..f3cd52a 100644 --- a/src/rss.php +++ b/src/rss.php @@ -1,113 +1,166 @@ item[0]->date); + } + array_multisort($ord, SORT_DESC, $data); + + return (object)$data; } // For single feed in string type. 
- public static function feed_data(string $url): array { + public static function getFeed(string $url): object { + ini_set('user_agent', self::$useragent); + // Checks if $url content type is html. If it is HTML then finds the Feed // url and changes $url to feed url. + $its_html = self::checkContentType($url, "html"); + if($its_html) { + $content_html = self::getContent($url); + $url = self::getFeedUrl($content_html); + } - $its_html = self::check_content_type($url, "html"); - if($its_html) { $url = self::get_feed_url($content); } + $content = self::getContent($url); - $content = self::get_content($url); + $xml = new SimpleXmlElement($content, true); + $data = (object)array(); + if ($xml->channel) { // RSS feed (root has a channel element) + $data = self::getRSS($xml, $url); + } else if($xml->entry) { // Atom feed (root has entry elements) + $data = self::getAtom($xml, $url); + } else { + } - $xml = self::get_feed($content); + return (object)$data; + } - return self::format_data($xml); + private static function isHttp(string $str, string $url, bool $boolean = false) { + if (preg_match("/^(https|http):\/\//", $str)) return $str; + + if (str_starts_with($str, "/")) return preg_replace('/(?<=(\w|\d))\/.*$/', $str, $url); + + if ($boolean) return false; + + return $url; } + + private static function getAtom(SimpleXMLElement $xml, string $url): array { + $title = $xml->id ? $xml->id : $url; + $title = $xml->title != "" ? $xml->title : $title; + + // Find proper name + $replace = ""; + if (isset($xml->link)) { + foreach ($xml->link as $link) { + if ($link["rel"] == "alternate") { + $replace = (string)$link["href"]; + break; + } else { + $replace = $url; + } + } + } else if (isset($xml->author->uri)) { + $replace = $xml->author->uri; + } else { + $replace = $url; + } - // Simplifies the feed data. - private static function format_data(object $xml): array { - // Formats time and returns $type of time format. 
- $time_format = function(object|string $time, string $format = "d-M-Y H:i"): - string { + $data = [ + "title" => $title, + "description" => "$xml->subtitle", + "date" => self::timeFormat($xml->updated, "d-M-Y"), + "time" => self::timeFormat($xml->updated, "H:i"), + "link" => self::isHttp($replace, $url, true) ? self::isHttp($replace, $url) : self::isHttp($xml->id, $url), + "feed" => "$url", + "item" => array() + ]; - // Convert to time format - $timestamp = strtotime("$time"); + foreach ($xml->entry as $item) { + $time = $item->updated ? $item->updated : $item->published; - $time = date($format, $timestamp); + $data_item = [ + "title" => "$item->title", + "link" => self::isHttp((string)$item->link["href"], $url), + "date" => self::timeFormat($time, "d-M-Y"), + "time" => self::timeFormat($time, "H:i") + ]; - return $time; - }; + array_push($data["item"], (object)$data_item); + } + + return $data; + } + + private static function getRSS(SimpleXMLElement $xml, string $url): array { + $xml = $xml->channel; + $time = $xml->lastBuildDate ? $xml->lastBuildDate : $xml->pubDate; - // Formats the xml data $data = [ - "title" => "$xml->title", + "title" => $xml->title != "" ? $xml->title : $xml->link, "description" => "$xml->description", - "date" => $time_format($xml->lastBuildDate, "d-M-Y"), - "time" => $time_format($xml->lastBuildDate, "H:i"), - "link" => "$xml->link", + "date" => self::timeFormat($time, "d-M-Y"), + "time" => self::timeFormat($time, "H:i"), + "link" => $xml->link != "" ? self::isHttp($xml->link, $url) : $url, + "feed" => $url, "item" => array() ]; - // Formats the xml items data foreach ($xml->item as $item) { $data_item = [ - "title" => "$item->title", - "link" => "$item->link", - "date" => $time_format($item->pubDate, "d-M-Y"), - "time" => $time_format($item->pubDate, "H:i") + "title" => $item->title != "" ? $item->title : $item->link, + "link" => $item->link ? 
self::isHttp($item->link, $url) : $item->enclosure["url"], + "date" => self::timeFormat($item->pubDate, "d-M-Y"), + "time" => self::timeFormat($item->pubDate, "H:i") ]; - array_push($data["item"], $data_item); + array_push($data["item"], (object)$data_item); } return $data; } - // Checks feed xml version and returns content as per feed - private static function get_feed(string $content): object { - // Converts $content to object - $xml_data = new SimpleXmlElement($content, LIBXML_NOCDATA); + private static function timeFormat(object|string $time, string $format = + "d-M-Y H:i"): string { - $xml = (object)array(); // Convert array to object + // Convert to time format + $timestamp = strtotime("$time"); - if ($xml_data->channel) // If feed is xml vesrion 1.0 - $xml = $xml_data->channel; - else if($xml_data->entry) // If feed is xml version 2.0 - $xml = $xml_data; - else { - /* self::console_log("$url: Invalid content type"); */ - } + $time = date($format, $timestamp); - return $xml; + return $time; } // Returns given $url page content - private static function get_content(string $url, $useragent = "FeedFetcher-Google"): string { + private static function getContent(string $url): string { $curl = curl_init($url); // Return the transfer as a string. @@ -115,7 +168,9 @@ private static function get_content(string $url, $useragent = "FeedFetcher-Googl // The maximum number of seconds to allow cURL functions to execute. curl_setopt($curl, CURLOPT_TIMEOUT, 60); // Changes curl useragent - curl_setopt($curl, CURLOPT_USERAGENT, $useragent); + curl_setopt($curl, CURLOPT_USERAGENT, self::$useragent); + // To make cURL follow a redirect + curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true); // Given $url content $content = curl_exec($curl); @@ -126,7 +181,7 @@ private static function get_content(string $url, $useragent = "FeedFetcher-Googl } // Finds Feed url from HTML. 
- private static function get_feed_url(string $content): string { + private static function getFeedUrl(string $content): string { // Suppresses DOMDocuments errors libxml_use_internal_errors(true); @@ -153,24 +208,26 @@ private static function get_feed_url(string $content): string { // Checks given $content_type_name value matches given $url header content // type. - private static function check_content_type(string $url, string + private static function checkContentType(string $url, string $type_name): bool { // Get $url header $header = get_headers($url, true); - - $content_type = "Content-Type"; - // Contet type in lower case - $content_type_lower = strtolower($content_type); + $content_type = ""; // Find $type_name in header - $content_pos = strpos($header[$content_type], $type_name); - $content_pos_lower = strpos($header[$content_type_lower], $type_name); - - if ($content_pos) + if (isset($header["Content-Type"])) + $content_type = $header["Content-Type"]; + else + $content_type = $header["content-type"] ?? ""; + + if (gettype($content_type) == "array") + $content_type = end($content_type); + + if (str_contains($content_type, $type_name)) $checked_header = true; - else if ($content_pos_lower) + else { $checked_header = false; - else {} + } return $checked_header; }