From d0227277d06a2a5651113a20f10491e942344d32 Mon Sep 17 00:00:00 2001
From: marvin255
Date: Thu, 22 Jul 2021 19:37:20 +0200
Subject: [PATCH] Retry feature for downloader (#44)

---
 src/Downloader/CurlDownloader.php           | 197 +++++++++++++++-----
 tests/src/Downloader/CurlDownloaderTest.php |  81 ++++----
 2 files changed, 191 insertions(+), 87 deletions(-)

diff --git a/src/Downloader/CurlDownloader.php b/src/Downloader/CurlDownloader.php
index 48aaeaa9..c9b70e2c 100644
--- a/src/Downloader/CurlDownloader.php
+++ b/src/Downloader/CurlDownloader.php
@@ -18,12 +18,19 @@ class CurlDownloader implements Downloader
      */
     private $additionalCurlOptions;
 
+    /**
+     * @var int maximum number of download attempts, never less than 1
+     */
+    private $maxAttempts;
+
     /**
      * @param array $additionalCurlOptions
+     * @param int $maxAttempts maximum number of download attempts (values below 1 are treated as 1)
      */
-    public function __construct(array $additionalCurlOptions = [])
+    public function __construct(array $additionalCurlOptions = [], int $maxAttempts = 10)
     {
         $this->additionalCurlOptions = $additionalCurlOptions;
+        $this->maxAttempts = max(1, $maxAttempts); // at least one attempt, so download() always has a response
     }
 
     /**
@@ -35,85 +42,185 @@ public function download(string $url, SplFileInfo $localFile): void
             throw new InvalidArgumentException("Wrong url format: {$url}");
         }
 
-        $fh = $this->openLocalFile($localFile);
-        $requestOptions = $this->createRequestOptions($url, $fh);
+        $headers = $this->getHeadResponseHeaders($url);
+        $contentLength = (int) ($headers['content-length'] ?? 0);
+        $isRangeSupported = $contentLength > 0 && ($headers['accept-ranges'] ?? '') === 'bytes';
+
+        $options = [
+            \CURLOPT_FOLLOWLOCATION => true,
+            \CURLOPT_FRESH_CONNECT => true,
+            \CURLOPT_CONNECTTIMEOUT => 5,
+            \CURLOPT_TIMEOUT => 60 * 25,
+            \CURLOPT_FILE => $this->openLocalFile($localFile, 'wb'),
+        ];
+
+        for ($i = 0; $i < $this->maxAttempts; ++$i) {
+            $response = $this->runRequest($url, $options);
+            if ($response['isOk'] && empty($response['error'])) {
+                break;
+            }
+            // on failure the download is retried,
+            // but the file resource has to be reopened first
+            fclose($options[\CURLOPT_FILE]);
+            // if some data has already been downloaded and the server supports Range,
+            // try to resume from the same position
+            clearstatcache(true, $localFile->getRealPath());
+            $fileSize = (int) filesize($localFile->getRealPath());
+            if ($fileSize > 0 && $isRangeSupported) {
+                $options[\CURLOPT_FILE] = $this->openLocalFile($localFile, 'ab');
+                $options[\CURLOPT_RANGE] = $fileSize . '-' . ($contentLength - 1);
+            } else {
+                $options[\CURLOPT_FILE] = $this->openLocalFile($localFile, 'wb');
+            }
+        }
 
-        [$res, $httpCode, $error] = $this->curlDownload($requestOptions);
-        fclose($fh);
+        fclose($options[\CURLOPT_FILE]);
 
-        if ($res === false) {
-            throw new DownloaderException("Error while downloading '{$url}': {$error}");
-        } elseif ($httpCode !== 200) {
-            throw new DownloaderException("Url '{$url}' returns status: {$httpCode}");
+        if (!empty($response['error'])) {
+            $message = sprintf(
+                "There was an error while downloading '%s': %s.",
+                $url,
+                $response['error']
+            );
+            throw new DownloaderException($message);
+        }
+
+        if (empty($response['isOk'])) {
+            $status = 'xxx';
+            if (!empty($response['status'])) {
+                $status = $response['status'];
+            }
+            $message = sprintf(
+                "Url '%s' returned status: %s.",
+                $url,
+                $status
+            );
+            throw new DownloaderException($message);
         }
     }
 
     /**
-     * Загружает файл по ссылке в указанный файл.
+     * Returns the list of headers from the response to a HEAD request.
      *
-     * @param array $requestOptions
+     * @param string $url
      *
      * @return array
-     *
-     * @throws DownloaderException
      */
-    protected function curlDownload(array $requestOptions): array
+    private function getHeadResponseHeaders(string $url): array
     {
-        $ch = curl_init();
-        if ($ch === false) {
-            throw new DownloaderException("Can't init curl resource.");
-        }
-
-        curl_setopt_array($ch, $requestOptions);
-
-        $res = curl_exec($ch);
-        $httpCode = (int) curl_getinfo($ch, \CURLINFO_HTTP_CODE);
-        $error = curl_error($ch);
-        curl_close($ch);
-
-        return [$res, $httpCode, $error];
+        $response = $this->runRequest(
+            $url,
+            [
+                \CURLOPT_HEADER => true,
+                \CURLOPT_NOBODY => true,
+                \CURLOPT_RETURNTRANSFER => true,
+            ]
+        );
+
+        return $response['headers'] ?? [];
     }
 
     /**
-     * Открывает локальный файл, в который будет вестись запись и возвращает его
-     * ресурс.
+     * Opens the local file that will be written to
+     * and returns its resource; throws DownloaderException when it can't be opened.
      *
      * @param SplFileInfo $localFile
+     * @param string $mode fopen mode; download() passes 'wb' for a fresh download and 'ab' to resume
      *
      * @return resource
-     *
-     * @throws DownloaderException
      */
-    protected function openLocalFile(SplFileInfo $localFile)
+    private function openLocalFile(SplFileInfo $localFile, string $mode)
     {
-        $hLocal = @fopen($localFile->getPathname(), 'wb');
+        $hLocal = @fopen($localFile->getPathname(), $mode);
 
         if ($hLocal === false) {
-            throw new DownloaderException(
-                "Can't open local file for writing: " . $localFile->getPathname()
+            $message = sprintf(
+                "Can't open local file for writing: %s.",
+                $localFile->getPathname()
             );
+            throw new DownloaderException($message);
         }
 
         return $hLocal;
     }
 
     /**
-     * Создаем массив настроек для запроса.
+     * Sends a curl request and returns an array with the 'status', 'isOk', 'headers' and 'error' keys.
+     *
+     * @param string $url
+     * @param array $options
+     *
+     * @return array
+     */
+    protected function runRequest(string $url, array $options): array
+    {
+        $fullOptionsList = $this->additionalCurlOptions + $options; // array union: additional options win
+        $fullOptionsList[\CURLOPT_URL] = $url;
+
+        [$statusCode, $content, $error] = $this->runCurlRequest($fullOptionsList);
+
+        return [
+            'status' => $statusCode,
+            'isOk' => $statusCode >= 200 && $statusCode < 300,
+            'headers' => $this->extractHeadersFromContent($content),
+            'error' => $error,
+        ];
+    }
+
+    /**
+     * Runs a raw curl request and returns the list [status code, curl_exec() result, curl error message].
      *
-     * @param string $url
-     * @param resource $fh
+     * @param array $options
      *
      * @return array
      */
-    protected function createRequestOptions(string $url, $fh): array
+    protected function runCurlRequest(array $options): array
+    {
+        $ch = curl_init();
+        if ($ch === false) {
+            throw new DownloaderException("Can't init curl resource.");
+        }
+
+        curl_setopt_array($ch, $options);
+        $content = curl_exec($ch);
+        $statusCode = (int) curl_getinfo($ch, \CURLINFO_HTTP_CODE);
+        $response = [
+            $statusCode,
+            $content,
+            curl_error($ch),
+        ];
+        curl_close($ch);
+
+        return $response;
+    }
+
+    /**
+     * Extracts the list of headers from an http response; names and values are lower-cased.
+     *
+     * @param mixed $content
+     *
+     * @return array
+     */
+    private function extractHeadersFromContent($content): array
     {
-        $requestOptions = $this->additionalCurlOptions ?: [];
+        if (!\is_string($content)) {
+            return [];
+        }
 
-        $requestOptions[\CURLOPT_URL] = $url;
-        $requestOptions[\CURLOPT_FILE] = $fh;
-        $requestOptions[\CURLOPT_FOLLOWLOCATION] = true;
-        $requestOptions[\CURLOPT_FRESH_CONNECT] = true;
+        $explodeHeadersContent = explode("\n\n", $content, 2);
+
+        $headers = [];
+        $rawHeaders = explode("\n", $explodeHeadersContent[0]);
+        foreach ($rawHeaders as $rawHeader) {
+            $rawHeaderExplode = explode(':', $rawHeader, 2);
+            if (\count($rawHeaderExplode) < 2) {
+                continue;
+            }
+            $name = str_replace('_', '-', strtolower(trim($rawHeaderExplode[0])));
+            $value = strtolower(trim($rawHeaderExplode[1]));
+            $headers[$name] = $value;
+        }
 
-        return $requestOptions;
+        return $headers;
     }
 }
diff --git a/tests/src/Downloader/CurlDownloaderTest.php b/tests/src/Downloader/CurlDownloaderTest.php
index 9cc39408..ce32f1cf 100644
--- a/tests/src/Downloader/CurlDownloaderTest.php
+++ b/tests/src/Downloader/CurlDownloaderTest.php
@@ -31,17 +31,17 @@ public function testDownload(): void
         $destination = new SplFileInfo($destinationPath);
 
         $curl = $this->createDownloaderMock(
-            [
-                true,
-                200,
-                null,
-            ],
-            function (array $requestOptions) use ($source) {
-                return \in_array($source, $requestOptions)
-                    && isset($requestOptions[\CURLOPT_FILE])
-                    && \is_resource($requestOptions[\CURLOPT_FILE])
-                    && !empty($requestOptions[\CURLOPT_CONNECT_ONLY])
-                ;
+            function (array $options) use ($source) {
+                if (
+                    !empty($options[\CURLOPT_HEADER])
+                    || $options[\CURLOPT_URL] === $source
+                    && \is_resource($options[\CURLOPT_FILE])
+                    && !empty($options[\CURLOPT_CONNECT_ONLY])
+                ) {
+                    return [200, '', null];
+                }
+
+                return [500, '', 'error'];
             },
             [
                 \CURLOPT_CONNECT_ONLY => true,
@@ -63,7 +63,7 @@ public function testDownloadBrokenUrlException(): void
         $destinationPath = $this->getPathToTestFile('archive.rar');
         $destination = new SplFileInfo($destinationPath);
 
-        $curl = $this->createDownloaderMock();
+        $curl = new CurlDownloader();
 
         $this->expectException(InvalidArgumentException::class);
         $curl->download($source, $destination);
@@ -81,11 +81,13 @@ public function testDownloadCurlErrorException(): void
         $destination = new SplFileInfo($destinationPath);
 
         $curl = $this->createDownloaderMock(
-            [
-                false,
-                0,
-                'error',
-            ]
+            function (array $options) {
+                if (!empty($options[\CURLOPT_HEADER])) {
+                    return [200, '', null];
+                }
+
+                return [200, false, 'error'];
+            },
         );
 
         $this->expectException(DownloaderException::class);
@@ -104,11 +106,13 @@ public function testDownloadWrongResponseCodeException(): void
         $destination = new SplFileInfo($destinationPath);
 
         $curl = $this->createDownloaderMock(
-            [
-                true,
-                413,
-                null,
-            ]
+            function (array $options) {
+                if (!empty($options[\CURLOPT_HEADER])) {
+                    return [200, '', null];
+                }
+
+                return [500, '', null];
+            },
         );
 
         $this->expectException(DownloaderException::class);
@@ -126,7 +130,11 @@ public function testDownloadCantOpenFileException(): void
         $destinationPath = '/wrong/path/to/file.rar';
         $destination = new SplFileInfo($destinationPath);
 
-        $curl = $this->createDownloaderMock();
+        $curl = $this->createDownloaderMock(
+            function (array $options) {
+                return [200, '', null];
+            }
+        );
 
         $this->expectException(DownloaderException::class);
         $curl->download($source, $destination);
@@ -135,21 +143,17 @@ public function testDownloadCantOpenFileException(): void
     /**
      * Создает настроенный мок для curl загрузчика.
      *
-     * @param mixed $return
-     * @param callable|null $with
-     * @param array $additionalCurlOptions
+     * @param callable $with callback that replaces runCurlRequest: receives the curl options array
+     * @param array $additionalCurlOptions
      *
      * @return Downloader
      */
-    private function createDownloaderMock(
-        $return = null,
-        ?callable $with = null,
-        array $additionalCurlOptions = []
-    ): Downloader {
+    private function createDownloaderMock(callable $with, array $additionalCurlOptions = []): Downloader
+    {
         $downloader = $this->getMockBuilder(CurlDownloader::class)
             ->onlyMethods(
                 [
-                    'curlDownload',
+                    'runCurlRequest',
                 ]
             )
             ->setConstructorArgs(
@@ -159,16 +163,9 @@ private function createDownloaderMock(
             )
             ->getMock();
 
-        $expects = $return === null ? $this->never() : $this->once();
-        $method = $downloader->expects($expects)->method('curlDownload');
-
-        if ($with) {
-            $method->with($this->callback($with));
-        }
-
-        if (\is_array($return)) {
-            $method->willReturn($return);
-        }
+        $downloader->expects($this->atLeastOnce())
+            ->method('runCurlRequest')
+            ->willReturnCallback($with);
 
         return $downloader;
     }