From ba25587124f772a9dc0d1d6b6f9c94c39167ce6d Mon Sep 17 00:00:00 2001 From: gidlov Date: Wed, 30 Aug 2017 22:09:36 +0200 Subject: [PATCH] X-CSRF token login support --- src/Gidlov/Copycat/Copycat.php | 127 ++++++++++++++++++++++++--------- 1 file changed, 95 insertions(+), 32 deletions(-) diff --git a/src/Gidlov/Copycat/Copycat.php b/src/Gidlov/Copycat/Copycat.php index 379b147..b0c7a59 100644 --- a/src/Gidlov/Copycat/Copycat.php +++ b/src/Gidlov/Copycat/Copycat.php @@ -1,14 +1,15 @@ 1, CURLOPT_CONNECTTIMEOUT => 5, CURLOPT_USERAGENT => 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)', - CURLOPT_FOLLOWLOCATION => true, - CURLOPT_COOKIESESSION => true )); } @@ -112,9 +118,21 @@ public function get() { return null; } + /** + * Send a HTTP POST request. + * + * @return array + */ + public function post($post) { + if (isset($post)) { + $this->_post = $post; + } + return $this; + } + /** * The list of address to scan. - * + * * @param string/array $urls * @return object */ @@ -134,7 +152,7 @@ public function URLs($urls) { * 'imdb+stay', * ) * ) - * + * * There is a possibility to add any matching addresses by adding 'to' => 'matches'​​, * in the array. * @@ -151,7 +169,7 @@ public function fillURLs($searches) { * * Set A multidimensional array where the key represents a predefined CURL-constant * and where the value is the value of the constant. This method is optional. - * + * * @param array const * @return object */ @@ -159,7 +177,7 @@ public function setCURL($const) { $this->_curl_options = $const + $this->_curl_options; $this->_curl = curl_init(); curl_setopt_array($this->_curl, $this->_curl_options); - curl_exec($this->_curl); + //curl_exec($this->_curl); return $this; } @@ -181,8 +199,8 @@ public function setCURL($const) { * the key 'title'. It will then add .jpg and save the file in a folder called items. * * Other keys to use are: before_value, after_value, before_key, after_key. Fairly - * self-explanatory name. before_value may be useful if the page uses relative addresses. - * + * self-explanatory name. before_value may be useful if the page uses relative addresses. + * * @param array regex * @return object */ @@ -193,7 +211,7 @@ public function match($regex) { /** * Same as match() but utilizes preg_match_all(). - * + * * @param array regex * @return object */ @@ -204,7 +222,7 @@ public function matchAll($regex) { /** * Callback functions to apply to all results. - * + * * @param array function * @return object */ @@ -214,7 +232,7 @@ public function callback($function) { } /** - * Saves the result of a webpage in $_html + * Saves the result of a webpage in $_html. * * @param string url */ @@ -222,10 +240,55 @@ protected function _setHTML($url) { $this->_html = $this->_getCURL($url); } + /** + * Send HTTP POST request. + */ + protected function _sendPost() { + if (isset($this->_curl_options[CURLOPT_HTTPHEADER])) { + $old_httpheader = $this->_curl_options[CURLOPT_HTTPHEADER]; + } + foreach ($this->_post as $url => $data) { + $this->_setHTML($url); + $postfields = array(); + foreach ($data as $name => $value) { + if (is_array($value)) { + $result = $this->_filter($value[0], $this->_html, 1, $name); + $postfields[] = $name.'='.$result; + } else { + $postfields[] = $name.'='.$value; + } + } + $postfields = implode('&', $postfields); + $httpheader = array( + 'X-CSRF-Token: '.$result, + 'Content-Length: '.strlen($postfields), + 'Connection: Keep-Alive', + 'Keep-Alive: 300', + ); + if (isset($old_httpheader)) { + $httpheader = array_merge($httpheader, $old_httpheader); + } + $this->setCURL(array( + CURLOPT_POST => 1, + CURLOPT_POSTFIELDS => $postfields, + CURLOPT_HTTPHEADER => $httpheader, + )); + $this->_setHTML($url); + } + unset($this->_curl_options[CURLOPT_POST]); + unset($this->_curl_options[CURLOPT_POSTFIELDS]); + if (isset($old_httpheader)) { + $this->_curl_options[CURLOPT_HTTPHEADER] = $old_httpheader; + } + } + /** * Starts the process to load pages and saves the matching results. */ protected function _getURLs() { + if ($this->_post) { + $this->_sendPost(); + } if ($this->_fill_urls) { $this->_getFillURLs(); } @@ -241,7 +304,7 @@ protected function _getURLs() { /** * Find and save the results of the current page. - * + * * @param string $key */ protected function _getMatch($key = 0) { @@ -257,7 +320,7 @@ protected function _getMatch($key = 0) { /** * Same as _getMatch(). - * + * * @param string $key */ protected function _getMatchAll($key = 0) { @@ -273,7 +336,7 @@ protected function _getMatchAll($key = 0) { /** * Modifies the file name, key-value values ​​for files. - * + * * @param string $name * @param string $var * @param string $key @@ -290,7 +353,7 @@ protected function _setFile($name, $var, $key = 0) { $after_key = isset($var['after_key']) ? $var['after_key'] : ''; $before_value = isset($var['before_value']) ? $var['before_value'] : ''; $after_value = isset($var['after_value']) ? $var['after_value'] : ''; - + $match = $before_value.$this->_filter($var['regex'], $this->_html, 1, $name).$after_value; $filename = $before_key.$k.$after_key; $directory = isset($var['directory']) ? $var['directory'] : ''; @@ -300,7 +363,7 @@ protected function _setFile($name, $var, $key = 0) { /** * Save the file. - * + * * @param string $url * @param string $filename * @param string $directory @@ -332,7 +395,7 @@ protected function _getFillURLs() { /** * Load the data from the URL. - * + * * @param string $url * @return string */ @@ -344,7 +407,7 @@ protected function _getCURL($url) { /** * Apply the regular expression to the content and return all matches. - * + * * @param string $regex * @param string $content * @param int $i @@ -358,7 +421,7 @@ protected function _filterAll($regex, $content, $i = 1, $key = '') { if (isset($this->_callback['_all_'])) { foreach ($this->_callback['_all_'] as $filter) { $result = array_map($filter, $result); - } + } } if ($key != '' && isset($this->_callback[$key])) { foreach ($this->_callback[$key] as $filter) { @@ -370,7 +433,7 @@ protected function _filterAll($regex, $content, $i = 1, $key = '') { /** * Apply the regular expression to the content and return first match. - * + * * @param string $regex * @param string $content * @param int $i @@ -393,10 +456,10 @@ protected function _filter($regex, $content, $i = 1, $key = '') { } return false; } - + /** * Check if a URL is valid. - * + * * @param string $url * @return bool */ @@ -407,4 +470,4 @@ protected function _validateURL($url) { return false; } -} \ No newline at end of file +}