diff options
Diffstat (limited to 'lib/private/httphelper.php')
-rw-r--r-- | lib/private/httphelper.php | 177 |
1 files changed, 177 insertions, 0 deletions
<?php
/**
 * Copyright (c) 2014 Lukas Reschke <lukas@owncloud.com>
 * This file is licensed under the Affero General Public License version 3 or
 * later.
 * See the COPYING-README file.
 */

namespace OC;

/**
 * Fetches the content and headers of HTTP/HTTPS URLs.
 *
 * cURL is used when the extension is available; otherwise the class falls
 * back to PHP stream wrappers. In both cases only http:// and https:// URLs
 * are accepted as input and as redirect targets.
 */
class HTTPHelper {
	const USER_AGENT = 'ownCloud Server Crawler';

	/** Maximum number of redirects that are followed for a single request */
	const MAX_REDIRECTS = 10;

	/** @var \OC\AllConfig */
	private $config;

	/**
	 * @param \OC\AllConfig $config source of the 'proxy' and 'proxyuserpwd' system values
	 */
	public function __construct(AllConfig $config) {
		$this->config = $config;
	}

	/**
	 * Returns the default stream context options used for non-cURL requests
	 *
	 * @return array options suitable for stream_context_create() / stream_context_set_default()
	 */
	public function getDefaultContextArray() {
		return array(
			'http' => array(
				'header' => 'User-Agent: ' . self::USER_AGENT . "\r\n",
				'timeout' => 10,
				'follow_location' => false, // Do not follow the location since we can't limit the protocol
			),
			'ssl' => array(
				'disable_compression' => true
			)
		);
	}

	/**
	 * Get URL content
	 *
	 * Fetches the page via cURL if the extension is available, otherwise via
	 * file_get_contents(). The 'proxy' system value is honoured in both code
	 * paths; 'proxyuserpwd' is only applied in the cURL path.
	 *
	 * @param string $url Url to get content
	 * @throws \Exception If the URL does not start with http:// or https://
	 * @return string|false the response body, or false on error or when more
	 *                      than MAX_REDIRECTS redirects were encountered
	 */
	public function getUrlContent($url) {
		if (!$this->isHTTPURL($url)) {
			throw new \Exception('$url must start with https:// or http://', 1);
		}

		$proxy = $this->config->getSystemValue('proxy', null);
		$proxyUserPwd = $this->config->getSystemValue('proxyuserpwd', null);

		if (!function_exists('curl_init')) {
			// Stream wrapper fallback: redirects are resolved by hand because
			// fopen() cannot be restricted to the HTTP(S) protocols.
			$url = $this->getFinalLocationOfURL($url);
			$contextArray = $this->getDefaultContextArray();

			if ($proxy !== null) {
				$contextArray['http']['proxy'] = $proxy;
			}

			$ctx = stream_context_create($contextArray);
			return @file_get_contents($url, 0, $ctx);
		}

		$curl = curl_init();

		curl_setopt($curl, CURLOPT_HEADER, 0);
		curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
		curl_setopt($curl, CURLOPT_URL, $url);
		// Restrict both the initial request and any redirect to HTTP(S)
		curl_setopt($curl, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
		curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

		curl_setopt($curl, CURLOPT_USERAGENT, self::USER_AGENT);
		if ($proxy !== null) {
			curl_setopt($curl, CURLOPT_PROXY, $proxy);
		}
		if ($proxyUserPwd !== null) {
			curl_setopt($curl, CURLOPT_PROXYUSERPWD, $proxyUserPwd);
		}

		if ($this->canCurlFollowLocation()) {
			curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
			curl_setopt($curl, CURLOPT_MAXREDIRS, self::MAX_REDIRECTS);
			$data = curl_exec($curl);
		} else {
			// cURL is not allowed to follow redirects itself, so resolve the
			// redirect chain manually before requesting the body.
			curl_setopt($curl, CURLOPT_FOLLOWLOCATION, false);
			$finalURL = $this->resolveRedirectsWithCurl($curl, $url);

			if ($finalURL === false) {
				$data = false;
			} else {
				curl_setopt($curl, CURLOPT_URL, $finalURL);
				$data = curl_exec($curl);
			}
		}
		curl_close($curl);

		return $data;
	}

	/**
	 * Returns the response headers of a HTTP URL without following redirects
	 *
	 * Note: this replaces the process-wide default stream context with
	 * getDefaultContextArray() as a side effect.
	 *
	 * @param string $location Needs to be a HTTPS or HTTP URL
	 * @return array|false associative header array, or false if the request failed
	 */
	public function getHeaders($location) {
		stream_context_set_default($this->getDefaultContextArray());
		return get_headers($location, 1);
	}

	/**
	 * Checks whether the supplied URL begins with HTTPS:// or HTTP:// (case insensitive)
	 *
	 * @param string $url
	 * @return bool
	 */
	public function isHTTPURL($url) {
		return stripos($url, 'https://') === 0 || stripos($url, 'http://') === 0;
	}

	/**
	 * Returns the last HTTP or HTTPS site the request has been redirected to using the Location HTTP header
	 *
	 * This is a workaround for the missing functionality to restrict fopen()
	 * to protocols. At most MAX_REDIRECTS redirects are followed so that a
	 * redirect cycle cannot hang the request; redirects to a non HTTP(S)
	 * target are not followed.
	 *
	 * @param string $location Needs to be a HTTPS or HTTP URL
	 * @throws \Exception In case the initial URL is not a HTTP or HTTPS one
	 * @return string
	 */
	public function getFinalLocationOfURL($location) {
		if (!$this->isHTTPURL($location)) {
			throw new \Exception('URL must begin with HTTPS or HTTP.');
		}

		for ($hops = 0; $hops < self::MAX_REDIRECTS; $hops++) {
			$headerArray = $this->getHeaders($location);
			if ($headerArray === false) {
				break;
			}

			// Header names are case-insensitive, get_headers() keeps the
			// spelling used by the server.
			$headerArray = array_change_key_case($headerArray, CASE_LOWER);
			if (!isset($headerArray['location'])) {
				break;
			}

			// A repeated Location header is returned as an array; treat that
			// as "no usable redirect target".
			$target = $headerArray['location'];
			if (!is_string($target) || !$this->isHTTPURL($target)) {
				break;
			}

			$location = $target;
		}

		return $location;
	}

	/**
	 * Tells whether CURLOPT_FOLLOWLOCATION may be enabled
	 *
	 * The option is only used when open_basedir is empty and safe_mode is off.
	 * ini_get() reports a disabled boolean setting as '' or '0', and returns
	 * false when the setting does not exist at all.
	 *
	 * @return bool
	 */
	private function canCurlFollowLocation() {
		$safeMode = strtolower((string)ini_get('safe_mode'));
		$safeModeOff = in_array($safeMode, array('', '0', 'off'), true);

		return ini_get('open_basedir') === '' && $safeModeOff;
	}

	/**
	 * Follows redirects by hand using header-only requests on a copy of $curl
	 *
	 * Relative Location values are passed to cURL as they are; they are not
	 * resolved against the current URL.
	 *
	 * @param resource $curl configured cURL handle, left untouched
	 * @param string $url URL to start from
	 * @return string|false the URL to finally request, or false if more than
	 *                      MAX_REDIRECTS redirects were encountered
	 */
	private function resolveRedirectsWithCurl($curl, $url) {
		$probe = curl_copy_handle($curl);
		curl_setopt($probe, CURLOPT_HEADER, true);
		curl_setopt($probe, CURLOPT_NOBODY, true);
		curl_setopt($probe, CURLOPT_FORBID_REUSE, false);
		curl_setopt($probe, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($probe, CURLOPT_USERAGENT, self::USER_AGENT);

		$remaining = self::MAX_REDIRECTS;
		do {
			curl_setopt($probe, CURLOPT_URL, $url);
			$header = curl_exec($probe);

			$redirected = false;
			if (!curl_errno($probe)) {
				$code = (int)curl_getinfo($probe, CURLINFO_HTTP_CODE);
				if (in_array($code, array(301, 302, 303, 307, 308), true)
					&& preg_match('/^Location:(.*?)$/mi', $header, $matches)
				) {
					$target = trim($matches[1]);
					if ($target !== '') {
						$url = $target;
						$redirected = true;
					}
				}
			}
		} while ($redirected && --$remaining);
		curl_close($probe);

		// $remaining only reaches 0 when every probe answered with a redirect
		return $remaining > 0 ? $url : false;
	}

}