diff options
Diffstat (limited to 'include')
-rwxr-xr-x | include/Scrape.php | 17 | ||||
-rwxr-xr-x | include/network.php | 19 |
2 files changed, 24 insertions, 12 deletions
diff --git a/include/Scrape.php b/include/Scrape.php index 52405ae2d..1835892eb 100755 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -230,11 +230,16 @@ function scrape_feed($url) { $ret = array(); $s = fetch_url($url); - if(! $s) + $headers = $a->get_curl_headers(); + $code = $a->get_curl_code(); + + logger('scrape_feed: returns: ' . $code . ' headers=' . $headers, LOGGER_DEBUG); + + if(! $s) { + logger('scrape_feed: no data returned for ' . $url); return $ret; + } - $headers = $a->get_curl_headers(); - logger('scrape_feed: headers=' . $headers, LOGGER_DEBUG); $lines = explode("\n",$headers); if(count($lines)) { @@ -258,8 +263,10 @@ function scrape_feed($url) { logger('scrape_feed: parse error: ' . $e); } - if(! $dom) + if(! $dom) { + logger('scrape_feed: failed to parse.'); return $ret; + } $head = $dom->getElementsByTagName('base'); @@ -556,7 +563,7 @@ function probe_url($url, $mode = PROBE_NORMAL) { if($check_feed) { $feedret = scrape_feed(($poll) ? $poll : $url); - logger('probe_url: scrape_feed returns: ' . print_r($feedret,true), LOGGER_DATA); + logger('probe_url: scrape_feed ' . (($poll)? $poll : $url) . ' returns: ' . print_r($feedret,true), LOGGER_DATA); if(count($feedret) && ($feedret['feed_atom'] || $feedret['feed_rss'])) { $poll = ((x($feedret,'feed_atom')) ? unamp($feedret['feed_atom']) : unamp($feedret['feed_rss'])); if(! x($vcard)) diff --git a/include/network.php b/include/network.php index 531c3ea4c..c72919dd8 100755 --- a/include/network.php +++ b/include/network.php @@ -17,7 +17,7 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ if (!is_null($accept_content)){ curl_setopt($ch,CURLOPT_HTTPHEADER, array ( - "Accept: "+$accept_content + "Accept: " . $accept_content )); } @@ -60,6 +60,7 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ $curl_info = @curl_getinfo($ch); $http_code = $curl_info['http_code']; +// logger('fetch_url:' . $http_code . ' data: ' . $s); $header = ''; // Pull out multiple headers, e.g. proxy and continuation headers @@ -74,11 +75,13 @@ function fetch_url($url,$binary = false, &$redirects = 0, $timeout = 0, $accept_ if($http_code == 301 || $http_code == 302 || $http_code == 303 || $http_code == 307) { $matches = array(); preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); - $url = trim(array_pop($matches)); - $url_parsed = @parse_url($url); + $newurl = trim(array_pop($matches)); + if(strpos($newurl,'/') === 0) + $newurl = $url . $newurl; + $url_parsed = @parse_url($newurl); if (isset($url_parsed)) { $redirects++; - return fetch_url($url,$binary,$redirects,$timeout); + return fetch_url($newurl,$binary,$redirects,$timeout); } } @@ -163,11 +166,13 @@ function post_url($url,$params, $headers = null, &$redirects = 0, $timeout = 0) if($http_code == 301 || $http_code == 302 || $http_code == 303) { $matches = array(); preg_match('/(Location:|URI:)(.*?)\n/', $header, $matches); - $url = trim(array_pop($matches)); - $url_parsed = @parse_url($url); + $newurl = trim(array_pop($matches)); + if(strpos($newurl,'/') === 0) + $newurl = $url . $newurl; + $url_parsed = @parse_url($newurl); if (isset($url_parsed)) { $redirects++; - return post_url($url,$params,$headers,$redirects,$timeout); + return fetch_url($newurl,$binary,$redirects,$timeout); } } $a->set_curl_code($http_code); |