From ee45dee9324be48cd87a883405bdd9e11e3f39f5 Mon Sep 17 00:00:00 2001 From: Friendika Date: Wed, 2 Feb 2011 14:48:27 -0800 Subject: suppress some scraping errors when confronted with hybrid/strange feeds that provide insufficient content-type and choke the html parser. --- include/Scrape.php | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/Scrape.php') diff --git a/include/Scrape.php b/include/Scrape.php index bb42c3bdd..ff9899252 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -8,12 +8,18 @@ function scrape_dfrn($url) { $a = get_app(); $ret = array(); + + logger('scrape_dfrn: url=' . $url); + $s = fetch_url($url); if(! $s) return $ret; $headers = $a->get_curl_headers(); + logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG); + + $lines = explode("\n",$headers); if(count($lines)) { foreach($lines as $line) { @@ -93,12 +99,17 @@ function scrape_meta($url) { $a = get_app(); $ret = array(); + + logger('scrape_meta: url=' . $url); + $s = fetch_url($url); if(! $s) return $ret; $headers = $a->get_curl_headers(); + logger('scrape_meta: headers=' . $headers, LOGGER_DEBUG); + $lines = explode("\n",$headers); if(count($lines)) { foreach($lines as $line) { @@ -135,6 +146,9 @@ function scrape_vcard($url) { $a = get_app(); $ret = array(); + + logger('scrape_vcard: url=' . $url); + $s = fetch_url($url); if(! $s) @@ -190,15 +204,17 @@ function scrape_feed($url) { return $ret; $headers = $a->get_curl_headers(); + logger('scrape_feed: headers=' . $headers, LOGGER_DEBUG); + $lines = explode("\n",$headers); if(count($lines)) { foreach($lines as $line) { if(stristr($line,'content-type:')) { - if(stristr($line,'application/atom+xml')) { + if(stristr($line,'application/atom+xml') || stristr($s,'