about | summary | refs | log | tree | commit | diff | stats
path: root/include
diff options
context:
space:
mode:
author: Friendika <info@friendika.com> 2011-02-02 14:48:27 -0800
committer: Friendika <info@friendika.com> 2011-02-02 14:48:27 -0800
commit: ee45dee9324be48cd87a883405bdd9e11e3f39f5 (patch)
tree: ae4588ffabf2c2099e705f049bbf80d5ec494774 /include
parent: fce9988f73ee8ad3624586e6866a68a2ae952ef8 (diff)
download: volse-hubzilla-ee45dee9324be48cd87a883405bdd9e11e3f39f5.tar.gz
volse-hubzilla-ee45dee9324be48cd87a883405bdd9e11e3f39f5.tar.bz2
volse-hubzilla-ee45dee9324be48cd87a883405bdd9e11e3f39f5.zip
Suppress some scraping errors when confronted with hybrid/strange
feeds that provide an insufficient Content-Type and choke the HTML parser.
Diffstat (limited to 'include')
-rw-r--r-- include/Scrape.php 20
1 file changed, 18 insertions, 2 deletions
diff --git a/include/Scrape.php b/include/Scrape.php
index bb42c3bdd..ff9899252 100644
--- a/include/Scrape.php
+++ b/include/Scrape.php
@@ -8,12 +8,18 @@ function scrape_dfrn($url) {
$a = get_app();
$ret = array();
+
+ logger('scrape_dfrn: url=' . $url);
+
$s = fetch_url($url);
if(! $s)
return $ret;
$headers = $a->get_curl_headers();
+ logger('scrape_dfrn: headers=' . $headers, LOGGER_DEBUG);
+
+
$lines = explode("\n",$headers);
if(count($lines)) {
foreach($lines as $line) {
@@ -93,12 +99,17 @@ function scrape_meta($url) {
$a = get_app();
$ret = array();
+
+ logger('scrape_meta: url=' . $url);
+
$s = fetch_url($url);
if(! $s)
return $ret;
$headers = $a->get_curl_headers();
+ logger('scrape_meta: headers=' . $headers, LOGGER_DEBUG);
+
$lines = explode("\n",$headers);
if(count($lines)) {
foreach($lines as $line) {
@@ -135,6 +146,9 @@ function scrape_vcard($url) {
$a = get_app();
$ret = array();
+
+ logger('scrape_vcard: url=' . $url);
+
$s = fetch_url($url);
if(! $s)
@@ -190,15 +204,17 @@ function scrape_feed($url) {
return $ret;
$headers = $a->get_curl_headers();
+ logger('scrape_feed: headers=' . $headers, LOGGER_DEBUG);
+
$lines = explode("\n",$headers);
if(count($lines)) {
foreach($lines as $line) {
if(stristr($line,'content-type:')) {
- if(stristr($line,'application/atom+xml')) {
+ if(stristr($line,'application/atom+xml') || stristr($s,'<feed')) {
$ret['feed_atom'] = $url;
return $ret;
}
- if(stristr($line,'application/rss+xml')) {
+ if(stristr($line,'application/rss+xml') || stristr($s,'<rss')) {
$ret['feed_rss'] = $url;
return ret;
}