diff options
author | fabrixxm <fabrix.xm@gmail.com> | 2011-02-02 08:00:50 +0100 |
---|---|---|
committer | fabrixxm <fabrix.xm@gmail.com> | 2011-02-02 08:00:50 +0100 |
commit | a3fd84661f74b0759f9edad1f068b32820b03c5d (patch) | |
tree | 043af8c4f6abb6f860f4fa22670f1e7f968e4b83 /include/Scrape.php | |
parent | 670dba666633fe8db5c56db6748e341eebcf0138 (diff) | |
parent | 777c47aaa697ff46cf554fb4f36d0c252ae42e94 (diff) | |
download | volse-hubzilla-a3fd84661f74b0759f9edad1f068b32820b03c5d.tar.gz volse-hubzilla-a3fd84661f74b0759f9edad1f068b32820b03c5d.tar.bz2 volse-hubzilla-a3fd84661f74b0759f9edad1f068b32820b03c5d.zip |
Merge branch 'friendika-master'
Diffstat (limited to 'include/Scrape.php')
-rw-r--r-- | include/Scrape.php | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/include/Scrape.php b/include/Scrape.php index e4f7a0878..bb42c3bdd 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -5,12 +5,25 @@ require_once('library/HTML5/Parser.php'); if(! function_exists('scrape_dfrn')) { function scrape_dfrn($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -77,12 +90,26 @@ function validate_dfrn($a) { if(! function_exists('scrape_meta')) { function scrape_meta($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + + + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -105,12 +132,24 @@ function scrape_meta($url) { if(! function_exists('scrape_vcard')) { function scrape_vcard($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return ret; + } + } + $dom = HTML5_Parser::parse($s); if(! $dom) @@ -142,12 +181,31 @@ function scrape_vcard($url) { if(! function_exists('scrape_feed')) { function scrape_feed($url) { + $a = get_app(); + $ret = array(); $s = fetch_url($url); if(! $s) return $ret; + $headers = $a->get_curl_headers(); + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + if(stristr($line,'content-type:')) { + if(stristr($line,'application/atom+xml')) { + $ret['feed_atom'] = $url; + return $ret; + } + if(stristr($line,'application/rss+xml')) { + $ret['feed_rss'] = $url; + return ret; + } + } + } + } + $dom = HTML5_Parser::parse($s); if(! $dom) |