From 793967a1d3c23fcf1f3b00a2832f51e6f473f4bd Mon Sep 17 00:00:00 2001 From: Friendika Date: Mon, 4 Apr 2011 19:36:18 -0700 Subject: better handling of troublesome feeds. --- include/Scrape.php | 2 +- include/items.php | 23 +++++++++++++++-------- include/poller.php | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/Scrape.php b/include/Scrape.php index ff9899252..21820ddaf 100644 --- a/include/Scrape.php +++ b/include/Scrape.php @@ -216,7 +216,7 @@ function scrape_feed($url) { } if(stristr($line,'application/rss+xml') || stristr($s,'type) $o .= '' . xmlify($r->type) . '' . "\r\n"; if($r->id) @@ -206,7 +206,7 @@ function construct_activity_target($item) { if($item['target']) { $o = '' . "\r\n"; - $r = @simplexml_load_string($item['target']); + $r = parse_xml_string($item['target']); if($r->type) $o .= '' . xmlify($r->type) . '' . "\r\n"; if($r->id) @@ -241,8 +241,14 @@ function get_atom_elements($feed,$item) { $res = array(); $author = $item->get_author(); - $res['author-name'] = unxmlify($author->get_name()); - $res['author-link'] = unxmlify($author->get_link()); + if($author) { + $res['author-name'] = unxmlify($author->get_name()); + $res['author-link'] = unxmlify($author->get_link()); + } + else { + $res['author-name'] = unxmlify($feed->get_title()); + $res['author-link'] = unxmlify($feed->get_permalink()); + } $res['uri'] = unxmlify($item->get_id()); $res['title'] = unxmlify($item->get_title()); $res['body'] = unxmlify($item->get_content()); @@ -343,7 +349,6 @@ function get_atom_elements($feed,$item) { // the wild, by sanitising it and converting supported tags to bbcode before we rip out any remaining // html. - if((strpos($res['body'],'<') !== false) || (strpos($res['body'],'>') !== false)) { $res['body'] = preg_replace('#]+>.+?' . 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s', @@ -783,7 +788,7 @@ function dfrn_deliver($owner,$contact,$atom, $dissolve = false) { return 3; } - $res = simplexml_load_string($xml); + $res = parse_xml_string($xml); if((intval($res->status) != 0) || (! strlen($res->challenge)) || (! strlen($res->dfrn_id))) return (($res->status) ? $res->status : 3); @@ -878,7 +883,7 @@ function dfrn_deliver($owner,$contact,$atom, $dissolve = false) { return 3; } - $res = simplexml_load_string($xml); + $res = parse_xml_string($xml); return $res->status; @@ -916,6 +921,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee if($feed->error()) logger('consume_feed: Error parsing XML: ' . $feed->error()); + $permalink = $feed->get_permalink(); // Check at the feed level for updated contact name and/or photo @@ -1230,6 +1236,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee // Head post of a conversation. Have we seen it? If not, import it. $item_id = $item->get_id(); + $datarray = get_atom_elements($feed,$item); $r = q("SELECT `uid`, `last-child`, `edited`, `body` FROM `item` WHERE `uri` = '%s' AND `uid` = %d LIMIT 1", @@ -1275,7 +1282,7 @@ function consume_feed($xml,$importer,&$contact, &$hub, $datedir = 0, $secure_fee if(! is_array($contact)) return; - if($contact['network'] === 'stat') { + if($contact['network'] === 'stat' || stristr($permalink,'twitter.com')) { if(strlen($datarray['title'])) unset($datarray['title']); $datarray['last-child'] = 1; diff --git a/include/poller.php b/include/poller.php index 3b80c1c04..9362c28b3 100644 --- a/include/poller.php +++ b/include/poller.php @@ -203,7 +203,7 @@ function poller_run($argv, $argc){ } - $res = simplexml_load_string($xml); + $res = parse_xml_string($xml); if(intval($res->status) == 1) { logger("poller: $url replied status 1 - marking for death "); -- cgit v1.2.3