diff options
Diffstat (limited to 'include/feedutils.php')
-rw-r--r-- | include/feedutils.php | 311 |
1 files changed, 179 insertions, 132 deletions
diff --git a/include/feedutils.php b/include/feedutils.php index 4638ef66a..369193fce 100644 --- a/include/feedutils.php +++ b/include/feedutils.php @@ -15,15 +15,6 @@ */ function get_public_feed($channel, $params) { -/* $type = 'xml'; - $begin = NULL_DATE; - $end = ''; - $start = 0; - $records = 40; - $direction = 'desc'; - $pages = 0; -*/ - if(! $params) $params = []; @@ -106,23 +97,15 @@ function get_feed_for($channel, $observer_hash, $params) { $owner = atom_render_author('zot:owner',$channel); $atom .= replace_macros($feed_template, array( - '$version' => xmlify(Zotlabs\Lib\System::get_project_version()), - '$red' => xmlify(Zotlabs\Lib\System::get_platform_name()), - '$feed_id' => xmlify($channel['xchan_url']), - '$feed_title' => xmlify($channel['channel_name']), - '$feed_updated' => xmlify(datetime_convert('UTC', 'UTC', 'now', ATOM_TIME)), - '$author' => $feed_author, - '$owner' => $owner, - '$name' => xmlify($channel['channel_name']), - '$profile_page' => xmlify($channel['xchan_url']), - '$mimephoto' => xmlify($channel['xchan_photo_mimetype']), - '$photo' => xmlify($channel['xchan_photo_l']), - '$thumb' => xmlify($channel['xchan_photo_m']), - '$picdate' => '', - '$uridate' => '', - '$namdate' => '', - '$birthday' => '', - '$community' => '', + '$version' => xmlify(Zotlabs\Lib\System::get_project_version()), + '$generator' => xmlify(Zotlabs\Lib\System::get_platform_name()), + '$generator_uri' => 'https://hubzilla.org', + '$feed_id' => xmlify($channel['xchan_url']), + '$feed_title' => xmlify($channel['channel_name']), + '$feed_updated' => xmlify(datetime_convert('UTC', 'UTC', 'now', ATOM_TIME)), + '$author' => $feed_author, + '$owner' => $owner, + '$profile_page' => xmlify($channel['xchan_url']), )); @@ -270,19 +253,18 @@ function construct_activity_target($item) { return ''; } + /** - * @brief Return an array with a parsed atom item. + * @brief Return an array with a parsed atom author. * * @param SimplePie $feed - * @param array $item - * @param[out] array $author - * @return array Associative array with the parsed item data + * @param SimplePie $item + * @return array $author */ -function get_atom_elements($feed, $item, &$author) { - require_once('include/html2bbcode.php'); +function get_atom_author($feed, $item) { - $res = array(); + $author = []; $found_author = $item->get_author(); if($found_author) { @@ -307,52 +289,6 @@ function get_atom_elements($feed, $item, &$author) { if(substr($author['author_link'],-1,1) == '/') $author['author_link'] = substr($author['author_link'],0,-1); - $res['mid'] = normalise_id(unxmlify($item->get_id())); - $res['title'] = unxmlify($item->get_title()); - $res['body'] = unxmlify($item->get_content()); - $res['plink'] = unxmlify($item->get_link(0)); - $res['item_rss'] = 1; - - - $summary = unxmlify($item->get_description(true)); - - // removing the content of the title if its identically to the body - // This helps with auto generated titles e.g. from tumblr - - if (title_is_body($res['title'], $res['body'])) - $res['title'] = ""; - - if($res['plink']) - $base_url = implode('/', array_slice(explode('/',$res['plink']),0,3)); - else - $base_url = ''; - - - $rawcreated = $item->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'published'); - if($rawcreated) - $res['created'] = unxmlify($rawcreated[0]['data']); - - $rawedited = $item->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'updated'); - if($rawedited) - $res['edited'] = unxmlify($rawedited[0]['data']); - - if((x($res,'edited')) && (! (x($res,'created')))) - $res['created'] = $res['edited']; - - if(! $res['created']) - $res['created'] = $item->get_date('c'); - - if(! $res['edited']) - $res['edited'] = $item->get_date('c'); - - $rawverb = $item->get_item_tags(NAMESPACE_ACTIVITY, 'verb'); - - // select between supported verbs - - if($rawverb) { - $res['verb'] = unxmlify($rawverb[0]['data']); - } - // look for a photo. We should check media size and find the best one, // but for now let's just find any author photo @@ -431,6 +367,122 @@ function get_atom_elements($feed, $item, &$author) { } } + $rawowner = $item->get_item_tags(NAMESPACE_DFRN, 'owner'); + if(! $rawowner) + $rawowner = $item->get_item_tags(NAMESPACE_ZOT, 'owner'); + + if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']) + $author['owner_name'] = unxmlify($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']); + elseif($rawowner[0]['child'][NAMESPACE_DFRN]['name'][0]['data']) + $author['owner_name'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['name'][0]['data']); + if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']) + $author['owner_link'] = unxmlify($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']); + elseif($rawowner[0]['child'][NAMESPACE_DFRN]['uri'][0]['data']) + $author['owner_link'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['uri'][0]['data']); + + if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link']) { + $base = $rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link']; + + foreach($base as $link) { + if(!x($author, 'owner_photo') || ! $author['owner_photo']) { + if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar') + $author['owner_photo'] = unxmlify($link['attribs']['']['href']); + } + } + } + + // build array to pass to hook + $arr = [ + 'feed' => $feed, + 'item' => $item, + 'author' => $author + ]; + /** + * @hooks parse_atom + * * \e SimplePie \b feed - The original SimplePie feed + * * \e SimplePie \b item + * * \e array \b result - the result array that will also get returned + */ + call_hooks('parse_atom_author', $arr); + + logger('author: ' . print_r($arr['author'], true), LOGGER_DATA); + + return $arr['author']; +} + + +/** + * @brief Return an array with a parsed atom item. + * + * @param SimplePie $feed + * @param SimplePie $item + * @param[out] array $author + * @return array Associative array with the parsed item data + */ + +function get_atom_elements($feed, $item) { + + require_once('include/html2bbcode.php'); + + $res = array(); + + + $res['mid'] = normalise_id(unxmlify($item->get_id())); + $res['title'] = unxmlify($item->get_title()); + $res['body'] = unxmlify($item->get_content()); + $res['plink'] = unxmlify($item->get_link(0)); + $res['item_rss'] = 1; + + + $summary = unxmlify($item->get_description(true)); + + if($summary === $res['body']) + $summary = ''; + + if(($summary) && ((strpos($summary,'<') !== false) || (strpos($summary,'>') !== false))) { + $summary = purify_html($summary); + $summary = html2bbcode($summary); + } + + + + // removing the content of the title if its identically to the body + // This helps with auto generated titles e.g. from tumblr + + if (title_is_body($res['title'], $res['body'])) + $res['title'] = ""; + + if($res['plink']) + $base_url = implode('/', array_slice(explode('/',$res['plink']),0,3)); + else + $base_url = ''; + + + $rawcreated = $item->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'published'); + if($rawcreated) + $res['created'] = unxmlify($rawcreated[0]['data']); + + $rawedited = $item->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'updated'); + if($rawedited) + $res['edited'] = unxmlify($rawedited[0]['data']); + + if((x($res,'edited')) && (! (x($res,'created')))) + $res['created'] = $res['edited']; + + if(! $res['created']) + $res['created'] = $item->get_date('c'); + + if(! $res['edited']) + $res['edited'] = $item->get_date('c'); + + $rawverb = $item->get_item_tags(NAMESPACE_ACTIVITY, 'verb'); + + // select between supported verbs + + if($rawverb) { + $res['verb'] = unxmlify($rawverb[0]['data']); + } + $rawcnv = $item->get_item_tags(NAMESPACE_OSTATUS, 'conversation'); if($rawcnv) { // new style @@ -445,7 +497,7 @@ function get_atom_elements($feed, $item, &$author) { } } - $ostatus_protocol = (($ostatus_conversation) ? true : false); + $ostatus_protocol = (($ostatus_conversation || $res['verb']) ? true : false); $mastodon = (($item->get_item_tags('http://mastodon.social/schema/1.0','scope')) ? true : false); if($mastodon) { @@ -454,6 +506,8 @@ function get_atom_elements($feed, $item, &$author) { $res['item_private'] = 1; } + logger('ostatus_protocol: ' . intval($ostatus_protocol), LOGGER_DEBUG); + $apps = $item->get_item_tags(NAMESPACE_STATUSNET, 'notice_info'); if($apps && $apps[0]['attribs']['']['source']) { $res['app'] = strip_tags(unxmlify($apps[0]['attribs']['']['source'])); @@ -557,9 +611,8 @@ function get_atom_elements($feed, $item, &$author) { ); } - // turn Mastodon content warning into a #nsfw hashtag - if($mastodon && $summary) { - $res['body'] = $summary . "\n\n" . $res['body'] . "\n\n#ContentWarning\n"; + if($summary && $res['body']) { + $res['body'] = '[summary]' . $summary . '[/summary]' . $res['body']; } @@ -588,29 +641,6 @@ function get_atom_elements($feed, $item, &$author) { $res['created'] = datetime_convert('UTC','UTC',$res['created']); $res['edited'] = datetime_convert('UTC','UTC',$res['edited']); - $rawowner = $item->get_item_tags(NAMESPACE_DFRN, 'owner'); - if(! $rawowner) - $rawowner = $item->get_item_tags(NAMESPACE_ZOT, 'owner'); - - if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']) - $author['owner_name'] = unxmlify($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']); - elseif($rawowner[0]['child'][NAMESPACE_DFRN]['name'][0]['data']) - $author['owner_name'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['name'][0]['data']); - if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']) - $author['owner_link'] = unxmlify($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']); - elseif($rawowner[0]['child'][NAMESPACE_DFRN]['uri'][0]['data']) - $author['owner_link'] = unxmlify($rawowner[0]['child'][NAMESPACE_DFRN]['uri'][0]['data']); - - if($rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link']) { - $base = $rawowner[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link']; - - foreach($base as $link) { - if(!x($author, 'owner_photo') || ! $author['owner_photo']) { - if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar') - $author['owner_photo'] = unxmlify($link['attribs']['']['href']); - } - } - } $rawgeo = $item->get_item_tags(NAMESPACE_GEORSS, 'point'); if($rawgeo) @@ -773,20 +803,17 @@ function get_atom_elements($feed, $item, &$author) { $arr = [ 'feed' => $feed, 'item' => $item, - 'author' => $author, 'result' => $res ]; /** * @hooks parse_atom * * \e SimplePie \b feed - The original SimplePie feed - * * \e array \b item - * * \e array \b author + * * \e SimplePie \b item * * \e array \b result - the result array that will also get returned */ call_hooks('parse_atom', $arr); - logger('author: ' .print_r($arr['author'], true), LOGGER_DATA); - logger('result: ' .print_r($arr['result'], true), LOGGER_DATA); + logger('result: ' . print_r($arr['result'], true), LOGGER_DATA); return $arr['result']; } @@ -985,9 +1012,9 @@ function process_feed_tombstones($feed,$importer,$contact,$pass) { * @param string $xml * The (atom) feed to consume - RSS isn't as fully supported but may work for simple feeds. * @param array $importer - * The contact_record (joined to user_record) of the local user who owns this + * The channel record of the local user who owns this * relationship. It is this person's stuff that is going to be updated. - * @param[in,out] array $contact + * @param[in,out] array $contact (abook record joined to xchan record) * The person who is sending us stuff. If not set, we MAY be processing a "follow" activity * from an external network and MAY create an appropriate contact record. Otherwise, we MUST * have a contact record. @@ -1074,8 +1101,8 @@ function consume_feed($xml, $importer, &$contact, $pass = 0) { // Have we seen it? If not, import it. - $author = array(); - $datarray = get_atom_elements($feed,$item,$author); + $author = get_atom_author($feed,$item); + $datarray = get_atom_elements($feed,$item); if(! $datarray['mid']) continue; @@ -1327,8 +1354,8 @@ function consume_feed($xml, $importer, &$contact, $pass = 0) { // Head post of a conversation. Have we seen it? If not, import it. - $author = array(); - $datarray = get_atom_elements($feed,$item,$author); + $author = get_atom_author($feed,$item); + $datarray = get_atom_elements($feed,$item); if(! $datarray['mid']) continue; @@ -1530,11 +1557,11 @@ function normalise_id($id) { */ function process_salmon_feed($xml, $importer) { - $ret = array(); + $ret = []; if(! strlen($xml)) { logger('process_feed: empty input'); - return; + return $ret; } $feed = new SimplePie(); @@ -1548,8 +1575,10 @@ function process_salmon_feed($xml, $importer) { $feed->init(); - if($feed->error()) + if($feed->error()) { logger('Error parsing XML: ' . $feed->error()); + return $ret; + } $permalink = $feed->get_permalink(); @@ -1576,16 +1605,13 @@ function process_salmon_feed($xml, $importer) { if($is_reply) $ret['parent_mid'] = $parent_mid; - $ret['author'] = array(); - - $datarray = get_atom_elements($feed, $item, $ret['author']); + $ret['author'] = get_atom_author($feed,$item); + $ret['item'] = get_atom_elements($feed,$item); // reset policies which are restricted by default for RSS connections // This item is likely coming from GNU-social via salmon and allows public interaction - $datarray['public_policy'] = ''; - $datarray['comment_policy'] = 'authenticated'; - - $ret['item'] = $datarray; + $ret['item']['public_policy'] = ''; + $ret['item']['comment_policy'] = 'authenticated'; } } @@ -1801,12 +1827,17 @@ function compat_photos_list($s) { if($found) { foreach($matches as $match) { - $ret[] = [ + $entry = [ 'href' => $match[2], - 'length' => 0, 'type' => guess_image_type($match[2]) ]; + $sizer = new \Zotlabs\Lib\Img_filesize($match[2]); + $size = $sizer->getSize(); + if(intval($size)) { + $entry['length'] = intval($size); + } + $ret[] = $entry; } } @@ -1838,10 +1869,24 @@ function atom_entry($item, $type, $author, $owner, $comment = false, $cid = 0, $ create_export_photo_body($item); - if($item['allow_cid'] || $item['allow_gid'] || $item['deny_cid'] || $item['deny_gid']) - $body = fix_private_photos($item['body'],$owner['uid'],$item,$cid); + // provide separate summary and content unless compat is true; as summary represents a content-warning on some networks + + $matches = false; + if(preg_match('|\[summary\](.*?)\[/summary\]|ism',$item['body'],$matches)) + $summary = $matches[1]; else - $body = $item['body']; + $summary = ''; + + $body = $item['body']; + + if($summary) + $body = preg_replace('|^(.*?)\[summary\](.*?)\[/summary\](.*?)$|ism','$1$3',$item['body']); + + if($compat) + $summary = ''; + + if($item['allow_cid'] || $item['allow_gid'] || $item['deny_cid'] || $item['deny_gid']) + $body = fix_private_photos($body,$owner['uid'],$item,$cid); if($compat) { $compat_photos = compat_photos_list($body); @@ -1882,6 +1927,8 @@ function atom_entry($item, $type, $author, $owner, $comment = false, $cid = 0, $ } else { $o .= '<title>' . xmlify($item['title']) . '</title>' . "\r\n"; + if($summary) + $o .= '<summary type="' . $type . '" >' . xmlify(prepare_text($summary,$item['mimetype'])) . '</summary>' . "\r\n"; $o .= '<content type="' . $type . '" >' . xmlify(prepare_text($body,$item['mimetype'])) . '</content>' . "\r\n"; } |