From 38eb79705e4de252f29e56543de8a287132b488e Mon Sep 17 00:00:00 2001 From: redmatrix Date: Wed, 16 Mar 2016 18:00:13 -0700 Subject: lots of work on federated channel discovery --- doc/hook/discover_by_webbie.bb | 1 - doc/hook/discover_channel_webfinger.bb | 14 ++ doc/hooklist.bb | 2 +- include/follow.php | 4 +- include/items.php | 50 +++++- include/network.php | 294 ++++++++++++++++++++++++++------- mod/wfinger.php | 6 +- version.inc | 2 +- 8 files changed, 303 insertions(+), 70 deletions(-) delete mode 100644 doc/hook/discover_by_webbie.bb create mode 100644 doc/hook/discover_channel_webfinger.bb diff --git a/doc/hook/discover_by_webbie.bb b/doc/hook/discover_by_webbie.bb deleted file mode 100644 index f9228932e..000000000 --- a/doc/hook/discover_by_webbie.bb +++ /dev/null @@ -1 +0,0 @@ -[h2]discover_by_webbie[/h2] diff --git a/doc/hook/discover_channel_webfinger.bb b/doc/hook/discover_channel_webfinger.bb new file mode 100644 index 000000000..b0eb5f2c4 --- /dev/null +++ b/doc/hook/discover_channel_webfinger.bb @@ -0,0 +1,14 @@ +[h2]discover_channel_webfinger[/h2] + +Called after performing channel discovery using RFC7033 webfinger and where the channel is not recognised as zot. + +Passed an array: + + address: URL or address that is being discovered + success: set to true if the plugin discovers something + webfinger: array of webfinger links (output of webfinger_rfc7033()) + + + if your plugin indicates success you are expected to generate and populate an xchan (and hubloc) record prior to returning. + + \ No newline at end of file diff --git a/doc/hooklist.bb b/doc/hooklist.bb index 9172628a0..bae641585 100644 --- a/doc/hooklist.bb +++ b/doc/hooklist.bb @@ -127,7 +127,7 @@ Hooks allow plugins/addons to "hook into" the code at many points and alter the [zrl=[baseurl]/help/hook/directory_item]directory_item[/zrl] Called when generating a directory listing for display -[zrl=[baseurl]/help/hook/discover_by_webbie]discover_by_webbie[/zrl] +[zrl=[baseurl]/help/hook/discover_channel_webfinger]discover_channel_webfinger[/zrl] Called when performing a webfinger lookup [zrl=[baseurl]/help/hook/display_item]display_item[/zrl] diff --git a/include/follow.php b/include/follow.php index 5e1146657..319e9e41b 100644 --- a/include/follow.php +++ b/include/follow.php @@ -175,7 +175,9 @@ function new_contact($uid,$url,$channel,$interactive = false, $confirm = false) return $result; } - $x = array('channel_id' => $uid, 'follow_address' => $url, 'xchan' => $r[0], 'allowed' => 1, 'singleton' => 0); + $allowed = ($r[0]['xchan_network'] === 'zot' || $r[0]['xchan_network'] === 'rss') ? 1 : 0); + + $x = array('channel_id' => $uid, 'follow_address' => $url, 'xchan' => $r[0], 'allowed' => $allowed, 'singleton' => 0); call_hooks('follow_allow',$x); diff --git a/include/items.php b/include/items.php index 7f5932cf3..fbcf20636 100755 --- a/include/items.php +++ b/include/items.php @@ -4069,29 +4069,65 @@ function process_salmon_feed($xml, $importer) { } /* - * Given an xml (atom) feed, find any links with rel="hub" and return an array of href links or false + * Given an xml (atom) feed, find author and hub links */ -function find_hubs($xml) { +function feed_meta($xml) { require_once('library/simplepie/simplepie.inc'); + $ret = array(); + if(! strlen($xml)) { logger('empty input'); - return false; + return $ret; } $feed = new SimplePie(); $feed->set_raw_data($xml); $feed->init(); - if($feed->error()) + if($feed->error()) { logger('Error parsing XML: ' . $feed->error()); + return $ret; + } + + $ret['hubs'] = $feed->get_links('hub'); + +// logger('consume_feed: hubs: ' . print_r($hubs,true), LOGGER_DATA); + + $author = array(); + + $found_author = $feed->get_author(); + if($found_author) { + $author['author_name'] = unxmlify($found_author->get_name()); + $author['author_link'] = unxmlify($found_author->get_link()); + + $rawauthor = $feed->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_10,'author'); + logger('rawauthor: ' . print_r($rawauthor,true)); + + if($rawauthor && $rawauthor[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link']) { + $base = $rawauthor[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['link']; + foreach($base as $link) { + if(!x($author, 'author_photo') || ! $author['author_photo']) { + if($link['attribs']['']['rel'] === 'photo' || $link['attribs']['']['rel'] === 'avatar') { + $author['author_photo'] = unxmlify($link['attribs']['']['href']); + break; + } + } + } + if($rawauthor[0]['child'][NAMESPACE_POCO]['displayName'][0]['data']) + $author['full_name'] = unxmlify($rawauthor[0]['child'][NAMESPACE_POCO]['displayName'][0]['data']); + } + } - $hubs = $feed->get_links('hub'); - logger('consume_feed: hubs: ' . print_r($hubs,true), LOGGER_DATA); - return $hubs; + if(substr($author['author_link'],-1,1) == '/') + $author['author_link'] = substr($author['author_link'],0,-1); + + $ret['author'] = $author; + + return $ret; } diff --git a/include/network.php b/include/network.php index 7399b4526..7d41a7eb7 100644 --- a/include/network.php +++ b/include/network.php @@ -1043,75 +1043,184 @@ function discover_by_url($url,$arr = null) { } + +function convert_salmon_key($key) { + + if(strstr($key,',')) + $rawkey = substr($key,strpos($key,',')+1); + else + $rawkey = substr($key,5); + + $key_info = explode('.',$rawkey); + + $m = base64url_decode($key_info[1]); + $e = base64url_decode($key_info[2]); + + logger('key details: ' . print_r($key_info,true), LOGGER_DEBUG); + $salmon_key = metopem($m,$e); + return $salmon_key; + +} + + function discover_by_webbie($webbie) { require_once('library/HTML5/Parser.php'); + $result = array(); + $network = null; + $diaspora = false; + $gnusoc = false; + + $has_salmon = false; + $salmon_key = false; + $atom_feed = false; + $diaspora_base = ''; + $diaspora_guid = ''; + $diaspora_key = ''; + $dfrn = false; + $webbie = strtolower($webbie); $x = webfinger_rfc7033($webbie,true); if($x && array_key_exists('links',$x) && $x['links']) { foreach($x['links'] as $link) { - if(array_key_exists('rel',$link) && $link['rel'] == 'http://purl.org/zot/protocol') { - logger('discover_by_webbie: zot found for ' . $webbie, LOGGER_DEBUG); - if(array_key_exists('zot',$x) && $x['zot']['success']) - $i = import_xchan($x['zot']); - else { - $z = z_fetch_url($link['href']); - if($z['success']) { - $j = json_decode($z['body'],true); - $i = import_xchan($j); - return true; + if(array_key_exists('rel',$link)) { + if($link['rel'] == 'http://purl.org/zot/protocol') { + logger('discover_by_webbie: zot found for ' . $webbie, LOGGER_DEBUG); + if(array_key_exists('zot',$x) && $x['zot']['success']) + $i = import_xchan($x['zot']); + else { + $z = z_fetch_url($link['href']); + if($z['success']) { + $j = json_decode($z['body'],true); + $i = import_xchan($j); + return true; + } + } + } + if($link['rel'] == 'magic-public-key') { + if(substr($link['href'],0,5) === 'data:') { + $salmon_key = convert_salmon_key($link['href']); } } + if($link['rel'] == 'salmon') { + $has_salmon = true; + } + if($link['rel'] == 'http://schemas.google.com/g/2010#updates-from') { + $atom_feed = $link['href']; + } } } } - $arr = array('address' => $webbie, 'success' => false); - call_hooks('discover_by_webbie', $arr); + + logger('webfing: ' . print_r($x,true)); + + $arr = array('address' => $webbie, 'success' => false, 'webfinger' => $x); + call_hooks('discover_channel_webfinger', $arr); if($arr['success']) return true; - $result = array(); - $network = null; - $diaspora = false; + if($salmon_key && $has_salmon && $atom_feed) { + + $gnusoc = true; + $addr = $x['address']; - $diaspora_base = ''; - $diaspora_guid = ''; - $diaspora_key = ''; - $dfrn = false; + $m = parse_url($x['location']); + + $k = z_fetch_url($atom_feed); + if($k['success']) + $feed_meta = feed_meta($k['body']); + if($feed_meta && $feed_meta['author']) { + $r = q("select * from xchan where xchan_hash = '%s' limit 1", + dbesc($addr) + ); + if($r) { + $r = q("update xchan set xchan_name = '%s', xchan_network = '%s', xchan_name_date = '%s' where xchan_hash = '%s' limit 1", + dbesc(($feed_meta['author']['author_name']) ? $feed_meta['author']['author_name'] : $x['nickname']), + dbesc('gnusoc'), + dbesc(datetime_convert()), + dbesc($addr) + ); + } + else { - $x = old_webfinger($webbie); - if($x) { - logger('old_webfinger: ' . print_r($x,true)); - foreach($x as $link) { - if($link['@attributes']['rel'] === NAMESPACE_DFRN) - $dfrn = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'salmon') - $notify = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === NAMESPACE_FEED) - $poll = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') - $hcard = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://webfinger.net/rel/profile-page') - $profile = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://portablecontacts.net/spec/1.0') - $poco = unamp($link['@attributes']['href']); - if($link['@attributes']['rel'] === 'http://joindiaspora.com/seed_location') { - $diaspora_base = unamp($link['@attributes']['href']); - $diaspora = true; + $r = q("insert into xchan ( xchan_hash, xchan_guid, xchan_pubkey, xchan_addr, xchan_url, xchan_name, xchan_network, xchan_name_date ) values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') ", + dbesc($addr), + dbesc($x['location']), + dbesc($salmon_key), + dbesc($addr), + dbesc($x['location']), + dbesc(($feed_meta['author']['author_name']) ? $feed_meta['author']['author_name'] : $x['nickname']), + dbesc('gnusoc'), + dbescdate(datetime_convert()) + ); } - if($link['@attributes']['rel'] === 'http://joindiaspora.com/guid') { - $diaspora_guid = unamp($link['@attributes']['href']); - $diaspora = true; + + $r = q("select * from hubloc where hubloc_hash = '%s' limit 1", + dbesc($addr) + ); + + if(! $r) { + + $r = q("insert into hubloc ( hubloc_guid, hubloc_hash, hubloc_addr, hubloc_network, hubloc_url, hubloc_host, hubloc_callback, hubloc_updated, hubloc_primary ) values ('%s','%s','%s','%s','%s','%s','%s','%s', 1)", + dbesc($x['location']), + dbesc($addr), + dbesc($addr), + dbesc('gnusoc'), + dbesc($m['scheme'] . '://' . $m['host']), + dbesc($m['host']), + dbesc($salmon), + dbescdate(datetime_convert()) + ); } - if($link['@attributes']['rel'] === 'diaspora-public-key') { - $diaspora_key = base64_decode(unamp($link['@attributes']['href'])); - if(strstr($diaspora_key,'RSA ')) - $pubkey = rsatopem($diaspora_key); - else - $pubkey = $diaspora_key; - $diaspora = true; + $photos = import_xchan_photo($feed_meta['author']['author_photo'],$addr); + $r = q("update xchan set xchan_photo_date = '%s', xchan_photo_l = '%s', xchan_photo_m = '%s', xchan_photo_s = '%s', xchan_photo_mimetype = '%s' where xchan_hash = '%s'", + dbescdate(datetime_convert()), + dbesc($photos[0]), + dbesc($photos[1]), + dbesc($photos[2]), + dbesc($photos[3]), + dbesc($addr) + ); + return true; + + } + } + else { + + $x = old_webfinger($webbie); + if($x) { + logger('old_webfinger: ' . print_r($x,true)); + foreach($x as $link) { + if($link['@attributes']['rel'] === NAMESPACE_DFRN) + $dfrn = unamp($link['@attributes']['href']); + if($link['@attributes']['rel'] === 'salmon') + $notify = unamp($link['@attributes']['href']); + if($link['@attributes']['rel'] === NAMESPACE_FEED) + $poll = unamp($link['@attributes']['href']); + if($link['@attributes']['rel'] === 'http://microformats.org/profile/hcard') + $hcard = unamp($link['@attributes']['href']); + if($link['@attributes']['rel'] === 'http://webfinger.net/rel/profile-page') + $profile = unamp($link['@attributes']['href']); + if($link['@attributes']['rel'] === 'http://portablecontacts.net/spec/1.0') + $poco = unamp($link['@attributes']['href']); + if($link['@attributes']['rel'] === 'http://joindiaspora.com/seed_location') { + $diaspora_base = unamp($link['@attributes']['href']); + $diaspora = true; + } + if($link['@attributes']['rel'] === 'http://joindiaspora.com/guid') { + $diaspora_guid = unamp($link['@attributes']['href']); + $diaspora = true; + } + if($link['@attributes']['rel'] === 'diaspora-public-key') { + $diaspora_key = base64_decode(unamp($link['@attributes']['href'])); + if(strstr($diaspora_key,'RSA ')) + $pubkey = rsatopem($diaspora_key); + else + $pubkey = $diaspora_key; + $diaspora = true; + } } } @@ -1167,7 +1276,7 @@ function discover_by_webbie($webbie) { } else { - $r = q("insert into xchan ( xchan_hash, xchan_guid, xchan_pubkey, xchan_addr, xchan_url, xchan_name, xchan_network, xchan_instance_url, xchan_name_date ) values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') ", + $r = q("insert into xchan ( xchan_hash, xchan_guid, xchan_pubkey, xchan_addr, xchan_url, xchan_name, xchan_network, xchan_name_date ) values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') ", dbesc($addr), dbesc($guid), dbesc($pubkey), @@ -1175,7 +1284,6 @@ function discover_by_webbie($webbie) { dbesc($profile), dbesc($vcard['fn']), dbesc($network), - dbesc(z_root()), dbescdate(datetime_convert()) ); } @@ -1199,7 +1307,7 @@ function discover_by_webbie($webbie) { } $photos = import_xchan_photo($vcard['photo'],$addr); $r = q("update xchan set xchan_photo_date = '%s', xchan_photo_l = '%s', xchan_photo_m = '%s', xchan_photo_s = '%s', xchan_photo_mimetype = '%s' where xchan_hash = '%s'", - dbescdate(datetime_convert('UTC','UTC',$arr['photo_updated'])), + dbescdate(datetime_convert()), dbesc($photos[0]), dbesc($photos[1]), dbesc($photos[2]), @@ -1209,7 +1317,7 @@ function discover_by_webbie($webbie) { return true; } - + } return false; /* @@ -1278,10 +1386,6 @@ LSIeXnd14lQYK/uxW/8cTFjcmddsKxeXysoQxbSa9VdDK+KkpZdgYXYrTTofXs6v+ ) */ - - - - } } @@ -1308,13 +1412,89 @@ function webfinger_rfc7033($webbie,$zot = false) { $s = z_fetch_url('https://' . $rhs . '/.well-known/webfinger?f=&resource=' . $resource . (($zot) ? '&zot=1' : '')); - if($s['success']) + if($s['success']) { $j = json_decode($s['body'],true); + + // We could have a number of URL aliases and webbies + // make an executive decision about the most likely "best" of each + // by comparing against some examples from known networks we're likely to encounter. + // Otherwise we have to store every alias that we may ever encounter and + // validate every URL we ever find against every possible alias + + // @fixme pump.io is going to be a real bugger since it doesn't return subject or aliases + // or provide lookup by url + + $j['address'] = find_webfinger_address($j,$rhs); + $j['location'] = find_webfinger_location($j,$rhs); + if($j['address']) + $j['nickname'] = substr($j['address'],0,strpos($j['address'],'@')); + } else return false; + return($j); } +function find_webfinger_address($j,$rhs) { + if(is_array($j) && ($j)) { + if(strpos($j['subject'],'acct:') !== false && strpos($j['subject'],'@' . $rhs)) + return str_replace('acct:','',$j['subject']); + if($j['aliases']) { + foreach($j['aliases'] as $alias) { + if(strpos($alias,'acct:') !== false && strpos($alias,'@' . $rhs)) { + return str_replace('acct:','',$alias); + } + } + } + } + return ''; +} + + +function find_webfinger_location($j,$rhs) { + if(is_array($j) && ($j)) { + if(strpos($j['subject'],'http') === 0) { + $x = match_webfinger_location($j['subject'],$rhs); + if($x) + return $x; + } + if($j['aliases']) { + foreach($j['aliases'] as $alias) { + if(strpos($alias,'http') === 0) { + $x = match_webfinger_location($alias,$rhs); + if($x) + return($x); + } + } + } + } + return ''; +} + +function match_webfinger_location($s,$h) { + + // GNU-social and the older StatusNet + if(preg_match('|' . $h . '/user/([0-9]*?)$|',$s)) + return $s; + // Redmatrix / hubzilla + if(preg_match('|' . $h . '/channel/|',$s)) + return $s; + // Friendica + if(preg_match('|' . $h . '/profile/|',$s)) + return $s; + + $arr = array('test' => $s, 'host' => $h, 'success' => false); + call_hooks('match_webfinger_location',$arr); + if($arr['success']) + return $s; + return ''; +} + + + + + + function old_webfinger($webbie) { diff --git a/mod/wfinger.php b/mod/wfinger.php index 3f9826f9b..5c1a74f10 100644 --- a/mod/wfinger.php +++ b/mod/wfinger.php @@ -73,13 +73,15 @@ function wfinger_init(&$a) { $result['aliases'] = array(); - $result['properties'] = array('http://webfinger.net/ns/name' => $r[0]['channel_name']); + $result['properties'] = array( + 'http://webfinger.net/ns/name' => $r[0]['channel_name'], + 'http://xmlns.com/foaf/0.1/name' => $r[0]['channel_name'] + ); foreach($aliases as $alias) if($alias != $resource) $result['aliases'][] = $alias; - $result['links'] = array( array( diff --git a/version.inc b/version.inc index 5ade25348..7825710e4 100644 --- a/version.inc +++ b/version.inc @@ -1 +1 @@ -2016-03-15.1336H +2016-03-16.1337H -- cgit v1.2.3