aboutsummaryrefslogtreecommitdiffstats
path: root/include/Scrape.php
diff options
context:
space:
mode:
Diffstat (limited to 'include/Scrape.php')
-rw-r--r--include/Scrape.php119
1 files changed, 83 insertions, 36 deletions
diff --git a/include/Scrape.php b/include/Scrape.php
index bfe795e19..cc46af644 100644
--- a/include/Scrape.php
+++ b/include/Scrape.php
@@ -44,8 +44,9 @@ function scrape_dfrn($url) {
$x = $item->getAttribute('rel');
if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml'))
$ret['feed_atom'] = $item->getAttribute('href');
- if(substr($x,0,5) == "dfrn-")
+ if(substr($x,0,5) == "dfrn-") {
$ret[$x] = $item->getAttribute('href');
+ }
if($x === 'lrdd') {
$decoded = urldecode($item->getAttribute('href'));
if(preg_match('/acct:([^@]*)@/',$decoded,$matches))
@@ -55,17 +56,28 @@ function scrape_dfrn($url) {
// Pull out hCard profile elements
+ $largest_photo = 0;
+
$items = $dom->getElementsByTagName('*');
foreach($items as $item) {
if(attribute_contains($item->getAttribute('class'), 'vcard')) {
$level2 = $item->getElementsByTagName('*');
foreach($level2 as $x) {
- if(attribute_contains($x->getAttribute('class'),'fn'))
+ if(attribute_contains($x->getAttribute('class'),'fn')) {
$ret['fn'] = $x->textContent;
- if(attribute_contains($x->getAttribute('class'),'photo'))
- $ret['photo'] = $x->getAttribute('src');
- if(attribute_contains($x->getAttribute('class'),'key'))
+ }
+ if((attribute_contains($x->getAttribute('class'),'photo'))
+ || (attribute_contains($x->getAttribute('class'),'avatar'))) {
+ $size = intval($x->getAttribute('width'));
+ // dfrn prefers 175, so if we find this, we set largest_size so it can't be topped.
+ if(($size > $largest_photo) || ($size == 175) || (! $largest_photo)) {
+ $ret['photo'] = $x->getAttribute('src');
+ $largest_photo = (($size == 175) ? 9999 : $size);
+ }
+ }
+ if(attribute_contains($x->getAttribute('class'),'key')) {
$ret['key'] = $x->textContent;
+ }
}
}
}
@@ -190,8 +202,9 @@ function scrape_vcard($url) {
}
}
if((attribute_contains($x->getAttribute('class'),'nickname'))
- || (attribute_contains($x->getAttribute('class'),'uid')))
+ || (attribute_contains($x->getAttribute('class'),'uid'))) {
$ret['nick'] = $x->textContent;
+ }
}
}
}
@@ -289,7 +302,29 @@ function scrape_feed($url) {
}}
-function probe_url($url) {
+/**
+ *
+ * Probe a network address to discover what kind of protocols we need to communicate with it.
+ *
+ * Warning: this function is a bit touchy and there are some subtle dependencies within the logic flow.
+ * Edit with care.
+ *
+ */
+
+/**
+ *
+ * PROBE_DIASPORA has a bias towards returning Diaspora information
+ * while PROBE_NORMAL has a bias towards dfrn/zot - in the case where
+ * an address (such as a Friendika address) supports more than one type
+ * of network.
+ *
+ */
+
+
+define ( 'PROBE_NORMAL', 0);
+define ( 'PROBE_DIASPORA', 1);
+
+function probe_url($url, $mode = PROBE_NORMAL) {
require_once('include/email.php');
$result = array();
@@ -366,7 +401,7 @@ function probe_url($url) {
}
}
}
- else {
+ elseif($mode == PROBE_NORMAL) {
// Check email
@@ -411,38 +446,46 @@ function probe_url($url) {
}
}
- if(strlen($zot)) {
- $s = fetch_url($zot);
- if($s) {
- $j = json_decode($s);
- if($j) {
- $network = NETWORK_ZOT;
- $vcard = array(
- 'fn' => $j->fullname,
- 'nick' => $j->nickname,
- 'photo' => $j->photo
- );
- $profile = $j->url;
- $notify = $j->post;
- $pubkey = $j->pubkey;
- $poll = 'N/A';
+ if($mode == PROBE_NORMAL) {
+ if(strlen($zot)) {
+ $s = fetch_url($zot);
+ if($s) {
+ $j = json_decode($s);
+ if($j) {
+ $network = NETWORK_ZOT;
+ $vcard = array(
+ 'fn' => $j->fullname,
+ 'nick' => $j->nickname,
+ 'photo' => $j->photo
+ );
+ $profile = $j->url;
+ $notify = $j->post;
+ $pubkey = $j->pubkey;
+ $poll = 'N/A';
+ }
}
}
- }
- if(strlen($dfrn)) {
- $ret = scrape_dfrn($dfrn);
- if(is_array($ret) && x($ret,'dfrn-request')) {
- $network = NETWORK_DFRN;
- $request = $ret['dfrn-request'];
- $confirm = $ret['dfrn-confirm'];
- $notify = $ret['dfrn-notify'];
- $poll = $ret['dfrn-poll'];
+ if(strlen($dfrn)) {
+ $ret = scrape_dfrn($dfrn);
+ if(is_array($ret) && x($ret,'dfrn-request')) {
+ $network = NETWORK_DFRN;
+ $request = $ret['dfrn-request'];
+ $confirm = $ret['dfrn-confirm'];
+ $notify = $ret['dfrn-notify'];
+ $poll = $ret['dfrn-poll'];
+
+ $vcard = array();
+ $vcard['fn'] = $ret['fn'];
+ $vcard['nick'] = $ret['nick'];
+ $vcard['photo'] = $ret['photo'];
+ }
}
}
if($diaspora && $diaspora_base && $diaspora_guid) {
- $notify = $diaspora_base . 'receive/post/' . $diaspora_guid;
+ if($mode == PROBE_DIASPORA || ! $notify)
+ $notify = $diaspora_base . 'receive/users/' . $diaspora_guid;
if(strpos($url,'@'))
$addr = str_replace('acct:', '', $url);
}
@@ -454,7 +497,7 @@ function probe_url($url) {
$network = NETWORK_OSTATUS;
$priority = 0;
- if($hcard) {
+ if($hcard && ! $vcard) {
$vcard = scrape_vcard($hcard);
// Google doesn't use absolute url in profile photos
@@ -498,10 +541,11 @@ function probe_url($url) {
logger('probe_url: scrape_feed returns: ' . print_r($feedret,true), LOGGER_DATA);
if(count($feedret) && ($feedret['feed_atom'] || $feedret['feed_rss'])) {
$poll = ((x($feedret,'feed_atom')) ? unamp($feedret['feed_atom']) : unamp($feedret['feed_rss']));
- $vcard = array();
+ if(! x($vcard))
+ $vcard = array();
}
- if(x($feedret,'photo'))
+ if(x($feedret,'photo') && (! x($vcard,'photo')))
$vcard['photo'] = $feedret['photo'];
require_once('library/simplepie/simplepie.inc');
$feed = new SimplePie();
@@ -518,9 +562,11 @@ function probe_url($url) {
if($feed->error())
logger('probe_url: scrape_feed: Error parsing XML: ' . $feed->error());
+
if(! x($vcard,'photo'))
$vcard['photo'] = $feed->get_image_url();
$author = $feed->get_author();
+
if($author) {
$vcard['fn'] = unxmlify(trim($author->get_name()));
if(! $vcard['fn'])
@@ -568,6 +614,7 @@ function probe_url($url) {
}
}
}
+
if((! $vcard['photo']) && strlen($email))
$vcard['photo'] = gravatar_img($email);
if($poll === $profile)