diff options
author | friendica <info@friendica.com> | 2014-08-20 17:15:13 -0700 |
---|---|---|
committer | friendica <info@friendica.com> | 2014-08-20 17:15:13 -0700 |
commit | a6829f7dcb6735ee0b2f003647cc168e55002a5f (patch) | |
tree | e85d0ddd4c516092f9a7da5ef779a8b35e1da3a4 | |
parent | e842359e9b3c02ad2bbfe8f6bb9b702d1502f1c5 (diff) | |
download | volse-hubzilla-a6829f7dcb6735ee0b2f003647cc168e55002a5f.tar.gz volse-hubzilla-a6829f7dcb6735ee0b2f003647cc168e55002a5f.tar.bz2 volse-hubzilla-a6829f7dcb6735ee0b2f003647cc168e55002a5f.zip |
move Friendica photo migrator to addons, bring back a few XML scraping functions that we're going to require (unfortunately)
-rw-r--r-- | include/network.php | 166 | ||||
-rw-r--r-- | mod/frphotos.php | 87 | ||||
-rw-r--r-- | util/frphotohelper.php | 75 | ||||
-rw-r--r-- | view/tpl/frphotos.tpl | 13 |
4 files changed, 165 insertions, 176 deletions
diff --git a/include/network.php b/include/network.php index 1a974a681..614049299 100644 --- a/include/network.php +++ b/include/network.php @@ -965,4 +965,168 @@ logger('fetch_xrd_links: ' . $url); logger('fetch_xrd_links: ' . print_r($links,true), LOGGER_DATA); return $links; -}
\ No newline at end of file +} + + +function scrape_vcard($url) { + + $a = get_app(); + + $ret = array(); + + logger('scrape_vcard: url=' . $url); + + $x = z_fetch_url($url); + if(! $x['success']) + return $ret; + + $s = $x['body']; + + if(! $s) + return $ret; + + $headers = $x['header']; + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + // don't try and run feeds through the html5 parser + if(stristr($line,'content-type:') && ((stristr($line,'application/atom+xml')) || (stristr($line,'application/rss+xml')))) + return $ret; + } + } + + try { + $dom = HTML5_Parser::parse($s); + } catch (DOMException $e) { + logger('scrape_vcard: parse error: ' . $e); + } + + if(! $dom) + return $ret; + + // Pull out hCard profile elements + + $largest_photo = 0; + + $items = $dom->getElementsByTagName('*'); + foreach($items as $item) { + if(attribute_contains($item->getAttribute('class'), 'vcard')) { + $level2 = $item->getElementsByTagName('*'); + foreach($level2 as $x) { + if(attribute_contains($x->getAttribute('class'),'fn')) + $ret['fn'] = $x->textContent; + if((attribute_contains($x->getAttribute('class'),'photo')) + || (attribute_contains($x->getAttribute('class'),'avatar'))) { + $size = intval($x->getAttribute('width')); + if(($size > $largest_photo) || (! $largest_photo)) { + $ret['photo'] = $x->getAttribute('src'); + $largest_photo = $size; + } + } + if((attribute_contains($x->getAttribute('class'),'nickname')) + || (attribute_contains($x->getAttribute('class'),'uid'))) { + $ret['nick'] = $x->textContent; + } + } + } + } + + return $ret; +} + + + +function scrape_feed($url) { + + $a = get_app(); + + $ret = array(); + $level = 0; + $x = z_fetch_url($url,false,$level,array('novalidate' => true)); + + if(! $x['success']) + return $ret; + + $headers = $x['header']; + $code = $x['return_code']; + $s = $x['body']; + + logger('scrape_feed: returns: ' . $code . ' headers=' . $headers, LOGGER_DEBUG); + + if(! 
$s) { + logger('scrape_feed: no data returned for ' . $url); + return $ret; + } + + + $lines = explode("\n",$headers); + if(count($lines)) { + foreach($lines as $line) { + if(stristr($line,'content-type:')) { + if(stristr($line,'application/atom+xml') || stristr($s,'<feed')) { + $ret['feed_atom'] = $url; + return $ret; + } + if(stristr($line,'application/rss+xml') || stristr($s,'<rss')) { + $ret['feed_rss'] = $url; + return $ret; + } + } + } + // perhaps an RSS version 1 feed with a generic or incorrect content-type? + if(stristr($s,'</item>')) { + $ret['feed_rss'] = $url; + return $ret; + } + } + + try { + $dom = HTML5_Parser::parse($s); + } catch (DOMException $e) { + logger('scrape_feed: parse error: ' . $e); + } + + if(! $dom) { + logger('scrape_feed: failed to parse.'); + return $ret; + } + + + $head = $dom->getElementsByTagName('base'); + if($head) { + foreach($head as $head0) { + $basename = $head0->getAttribute('href'); + break; + } + } + if(! $basename) + $basename = implode('/', array_slice(explode('/',$url),0,3)) . '/'; + + $items = $dom->getElementsByTagName('link'); + + // get Atom/RSS link elements, take the first one of either. + + if($items) { + foreach($items as $item) { + $x = $item->getAttribute('rel'); + if(($x === 'alternate') && ($item->getAttribute('type') === 'application/atom+xml')) { + if(! x($ret,'feed_atom')) + $ret['feed_atom'] = $item->getAttribute('href'); + } + if(($x === 'alternate') && ($item->getAttribute('type') === 'application/rss+xml')) { + if(! x($ret,'feed_rss')) + $ret['feed_rss'] = $item->getAttribute('href'); + } + } + } + + // Drupal and perhaps others only provide relative URL's. Turn them into absolute. + + if(x($ret,'feed_atom') && (! strstr($ret['feed_atom'],'://'))) + $ret['feed_atom'] = $basename . $ret['feed_atom']; + if(x($ret,'feed_rss') && (! strstr($ret['feed_rss'],'://'))) + $ret['feed_rss'] = $basename . 
$ret['feed_rss']; + + return $ret; +} + diff --git a/mod/frphotos.php b/mod/frphotos.php deleted file mode 100644 index 8d6197fa3..000000000 --- a/mod/frphotos.php +++ /dev/null @@ -1,87 +0,0 @@ -<?php - - - -function frphotos_init(&$a) { - - if(! local_user()) - return; - - if(intval(get_pconfig(local_user(),'frphotos','complete'))) - return; - - $channel = $a->get_channel(); - - $fr_server = $_REQUEST['fr_server']; - $fr_username = $_REQUEST['fr_username']; - $fr_password = $_REQUEST['fr_password']; - - $cookies = 'store/[data]/frphoto_cookie_' . $channel['channel_address']; - - if($fr_server && $fr_username && $fr_password) { - - $ch = curl_init($fr_server . '/api/friendica/photos/list'); - - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt ($ch, CURLOPT_COOKIEFILE, $cookies); - curl_setopt ($ch, CURLOPT_COOKIEJAR, $cookies); - curl_setopt($ch, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); - curl_setopt($ch, CURLOPT_USERPWD, $fr_username . ':' . $fr_password); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_USERAGENT, 'RedMatrix'); - - $output = curl_exec($ch); - curl_close($ch); - - $j = json_decode($output,true); - -// echo print_r($j,true); - - $total = 0; - if(count($j)) { - foreach($j as $jj) { - - $r = q("select uid from photo where resource_id = '%s' and uid = %d limit 1", - dbesc($jj), - intval($channel['channel_id']) - ); - if($r) - continue; - - $total ++; - proc_run('php','util/frphotohelper.php',$jj, $channel['channel_address'], urlencode($fr_server)); - sleep(3); - } - } - if($total) { - set_pconfig(local_user(),'frphotos','complete','1'); - } - @unlink($cookies); - goaway(z_root() . '/photos/' . $channel['channel_address']); - } -} - - -function frphotos_content(&$a) { - - if(! local_user()) { - notice( t('Permission denied') . 
EOL); - return; - } - - if(intval(get_pconfig(local_user(),'frphotos','complete'))) { - info('Friendica photos have already been imported into this channel.'); - return; - } - - $o = replace_macros(get_markup_template('frphotos.tpl'),array( - '$header' => t('Friendica Photo Album Import'), - '$desc' => t('This will import all your Friendica photo albums to this Red channel.'), - '$fr_server' => array('fr_server', t('Friendica Server base URL'),'',''), - '$fr_username' => array('fr_username', t('Friendica Login Username'),'',''), - '$fr_password' => array('fr_password', t('Friendica Login Password'),'',''), - '$submit' => t('Submit'), - )); - return $o; -} diff --git a/util/frphotohelper.php b/util/frphotohelper.php deleted file mode 100644 index 484e7fcaf..000000000 --- a/util/frphotohelper.php +++ /dev/null @@ -1,75 +0,0 @@ -<?php - -require_once('include/cli_startup.php'); - -cli_startup(); - -$a = get_app(); - - -$photo_id = $argv[1]; -$channel_address = $argv[2]; -$fr_server = urldecode($argv[3]); -require_once('include/photos.php'); - -$cookies = 'store/[data]/frphoto_cookie_' . $channel_address; - - $c = q("select * from channel left join xchan on channel_hash = xchan_hash where channel_address = '%s' limit 1", - dbesc($channel_address) - ); - if(! $c) { - logger('frphotohelper: channel not found'); - killme(); - } - $channel = $c[0]; - - - $ch = curl_init($fr_server . '/api/friendica/photo?f=&photo_id=' . $photo_id); - - curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); - curl_setopt ($ch, CURLOPT_COOKIEFILE, $cookies); - curl_setopt ($ch, CURLOPT_COOKIEJAR, $cookies); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_USERAGENT, 'RedMatrix'); - - $output = curl_exec($ch); - curl_close($ch); - - $j = json_decode($output,true); - -// logger('frphotohelper: ' . 
print_r($j,true)); - - $args = array(); - $args['data'] = base64_decode($j['data']); - $args['filename'] = $j['filename']; - $args['resource_id'] = $j['resource-id']; - $args['scale'] = $j['scale']; - $args['album'] = $j['album']; - $args['not_visible'] = 1; - $args['created'] = $j['created']; - $args['edited'] = $j['edited']; - $args['title'] = $j['title']; - $args['description'] = $j['desc']; - - if($j['allow_cid'] || $j['allow_gid'] || $j['deny_cid'] || $j['deny_gid']) - $args['contact_allow'] = $channel['channel_hash']; - - $args['type'] = $j['type']; - - - - $r = q("select * from photo where resource_id = '%s' and uid = %d limit 1", - dbesc($args['resource_id']), - intval($channel['channel_id']) - ); - if($r) { - killme(); - } - - - $ret = photo_upload($channel,$channel,$args); - logger('photo_import: ' . print_r($ret,true)); - - killme(); - diff --git a/view/tpl/frphotos.tpl b/view/tpl/frphotos.tpl deleted file mode 100644 index b8e978825..000000000 --- a/view/tpl/frphotos.tpl +++ /dev/null @@ -1,13 +0,0 @@ -<h3>{{$header}}</h3> - -<p class="descriptive-text">{{$desc}}</p> - -<form action="frphotos" method="post" autocomplete="off" > - -{{include file="field_input.tpl" field=$fr_server}} -{{include file="field_input.tpl" field=$fr_username}} -{{include file="field_password.tpl" field=$fr_password}} - -<input type="submit" name="submit" value="{{$submit}}" /> -</form> - |