aboutsummaryrefslogtreecommitdiffstats
path: root/include/Import
diff options
context:
space:
mode:
authorhabeascodice <habeascodice@federated.social>2014-11-13 13:30:29 -0800
committerhabeascodice <habeascodice@federated.social>2014-11-13 13:30:29 -0800
commit2dbecf95454993cdf4996d2d65463d7759cd479b (patch)
treecbef39a4779efed1c0ab3b61a9ff09e37fadb49e /include/Import
parentbc02b933727da7a000d43d2d7d495f066616dc81 (diff)
parentac27db22c18ee7a82a52cbadb3efe2760b910499 (diff)
downloadvolse-hubzilla-2dbecf95454993cdf4996d2d65463d7759cd479b.tar.gz
volse-hubzilla-2dbecf95454993cdf4996d2d65463d7759cd479b.tar.bz2
volse-hubzilla-2dbecf95454993cdf4996d2d65463d7759cd479b.zip
Merge branch 'master' of https://github.com/habeascodice/red
Diffstat (limited to 'include/Import')
-rw-r--r--include/Import/Importer.php94
-rw-r--r--include/Import/refimport.php282
2 files changed, 376 insertions, 0 deletions
diff --git a/include/Import/Importer.php b/include/Import/Importer.php
new file mode 100644
index 000000000..c42344236
--- /dev/null
+++ b/include/Import/Importer.php
@@ -0,0 +1,94 @@
+<?php /** @file */
+
+namespace Redmatrix\Import;
+
+
+class Import {
+
+ $credentials = null;
+ $itemlist = null;
+ $src_items = null;
+
+ $items = null;
+
+ function get_credentials() {
+
+ }
+
+ function get_itemlist() {
+
+
+ }
+
+
+ function get_item_ident($item) {
+
+ }
+
+ function get_item($item_ident) {
+
+ }
+
+ function get_taxonomy($item_ident) {
+
+ }
+
+ function get_children($item_ident) {
+
+ }
+
+ function convert_item($item_ident) {
+
+ }
+
+ function convert_taxonomy($item_ident) {
+
+
+ }
+
+ function convert_child($child) {
+
+ }
+
+ function store($item,$update = false) {
+
+ }
+
+ function run() {
+
+ $this->credentials = $this->get_credentials();
+ $this->itemlist = $this->get_itemlist();
+ if($this->itemlist) {
+ $this->src_items = array();
+ $this->items = array();
+ $cnt = 0;
+ foreach($this->itemlist as $item) {
+ $ident = $item->get_item_ident($item);
+ $this->src_items[$ident]['item'] = $this->get_item($ident);
+ $this->src_items[$ident]['taxonomy'] = $this->get_taxonomy($ident);
+ $this->src_items[$ident]['children'] = $this->get_children($ident);
+ $this->items[$cnt]['item'] = $this->convert_item($ident);
+ $this->items[$cnt]['item']['term'] = $this->convert_taxonomy($ident);
+ if($this->src_items[$ident]['children']) {
+ $this->items[$cnt]['children'] = array();
+ foreach($this->src_items[$ident]['children'] as $child) {
+ $this[$cnt]['children'][] = $this->convert_child($child);
+ }
+ }
+ $cnt ++;
+ }
+
+
+
+
+ }
+
+
+
+
+
+
+ }
+
+
+} \ No newline at end of file
diff --git a/include/Import/refimport.php b/include/Import/refimport.php
new file mode 100644
index 000000000..4f2572660
--- /dev/null
+++ b/include/Import/refimport.php
@@ -0,0 +1,282 @@
+<?php
+
+require_once('include/html2bbcode.php');
+require_once('include/hubloc.php');
+
+// Sample module for importing conversation data from Reflection CMS. Some preparation was used to
+// dump relevant posts, categories and comments into individual JSON files, and also JSON dump of
+// the user table to search for avatars. Importation was also batched in sets of 20 posts per page
+// visit so as to survive shared hosting process limits. This provides some clues as how to handle
+// WordPress imports, which use a somewhat similar DB structure. The batching and individual files
+// might not be needed in VPS environments. As such this could be considered an extreme test case, but
+// the importation was successful in all regards using this code. The module URL was visited repeatedly
+// with a browser until all the posts had been imported.
+
+
+
+
+define('REDMATRIX_IMPORTCHANNEL','mike');
+define('REFLECT_EXPORTUSERNAME','mike');
+define('REFLECT_BLOGNAME','Diary and Other Rantings');
+define('REFLECT_BASEURL','http://example.com/');
+define('REFLECT_USERFILE','user.json');
+
+// set to true if you need to process everything again
+define('REFLECT_OVERWRITE',false);
+
+// we'll only process a small number of posts at a time on a shared host.
+
+define('REFLECT_MAXPERRUN',30);
+
+function reflect_get_channel() {
+
+ // this will be the channel_address or nickname of the red channel
+
+ $c = q("select * from channel left join xchan on channel_hash = xchan_hash
+ where channel_address = '%s' limit 1",
+ dbesc(REDMATRIX_IMPORTCHANNEL)
+ );
+ return $c[0];
+}
+
+
+function refimport_content(&$a) {
+
+ $channel = reflect_get_channel();
+
+ // load the user file. We need that to find the commenter's avatars
+
+ $u = file_get_contents(REFLECT_USERFILE);
+ if($u) {
+ $users = json_decode($u,true);
+ }
+
+ $ignored = 0;
+ $processed = 0;
+
+ $files = glob('article/*');
+ if(! $files)
+ return;
+
+ foreach($files as $f) {
+ $s = file_get_contents($f);
+ $j = json_decode($s,true);
+
+ if(! $j)
+ continue;
+
+ $arr = array();
+
+ // see if this article was already processed
+ $r = q("select * from item where mid = '%s' and uid = %d limit 1",
+ dbesc($j['guid']),
+ intval($channel['channel_id'])
+ );
+ if($r) {
+ if(REFLECT_OVERWRITE)
+ $arr['id'] = $r[0]['id'];
+ else {
+ $ignored ++;
+ rename($f,str_replace('article','done',$f));
+ continue;
+ }
+ }
+
+ $arr['uid'] = $channel['channel_account_id'];
+ $arr['aid'] = $channel['channel_id'];
+ $arr['mid'] = $arr['parent_mid'] = $j['guid'];
+ $arr['created'] = $j['created'];
+ $arr['edited'] = $j['edited'];
+ $arr['author_xchan'] = $channel['channel_hash'];
+ $arr['owner_xchan'] = $channel['channel_hash'];
+ $arr['app'] = REFLECT_BLOGNAME;
+ $arr['item_flags'] = ITEM_ORIGIN|ITEM_WALL|ITEM_THREAD_TOP;
+ $arr['verb'] = ACTIVITY_POST;
+
+ // this is an assumption
+ $arr['comment_policy'] = 'contacts';
+
+
+ // import content. In this case the content is XHTML.
+
+ $arr['title'] = html2bbcode($j['title']);
+ $arr['title'] = htmlspecialchars($arr['title'],ENT_COMPAT,'UTF-8',false);
+
+
+ $arr['body'] = html2bbcode($j['body']);
+ $arr['body'] = htmlspecialchars($arr['body'],ENT_COMPAT,'UTF-8',false);
+
+
+ // convert relative urls to other posts on that service to absolute url on our service.
+ $arr['body'] = preg_replace_callback("/\[url\=\/+article\/(.*?)\](.*?)\[url\]/",'reflect_article_callback',$arr['body']);
+
+ // also import any photos
+ $arr['body'] = preg_replace_callback("/\[img(.*?)\](.*?)\[\/img\]/",'reflect_photo_callback',$arr['body']);
+
+
+ // add categories
+
+ if($j['taxonomy'] && is_array($j['taxonomy']) && count($j['taxonomy'])) {
+ $arr['term'] = array();
+ foreach($j['taxonomy'] as $tax) {
+ $arr['term'][] = array(
+ 'uid' => $channel['channel_id'],
+ 'type' => TERM_CATEGORY,
+ 'otype' => TERM_OBJ_POST,
+ 'term' => trim($tax['name']),
+ 'url' => $channel['xchan_url'] . '?f=&cat=' . urlencode(trim($tax['name']))
+ );
+ }
+ }
+
+ // store the item
+
+ if($arr['id'])
+ item_store_update($arr);
+ else
+ item_store($arr);
+
+ // if there are any comments, process them
+ // $comment['registered'] is somebody with an account on the system. Others are mostly anonymous
+
+ if($j['comments']) {
+ foreach($j['comments'] as $comment) {
+ $user = (($comment['registered']) ? reflect_find_user($users,$comment['author']) : null);
+ reflect_comment_store($channel,$arr,$comment,$user);
+ }
+ }
+ $processed ++;
+
+ if(REFLECT_MAXPERRUN && $processed > REFLECT_MAXPERRUN)
+ break;
+ }
+ return 'processed: ' . $processed . EOL . 'completed: ' . $ignored . EOL;
+
+}
+
+function reflect_article_callback($matches) {
+ return '[zrl=' . z_root() . '/display/'. $matches[1] . ']' . $matches[2] . '[/zrl]';
+}
+
+function reflect_photo_callback($matches) {
+
+ if(strpos($matches[2],'http') !== false)
+ return $matches[0];
+
+ $prefix = REFLECT_BASEURL;
+ $x = z_fetch_url($prefix.$matches[2],true);
+
+ $hash = basename($matches[2]);
+
+ if($x['success']) {
+ $channel = reflect_get_channel();
+ require_once('include/photos.php');
+ $p = photo_upload($channel,$channel,
+ array('data' => $x['body'],
+ 'resource_id' => str_replace('-','',$hash),
+ 'filename' => $hash . '.jpg',
+ 'type' => 'image/jpeg',
+ 'not_visible' => true
+ )
+ );
+
+ if($p['success'])
+ $newlink = $p['resource_id'] . '-0.jpg';
+
+
+ // import photo and locate the link for it.
+ return '[zmg]' . z_root() . '/photo/' . $newlink . '[/zmg]';
+
+ }
+ // no replacement. Leave it alone.
+ return $matches[0];
+}
+
+function reflect_find_user($users,$name) {
+ if($users) {
+ foreach($users as $x) {
+ if($x['name'] === $name) {
+ return $x;
+ }
+ }
+ }
+
+ return false;
+
+}
+
+function reflect_comment_store($channel,$post,$comment,$user) {
+
+ // if the commenter was the channel owner, use their redmatrix xchan
+
+ if($comment['author'] === REFLECT_EXPORTUSERNAME && $comment['registered'])
+ $hash = $channel['xchan_hash'];
+ else {
+ // we need a unique hash for the commenter. We don't know how many may have supplied
+ // http://yahoo.com as their URL, so we'll use their avatar guid if they have one.
+ // anonymous folks may get more than one xchan_hash if they commented more than once.
+
+ $hash = (($comment['registered'] && $user) ? $user['avatar'] : '');
+ if(! $hash)
+ $hash = random_string() . '.unknown';
+
+ // create an xchan for them which will also import their profile photo
+ // they will have a network type 'unknown'.
+
+ $x = array(
+ 'hash' => $hash,
+ 'guid' => $hash,
+ 'url' => (($comment['url']) ? $comment['url'] : z_root()),
+ 'photo' => (($user) ? REFLECT_BASEURL . $user['avatar'] : z_root() . '/' . get_default_profile_photo()),
+ 'name' => $comment['author']
+ );
+ xchan_store($x);
+
+ }
+
+ $arr = array();
+
+ $r = q("select * from item where mid = '%s' and uid = %d limit 1",
+ dbesc($comment['guid']),
+ intval($channel['channel_id'])
+ );
+ if($r) {
+ if(REFLECT_OVERWRITE)
+ $arr['id'] = $r[0]['id'];
+ else
+ return;
+ }
+
+ // this is a lot like storing the post except for subtle differences, like parent_mid, flags, author_xchan,
+ // and we don't have a comment edited field so use creation date
+
+ $arr['uid'] = $channel['channel_account_id'];
+ $arr['aid'] = $channel['channel_id'];
+ $arr['mid'] = $comment['guid'];
+ $arr['parent_mid'] = $post['mid'];
+ $arr['created'] = $comment['created'];
+ $arr['edited'] = $comment['created'];
+ $arr['author_xchan'] = $hash;
+ $arr['owner_xchan'] = $channel['channel_hash'];
+ $arr['item_flags'] = ITEM_ORIGIN|ITEM_WALL;
+ $arr['verb'] = ACTIVITY_POST;
+ $arr['comment_policy'] = 'contacts';
+
+
+ $arr['title'] = html2bbcode($comment['title']);
+ $arr['title'] = htmlspecialchars($arr['title'],ENT_COMPAT,'UTF-8',false);
+
+
+ $arr['body'] = html2bbcode($comment['body']);
+ $arr['body'] = htmlspecialchars($arr['body'],ENT_COMPAT,'UTF-8',false);
+ $arr['body'] = preg_replace_callback("/\[url\=\/+article\/(.*?)\](.*?)\[url\]/",'reflect_article_callback',$arr['body']);
+ $arr['body'] = preg_replace_callback("/\[img(.*?)\](.*?)\[\/img\]/",'reflect_photo_callback',$arr['body']);
+
+ // logger('comment: ' . print_r($arr,true));
+
+ if($arr['id'])
+ item_store_update($arr);
+ else
+ item_store($arr);
+
+}