aboutsummaryrefslogtreecommitdiffstats
path: root/include/Import/refimport.php
blob: b95411c81cbd367daa325537f612845639a7d4f1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
<?php

require_once('include/html2bbcode.php');
require_once('include/hubloc.php');

// Sample module for importing conversation data from Reflection CMS. Some preparation was used to 
// dump relevant posts, categories and comments into individual JSON files, and also JSON dump of 
// the user table to search for avatars. Importation was also batched in sets of 20 posts per page
// visit so as to survive shared hosting process limits. This provides some clues as how to handle 
// WordPress imports, which use a somewhat similar DB structure. The batching and individual files
// might not be needed in VPS environments. As such this could be considered an extreme test case, but
// the importation was successful in all regards using this code. The module URL was visited repeatedly
// with a browser until all the posts had been imported.     


define('REDMATRIX_IMPORTCHANNEL','mike');
define('REFLECT_EXPORTUSERNAME','mike');
define('REFLECT_BLOGNAME','Diary and Other Rantings');
define('REFLECT_BASEURL','http://example.com/');
define('REFLECT_USERFILE','user.json');

// set to true if you need to process everything again
define('REFLECT_OVERWRITE',false);

// we'll only process a small number of posts at a time on a shared host.

define('REFLECT_MAXPERRUN',30);

function reflect_get_channel() {

	// this will be the channel_address or nickname of the red channel

	$c = q("select * from channel left join xchan on channel_hash = xchan_hash 
		where channel_address = '%s' limit 1",
		dbesc(REDMATRIX_IMPORTCHANNEL)
	);
	return $c[0];
}


function refimport_content(&$a) {

	$channel = reflect_get_channel();

	// load the user file. We need that to find the commenter's avatars

	$u = file_get_contents(REFLECT_USERFILE);
	if($u) {
		$users = json_decode($u,true);
	}

	$ignored = 0;
	$processed = 0;

	$files = glob('article/*');
	if(! $files) 
		return;

	foreach($files as $f) {
		$s = file_get_contents($f);
		$j = json_decode($s,true);

		if(! $j)
			continue;

		$arr = array();

		// see if this article was already processed
		$r = q("select * from item where mid = '%s' and uid = %d limit 1",
			dbesc($j['guid']),
			intval($channel['channel_id'])
		);
		if($r) {
			if(REFLECT_OVERWRITE)
				$arr['id'] = $r[0]['id'];
			else {
				$ignored ++;
				rename($f,str_replace('article','done',$f));
				continue;
			}
		}

		$arr['uid'] = $channel['channel_account_id'];
		$arr['aid'] = $channel['channel_id'];
		$arr['mid'] = $arr['parent_mid'] = $j['guid'];
		$arr['created'] = $j['created'];
		$arr['edited'] = $j['edited'];
		$arr['author_xchan'] = $channel['channel_hash'];
		$arr['owner_xchan'] = $channel['channel_hash'];
		$arr['app'] = REFLECT_BLOGNAME;
		$arr['item_flags'] = ITEM_ORIGIN|ITEM_THREAD_TOP;
		$arr['item_wall'] = 1;
		$arr['verb'] = ACTIVITY_POST;

		// this is an assumption
		$arr['comment_policy'] = 'contacts';


		// import content. In this case the content is XHTML.

		$arr['title'] = html2bbcode($j['title']);
		$arr['title'] = htmlspecialchars($arr['title'],ENT_COMPAT,'UTF-8',false);


		$arr['body'] = html2bbcode($j['body']);
		$arr['body'] = htmlspecialchars($arr['body'],ENT_COMPAT,'UTF-8',false);


		// convert relative urls to other posts on that service to absolute url on our service.
		$arr['body'] = preg_replace_callback("/\[url\=\/+article\/(.*?)\](.*?)\[url\]/",'reflect_article_callback',$arr['body']);

		// also import any photos
		$arr['body'] = preg_replace_callback("/\[img(.*?)\](.*?)\[\/img\]/",'reflect_photo_callback',$arr['body']);


		// add categories

		if($j['taxonomy'] && is_array($j['taxonomy']) && count($j['taxonomy'])) {
			$arr['term'] = array();
			foreach($j['taxonomy'] as $tax) {
				$arr['term'][] = array(
					'uid'   => $channel['channel_id'],
					'type'  => TERM_CATEGORY,
					'otype' => TERM_OBJ_POST,
					'term'  => trim($tax['name']),
            	    'url'   => $channel['xchan_url'] . '?f=&cat=' . urlencode(trim($tax['name']))
				);
			}
		}

		// store the item

		if($arr['id'])
			item_store_update($arr);
		else
			item_store($arr);

		// if there are any comments, process them
		// $comment['registered'] is somebody with an account on the system. Others are mostly anonymous

		if($j['comments']) {
			foreach($j['comments'] as $comment) {
				$user = (($comment['registered']) ? reflect_find_user($users,$comment['author']) : null);
				reflect_comment_store($channel,$arr,$comment,$user);
			}
		}
		$processed ++;

		if(REFLECT_MAXPERRUN && $processed > REFLECT_MAXPERRUN)
			break;
	}
	return 'processed: ' . $processed . EOL . 'completed: ' . $ignored . EOL;

}

function reflect_article_callback($matches) {
	return '[zrl=' . z_root() . '/display/'. $matches[1] . ']' . $matches[2] . '[/zrl]';
}

function reflect_photo_callback($matches) {

	if(strpos($matches[2],'http') !== false)
		return $matches[0];

	$prefix = REFLECT_BASEURL;
	$x = z_fetch_url($prefix.$matches[2],true);

	$hash = basename($matches[2]);

	if($x['success']) {
		$channel = reflect_get_channel();
		require_once('include/photos.php');
		$p = photo_upload($channel,$channel,
			array('data' => $x['body'],
				'resource_id' => str_replace('-','',$hash),
				'filename' => $hash . '.jpg',
				'type' => 'image/jpeg',
				'visible' => false
			)
		);

		if($p['success'])
			$newlink = $p['resource_id'] . '-0.jpg';
	
				
		// import photo and locate the link for it.
		return '[zmg]' . z_root() . '/photo/' . $newlink . '[/zmg]';

	}		
	// no replacement. Leave it alone.
	return $matches[0];
}

function reflect_find_user($users,$name) {
	if($users) {
		foreach($users as $x) {
			if($x['name'] === $name) {
				return $x;
			}
		}
	}

	return false;

}

function reflect_comment_store($channel,$post,$comment,$user) {

	// if the commenter was the channel owner, use their hubzilla xchan

	if($comment['author'] === REFLECT_EXPORTUSERNAME && $comment['registered'])
		$hash = $channel['xchan_hash'];
	else {
		// we need a unique hash for the commenter. We don't know how many may have supplied
		// http://yahoo.com as their URL, so we'll use their avatar guid if they have one. 
		// anonymous folks may get more than one xchan_hash if they commented more than once.

		$hash = (($comment['registered'] && $user) ? $user['avatar'] : '');
		if(! $hash)
			$hash = random_string() . '.unknown';

		// create an xchan for them which will also import their profile photo
		// they will have a network type 'unknown'.

		$x = array(
			'hash' => $hash,
			'guid' => $hash,
			'url' => (($comment['url']) ? $comment['url'] : z_root()),
			'photo' => (($user) ? REFLECT_BASEURL . $user['avatar'] : z_root() . '/' . get_default_profile_photo()),
			'name' => $comment['author']
		);
		xchan_store($x);

	}

	$arr = array();

	$r = q("select * from item where mid = '%s' and uid = %d limit 1",
		dbesc($comment['guid']),
		intval($channel['channel_id'])
	);
	if($r) {
		if(REFLECT_OVERWRITE)
			$arr['id'] = $r[0]['id'];
		else
			return;
	}

	// this is a lot like storing the post except for subtle differences, like parent_mid, flags, author_xchan,
	// and we don't have a comment edited field so use creation date

	$arr['uid'] = $channel['channel_account_id'];
	$arr['aid'] = $channel['channel_id'];
	$arr['mid'] = $comment['guid'];
	$arr['parent_mid'] = $post['mid'];
	$arr['created'] = $comment['created'];
	$arr['edited'] = $comment['created'];
	$arr['author_xchan'] = $hash;
	$arr['owner_xchan'] = $channel['channel_hash'];
	$arr['item_flags'] = ITEM_ORIGIN;
	$arr['item_wall'] = 1;
	$arr['verb'] = ACTIVITY_POST;
	$arr['comment_policy'] = 'contacts';


	$arr['title'] = html2bbcode($comment['title']);
	$arr['title'] = htmlspecialchars($arr['title'],ENT_COMPAT,'UTF-8',false);


	$arr['body'] = html2bbcode($comment['body']);
	$arr['body'] = htmlspecialchars($arr['body'],ENT_COMPAT,'UTF-8',false);
	$arr['body'] = preg_replace_callback("/\[url\=\/+article\/(.*?)\](.*?)\[url\]/",'reflect_article_callback',$arr['body']);
	$arr['body'] = preg_replace_callback("/\[img(.*?)\](.*?)\[\/img\]/",'reflect_photo_callback',$arr['body']);

	// logger('comment: ' . print_r($arr,true));

	if($arr['id'])
		item_store_update($arr);
	else
		item_store($arr);

}