summaryrefslogtreecommitdiffstats
path: root/import-posts.php
diff options
context:
space:
mode:
authorHarald Eilertsen <haraldei@anduin.net>2022-07-31 15:59:59 +0200
committerHarald Eilertsen <haraldei@anduin.net>2022-07-31 16:00:10 +0200
commit614030f9947a54868797d6a77acbeb6d6a096b29 (patch)
tree620d79a3a1a8fa2f8c7b4431110cc6939a2f594f /import-posts.php
downloadmigrate-614030f9947a54868797d6a77acbeb6d6a096b29.tar.gz
migrate-614030f9947a54868797d6a77acbeb6d6a096b29.tar.bz2
migrate-614030f9947a54868797d6a77acbeb6d6a096b29.zip
Initial commit
Importing the posts works, but not images.
Diffstat (limited to 'import-posts.php')
-rw-r--r--import-posts.php186
1 files changed, 186 insertions, 0 deletions
diff --git a/import-posts.php b/import-posts.php
new file mode 100644
index 0000000..6697289
--- /dev/null
+++ b/import-posts.php
@@ -0,0 +1,186 @@
+<?php
+/**
+ * Script to import posts from RefineryCMS to WordPress for Heavymetal.no
+ *
+ * We expect the posts to be in a zip-archive containing files as exported
+ * by the export-articles script in the old install. That is, HTML files
+ * with a prepended frontmatter block containing the relevant metadata.
+ *
+ * We don't care about any subdirectories in the archive, or indeed file
+ * names, as we only iterate through the archive linearly by index.
+ *
+ * @package hmno.migrate
+ */
+
+// The archive to import must be given as the first command line argument;
+// print a usage hint and stop when it is missing.
+if ( 2 > $argc ) {
+	printf( 'Usage: %s <filename.zip>%s', $argv[0], PHP_EOL );
+	exit( -1 );
+}
+
+$filename = $argv[1];
+
+
+// Load WordPress
+require_once getcwd() . '/wp-load.php';
+
+// Required for the taxonomy functions (categories/tags)
+require_once getcwd() . '/wp-admin/includes/taxonomy.php';
+
+// Required for kses functionality
+require_once getcwd() . '/wp-includes/kses.php';
+
+
+// Walk the archive entry by entry, importing every file as one post.
+$archive = new ZipArchive();
+$status  = $archive->open( $filename, ZipArchive::RDONLY );
+
+// ZipArchive::open() only signals success with an identical true; any other
+// value is a failure that must not pass silently.
+if ( true !== $status ) {
+	fwrite( STDERR, "Error: Unable to open archive {$filename} (status: " . var_export( $status, true ) . ').' . PHP_EOL );
+	exit( 1 );
+}
+
+$files = $archive->count();
+echo "Opened archive with {$files} files." . PHP_EOL;
+
+for ( $i = 0; $i < $files; $i++ ) {
+	$name = $archive->getNameIndex( $i );
+
+	// Directory entries (names ending in a slash) hold no post data.
+	if ( false === $name || '/' === substr( $name, -1 ) ) {
+		continue;
+	}
+
+	echo " => {$name}..." . PHP_EOL;
+
+	$contents = $archive->getFromIndex( $i );
+	if ( false === $contents ) {
+		die( "Error: Unable to read {$name} from archive." . PHP_EOL );
+	}
+
+	$meta = parse_old_post( $contents );
+	migrate_post( $meta );
+}
+
+$archive->close();
+
+/**
+ * Splits one exported post into its frontmatter metadata and its body.
+ *
+ * The input is split on the `---` delimiter: the text between the first two
+ * delimiters is parsed as frontmatter, everything after the second one is
+ * the body. Terminates the script when the delimiters are missing.
+ *
+ * @param string $input Raw contents of one exported file.
+ * @return array The frontmatter key/value pairs plus the trimmed body under 'body'.
+ */
+function parse_old_post( $input ) : array {
+	// Limit the split to three parts so a `---` occurring inside the body
+	// does not cut the body short.
+	$parts = explode( '---', (string) $input, 3 );
+
+	if ( 3 !== count( $parts ) ) {
+		die( 'Error: Malformed post, expected a frontmatter block delimited by ---.' . PHP_EOL );
+	}
+
+	// $parts[0] is whatever precedes the opening delimiter and is ignored.
+	$meta         = parse_post_frontmatter( $parts[1] );
+	$meta['body'] = trim( $parts[2] );
+
+	return $meta;
+}
+
+/**
+ * Turns a frontmatter block into an associative array.
+ *
+ * Each line is split once on the first ': ' into a key and a value. Lines
+ * without that separator are ignored, and a key that occurs more than once
+ * keeps its last value.
+ *
+ * @param string $input The frontmatter text, one `key: value` pair per line.
+ * @return array Map of frontmatter keys to their string values.
+ */
+function parse_post_frontmatter( $input ) : array {
+	$meta = array();
+
+	foreach ( explode( "\n", $input ) as $line ) {
+		$pair = explode( ': ', $line, 2 );
+
+		// Anything that did not split into exactly key and value is skipped.
+		if ( 2 !== count( $pair ) ) {
+			continue;
+		}
+
+		list( $key, $value ) = $pair;
+		$meta[ $key ]        = $value;
+	}
+
+	return $meta;
+}
+
+/**
+ * Looks up a category by name, creating it when the lookup finds nothing.
+ *
+ * @param string $name   Name of the category.
+ * @param int    $parent Parent category ID, only used when the category is created.
+ * @return int Whatever category_exists() or wp_create_category() reported, as an int.
+ */
+function get_wp_category( $name, $parent = 0 ) : int {
+	$existing = category_exists( $name );
+
+	if ( $existing ) {
+		return $existing;
+	}
+
+	return wp_create_category( $name, $parent );
+}
+
+/**
+ * Creates a WordPress post from the metadata of one exported article.
+ *
+ * Categories named in the metadata are resolved through get_wp_category(),
+ * the images in the body are imported through migrate_images(), and the
+ * post is inserted with the rewritten body. Terminates the script when the
+ * post cannot be inserted.
+ *
+ * @param array $meta Parsed frontmatter plus 'body', as produced by parse_old_post().
+ * @return array Two elements: the ID of the new post, and the attachment ID
+ *               of the first imported image (null when none was imported).
+ *               The thumbnail is only reported here, not attached to the post.
+ */
+function migrate_post( $meta ) {
+	// Split a comma separated field, dropping surrounding whitespace and
+	// empty entries so an empty field does not yield a nameless term.
+	$split_list = function( $value ) {
+		return array_values( array_filter( array_map( 'trim', explode( ',', (string) $value ) ), 'strlen' ) );
+	};
+
+	$categories = array_map(
+		fn( $cat ) => get_wp_category( $cat ),
+		$split_list( $meta['categories'] ?? '' )
+	);
+
+	// The image import rewrites the body, so the post must be inserted with
+	// the returned body rather than the one in $meta.
+	$images = migrate_images( $meta );
+	$thumb  = $images['thumb'];
+
+	$post_id = wp_insert_post(
+		array(
+			'post_author'       => $meta['user_id'],
+			'post_date'         => $meta['published_at'],
+			'post_date_gmt'     => $meta['published_at'],
+			'post_content'      => $images['body'],
+			'post_title'        => $meta['title'],
+			'post_excerpt'      => wp_filter_kses( $meta['custom_teaser'] ?? '' ),
+			'post_status'       => 'publish',
+			'comment_status'    => 'closed',
+			'ping_status'       => 'closed',
+			'post_modified'     => $meta['updated_at'],
+			'post_modified_gmt' => $meta['updated_at'],
+			'post_parent'       => '0',
+			'post_category'     => $categories,
+			// wp_insert_post() reads tags from 'tags_input'.
+			'tags_input'        => $split_list( $meta['tags'] ?? '' ),
+			'menu_order'        => '0',
+			'post_type'         => 'post',
+		)
+	);
+
+	if ( 0 === $post_id ) {
+		die( "Error: Unable to insert post: {$meta['title']}." . PHP_EOL );
+	}
+
+	return array( $post_id, $thumb );
+}
+
+/**
+ * Imports the images referenced in a post body into the uploads directory.
+ *
+ * Every <img> tag with the src/title/alt attribute layout matched below is
+ * downloaded with import_photo() and registered as an attachment. The first
+ * image is removed from the body and reported as the thumbnail; every later
+ * tag is rewritten to point at the imported copy.
+ *
+ * @param array $meta Post metadata; reads 'published_at' and 'body'.
+ * @return array 'body' holds the rewritten body (empty string when the post
+ *               had none), 'thumb' the attachment ID of the first image or
+ *               null when no image was imported.
+ */
+function migrate_images( array $meta ) : array {
+	// The constructor throws on an unparsable date instead of returning a
+	// falsy value, so the failure has to be caught.
+	try {
+		$post_date = new DateTimeImmutable( $meta['published_at'] );
+	} catch ( Exception $e ) {
+		die( "Error: Could not parse published_at date: {$meta['published_at']}." );
+	}
+
+	$thumb = null;
+	$body  = $meta['body'] ?? '';
+
+	$rewritten = preg_replace_callback(
+		'/<img src="([^"]*)" title="([^"]*)" alt="([^"]*)"[^>]*>/',
+		// $thumb is captured by reference so the first match can record it.
+		function( array $matches ) use ( $post_date, &$thumb ) {
+			$remote_filename = $matches[1];
+			$filename        = basename( $remote_filename );
+			$title           = $matches[2];
+			$alt             = $matches[3];
+
+			// File the image under the year/month the post was published.
+			$uploads = wp_upload_dir( $post_date->format( 'Y/m' ) );
+			if ( ! empty( $uploads['error'] ) ) {
+				die( "Error: Upload directory unavailable: {$uploads['error']}" . PHP_EOL );
+			}
+
+			$target_file = implode( '/', array( $uploads['path'], $filename ) );
+			$target_url  = implode( '/', array( $uploads['url'], $filename ) );
+
+			import_photo( $remote_filename, $target_file );
+
+			$mime = wp_get_image_mime( $target_file );
+			if ( ! $mime ) {
+				die( "Error: Unknown mime type for {$target_file}" . PHP_EOL );
+			}
+
+			$attachment_id = wp_insert_attachment(
+				array(
+					'post_mime_type' => $mime,
+					'guid'           => $target_url,
+					'post_parent'    => 0,
+					'post_title'     => $filename,
+				),
+				$target_file
+			);
+
+			if ( 0 === $attachment_id ) {
+				die( "Error: Could not create attachment for {$target_file}" . PHP_EOL );
+			}
+
+			// The first image becomes the thumbnail and is dropped from the body.
+			if ( null === $thumb ) {
+				$thumb = $attachment_id;
+				return '';
+			}
+
+			return "<img src=\"{$target_url}\" title=\"{$title}\" alt=\"{$alt}\">";
+		},
+		$body
+	);
+
+	// preg_replace_callback() returns null when the regex engine fails.
+	if ( null === $rewritten ) {
+		die( 'Error: Could not rewrite image tags in post body.' . PHP_EOL );
+	}
+
+	return array(
+		'body'  => $rewritten,
+		'thumb' => $thumb,
+	);
+}
+
+/**
+ * Downloads a photo and stores it at the given local path.
+ *
+ * Paths are resolved against https://heavymetal.no; a value that already is
+ * an absolute http(s) URL is fetched as-is. Terminates the script when the
+ * download or the write fails, so no empty or partial file is left to be
+ * registered as an image.
+ *
+ * @param string $remote_path     Path (or absolute URL) of the photo to fetch.
+ * @param string $target_filename Local file the photo is written to.
+ */
+function import_photo( $remote_path, $target_filename ) {
+	// Strip leading slashes so joining with the site root cannot produce a
+	// double slash.
+	$url = preg_match( '#^https?://#i', $remote_path )
+		? $remote_path
+		: 'https://heavymetal.no/' . ltrim( $remote_path, '/' );
+
+	$curl = curl_init( $url );
+
+	if ( false === $curl ) {
+		die( 'Error: Unable to init curl' );
+	}
+
+	curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
+	// Make HTTP error responses fail the transfer instead of having the
+	// error page saved as if it were the image.
+	curl_setopt( $curl, CURLOPT_FAILONERROR, true );
+
+	$res = curl_exec( $curl );
+	if ( false === $res ) {
+		die( "Error: Unable to download {$url}: " . curl_error( $curl ) . PHP_EOL );
+	}
+
+	if ( false === file_put_contents( $target_filename, $res ) ) {
+		die( "Error: Unable to write {$target_filename}" . PHP_EOL );
+	}
+}