summaryrefslogblamecommitdiffstats
path: root/import-posts.php
blob: 6697289bad6ea4e0ba571e76ac4b5926b05fc0f4 (plain) (tree)

























































































































































































                                                                                                                 
<?php
/**
 * Script to import posts from RefineryCMS to WordPress for Heavymetal.no
 *
 * We expect the posts to be in a zip archive containing files as exported
 * by the export-articles script in the old install, that is, HTML files
 * with a prepended frontmatter block containing the relevant metadata.
 *
 * We don't care about any subdirectories in the archive, or indeed file
 * names, as we only iterate through the archive linearly by index.
 *
 * @package hmno.migrate
 */

// Bail out with a usage hint unless an archive path was given on the command line.
if ( ! isset( $argv[1] ) ) {
	printf( 'Usage: %s <filename.zip>%s', $argv[0], PHP_EOL );
	exit( -1 );
}

// Path of the zip archive holding the exported posts.
$filename = $argv[1];


// Load WordPress
require_once getcwd() . '/wp-load.php';

// Required for the taxonomy functions (categories/tags)
require_once getcwd() . '/wp-admin/includes/taxonomy.php';

// Required for kses functionality
require_once getcwd() . '/wp-includes/kses.php';


// Walk the archive entry by entry and import every exported post it contains.
$archive = new ZipArchive();
$opened  = $archive->open( $filename, ZipArchive::RDONLY );

if ( true !== $opened ) {
	// open() hands back a ZipArchive::ER_* code (or false) instead of true on failure.
	echo sprintf( 'Error: Unable to open archive %s (code: %s).', $filename, var_export( $opened, true ) ) . PHP_EOL;
	exit( -1 );
}

$files = $archive->count();
echo "Opened archive with {$files} files." . PHP_EOL;

for ( $i = 0; $i < $files; $i++ ) {
	$name = $archive->getNameIndex( $i );

	// Directory entries (names ending in a slash) carry no post data.
	if ( false === $name || '/' === substr( $name, -1 ) ) {
		continue;
	}

	echo " => {$name}..." . PHP_EOL;

	$contents = $archive->getFromIndex( $i );
	if ( false === $contents ) {
		die( "Error: Unable to read {$name} from archive." . PHP_EOL );
	}

	migrate_post( parse_old_post( $contents ) );
}

$archive->close();

/**
 * Split an exported article into its frontmatter metadata and its body.
 *
 * The input is expected to consist of a '---' delimiter, the frontmatter
 * lines, a second '---' delimiter and then the body.
 *
 * @param string $input Raw contents of one exported file.
 * @return array Frontmatter key/value pairs plus a 'body' entry holding the trimmed body.
 */
function parse_old_post( $input ) : array {
	// Cap the split at three parts so a '---' inside the body stays in the body.
	$parts = explode( '---', $input, 3 );

	if ( count( $parts ) < 3 ) {
		die( 'Error: Post is missing its frontmatter delimiters.' . PHP_EOL );
	}

	// $parts[0] is whatever precedes the opening delimiter (normally empty).
	$meta         = parse_post_frontmatter( $parts[1] );
	$meta['body'] = trim( $parts[2] );

	return $meta;
}

/**
 * Turn a frontmatter block into an associative array.
 *
 * Every line of the form "key: value" contributes one entry. The split
 * happens at the first ': ', so values may themselves contain colons.
 * Lines without that separator are ignored, and a repeated key keeps the
 * value of its last occurrence.
 *
 * @param string $input Frontmatter text, one "key: value" pair per line.
 * @return array Map of keys to their raw (untrimmed) string values.
 */
function parse_post_frontmatter( $input ) : array {
	$fields = array();

	foreach ( explode( "\n", $input ) as $row ) {
		$pair = explode( ': ', $row, 2 );

		// Blank lines and lines lacking the separator yield a single element.
		if ( 2 !== count( $pair ) ) {
			continue;
		}

		list( $key, $value ) = $pair;
		$fields[ $key ]      = $value;
	}

	return $fields;
}

/**
 * Resolve a category name to its ID, creating the category when
 * category_exists() reports nothing for that name.
 *
 * Note that the existence lookup is done by name only; $parent is used
 * solely when a new category has to be created.
 *
 * @param string $name   Category name.
 * @param int    $parent Parent value handed to wp_create_category().
 * @return int Category ID.
 */
function get_wp_category( $name, $parent = 0 ) : int {
	$existing = category_exists( $name );

	// Reuse the term that was found; create one only when the lookup came back empty.
	return $existing ? $existing : wp_create_category( $name, $parent );
}

/**
 * Create a WordPress post from one parsed RefineryCMS article.
 *
 * Images referenced in the body are imported first so that the stored
 * content points at the new upload URLs. The first imported image is
 * removed from the body and set as the post thumbnail instead.
 *
 * @param array $meta Frontmatter fields plus 'body', as produced by parse_old_post().
 * @return array Two elements: the new post ID, and the thumbnail attachment ID
 *               (null when the body contained no matching image).
 */
function migrate_post( $meta ) {
	// Drop blank entries so an empty list does not create a nameless category or tag.
	$category_names = array_filter( array_map( 'trim', explode( ',', $meta['categories'] ?? '' ) ), 'strlen' );
	$tags           = array_filter( array_map( 'trim', explode( ',', $meta['tags'] ?? '' ) ), 'strlen' );

	$categories = array_map(
		fn( $cat ) => get_wp_category( $cat ),
		array_values( $category_names )
	);

	// $meta is passed by value, so the rewritten body has to come back explicitly.
	list( $body, $thumb ) = migrate_images( $meta );

	$post_id = wp_insert_post(
		array(
			'post_author'       => $meta['user_id'],
			'post_date'         => $meta['published_at'],
			'post_date_gmt'     => $meta['published_at'],
			'post_content'      => $body,
			'post_title'        => $meta['title'],
			'post_excerpt'      => wp_filter_kses( $meta['custom_teaser'] ?? '' ),
			'post_status'       => 'publish',
			'comment_status'    => 'closed',
			'ping_status'       => 'closed',
			'post_modified'     => $meta['updated_at'],
			'post_modified_gmt' => $meta['updated_at'],
			'post_parent'       => '0',
			'post_category'     => $categories,
			// wp_insert_post() reads tags from 'tags_input'; 'post_tags' is ignored.
			'tags_input'        => array_values( $tags ),
			'menu_order'        => '0',
			'post_type'         => 'post',
		)
	);

	if ( 0 === $post_id ) {
		die( "Error: Unable to insert post: {$meta['title']}." . PHP_EOL );
	}

	if ( null !== $thumb ) {
		// The first image was stripped from the body; attach it so it is not lost.
		set_post_thumbnail( $post_id, $thumb );
	}

	return array( $post_id, $thumb );
}

/**
 * Import the images referenced in a post body into the media library.
 *
 * Every `<img src="…" title="…" alt="…">` tag (attributes in exactly that
 * order) is downloaded through import_photo() into the uploads folder for
 * the post's publication month and registered as an attachment. The first
 * image becomes the thumbnail candidate and is removed from the body; each
 * later image tag is rewritten to point at its new URL.
 *
 * @param array $meta Post fields; reads 'published_at' and 'body'.
 * @return array Two elements: the rewritten body (string), and the attachment
 *               ID of the first image (int, or null when none was found).
 */
function migrate_images( array $meta ) : array {
	try {
		$post_date = new DateTimeImmutable( $meta['published_at'] );
	} catch ( Exception $e ) {
		// The constructor throws on unparseable input; it never returns false.
		die( "Error: Could not parse published_at date: {$meta['published_at']}." . PHP_EOL );
	}

	$thumb = null;
	$body  = $meta['body'] ?? '';

	if ( '' === $body ) {
		return array( $body, $thumb );
	}

	$body = preg_replace_callback(
		'/<img src="([^"]*)" title="([^"]*)" alt="([^"]*)"[^>]*>/',
		// $thumb is captured by reference so the first match can be remembered across calls.
		function( array $matches ) use ( $post_date, &$thumb ) {
			list( , $remote_filename, $title, $alt ) = $matches;

			$filename = basename( $remote_filename );

			$uploads = wp_upload_dir( $post_date->format( 'Y/m' ) );
			if ( ! empty( $uploads['error'] ) ) {
				die( "Error: Upload directory unavailable: {$uploads['error']}" . PHP_EOL );
			}

			$target_file = implode( '/', array( $uploads['path'], $filename ) );
			$target_url  = implode( '/', array( $uploads['url'], $filename ) );

			import_photo( $remote_filename, $target_file );

			$mime = wp_get_image_mime( $target_file );
			if ( ! $mime ) {
				die( "Error: Unknown mime type for {$target_file}" . PHP_EOL );
			}

			$attachment_id = wp_insert_attachment(
				array(
					'post_mime_type' => $mime,
					'guid'           => $target_url,
					// The post does not exist yet, so the attachment starts out unattached.
					'post_parent'    => 0,
					'post_title'     => $filename,
				),
				$target_file
			);

			if ( 0 === $attachment_id ) {
				die( "Error: Could not create attachment for {$target_file}" . PHP_EOL );
			}

			if ( null === $thumb ) {
				// First image: keep it as the thumbnail and drop the tag from the body.
				$thumb = $attachment_id;
				return '';
			}

			return "<img src=\"{$target_url}\" title=\"{$title}\" alt=\"{$alt}\">";
		},
		$body
	);

	if ( null === $body ) {
		// preg_replace_callback() yields null when the regex engine fails.
		die( 'Error: Failed to rewrite image tags in post body.' . PHP_EOL );
	}

	return array( $body, $thumb );
}

/**
 * Download an image from the old site and write it to disk.
 *
 * Terminates the script with an error message when the download or the
 * write fails, so that a broken or empty file is never left behind for
 * the caller to register as an attachment.
 *
 * @param string $remote_path     Image path relative to https://heavymetal.no, or an absolute http(s) URL.
 * @param string $target_filename Local file path to write the image to.
 */
function import_photo( $remote_path, $target_filename ) {
	// Absolute URLs are fetched as-is; site-relative paths are joined without doubling the slash.
	if ( preg_match( '#^https?://#i', $remote_path ) ) {
		$url = $remote_path;
	} else {
		$url = 'https://heavymetal.no/' . ltrim( $remote_path, '/' );
	}

	$curl = curl_init( $url );

	if ( false === $curl ) {
		die( 'Error: Unable to init curl' . PHP_EOL );
	}

	curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
	curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
	// Treat HTTP 4xx/5xx responses as failures so an error page is never saved as an image.
	curl_setopt( $curl, CURLOPT_FAILONERROR, true );

	$res = curl_exec( $curl );
	if ( false === $res ) {
		die( "Error: Unable to download {$url}: " . curl_error( $curl ) . PHP_EOL );
	}

	if ( false === file_put_contents( $target_filename, $res ) ) {
		die( "Error: Unable to write {$target_filename}" . PHP_EOL );
	}
}