<?php
/**
* Script to import posts from RefineryCMS to WordPress for Heavymetal.no
*
* We expect the posts to be in a zip archive containing files as exported
* by the export-articles script in the old install, i.e. HTML files
* with a prepended frontmatter block containing the relevant metadata.
*
* We don't care about any subdirectories in the archive, or indeed file
* names, as we only iterate through the archive linearly by index.
*
* @package hmno.migrate
*/
// Entry point: validate the command line, bootstrap WordPress from the current
// working directory, then migrate every readable entry of the given archive.
if ( $argc < 2 ) {
    echo "Usage: {$argv[0]} <filename.zip>" . PHP_EOL;
    exit( -1 );
}

$filename = $argv[1];

// Load WordPress
require_once getcwd() . '/wp-load.php';
// Required for the taxonomy functions (categories/tags)
require_once getcwd() . '/wp-admin/includes/taxonomy.php';
// Required for kses functionality
require_once getcwd() . '/wp-includes/kses.php';
// Import media handling to get post thumbnails to work
require_once getcwd() . '/wp-includes/media.php';

set_post_thumbnail_size( 500, 300, true );

$archive = new ZipArchive();

// open() yields true on success and an error code otherwise; report the
// failure instead of exiting silently without having migrated anything.
$opened = $archive->open( $filename, ZipArchive::RDONLY );
if ( true !== $opened ) {
    $code = (int) $opened;
    echo "Error: Unable to open archive '{$filename}' (ZipArchive error code {$code})." . PHP_EOL;
    exit( -1 );
}

$files = $archive->count();
echo "Opened archive with {$files} files." . PHP_EOL;

for ( $i = 0; $i < $files; $i++ ) {
    echo " => {$archive->getNameIndex( $i )}..." . PHP_EOL;

    $contents = $archive->getFromIndex( $i );
    if ( false === $contents ) {
        // Nothing to parse for an entry that cannot be read.
        echo "    Warning: could not read entry {$i}, skipping." . PHP_EOL;
        continue;
    }

    // Entries without a body section (no second '---' delimiter) are skipped.
    $meta = parse_old_post( $contents );
    if ( isset( $meta['body'] ) ) {
        migrate_post( $meta );
    }
}

$archive->close();
/**
 * Split one exported article into its frontmatter metadata and its body.
 *
 * The input is split on the '---' delimiter: the text between the first and
 * second delimiter is parsed as frontmatter, and everything after the second
 * delimiter becomes the body.
 *
 * @param string $input Raw contents of one exported file.
 * @return array Frontmatter key/value pairs, plus a trimmed 'body' entry when
 *               a body section is present. Empty when no delimiter is found.
 */
function parse_old_post( $input ) : array {
    // Cap the split at three parts so a literal '---' inside the body does
    // not cut the body short.
    $parts = explode( '---', (string) $input, 3 );

    // $parts[0] is whatever precedes the first delimiter (normally empty).
    if ( ! isset( $parts[1] ) ) {
        // No delimiter at all, so there is no frontmatter to parse.
        return array();
    }

    $meta = parse_post_frontmatter( $parts[1] );
    if ( isset( $parts[2] ) ) {
        $meta['body'] = trim( $parts[2] );
    }

    return $meta;
}
/**
 * Convert a frontmatter block into an associative array.
 *
 * Every line of the form "key: value" yields one entry. The split happens at
 * the first ': ' only, so a value may itself contain that sequence. Lines
 * without the separator are ignored, and a repeated key keeps its last value.
 *
 * @param string $input Frontmatter text, one "key: value" pair per line.
 * @return array Map of frontmatter keys to their string values.
 */
function parse_post_frontmatter( $input ) : array {
    $fields = array();

    foreach ( explode( "\n", $input ) as $row ) {
        // Empty (falsy) rows carry no data.
        if ( ! $row ) {
            continue;
        }

        $pair = explode( ': ', $row, 2 );
        if ( 2 !== count( $pair ) ) {
            continue;
        }

        list( $key, $value ) = $pair;
        $fields[ $key ]      = $value;
    }

    return $fields;
}
/**
 * Resolve a category name to its ID, creating the category when it is missing.
 *
 * The existence lookup is done by name only; $parent is used solely when a
 * new category has to be created.
 *
 * @param string $name   Category name.
 * @param int    $parent Parent category ID for a newly created category.
 * @return int Category ID.
 */
function get_wp_category( $name, $parent = 0 ) : int {
    $existing = category_exists( $name );
    if ( $existing ) {
        return $existing;
    }

    return wp_create_category( $name, $parent );
}
/**
 * Create a published WordPress post from one parsed article.
 *
 * Images referenced by the article are imported first; the first one found
 * becomes the post thumbnail. The script is terminated if the post cannot be
 * inserted.
 *
 * @param array $meta Parsed frontmatter plus 'body'. Reads 'categories',
 *                    'tags', 'user_id', 'published_at', 'updated_at',
 *                    'title' and 'custom_teaser'.
 */
function migrate_post( $meta ) {
    // Split a comma-separated list, trimming names and dropping empty entries
    // so that "a, b" or an empty field does not yield bogus terms.
    $split = fn( $csv ) => array_values(
        array_filter( array_map( 'trim', explode( ',', (string) $csv ) ), 'strlen' )
    );

    $categories = array_map(
        fn( $cat ) => get_wp_category( $cat ),
        $split( $meta['categories'] ?? '' )
    );
    $tags       = $split( $meta['tags'] ?? '' );

    list( $body, $thumb ) = migrate_images( $meta );

    $post_id = wp_insert_post(
        array(
            'post_author'       => $meta['user_id'],
            'post_date'         => $meta['published_at'],
            'post_date_gmt'     => $meta['published_at'],
            'post_content'      => $body,
            'post_title'        => $meta['title'],
            'post_excerpt'      => wp_filter_kses( $meta['custom_teaser'] ?? '' ),
            'post_status'       => 'publish',
            'comment_status'    => 'closed',
            'ping_status'       => 'closed',
            'post_modified'     => $meta['updated_at'],
            'post_modified_gmt' => $meta['updated_at'],
            'post_parent'       => '0',
            'post_category'     => $categories,
            // wp_insert_post() reads tags from 'tags_input'; the previous
            // 'post_tags' key is not a recognised argument.
            'tags_input'        => $tags,
            'menu_order'        => '0',
            'post_type'         => 'post',
        )
    );

    if ( 0 === $post_id ) {
        die( "Error: Unable to insert post: {$meta['title']}." . PHP_EOL );
    }

    if ( $thumb ) {
        set_post_thumbnail( $post_id, $thumb );
    }
}
/**
 * Import the images referenced by a post and rewrite the markup to use them.
 *
 * The 'custom_teaser' and 'body' sections are scanned in that order. Each
 * matching <img> is downloaded into the uploads directory for the post's
 * publication month and registered as an attachment. The first image found is
 * replaced by a placeholder comment and reported as the thumbnail; later ones
 * are rewritten to point at the local copy. The script is terminated on any
 * import failure.
 *
 * @param array $meta Parsed article; reads 'published_at', 'custom_teaser'
 *                    and 'body'.
 * @return array Two elements: the rewritten markup of the last section that
 *               was processed (the body when present, '' when no section is
 *               set), and the thumbnail attachment ID or null.
 */
function migrate_images( array $meta ) : array {
    // The DateTimeImmutable constructor throws on unparseable input instead
    // of returning a falsy value, so the failure has to be caught.
    try {
        $post_date = new DateTimeImmutable( $meta['published_at'] );
    } catch ( Exception $e ) {
        die( "Error: Could not parse published_at date: {$meta['published_at']}." );
    }

    $thumb = null;
    $body  = '';

    // Imports one matched <img> and returns its replacement markup.
    $import_image = function( array $matches ) use ( $post_date, &$thumb ) {
        $remote_filename = get_real_image_name( $matches[1] );
        $filename        = basename( $remote_filename );
        $title           = $matches[2];
        $alt             = $matches[3];
        // The pattern below requires a class attribute, so group 4 is
        // normally present; the fallback only guards against a missing group.
        $class = $matches[4] ?? '';

        $uploads = wp_upload_dir( $post_date->format( 'Y/m' ) );
        if ( ! empty( $uploads['error'] ) ) {
            die( "Error: Upload directory unavailable: {$uploads['error']}" . PHP_EOL );
        }

        $target_file = implode( '/', array( $uploads['path'], $filename ) );
        $target_url  = implode( '/', array( $uploads['url'], $filename ) );

        import_photo( $remote_filename, $target_file );

        $mime = wp_get_image_mime( $target_file );
        if ( ! $mime ) {
            die( "Error: Unknown mime type for {$target_file}" . PHP_EOL );
        }

        $attachment_id = wp_insert_attachment(
            array(
                'post_mime_type' => $mime,
                'guid'           => $target_url,
                'post_parent'    => 0,
                'post_title'     => $filename,
            ),
            $target_file
        );
        if ( 0 === $attachment_id ) {
            die( "Error: Could not create attachment for {$target_file}" . PHP_EOL );
        }

        // The first image across all sections becomes the thumbnail and is
        // removed from the inline markup.
        if ( null === $thumb ) {
            $thumb = $attachment_id;
            return '<!-- image moved to thumbnail -->';
        }

        return "<img src=\"{$target_url}\" title=\"{$title}\" alt=\"{$alt}\"{$class}>";
    };

    foreach ( array( 'custom_teaser', 'body' ) as $section ) {
        if ( isset( $meta[ $section ] ) ) {
            $body = preg_replace_callback(
                '/<img src="([^"]*)" title="([^"]*)" alt="([^"]*)"[^>]*( class="[^"]*")[^>]*>/',
                $import_image,
                $meta[ $section ]
            );
        }
    }

    return array( $body, $thumb );
}
/**
 * Download one image from the old site and store it at the given local path.
 *
 * The script is terminated with a message when the transfer fails, the server
 * answers with a status above 299, the response is empty, or the file cannot
 * be written.
 *
 * @param string $remote_path     Image path below the old site's image root.
 * @param string $target_filename Local file path to write the image to.
 */
function import_photo( $remote_path, $target_filename ) {
    $remote_url = implode( '/', array( 'https://heavymetal.no/system/refinery/images', $remote_path ) );

    $curl = curl_init( $remote_url );
    if ( false === $curl ) {
        die( 'Error: Unable to init curl' );
    }
    curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );

    $res = curl_exec( $curl );
    if ( false === $res ) {
        // Report the transport failure here rather than leaving the caller
        // to trip over a missing file later.
        $reason = curl_error( $curl );
        die( "Error: Could not fetch '{$remote_url}': {$reason}." . PHP_EOL );
    }

    $status = curl_getinfo( $curl, CURLINFO_HTTP_CODE );
    if ( $status > 299 ) {
        die( "Error: Could not fetch '{$remote_url}', status = {$status}." . PHP_EOL );
    }

    if ( '' === $res ) {
        die( "Error: Empty response for '{$remote_url}'." . PHP_EOL );
    }

    if ( false === file_put_contents( $target_filename, $res ) ) {
        die( "Error: Could not write '{$target_filename}'." . PHP_EOL );
    }
}
/**
 * Recover the stored file path of an image from its public URL.
 *
 * The second-to-last path segment of the URL is base64-decoded, and the
 * decoded text is searched for a ["f","<path>"] entry whose path is returned.
 * The script is terminated when no such entry is found.
 *
 * @param string $orig Image URL or path as found in the exported markup.
 * @return string The path from the decoded ["f", ...] entry.
 */
function get_real_image_name( string $orig ) : string {
    $encoded = basename( dirname( $orig ) );

    // Map URL-safe alphabets back to standard base64 before decoding: '-' and
    // '_' per RFC 4648, and '~' for '/' (believed to be used by some
    // Dragonfly releases - TODO confirm). Standard input is unaffected.
    $decoded = base64_decode( strtr( $encoded, '-_~', '+//' ) );

    if ( preg_match( '/\["f","([^"]+)"\]/', $decoded, $matches ) ) {
        return $matches[1];
    }

    die( "Failed to decode filename: {$encoded}: {$decoded}" . PHP_EOL );
}