diff options
author | Harald Eilertsen <haraldei@anduin.net> | 2022-07-31 15:59:59 +0200 |
---|---|---|
committer | Harald Eilertsen <haraldei@anduin.net> | 2022-07-31 16:00:10 +0200 |
commit | 614030f9947a54868797d6a77acbeb6d6a096b29 (patch) | |
tree | 620d79a3a1a8fa2f8c7b4431110cc6939a2f594f | |
download | migrate-614030f9947a54868797d6a77acbeb6d6a096b29.tar.gz migrate-614030f9947a54868797d6a77acbeb6d6a096b29.tar.bz2 migrate-614030f9947a54868797d6a77acbeb6d6a096b29.zip |
Initial commit
Importing the posts works, but not images.
-rw-r--r-- | .editorconfig | 3 | ||||
-rw-r--r-- | .phpcs.xml | 25 | ||||
-rw-r--r-- | import-posts.php | 186 |
3 files changed, 214 insertions, 0 deletions
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..22fcb8a
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,3 @@
+[*.php]
+indent_style = tab
+indent_size = 4
diff --git a/.phpcs.xml b/.phpcs.xml
new file mode 100644
index 0000000..f8bb5f3
--- /dev/null
+++ b/.phpcs.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<ruleset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="PHP_CodeSniffer" xsi:noNamespaceSchemaLocation="phpcs.xsd">
+    <description>PHPCS configuration for hmno.migrate</description>
+
+    <exclude-pattern>*/src/Standards/*/Tests/*\.(inc|css|js)$</exclude-pattern>
+    <exclude-pattern>*/tests/Core/*/*\.(inc|css|js)$</exclude-pattern>
+
+    <arg name="basepath" value="."/>
+    <arg name="colors"/>
+    <arg name="parallel" value="75"/>
+    <arg value="np"/>
+
+    <!-- Don't hide tokenizer exceptions -->
+    <rule ref="Internal.Tokenizer.Exception">
+        <type>error</type>
+    </rule>
+
+    <!-- Include the whole PEAR standard -->
+    <rule ref="WordPress">
+        <exclude name="Generic.WhiteSpace.DisallowSpaceIndent" />
+        <exclude name="Squiz.Commenting.InlineComment.InvalidEndChar" />
+        <exclude name="WordPress.Security.EscapeOutput.OutputNotEscaped" />
+        <exclude name="Squiz.Commenting.FunctionComment.Missing" />
+    </rule>
+</ruleset>
diff --git a/import-posts.php b/import-posts.php
new file mode 100644
index 0000000..6697289
--- /dev/null
+++ b/import-posts.php
@@ -0,0 +1,186 @@
+<?php
+/**
+ * Script to import posts from RefineryCMS to WordPress for Heavymetal.no
+ *
+ * We expect the posts to be in a zip-archive containing files as exported
+ * by the export-articles script in the old install. That is html files
+ * with a prepended frontmatter block containing the relevant metadata.
+ *
+ * We don't care about any subdirectories in the archive, or indeed file
+ * names, as we only iterate through the archive linearly by index.
+ *
+ * @package hmno.migrate
+ */
+
+if ( $argc < 2 ) {
+	echo "Usage: {$argv[0]} <filename.zip>" . PHP_EOL;
+	exit( -1 );
+}
+
+$filename = $argv[1];
+
+
+// Load WordPress
+require_once getcwd() . '/wp-load.php';
+
+// Required for the taxonomy functions (categories/tags)
+require_once getcwd() . '/wp-admin/includes/taxonomy.php';
+
+// Required for kses functionality
+require_once getcwd() . '/wp-includes/kses.php';
+
+
+$archive = new ZipArchive();
+if ( true === $archive->open( $filename, ZipArchive::RDONLY ) ) {
+	$files = $archive->count();
+	echo "Opened archive with {$files} files." . PHP_EOL;
+
+	for ( $i = 0; $i < $files; $i++ ) {
+		echo " => {$archive->getNameIndex( $i )}..." . PHP_EOL;
+		$meta = parse_old_post( $archive->getFromIndex( $i ) );
+		migrate_post( $meta );
+	}
+
+	$archive->close();
+}
+
+function parse_old_post( $input ) : array {
+	$parts = explode( '---', $input );
+
+	// $parts[0] is empty
+	$meta = parse_post_frontmatter( $parts[1] );
+	$meta['body'] = trim( $parts[2] );
+
+	return $meta;
+}
+
+function parse_post_frontmatter( $input ) : array {
+	$lines = array_filter( explode( "\n", $input ) );
+	return array_reduce(
+		$lines,
+		function( $acc, $line ) {
+			$kv = explode( ': ', $line, 2 );
+			if ( isset( $kv[0], $kv[1] ) ) {
+				$acc[ $kv[0] ] = $kv[1];
+			}
+			return $acc;
+		},
+		array()
+	);
+}
+
+function get_wp_category( $name, $parent = 0 ) : int {
+	$cat = category_exists( $name );
+
+	if ( ! $cat ) {
+		$cat = wp_create_category( $name, $parent );
+	}
+
+	return $cat;
+}
+
+function migrate_post( $meta ) {
+	global $wpdb;
+
+	$categories = array_map(
+		fn( $cat ) => get_wp_category( $cat ),
+		explode( ',', $meta['categories'] )
+	);
+
+	$images = migrate_images( $meta );
+
+	$post_id = wp_insert_post(
+		array(
+			'post_author' => $meta['user_id'],
+			'post_date' => $meta['published_at'],
+			'post_date_gmt' => $meta['published_at'],
+			'post_content' => $meta['body'],
+			'post_title' => $meta['title'],
+			'post_excerpt' => wp_filter_kses( $meta['custom_teaser'] ),
+			'post_status' => 'publish',
+			'comment_status' => 'closed',
+			'ping_status' => 'closed',
+			'post_modified' => $meta['updated_at'],
+			'post_modified_gmt' => $meta['updated_at'],
+			'post_parent' => '0',
+			'post_category' => $categories,
+			'post_tags' => explode( ',', $meta['tags'] ),
+			'menu_order' => '0',
+			'post_type' => 'post',
+		)
+	);
+
+	if ( 0 === $post_id ) {
+		die( "Error: Unable to insert post: {$meta['title']}." . PHP_EOL );
+	}
+
+	return array( $post_id, $thumb );
+}
+
+function migrate_images( array $meta ) : array {
+	$post_date = new DateTimeImmutable( $meta['published_at'] );
+	if ( ! $post_date ) {
+		die( "Error: Could not parse published_at date: {$meta['published_at']}." );
+	}
+
+	$thumb = null;
+
+	if ( isset( $meta['body'] ) ) {
+		$meta['body'] = preg_replace_callback(
+			'/<img src="([^"]*)" title="([^"]*)" alt="([^"]*)"[^>]*>/',
+			function( array $matches ) {
+				$remote_filename = $matches[1];
+				$filename = basename( $remote_filename );
+				$title = $matches[2];
+				$alt = $matches[3];
+
+				$uploads = wp_upload_dir( $post_date->format( 'Y/m' ) );
+				$target_file = implode( '/', array( $uploads['path'], $filename ) );
+				$target_url = implide( '/', array( $uploads['url'], $filename ) );
+
+				import_photo( $remote_filename, $target_file );
+
+				$mime = wp_get_image_mime( $target_file );
+				if ( ! $mime ) {
+					die( "Error: Unknown mime type for {$target_file}" . PHP_EOL );
+				}
+
+				$attachment_id = wp_insert_attachment(
+					array(
+						'post_mime_type' => $mime,
+						'guid' => $target_url,
+						'post_paren' => 0,
+						'post_title' => $filename,
+					),
+					$target_file
+				);
+
+				if ( 0 === $attachment_id ) {
+					die( "Error: Could not create attachment for {$target_file}" . PHP_EOL );
+				}
+
+				if ( ! $thumb ) {
+					$thumb = $attachment_id;
+					return '';
+				} else {
+					return "<img src=\"{$target_url}\" title=\"{$title}\" alt=\"{$alt}\">";
+				}
+			},
+			$meta['body'],
+		);
+	}
+
+	return $thumb;
+}
+
+function import_photo( $remote_path, $target_filename ) {
+	$curl = curl_init( implode( '/', array( 'https://heavymetal.no', $remote_path ) ) );
+
+	if ( false === $curl ) {
+		die( 'Error: Unable to init curl' );
+	}
+
+	curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
+	$res = curl_exec( $curl );
+	file_put_contents( $target_filename, $res );
+}