From 785c83658e3f5b0bae6ad064010970ad6fdc33e7 Mon Sep 17 00:00:00 2001 From: Harald Eilertsen Date: Mon, 1 Aug 2022 11:21:19 +0200 Subject: Fix image import. It appears the url contains a base64 encoded array of arrays with information about the original image. (I originally thought it was a secret token of some sort.) This makes it easy to fetch the original unprocessed image from the old server. --- import-posts.php | 54 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/import-posts.php b/import-posts.php index 6697289..113d356 100644 --- a/import-posts.php +++ b/import-posts.php @@ -38,7 +38,9 @@ if ( true === $archive->open( $filename, ZipArchive::RDONLY ) ) { for ( $i = 0; $i < $files; $i++ ) { echo " => {$archive->getNameIndex( $i )}..." . PHP_EOL; $meta = parse_old_post( $archive->getFromIndex( $i ) ); - migrate_post( $meta ); + if ( isset( $meta['body'] ) ) { + migrate_post( $meta ); + } } $archive->close(); @@ -49,7 +51,9 @@ function parse_old_post( $input ) : array { // $parts[0] is empty $meta = parse_post_frontmatter( $parts[1] ); - $meta['body'] = trim( $parts[2] ); + if ( isset( $parts[2] ) ) { + $meta['body'] = trim( $parts[2] ); + } return $meta; } @@ -87,14 +91,16 @@ function migrate_post( $meta ) { explode( ',', $meta['categories'] ) ); - $images = migrate_images( $meta ); + $res = migrate_images( $meta ); + $body = $res[0]; + $thumb = $res[1]; $post_id = wp_insert_post( array( 'post_author' => $meta['user_id'], 'post_date' => $meta['published_at'], 'post_date_gmt' => $meta['published_at'], - 'post_content' => $meta['body'], + 'post_content' => $res[0], 'post_title' => $meta['title'], 'post_excerpt' => wp_filter_kses( $meta['custom_teaser'] ), 'post_status' => 'publish', @@ -114,7 +120,9 @@ function migrate_post( $meta ) { die( "Error: Unable to insert post: {$meta['title']}." .
PHP_EOL ); } - return array( $post_id, $thumb ); + if ( $thumb ) { + set_post_thumbnail( $post_id, $thumb ); + } } function migrate_images( array $meta ) : array { @@ -126,17 +134,17 @@ function migrate_images( array $meta ) : array { $thumb = null; if ( isset( $meta['body'] ) ) { - $meta['body'] = preg_replace_callback( + $body = preg_replace_callback( '/([^]*>/', - function( array $matches ) { - $remote_filename = $matches[1]; + function( array $matches ) use ( $post_date, &$thumb ) { + $remote_filename = get_real_image_name( $matches[1] ); $filename = basename( $remote_filename ); $title = $matches[2]; $alt = $matches[3]; $uploads = wp_upload_dir( $post_date->format( 'Y/m' ) ); $target_file = implode( '/', array( $uploads['path'], $filename ) ); - $target_url = implide( '/', array( $uploads['url'], $filename ) ); + $target_url = implode( '/', array( $uploads['url'], $filename ) ); import_photo( $remote_filename, $target_file ); @@ -159,9 +167,9 @@ function migrate_images( array $meta ) : array { die( "Error: Could not create attachment for {$target_file}" . PHP_EOL ); } - if ( ! 
$thumb ) { + if ( null === $thumb ) { $thumb = $attachment_id; - return ''; + return ''; } else { return "\"{$alt}\""; } @@ -170,11 +178,12 @@ function migrate_images( array $meta ) : array { ); } - return $thumb; + return array( $body, $thumb ); } function import_photo( $remote_path, $target_filename ) { - $curl = curl_init( implode( '/', array( 'https://heavymetal.no', $remote_path ) ) ); + $remote_url = implode( '/', array( 'https://heavymetal.no/system/refinery/images', $remote_path ) ); + $curl = curl_init( $remote_url ); if ( false === $curl ) { die( 'Error: Unable to init curl' ); @@ -182,5 +191,22 @@ function import_photo( $remote_path, $target_filename ) { curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true ); $res = curl_exec( $curl ); - file_put_contents( $target_filename, $res ); + if ( $res ) { + $status = curl_getinfo( $curl, CURLINFO_HTTP_CODE ); + if ( $status > 299 ) { + die( "Error: Could not fetch '{$remote_url}', status = {$status}." . PHP_EOL ); + } + file_put_contents( $target_filename, $res ); + } +} + +function get_real_image_name( string $orig ) : string { + $encoded = basename( dirname( $orig ) ); + $decoded = base64_decode( $encoded ); + + if ( preg_match( '/\["f","([^"]+)"\]/', $decoded, $matches ) ) { + return $matches[1]; + } + + die( "Filed to decode filename: {$encoded}: {$decoded}" . PHP_EOL ); } -- cgit v1.2.3