summaryrefslogtreecommitdiffstats
path: root/import-posts.php
diff options
context:
space:
mode:
authorHarald Eilertsen <haraldei@anduin.net>2022-08-01 11:21:19 +0200
committerHarald Eilertsen <haraldei@anduin.net>2022-08-01 11:21:19 +0200
commit785c83658e3f5b0bae6ad064010970ad6fdc33e7 (patch)
treec843d22eb5a04397dc918df2b0b887c610ca4d7b /import-posts.php
parent614030f9947a54868797d6a77acbeb6d6a096b29 (diff)
downloadmigrate-785c83658e3f5b0bae6ad064010970ad6fdc33e7.tar.gz
migrate-785c83658e3f5b0bae6ad064010970ad6fdc33e7.tar.bz2
migrate-785c83658e3f5b0bae6ad064010970ad6fdc33e7.zip
Fix image import.
It appears the URL contains a base64 encoded array of arrays with information about the original image. (I originally thought it was a secret token of some sort.) This makes it easy to fetch the original unprocessed image from the old server.
Diffstat (limited to 'import-posts.php')
-rw-r--r--import-posts.php54
1 files changed, 40 insertions, 14 deletions
diff --git a/import-posts.php b/import-posts.php
index 6697289..113d356 100644
--- a/import-posts.php
+++ b/import-posts.php
@@ -38,7 +38,9 @@ if ( true === $archive->open( $filename, ZipArchive::RDONLY ) ) {
for ( $i = 0; $i < $files; $i++ ) {
echo " => {$archive->getNameIndex( $i )}..." . PHP_EOL;
$meta = parse_old_post( $archive->getFromIndex( $i ) );
- migrate_post( $meta );
+ if ( isset( $meta['body'] ) ) {
+ migrate_post( $meta );
+ }
}
$archive->close();
@@ -49,7 +51,9 @@ function parse_old_post( $input ) : array {
// $parts[0] is empty
$meta = parse_post_frontmatter( $parts[1] );
- $meta['body'] = trim( $parts[2] );
+ if ( isset( $parts[2] ) ) {
+ $meta['body'] = trim( $parts[2] );
+ }
return $meta;
}
@@ -87,14 +91,16 @@ function migrate_post( $meta ) {
explode( ',', $meta['categories'] )
);
- $images = migrate_images( $meta );
+ $res = migrate_images( $meta );
+ $body = $res[0];
+ $thumb = $res[1];
$post_id = wp_insert_post(
array(
'post_author' => $meta['user_id'],
'post_date' => $meta['published_at'],
'post_date_gmt' => $meta['published_at'],
- 'post_content' => $meta['body'],
+ 'post_content' => $res[0],
'post_title' => $meta['title'],
'post_excerpt' => wp_filter_kses( $meta['custom_teaser'] ),
'post_status' => 'publish',
@@ -114,7 +120,9 @@ function migrate_post( $meta ) {
die( "Error: Unable to insert post: {$meta['title']}." . PHP_EOL );
}
- return array( $post_id, $thumb );
+ if ( $thumb ) {
+ set_post_thumbnail( $post_id, $thumb );
+ }
}
function migrate_images( array $meta ) : array {
@@ -126,17 +134,17 @@ function migrate_images( array $meta ) : array {
$thumb = null;
if ( isset( $meta['body'] ) ) {
- $meta['body'] = preg_replace_callback(
+ $body = preg_replace_callback(
'/<img src="([^"]*)" title="([^"]*)" alt="([^"]*)"[^>]*>/',
- function( array $matches ) {
- $remote_filename = $matches[1];
+ function( array $matches ) use ( $post_date, &$thumb ) {
+ $remote_filename = get_real_image_name( $matches[1] );
$filename = basename( $remote_filename );
$title = $matches[2];
$alt = $matches[3];
$uploads = wp_upload_dir( $post_date->format( 'Y/m' ) );
$target_file = implode( '/', array( $uploads['path'], $filename ) );
- $target_url = implide( '/', array( $uploads['url'], $filename ) );
+ $target_url = implode( '/', array( $uploads['url'], $filename ) );
import_photo( $remote_filename, $target_file );
@@ -159,9 +167,9 @@ function migrate_images( array $meta ) : array {
die( "Error: Could not create attachment for {$target_file}" . PHP_EOL );
}
- if ( ! $thumb ) {
+ if ( null === $thumb ) {
$thumb = $attachment_id;
- return '';
+ return '<!-- image moved to thumbnail -->';
} else {
return "<img src=\"{$target_url}\" title=\"{$title}\" alt=\"{$alt}\">";
}
@@ -170,11 +178,12 @@ function migrate_images( array $meta ) : array {
);
}
- return $thumb;
+ return array( $body, $thumb );
}
function import_photo( $remote_path, $target_filename ) {
- $curl = curl_init( implode( '/', array( 'https://heavymetal.no', $remote_path ) ) );
+ $remote_url = implode( '/', array( 'https://heavymetal.no/system/refinery/images', $remote_path ) );
+ $curl = curl_init( $remote_url );
if ( false === $curl ) {
die( 'Error: Unable to init curl' );
@@ -182,5 +191,22 @@ function import_photo( $remote_path, $target_filename ) {
curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
$res = curl_exec( $curl );
- file_put_contents( $target_filename, $res );
+ if ( $res ) {
+ $status = curl_getinfo( $curl, CURLINFO_HTTP_CODE );
+ if ( $status > 299 ) {
+ die( "Error: Could not fetch '{$remote_url}', status = {$status}." . PHP_EOL );
+ }
+ file_put_contents( $target_filename, $res );
+ }
+}
+
+function get_real_image_name( string $orig ) : string {
+ $encoded = basename( dirname( $orig ) );
+ $decoded = base64_decode( $encoded );
+
+ if ( preg_match( '/\["f","([^"]+)"\]/', $decoded, $matches ) ) {
+ return $matches[1];
+ }
+
+ die( "Filed to decode filename: {$encoded}: {$decoded}" . PHP_EOL );
}