Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions features/media-import.feature
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,69 @@ Feature: Manage WordPress attachments
"""
/foo/large-image.jpg
"""

# Verifies that importing the same local file a second time with --skip-duplicates
# is reported as a skip that points at the attachment created by the first import.
Scenario: Skip importing a local file that was already imported
# Fetch the fixture image into the cache directory so it can be imported from a local path.
Given download:
| path | url |
| {CACHE_DIR}/large-image.jpg | http://wp-cli.org/behat-data/large-image.jpg |

# First import: run with --porcelain and keep the printed value as {ATTACHMENT_ID} for the assertions below.
When I run `wp media import {CACHE_DIR}/large-image.jpg --porcelain`
Then save STDOUT as {ATTACHMENT_ID}
And STDOUT should not be empty

# Second import of the same path: expect the skip message, the saved ID, a summary
# counting 0 imported / 1 skipped, and a zero exit code (a skip is not an error).
When I run `wp media import {CACHE_DIR}/large-image.jpg --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
already exists as attachment ID {ATTACHMENT_ID}
"""
And STDOUT should contain:
"""
Success: Imported 0 of 1 items (1 skipped).
"""
And the return code should be 0

# Verifies the same duplicate-skipping behavior when the file is given as a URL
# rather than a local path.
Scenario: Skip importing a remote file that was already imported
# First import: run with --porcelain and keep the printed value as {ATTACHMENT_ID} for the assertions below.
When I run `wp media import 'http://wp-cli.org/behat-data/codeispoetry.png' --porcelain`
Then save STDOUT as {ATTACHMENT_ID}
And STDOUT should not be empty

# Second import of the same URL: expect the skip message, the saved ID, a summary
# counting 0 imported / 1 skipped, and a zero exit code.
When I run `wp media import 'http://wp-cli.org/behat-data/codeispoetry.png' --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
already exists as attachment ID {ATTACHMENT_ID}
"""
And STDOUT should contain:
"""
Success: Imported 0 of 1 items (1 skipped).
"""
And the return code should be 0

# Verifies that --skip-duplicates is applied per file: in a batch of two, the
# already-imported file is skipped while the new one is still imported.
Scenario: Import new file while skipping duplicates from a batch
# Fetch the fixture image into the cache directory so it can be imported from a local path.
Given download:
| path | url |
| {CACHE_DIR}/large-image.jpg | http://wp-cli.org/behat-data/large-image.jpg |

# Seed the media library with the local image so that it counts as a duplicate below.
When I run `wp media import {CACHE_DIR}/large-image.jpg`
Then STDOUT should contain:
"""
Success: Imported 1 of 1 items.
"""

# Batch run: the local image was imported above, the remote PNG was not, so the
# summary must report 1 of 2 imported with 1 skipped and a zero exit code.
When I run `wp media import {CACHE_DIR}/large-image.jpg 'http://wp-cli.org/behat-data/codeispoetry.png' --skip-duplicates`
Then STDOUT should contain:
"""
Skipped importing file
"""
And STDOUT should contain:
"""
Success: Imported 1 of 2 items (1 skipped).
"""
And the return code should be 0
91 changes: 88 additions & 3 deletions src/Media_Command.php
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ public function regenerate( $args, $assoc_args = array() ) {
* [--featured_image]
* : If set, set the imported image as the Featured Image of the post it is attached to.
*
* [--skip-duplicates]
* : If set, media files that have already been imported will be skipped.
*
* [--porcelain[=<field>]]
* : Output a single field for each imported image. Defaults to attachment ID when used as flag.
* ---
Expand Down Expand Up @@ -308,7 +311,7 @@ public function regenerate( $args, $assoc_args = array() ) {
* http://wordpress-develop.dev/wp-header-logo/
*
* @param string[] $args Positional arguments.
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
* @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, 'skip-duplicates'?: bool, porcelain?: bool|string} $assoc_args Associative arguments.
* @return void
*/
public function import( $args, $assoc_args = array() ) {
Expand Down Expand Up @@ -361,6 +364,7 @@ public function import( $args, $assoc_args = array() ) {
$number = 0;
$successes = 0;
$errors = 0;
$skips = 0;
foreach ( $args as $file ) {
++$number;
if ( 0 === $number % self::WP_CLEAR_OBJECT_CACHE_INTERVAL ) {
Expand All @@ -379,6 +383,16 @@ public function import( $args, $assoc_args = array() ) {
++$errors;
continue;
}
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
$existing = $this->find_duplicate_attachment( Utils\basename( $file ) );
if ( false !== $existing ) {
if ( ! $porcelain ) {
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
}
++$skips;
continue;
}
}
if ( Utils\get_flag_value( $assoc_args, 'skip-copy' ) ) {
$tempfile = $file;
} else {
Expand All @@ -390,6 +404,16 @@ public function import( $args, $assoc_args = array() ) {
$file_time = @filemtime( $file );
}
} else {
if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) {
$existing = $this->find_duplicate_attachment( (string) explode( '?', Utils\basename( $file ), 2 )[0] );
if ( false !== $existing ) {
if ( ! $porcelain ) {
WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." );
}
++$skips;
continue;
}
}
$tempfile = download_url( $file );
if ( is_wp_error( $tempfile ) ) {
WP_CLI::warning(
Expand All @@ -402,7 +426,7 @@ public function import( $args, $assoc_args = array() ) {
++$errors;
continue;
}
$name = (string) strtok( Utils\basename( $file ), '?' );
$name = (string) explode( '?', Utils\basename( $file ), 2 )[0];
}

if ( ! empty( $assoc_args['file_name'] ) ) {
Expand Down Expand Up @@ -542,7 +566,7 @@ public function import( $args, $assoc_args = array() ) {

// Report the result of the operation
if ( ! Utils\get_flag_value( $assoc_args, 'porcelain' ) ) {
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors );
Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors, Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ? $skips : null );
} elseif ( $errors ) {
WP_CLI::halt( 1 );
}
Expand Down Expand Up @@ -692,6 +716,67 @@ private function make_copy( $path ) {
return $filename;
}

/**
 * Finds an existing attachment whose stored file basename matches the given filename.
 *
 * Searches the `_wp_attached_file` post meta, which stores the path relative to
 * the uploads directory (e.g. '2026/03/image.jpg' or just 'image.jpg'). Also
 * checks for the WP 5.3+ big-image scaled variant (e.g. 'image-scaled.jpg') so
 * that re-importing a large file that was scaled on first import is correctly
 * detected as a duplicate.
 *
 * Matching is by filename only; file contents are not compared. Trashed
 * attachments are ignored. When several attachments share the same basename
 * (for example in different upload subdirectories), the one with the lowest
 * ID is returned so that the result is stable between runs.
 *
 * @param string $basename Filename basename to search for (e.g. 'image.jpg').
 * @return int|false Attachment ID if found, false otherwise (including when
 *                   `$basename` is empty).
 */
private function find_duplicate_attachment( $basename ) {
	global $wpdb;

	// An empty basename (e.g. when the last segment of a URL is only a query
	// string) cannot identify a file; the suffix comparisons below would then
	// test against '/' and '/-scaled' and could match an unrelated attachment.
	$basename = (string) $basename;
	if ( '' === $basename ) {
		return false;
	}

	// WP 5.3+ big-image scaling renames 'image.jpg' → 'image-scaled.jpg' and
	// stores the scaled name in _wp_attached_file, so search for both variants.
	$ext  = pathinfo( $basename, PATHINFO_EXTENSION );
	$name = pathinfo( $basename, PATHINFO_FILENAME );

	// Compare against '' explicitly: a truthiness check would drop a literal
	// "0" extension (e.g. 'archive.0') from the scaled variant name.
	$scaled_basename = $name . '-scaled' . ( '' !== $ext ? '.' . $ext : '' );

	// Prefix with '/' so that a suffix match only succeeds on a whole path
	// segment ('2026/03/image.jpg') and not on 'my-image.jpg'.
	$slash_basename        = '/' . $basename;
	$slash_scaled_basename = '/' . $scaled_basename;

	// The lengths are passed to SQL RIGHT(); prefer a character count over a
	// byte count so multibyte filenames compare correctly when mbstring exists.
	if ( function_exists( 'mb_strlen' ) ) {
		$slash_basename_length        = mb_strlen( $slash_basename, 'UTF-8' );
		$slash_scaled_basename_length = mb_strlen( $slash_scaled_basename, 'UTF-8' );
	} else {
		$slash_basename_length        = strlen( $slash_basename );
		$slash_scaled_basename_length = strlen( $slash_scaled_basename );
	}

	// ORDER BY makes LIMIT 1 deterministic when more than one attachment matches.
	$result = $wpdb->get_var(
		$wpdb->prepare(
			"SELECT p.ID
			FROM {$wpdb->posts} p
			INNER JOIN {$wpdb->postmeta} pm
			ON p.ID = pm.post_id
			WHERE p.post_type = 'attachment'
			AND p.post_status != 'trash'
			AND pm.meta_key = '_wp_attached_file'
			AND (
			pm.meta_value = %s
			OR RIGHT(pm.meta_value, %d) = %s
			OR pm.meta_value = %s
			OR RIGHT(pm.meta_value, %d) = %s
			)
			ORDER BY p.ID ASC
			LIMIT 1",
			$basename,
			$slash_basename_length,
			$slash_basename,
			$scaled_basename,
			$slash_scaled_basename_length,
			$slash_scaled_basename
		)
	);

	return $result ? (int) $result : false;
}

/**
* Returns a human-readable description for one or more image size names.
*
Expand Down
Loading