PaddlePaddle · luukunn · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026
diff --git a/fastdeploy/input/ernie4_5_vl_processor/image_preprocessor/image_preprocessor_adaptive.py b/fastdeploy/input/ernie4_5_vl_processor/image_preprocessor/image_preprocessor_adaptive.py
@@ -16,7 +16,6 @@
 
 """image preprocessor adaptive"""
 
-import math
 from typing import List, Optional, Union
 
 import numpy as np
@@ -45,6 +44,8 @@
 from paddleformers.transformers.legacy.tokenizer_utils_base import TensorType
 from PIL import Image
 
+from fastdeploy.input.image_processors.common import is_scaled_image
+from fastdeploy.input.image_processors.common import smart_resize_qwen as smart_resize
 from fastdeploy.utils import data_processor_logger
 
 OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
@@ -73,22 +74,9 @@
 ]
 
 
-def is_scaled_image(image: np.ndarray) -> bool:
-    """
-    Checks to see whether the pixel values have already been rescaled to [0, 1].
-    """
-    if image.dtype == np.uint8:
-        return False
-
-    # It's possible the image has pixel values in [0, 255] but is of floating type
-    return np.min(image) >= 0 and np.max(image) <= 1
-
-
 def make_batched_images(images) -> List[List[ImageInput]]:
     """
     Accepts images in list or nested list format, and makes a list of images for preprocessing.
-
-    Args:
         images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
             The input image.
 
@@ -521,67 +509,3 @@ def preprocess(
             }
 
         return BatchFeature(data=data, tensor_type=return_tensors)
-
-
-def round_by_factor(number: int, factor: int) -> int:
-    """Returns the closest integer to 'number' that is divisible by 'factor'."""
-    return round(number / factor) * factor
-
-
-def ceil_by_factor(number: int, factor: int) -> int:
-    """Returns the smallest integer greater than or equal to 'number' that is divisible by 'factor'."""
-    return math.ceil(number / factor) * factor
-
-
-def floor_by_factor(number: int, factor: int) -> int:
-    """Returns the largest integer less than or equal to 'number' that is divisible by 'factor'."""
-    return math.floor(number / factor) * factor
-
-
-def smart_resize(
-    height: int,
-    width: int,
-    factor: int = IMAGE_FACTOR,
-    min_pixels: int = MIN_PIXELS,
-    max_pixels: int = MAX_PIXELS,
-):
-    """
-    Rescales the image so that the following conditions are met:
-
-    1. Both dimensions (height and width) are divisible by 'factor'.
-
-    2. The total number of pixels is within the range ['min_pixels', 'max_pixels'].
-
-    3. The aspect ratio of the image is maintained as closely as possible.
-    """
-    if max(height, width) / min(height, width) > MAX_RATIO:
-        if height > width:
-            new_width = max(factor, round_by_factor(width, factor))
-            new_height = floor_by_factor(new_width * MAX_RATIO, factor)
-        else:
-            new_height = max(factor, round_by_factor(height, factor))
-            new_width = floor_by_factor(new_height * MAX_RATIO, factor)
-
-        data_processor_logger.info(
-            f"absolute aspect ratio must be smaller than {MAX_RATIO}, got {max(height, width) / min(height, width)},\
-              resize to {max(new_height, new_width) / min(new_height, new_width)}"
-        )
-
-        height = new_height
-        width = new_width
-
-    h_bar = max(factor, round_by_factor(height, factor))
-    w_bar = max(factor, round_by_factor(width, factor))
-    if h_bar * w_bar > max_pixels:
-        beta = math.sqrt((height * width) / max_pixels)
-        h_bar = floor_by_factor(height / beta, factor)
-        w_bar = floor_by_factor(width / beta, factor)
-    elif h_bar * w_bar < min_pixels:
-        beta = math.sqrt(min_pixels / (height * width))
-        h_bar = ceil_by_factor(height * beta, factor)
-        w_bar = ceil_by_factor(width * beta, factor)
-
-    if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels:
-        raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}")
-
-    return h_bar, w_bar
diff --git a/fastdeploy/input/image_processors/__init__.py b/fastdeploy/input/image_processors/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/fastdeploy/input/image_processors/common.py b/fastdeploy/input/image_processors/common.py
@@ -0,0 +1,208 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Shared image utility functions for all VL image processors."""
+
+import math
+
+import numpy as np
+
+from fastdeploy.utils import data_processor_logger
+
+__all__ = [
+    "round_by_factor",
+    "ceil_by_factor",
+    "floor_by_factor",
+    "is_scaled_image",
+    "smart_resize",
+    "smart_resize_qwen",
+    "smart_resize_paddleocr",
+]
+
+
+def round_by_factor(number: int, factor: int) -> int:
+    """Returns the closest integer to 'number' that is divisible by 'factor'."""
+    return round(number / factor) * factor
+
+
+def ceil_by_factor(number: int, factor: int) -> int:
+    """Returns the smallest integer >= 'number' that is divisible by 'factor'."""
+    return math.ceil(number / factor) * factor
+
+
+def floor_by_factor(number: int, factor: int) -> int:
+    """Returns the largest integer <= 'number' that is divisible by 'factor'."""
+    return math.floor(number / factor) * factor
+
+
+def is_scaled_image(image: np.ndarray) -> bool:
+    """Check if image pixel values are already normalized to [0, 1] range.
+
+    Args:
+        image: Input image array.
+
+    Returns:
+        bool: True if image is already scaled to [0, 1].
+    """
+    if image.dtype == np.uint8:
+        return False
+    # It's possible the image has pixel values in [0, 255] but is of floating type
+    return np.min(image) >= 0 and np.max(image) <= 1
+
+
+def smart_resize_qwen(
+    height: int,
+    width: int,
+    factor: int,
+    min_pixels: int,
+    max_pixels: int,
+    max_ratio: int = 200,
+) -> tuple:
+    """Smart image resizing for ERNIE / Qwen2.5 / Qwen3 models.
+
+    Maintains aspect ratio and respects pixel constraints. When the aspect ratio
+    exceeds max_ratio, the image is cropped (not raised as error) to fit within
+    the ratio limit.
-    Maintains aspect ratio and respects pixel constraints. When the aspect ratio
-    exceeds max_ratio, the image is cropped (not raised as error) to fit within
-    the ratio limit.
+    Maintains aspect ratio as much as possible while respecting pixel constraints.
+    When the aspect ratio exceeds ``max_ratio``, the target resize dimensions are
+    adjusted to clip/limit the aspect ratio by resizing (no cropping is applied,
+    and the output aspect ratio may differ from the original).
-    Maintains aspect ratio and respects pixel constraints. When the aspect ratio
-    exceeds max_ratio, the image is cropped (not raised as error) to fit within
-    the ratio limit.
+    Maintains aspect ratio as much as possible while respecting pixel constraints.
+    When the aspect ratio exceeds ``max_ratio``, the target resize dimensions are
+    adjusted to clip/limit the aspect ratio by resizing (no cropping is applied,
+    and the output aspect ratio may differ from the original).
+
+    Args:
+        height: Original image height.
+        width: Original image width.
+        factor: Patch size factor; both output dimensions will be multiples of this.
+        min_pixels: Minimum allowed total pixels.
+        max_pixels: Maximum allowed total pixels.
+        max_ratio: Maximum allowed aspect ratio (default 200).
+
+    Returns:
+        tuple: (new_height, new_width)
+
+    Raises:
+        ValueError: If calculated dimensions are still invalid after resizing.
+    """
+    if max(height, width) / min(height, width) > max_ratio:
+        if height > width:
+            new_width = max(factor, round_by_factor(width, factor))
+            new_height = floor_by_factor(new_width * max_ratio, factor)
+        else:
+            new_height = max(factor, round_by_factor(height, factor))
+            new_width = floor_by_factor(new_height * max_ratio, factor)
+
+        data_processor_logger.info(
+            f"absolute aspect ratio must be smaller than {max_ratio}, "
+            f"got {max(height, width) / min(height, width)}, "
+            f"resize to {max(new_height, new_width) / min(new_height, new_width)}"
+        )
+        height = new_height
+        width = new_width
+
+    h_bar = max(factor, round_by_factor(height, factor))
+    w_bar = max(factor, round_by_factor(width, factor))
+    if h_bar * w_bar > max_pixels:
+        beta = math.sqrt((height * width) / max_pixels)
+        h_bar = floor_by_factor(height / beta, factor)
+        w_bar = floor_by_factor(width / beta, factor)
+    elif h_bar * w_bar < min_pixels:
+        beta = math.sqrt(min_pixels / (height * width))
+        h_bar = ceil_by_factor(height * beta, factor)
+        w_bar = ceil_by_factor(width * beta, factor)
+
+    if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels:
+        raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}")
+
+    return h_bar, w_bar
+
+
+def smart_resize_paddleocr(
+    height: int,
+    width: int,
+    factor: int = 28,
+    min_pixels: int = 28 * 28 * 130,
+    max_pixels: int = 28 * 28 * 1280,
+) -> tuple:
+    """Smart image resizing for PaddleOCR-VL model.
+
+    Similar to smart_resize_qwen but adds small-image protection: if height or
+    width is smaller than factor, the image is scaled up to factor first. Also,
+    when aspect ratio exceeds 200 this function raises ValueError (instead of
+    silently cropping like the qwen variant).
+
+    Args:
+        height: Original image height.
+        width: Original image width.
+        factor: Patch size factor; both output dimensions will be multiples of this.
+        min_pixels: Minimum allowed total pixels.
+        max_pixels: Maximum allowed total pixels.
+
+    Returns:
+        tuple: (new_height, new_width)
+
+    Raises:
+        ValueError: If aspect ratio exceeds 200, or calculated dimensions are invalid.
+    """
+    if height < factor:
+        data_processor_logger.debug(f"smart_resize_paddleocr: height={height} < factor={factor}, reset height=factor")
+        width = round((width * factor) / height)
+        height = factor
+
+    if width < factor:
+        data_processor_logger.debug(f"smart_resize_paddleocr: width={width} < factor={factor}, reset width=factor")
+        height = round((height * factor) / width)
+        width = factor
+
+    if max(height, width) / min(height, width) > 200:
+        raise ValueError(
+            f"absolute aspect ratio must be smaller than 200, " f"got {max(height, width) / min(height, width)}"
+        )
+
+    h_bar = round(height / factor) * factor
+    w_bar = round(width / factor) * factor
+    if h_bar * w_bar > max_pixels:
+        beta = math.sqrt((height * width) / max_pixels)
+        h_bar = math.floor(height / beta / factor) * factor
+        w_bar = math.floor(width / beta / factor) * factor
+    elif h_bar * w_bar < min_pixels:
+        beta = math.sqrt(min_pixels / (height * width))
+        h_bar = math.ceil(height * beta / factor) * factor
+        w_bar = math.ceil(width * beta / factor) * factor
+
+    return h_bar, w_bar
+
+
+def smart_resize(
+    height: int,
+    width: int,
+    factor: int,
+    min_pixels: int,
+    max_pixels: int,
+    max_ratio: int = 200,
+    variant: str = "qwen",
+) -> tuple:
+    """Unified smart_resize dispatcher.
+
+    Args:
+        height: Original image height.
+        width: Original image width.
+        factor: Patch size factor.
+        min_pixels: Minimum allowed total pixels.
+        max_pixels: Maximum allowed total pixels.
+        max_ratio: Maximum allowed aspect ratio (only used by "qwen" variant).
+        variant: Which algorithm variant to use.
+            - "qwen" (default): for ERNIE / Qwen2.5 / Qwen3. Clips extreme ratios silently.
+            - "paddleocr": for PaddleOCR-VL. Adds small-image protection, raises on bad ratio.
+
+    Returns:
+        tuple: (new_height, new_width)
+    """
+    if variant == "paddleocr":
+        return smart_resize_paddleocr(height, width, factor, min_pixels, max_pixels)
+    return smart_resize_qwen(height, width, factor, min_pixels, max_pixels, max_ratio)