Source code for mmcv.image.geometric

# Copyright (c) OpenMMLab. All rights reserved.
import numbers

import cv2
import numpy as np

from ..utils import to_2tuple
from .io import imread_backend

try:
    from PIL import Image
except ImportError:
    Image = None


def _scale_size(size, scale):
    """Rescale a size by a ratio.

    Args:
        size (tuple[int]): (w, h).
        scale (float | tuple[float]): Scaling factor.

    Returns:
        tuple[int]: Scaled size.
    """
    if isinstance(scale, (float, int)):
        scale = (scale, scale)
    w, h = size
    return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)
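
# Illustrative sketch (not part of mmcv): the ``+ 0.5`` trick rounds to the
# nearest integer, and a scalar scale is broadcast to both dimensions.
#
#   >>> _scale_size((640, 480), 0.3)
#   (192, 144)
#   >>> _scale_size((640, 480), (0.5, 2.0))
#   (320, 960)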


cv2_interp_codes = {
    'nearest': cv2.INTER_NEAREST,
    'bilinear': cv2.INTER_LINEAR,
    'bicubic': cv2.INTER_CUBIC,
    'area': cv2.INTER_AREA,
    'lanczos': cv2.INTER_LANCZOS4
}

if Image is not None:
    pillow_interp_codes = {
        'nearest': Image.NEAREST,
        'bilinear': Image.BILINEAR,
        'bicubic': Image.BICUBIC,
        'box': Image.BOX,
        'lanczos': Image.LANCZOS,
        'hamming': Image.HAMMING
    }


def imresize(img,
             size,
             return_scale=False,
             interpolation='bilinear',
             out=None,
             backend=None):
    """Resize image to a given size.

    Args:
        img (ndarray): The input image.
        size (tuple[int]): Target size (w, h).
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend.
        out (ndarray): The output destination.
        backend (str | None): The image resize backend type. Options are
            `cv2`, `pillow`, `None`. If backend is None, the global
            imread_backend specified by ``mmcv.use_backend()`` will be used.
            Default: None.

    Returns:
        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.
    """
    h, w = img.shape[:2]
    if backend is None:
        backend = imread_backend
    if backend not in ['cv2', 'pillow']:
        raise ValueError(f'backend: {backend} is not supported for resize. '
                         f"Supported backends are 'cv2', 'pillow'")

    if backend == 'pillow':
        assert img.dtype == np.uint8, 'Pillow backend only supports uint8 type'
        pil_image = Image.fromarray(img)
        pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
        resized_img = np.array(pil_image)
    else:
        resized_img = cv2.resize(
            img, size, dst=out, interpolation=cv2_interp_codes[interpolation])
    if not return_scale:
        return resized_img
    else:
        w_scale = size[0] / w
        h_scale = size[1] / h
        return resized_img, w_scale, h_scale
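
# Usage sketch (illustration only, not part of mmcv; assumes a random uint8
# test image). ``size`` is (w, h), so the output array shape is (h, w, c):
#
#   >>> img = np.random.randint(0, 256, (300, 400, 3), dtype=np.uint8)
#   >>> out, w_scale, h_scale = imresize(img, (200, 150), return_scale=True)
#   >>> out.shape, w_scale, h_scale
#   ((150, 200, 3), 0.5, 0.5)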


def imresize_to_multiple(img,
                         divisor,
                         size=None,
                         scale_factor=None,
                         keep_ratio=False,
                         return_scale=False,
                         interpolation='bilinear',
                         out=None,
                         backend=None):
    """Resize image according to a given size or scale factor and then round
    up the resized or rescaled image size to the nearest value that can be
    divided by the divisor.

    Args:
        img (ndarray): The input image.
        divisor (int | tuple): Resized image size will be a multiple of
            divisor. If divisor is a tuple, divisor should be
            (w_divisor, h_divisor).
        size (None | int | tuple[int]): Target size (w, h). Default: None.
        scale_factor (None | float | tuple[float]): Multiplier for spatial
            size. Should match input size if it is a tuple and the 2D style is
            (w_scale_factor, h_scale_factor). Default: None.
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image. Default: False.
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend.
        out (ndarray): The output destination.
        backend (str | None): The image resize backend type. Options are
            `cv2`, `pillow`, `None`. If backend is None, the global
            imread_backend specified by ``mmcv.use_backend()`` will be used.
            Default: None.

    Returns:
        tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.
    """
    h, w = img.shape[:2]
    if size is not None and scale_factor is not None:
        raise ValueError('only one of size or scale_factor should be defined')
    elif size is None and scale_factor is None:
        raise ValueError('one of size or scale_factor should be defined')
    elif size is not None:
        size = to_2tuple(size)
        if keep_ratio:
            size = rescale_size((w, h), size, return_scale=False)
    else:
        size = _scale_size((w, h), scale_factor)

    divisor = to_2tuple(divisor)
    size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)])
    resized_img, w_scale, h_scale = imresize(
        img,
        size,
        return_scale=True,
        interpolation=interpolation,
        out=out,
        backend=backend)
    if return_scale:
        return resized_img, w_scale, h_scale
    else:
        return resized_img
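
# Usage sketch (illustration only, not part of mmcv): with size=(252, 201) and
# divisor=32, the target is rounded up to the nearest multiples, i.e.
# (256, 224) in (w, h) order:
#
#   >>> img = np.random.randint(0, 256, (300, 400, 3), dtype=np.uint8)
#   >>> imresize_to_multiple(img, divisor=32, size=(252, 201)).shape
#   (224, 256, 3)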


def imresize_like(img,
                  dst_img,
                  return_scale=False,
                  interpolation='bilinear',
                  backend=None):
    """Resize image to the same size of a given image.

    Args:
        img (ndarray): The input image.
        dst_img (ndarray): The target image.
        return_scale (bool): Whether to return `w_scale` and `h_scale`.
        interpolation (str): Same as :func:`resize`.
        backend (str | None): Same as :func:`resize`.

    Returns:
        tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or
        `resized_img`.
    """
    h, w = dst_img.shape[:2]
    return imresize(img, (w, h), return_scale, interpolation, backend=backend)
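
# Usage sketch (illustration only, not part of mmcv): resize one image to
# match the spatial size of another:
#
#   >>> src = np.random.randint(0, 256, (100, 200, 3), dtype=np.uint8)
#   >>> dst = np.zeros((50, 80, 3), dtype=np.uint8)
#   >>> imresize_like(src, dst).shape
#   (50, 80, 3)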


def rescale_size(old_size, scale, return_scale=False):
    """Calculate the new size to be rescaled to.

    Args:
        old_size (tuple[int]): The old size (w, h) of image.
        scale (float | tuple[int]): The scaling factor or maximum size.
            If it is a float number, then the image will be rescaled by this
            factor, else if it is a tuple of 2 integers, then the image will
            be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image size.

    Returns:
        tuple[int]: The new rescaled image size.
    """
    w, h = old_size
    if isinstance(scale, (float, int)):
        if scale <= 0:
            raise ValueError(f'Invalid scale {scale}, must be positive.')
        scale_factor = scale
    elif isinstance(scale, tuple):
        max_long_edge = max(scale)
        max_short_edge = min(scale)
        scale_factor = min(max_long_edge / max(h, w),
                           max_short_edge / min(h, w))
    else:
        raise TypeError(
            f'Scale must be a number or tuple of int, but got {type(scale)}')

    new_size = _scale_size((w, h), scale_factor)

    if return_scale:
        return new_size, scale_factor
    else:
        return new_size
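
# Usage sketch (illustration only, not part of mmcv): a float rescales both
# edges; a tuple acts as (max_long_edge, max_short_edge) limits while the
# aspect ratio is kept:
#
#   >>> rescale_size((1280, 720), 0.5)
#   (640, 360)
#   >>> rescale_size((1280, 720), (1333, 800))
#   (1333, 750)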


def imrescale(img,
              scale,
              return_scale=False,
              interpolation='bilinear',
              backend=None):
    """Resize image while keeping the aspect ratio.

    Args:
        img (ndarray): The input image.
        scale (float | tuple[int]): The scaling factor or maximum size.
            If it is a float number, then the image will be rescaled by this
            factor, else if it is a tuple of 2 integers, then the image will
            be rescaled as large as possible within the scale.
        return_scale (bool): Whether to return the scaling factor besides the
            rescaled image.
        interpolation (str): Same as :func:`resize`.
        backend (str | None): Same as :func:`resize`.

    Returns:
        ndarray | tuple: The rescaled image, or (rescaled image, scale factor)
        if ``return_scale`` is True.
    """
    h, w = img.shape[:2]
    new_size, scale_factor = rescale_size((w, h), scale, return_scale=True)
    rescaled_img = imresize(
        img, new_size, interpolation=interpolation, backend=backend)
    if return_scale:
        return rescaled_img, scale_factor
    else:
        return rescaled_img
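
# Usage sketch (illustration only, not part of mmcv): unlike imresize, the
# aspect ratio is preserved:
#
#   >>> img = np.random.randint(0, 256, (720, 1280, 3), dtype=np.uint8)
#   >>> imrescale(img, 0.5).shape
#   (360, 640, 3)
#   >>> imrescale(img, (1000, 600)).shape
#   (563, 1000, 3)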


def imflip(img, direction='horizontal'):
    """Flip an image horizontally or vertically.

    Args:
        img (ndarray): Image to be flipped.
        direction (str): The flip direction, either "horizontal" or
            "vertical" or "diagonal".

    Returns:
        ndarray: The flipped image.
    """
    assert direction in ['horizontal', 'vertical', 'diagonal']
    if direction == 'horizontal':
        return np.flip(img, axis=1)
    elif direction == 'vertical':
        return np.flip(img, axis=0)
    else:
        return np.flip(img, axis=(0, 1))
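
# Usage sketch (illustration only, not part of mmcv): imflip returns a flipped
# view via np.flip, while imflip_ below flips in place with cv2.flip:
#
#   >>> a = np.array([[1, 2], [3, 4]])
#   >>> imflip(a, 'horizontal')
#   array([[2, 1],
#          [4, 3]])
#   >>> imflip(a, 'diagonal')
#   array([[4, 3],
#          [2, 1]])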


def imflip_(img, direction='horizontal'):
    """Inplace flip an image horizontally or vertically.

    Args:
        img (ndarray): Image to be flipped.
        direction (str): The flip direction, either "horizontal" or
            "vertical" or "diagonal".

    Returns:
        ndarray: The flipped image (inplace).
    """
    assert direction in ['horizontal', 'vertical', 'diagonal']
    if direction == 'horizontal':
        return cv2.flip(img, 1, img)
    elif direction == 'vertical':
        return cv2.flip(img, 0, img)
    else:
        return cv2.flip(img, -1, img)


def imrotate(img,
             angle,
             center=None,
             scale=1.0,
             border_value=0,
             interpolation='bilinear',
             auto_bound=False):
    """Rotate an image.

    Args:
        img (ndarray): Image to be rotated.
        angle (float): Rotation angle in degrees, positive values mean
            clockwise rotation.
        center (tuple[float], optional): Center point (w, h) of the rotation
            in the source image. If not specified, the center of the image
            will be used.
        scale (float): Isotropic scale factor.
        border_value (int): Border value.
        interpolation (str): Same as :func:`resize`.
        auto_bound (bool): Whether to adjust the image size to cover the whole
            rotated image.

    Returns:
        ndarray: The rotated image.
    """
    if center is not None and auto_bound:
        raise ValueError('`auto_bound` conflicts with `center`')
    h, w = img.shape[:2]
    if center is None:
        center = ((w - 1) * 0.5, (h - 1) * 0.5)
    assert isinstance(center, tuple)

    matrix = cv2.getRotationMatrix2D(center, -angle, scale)
    if auto_bound:
        cos = np.abs(matrix[0, 0])
        sin = np.abs(matrix[0, 1])
        new_w = h * sin + w * cos
        new_h = h * cos + w * sin
        matrix[0, 2] += (new_w - w) * 0.5
        matrix[1, 2] += (new_h - h) * 0.5
        w = int(np.round(new_w))
        h = int(np.round(new_h))
    rotated = cv2.warpAffine(
        img,
        matrix, (w, h),
        flags=cv2_interp_codes[interpolation],
        borderValue=border_value)
    return rotated
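
# Usage sketch (illustration only, not part of mmcv): with auto_bound=True the
# canvas grows so the whole rotated image stays visible:
#
#   >>> img = np.random.randint(0, 256, (200, 400, 3), dtype=np.uint8)
#   >>> imrotate(img, 30).shape          # same canvas, corners cut off
#   (200, 400, 3)
#   >>> imrotate(img, 90, auto_bound=True).shape
#   (400, 200, 3)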


def bbox_clip(bboxes, img_shape):
    """Clip bboxes to fit the image shape.

    Args:
        bboxes (ndarray): Shape (..., 4*k)
        img_shape (tuple[int]): (height, width) of the image.

    Returns:
        ndarray: Clipped bboxes.
    """
    assert bboxes.shape[-1] % 4 == 0
    cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype)
    cmin[0::2] = img_shape[1] - 1
    cmin[1::2] = img_shape[0] - 1
    clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0)
    return clipped_bboxes


def bbox_scaling(bboxes, scale, clip_shape=None):
    """Scale bboxes w.r.t. the box center.

    Args:
        bboxes (ndarray): Shape(..., 4).
        scale (float): Scaling factor.
        clip_shape (tuple[int], optional): If specified, bboxes that exceed
            the boundary will be clipped according to the given shape (h, w).

    Returns:
        ndarray: Scaled bboxes.
    """
    if float(scale) == 1.0:
        scaled_bboxes = bboxes.copy()
    else:
        w = bboxes[..., 2] - bboxes[..., 0] + 1
        h = bboxes[..., 3] - bboxes[..., 1] + 1
        dw = (w * (scale - 1)) * 0.5
        dh = (h * (scale - 1)) * 0.5
        scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1)
    if clip_shape is not None:
        return bbox_clip(scaled_bboxes, clip_shape)
    else:
        return scaled_bboxes
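
# Usage sketch (illustration only, not part of mmcv): scale a 20x20 box around
# its center, optionally clipping the result to a 100x100 image:
#
#   >>> bbox = np.array([10., 10., 29., 29.])
#   >>> bbox_scaling(bbox, 2.).tolist()
#   [0.0, 0.0, 39.0, 39.0]
#   >>> bbox_scaling(bbox, 4., clip_shape=(100, 100)).tolist()
#   [0.0, 0.0, 59.0, 59.0]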


def imcrop(img, bboxes, scale=1.0, pad_fill=None):
    """Crop image patches.

    3 steps: scale the bboxes -> clip bboxes -> crop and pad.

    Args:
        img (ndarray): Image to be cropped.
        bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes.
        scale (float, optional): Scale ratio of bboxes, the default value
            1.0 means no scaling.
        pad_fill (Number | list[Number]): Value to be filled for padding.
            Default: None, which means no padding.

    Returns:
        list[ndarray] | ndarray: The cropped image patches.
    """
    chn = 1 if img.ndim == 2 else img.shape[2]
    if pad_fill is not None:
        if isinstance(pad_fill, (int, float)):
            pad_fill = [pad_fill for _ in range(chn)]
        assert len(pad_fill) == chn

    _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes
    scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32)
    clipped_bbox = bbox_clip(scaled_bboxes, img.shape)

    patches = []
    for i in range(clipped_bbox.shape[0]):
        x1, y1, x2, y2 = tuple(clipped_bbox[i, :])
        if pad_fill is None:
            patch = img[y1:y2 + 1, x1:x2 + 1, ...]
        else:
            _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :])
            if chn == 1:
                patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1)
            else:
                patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn)
            patch = np.array(
                pad_fill, dtype=img.dtype) * np.ones(
                    patch_shape, dtype=img.dtype)
            x_start = 0 if _x1 >= 0 else -_x1
            y_start = 0 if _y1 >= 0 else -_y1
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            patch[y_start:y_start + h, x_start:x_start + w,
                  ...] = img[y1:y1 + h, x1:x1 + w, ...]
        patches.append(patch)

    if bboxes.ndim == 1:
        return patches[0]
    else:
        return patches
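
# Usage sketch (illustration only, not part of mmcv): crop a single patch, and
# a padded patch whose scaled bbox extends beyond the image:
#
#   >>> img = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
#   >>> imcrop(img, np.array([10, 10, 39, 49])).shape
#   (40, 30, 3)
#   >>> imcrop(img, np.array([0, 0, 19, 19]), scale=2., pad_fill=0).shape
#   (40, 40, 3)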


def impad(img,
          *,
          shape=None,
          padding=None,
          pad_val=0,
          padding_mode='constant'):
    """Pad the given image to a certain shape or pad on all sides with
    specified padding mode and padding value.

    Args:
        img (ndarray): Image to be padded.
        shape (tuple[int]): Expected padding shape (h, w). Default: None.
        padding (int or tuple[int]): Padding on each border. If a single int
            is provided this is used to pad all borders. If tuple of length 2
            is provided this is the padding on left/right and top/bottom
            respectively. If a tuple of length 4 is provided this is the
            padding for the left, top, right and bottom borders respectively.
            Default: None. Note that `shape` and `padding` can not be both
            set.
        pad_val (Number | Sequence[Number]): Values to be filled in padding
            areas when padding_mode is 'constant'. Default: 0.
        padding_mode (str): Type of padding. Should be: constant, edge,
            reflect or symmetric. Default: constant.

            - constant: pads with a constant value, this value is specified
              with pad_val.
            - edge: pads with the last value at the edge of the image.
            - reflect: pads with reflection of image without repeating the
              last value on the edge. For example, padding [1, 2, 3, 4]
              with 2 elements on both sides in reflect mode will result
              in [3, 2, 1, 2, 3, 4, 3, 2].
            - symmetric: pads with reflection of image repeating the last
              value on the edge. For example, padding [1, 2, 3, 4] with
              2 elements on both sides in symmetric mode will result in
              [2, 1, 1, 2, 3, 4, 4, 3].

    Returns:
        ndarray: The padded image.
    """

    assert (shape is not None) ^ (padding is not None)
    if shape is not None:
        padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0])

    # check pad_val
    if isinstance(pad_val, tuple):
        assert len(pad_val) == img.shape[-1]
    elif not isinstance(pad_val, numbers.Number):
        raise TypeError('pad_val must be a number or a tuple. '
                        f'But received {type(pad_val)}')

    # check padding
    if isinstance(padding, tuple) and len(padding) in [2, 4]:
        if len(padding) == 2:
            padding = (padding[0], padding[1], padding[0], padding[1])
    elif isinstance(padding, numbers.Number):
        padding = (padding, padding, padding, padding)
    else:
        raise ValueError('Padding must be an int or a 2- or 4-element tuple. '
                         f'But received {padding}')

    # check padding mode
    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']

    border_type = {
        'constant': cv2.BORDER_CONSTANT,
        'edge': cv2.BORDER_REPLICATE,
        'reflect': cv2.BORDER_REFLECT_101,
        'symmetric': cv2.BORDER_REFLECT
    }
    img = cv2.copyMakeBorder(
        img,
        padding[1],
        padding[3],
        padding[0],
        padding[2],
        border_type[padding_mode],
        value=pad_val)

    return img
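
# Usage sketch (illustration only, not part of mmcv): pad to a target shape,
# or pad each border explicitly in (left, top, right, bottom) order:
#
#   >>> img = np.ones((10, 12, 3), dtype=np.uint8)
#   >>> impad(img, shape=(16, 16), pad_val=0).shape
#   (16, 16, 3)
#   >>> impad(img, padding=(1, 2, 3, 4), padding_mode='reflect').shape
#   (16, 16, 3)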


def impad_to_multiple(img, divisor, pad_val=0):
    """Pad an image so that each edge length is a multiple of some number.

    Args:
        img (ndarray): Image to be padded.
        divisor (int): Padded image edges will be multiples of `divisor`.
        pad_val (Number | Sequence[Number]): Same as :func:`impad`.

    Returns:
        ndarray: The padded image.
    """
    pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor
    pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor
    return impad(img, shape=(pad_h, pad_w), pad_val=pad_val)
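
# Usage sketch (illustration only, not part of mmcv): each edge is rounded up
# to the next multiple of the divisor:
#
#   >>> img = np.zeros((37, 50, 3), dtype=np.uint8)
#   >>> impad_to_multiple(img, 32).shape
#   (64, 64, 3)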


def cutout(img, shape, pad_val=0):
    """Randomly cut out a rectangle from the original img.

    Args:
        img (ndarray): Image to be cutout.
        shape (int | tuple[int]): Expected cutout shape (h, w). If given as an
            int, the value will be used for both h and w.
        pad_val (int | float | tuple[int | float]): Values to be filled in the
            cut area. Defaults to 0.

    Returns:
        ndarray: The cutout image.
    """
    channels = 1 if img.ndim == 2 else img.shape[2]
    if isinstance(shape, int):
        cut_h, cut_w = shape, shape
    else:
        assert isinstance(shape, tuple) and len(shape) == 2, \
            f'shape must be an int or a tuple with length 2, but got type ' \
            f'{type(shape)} instead.'
        cut_h, cut_w = shape
    if isinstance(pad_val, (int, float)):
        pad_val = tuple([pad_val] * channels)
    elif isinstance(pad_val, tuple):
        assert len(pad_val) == channels, \
            'Expected the num of elements in tuple equals the channels ' \
            'of input image. Found {} vs {}'.format(
                len(pad_val), channels)
    else:
        raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`')

    img_h, img_w = img.shape[:2]
    y0 = np.random.uniform(img_h)
    x0 = np.random.uniform(img_w)
    y1 = int(max(0, y0 - cut_h / 2.))
    x1 = int(max(0, x0 - cut_w / 2.))
    y2 = min(img_h, y1 + cut_h)
    x2 = min(img_w, x1 + cut_w)

    if img.ndim == 2:
        patch_shape = (y2 - y1, x2 - x1)
    else:
        patch_shape = (y2 - y1, x2 - x1, channels)

    img_cutout = img.copy()
    patch = np.array(
        pad_val, dtype=img.dtype) * np.ones(
            patch_shape, dtype=img.dtype)
    img_cutout[y1:y2, x1:x2, ...] = patch

    return img_cutout
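
# Usage sketch (illustration only, not part of mmcv): the rectangle is placed
# at a random location, so only its maximum area is predictable:
#
#   >>> img = np.full((50, 50, 3), 255, dtype=np.uint8)
#   >>> out = cutout(img, 10, pad_val=(0, 0, 0))
#   >>> (out == 0).all(axis=-1).sum() <= 10 * 10
#   True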


def _get_shear_matrix(magnitude, direction='horizontal'):
    """Generate the shear matrix for transformation.

    Args:
        magnitude (int | float): The magnitude used for shear.
        direction (str): The shear direction, either "horizontal"
            or "vertical".

    Returns:
        ndarray: The shear matrix with dtype float32.
    """
    if direction == 'horizontal':
        shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]])
    elif direction == 'vertical':
        shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]])
    return shear_matrix


def imshear(img,
            magnitude,
            direction='horizontal',
            border_value=0,
            interpolation='bilinear'):
    """Shear an image.

    Args:
        img (ndarray): Image to be sheared with format (h, w)
            or (h, w, c).
        magnitude (int | float): The magnitude used for shear.
        direction (str): The shear direction, either "horizontal"
            or "vertical".
        border_value (int | tuple[int]): Value used in case of a
            constant border.
        interpolation (str): Same as :func:`resize`.

    Returns:
        ndarray: The sheared image.
    """
    assert direction in ['horizontal',
                         'vertical'], f'Invalid direction: {direction}'
    height, width = img.shape[:2]
    if img.ndim == 2:
        channels = 1
    elif img.ndim == 3:
        channels = img.shape[-1]
    if isinstance(border_value, int):
        border_value = tuple([border_value] * channels)
    elif isinstance(border_value, tuple):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels ' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
    else:
        raise ValueError(
            f'Invalid type {type(border_value)} for `border_value`')
    shear_matrix = _get_shear_matrix(magnitude, direction)
    sheared = cv2.warpAffine(
        img,
        shear_matrix,
        (width, height),
        # Note: if `border_value` contains more than 3 elements (e.g. when
        # shearing masks with more than 3 channels), `cv2.warpAffine` raises
        # a TypeError, so only the first 3 values are passed here.
        borderValue=border_value[:3],
        flags=cv2_interp_codes[interpolation])
    return sheared
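
# Usage sketch (illustration only, not part of mmcv): the canvas size is kept,
# so sheared content that leaves the frame is filled with border_value:
#
#   >>> img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
#   >>> imshear(img, 0.5).shape
#   (64, 64, 3)
#   >>> imshear(img, 0.3, direction='vertical', border_value=128).shape
#   (64, 64, 3)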


def _get_translate_matrix(offset, direction='horizontal'):
    """Generate the translate matrix.

    Args:
        offset (int | float): The offset used for translate.
        direction (str): The translate direction, either
            "horizontal" or "vertical".

    Returns:
        ndarray: The translate matrix with dtype float32.
    """
    if direction == 'horizontal':
        translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]])
    elif direction == 'vertical':
        translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]])
    return translate_matrix


def imtranslate(img,
                offset,
                direction='horizontal',
                border_value=0,
                interpolation='bilinear'):
    """Translate an image.

    Args:
        img (ndarray): Image to be translated with format
            (h, w) or (h, w, c).
        offset (int | float): The offset used for translate.
        direction (str): The translate direction, either "horizontal"
            or "vertical".
        border_value (int | tuple[int]): Value used in case of a
            constant border.
        interpolation (str): Same as :func:`resize`.

    Returns:
        ndarray: The translated image.
    """
    assert direction in ['horizontal',
                         'vertical'], f'Invalid direction: {direction}'
    height, width = img.shape[:2]
    if img.ndim == 2:
        channels = 1
    elif img.ndim == 3:
        channels = img.shape[-1]
    if isinstance(border_value, int):
        border_value = tuple([border_value] * channels)
    elif isinstance(border_value, tuple):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels ' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
    else:
        raise ValueError(
            f'Invalid type {type(border_value)} for `border_value`.')
    translate_matrix = _get_translate_matrix(offset, direction)
    translated = cv2.warpAffine(
        img,
        translate_matrix,
        (width, height),
        # Note: if `border_value` contains more than 3 elements (e.g. when
        # translating masks with more than 3 channels), `cv2.warpAffine`
        # raises a TypeError, so only the first 3 values are passed here.
        borderValue=border_value[:3],
        flags=cv2_interp_codes[interpolation])
    return translated
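
# Usage sketch (illustration only, not part of mmcv): a positive horizontal
# offset shifts content to the right, filling the vacated column with
# border_value:
#
#   >>> img = np.arange(9, dtype=np.uint8).reshape(3, 3)
#   >>> imtranslate(img, 1)
#   array([[0, 0, 1],
#          [0, 3, 4],
#          [0, 6, 7]], dtype=uint8)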