"""
Data structure for Binary Masks
Structure for efficient encoding of per-annotation segmentation masks
Based on efficient cython/C code in the cocoapi [CocoStuffPyx]_ [CocoStuffC]_
[CocoStuffH]_ [CocoStuffPy]_.
References:
.. [CocoStuffPyx] https://github.com/nightrome/cocostuffapi/blob/master/PythonAPI/pycocotools/_mask.pyx
.. [CocoStuffC] https://github.com/nightrome/cocostuffapi/blob/master/common/maskApi.c
.. [CocoStuffH] https://github.com/nightrome/cocostuffapi/blob/master/common/maskApi.h
.. [CocoStuffPy] https://github.com/nightrome/cocostuffapi/blob/master/PythonAPI/pycocotools/mask.py
Goals:
The goal of this file is to create a datastructure that lets the developer
seemlessly convert between:
(1) raw binary uint8 masks
(2) memory-efficient compressed run-length-encodings of binary segmentation masks.
(3) convex polygons
(4) convex hull polygons
(5) bounding box
It is not there yet, and the API is subject to change in order to better
accomplish these goals.
TODO:
- [ ] * Create two different classes: MultiLabelMask and BinaryMask
both can inherit from Mask.
Note:
IN THIS FILE ONLY: size corresponds to a h/w tuple to be compatible with
the coco semantics. Everywhere else in this repo, size uses opencv
semantics which are w/h.
"""
import sys
import copy
import numpy as np
import ubelt as ub
import itertools as it
import warnings
import numbers
from . import _generic
# try:
# import torch
# except Exception:
# torch = None
# try:
# from line_profiler import profile # NOQA
# except Exception:
# from ubelt import identity as profile # NOQA
class _Mask_Backends():
# TODO: could make this prettier
def __init__(self):
self._funcs = None
def _lazy_init(self):
from kwimage import _internal
_funcs = {}
try:
from pycocotools import _mask
_funcs['pycoco'] = _mask
except Exception as ex:
if not _internal.KWIMAGE_DISABLE_IMPORT_WARNINGS:
warnings.warn(
'optional module pycocotools is not available: {}'.format(
str(ex)))
if not _internal.KWIMAGE_DISABLE_C_EXTENSIONS:
try:
from kwimage_ext.structs._mask_backend import cython_mask
_funcs['kwimage'] = cython_mask
except Exception as ex:
if not _internal.KWIMAGE_DISABLE_IMPORT_WARNINGS:
warnings.warn(
'optional mask_backend is not available: {}'.format(str(ex)))
self._funcs = _funcs
self._valid = frozenset(self._funcs.keys())
def get_backend(self, prefs):
from kwimage import _internal
if self._funcs is None:
self._lazy_init()
valid = ub.oset(prefs) & set(self._funcs)
if not valid:
if not _internal.KWIMAGE_DISABLE_IMPORT_WARNINGS:
warnings.warn('no valid mask backend')
return None, None
key = ub.peek(valid)
func = self._funcs[key]
return key, func
_backends = _Mask_Backends()
@ub.memoize
def _lazy_mask_backend():
backend_key, cython_mask = _backends.get_backend(['kwimage', 'pycoco'])
return cython_mask
__all__ = ['Mask', 'MaskList']
class MaskFormat:
"""
Defines valid formats and their aliases.
Attrs:
aliases (Mapping[str, str]):
maps format aliases to their cannonical name.
"""
cannonical = []
def _register(k, cannonical=cannonical):
cannonical.append(k)
return k
BYTES_RLE = _register('bytes_rle') # cython compressed RLE
ARRAY_RLE = _register('array_rle') # numpy uncompreesed RLE
C_MASK = _register('c_mask') # row-major raw binary mask
F_MASK = _register('f_mask') # column-major raw binary mask
aliases = {
}
for key in cannonical:
aliases[key] = key
class _MaskConversionMixin(object):
"""
Mixin class registering conversion functions
For conversion speeds look into:
~/code/kwimage/dev/bench_rle.py
"""
convert_funcs = {}
def _register_convertor(key, convert_funcs=convert_funcs):
def _reg(func):
convert_funcs[key] = func
return func
return _reg
def toformat(self, format, copy=False):
"""
Changes the internal representation using one of the registered
convertor functions.
Args:
format (str):
the string code for the format you want to transform into.
copy (bool):
if True, we always return copy of the data.
if False, we try not to return a copy unless necessary.
Returns:
Mask: The Mask object with a new backend format
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> from kwimage.structs.mask import MaskFormat # NOQA
>>> mask = Mask.random(shape=(8, 8), rng=0)
>>> # Test that we can convert to and from all formats
>>> for format1 in MaskFormat.cannonical:
... mask1 = mask.toformat(format1)
... for format2 in MaskFormat.cannonical:
... mask2 = mask1.toformat(format2)
... img1 = mask1.to_c_mask().data
... img2 = mask2.to_c_mask().data
... if not np.all(img1 == img2):
... msg = 'Failed convert {} <-> {}'.format(format1, format2)
... print(msg)
... raise AssertionError(msg)
... else:
... msg = 'Passed convert {} <-> {}'.format(format1, format2)
... print(msg)
"""
key = MaskFormat.aliases.get(format, format)
try:
func = self.convert_funcs[key]
return func(self, copy)
except KeyError:
raise KeyError('Cannot convert {} to {}'.format(self.format, format))
@_register_convertor(MaskFormat.BYTES_RLE)
def to_bytes_rle(self, copy=False):
"""
Converts the mask format to a bytes-based run-length encoding.
Args:
copy (bool):
if True, we always return copy of the data.
if False, we try not to return a copy unless necessary.
Returns:
Mask: The Mask object with a new backend format
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> from kwimage.structs.mask import MaskFormat # NOQA
>>> mask = Mask.demo()
>>> print(mask.to_bytes_rle().data['counts'])
..._153L;4EL;1DO10;1DO10;1DO10;4EL;4ELW3b0jL^O60...
>>> print(mask.to_array_rle().data['counts'].tolist())
[47, 5, 3, 1, 14, 5, 3, 1, 14, 2, 2, 1, 3, 1, 14, ...
>>> print(mask.to_array_rle().to_bytes_rle().data['counts'])
..._153L;4EL;1DO10;1DO10;1DO10;4EL;4ELW3b0jL^O60L0...
"""
if self.format == MaskFormat.BYTES_RLE:
return self.copy() if copy else self
cython_mask = _lazy_mask_backend()
if self.format == MaskFormat.ARRAY_RLE:
h, w = self.data['size']
if self.data.get('order', 'F') != 'F':
raise ValueError('Expected column-major array RLE')
if cython_mask is None:
raise NotImplementedError('pure python version of array_rle to_bytes_rle')
newdata = cython_mask.frUncompressedRLE([self.data], h, w)[0]
self = Mask(newdata, MaskFormat.BYTES_RLE)
elif self.format == MaskFormat.F_MASK:
f_masks = self.data[:, :, None]
if cython_mask is None:
raise NotImplementedError('pure python version of f to to_bytes_rle')
encoded = cython_mask.encode(f_masks)[0]
if 'size' in encoded:
encoded['size'] = list(map(int, encoded['size'])) # python2 fix
self = Mask(encoded, format=MaskFormat.BYTES_RLE)
elif self.format == MaskFormat.C_MASK:
c_mask = self.data
f_masks = np.asfortranarray(c_mask)[:, :, None]
if cython_mask is None:
raise NotImplementedError('pure python version of c to to_bytes_rle')
encoded = cython_mask.encode(f_masks)[0]
if 'size' in encoded:
encoded['size'] = list(map(int, encoded['size'])) # python2 fix
self = Mask(encoded, format=MaskFormat.BYTES_RLE)
else:
raise NotImplementedError(self.format)
return self
@_register_convertor(MaskFormat.ARRAY_RLE)
def to_array_rle(self, copy=False):
"""
Converts the mask format to an array-based run-length encoding.
Args:
copy (bool):
if True, we always return copy of the data.
if False, we try not to return a copy unless necessary.
Returns:
Mask: the underlying RLE data will be in F-contiguous order.
"""
if self.format == MaskFormat.ARRAY_RLE:
return self.copy() if copy else self
elif self.format == MaskFormat.BYTES_RLE:
from kwimage.im_runlen import _rle_bytes_to_array
arr_counts = _rle_bytes_to_array(self.data['counts'])
encoded = {
'size': self.data['size'],
'binary': self.data.get('binary', True),
'counts': arr_counts,
'order': self.data.get('order', 'F'),
}
encoded['shape'] = self.data.get('shape', encoded['size'])
self = Mask(encoded, format=MaskFormat.ARRAY_RLE)
else:
import kwimage
f_mask = self.to_fortran_mask().data
encoded = kwimage.encode_run_length(f_mask, binary=True, order='F')
# NOTE: Generally `size` means (width, height) and `shape` means
# (height, width) but shape in this case is in F-order, which means
# it is (width, hight), so it can be used directly as size
encoded['size'] = encoded['shape'] # hack in size
self = Mask(encoded, format=MaskFormat.ARRAY_RLE)
return self
@_register_convertor(MaskFormat.F_MASK)
def to_fortran_mask(self, copy=False):
"""
Convert the mask format to a dense mask array in columnwise (F) order
Args:
copy (bool):
if True, we always return copy of the data.
if False, we try not to return a copy unless necessary.
Returns:
Mask : the converted mask
Example:
>>> import kwimage
>>> # This is modified version of a segmentation from COCO
>>> # We have some hard-coded assumptions when handling rles
>>> # that dont specify shape, order, and binary.
>>> coco_sseg = {
>>> "size": (51, 50),
>>> "counts": [26, 2, 651, 3, 13, 1, 313, 12, 6, 3, 12, 322, 11, 323, 10, 325, 8, 93, 416]
>>> }
>>> rle = kwimage.Mask(coco_sseg, 'array_rle')
>>> fmask = rle.to_fortran_mask()
>>> fmask.data.sum()
>>> # Note that the returned RLE is in our more explicit encoding
>>> rle2 = fmask.to_array_rle()
>>> assert rle2.data['counts'].tolist() == rle.data['counts']
>>> assert rle2.data['order'] == 'F'
>>> assert rle2.data['binary'] == True
>>> assert rle2.data['shape'] == rle.data['size']
"""
if self.format == MaskFormat.F_MASK:
return self.copy() if copy else self
elif self.format == MaskFormat.C_MASK:
c_mask = self.data.copy() if copy else self.data
f_mask = np.asfortranarray(c_mask)
elif self.format == MaskFormat.ARRAY_RLE:
import kwimage
encoded = dict(self.data)
# NOTE: Generally `size` means (width, height) and `shape` means
# (height, width) but shape in this case is in F-order, which means
# it is (width, hight), so it can be used directly as size
# Ideally we are given "shape" instead of "size", but the original
# COCO RLE's use "size", so we have to accept that here.
# Handle RLE is in COCO format, thus the defaults passed to
# decode_run_length should be specified with coco assumptions
encoded = {
'counts': self.data['counts'],
'binary': self.data.get('binary', True),
'order': self.data.get('order', 'F'),
}
if 'shape' in self.data:
encoded['shape'] = self.data['shape']
else:
encoded['shape'] = self.data['size']
f_mask = kwimage.decode_run_length(**encoded)
else:
# NOTE: inefficient, could be improved
self = self.to_bytes_rle(copy=False)
cython_mask = _lazy_mask_backend()
if cython_mask is None:
raise NotImplementedError('pure python version')
f_mask = cython_mask.decode([self.data])[:, :, 0]
self = Mask(f_mask, MaskFormat.F_MASK)
return self
@_register_convertor(MaskFormat.C_MASK)
def to_c_mask(self, copy=False):
"""
Convert the mask format to a dense mask array in rowwise (C) order
Args:
copy (bool):
if True, we always return copy of the data.
if False, we try not to return a copy unless necessary.
Returns:
Mask: The Mask object with a new backend format
"""
if self.format == MaskFormat.C_MASK:
return self.copy() if copy else self
elif self.format == MaskFormat.F_MASK:
f_mask = self.data.copy() if copy else self.data
c_mask = np.ascontiguousarray(f_mask)
else:
f_mask = self.to_fortran_mask(copy=False).data
c_mask = np.ascontiguousarray(f_mask)
self = Mask(c_mask, MaskFormat.C_MASK)
return self
def numpy(self):
"""
Ensure mask is in numpy format (if possible)
Returns:
Mask: The Mask object with a new backend format
"""
data = self.data
if self.format in {MaskFormat.C_MASK, MaskFormat.F_MASK}:
torch = sys.modules.get('torch', None)
if torch is not None and torch.is_tensor(data):
data = data.data.cpu().numpy()
newself = self.__class__(data, self.format)
return newself
def tensor(self, device=ub.NoParam):
"""
Ensure mask is in tensor format (if possible)
Returns:
Mask: The Mask object with a new backend format
"""
data = self.data
if self.format in {MaskFormat.C_MASK, MaskFormat.F_MASK}:
torch = sys.modules.get('torch', None)
if torch is not None and not torch.is_tensor(data):
data = torch.from_numpy(data)
if device is not ub.NoParam:
data = data.to(device)
newself = self.__class__(data, self.format)
return newself
class _MaskConstructorMixin(object):
"""
Alternative ways to construct a masks object
"""
@classmethod
def from_polygons(Mask, polygons, dims):
"""
DEPRICATE: use kwimage.Polygon.to_mask? or kwimage.Mask.coerce?
Args:
polygons (ndarray | List[ndarray]): one or more polygons that
will be joined together. The ndarray may either be an
Nx2 or a flat c-contiguous array or xy points.
dims (Tuple): height / width of the source image
Returns:
Mask: the new Mask object
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> polygons = [
>>> np.array([[3, 0],[2, 1],[2, 4],[4, 4],[4, 3],[7, 0]]),
>>> np.array([[0, 9],[4, 8],[2, 3]]),
>>> ]
>>> dims = (9, 5)
>>> self = Mask.from_polygons(polygons, dims)
>>> print(self)
<Mask({'counts': ...'724;MG2MN16', 'size': [9, 5]}, format=bytes_rle)>
>>> polygon = polygons[0]
>>> print(Mask.from_polygons(polygon, dims))
<Mask({'counts': ...'b04500N2', 'size': [9, 5]}, format=bytes_rle)>
"""
h, w = dims
# TODO: holes? geojson?
if isinstance(polygons, np.ndarray):
polygons = [polygons]
flat_polys = [np.array(ps).ravel() for ps in polygons]
cython_mask = _lazy_mask_backend()
if cython_mask is None:
raise NotImplementedError('pure python version from polygons')
encoded = cython_mask.frPoly(flat_polys, h, w)
if 'size' in encoded:
encoded['size'] = list(map(int, encoded['size'])) # python2 fix
ccs = [Mask(e, MaskFormat.BYTES_RLE) for e in encoded]
self = Mask.union(*ccs)
return self
@classmethod
def from_mask(Mask, mask, offset=None, shape=None, method='faster'):
"""
Creates an RLE encoded mask from a raw binary mask.
You may optionally specify an offset if the mask is part of a larger
image.
Args:
mask (ndarray):
a binary submask which belongs to a larger image
offset (Tuple[int, int]):
top-left xy location of the mask in the larger image
shape (Tuple[int, int]): shape of the larger image
SeeAlso:
../../test/test_rle.py
Example:
>>> mask = Mask.random(shape=(32, 32), rng=0).data
>>> offset = (30, 100)
>>> shape = (501, 502)
>>> self = Mask.from_mask(mask, offset=offset, shape=shape, method='faster')
"""
if shape is None:
shape = mask.shape
if offset is None:
offset = (0, 0)
if method == 'naive':
# inefficent but used to test correctness of algorithms
import kwimage
rc_offset = offset[::-1]
larger = kwimage.subpixel_translate(mask, rc_offset,
output_shape=shape)
# larger = np.zeros(shape, dtype=mask.dtype)
# larger_rc = offset[::-1]
# mask_dims = mask.shape[0:2]
# index = tuple(slice(s, s + d) for s, d in zip(larger_rc, mask_dims))
# larger[index] = mask
self = Mask(larger, MaskFormat.C_MASK).to_array_rle()
elif method == 'faster':
import kwimage
encoded = kwimage.encode_run_length(mask, binary=True, order='F')
encoded['size'] = encoded['shape']
self = Mask(encoded, MaskFormat.ARRAY_RLE)
self = self.translate(offset, shape)
else:
raise KeyError(method)
return self
class _MaskTransformMixin(object):
"""
Mixin methods relating to geometric transformations of mask objects
"""
# @profile
def scale(self, factor, output_dims=None, inplace=False):
"""
Perform a scale operation on the mask.
Args:
factor (float | Tuple[float, float]): the xy scale factor
input_dims (Tuple[int, int]): unused
output_dims (Tuple[int, int]): shape of the returned mask
Returns:
Mask: the transformed Mask object
Note:
* This function has not been optimized and may be inefficient
Example:
>>> # xdoctest: +REQUIRES(module:torch)
>>> self = Mask.random()
>>> factor = 5
>>> inplace = False
>>> new = self.scale(factor)
>>> print('new.shape = {!r}'.format(new.shape))
"""
if not ub.iterable(factor):
sx = sy = factor
else:
sx, sy = factor
if output_dims is None:
output_dims = (np.array(self.shape) * np.array((sy, sx))).astype(int)
# FIXME: the warp breaks when the third row is left out
transform = np.array([[sx, 0.0, 0.0], [0.0, sy, 0.0], [0, 0, 1]])
new = self.warp(transform, output_dims=output_dims, inplace=inplace)
return new
# @profile
def warp(self, transform, input_dims=None, output_dims=None, inplace=False):
"""
Perform a matrix warp (e.g. affine or projective) on the underlying
mask data.
Args:
transform (ndarray): the transform matrix
input_dims (Tuple[int, int]): unused
output_dims (Tuple[int, int]): shape of the returned mask
Returns:
Mask: the transformed Mask object
Note:
* This function has not been optimized and may be inefficient
Example:
>>> # xdoctest: +REQUIRES(module:torch)
>>> import kwimage
>>> self = mask = kwimage.Mask.random()
>>> transform = np.array([[5., 0, 0], [0, 5, 0], [0, 0, 1]])
>>> output_dims = np.array(self.shape) * 6
>>> new = self.warp(transform, output_dims=output_dims)
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
>>> self.draw()
>>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
>>> new.draw()
Example:
>>> # Verify that the warp transform does roughtly the same thing
>>> # to a mask and an equivalent polygon
>>> # xdoctest: +REQUIRES(module:torch)
>>> import kwimage
>>> input_dims = (100, 100)
>>> output_dims = (200, 200)
>>> rng = 92703548026074914707206344922748
>>> transform = kwimage.Affine.random(shear=(0, 1.), rng=rng)
>>> mask1 = kwimage.Mask.random(rng=rng, shape=input_dims)
>>> poly1 = mask1.to_multi_polygon()
>>> mask2 = mask1.warp(transform.matrix, output_dims=output_dims)
>>> poly2 = poly1.warp(transform.matrix)
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> canvas1_m = np.zeros((*input_dims, 3))
>>> canvas1_p = np.zeros((*input_dims, 3))
>>> canvas2_m = np.zeros((*output_dims, 3))
>>> canvas2_p = np.zeros((*output_dims, 3))
>>> canvas1_m = mask1.draw_on(canvas1_m)
>>> canvas1_p = poly1.draw_on(canvas1_p, color='red')
>>> canvas2_m = mask2.draw_on(canvas2_m)
>>> canvas2_p = poly2.draw_on(canvas2_p, color='red')
>>> kwplot.imshow(canvas1_m, fnum=1, pnum=(2, 2, 1))
>>> kwplot.imshow(canvas1_p, fnum=1, pnum=(2, 2, 2))
>>> kwplot.imshow(canvas2_m, fnum=1, pnum=(2, 2, 3))
>>> kwplot.imshow(canvas2_p, fnum=1, pnum=(2, 2, 4))
"""
# HACK: use brute force just to get this implemented.
# very inefficient
import kwimage
torch = sys.modules.get('torch', None)
if torch is None:
raise Exception('need torch to warp raster masks')
if isinstance(transform, kwimage.Affine):
transform = transform.matrix
if transform is None:
new = self if inplace else Mask(self.data.copy(), self.format)
return new
elif isinstance(transform, kwimage.Affine):
transform = transform.matrix
c_mask = self.to_c_mask(copy=False).data
t_mask = torch.Tensor(c_mask)
matrix = torch.Tensor(transform)
output_dims = output_dims
if output_dims is not None:
if isinstance(output_dims, str):
if output_dims == 'same':
output_dims = self.data.shape[0:2]
else:
raise KeyError(output_dims)
# TODO: could use kwimage.warp_image here instead if torch is not
# available.
w_mask = kwimage.warp_tensor(t_mask, matrix, output_dims=output_dims,
mode='nearest')
new = self if inplace else Mask(self.data, self.format)
new.data = w_mask.numpy().astype(np.uint8)
new.format = MaskFormat.C_MASK
return new
# @profile
def translate(self, offset, output_dims=None, inplace=False):
"""
Translate the pixel values in the mask.
Works efficiently in rle or mask format when the offset is integral.
Args:
offset (Tuple | Number): x,y offset
output_dims (Tuple[int, int]): h, w of transformed mask.
If unspecified the parent shape is used.
inplace (bool): for api compatability, currently ignored
Returns:
Mask: the transformed Mask object
Example:
>>> self = Mask.random(shape=(8, 8), rng=0)
>>> shape = (10, 10)
>>> offset = (1, 1)
>>> data2 = self.translate(offset, shape).to_c_mask().data
>>> assert np.all(data2[1:7, 1:7] == self.data[:6, :6])
Example:
>>> from kwimage.structs.mask import MaskFormat # NOQA
>>> shape = (10, 10)
>>> offset = (2, -3)
>>> self = Mask.random(shape=(8, 8), rng=0)
>>> # avoid pycocotools
>>> test_formats = [MaskFormat.C_MASK, MaskFormat.F_MASK, MaskFormat.ARRAY_RLE]
>>> input_formats = {f: self.toformat(f) for f in test_formats}
>>> results = {}
>>> for f, mask in input_formats.items():
>>> new_mask = mask.translate(offset, shape)
>>> results[f] = new_mask
>>> assert ub.allsame(
>>> [r.toformat(MaskFormat.C_MASK).data
>>> for r in results.values()], eq=np.allclose)
"""
import kwimage
if output_dims is None:
output_dims = self.shape
if not ub.iterable(offset):
offset = (offset, offset)
integer_offset = all(isinstance(o, numbers.Integral) for o in offset)
mask_format = self.format in {MaskFormat.C_MASK, MaskFormat.F_MASK}
if mask_format or not integer_offset:
integer_offset = None # hack
if integer_offset:
# TODO: be more efficient
offset_x, offset_y = offset
new_data = np.zeros_like(self.data, shape=output_dims)
new_self = Mask(new_data, self.format)
else:
c_data = self.toformat(MaskFormat.C_MASK, copy=False).data
if c_data.dtype.kind == 'b':
c_data = c_data.astype(np.uint8)
transform = kwimage.Affine.affine(offset=offset)
dsize = output_dims[::-1]
new_c_data = kwimage.warp_affine(
c_data, transform, dsize=dsize, interpolation='nearest')
new_c_self = Mask(new_c_data, MaskFormat.C_MASK)
new_self = new_c_self.toformat(self.format, copy=False)
else:
rle = self.to_array_rle(copy=False).data
new_rle = kwimage.rle_translate(rle, offset, output_dims)
new_rle['size'] = new_rle['shape']
new_self = Mask(new_rle, MaskFormat.ARRAY_RLE)
return new_self
class _MaskDrawMixin(object):
"""
Mixin methods relating to visualizing mask objects via either
matplotlib (the ``draw`` method) or opencv (the ``draw_on`` method).
"""
def draw_on(self, image=None, color='blue', alpha=0.5,
show_border=False, border_thick=1,
border_color='white', copy=False):
"""
Draws the mask on an image
Args:
image (ndarray): the image to draw on
color (str | tuple): color code/rgb of the mask
alpha (float): mask alpha value
show_border (bool): draw border around the mask
Returns:
ndarray: the image with data drawn on it
Example:
>>> from kwimage.structs.mask import * # NOQA
>>> import kwimage
>>> image = kwimage.grab_test_image()
>>> self = Mask.random(shape=image.shape[0:2])
>>> canvas = self.draw_on(image)
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> kwplot.imshow(canvas)
>>> kwplot.show_if_requested()
Example:
>>> # Test the case where the mask and image are different sizes
>>> from kwimage.structs.mask import * # NOQA
>>> import kwimage
>>> image = kwimage.grab_test_image()
>>> self = Mask.random(shape=np.array(image.shape[0:2]) // 2)
>>> canvas = self.draw_on(image)
>>> self = Mask.random(shape=np.array(image.shape[0:2]) * 2)
>>> canvas = self.draw_on(image)
Example:
>>> import kwimage
>>> color = 'blue'
>>> self = kwimage.Mask.random(shape=(128, 128))
>>> # Test drawong on all channel + dtype combinations
>>> im3 = np.random.rand(128, 128, 3).astype(np.float32)
>>> im_chans = {
>>> 'im3': im3,
>>> 'im1': kwimage.convert_colorspace(im3, 'rgb', 'gray'),
>>> 'im4': kwimage.convert_colorspace(im3, 'rgb', 'rgba'),
>>> }
>>> inputs = {}
>>> for k, im in im_chans.items():
>>> inputs[k + '_01'] = (kwimage.ensure_float01(im.copy()), {'alpha': None})
>>> inputs[k + '_255'] = (kwimage.ensure_uint255(im.copy()), {'alpha': None})
>>> inputs[k + '_01_a'] = (kwimage.ensure_float01(im.copy()), {'alpha': 0.5})
>>> inputs[k + '_255_a'] = (kwimage.ensure_uint255(im.copy()), {'alpha': 0.5})
>>> outputs = {}
>>> for k, v in inputs.items():
>>> im, kw = v
>>> outputs[k] = self.draw_on(im, color=color, **kw)
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.figure(fnum=2, doclf=True)
>>> kwplot.autompl()
>>> pnum_ = kwplot.PlotNums(nCols=2, nRows=len(inputs))
>>> for k in inputs.keys():
>>> kwplot.imshow(inputs[k][0], fnum=2, pnum=pnum_(), title=k)
>>> kwplot.imshow(outputs[k], fnum=2, pnum=pnum_(), title=k)
>>> kwplot.show_if_requested()
"""
import kwimage
import cv2
if image is None:
image = np.zeros(self.shape[0:2] + (3,), dtype=np.float32)
dtype_fixer = _generic._consistent_dtype_fixer(image)
if alpha is None:
alpha = 1.0
# Make an alpha mask with the requested color
mask = self.to_c_mask().data
rgb01 = list(kwimage.Color(color).as01())
rgba01 = np.array(rgb01 + [1])[None, None, :]
alpha_mask = rgba01 * mask[:, :, None]
alpha_mask[..., 3] = mask * alpha
mask_shape = tuple(alpha_mask.shape[0:2])
canvas_shape = tuple(image.shape[0:2])
if mask_shape != canvas_shape:
# Overlay as much as is possible if the shapes dont match
min_shape = list(map(min, zip(mask_shape, canvas_shape)))
min_slice = tuple([slice(0, m) for m in min_shape])
canvas = kwimage.ensure_alpha_channel(image, copy=True)
alpha_part = alpha_mask[min_slice]
image_part = image[min_slice]
# TODO: could use add weighted to get a faster impl
canvas_part = kwimage.overlay_alpha_images(alpha_part, image_part)
canvas[min_slice] = canvas_part
else:
canvas = kwimage.overlay_alpha_images(alpha_mask, image)
if show_border:
# return shape of contours to openCV contours
polys = self.to_multi_polygon()
for poly in polys:
contours = [np.expand_dims(c, axis=1) for c in poly.data['exterior']]
canvas = cv2.drawContours((canvas * 255.).astype(np.uint8),
contours, -1,
kwimage.Color(border_color).as255(),
border_thick, cv2.LINE_AA)
canvas = canvas.astype(float) / 255.
canvas = dtype_fixer(canvas, copy=False)
return canvas
def draw(self, color='blue', alpha=0.5, ax=None, show_border=False,
border_thick=1, border_color='black'):
"""
Draw on the current matplotlib axis
Args:
color (str | tuple): color code/rgb of the mask
alpha (float): mask alpha value
"""
import kwimage
import cv2
if ax is None:
from matplotlib import pyplot as plt
ax = plt.gca()
mask = self.to_c_mask().numpy().data
rgb01 = list(kwimage.Color(color).as01())
rgba01 = np.array(rgb01 + [1])[None, None, :]
alpha_mask = rgba01 * mask[:, :, None]
alpha_mask[..., 3] = mask * alpha
if show_border:
# Add alpha channel to color
border_color_tup = kwimage.Color(border_color).as255()
border_color_tup = (border_color_tup[0], border_color_tup[1],
border_color_tup[2], 255 * alpha)
# return shape of contours to openCV contours
polys = self.to_multi_polygon()
for poly in polys:
contours = [np.expand_dims(c, axis=1) for c in poly.data['exterior']]
alpha_mask = cv2.drawContours(
(alpha_mask * 255.).astype(np.uint8),
contours, -1, border_color_tup, border_thick, cv2.LINE_AA)
alpha_mask = alpha_mask.astype(float) / 255.
ax.imshow(alpha_mask)
[docs]
class Mask(ub.NiceRepr, _MaskConversionMixin, _MaskConstructorMixin,
_MaskTransformMixin, _MaskDrawMixin):
"""
Manages a single segmentation mask and can convert to and from
multiple formats including:
* bytes_rle - byte encoded run length encoding
* array_rle - raw run length encoding
* c_mask - c-style binary mask
* f_mask - fortran-style binary mask
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> # a ms-coco style compressed bytes rle segmentation
>>> segmentation = {'size': [5, 9], 'counts': ';?1B10O30O4'}
>>> mask = Mask(segmentation, 'bytes_rle')
>>> # convert to binary numpy representation
>>> binary_mask = mask.to_c_mask().data
>>> print(ub.urepr(binary_mask.tolist(), nl=1, nobr=1))
[0, 0, 0, 1, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 1, 0, 0, 0, 0],
[0, 0, 1, 1, 1, 1, 1, 1, 0],
[0, 0, 1, 1, 1, 0, 1, 1, 0],
[0, 0, 1, 1, 1, 0, 1, 1, 0],
"""
def __init__(self, data=None, format=None):
self.data = data
self.format = format
@property
def dtype(self):
try:
return self.data.dtype
except Exception:
print('kwimage.mask: no dtype for ' + str(type(self.data)))
raise
def __nice__(self):
return '{}, format={}'.format(ub.urepr(self.data, nl=0), self.format)
[docs]
@classmethod
def random(Mask, rng=None, shape=(32, 32)):
"""
Create a random binary mask object
Args:
rng (int | RandomState | None): the random seed
shape (Tuple[int, int]): the height / width of the returned mask
Returns:
Mask: the random mask
Example:
>>> import kwimage
>>> mask = kwimage.Mask.random()
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> mask.draw()
>>> kwplot.show_if_requested()
"""
import kwarray
import kwimage
rng = kwarray.ensure_rng(rng)
# Use random heatmap to make some blobs for the mask
heatmap = kwimage.Heatmap.random(dims=shape, rng=rng, classes=2)
probs = heatmap.data['class_probs'][1]
c_mask = (probs > probs.mean()).astype(np.uint8)
self = Mask(c_mask, MaskFormat.C_MASK)
return self
[docs]
@classmethod
def demo(cls):
"""
Demo mask with holes and disjoint shapes
Returns:
Mask: the demo mask
"""
text = ub.codeblock(
'''
................................
..ooooooo....ooooooooooooo......
..ooooooo....o...........o......
..oo...oo....o.oooooooo..o......
..oo...oo....o.o......o..o......
..ooooooo....o.o..oo..o..o......
.............o.o...o..o..o......
.............o.o..oo..o..o......
.............o.o......o..o......
..ooooooo....o.oooooooo..o......
.............o...........o......
.............o...........o......
.............ooooooooooooo......
.............o...........o......
.............o...........o......
.............o....ooooo..o......
.............o....o...o..o......
.............o....ooooo..o......
.............o...........o......
.............ooooooooooooo......
................................
................................
................................
''')
self = cls.from_text(text, zero_chr='.')
return self
[docs]
@classmethod
def from_text(cls, text, zero_chr='.', shape=None, has_border=False):
"""
Construct a mask from a text art representation
Args:
text (str):
the text representing a mask
zero_chr (str):
the character that represents a zero
shape (None | Tuple[int, int]):
if specified force a specific height / width, otherwise
the character extent determines this.
has_border (bool):
if True, assume the characters at the edge
are representing a border and remove them.
Example:
>>> import kwimage
>>> import ubelt as ub
>>> text = ub.indent(ub.codeblock(
>>> '''
>>> ooo
>>> ooo
>>> ooooo
>>> o
>>> '''))
>>> mask = kwimage.Mask.from_text(text, zero_chr=' ')
>>> print(mask.data)
[[0 0 0 0 1 1 1 0 0]
[0 0 0 0 1 1 1 0 0]
[0 0 0 0 1 1 1 1 1]
[0 0 0 0 0 0 0 0 1]]
Example:
>>> import kwimage
>>> import ubelt as ub
>>> text = ub.codeblock(
>>> '''
>>> +------------+
>>> | |
>>> | ooo |
>>> | ooo |
>>> | ooooo |
>>> | o |
>>> | |
>>> +------------+
>>> ''')
>>> mask = kwimage.Mask.from_text(text, has_border=True, zero_chr=' ')
>>> print(mask.data)
[[0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 1 1 1 0 0 0 0 0]
[0 0 0 0 1 1 1 0 0 0 0 0]
[0 0 0 0 1 1 1 1 1 0 0 0]
[0 0 0 0 0 0 0 0 1 0 0 0]
[0 0 0 0 0 0 0 0 0 0 0 0]]
"""
lines = text.split('\n')
data = [[0 if c == zero_chr else 1 for c in line] for line in lines]
max_width = max(len(row) for row in data)
max_height = len(data)
if shape is not None:
max_height, max_width = shape
# Pad out (or shrink) the width of each row
data = [
row[0:max_width] if len(row) >= max_width else
row + [0] * (max_width - len(row))
for row in data
]
# Pad out (or shrink) the height of the columns
extra_rows = max_height - len(data)
if extra_rows > 0:
data = [[0] * max_width for _ in range(extra_rows)]
else:
data = data[0:max_height]
data = np.array(data).astype(np.uint8)
if has_border:
data = data[1:-1, 1:-1]
self = cls(data, format=MaskFormat.C_MASK)
return self
[docs]
def copy(self):
"""
Performs a deep copy of the mask data
Returns:
Mask: the copied mask
Example:
>>> self = Mask.random(shape=(8, 8), rng=0)
>>> other = self.copy()
>>> assert other.data is not self.data
"""
return Mask(copy.deepcopy(self.data), self.format)
[docs]
def union(self, *others):
"""
This can be used as a staticmethod or an instancemethod
Args:
*others: multiple input masks to union
Returns:
Mask: the unioned mask
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> from kwimage.structs.mask import * # NOQA
>>> masks = [Mask.random(shape=(8, 8), rng=i) for i in range(2)]
>>> mask = Mask.union(*masks)
>>> print(mask.area)
>>> masks = [m.to_c_mask() for m in masks]
>>> mask = Mask.union(*masks)
>>> print(mask.area)
>>> masks = [m.to_bytes_rle() for m in masks]
>>> mask = Mask.union(*masks)
>>> print(mask.area)
Ignore:
import ubelt as ub
ti = ub.Timerit(100, bestof=10, verbose=2)
masks = [Mask.random(shape=(172, 172), rng=i) for i in range(2)]
for timer in ti.reset('native rle union'):
masks = [m.to_bytes_rle() for m in masks]
with timer:
mask = Mask.union(*masks)
for timer in ti.reset('native cmask union'):
masks = [m.to_c_mask() for m in masks]
with timer:
mask = Mask.union(*masks)
for timer in ti.reset('cmask->rle union'):
masks = [m.to_c_mask() for m in masks]
with timer:
mask = Mask.union(*[m.to_bytes_rle() for m in masks])
"""
if isinstance(self, Mask):
cls = self.__class__
items = list(it.chain([self], others))
else:
cls = Mask
items = others
if len(items) == 0:
raise Exception('empty union')
else:
format = items[0].format
if format == MaskFormat.C_MASK:
datas = [item.to_c_mask().data for item in items]
new_data = np.bitwise_or.reduce(datas)
new = cls(new_data, MaskFormat.C_MASK)
elif format == MaskFormat.BYTES_RLE:
datas = [item.to_bytes_rle().data for item in items]
cython_mask = _lazy_mask_backend()
if cython_mask is None:
raise NotImplementedError('pure python version of bytes rle union')
new_data = cython_mask.merge(datas, intersect=0)
if 'size' in new_data:
new_data['size'] = list(map(int, new_data['size'])) # python2 fix
new = cls(new_data, MaskFormat.BYTES_RLE)
else:
datas = [item.to_bytes_rle().data for item in items]
if cython_mask is None:
raise NotImplementedError('pure python version of union')
new_rle = cython_mask.merge(datas, intersect=0)
if 'size' in new_rle:
new_rle['size'] = list(map(int, new_rle['size'])) # python2 fix
new = cls(new_rle, MaskFormat.BYTES_RLE)
return new
[docs]
def intersection(self, *others):
"""
This can be used as a staticmethod or an instancemethod
Args:
*others: multiple input masks to intersect
Returns:
Mask: the intersection of the masks
Example:
>>> n = 3
>>> masks = [Mask.random(shape=(8, 8), rng=i) for i in range(n)]
>>> items = masks
>>> mask = Mask.intersection(*masks)
>>> areas = [item.area for item in items]
>>> print('areas = {!r}'.format(areas))
>>> print(mask.area)
>>> print(Mask.intersection(*masks).area / Mask.union(*masks).area)
"""
if isinstance(self, Mask):
cls = self.__class__
items = list(it.chain([self], others))
else:
cls = Mask
items = others
if len(items) == 0:
raise Exception('empty intersection')
else:
format = items[0].format
items2 = [item.toformat(format) for item in items]
if format == MaskFormat.C_MASK or format == MaskFormat.F_MASK:
bit_data = [item.data for item in items2]
new_data = np.bitwise_and.reduce(bit_data)
new = cls(new_data, format=format)
else:
rle_datas = [item.data for item in items]
cython_mask = _lazy_mask_backend()
if cython_mask is None:
raise NotImplementedError('pure python version of mask intersection')
encoded = cython_mask.merge(rle_datas, intersect=1)
if 'size' in encoded:
encoded['size'] = list(map(int, encoded['size'])) # python2 fix
new = cls(encoded, MaskFormat.BYTES_RLE)
return new
@property
def shape(self):
if self.format in {MaskFormat.BYTES_RLE, MaskFormat.ARRAY_RLE}:
if 'shape' in self.data:
return self.data['shape']
else:
return self.data['size']
if self.format in {MaskFormat.C_MASK, MaskFormat.F_MASK}:
return self.data.shape
@property
def area(self):
"""
Returns the number of non-zero pixels
Returns:
int: the number of non-zero pixels
Example:
>>> self = Mask.demo()
>>> self.area
150
"""
if self.format == MaskFormat.C_MASK:
return self.data.sum()
elif self.format == MaskFormat.F_MASK:
return self.data.sum()
elif self.format == MaskFormat.BYTES_RLE:
cython_mask = _lazy_mask_backend()
if cython_mask is None:
raise NotImplementedError('pure python version mask area')
return cython_mask.area([self.data])[0]
else:
raise NotImplementedError('Mask.area for {}'.format(self.format))
[docs]
def get_patch(self):
"""
Extract the patch with non-zero data
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> from kwimage.structs.mask import * # NOQA
>>> self = Mask.random(shape=(8, 8), rng=0)
>>> self.get_patch()
"""
x, y, w, h = self.get_xywh().astype(int).tolist()
output_dims = (h, w)
xy_offset = (-x, -y)
temp = self.translate(xy_offset, output_dims)
patch = temp.to_c_mask().data
return patch
# @profile
[docs]
def get_xywh(self):
"""
Gets the bounding xywh box coordinates of this mask
Returns:
ndarray: x, y, w, h: Note we dont use a Boxes object because
a general singular version does not yet exist.
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> self = Mask.random(shape=(8, 8), rng=0)
>>> self.get_xywh().tolist()
>>> self = Mask.random(rng=0).translate((10, 10))
>>> self.get_xywh().tolist()
Example:
>>> # test empty case
>>> import kwimage
>>> self = kwimage.Mask(np.empty((0, 0), dtype=np.uint8), format='c_mask')
>>> assert self.get_xywh().tolist() == [0, 0, 0, 0]
Ignore:
>>> import kwimage
>>> self = kwimage.Mask(np.zeros((768, 768), dtype=np.uint8), format='c_mask')
>>> x_coords = np.array([621, 752])
>>> y_coords = np.array([366, 292])
>>> self.data[y_coords, x_coords] = 1
>>> self.get_xywh()
>>> # References:
>>> # https://stackoverflow.com/questions/33281957/faster-alternative-to-numpy-where
>>> # https://answers.opencv.org/question/4183/what-is-the-best-way-to-find-bounding-box-for-binary-mask/
>>> import timerit
>>> ti = timerit.Timerit(100, bestof=10, verbose=2)
>>> for timer in ti.reset('time'):
>>> with timer:
>>> y_coords, x_coords = np.where(self.data)
>>> #
>>> for timer in ti.reset('time'):
>>> with timer:
>>> cv2.findNonZero(data)
self.data = np.random.rand(800, 700) > 0.5
import timerit
ti = timerit.Timerit(100, bestof=10, verbose=2)
for timer in ti.reset('time'):
with timer:
y_coords, x_coords = np.where(self.data)
#
for timer in ti.reset('time'):
with timer:
data = np.ascontiguousarray(self.data).astype(np.uint8)
cv2_coords = cv2.findNonZero(data)
>>> poly = self.to_multi_polygon()
"""
import cv2
if self.format == MaskFormat.C_MASK:
# findNonZero seems much faster than np.where
data = np.ascontiguousarray(self.data).astype(np.uint8)
cv2_coords = cv2.findNonZero(data)
if cv2_coords is None:
xywh = np.array([0, 0, 0, 0])
else:
x_coords = cv2_coords[:, 0, 0]
y_coords = cv2_coords[:, 0, 1]
# # y_coords, x_coords = np.where(self.data)
# if len(x_coords) == 0:
# xywh = np.array([0, 0, 0, 0])
# else:
tl_x = x_coords.min()
br_x = x_coords.max()
tl_y = y_coords.min()
br_y = y_coords.max()
w = br_x - tl_x
h = br_y - tl_y
xywh = np.array([tl_x, tl_y, w, h])
elif self.format == MaskFormat.F_MASK:
x_coords, y_coords = np.where(self.data)
if len(x_coords) == 0:
xywh = np.array([0, 0, 0, 0])
else:
tl_x = x_coords.min()
br_x = x_coords.max()
tl_y = y_coords.min()
br_y = y_coords.max()
w = br_x - tl_x
h = br_y - tl_y
xywh = np.array([tl_x, tl_y, w, h])
else:
try:
self_rle = self.to_bytes_rle()
cython_mask = _lazy_mask_backend()
if cython_mask is None:
raise NotImplementedError('pure python version get_xywh')
xywh = cython_mask.toBbox([self_rle.data])[0]
except NotImplementedError:
self_c = self.to_c_mask() # alternate path
xywh = self_c.get_xywh()
return xywh
[docs]
def bounding_box(self):
"""
Returns an axis-aligned bounding box for this mask
Returns:
kwimage.Boxes
"""
import kwimage
xywh = self.get_xywh()
boxes = kwimage.Boxes([xywh], 'xywh')
return boxes
[docs]
def get_polygon(self):
"""
DEPRECATED: USE to_multi_polygon
Returns a list of (x,y)-coordinate lists. The length of the list is
equal to the number of disjoint regions in the mask.
Returns:
List[ndarray]: polygon around each connected component of the
mask. Each ndarray is an Nx2 array of xy points.
Note:
The returned polygon may not surround points that are only one
pixel thick.
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> from kwimage.structs.mask import * # NOQA
>>> self = Mask.random(shape=(8, 8), rng=0)
>>> polygons = self.get_polygon()
>>> print('polygons = ' + ub.urepr(polygons))
>>> polygons = self.get_polygon()
>>> self = self.to_bytes_rle()
>>> other = Mask.from_polygons(polygons, self.shape)
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> image = np.ones(self.shape)
>>> image = self.draw_on(image, color='blue')
>>> image = other.draw_on(image, color='red')
>>> kwplot.imshow(image)
Ignore:
polygons = [
np.array([[6, 4],[7, 4]], dtype=np.int32),
np.array([[0, 1],[0, 3],[2, 3],[2, 1]], dtype=np.int32),
]
"""
import cv2
ub.schedule_deprecation(
'kwimage', 'Mask.get_polygon', 'method',
migration='use Mask.to_multi_polygon instead',
deprecate='0.9.5', error='1.0.0', remove='1.1.0',
)
p = 2
if 0:
mask = self.to_c_mask().data
offset = (-p, -p)
else:
# It should be faster to only extract the patch of non-zero values
x, y, w, h = self.get_xywh().astype(int).tolist()
output_dims = (h, w)
xy_offset = (-x, -y)
temp = self.translate(xy_offset, output_dims)
mask = temp.to_c_mask().data
offset = (x - p, y - p)
padded_mask = cv2.copyMakeBorder(mask, p, p, p, p,
cv2.BORDER_CONSTANT, value=0)
# print('src =\n{!r}'.format(padded_mask))
kernel = np.array([
[1, 1, 0],
[1, 1, 0],
[0, 0, 0],
], dtype=np.uint8)
padded_mask = cv2.dilate(padded_mask, kernel, dst=padded_mask)
# print('dst =\n{!r}'.format(padded_mask))
mode = cv2.RETR_LIST
# mode = cv2.RETR_EXTERNAL
# https://docs.opencv.org/3.1.0/d3/dc0/
# group__imgproc__shape.html#ga4303f45752694956374734a03c54d5ff
method = cv2.CHAIN_APPROX_SIMPLE
# method = cv2.CHAIN_APPROX_NONE
# method = cv2.CHAIN_APPROX_TC89_KCOS
# Different versions of cv2 have different return types
_ret = cv2.findContours(padded_mask, mode, method, offset=offset)
if len(_ret) == 2:
_contours, _hierarchy = _ret
else:
_img, _contours, _hierarchy = _ret
polygon = [c[:, 0, :] for c in _contours]
if False:
import kwplot
import kwimage
kwplot.autompl()
# Note that cv2 draw contours doesnt have the 1-pixel thick problem
# it seems to just be the way the coco implementation is
# interpreting polygons.
image = kwimage.atleast_3channels(mask)
canvas = np.zeros(image.shape, dtype="uint8")
cv2.drawContours(canvas, _contours, -1, (255, 0, 0), 1)
kwplot.imshow(canvas)
return polygon
[docs]
def to_mask(self, dims=None, pixels_are='points'):
"""
Converts to a mask object (which does nothing because this already is
mask object!)
Returns:
kwimage.Mask
"""
return self
[docs]
def to_boxes(self):
"""
Returns the bounding box of the mask.
Returns:
kwimage.Boxes
"""
import kwimage
boxes = kwimage.Boxes([self.get_xywh()], 'xywh')
return boxes
# @profile
[docs]
def to_multi_polygon(self, pixels_are='points'):
"""
Returns a MultiPolygon object fit around this raster including disjoint
pieces and holes.
Args:
pixel_are (str):
Can either be "points" or "areas".
If pixels are "points", the we treat each pixel (i, j) as a
single infinitely small point at (i, j). As such, some polygons
may have zero area.
If pixels are "areas", then each pixel (i, j) represents a
square with coordinates ([i - 0.5, j - 0.5], [i + 0.5, j -
0.5], [i + 0.5, j + 0.5], and [i - 0.5, j + 0.5]). Must have
rasterio installed to use this method.
Returns:
kwimage.MultiPolygon: vectorized representation
Note:
The OpenCV (and thus this function) coordinate system places
coordinates at the center of pixels, and the polygon is traced
tightly around these coordinates. A single pixel is not considered
to have any width, so polygon edges will directly trace through the
centers of pixels, and in the case where an object is only 1 pixel
thick, this will produce a polygon that is not a valid shapely
polygon.
TODO:
- [x] add a flag where polygons consider pixels to have width and the resulting polygon is traced around the pixel edges, not the pixel centers.
- [ ] Polygons and Masks should keep track of what "pixels_are"
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> from kwimage.structs.mask import * # NOQA
>>> self = Mask.demo()
>>> self = self.scale(5)
>>> multi_poly = self.to_multi_polygon()
>>> # xdoctest: +REQUIRES(module:kwplot)
>>> # xdoctest: +REQUIRES(--show)
>>> self.draw(color='red')
>>> multi_poly.scale(1.1).draw(color='blue')
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> image = np.ones(self.shape)
>>> image = self.draw_on(image, color='blue')
>>> #image = other.draw_on(image, color='red')
>>> kwplot.imshow(image)
>>> multi_poly.draw()
Example:
>>> # Test empty cases
>>> import kwimage
>>> mask0 = kwimage.Mask(np.zeros((0, 0), dtype=np.uint8), format='c_mask')
>>> mask1 = kwimage.Mask(np.zeros((1, 1), dtype=np.uint8), format='c_mask')
>>> mask2 = kwimage.Mask(np.zeros((2, 2), dtype=np.uint8), format='c_mask')
>>> mask3 = kwimage.Mask(np.zeros((3, 3), dtype=np.uint8), format='c_mask')
>>> pixels_are = 'points'
>>> poly0 = mask0.to_multi_polygon(pixels_are=pixels_are)
>>> poly1 = mask1.to_multi_polygon(pixels_are=pixels_are)
>>> poly2 = mask2.to_multi_polygon(pixels_are=pixels_are)
>>> poly3 = mask3.to_multi_polygon(pixels_are=pixels_are)
>>> assert len(poly0) == 0
>>> assert len(poly1) == 0
>>> assert len(poly2) == 0
>>> assert len(poly3) == 0
>>> # xdoctest: +REQUIRES(module:rasterio)
>>> pixels_are = 'areas'
>>> poly0 = mask0.to_multi_polygon(pixels_are=pixels_are)
>>> poly1 = mask1.to_multi_polygon(pixels_are=pixels_are)
>>> poly2 = mask2.to_multi_polygon(pixels_are=pixels_are)
>>> poly3 = mask3.to_multi_polygon(pixels_are=pixels_are)
>>> assert len(poly0) == 0
>>> assert len(poly1) == 0
>>> assert len(poly2) == 0
>>> assert len(poly3) == 0
Example:
>>> # Test full ones cases
>>> import kwimage
>>> mask1 = kwimage.Mask(np.ones((1, 1), dtype=np.uint8), format='c_mask')
>>> mask2 = kwimage.Mask(np.ones((2, 2), dtype=np.uint8), format='c_mask')
>>> mask3 = kwimage.Mask(np.ones((3, 3), dtype=np.uint8), format='c_mask')
>>> pixels_are = 'points'
>>> poly1 = mask1.to_multi_polygon(pixels_are=pixels_are)
>>> poly2 = mask2.to_multi_polygon(pixels_are=pixels_are)
>>> poly3 = mask3.to_multi_polygon(pixels_are=pixels_are)
>>> assert np.all(poly1.to_mask(mask1.shape).data == 1)
>>> assert np.all(poly2.to_mask(mask2.shape).data == 1)
>>> assert np.all(poly3.to_mask(mask3.shape).data == 1)
>>> # xdoctest: +REQUIRES(module:rasterio)
>>> pixels_are = 'areas'
>>> poly1 = mask1.to_multi_polygon(pixels_are=pixels_are)
>>> poly2 = mask2.to_multi_polygon(pixels_are=pixels_are)
>>> poly3 = mask3.to_multi_polygon(pixels_are=pixels_are)
>>> assert np.all(poly1.to_mask(mask1.shape).data == 1)
>>> assert np.all(poly2.to_mask(mask2.shape).data == 1)
>>> assert np.all(poly3.to_mask(mask3.shape).data == 1)
Example:
>>> # Corner case, only two pixels are on
>>> import kwimage
>>> self = kwimage.Mask(np.zeros((768, 768), dtype=np.uint8), format='c_mask')
>>> x_coords = np.array([621, 752])
>>> y_coords = np.array([366, 292])
>>> self.data[y_coords, x_coords] = 1
>>> poly = self.to_multi_polygon()
Ignore:
poly.to_mask(self.shape).data.sum()
self.to_array_rle().to_c_mask().data.sum()
temp.to_c_mask().data.sum()
Example:
>>> # xdoctest: +REQUIRES(module:rasterio)
>>> import kwimage
>>> dims = (10, 10)
>>> data = np.zeros(dims, dtype=np.uint8)
>>> data[0, 3:5] = 1
>>> data[9, 1:3] = 1
>>> data[3:5, 0:2] = 1
>>> data[1, 1] = 1
>>> # 1 pixel L shape
>>> data[3, 5] = 1
>>> data[4, 5] = 1
>>> data[4, 6] = 1
>>> data[1, 5] = 1
>>> data[2, 6] = 1
>>> data[3, 7] = 1
>>> data[6, 1] = 1
>>> data[7, 1] = 1
>>> data[7, 2] = 1
>>> data[6:10, 5] = 1
>>> data[6:10, 8] = 1
>>> data[9, 5:9] = 1
>>> data[6, 5:9] = 1
>>> #data = kwimage.imresize(data, scale=2.0, interpolation='nearest')
>>> self = kwimage.Mask.coerce(data)
>>> #self = self.translate((0, 0), output_dims=(10, 9))
>>> self = self.translate((0, 1), output_dims=(11, 11))
>>> dims = self.shape[0:2]
>>> multi_poly1 = self.to_multi_polygon(pixels_are='points')
>>> multi_poly2 = self.to_multi_polygon(pixels_are='areas')
>>> # xdoctest: +REQUIRES(--show)
>>> import kwplot
>>> kwplot.autompl()
>>> pretty_data = kwplot.make_heatmask(self.data/1.0, cmap='magma')[..., 0:3]
>>> def _pixel_grid_lines(self, ax):
>>> h, w = self.data.shape[0:2]
>>> ybasis = np.arange(0, h) + 0.5
>>> xbasis = np.arange(0, w) + 0.5
>>> xmin = 0 - 0.5
>>> xmax = w - 0.5
>>> ymin = 0 - 0.5
>>> ymax = h - 0.5
>>> ax.hlines(y=ybasis, xmin=xmin, xmax=xmax, color="gainsboro")
>>> ax.vlines(x=xbasis, ymin=ymin, ymax=ymax, color="gainsboro")
>>> def _setup_grid(self, pnum):
>>> ax = kwplot.imshow(pretty_data, show_ticks=True, pnum=pnum)[1]
>>> # The gray ticks show the center of the pixels
>>> ax.grid(color='dimgray', linewidth=0.5)
>>> ax.set_xticks(np.arange(self.data.shape[1]))
>>> ax.set_yticks(np.arange(self.data.shape[0]))
>>> # Also draw black lines around the edges of the pixels
>>> _pixel_grid_lines(self, ax=ax)
>>> return ax
>>> # Overlay the extracted polygons
>>> ax = _setup_grid(self, pnum=(2, 3, 1))
>>> ax.set_title('input binary mask data')
>>> ax = _setup_grid(self, pnum=(2, 3, 2))
>>> multi_poly1.draw(linewidth=5, alpha=0.5, radius=0.2, ax=ax, fill=False, vertex=0.2)
>>> ax.set_title('opencv "point" polygons')
>>> ax = _setup_grid(self, pnum=(2, 3, 3))
>>> multi_poly2.draw(linewidth=5, alpha=0.5, radius=0.2, color='limegreen', ax=ax, fill=False, vertex=0.2)
>>> ax.set_title('raterio "area" polygons')
>>> ax.figure.suptitle(ub.codeblock(
>>> '''
>>> Gray lines are coordinates and pass through pixel centers (integer coords)
>>> White lines trace pixel boundaries (fractional coords)
>>> '''))
>>> raster1 = multi_poly1.to_mask(dims, pixels_are='points')
>>> raster2 = multi_poly2.to_mask(dims, pixels_are='areas')
>>> kwplot.imshow(raster1.draw_on(), pnum=(2, 3, 5), title='rasterized')
>>> kwplot.imshow(raster2.draw_on(), pnum=(2, 3, 6), title='rasterized')
"""
from kwimage.structs.polygon import Polygon, MultiPolygon
# Note: it is not necessarilly faster to to only exact the patch of
# non-zero values
temp_mask = self.to_c_mask(copy=False).data
if temp_mask.dtype.kind == 'b':
temp_mask = temp_mask.astype(np.uint8)
# TODO: polygons and masks should keep track what "pixels_are"
polys = _find_contours(temp_mask, pixels_are=pixels_are)
poly_list = [Polygon(**data) for data in polys]
multi_poly = MultiPolygon(poly_list)
return multi_poly
[docs]
def get_convex_hull(self):
"""
Returns a list of xy points around the convex hull of this mask
Note:
The returned polygon may not surround points that are only one
pixel thick.
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> self = Mask.random(shape=(8, 8), rng=0)
>>> polygons = self.get_convex_hull()
>>> print('polygons = ' + ub.urepr(polygons))
>>> other = Mask.from_polygons(polygons, self.shape)
"""
import cv2
mask = self.to_c_mask().data
cc_y, cc_x = np.where(mask)
points = np.vstack([cc_x, cc_y]).T
hull = cv2.convexHull(points)[:, 0, :]
return hull
[docs]
def iou(self, other):
"""
The area of intersection over the area of union
TODO:
- [ ] Write plural Masks version of this class, which should be able to perform this operation more efficiently.
CommandLine:
xdoctest -m kwimage.structs.mask Mask.iou
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> self = Mask.demo()
>>> other = self.translate(1)
>>> iou = self.iou(other)
>>> print('iou = {:.4f}'.format(iou))
iou = 0.0830
>>> iou2 = self.intersection(other).area / self.union(other).area
>>> print('iou2 = {:.4f}'.format(iou2))
"""
item1 = self.to_bytes_rle(copy=False).data
item2 = other.to_bytes_rle(copy=False).data
# I'm not sure what passing `pyiscrowd` actually does here
# TODO: determine what `pyiscrowd` does, and document it.
pyiscrowd = np.array([0], dtype=np.uint8)
cython_mask = _lazy_mask_backend()
if cython_mask is None:
raise NotImplementedError('pure python version iou')
iou = cython_mask.iou([item1], [item2], pyiscrowd)[0, 0]
return iou
[docs]
@classmethod
def coerce(Mask, data, dims=None):
"""
Attempts to auto-inspect the format of the data and conver to Mask
Args:
data (Any) : the data to coerce
dims (Tuple): required for certain formats like polygons
height / width of the source image
Returns:
Mask: the constructed mask object
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> segmentation = {'size': [5, 9], 'counts': ';?1B10O30O4'}
>>> polygon = [
>>> [np.array([[3, 0],[2, 1],[2, 4],[4, 4],[4, 3],[7, 0]])],
>>> [np.array([[2, 1],[2, 2],[4, 2],[4, 1]])],
>>> ]
>>> dims = (9, 5)
>>> mask = (np.random.rand(32, 32) > .5).astype(np.uint8)
>>> Mask.coerce(polygon, dims).to_bytes_rle()
>>> Mask.coerce(segmentation).to_bytes_rle()
>>> Mask.coerce(mask).to_bytes_rle()
"""
# TODO: this could be more explicitly written
from kwimage.structs.segmentation import _coerce_coco_segmentation
self = _coerce_coco_segmentation(data, dims)
self = self.to_mask(dims)
return self
[docs]
def _to_coco(self):
""" use to_coco instead """
return self.to_coco()
[docs]
def to_coco(self, style='orig'):
"""
Convert the Mask to a COCO json representation based on the current
format.
A COCO mask is formatted as a run-length-encoding (RLE), of which there
are two variants: (1) a array RLE, which is slightly more readable and
extensible, and (2) a bytes RLE, which is slightly more concise. The
returned format will depend on the current format of the Mask object.
If it is in "bytes_rle" format, it will be returned in that format,
otherwise it will be converted to the "array_rle" format and returned
as such.
Args:
style (str): Does nothing for this particular method, exists for
API compatibility and if alternate encoding styles are
implemented in the future.
Returns:
dict: either a bytes-rle or array-rle encoding, depending
on the current mask format. The keys in this dictionary
are as follows:
counts (List[int] | str): the array or bytes rle encoding
size (Tuple[int]): the height and width of the encoded mask
*see note*.
shape (Tuple[int]): only present in array-rle mode. This
is also the height/width of the underlying encoded array.
This exists for semantic consistency with other kwimage
conventions, and is not part of the original coco spec.
order (str): only present in array-rle mode.
Either C or F, indicating if counts is aranged in row-major
or column-major order. For COCO-compatibility this is
always returned in F (column-major) order.
binary (bool): only present in array-rle mode.
For COCO-compatibility this is always returned as False,
indicating the mask only contains binary 0 or 1 values.
Note:
The output dictionary will contain a key named "size", this is the
only location in kwimage where "size" refers to a tuple in
(height/width) order, in order to be backwards compatible with the
original coco spec. In all other locations in kwimage a "size" will
refer to a (width/height) ordered tuple.
SeeAlso:
:func: kwimage.im_runlen.encode_run_length - backend function that
does array-style run length encoding.
Example:
>>> # xdoctest: +REQUIRES(--mask)
>>> from kwimage.structs.mask import * # NOQA
>>> self = Mask.demo()
>>> coco_data1 = self.toformat('array_rle').to_coco()
>>> coco_data2 = self.toformat('bytes_rle').to_coco()
>>> print('coco_data1 = {}'.format(ub.urepr(coco_data1, nl=1)))
>>> print('coco_data2 = {}'.format(ub.urepr(coco_data2, nl=1)))
coco_data1 = {
'binary': True,
'counts': [47, 5, 3, 1, 14, ... 1, 4, 19, 141],
'order': 'F',
'shape': (23, 32),
'size': (23, 32),
}
coco_data2 = {
'counts': '_153L;4EL...ON3060L0N060L0Nb0Y4',
'size': [23, 32],
}
"""
use_bytes = (self.format == MaskFormat.BYTES_RLE)
if use_bytes:
try:
bytes_rle = self.to_bytes_rle()
except NotImplementedError:
use_bytes = False
if use_bytes:
# This is actually the original style, but it relies on
# to_bytes_rle, which doesnt always work.
data = bytes_rle.data.copy()
text = data['counts']
if isinstance(text, bytes):
text = text.decode('utf8')
data['counts'] = text
return data
else:
data = self.to_array_rle().data.copy()
data['counts'] = data['counts'].tolist()
return data
[docs]
class MaskList(_generic.ObjectList):
"""
Store and manipulate multiple masks, usually within the same image
"""
[docs]
def to_polygon_list(self):
"""
Converts all mask objects to multi-polygon objects
Returns:
kwimage.PolygonList
"""
import kwimage
new = kwimage.PolygonList([
None if mask is None else mask.to_multi_polygon()
for mask in self
])
return new
[docs]
def to_segmentation_list(self):
"""
Converts all items to segmentation objects
Returns:
kwimage.SegmentationList
"""
import kwimage
new = kwimage.SegmentationList([
None if item is None else kwimage.Segmentation.coerce(item)
for item in self
])
return new
[docs]
def to_mask_list(self, dims=None, pixels_are='points'):
"""
returns this object
Returns:
kwimage.MaskList
"""
return self
def _find_contours(binary_mask, pixels_are='points'):
"""
Finds the contours in a binary mask
Args:
binary_mask (ndarray): a binary valued numpy array
pixel_are (str):
Can either be "points" or "areas".
If pixels are "points", the we treat each pixel (i, j) as a
single infinitely small point at (i, j). As such, some polygons
may have zero area.
If pixels are "areas", then each pixel (i, j) represents a
square with coordinates ([i - 0.5, j - 0.5], [i + 0.5, j -
0.5], [i + 0.5, j + 0.5], and [i - 0.5, j + 0.5]). Must have
rasterio installed to use this method.
Returns:
List[Dict]: list of polygon exteriors and interiors
"""
if pixels_are == 'points':
# Note this mask needs to be uint8 not bool
polys = _opencv_find_contours(binary_mask)
elif pixels_are == 'areas':
polys = _rasterio_find_contours(binary_mask)
else:
raise KeyError(pixels_are)
return polys
def _write_img_in_terminal(binary_mask):
"""
Could be slightly more sophisticated with this feature and have masks able
to be inspected at lower resolution in the terminal. It's a nice-to-have
and likely not necessary for production.
References:
https://dev.to/pranavbaburaj/print-images-to-console-using-python-23k6
https://github.com/lainq/img/blob/main/image.py
https://github.com/eddieantonio/imgcat
https://github.com/stefanhaustein/TerminalImageViewer
"""
import kwarray
block = "\u2584"
lines = []
for row in binary_mask:
line_parts = []
groups = kwarray.group_consecutive(row, offset=0)
for group in groups:
value = group[0]
if value:
line_parts.extend(ub.color_text(block * len(group), 'white'))
else:
line_parts.extend(ub.color_text(block * len(group), 'black'))
line = ''.join(line_parts)
lines.append(line)
text = '\n'.join(lines)
print(text)
def _rasterio_find_contours(label_img):
"""
Note:
The :func:`rasterio.features.shapes` is capable of multi-label polygon
extraction.
Ignore:
label_img = kwimage.Mask.demo().data
label_img[:, 0:5][binary_mask[:, 0:5] > 0] = 2
"""
import numpy as np
from rasterio import features
polys = []
if label_img.size > 0:
shapes = list(features.shapes(label_img, connectivity=8))
translate = np.array([-0.5, -0.5]).ravel()[None, :]
for shape, value in shapes:
if value > 0:
coords = shape['coordinates']
exterior = np.array(coords[0]) + translate
interiors = [np.array(p) + translate for p in coords[1:]]
polys.append({
'exterior': exterior,
'interiors': interiors,
# 'value': value,
})
return polys
def _opencv_find_contours(binary_mask):
import cv2
p = 2
offset = (0 - p, 0 - p)
padded_mask = cv2.copyMakeBorder(binary_mask, p, p, p, p,
cv2.BORDER_CONSTANT, value=0)
# https://docs.opencv.org/3.1.0/d3/dc0/group__imgproc__shape.html#ga4303f45752694956374734a03c54d5ff
mode = cv2.RETR_CCOMP
method = cv2.CHAIN_APPROX_SIMPLE
# method = cv2.CHAIN_APPROX_TC89_KCOS
# Different versions of cv2 have different return types
_ret = cv2.findContours(padded_mask, mode, method, offset=offset)
if len(_ret) == 2:
_contours, _hierarchy = _ret
else:
_img, _contours, _hierarchy = _ret
if _hierarchy is None:
if len(_contours) == 0:
return []
raise AssertionError('Contour extraction from binary mask failed')
_hierarchy = _hierarchy[0]
polys = {i: {'exterior': None, 'interiors': []}
for i, row in enumerate(_hierarchy) if row[3] == -1}
for i, row in enumerate(_hierarchy):
# This only works in RETR_CCOMP mode
nxt, prev, child, parent = row[0:4]
if parent != -1:
coords = _contours[i][:, 0, :]
polys[parent]['interiors'].append(coords)
else:
coords = _contours[i][:, 0, :]
# if len(coords) < 3:
# raise Exception
polys[i]['exterior'] = coords
polys = list(polys.values())
return polys
if __name__ == '__main__':
"""
CommandLine:
xdoctest -m ~/code/kwimage/kwimage/structs/mask.py
"""
import xdoctest
xdoctest.doctest_module(__file__)