RandomResizedCrop in PyTorch (1)

Buy Me a Coffee☕ *Memos: My post explains OxfordIIITPet(). RandomResizedCrop() can crop a random part of an image, then resize it to a given size as shown below: *Memos: The 1st argument for initialization is size(Required-Type:int or tuple/list(int) or size()): *Memos: It's [height, width]. It must be 1 <= x.

Feb 9, 2025 - 22:28
 0
RandomResizedCrop in PyTorch (1)

Buy Me a Coffee

*Memos:

  • My post explains OxfordIIITPet().

RandomResizedCrop() can crop a random part of an image, then resize it to a given size as shown below:

*Memos:

  • The 1st argument for initialization is size(Required-Type:int or tuple/list(int) or size()): *Memos:
    • It's [height, width].
    • It must be 1 <= x.
    • A tuple/list must be 1D with 1 or 2 elements.
    • A single value(int or tuple/list(int)) means [size, size].
  • The 2nd argument for initialization is scale(Optional-Default:(0.08, 1.0)-Type:tuple/list(int or float)): *Memos:
    • It's [min, max] so it must be min <= max.
    • It must be 0 <= x.
    • A tuple/list must be 1D with 2 elements.
    • A pair of 0s or a pair of values with 1 <= x gets the same result.
  • The 3rd argument for initialization is ratio(Optional-Default:(0.75, 1.3333333333333333)-Type:tuple/list(int or float)): *Memos:
    • It's [min, max] so it must be min <= max.
    • It must be 0 < x.
    • A tuple/list must be 1D with 2 elements.
  • The 4th argument for initialization is interpolation(Optional-Default:InterpolationMode.BILINEAR-Type:InterpolationMode).
  • The 5th argument for initialization is antialias(Optional-Default:True-Type:bool). *Even if it's set to False, antialiasing is always applied to a PIL Image if interpolation is InterpolationMode.BILINEAR or InterpolationMode.BICUBIC.
  • The 1st argument is img(Required-Type:PIL Image or tensor(int)): *Memos:
    • A tensor must be 3D.
    • Don't use img=.
  • Using v2 is recommended according to "V1 or V2? Which one should I use?".
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import RandomResizedCrop
from torchvision.transforms.functional import InterpolationMode

# The second construction passes the documented default values explicitly,
# so both calls configure the transform the same way.
rrc = RandomResizedCrop(size=100)
rrc = RandomResizedCrop(size=100,
                        scale=(0.08, 1.0),
                        ratio=(0.75, 1.3333333333333333),
                        interpolation=InterpolationMode.BILINEAR,
                        antialias=True)
rrc
# RandomResizedCrop(size=(100, 100),
#                   scale=(0.08, 1.0),
#                   ratio=(0.75, 1.3333333333333333),
#                   interpolation=InterpolationMode.BILINEAR,
#                   antialias=True)

# Each constructor argument is exposed as an attribute of the same name.
rrc.size
# (100, 100)

rrc.scale
# (0.08, 1.0)

rrc.ratio
# (0.75, 1.3333333333333333)

# Fixed: the attribute is `interpolation`; `interpolationa` raised
# AttributeError.
rrc.interpolation
# <InterpolationMode.BILINEAR: 'bilinear'>

rrc.antialias
# True

# Untransformed baseline. `download` is not passed, so the dataset files
# are presumably expected to already exist under "data" -- TODO confirm.
origin_data = OxfordIIITPet(root="data", transform=None)

# Square outputs: a single int `size` means [size, size].
# The `s` prefix in each name stands for "size".
s1000_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=1000)
    # Equivalent spellings of the same size:
    # transform=RandomResizedCrop(size=[1000])
    # transform=RandomResizedCrop(size=[1000, 1000])
)
s500_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=500)
)
s100_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=100)
)
s50_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=50)
)
s10_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=10)
)
s1_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=1)
)

# Rectangular outputs: `size` is given as [height, width].
s600_900_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=[600, 900])
)
s900_600_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=[900, 600])
)
s200_300_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=[200, 300])
)
s300_200_data = OxfordIIITPet(
    root="data", transform=RandomResizedCrop(size=[300, 200])
)

import matplotlib.pyplot as plt

def show_images1(data, main_title=None):
    """Draw up to the first five images of ``data`` in a single row.

    Args:
        data: Iterable yielding two-element samples; the first element is
            drawn as an image and the second is ignored.
        main_title: Text passed to ``plt.suptitle`` as the figure title.
    """
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    # range(1, 6) supplies the 1-based subplot slots and caps the row at
    # five images; zip() stops as soon as either side is exhausted.
    numbered_samples = zip(range(1, 6), data)
    for slot, sample in numbered_samples:
        image, _ = sample
        plt.subplot(1, 5, slot)
        plt.imshow(X=image)
    plt.tight_layout()
    plt.show()

# First gallery: the untransformed images, then every square-size dataset.
for _title, _dataset in [
    ("origin_data", origin_data),
    ("s1000_data", s1000_data),
    ("s500_data", s500_data),
    ("s100_data", s100_data),
    ("s50_data", s50_data),
    ("s10_data", s10_data),
    ("s1_data", s1_data),
]:
    show_images1(data=_dataset, main_title=_title)
print()
# Second gallery: the untransformed images, then every rectangular dataset.
for _title, _dataset in [
    ("origin_data", origin_data),
    ("s600_900_data", s600_900_data),
    ("s900_600_data", s900_600_data),
    ("s200_300_data", s200_300_data),
    ("s300_200_data", s300_200_data),
]:
    show_images1(data=_dataset, main_title=_title)

# ↓ ↓ ↓ ↓ ↓ ↓ The code below is identical to the code above. ↓ ↓ ↓ ↓ ↓ ↓ 
def show_images2(data, main_title=None, s=None, sc=(0.08, 1.0),
                 r=(0.75, 1.3333333333333333),
                 ip=InterpolationMode.BILINEAR, a=True):
    """Draw up to five images of ``data`` in a row, optionally cropped.

    When ``s`` is truthy, each image is passed through a
    ``RandomResizedCrop`` built from the given arguments before it is
    drawn; otherwise the images are drawn unchanged.

    Args:
        data: Iterable yielding two-element samples; the first element is
            the image and the second is ignored.
        main_title: Text passed to ``plt.suptitle`` as the figure title.
        s: ``size`` argument of ``RandomResizedCrop``; a falsy value
            (the default ``None``) disables the transform.
        sc: ``scale`` argument of ``RandomResizedCrop``.
        r: ``ratio`` argument of ``RandomResizedCrop``.
        ip: ``interpolation`` argument of ``RandomResizedCrop``.
        a: ``antialias`` argument of ``RandomResizedCrop``.
    """
    # Build the transform once, before any figure is created: its arguments
    # are the same for every image, and invalid arguments fail up front.
    # Every call of the transform still samples its own random crop.
    rrc = None
    if s:
        rrc = RandomResizedCrop(size=s, scale=sc, # Here
                                ratio=r, interpolation=ip,
                                antialias=a)
    plt.figure(figsize=[10, 5])
    plt.suptitle(t=main_title, y=0.8, fontsize=14)
    # range(1, 6) supplies the 1-based subplot slots and caps the row at
    # five images.
    for i, (im, _) in zip(range(1, 6), data):
        plt.subplot(1, 5, i)
        if rrc is not None:
            plt.imshow(X=rrc(im)) # Here
        else:
            plt.imshow(X=im)
    plt.tight_layout()
    plt.show()

# First gallery: untransformed images (s=None, the default), then each
# square output size applied to the same source dataset.
for _title, _size in [
    ("origin_data", None),
    ("s1000_data", 1000),
    ("s500_data", 500),
    ("s100_data", 100),
    ("s50_data", 50),
    ("s10_data", 10),
    ("s1_data", 1),
]:
    show_images2(data=origin_data, main_title=_title, s=_size)
print()
# Second gallery: untransformed images, then each [height, width] size.
for _title, _size in [
    ("origin_data", None),
    ("s600_900_data", [600, 900]),
    ("s900_600_data", [900, 600]),
    ("s200_300_data", [200, 300]),
    ("s300_200_data", [300, 200]),
]:
    show_images2(data=origin_data, main_title=_title, s=_size)

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description

Image description