Why does PyTorch's transforms.Compose reach a maximum CPU utilization of 117%, exceeding the single-core limit?

When I run the code below, htop shows CPU utilization hovering between 116% and 117%:

from torchvision import transforms
from PIL import Image
from torch import Tensor
import time

preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

image = Image.open('bh.jpg')

s = time.time()
for i in range(200000):
    tensor: Tensor = preprocess(image)
e = time.time()
print(e - s)

Generally speaking, Python has the GIL, so why can utilization exceed 100%?

If it were 101% or 102% that would be unremarkable, since htop's accounting has some margin of error.

For comparison, I wrote my own code equivalent to transforms.Compose:

from PIL import Image
import numpy as np
from numpy import ndarray
from PIL.Image import Resampling
from typing import List, Tuple, Optional
import time

mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))

def center_crop(image: Image.Image, output_size: Tuple[int, int]) -> Image.Image:
    w, h = image.size
    th, tw = output_size
    left = (w - tw) / 2
    top = (h - th) / 2
    right = (w + tw) / 2
    bottom = (h + th) / 2
    return image.crop((left, top, right, bottom))

def compute_resized_output_size(
    image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
) -> List[int]:
    if len(size) == 1:  # size specified only for the smallest edge
        h, w = image_size
        short, long = (w, h) if w <= h else (h, w)
        requested_new_short = size if isinstance(size, int) else size[0]
        new_short, new_long = requested_new_short, int(requested_new_short * long / short)
        if max_size is not None:
            if max_size <= requested_new_short:
                raise ValueError(
                    f"max_size = {max_size} must be strictly greater than the requested "
                    f"size for the smaller edge size = {size}"
                )
            if new_long > max_size:
                new_short, new_long = int(max_size * new_short / new_long), max_size
        new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
    else:  # both h and w specified
        new_w, new_h = size[1], size[0]
    return [new_h, new_w]

def imresize(image: Image.Image, imsize: int = 224) -> Image.Image:
    w, h = image.size
    new_h, new_w = compute_resized_output_size((h, w), [imsize])
    # Note: thumbnail() resizes in place (and only ever shrinks), so after the
    # first loop iteration the source image is already at the target size.
    image.thumbnail((new_w, new_h), Image.Resampling.BILINEAR)
    image = center_crop(image, (imsize, imsize))
    return image

def preprocess(image: Image.Image) -> ndarray:
    # resized_image = image.resize((224, 224), resample=Resampling.BILINEAR)
    resized_image = imresize(image, 224)
    resized_image_ndarray = np.array(resized_image)
    # HWC -> CHW, then scale to [0, 1] and normalize per channel
    transposed_image_ndarray = resized_image_ndarray.transpose((2, 0, 1))
    transposed_image_ndarrayfloat32 = transposed_image_ndarray.astype(np.float32)
    transposed_image_ndarrayfloat32 /= 255.0
    normalized_image_ndarray = (transposed_image_ndarrayfloat32 - mean) / std
    normalized_image_ndarrayfloat32 = normalized_image_ndarray.astype(np.float32)
    return normalized_image_ndarrayfloat32

image = Image.open('bh.jpg')

s = time.time()
for i in range(20000):
    preprocessed_ndarray: ndarray = preprocess(image)
e = time.time()
print(e - s)

But this code's CPU utilization never exceeds 100%.

Why? What magic is torch using?


Answer:

The GIL only constrains Python-level code. Much of torch's underlying machinery is implemented in C/C++, which is not subject to the GIL. Note that a GIL-free C++ call by itself still occupies at most one core; what pushes usage past 100% is that PyTorch's C++ kernels can additionally run their own worker threads (its intra-op thread pool), so a single Python thread can drive more than one core.
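
One way to test that hypothesis: PyTorch exposes torch.set_num_threads to cap its intra-op thread pool. Here is a minimal sketch, assuming the same bh.jpg and pipeline from the question; if the extra ~17% comes from torch's internal workers, htop should drop to ~100% after the cap:

import torch
from torchvision import transforms
from PIL import Image

# Intra-op thread pool size; typically defaults to the number of cores.
print(torch.get_num_threads())

# Cap torch's intra-op parallelism at one thread. If the >100% utilization
# came from these internal workers, htop should now show ~100%.
torch.set_num_threads(1)

preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

image = Image.open('bh.jpg')
for i in range(200000):
    tensor = preprocess(image)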

P.S. I haven't traced exactly which parts of your code hit C/C++-implemented modules, but that doesn't affect the conclusion.
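
The general principle can also be seen without torch. A minimal sketch (the buffer size and loop counts are arbitrary): zlib.compress is implemented in C and releases the GIL while it runs, so two threads calling it can show ~200% CPU in htop, whereas two threads running pure-Python bytecode stay pinned near 100%.

import threading
import zlib

data = b'x' * 10_000_000  # 10 MB of highly compressible bytes

def c_work():
    # zlib.compress is a C function that releases the GIL while it runs,
    # so two threads executing it can occupy two cores at once.
    for _ in range(50):
        zlib.compress(data)

def py_work():
    # Pure-Python bytecode holds the GIL, so two threads running this
    # take turns and total CPU stays near 100%.
    n = 0
    for _ in range(30_000_000):
        n += 1

for name, target in [('zlib (releases GIL)', c_work), ('pure Python (holds GIL)', py_work)]:
    print(name)
    threads = [threading.Thread(target=target) for _ in range(2)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()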

Source: utcz.com/p/938840.html
