torch_tensorrt 设置动态的 batch_size 报错 cannot create std::vector larger than max_size?

torch_tensorrt 设置动态的 batch_size 报错 cannot create std::vector larger than max_size?

我的模型

import torch

import torch_tensorrt

import torch.nn as nn

from torch import Tensor

import torch.nn.functional as F

import torchvision.models as models

from torch.nn.parameter import Parameter

def l2n(x: Tensor, eps: float = 1e-6) -> Tensor:
    """L2-normalize ``x`` along dimension 1.

    Args:
        x: Input tensor with at least two dimensions; dimension 1 is the one
            being normalized.
        eps: Small constant added to the norm so that an all-zero slice does
            not cause a division by zero.

    Returns:
        A tensor with the same shape as ``x`` whose slices along dimension 1
        have (approximately) unit Euclidean length.
    """
    # The norm is composed from elementwise primitives instead of calling
    # ``torch.norm``, which the PyTorch documentation marks as deprecated.
    # NOTE(review): the Torch-TensorRT log in this thread reports the
    # normalize op being run through a plugin right before the dynamic-shape
    # error; avoiding that op presumably sidesteps it -- confirm by
    # re-running the export.
    norm = torch.sqrt(torch.sum(x * x, dim=1, keepdim=True))
    # Broadcasting stretches the kept size-1 dimension, so no expand_as is
    # required.
    return x / (norm + eps)

class L2N(nn.Module):
    """Module wrapper that applies :func:`l2n` with a stored ``eps``."""

    def __init__(self, eps=1e-6):
        """Store ``eps``, the constant forwarded to :func:`l2n`."""
        super().__init__()
        self.eps = eps

    def forward(self, x):
        """Return ``x`` L2-normalized along dimension 1."""
        return l2n(x, eps=self.eps)

    def __repr__(self):
        """Return ``ClassName(eps=<value>)``."""
        return f"{self.__class__.__name__}(eps={self.eps})"

class GeM(nn.Module):
    """Generalized-mean pooling over a fixed 7x7 window.

    The input is clamped from below, raised to the power 3, average-pooled
    with a (7, 7) kernel and then raised to the power 1/3.
    """

    def __init__(self, p=3, eps=1e-6):
        """Create the layer.

        Args:
            p: Initial value of the one-element parameter ``self.p``.
            eps: Lower clamp applied to the input before exponentiation.
        """
        super().__init__()
        # Registered as a parameter, but forward() does not read it (see the
        # note there); it is still reported by __repr__.
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Pool ``x`` and return the pooled tensor.

        The kernel is fixed at (7, 7), so the last two dimensions of ``x``
        must be at least 7x7.
        """
        # NOTE(review): the exponent is a literal 3 rather than self.p --
        # presumably to keep it a compile-time constant for export; confirm
        # before switching to the learned parameter.
        p: int = 3
        # Honor the eps given to the constructor instead of a second
        # hard-coded copy of the default.
        clamped = x.clamp(min=self.eps)
        powered = clamped.pow(p)
        return F.avg_pool2d(powered, (7, 7)).pow(1. / p)

    def __repr__(self):
        """Return ``ClassName(p=<4-decimal value>, eps=<value>)``."""
        p_value = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})"

class ImageRetrievalNet(nn.Module):
    """Feature extractor -> GeM pooling -> L2 norm -> linear whitening -> L2 norm."""

    def __init__(self, dim: int = 512):
        """Build the network; ``dim`` is the output size of the whitening layer."""
        super().__init__()
        # Keep every child of torchvision's ResNet-50 except the last two and
        # chain them as the feature extractor.
        backbone = models.resnet50()
        trunk_layers = list(backbone.children())[:-2]
        self.features = nn.Sequential(*trunk_layers)
        self.pool = GeM()
        self.norm = L2N()
        self.lwhiten = None
        self.whiten = nn.Linear(2048, dim, bias=True)

    def forward(self, x: Tensor):
        """Return the descriptors of ``x`` with the first two axes swapped."""
        pooled = self.pool(self.features(x))
        # Drop the two trailing dimensions left over from pooling.
        o: Tensor = self.norm(pooled).squeeze(-1).squeeze(-1)

        # With whitening enabled: pooled features -> whiten -> norm
        if self.whiten is not None:
            o = self.norm(self.whiten(o))

        # Make each image a Dx1 column vector (DxN when there are N images)
        return o.permute(1, 0)

device = 'cuda:0'

# NOTE(security): torch.load unpickles the file. This checkpoint is downloaded
# from the internet, so only load it when the source is trusted.
# map_location='cpu' makes loading independent of the device the checkpoint
# was saved from; the model is moved to `device` below.
state: dict = torch.load(
    'weight/gl18-tl-resnet50-gem-w-83fdc30.pth', map_location='cpu')

# Keep every 4th row of the whitening weight and every 4th bias entry --
# presumably to shrink the checkpoint's whitening layer to the 512 outputs of
# ImageRetrievalNet's default `dim`; verify against the checkpoint.
state['state_dict']['whiten.weight'] = state['state_dict']['whiten.weight'][0::4, ::]
state['state_dict']['whiten.bias'] = state['state_dict']['whiten.bias'][0::4]

network: ImageRetrievalNet = ImageRetrievalNet()
network.load_state_dict(state['state_dict'])
# Switch to inference mode before export, then move the module to the GPU.
network.eval()
network.to(device)

gl18-tl-resnet50-gem-w-83fdc30.pth 权重文件下载地址:http://cmp.felk.cvut.cz/cnnimageretrieval/data/networks/gl18/...

我想把这个模型导出成 tensorRT

所以我写了下面的代码:

import torch

import torch_tensorrt

# Module to compile: the `network` instance created earlier in the script.
model = network

# NOTE(review): batch_size is never read in this snippet.
batch_size = None

image_channel = 3

image_size = 224

device = torch.device("cuda:0")

# Single input with a dynamic shape range: only the batch dimension differs
# between min_shape (1), opt_shape (2) and max_shape (4).
inputs = [

torch_tensorrt.Input(

min_shape=[1, image_channel, image_size, image_size],

opt_shape=[2, image_channel, image_size, image_size],

max_shape=[4, image_channel, image_size, image_size],

device=device

)

]

enabled_precisions = {torch.float} # torch.float is FP32; add torch.half to the set to allow FP16

# This is the call that raises the ValueError shown in the traceback.
trt_ts_module = torch_tensorrt.compile(

model,

inputs=inputs,

enabled_precisions=enabled_precisions

)

trt_ts_module = trt_ts_module.to(device)

# Serialize the compiled module to disk.
torch.jit.save(

trt_ts_module, "models/iv_resnet50_export_into_pytorch_tensorrt_model_dynamic.ts")

但是运行会报错

─➤  python -m example.iv_resnet50_export_into_pytorch_tensorRT_model_all

WARNING: [Torch-TensorRT TorchScript Conversion Context] - CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage. See `CUDA_MODULE_LOADING` in https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars

WARNING: [Torch-TensorRT] - Dilation not used in Max pooling converter

WARNING: [Torch-TensorRT] - Normalize layer will be run through ATen, not TensorRT. Performance may be lower than expected

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

ERROR: [Torch-TensorRT TorchScript Conversion Context] - 9: [graphShapeAnalyzer.cpp::addVolumeCheck::739] Error Code 9: Internal Error ((Unnamed Layer* 183) [PluginV2DynamicExt]_output_0: dimension 0 never exceeds -2147483648)

Traceback (most recent call last):

File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main

return _run_code(code, main_globals, None,

File "/usr/lib/python3.10/runpy.py", line 86, in _run_code

exec(code, run_globals)

File "/home/ponponon/code/image2vector/example/iv_resnet50_export_into_pytorch_tensorRT_model_all.py", line 119, in <module>

trt_ts_module = torch_tensorrt.compile(

File "/home/ponponon/.local/share/virtualenvs/image2vector-oucNrpsS/lib/python3.10/site-packages/torch_tensorrt/_compile.py", line 125, in compile

return torch_tensorrt.ts.compile(

File "/home/ponponon/.local/share/virtualenvs/image2vector-oucNrpsS/lib/python3.10/site-packages/torch_tensorrt/ts/_compiler.py", line 136, in compile

compiled_cpp_mod = _C.compile_graph(module._c, _parse_compile_spec(spec))

ValueError: cannot create std::vector larger than max_size()

问题应该是出在设置动态的 batch_size 上面

如果把 batch_size 都设置为 1 就是可以的,但是 min_shape、opt_shape、max_shape 的 batch_size 不一样的话,就会报错!

# Static variant: min_shape, opt_shape and max_shape all use batch size 1.
# The author reports that compilation succeeds with this configuration.
inputs = [

torch_tensorrt.Input(

min_shape=[1, image_channel, image_size, image_size],

opt_shape=[1, image_channel, image_size, image_size],

max_shape=[1, image_channel, image_size, image_size],

device=device

)

]


完整代码如下:

import torch

import torch_tensorrt

import torch.nn as nn

from torch import Tensor

import torch.nn.functional as F

import torchvision.models as models

from torch.nn.parameter import Parameter

def l2n(x: Tensor, eps: float = 1e-6) -> Tensor:
    """L2-normalize ``x`` along dimension 1.

    Args:
        x: Input tensor with at least two dimensions; dimension 1 is the one
            being normalized.
        eps: Small constant added to the norm so that an all-zero slice does
            not cause a division by zero.

    Returns:
        A tensor with the same shape as ``x`` whose slices along dimension 1
        have (approximately) unit Euclidean length.
    """
    # The norm is composed from elementwise primitives instead of calling
    # ``torch.norm``, which the PyTorch documentation marks as deprecated.
    # NOTE(review): the Torch-TensorRT log in this thread reports the
    # normalize op being run through a plugin right before the dynamic-shape
    # error; avoiding that op presumably sidesteps it -- confirm by
    # re-running the export.
    norm = torch.sqrt(torch.sum(x * x, dim=1, keepdim=True))
    # Broadcasting stretches the kept size-1 dimension, so no expand_as is
    # required.
    return x / (norm + eps)

class L2N(nn.Module):
    """Module wrapper that applies :func:`l2n` with a stored ``eps``."""

    def __init__(self, eps=1e-6):
        """Store ``eps``, the constant forwarded to :func:`l2n`."""
        super().__init__()
        self.eps = eps

    def forward(self, x):
        """Return ``x`` L2-normalized along dimension 1."""
        return l2n(x, eps=self.eps)

    def __repr__(self):
        """Return ``ClassName(eps=<value>)``."""
        return f"{self.__class__.__name__}(eps={self.eps})"

class GeM(nn.Module):
    """Generalized-mean pooling over a fixed 7x7 window.

    The input is clamped from below, raised to the power 3, average-pooled
    with a (7, 7) kernel and then raised to the power 1/3.
    """

    def __init__(self, p=3, eps=1e-6):
        """Create the layer.

        Args:
            p: Initial value of the one-element parameter ``self.p``.
            eps: Lower clamp applied to the input before exponentiation.
        """
        super().__init__()
        # Registered as a parameter, but forward() does not read it (see the
        # note there); it is still reported by __repr__.
        self.p = Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Pool ``x`` and return the pooled tensor.

        The kernel is fixed at (7, 7), so the last two dimensions of ``x``
        must be at least 7x7.
        """
        # NOTE(review): the exponent is a literal 3 rather than self.p --
        # presumably to keep it a compile-time constant for export; confirm
        # before switching to the learned parameter.
        p: int = 3
        # Honor the eps given to the constructor instead of a second
        # hard-coded copy of the default.
        clamped = x.clamp(min=self.eps)
        powered = clamped.pow(p)
        return F.avg_pool2d(powered, (7, 7)).pow(1. / p)

    def __repr__(self):
        """Return ``ClassName(p=<4-decimal value>, eps=<value>)``."""
        p_value = self.p.data.tolist()[0]
        return f"{self.__class__.__name__}(p={p_value:.4f}, eps={self.eps})"

class ImageRetrievalNet(nn.Module):
    """Feature extractor -> GeM pooling -> L2 norm -> linear whitening -> L2 norm."""

    def __init__(self, dim: int = 512):
        """Build the network; ``dim`` is the output size of the whitening layer."""
        super().__init__()
        # Keep every child of torchvision's ResNet-50 except the last two and
        # chain them as the feature extractor.
        backbone = models.resnet50()
        trunk_layers = list(backbone.children())[:-2]
        self.features = nn.Sequential(*trunk_layers)
        self.pool = GeM()
        self.norm = L2N()
        self.lwhiten = None
        self.whiten = nn.Linear(2048, dim, bias=True)

    def forward(self, x: Tensor):
        """Return the descriptors of ``x`` with the first two axes swapped."""
        pooled = self.pool(self.features(x))
        # Drop the two trailing dimensions left over from pooling.
        o: Tensor = self.norm(pooled).squeeze(-1).squeeze(-1)

        # With whitening enabled: pooled features -> whiten -> norm
        if self.whiten is not None:
            o = self.norm(self.whiten(o))

        # Make each image a Dx1 column vector (DxN when there are N images)
        return o.permute(1, 0)

device = 'cuda:0'

# NOTE(security): torch.load unpickles the file. This checkpoint is downloaded
# from the internet, so only load it when the source is trusted.
# map_location='cpu' makes loading independent of the device the checkpoint
# was saved from; the model is moved to `device` below.
state: dict = torch.load(
    'weight/gl18-tl-resnet50-gem-w-83fdc30.pth', map_location='cpu')

# Keep every 4th row of the whitening weight and every 4th bias entry --
# presumably to shrink the checkpoint's whitening layer to the 512 outputs of
# ImageRetrievalNet's default `dim`; verify against the checkpoint.
state['state_dict']['whiten.weight'] = state['state_dict']['whiten.weight'][0::4, ::]
state['state_dict']['whiten.bias'] = state['state_dict']['whiten.bias'][0::4]

network: ImageRetrievalNet = ImageRetrievalNet()
network.load_state_dict(state['state_dict'])
# Switch to inference mode before export, then move the module to the GPU.
network.eval()
network.to(device)

# Module to compile: the `network` instance created earlier in the script.
model = network

# NOTE(review): batch_size is never read in this script.
batch_size = None

image_channel = 3

image_size = 224

device = torch.device("cuda:0")

# Single input with a dynamic shape range: only the batch dimension differs
# between min_shape (1), opt_shape (2) and max_shape (4).
inputs = [

torch_tensorrt.Input(

min_shape=[1, image_channel, image_size, image_size],

opt_shape=[2, image_channel, image_size, image_size],

max_shape=[4, image_channel, image_size, image_size],

device=device

)

]

enabled_precisions = {torch.float} # torch.float is FP32; add torch.half to the set to allow FP16

# This is the call that raises the ValueError shown in the traceback.
trt_ts_module = torch_tensorrt.compile(

model,

inputs=inputs,

enabled_precisions=enabled_precisions

)

trt_ts_module = trt_ts_module.to(device)

# Serialize the compiled module to disk.
torch.jit.save(

trt_ts_module, "models/iv_resnet50_export_into_pytorch_tensorrt_model_dynamic.ts")

# 用 torch_tensorrt.compile 和 torch.jit.save 把 PyTorch 模型导出成 torch_tensorrt 格式时,如何让 batch_size 支持动态大小?


回答:

主要需要改的地方如下(与原代码相比,去掉了 torch_tensorrt.Input 里的 device=device 参数,并把 torch_tensorrt 以别名 torchtrt 导入),你参考一下吧:

import torch

import torch_tensorrt as torchtrt

import torch.nn as nn

from torch import Tensor

import torch.nn.functional as F

import torchvision.models as models

from torch.nn.parameter import Parameter

# ... (rest of the code unchanged)

device = torch.device("cuda:0")

# Same dynamic batch range as in the question (1 / 2 / 4); the difference is
# that no `device` keyword is passed to Input here.
# image_channel and image_size come from the unchanged part of the script.
inputs = [

torchtrt.Input(

min_shape=[1, image_channel, image_size, image_size],

opt_shape=[2, image_channel, image_size, image_size],

max_shape=[4, image_channel, image_size, image_size],

)

]

# ... (rest of the code unchanged)

# model and enabled_precisions come from the unchanged part of the script.
trt_ts_module = torchtrt.compile(

model,

inputs=inputs,

enabled_precisions=enabled_precisions

)

# ... (rest of the code unchanged)

以上是 torch_tensorrt 设置动态的 batch_size 报错 cannot create std::vector larger than max_size? 的全部内容, 来源链接: utcz.com/p/938854.html

回到顶部