I am running inference with a ConvNet built in PyTorch, and I get an error at the following line:
outputs = model(X_batch)
The error:
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)
My code is structured as follows. I move both the model and the tensors to the GPU, and I added some checks to make sure they are actually on the GPU.
print(args.device) # returns 'cuda'
print(torch.cuda.is_available()) # returns True

model = CNN_MLP(args)
model.to(args.device)

def inference(model, test_dataloader, device='cpu'):
    model.eval()
    metrics = []
    for _, (X_batch, y_batch) in enumerate(tqdm(test_dataloader)):
        X_batch = X_batch.to(args.device)
        print(next(model.parameters()).is_cuda) # returns True
        print(X_batch.is_cuda) # returns True
        outputs = model(X_batch) # Error raised here
        metrics.append(calc_metrics(outputs, y_batch)) # This is a simplified version of the code here
    return aggregate(metrics)

zero_shot_metrics = inference(model, test_dataloader, device=args.device)
Other details:
- I am using an NVIDIA RTX A4000 GPU, which I can confirm with nvidia-smi.
- The torch version is 2.0.1+cu118, while nvidia-smi reports a CUDA version of 12.0. I am not sure whether this mismatch matters, but torch.cuda.is_available() seems to work fine.
- The CNN_MLP class is defined as follows:
import math
import torch

class CNN_MLP(torch.nn.Module):
    """ CNN-MLP with 1 Conv layer, 1 Max Pool layer, and 1 Linear layer. """
    def __init__(self, seq_len=220, embed_size=64, vocab_size=45, pad_index=0,
                 stride=1, kernel_size=3, conv_out_size=64, hidden_layer_sizes=[128, 64, 32, 8, 1], dropout_rate=0.25):
        super(CNN_MLP, self).__init__()

        # Embedding layer parameters
        self.seq_len = seq_len
        self.embed_size = embed_size
        self.vocab_size = vocab_size
        self.pad_index = pad_index
        self.hidden_layer_sizes = hidden_layer_sizes

        # Conv layer parameters
        self.stride = stride
        self.kernel_size = kernel_size
        self.conv_out_size = conv_out_size

        # Misc
        self.dropout_rate = dropout_rate

        # Conv Layers
        self.embedding = torch.nn.Embedding(self.vocab_size, self.embed_size, padding_idx=self.pad_index)
        self.conv = torch.nn.Conv1d(self.seq_len, self.conv_out_size, self.kernel_size, self.stride)
        self.hidden_act = torch.relu
        self.max_pool = torch.nn.MaxPool1d(self.kernel_size, self.stride)
        self.flatten = lambda x: x.view(x.shape[0], x.shape[1]*x.shape[2])

        # MLP layers
        self.fc_layers = []
        self.hidden_layer_sizes.insert(0, self._linear_layer_in_size())
        for i in range(len(self.hidden_layer_sizes) - 1):
            self.fc_layers.append(torch.nn.Linear(self.hidden_layer_sizes[i], self.hidden_layer_sizes[i+1]))
            self.fc_layers.append(torch.nn.ReLU())
            if self.dropout_rate and i != len(self.hidden_layer_sizes) - 2:
                self.fc_layers.append(torch.nn.Dropout(self.dropout_rate))
        self.fc_layers.append(torch.sigmoid)

    def _linear_layer_in_size(self):
        out_conv_1 = ((self.embed_size - 1 * (self.kernel_size - 1) - 1) / self.stride) + 1
        out_conv_1 = math.floor(out_conv_1)
        out_pool_1 = ((out_conv_1 - 1 * (self.kernel_size - 1) - 1) / self.stride) + 1
        out_pool_1 = math.floor(out_pool_1)
        return out_pool_1 * self.conv_out_size

    def forward(self, x):
        x = self.embedding(x)
        x = self.conv(x)
        x = self.hidden_act(x)
        x = self.max_pool(x)
        x = self.flatten(x)
        for layer in self.fc_layers:
            x = layer(x)
        return x.squeeze()

    def embed(self, x):
        x = self.embedding(x)
        x = self.conv(x)
        x = self.hidden_act(x)
        x = self.max_pool(x)
        x = self.flatten(x)
        for i, layer in enumerate(self.fc_layers):
            if i != len(self.fc_layers) - 1:
                x = layer(x)
        return x
1 Answer
In CNN_MLP, the fc_layers member needs to be an nn.Module, because it holds other modules. When you call .to on the model, .to is applied recursively to every submodule registered on the nn.Module. fc_layers is a plain Python list (not an nn.Module), so the parameters of the layers it contains are never registered and therefore never transferred to the GPU. The simplest fix is to replace

self.fc_layers = []

with

self.fc_layers = torch.nn.ModuleList()

(see torch.nn.ModuleList for more information).
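As a quick illustration of why registration matters (a minimal sketch, not taken from the original post), a module that stores its layers in a plain list exposes no parameters, so .to() has nothing to move:

import torch

class ListHolder(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc_layers = [torch.nn.Linear(4, 2)]  # plain list: the Linear is NOT registered

class ModuleListHolder(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc_layers = torch.nn.ModuleList([torch.nn.Linear(4, 2)])  # registered as a submodule

print(len(list(ListHolder().parameters())))        # 0 -> .to('cuda') would leave the Linear on the CPU
print(len(list(ModuleListHolder().parameters())))  # 2 (weight and bias) -> moved by .to('cuda')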
A better solution is to make fc_layers an nn.Sequential and call it like a regular nn.Module in forward.
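For instance, the MLP head from the question could be rebuilt along these lines (a rough sketch of the nn.Sequential approach, untested against the rest of the original code; note that the bare torch.sigmoid function has to become the module torch.nn.Sigmoid(), because nn.Sequential and nn.ModuleList only accept nn.Module instances):

# In __init__, collect the MLP layers and wrap them in a single registered module:
layers = []
self.hidden_layer_sizes.insert(0, self._linear_layer_in_size())
for i in range(len(self.hidden_layer_sizes) - 1):
    layers.append(torch.nn.Linear(self.hidden_layer_sizes[i], self.hidden_layer_sizes[i + 1]))
    layers.append(torch.nn.ReLU())
    if self.dropout_rate and i != len(self.hidden_layer_sizes) - 2:
        layers.append(torch.nn.Dropout(self.dropout_rate))
layers.append(torch.nn.Sigmoid())  # module form instead of the torch.sigmoid function
self.fc_layers = torch.nn.Sequential(*layers)

# In forward, the manual loop over self.fc_layers becomes a single call:
x = self.fc_layers(x)
# In embed, which skips the final activation, a slice still returns an nn.Sequential:
# x = self.fc_layers[:-1](x)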