# YOLOV1_pytorch_(2)
## 3 Prediction
### 3.1 Prediction Pipeline
1. Image preprocessing
2. Prediction
3. Decoding
4. Drawing boxes
(1) Prediction
predict.py
```python
import torch
from resnet_yolo import resnet50
import torchvision.transforms as transforms
import cv2
import numpy as np
VOC_CLASSES = ( # always index 0
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor')
Color = [[0, 0, 0],[128, 0, 0],[0, 128, 0],[128, 128, 0],[0, 0, 128],
[128, 0, 128],[0, 128, 128],[128, 128, 128],[64, 0, 0],[192, 0, 0],
[64, 128, 0],[192, 128, 0],[64, 0, 128],[192, 0,128],[64, 128, 128],
[192, 128, 128],[0, 64, 0],[128, 64, 0],[0, 192, 0],[128, 192, 0],[0, 64, 128]]
def nms(bboxes,scores,threshold=0.5):
x1 = bboxes[:,0]
y1 = bboxes[:,1]
x2 = bboxes[:,2]
y2 = bboxes[:,3]
areas = (x2-x1)*(y2-y1)
_,order = scores.sort(0,descending=True)
keep = []
while order.numel() > 0:
        i = order[0].item() if order.numel() > 1 else order.item() # index of the highest-scoring remaining box
        keep.append(i)
if order.numel() == 1:
break
xx1 = x1[order[1:]].clamp(min=x1[i])
yy1 = y1[order[1:]].clamp(min=y1[i])
        xx2 = x2[order[1:]].clamp(max=x2[i])
        yy2 = y2[order[1:]].clamp(max=y2[i])
w = (xx2-xx1).clamp(min=0)
h = (yy2-yy1).clamp(min=0)
inter = w*h
ove = inter/(areas[i]+areas[order[1:]]-inter)
#ids = (ove <= threshold).nonzero().squeeze()
ids = torch.nonzero(ove <= threshold).squeeze()
if ids.numel() == 0:
break
order = order[ids+1]
return torch.LongTensor(keep)
def decoder(pred):
grid_num = 7
boxes = []
cls_indexs = []
probs = []
cell_size = 1./grid_num
pred = pred.data
pred = pred.squeeze(0) # 7x7x30
contain1 = pred[:,:,4].unsqueeze(2) # [7, 7, 1]
contain2 = pred[:,:,9].unsqueeze(2) # [7, 7, 1]
contain = torch.cat((contain1,contain2),2) # [7, 7, 2]
mask1 = contain > 0.1 # [7, 7, 2]
mask2 = (contain==contain.max()) # [7, 7, 2]
    mask = (mask1 | mask2) # [7, 7, 2] keep boxes above the threshold or the single most confident box
for i in range(grid_num):
for j in range(grid_num):
for b in range(2):
if mask[i,j,b] == 1:
box = pred[i,j,b*5:b*5+4]
                    contain_prob = pred[i,j,b*5+4].view(1) # objectness score of this box, shape (1,)
                    xy = torch.FloatTensor([j,i])*cell_size # upper-left corner of the cell (normalized)
                    box[:2] = box[:2]*cell_size + xy # box center (cx,cy) relative to the whole image
                    box_xy = torch.FloatTensor(box.size()) # convert [cx,cy,w,h] to [x1,y1,x2,y2]
box_xy[:2] = box[:2] - 0.5*box[2:]
box_xy[2:] = box[:2] + 0.5*box[2:]
max_prob,cls_index = torch.max(pred[i,j,10:],0)
if float((contain_prob*max_prob)[0]) > 0.1:
boxes.append(box_xy.view(1,4))
                        cls_indexs.append(cls_index.view(1)) # 0-dim LongTensor -> shape (1,)
probs.append(contain_prob*max_prob)
if len(boxes) == 0:
boxes = torch.zeros((1,4))
probs = torch.zeros(1)
cls_indexs = torch.zeros(1)
else:
boxes = torch.cat(boxes,0) #(n,4)
probs = torch.cat(probs,0) #(n,)
cls_indexs = torch.cat(cls_indexs,0) #(n,)
keep = nms(boxes,probs)
return boxes[keep],cls_indexs[keep],probs[keep]
def predict_gpu(model,image_name,root_path='/Users/ls/PycharmProjects/YOLOV1_LS/VOCdevkit/VOC2007/JPEGImages/'):
result = []
    image = cv2.imread(root_path+image_name)
    # 1 Image preprocessing
    h,w,_ = image.shape
    img = cv2.resize(image,(448,448)) # resize to the fixed network input size
    img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB) # BGR -> RGB
    mean = (123,117,104) # RGB mean
    img = img - np.array(mean,dtype=np.float32) # subtract the per-channel mean
    transform = transforms.Compose([transforms.ToTensor(),])
    img = transform(img) # HWC -> CHW tensor
    img = img[None,:,:,:] # add the batch dimension
    # img = img.cuda()
    # 2 Prediction
    with torch.no_grad():
        pred = model(img) # 1x7x7x30
    pred = pred.cpu()
    # 3 Decoding
boxes,cls_indexs,probs = decoder(pred)
for i,box in enumerate(boxes):
x1 = int(box[0]*w)
x2 = int(box[2]*w)
y1 = int(box[1]*h)
y2 = int(box[3]*h)
cls_index = cls_indexs[i]
if cls_index.numel()==0:return
cls_index = int(cls_index) # convert LongTensor to int
prob = probs[i]
prob = float(prob)
result.append([(x1,y1),(x2,y2),VOC_CLASSES[cls_index],image_name,prob])
return result
if __name__ == '__main__':
model = resnet50()
print('load model...')
# model.load_state_dict(torch.load('best.pth'))
model.eval()
#model.cuda()
image_name = '000015.jpg'
    image = cv2.imread(image_name)
print('predicting...')
result = predict_gpu(model,image_name)
    # 4 Draw boxes
    for left_up,right_bottom,class_name, _ ,prob in result:
        color = Color[VOC_CLASSES.index(class_name)]
        cv2.rectangle(image,left_up,right_bottom,color,2)
        label = class_name+str(round(prob,2))
        text_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        p1 = (left_up[0], left_up[1]- text_size[1])
        cv2.rectangle(image, (p1[0] - 2//2, p1[1] - 2 - baseline), (p1[0] + text_size[0], p1[1] + text_size[1]), color, -1)
        cv2.putText(image, label, (p1[0], p1[1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1, 8)
    cv2.imwrite('result.jpg',image)
```
(2) NMS
NMS removes duplicate predicted boxes based on IoU. The idea is that boxes predicting the same object overlap heavily (high IoU), while boxes predicting different objects barely overlap (low IoU).
I. Compute the area of every predicted box.
II. Sort the boxes by confidence in descending order and keep the sorted indices. Take the highest-scoring box and compute the IoU between it and each remaining box.
III. Use the IoU threshold to decide which boxes survive, then repeat the same procedure on the survivors until only one box is left.
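As a quick illustration of the IoU used in step II, here is a minimal sketch with two hand-picked corner-form boxes (the values are made up and are not produced by the code in this post):
```python
import torch

# Two boxes in [x1, y1, x2, y2] form; box_b is box_a shifted by (1, 1).
box_a = torch.tensor([0., 0., 10., 10.])
box_b = torch.tensor([1., 1., 11., 11.])

# Intersection rectangle: max of the top-left corners, min of the bottom-right corners.
xx1 = torch.max(box_a[0], box_b[0])
yy1 = torch.max(box_a[1], box_b[1])
xx2 = torch.min(box_a[2], box_b[2])
yy2 = torch.min(box_a[3], box_b[3])
inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)   # 9 * 9 = 81
area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])        # 100
area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])        # 100
iou = inter / (area_a + area_b - inter)                       # 81 / 119 ≈ 0.68
print(iou.item())
```
With a threshold of 0.5, two boxes that overlap this much are treated as duplicates. The commented nms implementation follows.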
```python
def nms(bboxes,scores,threshold=0.5):
x1 = bboxes[:,0]
y1 = bboxes[:,1]
x2 = bboxes[:,2]
y2 = bboxes[:,3]
    # 1 Compute the area of every predicted box
areas = (x2-x1)*(y2-y1)
    # 2 Sort the boxes by score in descending order
_,order = scores.sort(0,descending=True)
keep = []
while order.numel() > 0:
        # 3 Take the highest-scoring remaining box
        i = order[0].item() if order.numel() > 1 else order.item()
        keep.append(i)
if order.numel() == 1:
break
        # 4 IoU between the current box and all remaining boxes
        xx1 = x1[order[1:]].clamp(min=x1[i])
        yy1 = y1[order[1:]].clamp(min=y1[i])
        xx2 = x2[order[1:]].clamp(max=x2[i])
        yy2 = y2[order[1:]].clamp(max=y2[i])
w = (xx2-xx1).clamp(min=0)
h = (yy2-yy1).clamp(min=0)
inter = w*h
ove = inter/(areas[i]+areas[order[1:]]-inter)
#ids = (ove <= threshold).nonzero().squeeze()
        # 5 Drop boxes that overlap the current box too much (IoU above the threshold)
ids = torch.nonzero(ove <= threshold).squeeze()
if ids.numel() == 0:
break
        # 6 Boxes with little or no overlap become the candidates for the next round
order = order[ids+1]
return torch.LongTensor(keep)
```
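A small sanity check of nms with hypothetical boxes and scores (this assumes torch is imported and the nms defined above; box 0 and box 1 overlap with IoU ≈ 0.68, box 2 is disjoint):
```python
import torch

boxes = torch.tensor([[ 0.,  0., 10., 10.],
                      [ 1.,  1., 11., 11.],
                      [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(nms(boxes, scores, threshold=0.5))  # tensor([0, 2]): box 1 is suppressed by box 0
```
Box 0 has the highest score, so it is kept; box 1 overlaps it above the threshold and is removed; box 2 does not overlap and survives to the next round, where it is kept as the last remaining box.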
(3) Decoding
I. Take the confidence scores from the prediction and use a confidence threshold for a first round of filtering.
II. Iterate over the rows and columns of the output feature map and over the two boxes of each grid cell, reading out the corresponding box and class probabilities. Compute the box center from the predicted offsets. The product of the class probability and the confidence is the final score, which is thresholded to filter the boxes once more.
III. Run non-maximum suppression on the surviving boxes and their scores, and output the predictions that pass.
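As a numeric sketch of the coordinate decoding in step II (the offsets below are made up, not real network outputs): a cell at row i, column j predicts a center offset (tx, ty) inside the cell plus a width and height relative to the whole image, and the decoder turns this into normalized corner coordinates.
```python
grid_num = 7
cell_size = 1.0 / grid_num
i, j = 3, 2                         # grid row (y) and column (x)
tx, ty, w, h = 0.4, 0.6, 0.2, 0.3   # hypothetical predictions for one box

# Center relative to the whole image: offset inside the cell plus the cell's upper-left corner.
cx = (j + tx) * cell_size
cy = (i + ty) * cell_size

# Convert [cx, cy, w, h] to corner form [x1, y1, x2, y2].
x1, y1 = cx - 0.5 * w, cy - 0.5 * h
x2, y2 = cx + 0.5 * w, cy + 0.5 * h
print([round(v, 3) for v in (x1, y1, x2, y2)])  # [0.243, 0.364, 0.443, 0.664]
```
The full decoder is below; it applies the same arithmetic to every cell and box that passes the confidence mask.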
```python
def decoder(pred):
grid_num = 7
boxes = []
cls_indexs = []
probs = []
cell_size = 1./grid_num
pred = pred.data
pred = pred.squeeze(0) # 7x7x30
contain1 = pred[:,:,4].unsqueeze(2) # [7, 7, 1]
contain2 = pred[:,:,9].unsqueeze(2) # [7, 7, 1]
    # 1 Pre-filter boxes by confidence
    contain = torch.cat((contain1,contain2),2) # [7, 7, 2]
    mask1 = contain > 0.1 # [7, 7, 2]
    mask2 = (contain==contain.max()) # [7, 7, 2] always keep the single most confident box
    mask = (mask1 | mask2) # [7, 7, 2]
for i in range(grid_num):
for j in range(grid_num):
for b in range(2):
if mask[i,j,b] == 1:
box = pred[i,j,b*5:b*5+4]
                    contain_prob = pred[i,j,b*5+4].view(1) # objectness score of this box, shape (1,)
                    xy = torch.FloatTensor([j,i])*cell_size # upper-left corner of the cell (normalized)
                    # 2 Decode box coordinates
                    box[:2] = box[:2]*cell_size + xy # box center (cx,cy) relative to the whole image
                    box_xy = torch.FloatTensor(box.size()) # convert [cx,cy,w,h] to [x1,y1,x2,y2]
                    box_xy[:2] = box[:2] - 0.5*box[2:]
                    box_xy[2:] = box[:2] + 0.5*box[2:]
max_prob,cls_index = torch.max(pred[i,j,10:],0)
                    # 3 Filter by the final score (class probability x confidence)
if float((contain_prob*max_prob)[0]) > 0.1:
boxes.append(box_xy.view(1,4))
                        cls_indexs.append(cls_index.view(1)) # 0-dim LongTensor -> shape (1,)
probs.append(contain_prob*max_prob)
if len(boxes) == 0:
boxes = torch.zeros((1,4))
probs = torch.zeros(1)
cls_indexs = torch.zeros(1)
else:
boxes = torch.cat(boxes,0) #(n,4)
probs = torch.cat(probs,0) #(n,)
cls_indexs = torch.cat(cls_indexs,0) #(n,)
    # 4 Non-maximum suppression
keep = nms(boxes,probs)
return boxes[keep],cls_indexs[keep],probs[keep]
```
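Finally, a synthetic end-to-end check of decoder (the tensor values are made up; this assumes torch is imported and the decoder and nms above are defined). One cell at row 3, column 2 claims an object with box-1 confidence 0.8 and class probability 0.9 for class index 6 ('car'):
```python
import torch

pred = torch.zeros(1, 7, 7, 30)
pred[0, 3, 2, 0:4] = torch.tensor([0.4, 0.6, 0.2, 0.3])  # tx, ty, w, h of box 1
pred[0, 3, 2, 4] = 0.8                                   # confidence of box 1
pred[0, 3, 2, 10 + 6] = 0.9                              # class probability for index 6 ('car')

boxes, cls_indexs, probs = decoder(pred)
print(boxes)       # ≈ tensor([[0.243, 0.364, 0.443, 0.664]]) normalized corners
print(cls_indexs)  # tensor([6]) -> VOC_CLASSES[6] == 'car'
print(probs)       # ≈ tensor([0.72]) = 0.8 * 0.9
```
Multiplying the normalized corners by the original image width and height, as predict_gpu does, gives the pixel coordinates that are drawn on the image.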