欢迎光临散文网 会员登陆 & 注册

YOLOV1_pytorch_(2)

2022-08-07 10:20 作者:儒雅随和的老顽童  | 我要投稿

## 3 预测

### 3.1 预测流程

1. 图片预处理

2. 预测

3. 解码

4. 画框

(1) 预测

predict.py

```python

import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.autograd import Variable

from resnet_yolo import resnet50

# The 20 PASCAL VOC object categories; a class index produced by the decoder
# is used directly as an index into this tuple.
VOC_CLASSES = (
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
)

# Drawing colours, looked up with the same index as VOC_CLASSES.
# NOTE(review): this palette has 21 entries, one more than VOC_CLASSES, so the
# last entry is never reached through a class index.
Color = [
    [0, 0, 0],
    [128, 0, 0],
    [0, 128, 0],
    [128, 128, 0],
    [0, 0, 128],
    [128, 0, 128],
    [0, 128, 128],
    [128, 128, 128],
    [64, 0, 0],
    [192, 0, 0],
    [64, 128, 0],
    [192, 128, 0],
    [64, 0, 128],
    [192, 0, 128],
    [64, 128, 128],
    [192, 128, 128],
    [0, 64, 0],
    [128, 64, 0],
    [0, 192, 0],
    [128, 192, 0],
    [0, 64, 128],
]

def nms(bboxes, scores, threshold=0.5):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than ``threshold``.

    Args:
        bboxes: tensor of shape (n, 4) holding ``[x1, y1, x2, y2]`` corners.
        scores: tensor of shape (n,) with one score per box.
        threshold: boxes with IoU <= threshold against the kept box survive
            to the next round.

    Returns:
        A ``torch.LongTensor`` with the indices (into ``bboxes``) of the kept
        boxes, ordered by descending score. Empty input gives an empty tensor.
    """
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)

    _, order = scores.sort(0, descending=True)
    # Always work with a 1-D index tensor so the single-element case needs no
    # special handling.
    order = order.reshape(-1)

    keep = []
    while order.numel() > 0:
        i = int(order[0])
        keep.append(i)
        if order.numel() == 1:
            break

        rest = order[1:]
        # Intersection rectangle of box i with each remaining box. The
        # right/bottom edges must be limited by x2[i]/y2[i]; limiting them by
        # x1[i]/y1[i] makes every intersection empty, so nothing would ever
        # be suppressed.
        xx1 = x1[rest].clamp(min=float(x1[i]))
        yy1 = y1[rest].clamp(min=float(y1[i]))
        xx2 = x2[rest].clamp(max=float(x2[i]))
        yy2 = y2[rest].clamp(max=float(y2[i]))

        w = (xx2 - xx1).clamp(min=0)
        h = (yy2 - yy1).clamp(min=0)
        inter = w * h
        ove = inter / (areas[i] + areas[rest] - inter)

        # reshape(-1) instead of squeeze(): keeps a 1-D tensor even when
        # exactly one box survives.
        ids = torch.nonzero(ove <= threshold).reshape(-1)
        if ids.numel() == 0:
            break
        order = rest[ids]

    return torch.LongTensor(keep)

def decoder(pred):
    """Decode a YOLOv1 prediction into boxes, class indices and scores.

    Each grid cell holds two boxes ``[cx, cy, w, h, confidence]`` in channels
    0-4 and 5-9, followed by class probabilities from channel 10 onwards.
    A box is considered when its confidence exceeds 0.1 or equals the global
    maximum confidence, and is kept when ``confidence * max class
    probability`` exceeds 0.1. The kept boxes are then filtered by ``nms``.

    Args:
        pred: prediction tensor, 1x7x7x30 (a leading batch dimension of size
            one is squeezed away). The tensor is not modified.

    Returns:
        ``(boxes, cls_indexs, probs)``: boxes as an (n, 4) tensor of
        ``[x1, y1, x2, y2]`` relative to the image size, class indices as an
        (n,) long tensor and scores as an (n,) tensor. When no box passes the
        filters, a single all-zero placeholder (box, class 0, score 0) is
        returned.
    """
    grid_num = 7
    boxes = []
    cls_indexs = []
    probs = []
    cell_size = 1. / grid_num

    # Detached private copy: slicing the caller's tensor and writing decoded
    # centres back into the slice would silently corrupt the caller's data.
    pred = pred.detach().clone().squeeze(0)  # 7x7x30

    contain = torch.stack((pred[:, :, 4], pred[:, :, 9]), 2)  # [7, 7, 2]
    # Confident boxes, plus the single most confident one as a candidate.
    mask = (contain > 0.1) | (contain == contain.max())  # [7, 7, 2]

    for i in range(grid_num):
        for j in range(grid_num):
            for b in range(2):
                if not mask[i, j, b]:
                    continue
                box = pred[i, j, b * 5:b * 5 + 4]
                contain_prob = pred[i, j, b * 5 + 4].reshape(1)
                # Top-left corner of cell (row i, column j), image-relative.
                xy = pred.new_tensor([j, i]) * cell_size
                # Cell-relative centre -> image-relative centre.
                cxcy = box[:2] * cell_size + xy
                half_wh = 0.5 * box[2:]
                # Convert [cx, cy, w, h] to [x1, y1, x2, y2].
                box_xy = torch.cat((cxcy - half_wh, cxcy + half_wh))
                max_prob, cls_index = torch.max(pred[i, j, 10:], 0)
                score = contain_prob * max_prob
                if float(score[0]) > 0.1:
                    boxes.append(box_xy.view(1, 4))
                    # torch.max returns a 0-dim index; make it 1-D so that
                    # torch.cat below can concatenate the entries.
                    cls_indexs.append(cls_index.reshape(1))
                    probs.append(score)

    if len(boxes) == 0:
        boxes = torch.zeros((1, 4))
        probs = torch.zeros(1)
        # Long dtype, matching the non-empty branch.
        cls_indexs = torch.zeros(1, dtype=torch.long)
    else:
        boxes = torch.cat(boxes, 0)  # (n, 4)
        probs = torch.cat(probs, 0)  # (n,)
        cls_indexs = torch.cat(cls_indexs, 0)  # (n,)

    keep = nms(boxes, probs)
    return boxes[keep], cls_indexs[keep], probs[keep]

def predict_gpu(model,image_name,root_path='/Users/ls/PycharmProjects/YOLOV1_LS/VOCdevkit/VOC2007/JPEGImages/'):
    """Run the detector on one image and return its detections.

    Pipeline: read ``root_path + image_name``, resize to 448x448, convert
    BGR to RGB, subtract the per-channel mean, run ``model``, decode the
    output with ``decoder`` and scale the relative boxes back to the original
    image size.

    Args:
        model: network called as ``model(batch)`` on a 1x3x448x448 CPU float
            tensor (the ``.cuda()`` transfer is disabled in this version).
        image_name: file name of the image; also copied into every result.
        root_path: prefix concatenated in front of ``image_name``.

    Returns:
        A list with one entry per detection:
        ``[(x1, y1), (x2, y2), class_name, image_name, prob]``, with integer
        pixel coordinates in the original image.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    result = []
    image_path = root_path + image_name
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError('cannot read image: ' + image_path)

    # 1 image preprocessing
    h, w, _ = image.shape
    img = cv2.resize(image, (448, 448))  # fixed network input size
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # colour space conversion
    mean = (123, 117, 104)  # RGB
    img = img - np.array(mean, dtype=np.float32)  # mean subtraction
    transform = transforms.Compose([transforms.ToTensor(),])
    img = transform(img)  # HWC array -> CHW tensor
    img = img[None, :, :, :]  # add the batch dimension
    # img = img.cuda()

    # 2 prediction; no_grad replaces the removed Variable(volatile=True)
    with torch.no_grad():
        pred = model(img)  # 1x7x7x30
    pred = pred.cpu()

    # 3 decoding
    boxes, cls_indexs, probs = decoder(pred)
    for i, box in enumerate(boxes):
        x1 = int(box[0] * w)
        x2 = int(box[2] * w)
        y1 = int(box[1] * h)
        y2 = int(box[3] * h)
        cls_index = cls_indexs[i]
        # Skip an unusable entry instead of returning None for the whole call.
        if cls_index.numel() == 0:
            continue
        cls_index = int(cls_index)  # convert tensor to int
        prob = float(probs[i])
        result.append([(x1, y1), (x2, y2), VOC_CLASSES[cls_index], image_name, prob])
    return result

if __name__ == '__main__':
    # Demo: detect objects in one image and write an annotated copy to
    # result.jpg.
    model = resnet50()
    print('load model...')
    # NOTE(review): weight loading is disabled, so the model below runs with
    # whatever parameters resnet50() creates; enable this once a checkpoint
    # is available.
    # model.load_state_dict(torch.load('best.pth'))
    model.eval()
    #model.cuda()
    image_name = '000015.jpg'
    # NOTE(review): this reads the image from the current directory, while
    # predict_gpu reads it from its own root_path default; confirm both
    # resolve to the same file.
    image = cv2.imread(image_name)
    # cv2.imread returns None on failure; fail here rather than while drawing.
    if image is None:
        raise FileNotFoundError('cannot read image: ' + image_name)
    print('predicting...')
    result = predict_gpu(model,image_name)
    #  4 draw the boxes
    for left_up,right_bottom,class_name, _ ,prob in result:
        color = Color[VOC_CLASSES.index(class_name)]
        cv2.rectangle(image,left_up,right_bottom,color,2)
        label = class_name+str(round(prob,2))
        text_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        # Filled label background placed just above the box's top-left corner.
        p1 = (left_up[0], left_up[1]- text_size[1])
        cv2.rectangle(image, (p1[0] - 2//2, p1[1] - 2 - baseline), (p1[0] + text_size[0], p1[1] + text_size[1]), color, -1)
        cv2.putText(image, label, (p1[0], p1[1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255,255,255), 1, 8)
    cv2.imwrite('result.jpg',image)

```

(2)NMS

NMS的目的是根据IoU删除重复的预测框,原理是同一物体的预测框IoU 大,不同物体的预测框IoU小。

I. 计算预测框面积。

II. 对置信度降序排列,返回排序下标。取出排序后置信度最高的预测框,计算其余框与该框的IoU。

III. 根据阈值筛选保留的框,并对保留的框做同样的操作,直到剩余一个框,则停止操作。

```python

def nms(bboxes, scores, threshold=0.5):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than ``threshold``.

    Args:
        bboxes: tensor of shape (n, 4) holding ``[x1, y1, x2, y2]`` corners.
        scores: tensor of shape (n,) with one score per box.
        threshold: boxes with IoU <= threshold against the kept box survive
            to the next round.

    Returns:
        A ``torch.LongTensor`` with the indices (into ``bboxes``) of the kept
        boxes, ordered by descending score. Empty input gives an empty tensor.
    """
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]

    # 1 area of every predicted box
    areas = (x2 - x1) * (y2 - y1)

    # 2 sort by score, highest first
    _, order = scores.sort(0, descending=True)
    # Always work with a 1-D index tensor so the single-element case needs no
    # special handling.
    order = order.reshape(-1)

    keep = []
    while order.numel() > 0:
        # 3 take the best remaining box
        i = int(order[0])
        keep.append(i)
        if order.numel() == 1:
            break

        # 4 IoU between that box and every other remaining box
        rest = order[1:]
        # The right/bottom edges of the intersection must be limited by
        # x2[i]/y2[i]; limiting them by x1[i]/y1[i] makes every intersection
        # empty, so nothing would ever be suppressed.
        xx1 = x1[rest].clamp(min=float(x1[i]))
        yy1 = y1[rest].clamp(min=float(y1[i]))
        xx2 = x2[rest].clamp(max=float(x2[i]))
        yy2 = y2[rest].clamp(max=float(y2[i]))

        w = (xx2 - xx1).clamp(min=0)
        h = (yy2 - yy1).clamp(min=0)
        inter = w * h
        ove = inter / (areas[i] + areas[rest] - inter)

        # 5 drop the boxes that overlap too much
        # reshape(-1) instead of squeeze(): keeps a 1-D tensor even when
        # exactly one box survives.
        ids = torch.nonzero(ove <= threshold).reshape(-1)
        if ids.numel() == 0:
            break

        # 6 boxes with little or no overlap go on to the next round
        order = rest[ids]

    return torch.LongTensor(keep)

```

(3)解码

I. 取出预测值中的置信度,根据置信度阈值初筛预测框。

II.遍历输出特征图的行、列、每个网格的框,取出对应的预测框、类别概率。根据预测偏移计算预测框的中心点。预测类别概率与置信度乘积作为最终的预测概率,再根据最终的预测概率设置阈值筛选一遍框。

III.根据预测框和物体类别概率进行非极大值抑制,输出符合条件的预测值。

```python

def decoder(pred):
    """Decode a YOLOv1 prediction into boxes, class indices and scores.

    Each grid cell holds two boxes ``[cx, cy, w, h, confidence]`` in channels
    0-4 and 5-9, followed by class probabilities from channel 10 onwards.
    A box is considered when its confidence exceeds 0.1 or equals the global
    maximum confidence, and is kept when ``confidence * max class
    probability`` exceeds 0.1. The kept boxes are then filtered by ``nms``.

    Args:
        pred: prediction tensor, 1x7x7x30 (a leading batch dimension of size
            one is squeezed away). The tensor is not modified.

    Returns:
        ``(boxes, cls_indexs, probs)``: boxes as an (n, 4) tensor of
        ``[x1, y1, x2, y2]`` relative to the image size, class indices as an
        (n,) long tensor and scores as an (n,) tensor. When no box passes the
        filters, a single all-zero placeholder (box, class 0, score 0) is
        returned.
    """
    grid_num = 7
    boxes = []
    cls_indexs = []
    probs = []
    cell_size = 1. / grid_num

    # Detached private copy: slicing the caller's tensor and writing decoded
    # centres back into the slice would silently corrupt the caller's data.
    pred = pred.detach().clone().squeeze(0)  # 7x7x30

    # 1 pre-select boxes by confidence
    contain = torch.stack((pred[:, :, 4], pred[:, :, 9]), 2)  # [7, 7, 2]
    # Confident boxes, plus the single most confident one as a candidate.
    mask = (contain > 0.1) | (contain == contain.max())  # [7, 7, 2]

    for i in range(grid_num):
        for j in range(grid_num):
            for b in range(2):
                if not mask[i, j, b]:
                    continue
                box = pred[i, j, b * 5:b * 5 + 4]
                contain_prob = pred[i, j, b * 5 + 4].reshape(1)
                # Top-left corner of cell (row i, column j), image-relative.
                xy = pred.new_tensor([j, i]) * cell_size

                # 2 decode: cell-relative centre -> image-relative centre,
                # then [cx, cy, w, h] -> [x1, y1, x2, y2]
                cxcy = box[:2] * cell_size + xy
                half_wh = 0.5 * box[2:]
                box_xy = torch.cat((cxcy - half_wh, cxcy + half_wh))
                max_prob, cls_index = torch.max(pred[i, j, 10:], 0)

                # 3 filter by the final score
                score = contain_prob * max_prob
                if float(score[0]) > 0.1:
                    boxes.append(box_xy.view(1, 4))
                    # torch.max returns a 0-dim index; make it 1-D so that
                    # torch.cat below can concatenate the entries.
                    cls_indexs.append(cls_index.reshape(1))
                    probs.append(score)

    if len(boxes) == 0:
        boxes = torch.zeros((1, 4))
        probs = torch.zeros(1)
        # Long dtype, matching the non-empty branch.
        cls_indexs = torch.zeros(1, dtype=torch.long)
    else:
        boxes = torch.cat(boxes, 0)  # (n, 4)
        probs = torch.cat(probs, 0)  # (n,)
        cls_indexs = torch.cat(cls_indexs, 0)  # (n,)

    # 4 non-maximum suppression
    keep = nms(boxes, probs)
    return boxes[keep], cls_indexs[keep], probs[keep]

```


YOLOV1_pytorch_(2)的评论 (共 条)

分享到微博请遵守国家法律