zl程序教程

您现在的位置是:首页 >  其它

当前栏目

yolo_model to output理解

to 理解 model output yolo
2023-09-14 09:15:53 时间

如果想了解 YOLOv3 的非极大值抑制(NMS)算法,可以看这里。

由神经网络的特征层到输出层

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 17 14:48:19 2021

@author: ledi
"""

from absl import flags
from absl.flags import FLAGS
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (
    Add,
    Concatenate,
    Conv2D,
    Input,
    Lambda,
    LeakyReLU,
    MaxPool2D,
    UpSampling2D,
    ZeroPadding2D,
    BatchNormalization,
)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import (
    binary_crossentropy,
    sparse_categorical_crossentropy
)
from utils1 import broadcast_iou

# flags.DEFINE_integer('yolo_max_boxes', 100,
#                      'maximum number of boxes per image')
# flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold')
# flags.DEFINE_float('yolo_score_threshold', 0.5, 'score threshold')

# Anchor (width, height) pairs for the full model, divided by 416 so they
# are expressed as fractions (416 is presumably the reference input size
# -- TODO confirm).
yolo_anchors = np.array(
    [
        (10, 13), (16, 30), (33, 23),
        (30, 61), (62, 45), (59, 119),
        (116, 90), (156, 198), (373, 326),
    ],
    dtype=np.float32,
) / 416
# Row i lists the indices into ``yolo_anchors`` used by the i-th output head.
yolo_anchor_masks = np.array([
    [6, 7, 8],
    [3, 4, 5],
    [0, 1, 2],
])

# Same layout for the tiny variant: six anchors, two output heads.
yolo_tiny_anchors = np.array(
    [
        (10, 14), (23, 27), (37, 58),
        (81, 82), (135, 169), (344, 319),
    ],
    dtype=np.float32,
) / 416
yolo_tiny_anchor_masks = np.array([
    [3, 4, 5],
    [0, 1, 2],
])


def DarknetConv(x, filters, size, strides=1, batch_norm=True):
    """Apply one Darknet convolution unit to ``x``.

    The unit is a ``Conv2D`` with an L2 kernel regulariser (0.0005),
    optionally followed by ``BatchNormalization`` and ``LeakyReLU(0.1)``.

    Args:
        x: Input tensor.
        filters: Number of convolution filters.
        size: Convolution kernel size.
        strides: Convolution stride. For any value other than 1 the input
            is first zero-padded by one row at the top and one column on
            the left, and the convolution uses ``'valid'`` padding;
            otherwise ``'same'`` padding is used.
        batch_norm: When true the convolution has no bias and is followed
            by batch normalisation and a leaky ReLU. When false the biased
            convolution output is returned directly.

    Returns:
        The transformed tensor.
    """
    downsampling = strides != 1
    if downsampling:
        # Top-left half-padding ahead of the strided 'valid' convolution.
        x = ZeroPadding2D(((1, 0), (1, 0)))(x)

    conv = Conv2D(
        filters=filters,
        kernel_size=size,
        strides=strides,
        padding='valid' if downsampling else 'same',
        use_bias=not batch_norm,
        kernel_regularizer=l2(0.0005),
    )
    x = conv(x)

    if not batch_norm:
        return x
    x = BatchNormalization()(x)
    return LeakyReLU(alpha=0.1)(x)


def DarknetResidual(x, filters):
    """Apply a residual unit: 1x1 conv, 3x3 conv, then add the input back.

    Args:
        x: Input tensor; it is added element-wise to the branch output.
        filters: Filter count of the 3x3 convolution; the preceding 1x1
            convolution uses ``filters // 2``.

    Returns:
        The sum of ``x`` and the two-convolution branch.
    """
    shortcut = x
    squeezed = DarknetConv(x, filters // 2, 1)
    expanded = DarknetConv(squeezed, filters, 3)
    return Add()([shortcut, expanded])


def DarknetBlock(x, filters, blocks):
    """Apply a stride-2 3x3 convolution followed by ``blocks`` residual units.

    Args:
        x: Input tensor.
        filters: Filter count used by the convolution and every residual unit.
        blocks: Number of ``DarknetResidual`` units to stack.

    Returns:
        The output tensor of the last residual unit (or of the strided
        convolution when ``blocks`` is 0).
    """
    out = DarknetConv(x, filters, 3, strides=2)
    for _unused in range(blocks):
        out = DarknetResidual(out, filters)
    return out


def Darknet(name=None):
    """Build the Darknet backbone as a Keras model with three outputs.

    Args:
        name: Optional name given to the returned model.

    Returns:
        A ``tf.keras.Model`` taking a 3-channel image tensor of arbitrary
        height/width and returning ``(x_36, x_61, x)``: the outputs of the
        256-filter block, the 512-filter block and the final 1024-filter
        block. The first two are used by callers as skip connections.
    """
    inputs = Input([None, None, 3])

    stem = DarknetConv(inputs, 32, 3)
    stage_64 = DarknetBlock(stem, 64, 1)
    stage_128 = DarknetBlock(stage_64, 128, 2)
    x_36 = DarknetBlock(stage_128, 256, 8)  # exposed as a skip connection
    x_61 = DarknetBlock(x_36, 512, 8)       # exposed as a skip connection
    top = DarknetBlock(x_61, 1024, 4)

    return tf.keras.Model(inputs, (x_36, x_61, top), name=name)


def DarknetTiny(name=None):
    """Build the tiny Darknet backbone as a Keras model with two outputs.

    Args:
        name: Optional name given to the returned model.

    Returns:
        A ``tf.keras.Model`` taking a 3-channel image tensor of arbitrary
        height/width and returning ``(x_8, x)``: the 256-filter feature
        map (used as a skip connection) and the final 1024-filter map.
    """
    inputs = Input([None, None, 3])

    x = inputs
    # Four conv + 2x2/stride-2 max-pool stages with doubling filter counts.
    for width in (16, 32, 64, 128):
        x = DarknetConv(x, width, 3)
        x = MaxPool2D(2, 2, 'same')(x)

    x_8 = DarknetConv(x, 256, 3)  # skip connection
    x = MaxPool2D(2, 2, 'same')(x_8)
    x = DarknetConv(x, 512, 3)
    # This last pool uses stride 1, unlike the stride-2 pools above.
    x = MaxPool2D(2, 1, 'same')(x)
    x = DarknetConv(x, 1024, 3)

    return tf.keras.Model(inputs, (x_8, x), name=name)




# This is a factory: the outer function fixes the number of filters (and the
# model name); the returned inner function does the actual computation.
def YoloConv(filters, name=None):
    """Return a callable that applies the YOLO convolution stack.

    Args:
        filters: Base filter count, e.g. 512 for the 'yolo_conv_0' stage.
        name: Optional name of the Keras sub-model that wraps the stack.

    Returns:
        A function ``yolo_conv(x_in)``. ``x_in`` is either a single tensor
        or a tuple ``(x, x_skip)``; for a tuple, ``x`` goes through a 1x1
        convolution, is upsampled by 2 and concatenated with ``x_skip``
        before the stack. The function prints diagnostic lines and returns
        the sub-model's output for ``x_in``.
    """
    def yolo_conv(x_in):
        if not isinstance(x_in, tuple):
            print('>'*30+'?','not_tuple')
            inputs = Input(x_in.shape[1:])
            x = inputs
        else:
            print('-'*30+'>','is_tuple')
            deep_in = Input(x_in[0].shape[1:])
            skip_in = Input(x_in[1].shape[1:])
            inputs = (deep_in, skip_in)

            # Merge the upsampled deep feature with the skip connection.
            x = DarknetConv(deep_in, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, skip_in])

        # Alternating 1x1 (filters) and 3x3 (2 * filters) convolutions.
        stack = (
            (filters, 1),
            (filters * 2, 3),
            (filters, 1),
            (filters * 2, 3),
            (filters, 1),
        )
        for width, kernel in stack:
            x = DarknetConv(x, width, kernel)
        print(x)
        return Model(inputs, x, name=name)(x_in)
    return yolo_conv


def YoloConvTiny(filters, name=None):
    """Return a callable that applies the tiny-model YOLO convolution.

    Args:
        filters: Filter count of the 1x1 convolution.
        name: Optional name of the Keras sub-model that wraps the layers.

    Returns:
        A function ``yolo_conv(x_in)``. For a single tensor it applies one
        1x1 convolution. For a tuple ``(x, x_skip)`` it applies the 1x1
        convolution to ``x``, upsamples by 2 and concatenates ``x_skip``.
    """
    def yolo_conv(x_in):
        if not isinstance(x_in, tuple):
            inputs = Input(x_in.shape[1:])
            x = DarknetConv(inputs, filters, 1)
        else:
            deep_in = Input(x_in[0].shape[1:])
            skip_in = Input(x_in[1].shape[1:])
            inputs = (deep_in, skip_in)

            # Merge the upsampled deep feature with the skip connection.
            x = DarknetConv(deep_in, filters, 1)
            x = UpSampling2D(2)(x)
            x = Concatenate()([x, skip_in])

        return Model(inputs, x, name=name)(x_in)
    return yolo_conv


def YoloOutput(filters, anchors, classes, name=None):
    """Return a callable that builds one YOLO prediction head.

    Args:
        filters: Base filter count; the 3x3 convolution uses ``filters * 2``.
        anchors: Number of anchors predicted per grid cell (an integer).
        classes: Number of object classes.
        name: Optional name of the Keras sub-model that wraps the head.

    Returns:
        A function ``yolo_output(x_in)`` whose result is reshaped to
        ``(-1, H, W, anchors, classes + 5)``, where H and W are axes 1 and
        2 of the convolution output.
    """
    per_anchor = classes + 5

    def yolo_output(x_in):
        inputs = Input(x_in.shape[1:])
        x = DarknetConv(inputs, filters * 2, 3)
        # Final 1x1 projection has no batch norm / activation.
        x = DarknetConv(x, anchors * per_anchor, 1, batch_norm=False)
        # Split the channel axis into (anchor, per-anchor prediction).
        x = Lambda(
            lambda t: tf.reshape(
                t, (-1, tf.shape(t)[1], tf.shape(t)[2], anchors, per_anchor)
            )
        )(x)
        return tf.keras.Model(inputs, x, name=name)(x_in)
    return yolo_output


# TensorFlow Lite doesn't support tf.size, which tf.meshgrid uses, so this is
# a simple meshgrid re-implementation built from basic TF functions only.
def _meshgrid(n_a, n_b):
    """Return two ``(n_b, n_a)`` index grids.

    Args:
        n_a: Length of the second (column) axis.
        n_b: Length of the first (row) axis.

    Returns:
        A two-element list. In the first grid, element ``[i][j]`` is ``j``
        (``range(n_a)`` tiled ``n_b`` times); in the second, element
        ``[i][j]`` is ``i`` (each value of ``range(n_b)`` repeated ``n_a``
        times).
    """
    out_shape = (n_b, n_a)
    col_index = tf.tile(tf.range(n_a), [n_b])
    row_index = tf.repeat(tf.range(n_b), n_a)
    return [
        tf.reshape(col_index, out_shape),
        tf.reshape(row_index, out_shape),
    ]


def yolo_boxes(pred, anchors, classes):
    """Decode one raw YOLO head output into boxes, objectness and class scores.

    Args:
        pred: Raw head output, indexed as
            ``(batch, grid_y, grid_x, anchors, (x, y, w, h, obj, ...classes))``.
            For example ``output_0`` of the model.
        anchors: Anchor sizes for this head, e.g. ``anchors[masks[0]]``;
            multiplied with ``exp(w, h)``.
        classes: Number of classes, i.e. the size of the trailing class
            slice of the last axis.

    Returns:
        A tuple ``(bbox, objectness, class_probs, pred_box)``:
        ``bbox`` is ``(x1, y1, x2, y2)`` with the centre normalised by the
        grid size; ``objectness`` and ``class_probs`` are the sigmoid of
        the corresponding slices; ``pred_box`` is the sigmoid xy joined
        with the raw wh (the original xywh, kept for the loss).
    """
    # tf.shape(pred)[1:3] is (grid_h, grid_w), i.e. rows first.
    grid_size = tf.shape(pred)[1:3]

    print('grid_size=',grid_size)

    # Split the last axis (85 wide for 80 classes) into 2 + 2 + 1 + classes.
    box_xy, box_wh, objectness, class_probs = tf.split(
        pred, (2, 2, 1, classes), axis=-1)

    box_xy = tf.sigmoid(box_xy)
    objectness = tf.sigmoid(objectness)
    class_probs = tf.sigmoid(class_probs)
    pred_box = tf.concat((box_xy, box_wh), axis=-1)  # original xywh for loss

    # !!! grid[row][col] == (col, row), i.e. each cell stores (x, y).
    grid = _meshgrid(grid_size[1], grid_size[0])
    grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)  # [gy, gx, 1, 2]

    # https://www.cnblogs.com/wangxinzhe/p/10648465.html
    # https://www.shuzhiduo.com/A/qVdeERkndP/

    # box_xy is ordered (x, y), so it must be normalised by (grid_w, grid_h).
    # Dividing by grid_size directly (which is (grid_h, grid_w)) is only
    # correct for square grids.
    grid_wh = tf.cast(tf.stack([grid_size[1], grid_size[0]]), tf.float32)
    box_xy = (tf.cast(box_xy, tf.float32) + tf.cast(grid, tf.float32)) / grid_wh
    box_wh = tf.exp(box_wh) * anchors

    # Convert centre/size to corner coordinates.
    box_x1y1 = box_xy - box_wh / 2
    box_x2y2 = box_xy + box_wh / 2
    bbox = tf.concat([box_x1y1, box_x2y2], axis=-1)

    return bbox, objectness, class_probs, pred_box


'''
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
FLAGS.yolo_max_boxes= 100
FLAGS.yolo_iou_threshold 0.5
FLAGS.yolo_score_threshold 0.5
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
'''

def yolo_nms(outputs, anchors, masks, classes):
    """Merge the per-scale predictions and run non-max suppression on them.

    Only a batch of one image is supported: the combined scores are
    squeezed on axis 0 and the boxes are flattened to ``(-1, 4)``.
    Diagnostic shape information is printed along the way.

    Args:
        outputs: Iterable of per-scale results; elements 0, 1 and 2 of each
            item are read as box corners, objectness and class
            probabilities, e.g. ``(boxes_0[:3], boxes_1[:3], boxes_2[:3])``.
        anchors: Not used by this function.
        masks: Not used by this function.
        classes: Not used by this function.

    Returns:
        A tuple ``(boxes, scores, classes, valid_detections)``, each with a
        leading axis of length 1. The first three are padded to 100
        entries; ``valid_detections`` holds the number of boxes NMS kept.
        Padding uses index 0 for the gathers, so padded slots of ``boxes``
        and ``classes`` repeat the entry of box 0, while padded ``scores``
        are 0.
    """
    # Fixed output length; also the NMS max_output_size.
    # (Corresponds to FLAGS.yolo_max_boxes = 100 in the notes of this file.)
    max_boxes = 100

    def _flatten(t):
        # Keep the first and last axes and merge everything in between.
        return tf.reshape(t, (tf.shape(t)[0], -1, tf.shape(t)[-1]))

    # Per-scale pieces: box corners (4 values), objectness (1 value) and
    # per-class probabilities (80 values in this example).
    box_parts, conf_parts, prob_parts = [], [], []
    for scale_out in outputs:
        box_parts.append(_flatten(scale_out[0]))
        print(scale_out[0].shape,scale_out[1].shape,scale_out[2].shape)
        conf_parts.append(_flatten(scale_out[1]))
        prob_parts.append(_flatten(scale_out[2]))

    # Shapes recorded by the original author for this loop:
    #   (1, 13, 13, 3, 4) (1, 13, 13, 3, 1) (1, 13, 13, 3, 80)
    #   (1, 26, 26, 3, 4) (1, 26, 26, 3, 1) (1, 26, 26, 3, 80)
    #   (1, 52, 52, 3, 4) (1, 52, 52, 3, 1) (1, 52, 52, 3, 80)

    bbox = tf.concat(box_parts, axis=1)
    print('   bbox.shape  =',bbox.shape)           # (1, 10647, 4)
    confidence = tf.concat(conf_parts, axis=1)
    print('confidence.shape=',confidence.shape)    # (1, 10647, 1)
    class_probs = tf.concat(prob_parts, axis=1)
    print('class_probs.shape=',class_probs.shape)  # (1, 10647, 80)

    # Here 10647 = (13*13 + 26*26 + 52*52) * 3.
    # The original author also recorded a run printing 21294 instead:
    #      bbox.shape  = (1, 21294, 4)
    #   confidence.shape= (1, 21294, 1)
    #   class_probs.shape= (1, 21294, 80)

    # Broadcast multiply: objectness * class probability -> [1, 10647, 80].
    scores = confidence * class_probs
    print('scores.shape=',scores.shape)

    # Drop the batch axis -> (10647, 80).
    per_box_scores = tf.squeeze(scores, axis=0)
    print('dscores.shape=',per_box_scores.shape)

    # Best class score of every row (one row per candidate box).
    best_score = tf.reduce_max(per_box_scores, axis=1)

    # Boxes flattened to (10647, 4).
    flat_boxes = tf.reshape(bbox, (-1, 4))
    print('bbox.shape=',flat_boxes.shape)

    # Index of the best-scoring class of every row.
    class_ids = tf.argmax(per_box_scores, axis=1)
    print('classes')

    # Non-max suppression over all candidate boxes.
    keep, keep_scores = tf.image.non_max_suppression_with_scores(
        boxes=flat_boxes,           # every candidate box
        scores=best_score,          # one score per candidate box
        max_output_size=max_boxes,  # keep at most this many boxes
        iou_threshold=0.5,          # IoU threshold between overlapping boxes
        score_threshold=0.5,        # minimum score for a box to be kept
        soft_nms_sigma=0.5          # non-zero sigma; see the TF docs (Soft-NMS)
    )

    num_valid = tf.shape(keep)[0]

    # Pad indices and scores with zeros up to max_boxes entries.
    pad = max_boxes - num_valid
    keep = tf.concat([keep, tf.zeros(pad, tf.int32)], 0)
    keep_scores = tf.concat([keep_scores, tf.zeros(pad, tf.float32)], -1)

    # tf.gather picks rows by index, like indexing a list.
    boxes = tf.gather(flat_boxes, keep)
    print('boxes.shape=',boxes.shape)
    boxes = tf.expand_dims(boxes, axis=0)
    print('boxes.shape=',boxes.shape)

    scores = keep_scores
    print('scores.shape=',scores.shape)
    scores = tf.expand_dims(scores, axis=0)
    print('scores.shape=',scores.shape)

    class_ids = tf.gather(class_ids, keep)
    print('classes.shape=',class_ids.shape)
    class_ids = tf.expand_dims(class_ids, axis=0)
    print('classes.shape=',class_ids.shape)

    valid_detections = tf.expand_dims(num_valid, axis=0)

    return boxes, scores, class_ids, valid_detections


# def YoloV3(size=None, channels=3, anchors=yolo_anchors,
#            masks=yolo_anchor_masks, classes=80, training=False):
    
    
# Module-level walk-through of the YoloV3 model body, using fixed settings
# instead of function parameters.
size = None
channels = 3
anchors = yolo_anchors
masks = yolo_anchor_masks
classes = 80
training = False
# x = inputs = Input([size, size, channels], name='input')

# Features are extracted from the input array x below.
#
# Shapes recorded by the original author:
#   x_36.shape = TensorShape([1, 52, 52, 256])
#   x_61.shape = TensorShape([1, 26, 26, 512])
#   x.shape    = TensorShape([1, 13, 13, 1024])
#
# This is the multi-scale aspect of YOLOv3: features are taken from several
# depths of the backbone at once (x_36, x_61 and x below).

# Synthetic input: a ramp over one 416x416x3 image, scaled into [0, 1).
# The divisor must be parenthesised: `arr / 1*416*416*3` parses as
# `((arr / 1) * 416) * 416 * 3` and would multiply the ramp instead.
_n_values = 1 * 416 * 416 * 3
x = inputs = np.arange(_n_values).reshape(1, 416, 416, 3) / _n_values

x_36, x_61, x = Darknet(name='yolo_darknet')(x)

x = inputs = YoloConv(512, name='yolo_conv_0')(x)

# Head 0 uses the deepest feature x only.
output_0 = YoloOutput(512, len(masks[0]), classes, name='yolo_output_0')(x)
# Head 1 fuses x with x_61.
x = YoloConv(256, name='yolo_conv_1')((x, x_61))
output_1 = YoloOutput(256, len(masks[1]), classes, name='yolo_output_1')(x)

# Head 2 fuses the previous result (which already includes x_61) with x_36.
x = YoloConv(128, name='yolo_conv_2')((x, x_36))
output_2 = YoloOutput(128, len(masks[2]), classes, name='yolo_output_2')(x)

# output_0, output_1 and output_2 all have shape
# (None, None, None, 3, 80 + 5).

# if training:
#     return Model(inputs, (output_0, output_1, output_2), name='yolov3')

# Boxes decoded with the largest anchors (masks[0]).
boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes),
                 name='yolo_boxes_0')(output_0)

# Boxes decoded with the medium anchors (masks[1]).
boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes),
                 name='yolo_boxes_1')(output_1)
# Boxes decoded with the smallest anchors (masks[2]).
boxes_2 = Lambda(lambda x: yolo_boxes(x, anchors[masks[2]], classes),
                 name='yolo_boxes_2')(output_2)

# Only the first three items of each yolo_boxes result (bbox, objectness,
# class_probs) are passed on to NMS.
outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes),
                 name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3]))