Using a Trained YOLOv5 Model and Annotating the Detection Results
2023-09-11 14:20:47
This article explains how to run a trained YOLOv5 model and display its results. The code used is as follows.
import torch
from library import DetectMultiBackend, LoadImages, Annotator, non_max_suppression, scale_coords, colors
import cv2
import numpy as np
import base64

device = torch.device('cpu')
model = DetectMultiBackend("flaskshi/date/best.pt", device=device, dnn=False)
if model.pt:
    model.model.float()
print("Model loaded")

def detect_img(img0):  # run inference on a single image
    device = torch.device('cpu')
    stride, names = model.stride, model.names  # model stride and class names
    dataset = LoadImages(img0=img0, img_size=[640, 640], stride=stride, auto=False)  # letterbox/format the input image
    # print(dataset)
    for im, im0s in dataset:
        im = (torch.from_numpy(im).to(device).float() / 255)[None]  # uint8 image to normalized float tensor with batch dim
        pred = model(im, augment=False, visualize=False)  # run inference
        det = non_max_suppression(pred, 0.25, 0.45, None, False, max_det=1000)[0]  # filter raw predictions with NMS
        im0 = im0s.copy()
        annotator = Annotator(im0, line_width=0, example=str(names))
        data = dict.fromkeys(names, 0)  # per-class counter
        if len(det):
            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()  # the detections, mapped back to the original image
            for *xyxy, conf, cls in reversed(det):  # xyxy: box corners, conf: confidence, cls: class index
                c = int(cls)
                data[names[c]] += 1
                label = f'{names[c][0]}{data[names[c]]}'
                annotator.box_label(xyxy, label, color=colors(c, True))  # annotate the image, i.e. draw the box
        im0 = annotator.result()
        return data, im0

img = cv2.imread('flaskshi/a1.jpg')
# img0 = cv2.imdecode(img)
count, im0 = detect_img(img)
print(count)
image = cv2.imencode('.jpg', im0)[1]  # JPEG-encode the annotated image
img = str(base64.b64encode(image))[2:-1]  # base64 string, stripping the b'...' wrapper
# cv2.imshow('a1', img1)
with open('flaskshi/path/ai.jpg', 'wb') as f:
    f.write(base64.b64decode(img))
img1 = cv2.imread('flaskshi/path/ai.jpg', 1)
cv2.imshow('a1', img1)
cv2.waitKey(0)
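
Writing the decoded bytes to disk and reading the file back only serves to verify the base64 round trip (the base64 string is presumably what the Flask side of this project would return to a client). As a minimal alternative sketch, using only the variables already defined above, the string can be decoded back into an image entirely in memory:

buf = np.frombuffer(base64.b64decode(img), dtype=np.uint8)  # raw JPEG bytes
img1 = cv2.imdecode(buf, cv2.IMREAD_COLOR)  # back to a BGR array, no temporary file
cv2.imshow('a1', img1)
cv2.waitKey(0)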
Following how YOLOv5 itself uses these functions, this code wraps the ones it needs in an extra module, library.py, whose code is below.
import json
import torch
import torch.nn as nn
from pathlib import Path
import numpy as np
import platform
import glob
import re
import os
import cv2
import time
import torchvision
from PIL import Image, ImageDraw, ImageFont
IMG_FORMATS = 'bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp' # include image suffixes
VID_FORMATS = 'asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'ts', 'wmv' # include video suffixes

def autopad(k, p=None):  # kernel, padding
    if p is None:
        p = k // 2 if isinstance(k, int) else (x // 2 for x in k)  # auto-pad
    return p

class Ensemble(nn.ModuleList):
    def __init__(self):
        super().__init__()

    def forward(self, x, augment=False, profile=False, visualize=False):
        y = []
        for module in self:
            y.append(module(x, augment, profile, visualize)[0])
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output

def attempt_load(weights, map_location=None, inplace=True, fuse=True):
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        ckpt = torch.load(Path(str(w).strip().replace("'", '')), map_location=map_location)  # load
        ckpt = (ckpt.get('ema') or ckpt['model']).float()  # FP32 model
        model.append(ckpt.fuse().eval() if fuse else ckpt.eval())  # fused or un-fused model in eval mode

    for m in model.modules():
        m.inplace = inplace  # torch 1.7.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model
    else:
        print(f'Ensemble created with {weights}\n')
        for k in ['names']:
            setattr(model, k, getattr(model[-1], k))
        model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
        return model  # return ensemble

class DetectMultiBackend(nn.Module):
    def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False):
        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        pt, jit, onnx, engine = self.model_type(w)  # get backend
        fp16 &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16
        if pt:
            model = attempt_load(weights if isinstance(weights, list) else w, map_location=device)
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model
        self.__dict__.update(locals())  # assign all local variables (stride, names, pt, ...) to self

    def forward(self, im, augment=False, visualize=False, val=False):
        if self.pt:
            y = self.model(im, augment=augment, visualize=visualize)[0]
        if isinstance(y, np.ndarray):
            y = torch.tensor(y, device=self.device)
        return (y, []) if val else y

    @staticmethod
    def model_type(p='path/to/model.pt'):
        suffixes = ['.pt', '.torchscript', '.onnx', '.engine']
        p = Path(p).name
        pt, jit, onnx, engine = (s in p for s in suffixes)
        return pt, jit, onnx, engine
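
# Illustration: model_type only inspects the filename suffix, so the weights used in
# the main script select the PyTorch backend:
#   DetectMultiBackend.model_type('best.pt')  # -> (True, False, False, False)
# Note that only the .pt branch is implemented in this trimmed-down class; any other
# suffix would leave self.model unset.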

class LoadImages:
    # Single-image version of the YOLOv5 dataloader: takes an in-memory BGR array instead of a file path
    def __init__(self, img0, img_size=640, stride=32, auto=True):
        self.img0 = img0
        self.img_size = img_size
        self.stride = stride
        self.mode = 'image'
        self.auto = auto
        self.nf = 1  # number of files (one in-memory image)

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration  # yield the single image once, then stop
        self.count += 1
        img0 = self.img0  # already-decoded BGR image, no cv2.imread(path) needed

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride, auto=self.auto)[0]
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)
        return img, img0

    def new_video(self, path):
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files
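
# Usage sketch (illustrative values): unlike the stock YOLOv5 dataloader, this version
# takes an already-decoded BGR array and yields it exactly once:
#   dataset = LoadImages(img0=cv2.imread('a1.jpg'), img_size=[640, 640], stride=32, auto=False)
#   for im, im0 in dataset:
#       pass  # im: CHW, RGB, padded/resized; im0: the original BGR array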

def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
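
# Worked example: with auto=False, a 1080x1920 frame is scaled by
# r = min(640/1080, 640/1920) = 1/3 to 360x640, then padded with 140 gray rows on the
# top and bottom to reach 640x640:
#   im, ratio, (dw, dh) = letterbox(np.zeros((1080, 1920, 3), np.uint8), (640, 640), auto=False)
#   # im.shape -> (640, 640, 3); ratio -> (1/3, 1/3); (dw, dh) -> (0.0, 140.0)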

def box_area(box):
    # box = xyxy(4,n)
    return (box[2] - box[0]) * (box[3] - box[1])

def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """
    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    (a1, a2), (b1, b2) = box1[:, None].chunk(2, 2), box2.chunk(2, 1)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)

    # IoU = inter / (area1 + area2 - inter)
    return inter / (box_area(box1.T)[:, None] + box_area(box2.T) - inter)
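
# Worked example: two 10x10 boxes offset by 5 px overlap in a 5x5 patch, so
# IoU = 25 / (100 + 100 - 25) ≈ 0.143:
#   box_iou(torch.tensor([[0., 0., 10., 10.]]), torch.tensor([[5., 5., 15., 15.]]))
#   # -> tensor([[0.1429]])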

def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y
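
# Worked example: a box centred at (10, 10) with w=4, h=6 becomes the corner pair
# (8, 7) to (12, 13):
#   xywh2xyxy(np.array([[10., 10., 4., 6.]]))  # -> [[8., 7., 12., 13.]]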

def non_max_suppression(prediction,
                        conf_thres=0.25,
                        iou_thres=0.45,
                        classes=None,
                        agnostic=False,
                        multi_label=False,
                        labels=(),
                        max_det=300):
    """Non-Maximum Suppression (NMS) on inference results to reject overlapping bounding boxes

    Returns:
        list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """
    bs = prediction.shape[0]  # batch size
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    max_wh = 7680  # (pixels) maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 0.1 + 0.03 * bs  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + 5), device=x.device)
            v[:, :4] = lb[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(lb)), lb[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
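
# Worked example: for a model with 2 classes the raw prediction has shape
# (batch, anchors, 7) = [x, y, w, h, obj, cls0, cls1]. Two heavily overlapping boxes of
# the same class collapse to one detection; the distant box survives:
#   pred = torch.tensor([[[50., 50., 20., 20., 0.9, 0.8, 0.1],
#                         [52., 50., 20., 20., 0.8, 0.7, 0.1],
#                         [200., 200., 30., 30., 0.9, 0.1, 0.9]]])
#   non_max_suppression(pred, 0.25, 0.45)[0].shape  # -> torch.Size([2, 6])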

def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords

def clip_coords(boxes, shape):
    # Clip xyxy bounding boxes to image shape (height, width)
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, shape[1])  # x1
        boxes[:, 1].clamp_(0, shape[0])  # y1
        boxes[:, 2].clamp_(0, shape[1])  # x2
        boxes[:, 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2
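
# Worked example: this inverts the letterbox example above. A box at (100, 200, 300, 400)
# in the padded 640x640 image maps back to the original 1080x1920 frame by removing the
# 140 px vertical padding and dividing by gain = 1/3:
#   coords = torch.tensor([[100., 200., 300., 400.]])
#   scale_coords((640, 640), coords, (1080, 1920))  # -> [[300., 180., 900., 780.]]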

FONT = 'Arial.ttf'  # https://ultralytics.com/assets/Arial.ttf
RANK = int(os.getenv('RANK', -1))

def is_ascii(s=''):
    # Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7)
    s = str(s)  # convert list, tuple, None, etc. to str
    return len(s.encode().decode('ascii', 'ignore')) == len(s)

def is_chinese(s='人工智能'):
    # Is string composed of any Chinese characters?
    return True if re.search('[\u4e00-\u9fff]', str(s)) else False

def check_font(font=FONT):
    # Download font to CONFIG_DIR if necessary
    font = Path(font)
    if not font.exists() and not (CONFIG_DIR / font.name).exists():
        url = "https://ultralytics.com/assets/" + font.name
        torch.hub.download_url_to_file(url, str(font), progress=False)

def is_writeable(dir, test=False):
    # Return True if directory has write permissions, test opening a file with write permissions if test=True
    if test:  # method 1
        file = Path(dir) / 'tmp.txt'
        try:
            with open(file, 'w'):  # open file with write permissions
                pass
            file.unlink()  # remove file
            return True
        except OSError:
            return False
    else:  # method 2
        return os.access(dir, os.R_OK)  # possible issues on Windows

def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'):
    # Return path of user configuration directory. Prefer environment variable if exists. Make dir if required.
    env = os.getenv(env_var)
    if env:
        path = Path(env)  # use environment variable
    else:
        cfg = {'Windows': 'AppData/Roaming', 'Linux': '.config', 'Darwin': 'Library/Application Support'}  # 3 OS dirs
        path = Path.home() / cfg.get(platform.system(), '')  # OS-specific config dir
    path = (path if is_writeable(path) else Path('/tmp')) / dir  # GCP and AWS lambda fix, only /tmp is writeable
    path.mkdir(exist_ok=True)  # make if required
    return path
CONFIG_DIR = user_config_dir() # Ultralytics settings dir

class URLError(OSError):
    # Local copy of urllib.error.URLError, so urllib does not need to be imported
    def __init__(self, reason, filename=None):
        self.args = reason,
        self.reason = reason
        if filename is not None:
            self.filename = filename

    def __str__(self):
        return '<urlopen error %s>' % self.reason

def check_pil_font(font=FONT, size=10):
    # Return a PIL TrueType Font, downloading to CONFIG_DIR if necessary
    font = Path(font)
    font = font if font.exists() else (CONFIG_DIR / font.name)
    try:
        return ImageFont.truetype(str(font) if font.exists() else font.name, size)
    except Exception:  # download if missing
        try:
            check_font(font)
            return ImageFont.truetype(str(font), size)
        except OSError:  # not online (urllib's URLError is itself an OSError, so it is caught here)
            return ImageFont.load_default()

class Annotator:
    if RANK in (-1, 0):
        check_pil_font()  # download TTF if necessary

    # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations
    def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
        self.pil = pil or not is_ascii(example) or is_chinese(example)
        self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width (set for both PIL and cv2 paths)
        if self.pil:  # use PIL
            self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
            self.draw = ImageDraw.Draw(self.im)
            self.font = check_pil_font(font='Arial.Unicode.ttf' if is_chinese(example) else font,
                                       size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12))
        else:  # use cv2
            self.im = im

    def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        # Add one xyxy box to image with label
        if self.pil or not is_ascii(label):
            self.draw.rectangle(box, width=self.lw, outline=color)  # box
            if label:
                w, h = self.font.getsize(label)  # text width, height
                outside = box[1] - h >= 0  # label fits outside box
                self.draw.rectangle(
                    (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
                     box[1] + 1 if outside else box[1] + h + 1),
                    fill=color,
                )
                # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
                self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
        else:  # cv2
            p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
            cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
            if label:
                tf = max(self.lw - 1, 1)  # font thickness
                w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
                outside = p1[1] - h - 3 >= 0  # label fits outside box
                p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
                cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA)  # filled
                cv2.putText(self.im,
                            label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                            0,
                            self.lw / 3,
                            txt_color,
                            thickness=tf,
                            lineType=cv2.LINE_AA)

    def rectangle(self, xy, fill=None, outline=None, width=1):
        # Add rectangle to image (PIL-only)
        self.draw.rectangle(xy, fill, outline, width)

    def text(self, xy, text, txt_color=(255, 255, 255)):
        # Add text to image (PIL-only)
        w, h = self.font.getsize(text)  # text width, height
        self.draw.text((xy[0], xy[1] - h + 1), text, fill=txt_color, font=self.font)

    def result(self):
        # Return annotated image as array
        return np.asarray(self.im)
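
# Usage sketch (illustrative values): with an ASCII example string the Annotator takes
# the cv2 path, so no font download is needed:
#   im = np.zeros((480, 640, 3), dtype=np.uint8)
#   ann = Annotator(im, line_width=2, example='abc')
#   ann.box_label([100, 100, 200, 200], 'a1', color=(0, 255, 0))
#   annotated = ann.result()  # numpy array with the labelled box drawn in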

class Colors:
    # Ultralytics color palette https://ultralytics.com/
    def __init__(self):
        # hex = matplotlib.colors.TABLEAU_COLORS.values()
        hex = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB',
               '2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')
        self.palette = [self.hex2rgb('#' + c) for c in hex]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        c = self.palette[int(i) % self.n]
        return (c[2], c[1], c[0]) if bgr else c

    @staticmethod
    def hex2rgb(h):  # rgb order (PIL)
        return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))

colors = Colors()  # create instance for 'from utils.plots import colors'
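
# Illustration: colors cycles a fixed 20-color palette by class index, and bgr=True
# reorders the tuple for cv2 drawing:
#   colors(0, True)  # -> (56, 56, 255), i.e. hex FF3838 in BGR order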
##############################################################################################################
Besides this, the models folder, the path folder, and the utils folder from the original YOLOv5 project also need to be placed in the same directory. The layout looks like this:
E:.
├─date
├─models
│ └─__pycache__
├─path
├─utils
│ └─__pycache__
└─__pycache__
Once all of this is in place you can give it a try. When running detection, remember to adjust some parameters for your own trained model: this code was tuned specifically for the model above and may cause problems with other models, so treat it as a reference only. The original author of this code is xiaofang113 (a CSDN blogger covering Python, ESP32-CAM, and web scraping); he wrote it back when we worked on a project together, and this post breaks his code apart and adds some comments. If you have questions, you can message him directly.
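
For reference, these are the lines most likely to need changing for a different model (a sketch of the main knobs, not an exhaustive list; the paths are this project's own):

model = DetectMultiBackend("flaskshi/date/best.pt", device=device, dnn=False)  # swap in your own weights
dataset = LoadImages(img0=img0, img_size=[640, 640], stride=stride, auto=False)  # match your training image size
det = non_max_suppression(pred, 0.25, 0.45, None, False, max_det=1000)[0]  # conf_thres, iou_thres, max_det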