requirements

python env

conda 22.9.0
python==3.7.4

yolov5 commit id

yolov5 commit id: 3f02fdee1d8f1a6cf18a24be3438096466367d9f

yolov5 requirements.txt

# Usage: pip install -r requirements.txt
# Base ------------------------------------------------------------------------
gitpython #>=3.1.30
matplotlib==3.2.2 #>=3.3
numpy==1.16.5 #>=1.22.2
opencv-python==4.6.0.66 #>=4.1.1
Pillow # >=10.0.1
psutil  # system resources
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
thop>=0.1.1  # FLOPs computation
#torch>=1.8.0  # see https://pytorch.org/get-started/locally (recommended)
#torchvision>=0.9.0
tqdm>=4.64.0
ultralytics #>=8.0.147
# protobuf<=3.20.1  # https://github.com/ultralytics/yolov5/issues/8012

# Logging ---------------------------------------------------------------------
# tensorboard>=2.4.1
# clearml>=1.2.0
# comet

# Plotting --------------------------------------------------------------------
pandas>=1.1.4
seaborn>=0.11.0

# Export ----------------------------------------------------------------------
# coremltools>=6.0  # CoreML export
# onnx>=1.10.0  # ONNX export
# onnx-simplifier>=0.4.1  # ONNX simplifier
# nvidia-pyindex  # TensorRT export
# nvidia-tensorrt  # TensorRT export
# scikit-learn<=1.1.2  # CoreML quantization
# tensorflow>=2.4.0  # TF exports (-cpu, -aarch64, -macos)
# tensorflowjs>=3.9.0  # TF.js export
# openvino-dev>=2023.0  # OpenVINO export

# Deploy ----------------------------------------------------------------------
setuptools>=65.5.1 # Snyk vulnerability fix
# tritonclient[all]~=2.24.0

# Extras ----------------------------------------------------------------------
# ipython  # interactive notebook
# mss  # screenshots
# albumentations>=1.0.3
# pycocotools>=2.0.6  # COCO mAP

pip

absl-py==2.0.0
cachetools==5.3.2
certifi @ file:///croot/certifi_1671487769961/work/certifi
charset-normalizer==3.3.2
cycler==0.11.0
Cython==3.0.5
fonttools==4.38.0
gitdb==4.0.11
GitPython==3.1.40
google-auth==2.23.4
google-auth-oauthlib==0.4.6
grpcio==1.59.3
idna==3.4
importlib-metadata==6.7.0
kiwisolver==1.4.5
Markdown==3.4.4
MarkupSafe==2.1.3
matplotlib==3.2.2
numpy==1.16.5
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
nvidia-tensorrt==8.4.1.5
oauthlib==3.2.2
opencv-python==4.6.0.66
packaging==23.2
pandas==1.2.5
Pillow==9.5.0
protobuf==3.20.3
psutil==5.9.6
py-cpuinfo==9.0.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycocotools @ git+https://github.com/cocodataset/cocoapi.git@8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9#subdirectory=PythonAPI
pyparsing==3.1.1
python-dateutil==2.8.2
pytz==2023.3.post1
PyYAML==6.0.1
requests==2.31.0
requests-oauthlib==1.3.1
rsa==4.9
scipy==1.7.3
seaborn==0.11.2
sentry-sdk==1.37.1
six==1.16.0
smmap==5.0.1
tensorboard==2.11.2
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
thop==0.1.1.post2209072238
torch @ file:///media/lvsolo/CA89-5817/datasets/helmet/hard-hat-detection/codes/yolov5/torch-1.10.0%2Bcu113-cp37-cp37m-linux_x86_64.whl
torchaudio @ file:///media/lvsolo/CA89-5817/datasets/helmet/hard-hat-detection/codes/yolov5/torchaudio-0.10.0%2Bcu113-cp37-cp37m-linux_x86_64.whl
torchvision @ file:///media/lvsolo/CA89-5817/datasets/helmet/hard-hat-detection/codes/yolov5/torchvision-0.11.0%2Bcu113-cp37-cp37m-linux_x86_64.whl
tqdm==4.66.1
typing_extensions==4.7.1
ultralytics==8.0.145
urllib3==2.0.7
Werkzeug==2.2.3
zipp==3.15.0

train code

python train.py --data data.yaml --weights yolov5s.pt --epochs 3 --img 640

Convert the PyTorch model to a TensorRT engine

From pt2trt_trt8415.py

import os

# Network input resolution; presumably (height, width) — it is unpacked into the
# last two axes of the dummy input and of the optimization profile further down.
model_input_shape = (640, 640)
pt_model_path = "test_models/best.pt"

# Derive the sibling artifact paths from the checkpoint path. os.path.splitext
# strips only the final extension, so a dot elsewhere in the path (e.g. "./runs"
# or "v1.2/best.pt") no longer truncates the result.
_model_stem = os.path.splitext(pt_model_path)[0]
onnx_model_path = _model_stem + ".onnx"
trt_model_path = _model_stem + ".engine"

# Directory containing the checkpoint, with a trailing separator. A bare file
# name maps to "./" rather than the filesystem root.
model_dir = os.path.join(os.path.dirname(pt_model_path) or ".", "")

# Only referenced from commented-out code in the visible part of this script.
output_shape_for_dynamic = (1, 7, 8400)

print("*" * 50)
print("onnx path:", onnx_model_path)
print("trt path:", trt_model_path)
print("model dir:", model_dir)
print("-" * 50)

"""using ultralytics model.export"""
from ultralytics import YOLO
from ultralytics.models.yolo.detect.val import DetectionValidator
model = YOLO(pt_model_path)  # wrap the trained checkpoint in the ultralytics YOLO API
# Move the wrapped torch module to the GPU and cast it to half precision.
# NOTE(review): every artifact below is tagged "fp32" and half=True is commented
# out on each export call — confirm whether this cast has any effect on the export.
model.model.cuda().half()

# Each export is followed by a rename that appends a descriptive suffix. The
# renames assume the exporter wrote its output to onnx_model_path /
# trt_model_path, so the order export -> mv must be kept.

# ONNX, dynamic=True
path = model.export(format="onnx", dynamic=True,  simplify=True)#, half=True)
os.system("mv "+onnx_model_path + " " + onnx_model_path.split('.')[0] + "_ultrlytics_export_dynamic_fp32.onnx")

# ONNX, dynamic=False
path = model.export(format="onnx", dynamic=False,  simplify=True)#, half=True)
os.system("mv "+onnx_model_path + " " + onnx_model_path.split('.')[0] + "_ultrlytics_export_static_fp32.onnx")

# Move the module to the GPU again before the TensorRT exports.
model.model.cuda()
# TensorRT engine, dynamic=True, built on device 0
path = model.export(format="engine", dynamic=True,  simplify=True, device=0)#, half=True)
os.system("mv "+trt_model_path + " " + trt_model_path.split('.')[0] + "_ultrlytics_export_pt2trt_dynamic_fp32.engine")

# TensorRT engine, dynamic=False, built on device 0
path = model.export(format="engine", dynamic=False,  simplify=True, device=0)#, half=True)
os.system("mv "+trt_model_path + " " + trt_model_path.split('.')[0] + "_ultrlytics_export_pt2trt_static_fp32.engine")


"""using torch export for pt2onnx convertion"""
import torch
import torch.nn
import cv2
import time 
import onnx
import onnxruntime
import numpy as np

from ultralytics.nn.tasks import attempt_load_weights, attempt_load_one_weight


# Load the checkpoint as a plain torch module on the GPU (layers fused).
model = attempt_load_weights(
    pt_model_path,
    device=torch.device('cuda'),
    inplace=True,
    fuse=True,
)

# Dummy input that drives the ONNX trace: batch 1, 3 channels, configured size.
input_tensor = torch.ones((1, 3, *model_input_shape)).cuda()

# Axes of the 'images' input that are left symbolic in the dynamic export.
dynamic_axes = {'images': {2: 'H', 3: 'W'}}

# One pass per export flavour: static shapes first, then dynamic H/W. Each pass
# writes to onnx_model_path, validates the file with the ONNX checker and, only
# if the check passes, renames it with the flavour-specific suffix.
for export_axes, renamed_suffix in (
    (None, "_torch_export_static_fp32.onnx"),
    (dynamic_axes, "_torch_export_dynamic_fp32.onnx"),
):
    with torch.no_grad():
        print(f'process model:{pt_model_path}...')
        torch.onnx.export(
            model,
            input_tensor,
            onnx_model_path,
            opset_version=11,
            input_names=['images'],
            output_names=['output0'],
            dynamic_axes=export_axes,
        )
        onnx_model = onnx.load(onnx_model_path)
        try:
            onnx.checker.check_model(onnx_model)
        except Exception as e:
            print('model incorrect')
            print(e)
        else:
            os.system("mv "+onnx_model_path + " " + onnx_model_path.split('.')[0] + renamed_suffix)
            print('model correct')

"""using trt api for onnx2trt convertion"""
import tensorrt as trt
import os
# Network-creation flag requesting an explicit batch dimension.
EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
TRT_LOGGER = trt.Logger()


def get_engine(onnx_file_path, engine_file_path="", fp16=False, dynamic_in=False, dynamic_out=False):
    """Build a TensorRT engine from an ONNX file and save the serialized plan.

    The engine is always rebuilt; no previously serialized engine is loaded.

    Args:
        onnx_file_path: path of the ONNX model to parse.
        engine_file_path: where to write the serialized plan. When empty,
            nothing is written and only the in-memory engine is returned.
        fp16: enable the FP16 builder flag (requires fast-fp16 hardware).
        dynamic_in: if true, attach an optimization profile for the 'images'
            input.
        dynamic_out: treated exactly like ``dynamic_in`` (either one attaches
            the same profile).

    Returns:
        The deserialized engine, or ``None`` when the ONNX file cannot be
        parsed (parser errors are printed).

    Raises:
        FileNotFoundError: ``onnx_file_path`` does not exist.
        RuntimeError: fp16 was requested on a platform without fast fp16, or
            TensorRT failed to build the network.
    """
    if not os.path.exists(onnx_file_path):
        # Raise instead of exit(0): a missing input is a failure, and exiting
        # with status 0 would report success to whoever launched the script.
        raise FileNotFoundError(
            "ONNX file {} not found, export the model to ONNX first.".format(onnx_file_path)
        )

    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network(EXPLICIT_BATCH) as network, \
            builder.create_builder_config() as config, \
            trt.OnnxParser(network, TRT_LOGGER) as parser, \
            trt.Runtime(TRT_LOGGER) as runtime:
        config.max_workspace_size = 1 << 32  # 4GB
        if fp16:
            if not builder.platform_has_fast_fp16:
                raise RuntimeError("not support fp16")
            # set_flag adds FP16 without clearing the other builder flags.
            config.set_flag(trt.BuilderFlag.FP16)
        builder.max_batch_size = 1

        print("Loading ONNX file from path {}...".format(onnx_file_path))
        with open(onnx_file_path, "rb") as model:
            print("Beginning ONNX file parsing")
            if not parser.parse(model.read()):
                print("ERROR: Failed to parse the ONNX file.")
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None

        print("Completed parsing of ONNX file")
        print("Building an engine from file {}; this may take a while...".format(onnx_file_path))

        if dynamic_in or dynamic_out:
            print("dynamic_in or out:", dynamic_in, dynamic_out)
            # Dynamic shapes are declared through an optimization profile on
            # the builder config: (min, opt, max) shapes for the 'images'
            # input. Only the third axis varies, from 1 up to
            # model_input_shape[0]; runtime inputs must stay in that range.
            profile = builder.create_optimization_profile()
            profile.set_shape(
                'images',
                (1, 3, 1, model_input_shape[1]),
                (1, 3, model_input_shape[0] * 3 // 4, model_input_shape[1]),
                (1, 3, *model_input_shape),
            )
            config.add_optimization_profile(profile)

        plan = builder.build_serialized_network(network, config)
        print('plan:', plan, network, config, flush=True)
        if plan is None:
            # build_serialized_network signals failure by returning None; stop
            # here with a clear message instead of failing inside deserialize.
            raise RuntimeError(
                "TensorRT failed to build an engine from {}".format(onnx_file_path)
            )
        engine = runtime.deserialize_cuda_engine(plan)
        print("Completed creating Engine")
        if engine_file_path:
            with open(engine_file_path, "wb") as f:
                f.write(plan)
        return engine

def _convert_variant(onnx_path, engine_path, label, fp16, dynamic_in, dynamic_out):
    """Build one engine variant and rename it to its labelled file name.

    Args:
        onnx_path: source ONNX file.
        engine_path: temporary path the engine is first written to.
        label: human-readable variant name such as "static fp32"; with the
            space replaced by "_" it becomes part of the final file name.
        fp16, dynamic_in, dynamic_out: forwarded to get_engine.

    Returns:
        True when a new engine was produced, False when the target already
        existed or the conversion failed (the reason is printed).
    """
    final_path = (os.path.splitext(engine_path)[0]
                  + "_onnx_trtapi_" + label.replace(' ', '_') + ".engine")
    print(label + ':')
    if os.path.exists(final_path):
        print(final_path + " exists")
        print(onnx_path + " " + label + " convert skipped")
        return False
    try:
        engine = get_engine(onnx_path, engine_path, fp16=fp16,
                            dynamic_in=dynamic_in, dynamic_out=dynamic_out)
        if engine is None:
            # get_engine returns None when the ONNX file could not be parsed.
            raise RuntimeError("no engine was produced")
        # Raises if the engine file was not written, so a missing file is
        # reported as a failure instead of a silent "success".
        os.replace(engine_path, final_path)
    except Exception as err:
        print(onnx_path + " " + label + " convert failed:", err)
        return False
    print(onnx_path + " " + label + " convert success")
    return True


# Convert every ONNX file found in model_dir into four engine variants.
for item in os.listdir(model_dir):
    modelname = os.path.join(model_dir, item)
    if not modelname.endswith('.onnx'):
        continue
    bare_name = os.path.splitext(modelname)[0]
    engine_name = bare_name + '.engine'
    print('-'*50)
    print("src modelname:", modelname)
    print('dst engine name:', engine_name)
    # ONNX files exported with dynamic axes carry "dynamic" in their file name.
    dynamic_in = 'dynamic' in os.path.basename(bare_name)
    # dynamic_out is set explicitly per variant, so the value used by one
    # variant can no longer leak into the next one.
    for label, fp16, dynamic_out in (
        ("static fp32", False, False),
        ("dynamic fp32", False, True),
        ("static fp16", True, False),
        ("dynamic fp16", True, True),
    ):
        _convert_variant(modelname, engine_name, label, fp16, dynamic_in, dynamic_out)
        print('-'*50)

run

Run with the PyTorch (.pt) model

import json
import numpy as np
from collections import defaultdict
import sys
sys.path.append('/project/train/src_repo')
import os
from ultralytics.nn.tasks import attempt_load_weights
import torch
from utils.augmentations import letterbox

def init():
    """Load the detection model and prepare it for raw BGR uint8-range input.

    The checkpoint is loaded on the GPU with fused layers and cast to half
    precision. The weights of the first convolution are then rewritten in
    place so that:
      * input channels 0 and 2 are swapped, and
      * every input channel is scaled by 1/255 (for a convolution, scaling the
        weights is equivalent to scaling the input).

    Returns:
        The prepared torch model.
    """
    torch.backends.cudnn.benchmark = True
    model = attempt_load_weights('/project/train/models/detect/train5/weights/best.pt',
                                 device=torch.device('cuda'),
                                 inplace=True,
                                 fuse=True)
    model.half()

    RGB2BGR = True
    # If the model was trained on RGB, swap the first conv's input channels so
    # that BGR images can be fed directly.
    if RGB2BGR:
        # no_grad: the weights are edited in place and must not be tracked.
        with torch.no_grad():
            for name, param in model.named_parameters():
                # Exact match; the previous ``name in ('...')`` was a substring
                # test against a plain string, not a tuple membership test.
                if name == 'model.0.conv.weight':
                    tmp = param[:, 0, :, :].clone()
                    param[:, 0, :, :] = param[:, 2, :, :] / 255.
                    param[:, 2, :, :] = tmp / 255.
                    param[:, 1, :, :] = param[:, 1, :, :] / 255.

    return model

def calc_box_area(box):
    """Return the area of a box given as ``(x1, y1, x2, y2)``."""
    width = box[2] - box[0]
    height = box[3] - box[1]
    return width * height


def in_box(person_box, head_box, ratio_threshold=0.25):
    """Tell whether ``head_box`` lies sufficiently inside ``person_box``.

    Both boxes are ``(x1, y1, x2, y2)``. The result is True when the
    intersection covers strictly more than ``ratio_threshold`` of the head
    box's own area; a head box with non-positive area never matches.
    """
    overlap_w = min(person_box[2], head_box[2]) - max(person_box[0], head_box[0])
    overlap_h = min(person_box[3], head_box[3]) - max(person_box[1], head_box[1])
    overlap = overlap_w * overlap_h if overlap_w > 0 and overlap_h > 0 else 0

    head_area = calc_box_area(head_box)
    if head_area <= 0:
        return False
    return bool(overlap / head_area > ratio_threshold)
                                                      
from ultralytics.engine.results import Results
from ultralytics.utils import ops 
import cv2
import time
def process_image(handle=None, input_image=None, args=None, **kwargs):
    """Run detection on one image and return the alert result as JSON.

    Args:
        handle: model returned by init(); it is called as
            ``handle(tensor, augment=False, visualize=False)``.
        input_image: BGR image array of shape (h, w, c), or a path to an image
            file that is read with ``cv2.imread``.
        args: unused.
        **kwargs: unused.

    Returns:
        str: JSON document with two keys. ``model_data.objects`` lists every
        detection. ``algorithm_data`` reports the riders (classes 0/1) that
        overlap a head/hat/bicycle-helmet box (classes 2/4/5) but no helmet
        box (class 3).

    Raises:
        ValueError: ``input_image`` is None.
        FileNotFoundError: ``input_image`` is a path that cannot be read.
    """
    if input_image is None:
        raise ValueError("input_image is required")

    input_shape = (1024, 1024)
    from_path = isinstance(input_image, str)
    if from_path:
        cv_image = cv2.imread(input_image)
        if cv_image is None:
            # cv2.imread returns None instead of raising on a bad path.
            raise FileNotFoundError(f"cannot read image: {input_image}")
    else:
        cv_image = input_image
    org_shape = cv_image.shape

    # Path inputs use letterbox(auto=True); array inputs use auto=False.
    boxed = letterbox(cv_image, input_shape, stride=32, auto=from_path)[0]
    input_shape = boxed.shape[:2]
    if from_path:
        # NOTE(review): only the path branch divides by 255. init() in this
        # file already scales the first-layer weights by 1/255, so this looks
        # like a double normalisation — confirm before relying on path input.
        boxed = boxed / 255.
    # HWC -> CHW, then add the batch axis. Channel order is left untouched.
    batch = np.expand_dims(boxed.transpose((2, 0, 1)), axis=0)
    torch_image = torch.from_numpy(batch).cuda().half()

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        pred = handle(torch_image, augment=False, visualize=False)[0]

    pred = ops.non_max_suppression(pred,
                                   0.25,  # confidence threshold
                                   0.5,  # IoU threshold
                                   agnostic=False,
                                   max_det=300,
                                   classes=None)

    # A single image is processed, so only the first batch entry is used.
    det = pred[0]
    # Map boxes from the letterboxed size back to the original image size.
    det[:, :4] = ops.scale_boxes(input_shape, det[:, :4], org_shape)
    xyxys = det[:, :4].cpu().numpy().tolist()
    confs = det[:, 4].cpu().numpy().tolist()
    cls = det[:, 5].cpu().numpy().astype(np.int32).tolist()

    dict_res = defaultdict(list)
    fake_result = {"algorithm_data": {},
                   "model_data": {'objects': []}
                   }
    map_cls = {'0': 'motorbike_person', '1': 'electric_scooter_person', '2': 'head',
               '3': 'helmet', '4': 'hat', '5': 'bicycle_helmet'}

    # Group detections by class id and record each one in model_data.
    for box, conf, class_id in zip(xyxys, confs, cls):
        key = str(class_id)
        dict_res[key].append({'xyxy': box, 'conf': conf, 'cl': key})
        fake_result['model_data']['objects'].append({
            'x': box[0],
            'y': box[1],
            'height': box[3] - box[1],
            'width': box[2] - box[0],
            'confidence': conf,
            'name': map_cls[key]
            })

    heads = dict_res['2'] + dict_res['4'] + dict_res['5']  # head-like, not helmet
    helmets = dict_res['3']
    target_info = []
    for person in dict_res['0'] + dict_res['1']:
        # First head-like box overlapping this rider, if any.
        first_head = next(
            (head for head in heads if in_box(person['xyxy'], head['xyxy'])), None)
        has_helmet = any(in_box(person['xyxy'], helmet['xyxy']) for helmet in helmets)
        if first_head is not None and not has_helmet:
            target_info.append({
                'x': first_head['xyxy'][0],
                'y': first_head['xyxy'][1],
                'width': first_head['xyxy'][2] - first_head['xyxy'][0],
                'height': first_head['xyxy'][3] - first_head['xyxy'][1],
                'confidence': first_head['conf'],
                'name': map_cls[first_head['cl']]
                })

    target_count = len(target_info)
    fake_result['algorithm_data'] = {
        "is_alert": target_count > 0,
        "target_count": target_count,
        "target_info": target_info
    }
    print(fake_result, flush=True)

    return json.dumps(fake_result, indent=4)

Run with the TensorRT engine

import json
import numpy as np
from collections import defaultdict
import sys
sys.path.append('/project/train/src_repo')
import os
# os.system('source activate && conda activate yolov8')

# from ultralytics.models.yolo import classify, detect, segment
from ultralytics.nn.tasks import attempt_load_weights
import torch
from utils.augmentations import letterbox

import tensorrt as trt
import os
import time
import pycuda.driver as cuda
#import pycuda.driver as cuda2
import pycuda.autoinit
import numpy as np
import cv2
import tqdm
from ultralytics.utils import ops 


def load_engine(engine_path):
    """Deserialize a TensorRT engine from the serialized plan at ``engine_path``.

    The TensorRT logger is created at ERROR severity.
    """
    error_logger = trt.Logger(trt.Logger.ERROR)
    with open(engine_path, 'rb') as plan_file:
        with trt.Runtime(error_logger) as runtime:
            serialized_plan = plan_file.read()
            return runtime.deserialize_cuda_engine(serialized_plan)


# Checkpoint that init() passes to convert_pt2trt() and from which the engine
# file name is derived.
pt_model_path="/project/train/models/detect/train5/weights/last.pt"
# Fallback shape for the host output buffer: engine_detector uses it when the
# engine reports an output binding shape with fewer than 3 dimensions.
# presumably (batch, 4 box values + 6 class scores, candidates) — TODO confirm
output_shape_for_dynamic = (1, 10, 21504)
# Network input resolution; presumably (height, width) — TODO confirm.
model_input_shape = (1024,1024)

def convert_pt2trt(pt_model_path):
    """Export a YOLO checkpoint to ONNX and build a static fp16 TensorRT engine.

    Steps:
      1. Export ``pt_model_path`` to ONNX with static shapes and rename the
         file to ``<stem>_ultrlytics_export_static_fp32.onnx``.
      2. Build an fp16 engine from that ONNX file and store it as
         ``<stem>_ultrlytics_export_static_fp32_onnx_trtapi_static_fp16.engine``.

    The engine is rebuilt on every call, even when the target file exists.
    Build failures are printed and swallowed, so callers keep working with
    whatever engine file is already on disk.

    Args:
        pt_model_path: path of the ``.pt`` checkpoint.

    Returns:
        The engine path on success, ``None`` when the build failed.
    """
    import os
    import tensorrt as trt
    from ultralytics import YOLO

    stem = os.path.splitext(pt_model_path)[0]
    onnx_model_path = stem + ".onnx"
    static_onnx_path = stem + "_ultrlytics_export_static_fp32.onnx"
    engine_tmp_path = stem + "_ultrlytics_export_static_fp32.engine"
    engine_final_path = stem + "_ultrlytics_export_static_fp32_onnx_trtapi_static_fp16.engine"

    model = YOLO(pt_model_path)
    model.export(format="onnx", dynamic=False, simplify=True)
    # Assumes the exporter wrote next to the checkpoint; if it did not, the
    # build step below reports the missing file.
    if os.path.exists(onnx_model_path):
        os.replace(onnx_model_path, static_onnx_path)

    def _build_fp16_engine(onnx_file_path, engine_file_path):
        """Parse the ONNX file, build an fp16 engine and write the plan to disk."""
        if not os.path.exists(onnx_file_path):
            raise FileNotFoundError("ONNX file {} not found".format(onnx_file_path))
        logger = trt.Logger()
        explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        with trt.Builder(logger) as builder, \
                builder.create_network(explicit_batch) as network, \
                builder.create_builder_config() as config, \
                trt.OnnxParser(network, logger) as parser:
            config.max_workspace_size = 1 << 32  # 4GB
            if not builder.platform_has_fast_fp16:
                raise RuntimeError("not support fp16")
            # set_flag adds FP16 without clearing the other builder flags.
            config.set_flag(trt.BuilderFlag.FP16)
            builder.max_batch_size = 1

            print("Loading ONNX file from path {}...".format(onnx_file_path))
            with open(onnx_file_path, "rb") as onnx_file:
                print("Beginning ONNX file parsing")
                if not parser.parse(onnx_file.read()):
                    errors = [str(parser.get_error(i)) for i in range(parser.num_errors)]
                    raise RuntimeError(
                        "Failed to parse the ONNX file: " + "; ".join(errors))
            print("Completed parsing of ONNX file")
            print("Building an engine from file {}; this may take a while...".format(onnx_file_path))

            # The ONNX file was exported with static shapes, so no
            # optimization profile is attached.
            plan = builder.build_serialized_network(network, config)
            if plan is None:
                # build_serialized_network signals failure by returning None.
                raise RuntimeError(
                    "TensorRT failed to build an engine from {}".format(onnx_file_path))
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as engine_file:
                engine_file.write(plan)

    print('-'*50)
    print("src modelname:", static_onnx_path)
    print('dst engine name:', engine_tmp_path)
    print('-'*50)
    print('static fp16:')
    if os.path.exists(engine_final_path):
        # Informational only: the engine is rebuilt and overwritten.
        print(engine_final_path + " exists")
    try:
        _build_fp16_engine(static_onnx_path, engine_tmp_path)
        os.replace(engine_tmp_path, engine_final_path)
    except Exception as err:
        print(static_onnx_path + " static fp16 convert failed:", err)
        return None
    print(static_onnx_path + " static fp16 convert success")
    return engine_final_path

class engine_detector:
    """Callable wrapper around a TensorRT engine with preallocated buffers.

    The constructor loads the engine, allocates one device input buffer and
    one device output buffer, and runs a few warm-up inferences. Calling the
    instance copies a host image to the device, executes the engine
    synchronously and returns the host output array.
    """

    # Directory the warm-up sample image is taken from, when it is available.
    _WARMUP_DIR = '/home/data/1233'
    # Spatial size the warm-up input (and thus the input buffer) is built for.
    _INPUT_SIZE = (1024, 1024)

    def __init__(self, engine_path):
        """Load the engine at ``engine_path`` and allocate the I/O buffers."""
        self.engine = load_engine(engine_path)
        self.context = self.engine.create_execution_context()

        outshape = self.context.get_binding_shape(1)
        if len(outshape) < 3:
            # Output shape not fully reported by the engine: use the
            # module-level fallback.
            outshape = output_shape_for_dynamic

        self.output = np.empty((outshape), dtype=np.float32)
        image = self._load_warmup_image()

        # Size of the device input buffer; __call__ validates against it.
        self.input_nbytes = image.nbytes
        self.d_input = cuda.mem_alloc(self.input_nbytes)
        self.d_output = cuda.mem_alloc(self.output.nbytes)
        self.bindings = [int(self.d_input), int(self.d_output)]
        self.stream = cuda.Stream()
        # warm up
        for _ in range(10):
            cuda.memcpy_htod(self.d_input, image)
            self.context.execute_v2(self.bindings)
            cuda.memcpy_dtoh(self.output, self.d_output)

    @classmethod
    def _load_warmup_image(cls):
        """Return a float32 (1, 3, H, W) array used to size and warm up the engine.

        The first file of ``_WARMUP_DIR`` is used when it can be read;
        otherwise an all-zero array of the same shape is returned, so the
        detector does not depend on that directory being present.
        """
        try:
            names = os.listdir(cls._WARMUP_DIR)
        except OSError:
            names = []
        sample = cv2.imread(os.path.join(cls._WARMUP_DIR, names[0])) if names else None
        if sample is None:
            return np.zeros((1, 3) + cls._INPUT_SIZE, dtype=np.float32)
        sample = cv2.resize(sample, cls._INPUT_SIZE)
        sample = sample.transpose(2, 0, 1) / 255.
        sample = np.expand_dims(sample, axis=0).astype(np.float32)
        return np.ascontiguousarray(sample)

    def __call__(self, image):
        """Run one inference.

        Args:
            image: host array holding exactly as many float32 values as the
                input buffer; other dtypes are converted to float32.

        Returns:
            The host output array. The same array object is reused and
            overwritten by every call — copy it to keep a result.

        Raises:
            ValueError: the input does not match the allocated buffer size.
        """
        # memcpy_htod copies raw bytes, so dtype and memory layout must be
        # normalised here.
        image = np.ascontiguousarray(image, dtype=np.float32)
        if image.nbytes != self.input_nbytes:
            raise ValueError(
                "input has {} bytes, engine input buffer expects {}".format(
                    image.nbytes, self.input_nbytes))
        cuda.memcpy_htod(self.d_input, image)
        self.context.execute_v2(self.bindings)
        cuda.memcpy_dtoh(self.output, self.d_output)
        return self.output

def init():
    """Build the TensorRT engine for the configured checkpoint and wrap it.

    The conversion runs on every call; the engine is then loaded from the
    file name derived from ``pt_model_path``.

    Returns:
        engine_detector: the detector used as the inference handle.
    """
    convert_pt2trt(pt_model_path)
    engine_suffix = '_ultrlytics_export_static_fp32_onnx_trtapi_static_fp16.engine'
    checkpoint_stem = pt_model_path.split('.')[0]
    return engine_detector(checkpoint_stem + engine_suffix)

def calc_box_area(box):
    """Compute the area of an ``(x1, y1, x2, y2)`` box."""
    x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
    return (x_max - x_min) * (y_max - y_min)


def in_box(person_box, head_box, ratio_threshold=0.25):
    """Check whether enough of ``head_box`` falls inside ``person_box``.

    Returns True only if the head box has positive area and the share of that
    area covered by the intersection is strictly above ``ratio_threshold``.
    """
    left = max(person_box[0], head_box[0])
    top = max(person_box[1], head_box[1])
    right = min(person_box[2], head_box[2])
    bottom = min(person_box[3], head_box[3])

    span_x, span_y = right - left, bottom - top
    # The boxes intersect only when both spans are positive.
    shared = span_x * span_y if (span_x > 0 and span_y > 0) else 0

    head_area = calc_box_area(head_box)
    return bool(head_area > 0 and shared / head_area > ratio_threshold)
                                                                                                                                                                                                        
                                                                
def process_image(handle=None, input_image=None, args=None, **kwargs):
    """Run detection on one image and return the alert result as JSON.

    Pipeline: letterbox the image to 1024x1024, run ``handle`` on it, apply
    NMS, scale the boxes back to the original image, then raise an alert for
    every rider box (classes 0/1) that contains a head-like box (classes
    2/4/5) but no ``helmet`` box (class 3).

    Args:
        handle: algorithm handle returned by ``init()``; called with a
            float32 ``(1, 3, h, w)`` numpy array scaled to [0, 1].
        input_image (numpy.ndarray | str): image to process, format
            ``(h, w, c)``, BGR, or a path readable by ``cv2.imread``.
        args: unused.
        **kwargs: unused.

    Returns:
        str: JSON text (``indent=4``) with two keys:
        ``algorithm_data`` (``is_alert``, ``target_count``, ``target_info``)
        and ``model_data`` (``objects``: every detection kept by NMS).

    Raises:
        ValueError: if ``input_image`` is a path that cannot be read.
    """
    input_shape = (1024, 1024)

    # A path is only a different way of supplying the same BGR image. Both
    # kinds of input go through the same numpy preprocessing, because the
    # handle copies a host buffer to the device and therefore cannot take a
    # CUDA torch tensor.
    if isinstance(input_image, str):
        cv_image = cv2.imread(input_image)
        if cv_image is None:
            raise ValueError('failed to read image: {}'.format(input_image))
    else:
        cv_image = input_image
    org_shape = cv_image.shape

    # auto=False keeps the letterboxed size fixed (presumably required by
    # the static-shape engine -- confirm against the export settings).
    net_input = letterbox(cv_image, input_shape, stride=32, auto=False)[0]
    input_shape = net_input.shape[:2]
    # BGR -> RGB, HWC -> CHW, then add the batch axis: (1, 3, h, w).
    net_input = net_input.astype(np.float32)[:, :, ::-1].transpose((2, 0, 1))
    net_input = np.ascontiguousarray(net_input)
    net_input = np.expand_dims(net_input, axis=0)
    net_input = net_input / 255.

    pred = handle(net_input)

    # TODO: build the CUDA tensor directly from the device output buffer
    # (address, dtype, size) instead of going through host memory.
    pred = torch.from_numpy(pred).cuda()
    pred = ops.non_max_suppression(pred,
                                   conf_thres=0.2,
                                   iou_thres=0.5,
                                   classes=None,
                                   agnostic=True,
                                   max_det=300,
                                   )

    # Only one image is submitted per call, so only the first result is used.
    det = pred[0]
    det[:, :4] = ops.scale_boxes(input_shape, det[:, :4], org_shape).round()
    xyxys = det[:, :4].cpu().numpy().tolist()
    confs = det[:, 4].cpu().numpy().tolist()
    cls = det[:, 5].cpu().numpy().astype(np.int32).tolist()

    # class id -> reported name
    map_cls = {'0': 'motorbike_person', '1': 'electric_scooter_person', '2': 'head',
               '3': 'helmet', '4': 'hat', '5': 'bicycle_helmet'}

    dict_res = defaultdict(list)  # detections grouped by class id (as str)
    objects = []
    for box, conf, cl in zip(xyxys, confs, cls):
        key = str(cl)
        dict_res[key].append({'xyxy': box, 'conf': conf, 'cl': key})
        objects.append({
            'x': box[0],
            'y': box[1],
            'height': box[3] - box[1],
            'width': box[2] - box[0],
            'confidence': conf,
            'name': map_cls[key],
        })

    persons = dict_res['0'] + dict_res['1']
    heads = dict_res['2'] + dict_res['4'] + dict_res['5']  # everything except helmet
    helmets = dict_res['3']

    target_info = []
    for person in persons:
        person_box = person['xyxy']
        # The first head-like box inside the rider box is the one reported.
        head = next((h for h in heads if in_box(person_box, h['xyxy'])), None)
        if head is None:
            continue
        if any(in_box(person_box, h['xyxy']) for h in helmets):
            continue
        head_box = head['xyxy']
        target_info.append({
            'x': head_box[0],
            'y': head_box[1],
            'width': head_box[2] - head_box[0],
            'height': head_box[3] - head_box[1],
            'confidence': head['conf'],
            'name': map_cls[head['cl']],
        })

    fake_result = {
        "algorithm_data": {
            "is_alert": bool(target_info),
            "target_count": len(target_info),
            "target_info": target_info,
        },
        "model_data": {'objects': objects},
    }
    return json.dumps(fake_result, indent=4)

More details are in jishi_20231204.tar.

TODO

- 训练一个能够接受输入长宽比非 1:1 的模型。
- 用 torch 重构加速前处理的 letterbox 模块和后处理的 NMS 模块。