
YOLOv5 Multi-Task Face Landmark Detection (Multi-task)

YOLOv5 source code: https://github.com/ultralytics/yolov5. The complete code appears at the end of this article.

Clone the latest yolov5 and create a landmark folder in the repository root.

Inside that folder, create three files (predict.py, test.py, train_landmark.py), following the same pattern as YOLOv5's classification and segmentation tasks. (An export.py is added later in the article as well.)

Any file not described below as a modification of existing code is new and can be added as-is.

The landmark folder
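After these steps the repository should look roughly like this (a sketch inferred from the import paths and argument defaults used in the scripts below; utils/landmark and models/landmark hold the helper modules and model yaml those scripts expect):

yolov5/
├── landmark/
│   ├── train_landmark.py
│   ├── test.py
│   ├── predict.py
│   └── export.py
├── models/
│   └── landmark/
│       └── yolov5n-landmark.yaml
├── utils/
│   └── landmark/        # dataloaders.py, loss.py, general.py, metrics.py, plots.py
└── data/
    └── widerface.yaml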

landmark/train_landmark.py

import argparse
import logging
import math
import os
import sys
import random
import time
from pathlib import Path
from threading import Thread
from warnings import warn

import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

import landmark.test as test  # import test.py to get mAP after each epoch
from models.experimental import attempt_load
from models.yolo import Model
from utils.callbacks import Callbacks
from utils.autoanchor import check_anchors
# from utils.facedataloaders import create_dataloader
from utils.landmark.dataloaders import create_dataloader
from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
    fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \
    print_mutation, set_logging, intersect_dicts, check_suffix, check_yaml, colorstr, LOGGER
from utils.downloads import attempt_download, is_url
from utils.landmark.loss import compute_loss
from utils.landmark.plots import plot_labelsf, plot_resultsf, plot_evolution
from utils.plots import plot_images
from utils.torch_utils import ModelEMA, select_device, torch_distributed_zero_first

logger = logging.getLogger(__name__)

try:
    import wandb
except ImportError:
    wandb = None
    # logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")


def train(hyp, opt, device, tb_writer=None, wandb=None):
    logger.info(f'Hyperparameters {hyp}')
    save_dir, epochs, batch_size, total_batch_size, weights, rank = \
        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / 'last.pt'
    best = wdir / 'best.pt'
    results_file = save_dir / 'results.txt'

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(2 + rank)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']
    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check

    # Model
    check_suffix(weights, '.pt')  # check weights
    pretrained = weights.endswith('.pt')
    if pretrained:
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        if hyp.get('anchors'):
            ckpt['model'].yaml['anchors'] = round(hyp['anchors'])  # force autoanchor
        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device)  # create
        exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else []  # exclude keys
        state_dict = ckpt['model'].float().state_dict()  # to FP32
        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
    else:
        model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create

    # Freeze
    freeze = []  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print('freezing %s' % k)
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf']  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # Logging
    if wandb and wandb.run is None:
        opt.hyp = hyp  # add hyperparameters
        wandb_run = wandb.init(config=opt, resume="allow",
                               project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
                               name=save_dir.stem,
                               id=ckpt.get('wandb_id') if 'ckpt' in locals() else None)
    loggers = {'wandb': wandb}  # loggers dict

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt['optimizer'] is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # Results
        if ckpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(ckpt['training_results'])  # write results.txt

        # Epochs
        # start_epoch = ckpt['epoch'] + 1
        if opt.resume:
            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
        if epochs < start_epoch:
            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                        (weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # EMA
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
                                            world_size=opt.world_size, workers=opt.workers,
                                            image_weights=opt.image_weights, prefix=colorstr('train: '))
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

    # Process 0
    if rank in [-1, 0]:
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt,  # testloader
                                       hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True,
                                       rank=-1, world_size=opt.world_size, workers=opt.workers, pad=0.5,
                                       prefix=colorstr('val: '))[0]

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labelsf(labels, save_dir, loggers)
                if tb_writer:
                    tb_writer.add_histogram('classes', c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

    # Model parameters
    hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    logger.info('Image sizes %g train, %g test\n'
                'Using %g dataloader workers\nLogging results to %s\n'
                'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs))
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
                iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
                dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(5, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(('\n' + '%10s' * 9) %
                    ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'landmark', 'total', 'targets', 'img_size'))
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(pred, targets.to(device), model)  # loss scaled by batch_size
                if rank != -1:
                    loss *= opt.world_size  # gradient averaged between devices in DDP mode

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                s = ('%10s' * 2 + '%10.4g' * 7) % (
                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
                pbar.set_description(s)

                # Plot
                if plots and ni < 3:
                    f = save_dir / f'train_batch{ni}.jpg'  # filename
                    Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
                    # if tb_writer:
                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
                elif plots and ni == 3 and wandb:
                    wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]})

            # end batch --------------------------------------------------------------------------------------
        # end epoch ------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0] and epoch >= 0:
            # mAP
            if ema:
                ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                results, maps, times = test.test(opt.data,
                                                 batch_size=total_batch_size,
                                                 imgsz=imgsz_test,
                                                 model=ema.ema,
                                                 single_cls=opt.single_cls,
                                                 dataloader=testloader,
                                                 save_dir=save_dir,
                                                 plots=False,
                                                 log_imgs=opt.log_imgs if wandb else 0)

            # Write
            with open(results_file, 'a') as f:
                f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
            if len(opt.name) and opt.bucket:
                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

            # Log
            tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
                    'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
                    'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
                    'x/lr0', 'x/lr1', 'x/lr2']  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if tb_writer:
                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
                if wandb:
                    wandb.log({tag: x})  # W&B

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, 'r') as f:  # create checkpoint
                    ckpt = {'epoch': epoch,
                            'best_fitness': best_fitness,
                            'training_results': f.read(),
                            'model': ema.ema,
                            'optimizer': None if final_epoch else optimizer.state_dict(),
                            'wandb_id': wandb_run.id if wandb else None}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        final = best if best.exists() else last  # final model
        for f in [last, best]:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
        if opt.bucket:
            os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload

        # Plots
        if plots:
            plot_resultsf(save_dir=save_dir)  # save as results.png
            if wandb:
                files = ['results.png', 'precision_recall_curve.png', 'confusion_matrix.png']
                wandb.log({"Results": [wandb.Image(str(save_dir / f), caption=f) for f in files
                                       if (save_dir / f).exists()]})
                if opt.log_artifacts:
                    wandb.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem)

        # Test best.pt
        logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
                if f is best:
                    LOGGER.info(f'\nValidating {f}...')
                    results, _, _ = test.test(opt.data,
                                              batch_size=total_batch_size,
                                              imgsz=imgsz_test,
                                              conf_thres=0.001,
                                              iou_thres=0.65,
                                              model=attempt_load(final, device),
                                              single_cls=opt.single_cls,
                                              dataloader=testloader,
                                              save_dir=save_dir,
                                              save_json=False,
                                              plots=True)  # val best model with plots

    else:
        dist.destroy_process_group()

    wandb.run.finish() if wandb and wandb.run else None
    torch.cuda.empty_cache()
    return results


def parse_opt(known=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5n-lmk.pt', help='initial weights path')
    parser.add_argument('--cfg', type=str, default=ROOT / 'models/landmark/yolov5n-landmark.yaml', help='model.yaml path')
    parser.add_argument('--data', type=str, default=ROOT / 'data/widerface.yaml', help='data.yaml path')
    parser.add_argument('--hyp', type=str, default=ROOT / 'data/hyps/hyp.scratch.yaml', help='hyperparameters path')
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
    parser.add_argument('--img-size', nargs='+', type=int, default=[800, 800], help='[train, test] image sizes')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--notest', action='store_true', help='only test final epoch')
    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
    parser.add_argument('--evolve', type=int, nargs='?', const=300, help='evolve hyperparameters for x generations')
    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--multi-scale', action='store_true', default=False, help='vary img-size +/- 50%%')
    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
    parser.add_argument('--log-imgs', type=int, default=16, help='number of images for W&B logging, max 100')
    parser.add_argument('--log-artifacts', action='store_true', help='log artifacts, i.e. final trained model')
    parser.add_argument('--workers', type=int, default=4, help='maximum number of dataloader workers')
    parser.add_argument('--project', default=ROOT / 'runs/train-lmk', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    return parser.parse_known_args()[0] if known else parser.parse_args()


def main(opt, callbacks=Callbacks()):
    # Set DDP variables
    opt.total_batch_size = opt.batch_size
    opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
    set_logging(str(opt.global_rank))
    if opt.global_rank in [-1, 0]:
        check_git_status()

    # Resume
    if opt.resume and not opt.evolve:  # resume from specified or most recent last.pt
        last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
        opt_yaml = last.parent.parent / 'opt.yaml'  # train options yaml
        opt_data = opt.data  # original dataset
        if opt_yaml.is_file():
            with open(opt_yaml, errors='ignore') as f:
                d = yaml.safe_load(f)
        else:
            d = torch.load(last, map_location='cpu')['opt']
        opt = argparse.Namespace(**d)  # replace
        opt.cfg, opt.weights, opt.resume = '', str(last), True  # reinstate
        if is_url(opt_data):
            opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
    else:
        opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \
            check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)  # checks
        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
        if opt.evolve:
            if opt.project == str(ROOT / 'runs/train'):  # if default project name, rename to runs/evolve
                opt.project = str(ROOT / 'runs/evolve')
            opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
        if opt.name == 'cfg':
            opt.name = Path(opt.cfg).stem  # use model.yaml as name
        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if opt.local_rank != -1:
        assert torch.cuda.device_count() > opt.local_rank
        torch.cuda.set_device(opt.local_rank)
        device = torch.device('cuda', opt.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://')  # distributed backend
        assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
        opt.batch_size = opt.total_batch_size // opt.world_size

    # Hyperparameters
    with open(opt.hyp) as f:
        hyp = yaml.load(f, Loader=yaml.FullLoader)  # load hyps
        if 'box' not in hyp:
            warn('Compatibility: %s missing "box" which was renamed from "giou" in %s' %
                 (opt.hyp, 'https://github.com/ultralytics/yolov5/pull/1120'))
            hyp['box'] = hyp.pop('giou')

    # Train
    logger.info(opt)
    if not opt.evolve:
        tb_writer = None  # init loggers
        if opt.global_rank in [-1, 0]:
            logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/')
            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
        train(hyp, opt, device, tb_writer, wandb)

    # Evolve hyperparameters (optional)
    else:
        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
        meta = {'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
                'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
                'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
                'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
                'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
                'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
                'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
                'box': (1, 0.02, 0.2),  # box loss gain
                'cls': (1, 0.2, 4.0),  # cls loss gain
                'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
                'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
                'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
                'iou_t': (0, 0.1, 0.7),  # IoU training threshold
                'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
                'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
                'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
                'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
                'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
                'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
                'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
                'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
                'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
                'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
                'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
                'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
                'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
                'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
                'mixup': (1, 0.0, 1.0)}  # image mixup (probability)

        assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
        opt.notest, opt.nosave = True, True  # only test/save final epoch
        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
        yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml'  # save best result here
        if opt.bucket:
            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists

        for _ in range(opt.evolve):  # generations to evolve
            if Path('evolve.txt').exists():  # if evolve.txt exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt('evolve.txt', ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min() + 1E-6  # weights
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                mp, s = 0.8, 0.2  # mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([x[0] for x in meta.values()])  # gains 0-1
                ng = len(meta)
                v = np.ones(ng)
                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
                    hyp[k] = float(x[i + 7] * v[i])  # mutate

            # Constrain to limits
            for k, v in meta.items():
                hyp[k] = max(hyp[k], v[1])  # lower limit
                hyp[k] = min(hyp[k], v[2])  # upper limit
                hyp[k] = round(hyp[k], 5)  # significant digits

            # Train mutation
            results = train(hyp.copy(), opt, device, wandb=wandb)

            # Write mutation results
            # keys = ('metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
            #         'val/box_loss', 'val/obj_loss', 'val/cls_loss')
            print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
            # print_mutation(keys, results, hyp.copy(), save_dir, opt.bucket)

        # Plot results
        plot_evolution(yaml_file)
        print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
              f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')


def run(**kwargs):
    # Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
    opt = parse_opt(True)
    for k, v in kwargs.items():
        setattr(opt, k, v)
    main(opt)
    return opt


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
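With the argument defaults defined in parse_opt, training can be launched from the repository root, for example (a sketch; substitute your own dataset yaml and initial weights):

python landmark/train_landmark.py --data data/widerface.yaml --cfg models/landmark/yolov5n-landmark.yaml --weights yolov5n-lmk.pt --img-size 800 800 --batch-size 16

Unlike stock YOLOv5, mloss here has five components: the progress-bar header prints box, obj, cls, landmark and total, reflecting the extra landmark regression term returned by utils/landmark/loss.compute_loss.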

landmark/test.py

import argparse
import json
import os
import sys
from pathlib import Path
from threading import Thread

import numpy as np
import torch
import yaml
from tqdm import tqdm

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.experimental import attempt_load
from utils.landmark.dataloaders import create_dataloader
from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, box_iou, \
    scale_boxes, xyxy2xywh, xywh2xyxy, set_logging, increment_path
from utils.landmark.general import non_max_suppression_face
from utils.landmark.loss import compute_loss
from utils.metrics import ConfusionMatrix
from utils.landmark.metrics import ap_per_classf
from utils.landmark.plots import plot_imagesf, output_to_targetf, plot_study_txt
from utils.torch_utils import select_device, time_sync


def test(data,
         weights=None,
         batch_size=32,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         save_json=False,
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         save_hybrid=False,  # for hybrid auto-labelling
         save_conf=False,  # save auto-label confidences
         plots=True,
         log_imgs=0):  # number of logged images

    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)

        # Directories
        save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

        # Load model
        model = attempt_load(weights)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    is_coco = data.endswith('coco.yaml')  # is COCO dataset
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # data dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs, wandb = min(log_imgs, 100), None  # ceil
    try:
        import wandb  # Weights & Biases
    except ImportError:
        log_imgs = 0

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True)[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)}
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with torch.no_grad():
            # Run model
            t = time_sync()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_sync() - t

            # Compute loss
            if training:
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            targets[:, 2:6] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
            lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
            t = time_sync()
            # output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
            output = non_max_suppression_face(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
            t1 += time_sync() - t

        # Statistics per image
        for si, pred in enumerate(output):
            pred = torch.cat((pred[:, :5], pred[:, 15:]), 1)  # drop the 10 landmark columns before scoring
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                    if plots:
                        confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
                continue

            # Predictions
            predn = pred.clone()
            scale_boxes(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1])  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                for *xyxy, conf, cls in predn.tolist():
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                    with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f:
                        f.write(('%g ' * len(line)).rstrip() % line + '\n')

            # W&B logging
            if plots and len(wandb_images) < log_imgs:
                box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
                             "class_id": int(cls),
                             "box_caption": "%s %.3f" % (names[cls], conf),
                             "scores": {"class_score": conf},
                             "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()]
                boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name))

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for p, b in zip(pred.tolist(), box.tolist()):
                    jdict.append({'image_id': image_id,
                                  'category_id': coco91class[int(p[15])] if is_coco else int(p[15]),
                                  'bbox': [round(x, 3) for x in b],
                                  'score': round(p[4], 5)})

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_boxes(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1])  # native-space labels
                if plots:
                    confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1))

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            plot_imagesf(img, targets, paths, save_dir / f'val_batch{batch_i}_labels.jpg', names)  # labels
            plot_imagesf(img, output_to_targetf(output), paths, save_dir / f'val_batch{batch_i}_pred.jpg', names)  # pred

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_classf(*stats, plot=plots, save_dir=save_dir, names=names)
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, AP@0.5, AP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb and wandb.run:
            wandb.log({"Images": wandb_images})
            wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]})

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ''  # weights
        anno_json = '../coco/annotations/instances_val2017.json'  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print('\nEvaluating pycocotools mAP... saving %s...' % pred_json)
        with open(pred_json, 'w') as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            pred = anno.loadRes(pred_json)  # init predictions api
            eval = COCOeval(anno, pred, 'bbox')
            if is_coco:
                eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]  # image IDs to evaluate
            eval.evaluate()
            eval.accumulate()
            eval.summarize()
            map, map50 = eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print(f'pycocotools unable to run: {e}')

    # Return results
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog='test.py')
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5n-lmk.pt', help='model.pt path(s)')
    parser.add_argument('--data', type=str, default=ROOT / 'data/widerface.yaml', help='*.data path')
    parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch')
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS')
    parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--verbose', action='store_true', help='report mAP by class')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
    parser.add_argument('--project', default='runs/test', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    opt.save_json |= opt.data.endswith('coco.yaml')
    opt.data = check_file(opt.data)  # check file
    print(opt)

    if opt.task in ['val', 'test']:  # run normally
        test(opt.data,
             opt.weights,
             opt.batch_size,
             opt.img_size,
             opt.conf_thres,
             opt.iou_thres,
             opt.save_json,
             opt.single_cls,
             opt.augment,
             opt.verbose,
             save_txt=opt.save_txt | opt.save_hybrid,
             save_hybrid=opt.save_hybrid,
             save_conf=opt.save_conf,
             )

    elif opt.task == 'study':  # run over a range of settings and save/plot
        for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
            f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem)  # filename to save to
            x = list(range(320, 800, 64))  # x axis
            y = []  # y axis
            for i in x:  # img-size
                print('\nRunning %s point %s...' % (f, i))
                r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json,
                               plots=False)
                y.append(r + t)  # results and times
            np.savetxt(f, y, fmt='%10.4g')  # save
        os.system('zip -r study.zip study_*.txt')
        plot_study_txt(f, x)  # plot
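test.py can also be run standalone against the validation split (again following the defaults above):

python landmark/test.py --data data/widerface.yaml --weights yolov5n-lmk.pt --img-size 640

One detail worth noting: before metrics are computed, each prediction row is reduced with pred = torch.cat((pred[:, :5], pred[:, 15:]), 1), which drops the ten landmark columns. P, R and mAP are therefore box-only scores; the landmarks are supervised by the training loss but not evaluated here.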

landmark/predict.py

import argparseimport osimport platformimport sysfrom pathlib import Path import torch FILE = Path(__file__).resolve()ROOT = FILE.parents[1]  # YOLOv5 root directoryif str(ROOT) not in sys.path:    sys.path.append(str(ROOT))  # add ROOT to PATHROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative from models.common import DetectMultiBackendfrom utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreamsfrom utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,                           increment_path, print_args, scale_boxes,strip_optimizer)from utils.landmark.general import non_max_suppression_face,scale_coords_landmarksfrom utils.plots import Annotator, colors, save_one_boxfrom utils.torch_utils import select_device, smart_inference_mode  @smart_inference_mode()def run(    weights=ROOT / 'yolov5n-lmk.pt',  # model.pt path(s)    source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)    data=ROOT / 'data/coco128.yaml',  # dataset.yaml path    imgsz=(640, 640),  # inference size (height, width)    conf_thres=0.25,  # confidence threshold    iou_thres=0.45,  # NMS IOU threshold    max_det=1000,  # maximum detections per image    device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu    view_img=False,  # show results    save_txt=False,  # save results to *.txt    save_conf=False,  # save confidences in --save-txt labels    save_crop=False,  # save cropped prediction boxes    nosave=False,  # do not save images/videos    classes=None,  # filter by class: --class 0, or --class 0 2 3    agnostic_nms=False,  # class-agnostic NMS    augment=False,  # augmented inference    visualize=False,  # visualize features    update=False,  # update all models    project=ROOT / 'runs/predict-lmk',  # save results to project/name    name='exp',  # save results to project/name    exist_ok=False,  # existing project/name ok, do not increment    line_thickness=3,  # bounding box thickness (pixels)    hide_labels=False,  # hide labels    hide_conf=False,  # hide confidences    half=False,  # use FP16 half-precision inference    dnn=False,  # use OpenCV DNN for ONNX inference    vid_stride=1,  # video frame-rate stride):    source = str(source)    save_img = not nosave and not source.endswith('.txt')  # save inference images    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)    screenshot = source.lower().startswith('screen')    if is_url and is_file:        source = check_file(source)  # download     # Directories    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir     # Load model    device = select_device(device)    model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)    stride, names, pt = model.stride, model.names, model.pt    imgsz = check_img_size(imgsz, s=stride)  # check image size     # Dataloader    bs = 1  # batch_size    if webcam:        view_img = check_imshow()        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)        bs = len(dataset)    elif screenshot:        dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)    else:        dataset = LoadImages(source, img_size=imgsz, 
                         stride=stride, auto=pt, vid_stride=vid_stride)
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    model.warmup(imgsz=(1 if pt else bs, 3, *imgsz))  # warmup
    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
    for path, im, im0s, vid_cap, s in dataset:
        with dt[0]:
            im = torch.from_numpy(im).to(model.device)
            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
            im /= 255  # 0 - 255 to 0.0 - 1.0
            if len(im.shape) == 3:
                im = im[None]  # expand for batch dim

        # Inference
        with dt[1]:
            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
            pred, proto = model(im, augment=augment, visualize=visualize)[:2]

        # NMS
        with dt[2]:
            pred = non_max_suppression_face(pred, conf_thres, iou_thres)

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0, line_width=line_thickness, example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, 5].unique():
                    n = (det[:, 5] == c).sum()  # detections per class
                    # s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                det[:, 5:15] = scale_coords_landmarks(im.shape[2:], det[:, 5:15], im0.shape).round()

                # Write results
                for j in range(det.size()[0]):
                    xyxy = det[j, :4].view(-1).tolist()
                    conf = det[j, 4].cpu().detach().numpy()
                    landmarks = det[j, 5:15].view(-1).tolist()
                    cls = det[j, 15].cpu().detach().numpy()
                    if save_txt:
                        line = (cls, *xyxy, conf) if save_conf else (cls, *xyxy)
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        annotator.landmark(landmarks)
                    if save_crop:
                        save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)

            # Stream results
            im0 = annotator.result()
            if view_img:
                if platform.system() == 'Linux' and p not in windows:
                    windows.append(p)
                    cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)  # allow window resize (Linux)
                    cv2.resizeWindow(str(p), im0.shape[1], im0.shape[0])
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path = str(Path(save_path).with_suffix('.mp4'))  # force *.mp4 suffix on results videos
                        vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
                    vid_writer[i].write(im0)

        # Print time (inference-only)
        LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")

    # Print results
    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights[0])  # update model (to fix SourceChangeWarning)


def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5n-lmk.pt', help='model path(s)')
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
    parser.add_argument('--data', type=str, default=ROOT / 'data/widerface.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/predict-lmk', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt


def main(opt):
    check_requirements(exclude=('tensorboard', 'thop'))
    run(**vars(opt))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
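landmark/predict.py can also be driven from Python instead of the CLI; a minimal sketch (assuming the repo root is on sys.path and the landmark folder is importable as a package, e.g. via an empty __init__.py):

# Minimal usage sketch: run() accepts the same names as the CLI flags above.
from landmark.predict import run

run(weights='yolov5n-lmk.pt',     # trained face+landmark checkpoint
    source='data/images',         # file/dir/URL/webcam, as in --source
    imgsz=(640, 640),
    conf_thres=0.25,
    iou_thres=0.45,
    save_txt=True)                # write per-image label files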

landmark/export.py

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Export a YOLOv5 PyTorch model to other formats. TensorFlow exports authored by https://github.com/zldrobit

Format                      | `export.py --include`         | Model
---                         | ---                           | ---
PyTorch                     | -                             | yolov5s.pt
TorchScript                 | `torchscript`                 | yolov5s.torchscript
ONNX                        | `onnx`                        | yolov5s.onnx
OpenVINO                    | `openvino`                    | yolov5s_openvino_model/
TensorRT                    | `engine`                      | yolov5s.engine
CoreML                      | `coreml`                      | yolov5s.mlmodel
TensorFlow SavedModel       | `saved_model`                 | yolov5s_saved_model/
TensorFlow GraphDef         | `pb`                          | yolov5s.pb
TensorFlow Lite             | `tflite`                      | yolov5s.tflite
TensorFlow Edge TPU         | `edgetpu`                     | yolov5s_edgetpu.tflite
TensorFlow.js               | `tfjs`                        | yolov5s_web_model/
PaddlePaddle                | `paddle`                      | yolov5s_paddle_model/

Requirements:
    $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime openvino-dev tensorflow-cpu  # CPU
    $ pip install -r requirements.txt coremltools onnx onnx-simplifier onnxruntime-gpu openvino-dev tensorflow  # GPU

Usage:
    $ python export.py --weights yolov5s.pt --include torchscript onnx openvino engine coreml tflite ...

Inference:
    $ python detect.py --weights yolov5s.pt                 # PyTorch
                                 yolov5s.torchscript        # TorchScript
                                 yolov5s.onnx               # ONNX Runtime or OpenCV DNN with --dnn
                                 yolov5s.xml                # OpenVINO
                                 yolov5s.engine             # TensorRT
                                 yolov5s.mlmodel            # CoreML (macOS-only)
                                 yolov5s_saved_model        # TensorFlow SavedModel
                                 yolov5s.pb                 # TensorFlow GraphDef
                                 yolov5s.tflite             # TensorFlow Lite
                                 yolov5s_edgetpu.tflite     # TensorFlow Edge TPU
                                 yolov5s_paddle_model       # PaddlePaddle

TensorFlow.js:
    $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example
    $ npm install
    $ ln -s ../../yolov5/yolov5s_web_model public/yolov5s_web_model
    $ npm start
"""

import argparse
import json
import os
import platform
import re
import subprocess
import sys
import time
import warnings
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.mobile_optimizer import optimize_for_mobile

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
if platform.system() != 'Windows':
    ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from models.experimental import attempt_loadf
from models.yolo import ClassificationModel, Detect, DetectionModel, SegmentationModel, LmkDetect
from utils.dataloaders import LoadImages
from utils.general import (LOGGER, Profile, check_dataset, check_img_size, check_requirements, check_version,
                           check_yaml, colorstr, file_size, get_default_args, print_args, url2file, yaml_save)
from utils.torch_utils import select_device, smart_inference_mode

MACOS = platform.system() == 'Darwin'  # macOS environment


def export_formats():
    # YOLOv5 export formats
    x = [
        ['PyTorch', '-', '.pt', True, True],
        ['TorchScript', 'torchscript', '.torchscript', True, True],
        ['ONNX', 'onnx', '.onnx', True, True],
        ['OpenVINO', 'openvino', '_openvino_model', True, False],
        ['TensorRT', 'engine', '.engine', False, True],
        ['CoreML', 'coreml', '.mlmodel', True, False],
        ['TensorFlow SavedModel', 'saved_model', '_saved_model', True, True],
        ['TensorFlow GraphDef', 'pb', '.pb', True, True],
        ['TensorFlow Lite', 'tflite', '.tflite', True, False],
        ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', False, False],
        ['TensorFlow.js', 'tfjs', '_web_model', False, False],
        ['PaddlePaddle', 'paddle', '_paddle_model', True, True],]
    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])


def try_export(inner_func):
    # YOLOv5 export decorator, i.e. @try_export
    inner_args = get_default_args(inner_func)

    def outer_func(*args, **kwargs):
        prefix = inner_args['prefix']
        try:
            with Profile() as dt:
                f, model = inner_func(*args, **kwargs)
            LOGGER.info(f'{prefix} export success ✅ {dt.t:.1f}s, saved as {f} ({file_size(f):.1f} MB)')
            return f, model
        except Exception as e:
            LOGGER.info(f'{prefix} export failure ❌ {dt.t:.1f}s: {e}')
            return None, None

    return outer_func


@try_export
def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):
    # YOLOv5 TorchScript model export
    LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
    f = file.with_suffix('.torchscript')

    ts = torch.jit.trace(model, im, strict=False)
    d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
    extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
    if optimize:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html
        optimize_for_mobile(ts)._save_for_lite_interpreter(str(f), _extra_files=extra_files)
    else:
        ts.save(str(f), _extra_files=extra_files)
    return f, None


@try_export
def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr('ONNX:')):
    # YOLOv5 ONNX export
    check_requirements('onnx')
    import onnx

    LOGGER.info(f'\n{prefix} starting export with onnx {onnx.__version__}...')
    f = file.with_suffix('.onnx')

    output_names = ['output0', 'output1'] if isinstance(model, SegmentationModel) else ['output0']
    if dynamic:
        dynamic = {'images': {0: 'batch', 2: 'height', 3: 'width'}}  # shape(1,3,640,640)
        if isinstance(model, SegmentationModel):
            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)
            dynamic['output1'] = {0: 'batch', 2: 'mask_height', 3: 'mask_width'}  # shape(1,32,160,160)
        elif isinstance(model, DetectionModel):
            dynamic['output0'] = {0: 'batch', 1: 'anchors'}  # shape(1,25200,85)

    torch.onnx.export(
        model.cpu() if dynamic else model,  # --dynamic only compatible with cpu
        im.cpu() if dynamic else im,
        f,
        verbose=False,
        opset_version=opset,
        do_constant_folding=True,
        input_names=['images'],
        output_names=output_names,
        dynamic_axes=dynamic or None)

    # Checks
    model_onnx = onnx.load(f)  # load onnx model
    onnx.checker.check_model(model_onnx)  # check onnx model

    # Metadata
    d = {'stride': int(max(model.stride)), 'names': model.names}
    for k, v in d.items():
        meta = model_onnx.metadata_props.add()
        meta.key, meta.value = k, str(v)
    onnx.save(model_onnx, f)

    # Simplify
    if simplify:
        try:
            cuda = torch.cuda.is_available()
            check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))
            import onnxsim

            LOGGER.info(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
            model_onnx, check = onnxsim.simplify(model_onnx)
            assert check, 'assert check failed'
            onnx.save(model_onnx, f)
        except Exception as e:
            LOGGER.info(f'{prefix} simplifier failure: {e}')
    return f, model_onnx


@try_export
def export_openvino(file, metadata, half, prefix=colorstr('OpenVINO:')):
    # YOLOv5 OpenVINO export
    check_requirements('openvino-dev')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
    import openvino.inference_engine as ie

    LOGGER.info(f'\n{prefix} starting export with openvino {ie.__version__}...')
    f = str(file).replace('.pt', f'_openvino_model{os.sep}')

    cmd = f"mo --input_model {file.with_suffix('.onnx')} --output_dir {f} --data_type {'FP16' if half else 'FP32'}"
    subprocess.run(cmd.split(), check=True, env=os.environ)  # export
    yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata)  # add metadata.yaml
    return f, None


@try_export
def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePaddle:')):
    # YOLOv5 Paddle export
    check_requirements(('paddlepaddle', 'x2paddle'))
    import x2paddle
    from x2paddle.convert import pytorch2paddle

    LOGGER.info(f'\n{prefix} starting export with X2Paddle {x2paddle.__version__}...')
    f = str(file).replace('.pt', f'_paddle_model{os.sep}')

    pytorch2paddle(module=model, save_dir=f, jit_type='trace', input_examples=[im])  # export
    yaml_save(Path(f) / file.with_suffix('.yaml').name, metadata)  # add metadata.yaml
    return f, None


@try_export
def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
    # YOLOv5 CoreML export
    check_requirements('coremltools')
    import coremltools as ct

    LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
    f = file.with_suffix('.mlmodel')

    ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
    ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
    bits, mode = (8, 'kmeans_lut') if int8 else (16, 'linear') if half else (32, None)
    if bits < 32:
        if MACOS:  # quantization only supported on macOS
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)  # suppress numpy==1.20 float warning
                ct_model = ct.models.neural_network.quantization_utils.quantize_weights(ct_model, bits, mode)
        else:
            print(f'{prefix} quantization only supported on macOS, skipping...')
    ct_model.save(f)
    return f, ct_model


@try_export
def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):
    # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
    assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. `python export.py --device 0`'
    try:
        import tensorrt as trt
    except Exception:
        if platform.system() == 'Linux':
            check_requirements('nvidia-tensorrt', cmds='-U --index-url https://pypi.ngc.nvidia.com')
        import tensorrt as trt

    if trt.__version__[0] == '7':  # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012
        grid = model.model[-1].anchor_grid
        model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]
        export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
        model.model[-1].anchor_grid = grid
    else:  # TensorRT >= 8
        check_version(trt.__version__, '8.0.0', hard=True)  # require tensorrt>=8.0.0
        export_onnx(model, im, file, 12, dynamic, simplify)  # opset 12
    onnx = file.with_suffix('.onnx')

    LOGGER.info(f'\n{prefix} starting export with TensorRT {trt.__version__}...')
    assert onnx.exists(), f'failed to export ONNX file: {onnx}'
    f = file.with_suffix('.engine')  # TensorRT engine file
    logger = trt.Logger(trt.Logger.INFO)
    if verbose:
        logger.min_severity = trt.Logger.Severity.VERBOSE

    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    config.max_workspace_size = workspace * 1 << 30
    # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # fix TRT 8.4 deprecation notice

    flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    network = builder.create_network(flag)
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnx)):
        raise RuntimeError(f'failed to load ONNX file: {onnx}')

    inputs = [network.get_input(i) for i in range(network.num_inputs)]
    outputs = [network.get_output(i) for i in range(network.num_outputs)]
    for inp in inputs:
        LOGGER.info(f'{prefix} input "{inp.name}" with shape{inp.shape} {inp.dtype}')
    for out in outputs:
        LOGGER.info(f'{prefix} output "{out.name}" with shape{out.shape} {out.dtype}')

    if dynamic:
        if im.shape[0] <= 1:
            LOGGER.warning(f"{prefix} WARNING ⚠️ --dynamic model requires maximum --batch-size argument")
        profile = builder.create_optimization_profile()
        for inp in inputs:
            profile.set_shape(inp.name, (1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape)
        config.add_optimization_profile(profile)

    LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine as {f}')
    if builder.platform_has_fast_fp16 and half:
        config.set_flag(trt.BuilderFlag.FP16)
    with builder.build_engine(network, config) as engine, open(f, 'wb') as t:
        t.write(engine.serialize())
    return f, None


@try_export
def export_saved_model(model,
                       im,
                       file,
                       dynamic,
                       tf_nms=False,
                       agnostic_nms=False,
                       topk_per_class=100,
                       topk_all=100,
                       iou_thres=0.45,
                       conf_thres=0.25,
                       keras=False,
                       prefix=colorstr('TensorFlow SavedModel:')):
    # YOLOv5 TensorFlow SavedModel export
    try:
        import tensorflow as tf
    except Exception:
        check_requirements(f"tensorflow{'' if torch.cuda.is_available() else '-macos' if MACOS else '-cpu'}")
        import tensorflow as tf
    from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

    from models.tf import TFModel

    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
    f = str(file).replace('.pt', '_saved_model')
    batch_size, ch, *imgsz = list(im.shape)  # BCHW

    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
    im = tf.zeros((batch_size, *imgsz, ch))  # BHWC order for TensorFlow
    _ = tf_model.predict(im, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
    inputs = tf.keras.Input(shape=(*imgsz, ch), batch_size=None if dynamic else batch_size)
    outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
    keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
    keras_model.trainable = False
    keras_model.summary()
    if keras:
        keras_model.save(f, save_format='tf')
    else:
        spec = tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype)
        m = tf.function(lambda x: keras_model(x))  # full model
        m = m.get_concrete_function(spec)
        frozen_func = convert_variables_to_constants_v2(m)
        tfm = tf.Module()
        tfm.__call__ = tf.function(lambda x: frozen_func(x)[:4] if tf_nms else frozen_func(x), [spec])
        tfm.__call__(im)
        tf.saved_model.save(tfm,
                            f,
                            options=tf.saved_model.SaveOptions(experimental_custom_gradients=False) if check_version(
                                tf.__version__, '2.6') else tf.saved_model.SaveOptions())
    return f, keras_model


@try_export
def export_pb(keras_model, file, prefix=colorstr('TensorFlow GraphDef:')):
    # YOLOv5 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow
    import tensorflow as tf
    from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
    f = file.with_suffix('.pb')

    m = tf.function(lambda x: keras_model(x))  # full model
    m = m.get_concrete_function(tf.TensorSpec(keras_model.inputs[0].shape, keras_model.inputs[0].dtype))
    frozen_func = convert_variables_to_constants_v2(m)
    frozen_func.graph.as_graph_def()
    tf.io.write_graph(graph_or_graph_def=frozen_func.graph, logdir=str(f.parent), name=f.name, as_text=False)
    return f, None


@try_export
def export_tflite(keras_model, im, file, int8, data, nms, agnostic_nms, prefix=colorstr('TensorFlow Lite:')):
    # YOLOv5 TensorFlow Lite export
    import tensorflow as tf

    LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
    batch_size, ch, *imgsz = list(im.shape)  # BCHW
    f = str(file).replace('.pt', '-fp16.tflite')

    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
    converter.target_spec.supported_types = [tf.float16]
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    if int8:
        from models.tf import representative_dataset_gen
        dataset = LoadImages(check_dataset(check_yaml(data))['train'], img_size=imgsz, auto=False)
        converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        converter.target_spec.supported_types = []
        converter.inference_input_type = tf.uint8  # or tf.int8
        converter.inference_output_type = tf.uint8  # or tf.int8
        converter.experimental_new_quantizer = True
        f = str(file).replace('.pt', '-int8.tflite')
    if nms or agnostic_nms:
        converter.target_spec.supported_ops.append(tf.lite.OpsSet.SELECT_TF_OPS)

    tflite_model = converter.convert()
    open(f, "wb").write(tflite_model)
    return f, None


@try_export
def export_edgetpu(file, prefix=colorstr('Edge TPU:')):
    # YOLOv5 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
    cmd = 'edgetpu_compiler --version'
    help_url = 'https://coral.ai/docs/edgetpu/compiler/'
    assert platform.system() == 'Linux', f'export only supported on Linux. See {help_url}'
    if subprocess.run(f'{cmd} >/dev/null', shell=True).returncode != 0:
        LOGGER.info(f'\n{prefix} export requires Edge TPU compiler. Attempting install from {help_url}')
        sudo = subprocess.run('sudo --version >/dev/null', shell=True).returncode == 0  # sudo installed on system
        for c in (
                'curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -',
                'echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list',
                'sudo apt-get update', 'sudo apt-get install edgetpu-compiler'):
            subprocess.run(c if sudo else c.replace('sudo ', ''), shell=True, check=True)
    ver = subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1]

    LOGGER.info(f'\n{prefix} starting export with Edge TPU compiler {ver}...')
    f = str(file).replace('.pt', '-int8_edgetpu.tflite')  # Edge TPU model
    f_tfl = str(file).replace('.pt', '-int8.tflite')  # TFLite model

    cmd = f"edgetpu_compiler -s -d -k 10 --out_dir {file.parent} {f_tfl}"
    subprocess.run(cmd.split(), check=True)
    return f, None


@try_export
def export_tfjs(file, prefix=colorstr('TensorFlow.js:')):
    # YOLOv5 TensorFlow.js export
    check_requirements('tensorflowjs')
    import tensorflowjs as tfjs

    LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
    f = str(file).replace('.pt', '_web_model')  # js dir
    f_pb = file.with_suffix('.pb')  # *.pb path
    f_json = f'{f}/model.json'  # *.json path

    cmd = f'tensorflowjs_converter --input_format=tf_frozen_model ' \
          f'--output_node_names=Identity,Identity_1,Identity_2,Identity_3 {f_pb} {f}'
    subprocess.run(cmd.split())

    json = Path(f_json).read_text()
    with open(f_json, 'w') as j:  # sort JSON Identity_* in ascending order
        subst = re.sub(
            r'{"outputs": {"Identity.?.?": {"name": "Identity.?.?"}, '
            r'"Identity.?.?": {"name": "Identity.?.?"}, '
            r'"Identity.?.?": {"name": "Identity.?.?"}, '
            r'"Identity.?.?": {"name": "Identity.?.?"}}}', r'{"outputs": {"Identity": {"name": "Identity"}, '
                                                           r'"Identity_1": {"name": "Identity_1"}, '
                                                           r'"Identity_2": {"name": "Identity_2"}, '
                                                           r'"Identity_3": {"name": "Identity_3"}}}', json)
        j.write(subst)
    return f, None


@smart_inference_mode()
def run(
        data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        weights=ROOT / 'yolov5s.pt',  # weights path
        imgsz=(640, 640),  # image (height, width)
        batch_size=1,  # batch size
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        include=('torchscript', 'onnx'),  # include formats
        half=False,  # FP16 half-precision export
        inplace=False,  # set YOLOv5 Detect() inplace=True
        keras=False,  # use Keras
        optimize=False,  # TorchScript: optimize for mobile
        int8=False,  # CoreML/TF INT8 quantization
        dynamic=False,  # ONNX/TF/TensorRT: dynamic axes
        simplify=False,  # ONNX: simplify model
        opset=12,  # ONNX: opset version
        verbose=False,  # TensorRT: verbose log
        workspace=4,  # TensorRT: workspace size (GB)
        nms=False,  # TF: add NMS to model
        agnostic_nms=False,  # TF: add agnostic NMS to model
        topk_per_class=100,  # TF.js NMS: topk per class to keep
        topk_all=100,  # TF.js NMS: topk for all classes to keep
        iou_thres=0.45,  # TF.js NMS: IoU threshold
        conf_thres=0.25,  # TF.js NMS: confidence threshold
):
    t = time.time()
    include = [x.lower() for x in include]  # to lowercase
    fmts = tuple(export_formats()['Argument'][1:])  # --include arguments
    flags = [x in include for x in fmts]
    assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {fmts}'
    jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle = flags  # export booleans
    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)  # PyTorch weights

    # Load PyTorch model
    device = select_device(device)
    if half:
        assert device.type != 'cpu' or coreml, '--half only compatible with GPU export, i.e. use --device 0'
        assert not dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'
    model = attempt_loadf(weights, device=device, inplace=True, fuse=True)  # load FP32 model

    # Checks
    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
    if optimize:
        assert device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. use --device cpu'

    # Input
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz = [check_img_size(x, gs) for x in imgsz]  # verify img_size are gs-multiples
    im = torch.zeros(batch_size, 3, *imgsz).to(device)  # image size(1,3,320,192) BCHW iDetection

    # Update model
    model.eval()
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, (Detect, LmkDetect)):
            m.inplace = inplace
            m.dynamic = dynamic
            m.export = True

    for _ in range(2):
        y = model(im)  # dry runs
    if half and not coreml:
        im, model = im.half(), model.half()  # to FP16
    shape = tuple((y[0] if isinstance(y, tuple) else y).shape)  # model output shape
    metadata = {'stride': int(max(model.stride)), 'names': model.names}  # model metadata
    LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)")

    # Exports
    f = [''] * len(fmts)  # exported filenames
    warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning)  # suppress TracerWarning
    if jit:  # TorchScript
        f[0], _ = export_torchscript(model, im, file, optimize)
    if engine:  # TensorRT required before ONNX
        f[1], _ = export_engine(model, im, file, half, dynamic, simplify, workspace, verbose)
    if onnx or xml:  # OpenVINO requires ONNX
        f[2], _ = export_onnx(model, im, file, opset, dynamic, simplify)
    if xml:  # OpenVINO
        f[3], _ = export_openvino(file, metadata, half)
    if coreml:  # CoreML
        f[4], _ = export_coreml(model, im, file, int8, half)
    if any((saved_model, pb, tflite, edgetpu, tfjs)):  # TensorFlow formats
        assert not tflite or not tfjs, 'TFLite and TF.js models must be exported separately, please pass only one type.'
        assert not isinstance(model, ClassificationModel), 'ClassificationModel export to TF formats not yet supported.'
        f[5], s_model = export_saved_model(model.cpu(),
                                           im,
                                           file,
                                           dynamic,
                                           tf_nms=nms or agnostic_nms or tfjs,
                                           agnostic_nms=agnostic_nms or tfjs,
                                           topk_per_class=topk_per_class,
                                           topk_all=topk_all,
                                           iou_thres=iou_thres,
                                           conf_thres=conf_thres,
                                           keras=keras)
        if pb or tfjs:  # pb prerequisite to tfjs
            f[6], _ = export_pb(s_model, file)
        if tflite or edgetpu:
            f[7], _ = export_tflite(s_model, im, file, int8 or edgetpu, data=data, nms=nms, agnostic_nms=agnostic_nms)
        if edgetpu:
            f[8], _ = export_edgetpu(file)
        if tfjs:
            f[9], _ = export_tfjs(file)
    if paddle:  # PaddlePaddle
        f[10], _ = export_paddle(model, im, file, metadata)

    # Finish
    f = [str(x) for x in f if x]  # filter out '' and None
    if any(f):
        cls, det, seg = (isinstance(model, x) for x in (ClassificationModel, DetectionModel, SegmentationModel))  # type
        dir = Path('segment' if seg else 'classify' if cls else '')
        h = '--half' if half else ''  # --half FP16 inference arg
        s = "# WARNING ⚠️ ClassificationModel not yet supported for PyTorch Hub AutoShape inference" if cls else \
            "# WARNING ⚠️ SegmentationModel not yet supported for PyTorch Hub AutoShape inference" if seg else ''
        LOGGER.info(f'\nExport complete ({time.time() - t:.1f}s)'
                    f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
                    f"\nDetect:       python {dir / ('detect.py' if det else 'predict.py')} --weights {f[-1]} {h}"
                    f"\nValidate:     python {dir / 'val.py'} --weights {f[-1]} {h}"
                    f"\nPyTorch Hub:  model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}')  {s}"
                    f"\nVisualize:    https://netron.app")
    return f  # return list of exported files/dirs


def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / 'data/widerface.yaml', help='dataset.yaml path')
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5n-lmk.pt', help='model.pt path(s)')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--half', action='store_true', help='FP16 half-precision export')
    parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True')
    parser.add_argument('--keras', action='store_true', help='TF: use Keras')
    parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile')
    parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization')
    parser.add_argument('--dynamic', action='store_true', help='ONNX/TF/TensorRT: dynamic axes')
    parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
    parser.add_argument('--opset', type=int, default=11, help='ONNX: opset version')
    parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')
    parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')
    parser.add_argument('--nms', action='store_true', help='TF: add NMS to model')
    parser.add_argument('--agnostic-nms', action='store_true', help='TF: add agnostic NMS to model')
    parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
    parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
    parser.add_argument(
        '--include',
        nargs='+',
        default=['onnx'],
        help='torchscript, onnx, openvino, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle')
    opt = parser.parse_args()
    print_args(vars(opt))
    return opt


def main(opt):
    for opt.weights in (opt.weights if isinstance(opt.weights, list) else [opt.weights]):
        run(**vars(opt))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
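The exporter is used exactly like the stock one, e.g. python landmark/export.py --weights yolov5n-lmk.pt --include onnx. The Python equivalent is a short sketch (same importability assumption as the predict example above):

from landmark.export import run

run(weights='yolov5n-lmk.pt',     # face+landmark checkpoint
    imgsz=(640, 640),
    include=('onnx',),            # any subset of the formats table above
    opset=11,                     # matches the parser default above
    simplify=True)                # optional onnx-simplifier pass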

utils folder

With the files above in place, create a landmark folder inside the utils folder; the following files go in it.
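The resulting layout should look roughly like this (dataloaders.py is the loader referenced by train_landmark.py via create_dataloader; it is not reproduced in this article):

utils/
└── landmark/
    ├── dataloaders.py   # create_dataloader used by train_landmark.py
    ├── general.py       # face NMS + landmark coordinate rescaling
    ├── loss.py          # detection losses + WingLoss landmark term
    ├── metrics.py       # ap_per_classf / bbox_iouf
    └── plots.py         # plotting with keypoints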


utils/landmark/general.py

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""General utils"""

import time

import torch
import torchvision

from ..metrics import box_iou
from ..general import xywh2xyxy


# for face landmark
def non_max_suppression_face(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
        detections with shape: nx16 (x1, y1, x2, y2, conf, keypoint*10, cls)
    """

    nc = prediction.shape[2] - 15  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 16), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 15), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 15] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 15:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx16 (xyxy, conf, landmarks, cls)
        if multi_label:
            i, j = (x[:, 15:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 15, None], x[i, 5:15], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 15:].max(1, keepdim=True)
            x = torch.cat((box, conf, x[:, 5:15], j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Batched NMS
        c = x[:, 15:16] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        # if i.shape[0] > max_det:  # limit detections
        #     i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output


def scale_coords_landmarks(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale the five (x, y) landmark pairs from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2, 4, 6, 8]] -= pad[0]  # x padding
    coords[:, [1, 3, 5, 7, 9]] -= pad[1]  # y padding
    coords[:, :10] /= gain
    # clip to image bounds
    coords[:, 0].clamp_(0, img0_shape[1])  # x1
    coords[:, 1].clamp_(0, img0_shape[0])  # y1
    coords[:, 2].clamp_(0, img0_shape[1])  # x2
    coords[:, 3].clamp_(0, img0_shape[0])  # y2
    coords[:, 4].clamp_(0, img0_shape[1])  # x3
    coords[:, 5].clamp_(0, img0_shape[0])  # y3
    coords[:, 6].clamp_(0, img0_shape[1])  # x4
    coords[:, 7].clamp_(0, img0_shape[0])  # y4
    coords[:, 8].clamp_(0, img0_shape[1])  # x5
    coords[:, 9].clamp_(0, img0_shape[0])  # y5
    return coords
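Each row returned by non_max_suppression_face therefore packs 16 values. A small sketch of how downstream code unpacks one row (field layout from the docstring above; the keypoint order of eyes, nose, mouth corners is the usual five-point face convention and is an assumption here, and the tensor values are made up):

import torch

det = torch.tensor([[100., 120., 180., 220., 0.91,               # x1, y1, x2, y2, conf
                     120., 150., 160., 150., 140., 175.,          # kp1..kp3 (x, y)
                     125., 195., 155., 195.,                      # kp4..kp5 (x, y)
                     0.]])                                        # cls
box = det[0, :4].tolist()                   # face box, xyxy
conf = float(det[0, 4])                     # obj_conf * cls_conf
kpts = det[0, 5:15].reshape(5, 2).tolist()  # five (x, y) landmarks
cls = int(det[0, 15])                       # class index (0 = face on WIDER FACE)
print(box, conf, kpts, cls)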

utils/landmark/loss.py

import numpy as np
import torch
import torch.nn as nn

from ..loss import FocalLoss, smooth_BCE
from ..landmark.metrics import bbox_iouf
from ..torch_utils import is_parallel


class WingLoss(nn.Module):
    def __init__(self, w=10, e=2):
        super(WingLoss, self).__init__()
        # https://arxiv.org/pdf/1711.06753v4.pdf   Figure 5
        self.w = w
        self.e = e
        self.C = self.w - self.w * np.log(1 + self.w / self.e)

    def forward(self, x, t, sigma=1):
        weight = torch.ones_like(t)
        weight[torch.where(t == -1)] = 0  # ignore missing (-1) landmark labels
        diff = weight * (x - t)
        abs_diff = diff.abs()
        flag = (abs_diff.data < self.w).float()
        y = flag * self.w * torch.log(1 + abs_diff / self.e) + (1 - flag) * (abs_diff - self.C)
        return y.sum()


class LandmarksLoss(nn.Module):
    # WingLoss with reduced missing-label effects
    def __init__(self, alpha=1.0):
        super(LandmarksLoss, self).__init__()
        self.loss_fcn = WingLoss()  # nn.SmoothL1Loss(reduction='sum')
        self.alpha = alpha

    def forward(self, pred, truel, mask):
        loss = self.loss_fcn(pred * mask, truel * mask)
        return loss / (torch.sum(mask) + 10e-14)


def compute_loss(p, targets, model):  # predictions, targets, model
    device = targets.device
    lcls, lbox, lobj, lmark = torch.zeros(1, device=device), \
                              torch.zeros(1, device=device), \
                              torch.zeros(1, device=device), \
                              torch.zeros(1, device=device)
    tcls, tbox, indices, anchors, tlandmarks, lmks_mask = build_targets(p, targets, model)  # targets
    h = model.hyp  # hyperparameters

    # Define criteria
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))  # weight=model.class_weights)
    BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

    landmarks_loss = LandmarksLoss(1.0)

    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    cp, cn = smooth_BCE(eps=0.0)

    # Focal loss
    g = h['fl_gamma']  # focal loss gamma
    if g > 0:
        BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

    # Losses
    nt = 0  # number of targets
    no = len(p)  # number of outputs
    balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1]  # P3-5 or P3-6
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
        tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj

        n = b.shape[0]  # number of targets
        if n:
            nt += n  # cumulative targets
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets

            # Regression
            pxy = ps[:, :2].sigmoid() * 2. - 0.5
            pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            iou = bbox_iouf(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
            lbox += (1.0 - iou).mean()  # iou loss

            # Objectness
            # cells that contain a GT object are assigned the IoU with that GT
            # (the obj score effectively separates foreground from background)
            tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype)  # iou ratio

            # Classification
            if model.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(ps[:, 15:], cn, device=device)  # targets
                t[range(n), tcls[i]] = cp
                lcls += BCEcls(ps[:, 15:], t)  # BCE

            # landmarks loss
            # plandmarks = ps[:, 5:15].sigmoid() * 8. - 4.
            plandmarks = ps[:, 5:15]
            plandmarks[:, 0:2] = plandmarks[:, 0:2] * anchors[i]
            plandmarks[:, 2:4] = plandmarks[:, 2:4] * anchors[i]
            plandmarks[:, 4:6] = plandmarks[:, 4:6] * anchors[i]
            plandmarks[:, 6:8] = plandmarks[:, 6:8] * anchors[i]
            plandmarks[:, 8:10] = plandmarks[:, 8:10] * anchors[i]

            lmark += landmarks_loss(plandmarks, tlandmarks[i], lmks_mask[i])

        lobj += BCEobj(pi[..., 4], tobj) * balance[i]  # obj loss

    s = 3 / no  # output count scaling
    lbox *= h['box'] * s
    lobj *= h['obj'] * s * (1.4 if no == 4 else 1.)
    lcls *= h['cls'] * s
    lmark *= h['landmark'] * s

    bs = tobj.shape[0]  # batch size

    loss = lbox + lobj + lcls + lmark
    return loss * bs, torch.cat((lbox, lobj, lcls, lmark, loss)).detach()


def build_targets(p, targets, model):
    # Build targets for compute_loss(); input targets are (image, class, x, y, w, h, 10 landmark coords)
    det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
    na, nt = det.na, targets.shape[0]  # number of anchors, targets
    tcls, tbox, indices, anch, landmarks, lmks_mask = [], [], [], [], [], []
    # gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
    gain = torch.ones(17, device=targets.device)
    ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
    targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices

    g = 0.5  # bias
    off = torch.tensor([[0, 0],
                        [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
                        # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
                        ], device=targets.device).float() * g  # offsets

    for i in range(det.nl):
        anchors = det.anchors[i]
        gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
        # landmarks (10 values)
        gain[6:16] = torch.tensor(p[i].shape)[[3, 2, 3, 2, 3, 2, 3, 2, 3, 2]]  # xy gain for 5 keypoints

        # Match targets to anchors
        t = targets * gain
        if nt:
            # Matches
            r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
            j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t']  # compare
            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
            t = t[j]  # filter

            # Offsets
            gxy = t[:, 2:4]  # grid xy
            gxi = gain[[2, 3]] - gxy  # inverse
            j, k = ((gxy % 1. < g) & (gxy > 1.)).T
            l, m = ((gxi % 1. < g) & (gxi > 1.)).T
            j = torch.stack((torch.ones_like(j), j, k, l, m))
            t = t.repeat((5, 1, 1))[j]
            offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
        else:
            t = targets[0]
            offsets = 0

        # Define
        b, c = t[:, :2].long().T  # image, class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).long()
        gi, gj = gij.T  # grid xy indices

        # Append
        a = t[:, 16].long()  # anchor indices
        indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
        tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
        anch.append(anchors[a])  # anchors
        tcls.append(c)  # class

        # landmarks
        lks = t[:, 6:16]
        # lks_mask = (lks > 0).float()
        lks_mask = torch.where(lks < 0, torch.full_like(lks, 0.), torch.full_like(lks, 1.0))

        # Ideally the keypoint coordinates would be divided by the anchor width/height so the model learns
        # against a uniform reference; using gwh would encode each keypoint on a different scale with no
        # common standard.
        lks[:, [0, 1]] = (lks[:, [0, 1]] - gij)
        lks[:, [2, 3]] = (lks[:, [2, 3]] - gij)
        lks[:, [4, 5]] = (lks[:, [4, 5]] - gij)
        lks[:, [6, 7]] = (lks[:, [6, 7]] - gij)
        lks[:, [8, 9]] = (lks[:, [8, 9]] - gij)

        lmks_mask.append(lks_mask)
        landmarks.append(lks)
        # print('lks: ', lks.size())

    return tcls, tbox, indices, anch, landmarks, lmks_mask
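WingLoss behaves like a log-scaled L1 near zero and a shifted L1 for large errors. A standalone numeric sketch of the curve, using the same defaults as WingLoss.__init__ above:

import numpy as np

w, e = 10, 2
C = w - w * np.log(1 + w / e)             # continuity constant between the two pieces

def wing(d):
    d = np.abs(np.asarray(d, dtype=float))
    # log region for small errors, shifted-linear region for large ones
    return np.where(d < w, w * np.log(1 + d / e), d - C)

for d in (0.5, 2.0, 10.0, 15.0):
    print(f'|error|={d:>4} -> wing={float(wing(d)):.3f}')
# the two pieces meet at |error| == w (both give w*ln(1 + w/e), about 17.9 here),
# so gradients stay strong for small landmark errors without exploding on outliers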

utils/landmark/metrics.py

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Model validation metrics"""

import math
from pathlib import Path

import numpy as np
import torch

from ..metrics import plot_pr_curve, compute_ap


def ap_per_classf(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=(), prefix=""):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:  True positives (nparray, nx1 or nx10).
        conf:  Objectness value from 0-1 (nparray).
        pred_cls:  Predicted object classes (nparray).
        target_cls:  True object classes (nparray).
        plot:  Plot precision-recall curve at mAP@0.5
        save_dir:  Plot save directory
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(target_cls)

    # Create Precision-Recall curve and compute AP for each class
    px, py = np.linspace(0, 1, 1000), []  # for plotting
    pr_score = 0.1  # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
    s = [unique_classes.shape[0], tp.shape[1]]  # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
    ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s)
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = (target_cls == c).sum()  # number of labels
        n_p = i.sum()  # number of predictions

        if n_p == 0 or n_l == 0:
            continue
        else:
            # Accumulate FPs and TPs
            fpc = (1 - tp[i]).cumsum(0)
            tpc = tp[i].cumsum(0)

            # Recall
            recall = tpc / (n_l + 1e-16)  # recall curve
            r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0])  # r at pr_score, negative x, xp because xp decreases

            # Precision
            precision = tpc / (tpc + fpc)  # precision curve
            p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0])  # p at pr_score

            # AP from recall-precision curve
            for j in range(tp.shape[1]):
                ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
                if plot and (j == 0):
                    py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # Compute F1 score (harmonic mean of precision and recall)
    f1 = 2 * p * r / (p + r + 1e-16)

    if plot:
        plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names)

    return p, r, ap, f1, unique_classes.astype('int32')


def bbox_iouf(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
    box2 = box2.T

    # Get the coordinates of bounding boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union Area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
                    (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    alpha = v / ((1 + eps) - iou + v)
                return iou - (rho2 / c2 + v * alpha)  # CIoU
        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
            c_area = cw * ch + eps  # convex area
            return iou - (c_area - union) / c_area  # GIoU
    else:
        return iou  # IoU
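A toy check of bbox_iouf in the xywh mode used by compute_loss (a sketch, assuming the file above is saved as utils/landmark/metrics.py):

import torch
from utils.landmark.metrics import bbox_iouf

box1 = torch.tensor([50., 50., 40., 40.])            # cx, cy, w, h
box2 = torch.tensor([[50., 50., 40., 40.],           # identical box
                     [60., 50., 40., 40.]])          # shifted 10 px right
print(bbox_iouf(box1, box2, x1y1x2y2=False, CIoU=True))
# identical box -> ~1.0; the shifted box scores below its plain IoU because
# CIoU subtracts a center-distance (and aspect-ratio) penalty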

utils/landmark/plots.py

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""Plotting utils"""

import math
import os
import random
from pathlib import Path

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import torch
import yaml
from PIL import Image

from ..metrics import fitness
from .. import threaded
from ..general import xywh2xyxy, xyxy2xywh
from ..plots import Annotator, colors, hist2d


# add for landmark
def plot_evolution(yaml_file='data/hyp.finetune.yaml'):  # from utils.plots import *; plot_evolution()
    # Plot hyperparameter evolution results in evolve.txt
    with open(yaml_file) as f:
        hyp = yaml.load(f, Loader=yaml.SafeLoader)
    x = np.loadtxt('evolve.txt', ndmin=2)
    f = fitness(x)
    # weights = (f - f.min()) ** 2  # for weighted results
    plt.figure(figsize=(10, 12), tight_layout=True)
    matplotlib.rc('font', **{'size': 8})
    for i, (k, v) in enumerate(hyp.items()):
        y = x[:, i + 7]
        # mu = (y * weights).sum() / weights.sum()  # best weighted result
        mu = y[f.argmax()]  # best single result
        plt.subplot(6, 5, i + 1)
        plt.scatter(y, f, c=hist2d(y, f, 20), cmap='viridis', alpha=.8, edgecolors='none')
        plt.plot(mu, f.max(), 'k+', markersize=15)
        plt.title('%s = %.3g' % (k, mu), fontdict={'size': 9})  # limit to 40 characters
        if i % 5 != 0:
            plt.yticks([])
        print('%15s: %.3g' % (k, mu))
    plt.savefig('evolve.png', dpi=200)
    print('\nPlot saved as evolve.png')


def plot_study_txt(path='study/', x=None):  # from utils.plots import *; plot_study_txt()
    # Plot study.txt generated by test.py
    fig, ax = plt.subplots(2, 4, figsize=(10, 6), tight_layout=True)
    ax = ax.ravel()

    fig2, ax2 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
    for f in [Path(path) / f'study_coco_{x}.txt' for x in ['yolov5s', 'yolov5m', 'yolov5l', 'yolov5x']]:
        y = np.loadtxt(f, dtype=np.float32, usecols=[0, 1, 2, 3, 7, 8, 9], ndmin=2).T
        x = np.arange(y.shape[1]) if x is None else np.array(x)
        s = ['P', 'R', 'mAP@.5', 'mAP@.5:.95', 't_inference (ms/img)', 't_NMS (ms/img)', 't_total (ms/img)']
        for i in range(7):
            ax[i].plot(x, y[i], '.-', linewidth=2, markersize=8)
            ax[i].set_title(s[i])

        j = y[3].argmax() + 1
        ax2.plot(y[6, :j], y[3, :j] * 1E2, '.-', linewidth=2, markersize=8,
                 label=f.stem.replace('study_coco_', '').replace('yolo', 'YOLO'))

    ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [34.6, 40.5, 43.0, 47.5, 49.7, 51.5],
             'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet')

    ax2.grid()
    ax2.set_yticks(np.arange(30, 60, 5))
    ax2.set_xlim(0, 30)
    ax2.set_ylim(29, 51)
    ax2.set_xlabel('GPU Speed (ms/img)')
    ax2.set_ylabel('COCO AP val')
    ax2.legend(loc='lower right')
    plt.savefig('test_study.png', dpi=300)


def plot_resultsf(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''):
    # Plot training 'results*.txt'; from utils.plots import *; plot_results(save_dir='runs/train/exp')
    fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
    ax = ax.ravel()
    s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall',
         'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95']
    if bucket:
        # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id]
        files = ['results%g.txt' % x for x in id]
        c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/results%g.txt' % (bucket, x) for x in id)
        os.system(c)
    else:
        files = list(Path(save_dir).glob('results*.txt'))
    assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir)
    for fi, f in enumerate(files):
        try:
            results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T
            n = results.shape[1]  # number of rows
            x = range(start, min(stop, n) if stop else n)
            for i in range(10):
                y = results[i, x]
                if i in [0, 1, 2, 5, 6, 7]:
                    y[y == 0] = np.nan  # don't show zero loss values
                    # y /= y[0]  # normalize
                label = labels[fi] if len(labels) else f.stem
                ax[i].plot(x, y, marker='.', label=label, linewidth=2, markersize=8)
                ax[i].set_title(s[i])
                # if i in [5, 6, 7]:  # share train and val loss y axes
                #     ax[i].get_shared_y_axes().join(ax[i], ax[i - 5])
        except Exception as e:
            print('Warning: Plotting error for %s; %s' % (f, e))

    ax[1].legend()
    fig.savefig(Path(save_dir) / 'results.png', dpi=200)


def plot_one_box(x, img, color=None, label=None, line_thickness=None):
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)


def plot_labelsf(labels, save_dir=Path(''), loggers=None):
    # plot dataset labels
    print('Plotting labels... ')
    c, b = labels[:, 0], labels[:, 1:5].transpose()  # classes, boxes
    nc = int(c.max() + 1)  # number of classes
    x = pd.DataFrame(b.transpose(), columns=['x', 'y', 'width', 'height'])

    # seaborn correlogram
    sn.pairplot(x, corner=True, diag_kind='auto', kind='hist', diag_kws=dict(bins=50), plot_kws=dict(pmax=0.9))
    plt.savefig(save_dir / 'labels_correlogram.jpg', dpi=200)
    plt.close()

    # matplotlib labels
    matplotlib.use('svg')  # faster
    ax = plt.subplots(2, 2, figsize=(8, 8), tight_layout=True)[1].ravel()
    ax[0].hist(c, bins=np.linspace(0, nc, nc + 1) - 0.5, rwidth=0.8)
    ax[0].set_xlabel('classes')
    sn.histplot(x, x='x', y='y', ax=ax[2], bins=50, pmax=0.9)
    sn.histplot(x, x='width', y='height', ax=ax[3], bins=50, pmax=0.9)

    # rectangles
    labels[:, 1:3] = 0.5  # center
    labels[:, 1:] = xywh2xyxy(labels[:, 1:]) * 2000
    img = Image.fromarray(np.ones((2000, 2000, 3), dtype=np.uint8) * 255)
    # for cls, *box in labels[:1000]:
    #     ImageDraw.Draw(img).rectangle(box, width=1, outline=colors[int(cls) % 10])  # plot
    ax[1].imshow(img)
    ax[1].axis('off')

    for a in [0, 1, 2, 3]:
        for s in ['top', 'right', 'left', 'bottom']:
            ax[a].spines[s].set_visible(False)

    plt.savefig(save_dir / 'labels.jpg', dpi=200)
    matplotlib.use('Agg')
    plt.close()

    # loggers
    for k, v in (loggers or {}).items():
        if k == 'wandb' and v:
            v.log({"Labels": [v.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.jpg')]})


def output_to_targetf(output):
    # Convert model output to target format [batch_id, class_id, x, y, w, h, keypoints*10, conf]
    targets = []
    for i, o in enumerate(output):
        for item in o.cpu().numpy():
            # (x1, y1, x2, y2), conf, *keypoints, cls
            box = item[:4]
            conf = item[4]
            keypoints = item[5:15]
            cls = item[15]
            targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), *keypoints, conf])
    return np.array(targets)


@threaded
def plot_imagesf(images, targets, paths=None, fname='images.jpg', names=None):
    # Plot image grid with labels
    if isinstance(images, torch.Tensor):
        images = images.cpu().float().numpy()
    if isinstance(targets, torch.Tensor):
        targets = targets.cpu().numpy()

    max_size = 1920  # max image size
    max_subplots = 16  # max image subplots, i.e. 4x4
    bs, _, h, w = images.shape  # batch size, _, height, width
    bs = min(bs, max_subplots)  # limit plot images
    ns = np.ceil(bs ** 0.5)  # number of subplots (square)
    if np.max(images[0]) <= 1:
        images *= 255  # de-normalise (optional)

    # Build Image
    mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8)  # init
    for i, im in enumerate(images):
        if i == max_subplots:  # if last batch has fewer images than we expect
            break
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        im = im.transpose(1, 2, 0)
        mosaic[y:y + h, x:x + w, :] = im

    # Resize (optional)
    scale = max_size / ns / max(h, w)
    if scale < 1:
        h = math.ceil(scale * h)
        w = math.ceil(scale * w)
        mosaic = cv2.resize(mosaic, tuple(int(x * ns) for x in (w, h)))

    # Annotate
    fs = int((h + w) * ns * 0.01)  # font size
    annotator = Annotator(mosaic, line_width=round(fs / 10), font_size=fs, pil=True, example=names)
    for i in range(i + 1):
        x, y = int(w * (i // ns)), int(h * (i % ns))  # block origin
        annotator.rectangle([x, y, x + w, y + h], None, (255, 255, 255), width=2)  # borders
        if paths:
            annotator.text((x + 5, y + 5), text=Path(paths[i]).name[:40], txt_color=(220, 220, 220))  # filenames
        if len(targets) > 0:
            ti = targets[targets[:, 0] == i]  # image targets
            boxes = xywh2xyxy(ti[:, 2:6]).T
            classes = ti[:, 1].astype('int')
            labels = ti.shape[1] == 16  # labels if no conf column
            conf = None if labels else ti[:, 16]  # check for confidence presence (label vs pred)
            keypoints = ti[:, 6:16].T

            if boxes.shape[1]:
                if boxes.max() <= 1.01:  # if normalized with tolerance 0.01
                    boxes[[0, 2]] *= w  # scale to pixels
                    boxes[[1, 3]] *= h
                elif scale < 1:  # absolute coords need scale if image scales
                    boxes *= scale
            boxes[[0, 2]] += x
            boxes[[1, 3]] += y

            if keypoints.shape[1]:
                if keypoints.max() <= 1.01:  # if normalized with tolerance 0.01
                    keypoints[[0, 2, 4, 6, 8]] *= w  # scale to pixels
                    keypoints[[1, 3, 5, 7, 9]] *= h
                elif scale < 1:  # absolute coords need scale if image scales
                    keypoints *= scale
            keypoints[[0, 2, 4, 6, 8]] += x
            keypoints[[1, 3, 5, 7, 9]] += y

            for j, (box, kpy) in enumerate(zip(boxes.T.tolist(), keypoints.T.tolist())):
                cls = classes[j]
                color = colors(cls)
                cls = names[cls] if names else cls
                if labels or conf[j] > 0.25:  # 0.25 conf thresh
                    label = f'{cls}' if labels else f'{cls} {conf[j]:.1f}'
                    annotator.box_label(box, label, color=color)
                    annotator.landmark(kpy)

    annotator.im.save(fname)  # save
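output_to_targetf is the bridge between the nx16 NMS output and plot_imagesf, which tells label rows (16 columns) from prediction rows (17 columns, conf appended last). A tiny sketch, assuming the file above is saved as utils/landmark/plots.py:

import torch
from utils.landmark.plots import output_to_targetf

# one image, one fake face detection: xyxy, conf, 10 keypoint coords, cls
det = torch.tensor([[100., 120., 180., 220., 0.9,
                     120., 150., 160., 150., 140., 175., 125., 195., 155., 195.,
                     0.]])
t = output_to_targetf([det])
print(t.shape)  # (1, 17): [batch_id, cls, x, y, w, h, kpt*10, conf]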

One more function in utils/plots.py needs to be modified:

class Annotator:
    # YOLOv5 Annotator for train/val mosaics and jpgs and detect/hub inference annotations
    def __init__(self, im, line_width=None, font_size=None, font='Arial.ttf', pil=False, example='abc'):
        assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to Annotator() input images.'
        non_ascii = not is_ascii(example)  # non-latin labels, i.e. asian, arabic, cyrillic
        self.pil = pil or non_ascii
        if self.pil:  # use PIL
            self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
            self.draw = ImageDraw.Draw(self.im)
            self.font = check_pil_font(font='Arial.Unicode.ttf' if non_ascii else font,
                                       size=font_size or max(round(sum(self.im.size) / 2 * 0.035), 12))
        else:  # use cv2
            self.im = im
        self.lw = line_width or max(round(sum(im.shape) / 2 * 0.003), 2)  # line width

    def box_label(self, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255)):
        # Add one xyxy box to image with label
        if self.pil or not is_ascii(label):
            self.draw.rectangle(box, width=self.lw, outline=color)  # box
            if label:
                w, h = self.font.getsize(label)  # text width, height
                outside = box[1] - h >= 0  # label fits outside box
                self.draw.rectangle(
                    (box[0], box[1] - h if outside else box[1], box[0] + w + 1,
                     box[1] + 1 if outside else box[1] + h + 1),
                    fill=color,
                )
                # self.draw.text((box[0], box[1]), label, fill=txt_color, font=self.font, anchor='ls')  # for PIL>8.0
                self.draw.text((box[0], box[1] - h if outside else box[1]), label, fill=txt_color, font=self.font)
        else:  # cv2
            p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
            cv2.rectangle(self.im, p1, p2, color, thickness=self.lw, lineType=cv2.LINE_AA)
            if label:
                tf = max(self.lw - 1, 1)  # font thickness
                w, h = cv2.getTextSize(label, 0, fontScale=self.lw / 3, thickness=tf)[0]  # text width, height
                outside = p1[1] - h >= 3
                p2 = p1[0] + w, p1[1] - h - 3 if outside else p1[1] + h + 3
                cv2.rectangle(self.im, p1, p2, color, -1, cv2.LINE_AA)  # filled
                cv2.putText(self.im,
                            label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2),
                            0,
                            self.lw / 3,
                            txt_color,
                            thickness=tf,
                            lineType=cv2.LINE_AA)

    # New: draw the 5 facial landmarks
    def landmark(self, landmarks):
        point_colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]
        if self.pil:
            for i in range(len(landmarks) // 2):
                point_x = int(landmarks[2 * i])
                point_y = int(landmarks[2 * i + 1])
                self.draw.point((point_x, point_y), point_colors[i])
        else:  # cv2
            h, w, c = self.im.shape
            tl = 1 or round(0.002 * (h + w) / 2) + 1  # line/font thickness
            for i in range(5):
                point_x = int(landmarks[2 * i])
                point_y = int(landmarks[2 * i + 1])
                cv2.circle(self.im, (point_x, point_y), tl + 1, point_colors[i], -1)

    def masks(self, masks, colors, im_gpu=None, alpha=0.5):
        """Plot masks at once.

        Args:
            masks (tensor): predicted masks on cuda, shape: [n, h, w]
            colors (List[List[Int]]): colors for predicted masks, [[r, g, b] * n]
            im_gpu (tensor): img is in cuda, shape: [3, h, w], range: [0, 1]
            alpha (float): mask transparency: 0.0 fully transparent, 1.0 opaque
        """
        if self.pil:
            # convert to numpy first
            self.im = np.asarray(self.im).copy()
        if im_gpu is None:
            # Add multiple masks of shape(h,w,n) with colors list([r,g,b], [r,g,b], ...)
            if len(masks) == 0:
                return
            if isinstance(masks, torch.Tensor):
                masks = torch.as_tensor(masks, dtype=torch.uint8)
                masks = masks.permute(1, 2, 0).contiguous()
                masks = masks.cpu().numpy()
            masks = scale_image(masks.shape[:2], masks, self.im.shape)
            masks = np.asarray(masks, dtype=np.float32)
            colors = np.asarray(colors, dtype=np.float32)  # shape(n,3)
            s = masks.sum(2, keepdims=True).clip(0, 1)  # add all masks together
            masks = (masks @ colors).clip(0, 255)  # (h,w,n) @ (n,3) = (h,w,3)
            self.im[:] = masks * alpha + self.im * (1 - s * alpha)
        else:
            if len(masks) == 0:
                self.im[:] = im_gpu.permute(1, 2, 0).contiguous().cpu().numpy() * 255
            colors = torch.tensor(colors, device=im_gpu.device, dtype=torch.float32) / 255.0
            colors = colors[:, None, None]  # shape(n,1,1,3)
            masks = masks.unsqueeze(3)  # shape(n,h,w,1)
            masks_color = masks * (colors * alpha)  # shape(n,h,w,3)

            inv_alph_masks = (1 - masks * alpha).cumprod(0)  # shape(n,h,w,1)
            mcs = (masks_color * inv_alph_masks).sum(0) * 2  # mask color summand, shape(n,h,w,3)

            im_gpu = im_gpu.flip(dims=[0])  # flip channel
            im_gpu = im_gpu.permute(1, 2, 0).contiguous()  # shape(h,w,3)
            im_gpu = im_gpu * inv_alph_masks[-1] + mcs
            im_mask = (im_gpu * 255).byte().cpu().numpy()
            self.im[:] = scale_image(im_gpu.shape, im_mask, self.im.shape)
        if self.pil:
            # convert im back to PIL and update draw
            self.fromarray(self.im)

    def rectangle(self, xy, fill=None, outline=None, width=1):
        # Add rectangle to image (PIL-only)
        self.draw.rectangle(xy, fill, outline, width)

    def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
        # Add text to image (PIL-only)
        if anchor == 'bottom':  # start y from font bottom
            w, h = self.font.getsize(text)  # text width, height
            xy[1] += 1 - h
        self.draw.text(xy, text, fill=txt_color, font=self.font)

    def fromarray(self, im):
        # Update self.im from a numpy array
        self.im = im if isinstance(im, Image.Image) else Image.fromarray(im)
        self.draw = ImageDraw.Draw(self.im)

    def result(self):
        # Return annotated image as array
        return np.asarray(self.im)
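The new landmark() method takes a flat [x1, y1, ..., x5, y5] sequence in pixel coordinates; in the cv2 branch each point is drawn as a small filled circle in a fixed per-landmark color. A self-contained sketch (the canvas and coordinates are invented for illustration):

import cv2
import numpy as np
from utils.plots import Annotator

im = np.full((640, 640, 3), 114, dtype=np.uint8)   # dummy BGR canvas
ann = Annotator(im, line_width=2)                  # default pil=False -> cv2 drawing
ann.box_label([200, 180, 440, 460], 'face 0.91', color=(0, 255, 0))
ann.landmark([260, 280, 380, 280, 320, 340, 275, 400, 365, 400])  # 5 (x, y) pairs
cv2.imwrite('annotator_demo.jpg', ann.result())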

utils/landmark/dataloaders.py

import glob
import logging
import math
import os
import random
import shutil
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

from ..general import xyxy2xywh, xywh2xyxy
from ..torch_utils import torch_distributed_zero_first

# Parameters
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng']  # acceptable image suffixes
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
logger = logging.getLogger(__name__)

# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break


def get_hash(files):
    # Returns a single hash value of a list of files
    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))


def img2label_paths(img_paths):
    # Define label paths as a function of image paths
    sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
    return [x.replace(sa, sb, 1).replace('.' + x.split('.')[-1], '.txt') for x in img_paths]


def exif_size(img):
    # Returns exif-corrected PIL size
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except Exception:
        pass
    return s


def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
                      rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''):
    # Make sure only the first process in DDP scans the dataset; the others can then use the cache
    with torch_distributed_zero_first(rank):
        dataset = LoadFaceImagesAndLabels(path, imgsz, batch_size,
                                          augment=augment,  # augment images
                                          hyp=hyp,  # augmentation hyperparameters
                                          rect=rect,  # rectangular training
                                          cache_images=cache,
                                          single_cls=opt.single_cls,
                                          stride=int(stride),
                                          pad=pad,
                                          image_weights=image_weights,
                                          prefix=prefix)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
    # Use torch.utils.data.DataLoader() if dataset properties will update during training, else InfiniteDataLoader()
    dataloader = loader(dataset,
                        batch_size=batch_size,
                        num_workers=nw,
                        sampler=sampler,
                        pin_memory=True,
                        collate_fn=LoadFaceImagesAndLabels.collate_fn4 if quad else LoadFaceImagesAndLabels.collate_fn)
    return dataloader, dataset


class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
    """Dataloader that reuses workers.

    Uses same syntax as vanilla DataLoader.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        for i in range(len(self)):
            yield next(self.iterator)


class _RepeatSampler(object):
    """Sampler that repeats forever.

    Args:
        sampler (Sampler)
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            yield from iter(self.sampler)


class LoadFaceImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride

        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                elif p.is_file():  # file
                    with open(p, 'r') as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        f += [x.replace('./', parent) if x.startswith('./') else x for x in t]  # local to global path
                else:
                    raise FileNotFoundError(f'{prefix}{p} does not exist')
            self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
            assert self.img_files, f'{prefix}No images found'
        except Exception as e:
            raise Exception(f'{prefix}Error loading data from {path}: {e}\n{help_url}')

        # Check cache
        self.label_files = img2label_paths(self.img_files)  # labels
        cache_path = Path(self.label_files[0]).parent.with_suffix('.cache')  # cached labels
        if cache_path.is_file():
            cache = torch.load(cache_path)  # load
            if cache['hash'] != get_hash(self.label_files + self.img_files) or 'results' not in cache:  # changed
                cache = self.cache_labels(cache_path)  # re-cache
        else:
            cache = self.cache_labels(cache_path)  # cache

        # Display cache
        [nf, nm, ne, nc, n] = cache.pop('results')  # found, missing, empty, corrupted, total
        desc = f"Scanning '{cache_path}' for images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
        tqdm(None, desc=prefix + desc, total=n, initial=n)
        assert nf > 0 or not augment, f'{prefix}No labels found in {cache_path}, can not start training. {help_url}'

        # Read cache
        cache.pop('hash')  # remove hash
        labels, shapes = zip(*cache.values())
        self.labels = list(labels)
        self.shapes = np.array(shapes, dtype=np.float64)
        self.img_files = list(cache.keys())  # update
        self.label_files = img2label_paths(cache.keys())  # update
        if single_cls:
            for x in self.labels:
                x[:, 0] = 0

        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs = [None] * n
        if cache_images:
            gb = 0  # Gigabytes of cached images
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))  # 8 threads
            pbar = tqdm(enumerate(results), total=n)
            for i, x in pbar:
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = x  # img, hw_original, hw_resized = load_image(self, i)
                gb += self.imgs[i].nbytes
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'

    def cache_labels(self, path=Path('./labels.cache'), prefix=''):
        # Cache dataset labels, check images and read shapes
        x = {}  # dict
        nm, nf, ne, nc = 0, 0, 0, 0  # number missing, found, empty, corrupted
        desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels..."
        pbar = tqdm(zip(self.img_files, self.label_files), desc=desc, total=len(self.img_files))
        for i, (im_file, lb_file) in enumerate(pbar):
            try:
                # verify images
                im = Image.open(im_file)
                im.verify()  # PIL verify
                shape = exif_size(im)  # image size
                assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'

                # verify labels
                if os.path.isfile(lb_file):
                    nf += 1  # label found
                    with open(lb_file, 'r') as f:
                        l = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels
                    if len(l):
                        assert l.shape[1] == 15, 'labels require 15 columns each'
                        assert (l >= -1).all(), 'negative labels'
                        assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
                        assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
                    else:
                        ne += 1  # label empty
                        l = np.zeros((0, 15), dtype=np.float32)
                else:
                    nm += 1  # label missing
                    l = np.zeros((0, 15), dtype=np.float32)
                x[im_file] = [l, shape]
            except Exception as e:
                nc += 1
                print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')

            pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' for images and labels... " \
                        f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"

        if nf == 0:
            print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')

        x['hash'] = get_hash(self.label_files + self.img_files)
        x['results'] = [nf, nm, ne, nc, i + 1]
        torch.save(x, path)  # save for next time
        logging.info(f"{prefix}New cache created: {path}")
        return x

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic_face(self, index)
            shapes = None

            # MixUp https://arxiv.org/pdf/1710.09412.pdf
            if random.random() < hyp['mixup']:
                img2, labels2 = load_mosaic_face(self, random.randint(0, self.n - 1))
                r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
                img = (img * r + img2 * (1 - r)).astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

                # landmarks: scale visible points (> 0) into letterboxed pixel coords and
                # keep invisible points at -1; odd columns are x coords, even columns are y
                for k in range(5, 15):
                    r, p = (ratio[0] * w, pad[0]) if k % 2 == 1 else (ratio[1] * h, pad[1])
                    visible = np.array(x[:, k] > 0, dtype=np.int32)
                    labels[:, k] = visible * (r * x[:, k] + p) + (visible - 1)

        if self.augment:
            # Augment imagespace
            if not mosaic:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
            labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
            labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

            labels[:, 5:15:2] /= img.shape[1]  # normalized landmark x 0-1
            labels[:, 6:15:2] /= img.shape[0]  # normalized landmark y 0-1
            labels[:, 5:15] = np.where(labels[:, 5:15] < 0, -1, labels[:, 5:15])  # keep invisible points at -1

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]
                    # mirror landmark y coords, keeping invisible points at -1
                    labels[:, 6:15:2] = np.where(labels[:, 6:15:2] < 0, -1, 1 - labels[:, 6:15:2])

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]
                    # mirror landmark x coords, keeping invisible points at -1
                    labels[:, 5:15:2] = np.where(labels[:, 5:15:2] < 0, -1, 1 - labels[:, 5:15:2])

                    # After a horizontal flip, the left/right eyes and left/right mouth corners
                    # swap roles, so swap their labels to keep the semantics learnable
                    eye_left = np.copy(labels[:, [5, 6]])
                    mouth_left = np.copy(labels[:, [11, 12]])
                    labels[:, [5, 6]] = labels[:, [7, 8]]
                    labels[:, [7, 8]] = eye_left
                    labels[:, [11, 12]] = labels[:, [13, 14]]
                    labels[:, [13, 14]] = mouth_left

        labels_out = torch.zeros((nL, 16))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)
            # showlabels(img, labels[:, 1:5], labels[:, 5:15])

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to CxHxW
        img = np.ascontiguousarray(img)
        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

    @staticmethod
    def collate_fn4(batch):
        # NOTE: the ho/wo offsets below assume 6-column labels as in stock YOLOv5;
        # this quad collate has not been adapted to the 16-column landmark format
        im, label, path, shapes = zip(*batch)  # transposed
        n = len(shapes) // 4
        im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

        ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
        wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
        s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
            i *= 4
            if random.random() < 0.5:
                im1 = F.interpolate(im[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear',
                                    align_corners=False)[0].type(im[i].type())
                lb = label[i]
            else:
                im1 = torch.cat((torch.cat((im[i], im[i + 1]), 1), torch.cat((im[i + 2], im[i + 3]), 1)), 2)
                lb = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
            im4.append(im1)
            label4.append(lb)

        for i, lb in enumerate(label4):
            lb[:, 0] = i  # add target image index for build_targets()

        return torch.stack(im4, 0), torch.cat(label4, 0), path4, shapes4


def showlabels(img, boxs, landmarks):
    # Debug helper: draw normalized boxes and landmarks on an image and display it
    for box in boxs:
        x, y, w, h = box[0] * img.shape[1], box[1] * img.shape[0], box[2] * img.shape[1], box[3] * img.shape[0]
        cv2.rectangle(img, (int(x - w / 2), int(y - h / 2)), (int(x + w / 2), int(y + h / 2)), (0, 255, 0), 2)

    for landmark in landmarks:
        for i in range(5):
            cv2.circle(img, (int(landmark[2 * i] * img.shape[1]), int(landmark[2 * i + 1] * img.shape[0])), 3,
                       (0, 0, 255), -1)
    cv2.imshow('test', img)
    cv2.waitKey(0)


def load_mosaic_face(self, index):
    # loads 4 images into one face mosaic
    labels4 = []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + [self.indices[random.randint(0, self.n - 1)] for _ in range(3)]  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        x = self.labels[index]
        labels = x.copy()
        if x.size > 0:  # Normalized xywh to pixel xyxy format
            # box: x1, y1, x2, y2
            labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
            labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
            labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
            labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh

            # 10 landmark coords: shift visible points (> 0) into mosaic coords, keep invisible at -1
            for k in range(5, 15):
                scale, pad = (w, padw) if k % 2 == 1 else (h, padh)
                visible = np.array(x[:, k] > 0, dtype=np.int32)
                labels[:, k] = visible * (scale * x[:, k] + pad) + (visible - 1)
        labels4.append(labels)

    # Concat/clip labels
    if len(labels4):
        labels4 = np.concatenate(labels4, 0)
        np.clip(labels4[:, 1:5], 0, 2 * s, out=labels4[:, 1:5])  # use with random_perspective

        # landmarks that fell outside the mosaic become invalid (-1)
        labels4[:, 5:] = np.where(labels4[:, 5:] < 0, -1, labels4[:, 5:])
        labels4[:, 5:] = np.where(labels4[:, 5:] > 2 * s, -1, labels4[:, 5:])

        # if either coordinate of a point is invalid, invalidate the whole pair
        for k in range(5, 15, 2):
            labels4[:, k] = np.where(labels4[:, k + 1] == -1, -1, labels4[:, k])
            labels4[:, k + 1] = np.where(labels4[:, k] == -1, -1, labels4[:, k + 1])

    # Augment
    img4, labels4 = random_perspective(img4, labels4,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border)  # border to remove
    return img4, labels4


# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
    # loads 1 image from dataset, returns img, original hw, resized hw
    img = self.imgs[index]
    if img is None:  # not cached
        path = self.img_files[index]
        img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    else:
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized


def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed

    # Histogram equalization
    # if random.random() < 0.2:
    #     for i in range(3):
    #         img[:, :, i] = cv2.equalizeHist(img[:, :, i])


def replicate(img, labels):
    # Replicate labels
    h, w = img.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)

    return img, labels


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy, 10 landmark coords]

    height = img.shape[0] + border[0] * 2  # shape(h,w,c)
    width = img.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -img.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -img.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    s = random.uniform(1 - scale, 1 + scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Transform label coordinates
    n = len(targets)
    if n:
        # warp the 4 box corners and the 5 landmarks (9 points per target)
        xy = np.ones((n * 9, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]].reshape(
            n * 9, 2)  # x1y1, x2y2, x1y2, x2y1, 5 landmarks
        xy = xy @ M.T  # transform
        if perspective:
            xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 18)  # rescale
        else:  # affine
            xy = xy[:, :2].reshape(n, 18)

        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]

        # warp landmarks, keeping invisible points (mask == 0) at -1
        landmarks = xy[:, 8:18]
        mask = np.array(targets[:, 5:] > 0, dtype=np.int32)
        landmarks = landmarks * mask + mask - 1

        # invalidate landmarks that left the image
        landmarks = np.where(landmarks < 0, -1, landmarks)
        landmarks[:, 0::2] = np.where(landmarks[:, 0::2] > width, -1, landmarks[:, 0::2])
        landmarks[:, 1::2] = np.where(landmarks[:, 1::2] > height, -1, landmarks[:, 1::2])

        # if either coordinate of a point is invalid, invalidate the whole pair
        for k in range(0, 10, 2):
            landmarks[:, k] = np.where(landmarks[:, k + 1] == -1, -1, landmarks[:, k])
            landmarks[:, k + 1] = np.where(landmarks[:, k] == -1, -1, landmarks[:, k + 1])

        targets[:, 5:] = landmarks

        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # clip boxes
        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
        targets = targets[i]
        targets[:, 1:5] = xy[i]

    return img, targets


def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1):  # box1(4,n), box2(4,n)
    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr)  # candidates


def cutout(image, labels):
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    h, w = image.shape[:2]

    def bbox_ioa(box1, box2):
        # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
        box2 = box2.transpose()

        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]

        # Intersection area
        inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
                     (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)

        # box2 area
        box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16

        # Intersection over box2 area
        return inter_area / box2_area

    # create random masks
    scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
    for s in scales:
        mask_h = random.randint(1, int(h * s))
        mask_w = random.randint(1, int(w * s))

        # box
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

        # return unobscured labels
        if len(labels) and s > 0.03:
            box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels


def create_folder(path='./new'):
    # Create folder
    if os.path.exists(path):
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder


def flatten_recursive(path='../coco128'):
    # Flatten a recursive directory by bringing all files to top level
    new_path = Path(path + '_flat')
    create_folder(new_path)
    for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
        shutil.copyfile(file, new_path / Path(file).name)


def extract_boxes(path='../coco128/'):  # from utils.datasets import *; extract_boxes('../coco128')
    # Convert detection dataset into classification dataset, with one directory per class
    path = Path(path)  # images dir
    shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None  # remove existing
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    for im_file in tqdm(files, total=n):
        if im_file.suffix[1:] in img_formats:
            # image
            im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
            h, w = im.shape[:2]

            # labels
            lb_file = Path(img2label_paths([str(im_file)])[0])
            if Path(lb_file).exists():
                with open(lb_file, 'r') as f:
                    lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32)  # labels

                for j, x in enumerate(lb):
                    c = int(x[0])  # class
                    f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
                    if not f.parent.is_dir():
                        f.parent.mkdir(parents=True)

                    b = x[1:] * [w, h, w, h]  # box
                    # b[2:] = b[2:].max()  # rectangle to square
                    b[2:] = b[2:] * 1.2 + 3  # pad
                    b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)

                    b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                    b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                    assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'


def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0)):  # from utils.datasets import *; autosplit('../coco128')
    """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files.

    Arguments:
        path: Path to images directory
        weights: Train, val, test weights (list)
    """
    path = Path(path)  # images dir
    files = list(path.rglob('*.*'))
    n = len(files)  # number of files
    indices = random.choices([0, 1, 2], weights=weights, k=n)  # assign each image to a split
    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files
    [(path / x).unlink() for x in txt if (path / x).exists()]  # remove existing
    for i, img in tqdm(zip(indices, files), total=n):
        if img.suffix[1:] in img_formats:
            with open(path / txt[i], 'a') as f:
                f.write(str(img) + '\n')  # add image to txt file
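The loader's assert l.shape[1] == 15 defines the ground-truth format: each row is class, normalized xywh box, then five normalized landmark (x, y) pairs, with -1 marking an invisible point. A hand-written example row (all values invented for illustration):

import numpy as np

# cls  xc    yc    w    h    eye_l     eye_r     nose      mouth_l   mouth_r
row = '0 0.48 0.53 0.21 0.30 0.42 0.47 0.53 0.47 0.48 0.55 0.43 0.60 0.52 0.60'
l = np.array(row.split(), dtype=np.float32)   # shape (15,), as the loader expects
cls, box, kpts = int(l[0]), l[1:5], l[5:15]   # matches the slices used above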

models folder

models/yolo.py

class LmkDetect(nn.Module):
    stride = None  # strides computed during build
    export = False  # onnx export: concatenate outputs

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(LmkDetect, self).__init__()
        self.nc = nc  # number of classes
        # self.no = nc + 5  # number of outputs per anchor (plain detection)
        self.no = nc + 5 + 10  # number of outputs per anchor (+10 for the 5 landmark x/y pairs)
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

    def forward(self, x):
        z = []  # inference output
        if self.export:
            for i in range(self.nl):
                x[i] = self.m[i](x[i])  # conv
                bs, _, ny, nx = x[i].shape  # x(bs,na*no,ny,nx) to x(bs,na,ny,nx,no)
                x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid_new(nx, ny, i)

                # sigmoid box/obj and class scores; keep the raw landmark offsets linear
                y = torch.cat((x[i][:, :, :, :, 0:5].sigmoid(),
                               x[i][:, :, :, :, 5:15],
                               x[i][:, :, :, :, 15:15 + self.nc].sigmoid()), 4)

                box_xy = (y[:, :, :, :, 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                box_wh = (y[:, :, :, :, 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh

                # decode the 5 landmarks relative to the grid cell and anchor
                landm1 = y[:, :, :, :, 5:7] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # x1 y1
                landm2 = y[:, :, :, :, 7:9] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # x2 y2
                landm3 = y[:, :, :, :, 9:11] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # x3 y3
                landm4 = y[:, :, :, :, 11:13] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # x4 y4
                landm5 = y[:, :, :, :, 13:15] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # x5 y5
                y = torch.cat([box_xy, box_wh, y[:, :, :, :, 4:5], landm1, landm2, landm3, landm4, landm5,
                               y[:, :, :, :, 15:15 + self.nc]], -1)

                z.append(y.view(bs, -1, self.no))
            return torch.cat(z, 1)

        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,na*no,ny,nx) to x(bs,na,ny,nx,no)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = torch.full_like(x[i], 0)
                class_range = list(range(5)) + list(range(15, 15 + self.nc))
                y[..., class_range] = x[i][..., class_range].sigmoid()
                y[..., 5:15] = x[i][..., 5:15]  # landmark offsets stay linear

                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh

                y[..., 5:7] = y[..., 5:7] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # landmark x1 y1
                y[..., 7:9] = y[..., 7:9] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # landmark x2 y2
                y[..., 9:11] = y[..., 9:11] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # landmark x3 y3
                y[..., 11:13] = y[..., 11:13] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # landmark x4 y4
                y[..., 13:15] = y[..., 13:15] * self.anchor_grid[i] + self.grid[i].to(x[i].device) * self.stride[i]  # landmark x5 y5

                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()

    def _make_grid_new(self, nx=20, ny=20, i=0):
        d = self.anchors[i].device
        if '1.10.0' in torch.__version__:  # torch 1.10.0 meshgrid requires the indexing argument
            yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)], indexing='ij')
        else:
            yv, xv = torch.meshgrid([torch.arange(ny).to(d), torch.arange(nx).to(d)])
        grid = torch.stack((xv, yv), 2).expand((1, self.na, ny, nx, 2)).float()
        anchor_grid = (self.anchors[i].clone() * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(
            (1, self.na, ny, nx, 2)).float()
        return grid, anchor_grid
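After decoding, each candidate row of the concatenated inference output holds nc + 15 values in a fixed layout. A small slicing sketch (the checkpoint path is an example, and nc = 1 is assumed):

import torch
from models.experimental import attempt_loadf

model = attempt_loadf('runs/train/exp/weights/best.pt', device='cpu')  # example path
out, _ = model(torch.zeros(1, 3, 640, 640))  # eval mode: out is (1, n, 16) for nc=1
pred = out[0, 0]        # first image, first candidate
box_xywh = pred[0:4]    # box center x, y, width, height in input pixels
obj = pred[4]           # objectness score
kpts = pred[5:15]       # x1, y1, ..., x5, y5 in input pixels
cls_scores = pred[15:]  # per-class scores (just 'face' here)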

LmkDetect also has to be registered in the following two functions in models/yolo.py, as shown in the sketch after this list:

1. In DetectionModel: if isinstance(m, (Detect, Segment, LmkDetect))
2. In parse_model: elif m in {Detect, Segment, LmkDetect}
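In context, the two edits sit in models/yolo.py roughly as follows (abridged sketch; only the LmkDetect mentions are new, the surrounding lines mirror the upstream v7.0 structure):

# models/yolo.py (abridged)

class DetectionModel(BaseModel):
    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):
        ...
        m = self.model[-1]  # last layer
        if isinstance(m, (Detect, Segment, LmkDetect)):  # build strides/anchors for the head
            s = 256  # 2x min stride
            ...

def parse_model(d, ch):
    ...
    elif m in {Detect, Segment, LmkDetect}:
        args.append([ch[x] for x in f])
        if isinstance(args[1], int):  # number of anchors
            args[1] = [list(range(args[1] * 2))] * len(f)
    ...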

Add the following to models/experimental.py:

def attempt_loadf(weights, device=None, inplace=True, fuse=True):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    from models.yolo import Detect, Model, LmkDetect

    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        ckpt = torch.load(attempt_download(w), map_location='cpu')  # load
        ckpt = (ckpt.get('ema') or ckpt['model']).to(device).float()  # FP32 model

        # Model compatibility updates
        if not hasattr(ckpt, 'stride'):
            ckpt.stride = torch.tensor([32.])
        if hasattr(ckpt, 'names') and isinstance(ckpt.names, (list, tuple)):
            ckpt.names = dict(enumerate(ckpt.names))  # convert to dict

        model.append(ckpt.fuse().eval() if fuse and hasattr(ckpt, 'fuse') else ckpt.eval())  # model in eval mode

    # Module compatibility updates
    for m in model.modules():
        t = type(m)
        if t in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU, Detect, Model, LmkDetect):
            m.inplace = inplace  # torch 1.7.0 compatibility
            if t in (Detect, LmkDetect) and not isinstance(m.anchor_grid, list):
                delattr(m, 'anchor_grid')
                setattr(m, 'anchor_grid', [torch.zeros(1)] * m.nl)
        elif t is nn.Upsample and not hasattr(m, 'recompute_scale_factor'):
            m.recompute_scale_factor = None  # torch 1.11.0 compatibility

    # Return model
    if len(model) == 1:
        return model[-1]

    # Return detection ensemble
    print(f'Ensemble created with {weights}\n')
    for k in 'names', 'nc', 'yaml':
        setattr(model, k, getattr(model[0], k))
    model.stride = model[torch.argmax(torch.tensor([m.stride.max() for m in model])).int()].stride  # max stride
    assert all(model[0].nc == m.nc for m in model), f'Models have different class counts: {[m.nc for m in model]}'
    return model
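Usage mirrors the stock attempt_load; a trained landmark checkpoint is pulled in with one call (the path is an example):

from models.experimental import attempt_loadf

# fuse=True folds Conv+BN for faster inference; the model is returned in eval mode
model = attempt_loadf('runs/train/exp/weights/best.pt', device='cpu')
print(model.stride, model.names)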

Model configuration file

models/landmark/yolov5n-landmark.yaml

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license

# Parameters
nc: 80  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.25  # layer channel multiple
anchors:
  - [4,5,  8,10,  13,16]  # P3/8
  - [23,29,  43,55,  73,105]  # P4/16
  - [146,217,  231,300,  335,433]  # P5/32

# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]                 # output shape (channels scaled by width_multiple), input [N, 3, 640, 640]
  [[-1, 1, Conv, [64, 6, 2, 2]],                 # 0-P1/2             [N, 16, 320, 320]
   [-1, 1, Conv, [128, 3, 2]],                   # 1-P2/4             [N, 32, 160, 160]
   [-1, 3, C3, [128]],                           #                    [N, 32, 160, 160]
   [-1, 1, Conv, [256, 3, 2]],                   # 3-P3/8             [N, 64, 80, 80]
   [-1, 6, C3, [256]],                           #                    [N, 64, 80, 80]
   [-1, 1, Conv, [512, 3, 2]],                   # 5-P4/16            [N, 128, 40, 40]
   [-1, 9, C3, [512]],                           #                    [N, 128, 40, 40]
   [-1, 1, Conv, [1024, 3, 2]],                  # 7-P5/32            [N, 256, 20, 20]
   [-1, 3, C3, [1024, False]],                   #                    [N, 256, 20, 20]
   [-1, 1, SPPF, [1024, 5]],                     # 9                  [N, 256, 20, 20]
  ]

# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],                   #                    [N, 128, 20, 20]
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],   #                    [N, 128, 40, 40]
   [[-1, 6], 1, Concat, [1]],                    # cat backbone P4    [N, 256, 40, 40]
   [-1, 3, C3, [512, False]],                    # 13                 [N, 128, 40, 40]

   [-1, 1, Conv, [256, 1, 1]],                   #                    [N, 64, 40, 40]
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],   #                    [N, 64, 80, 80]
   [[-1, 4], 1, Concat, [1]],                    # cat backbone P3    [N, 128, 80, 80]
   [-1, 3, C3, [256, False]],                    # 17 (P3/8-small)    [N, 64, 80, 80]

   [-1, 1, Conv, [256, 3, 2]],                   #                    [N, 64, 40, 40]
   [[-1, 14], 1, Concat, [1]],                   # cat head P4        [N, 128, 40, 40]
   [-1, 3, C3, [512, False]],                    # 20 (P4/16-medium)  [N, 128, 40, 40]

   [-1, 1, Conv, [512, 3, 2]],                   #                    [N, 128, 20, 20]
   [[-1, 10], 1, Concat, [1]],                   # cat head P5        [N, 256, 20, 20]
   [-1, 3, C3, [1024, False]],                   # 23 (P5/32-large)   [N, 256, 20, 20]

   [[17, 20, 23], 1, LmkDetect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

# YOLOv5n summary: 270 layers, 1872157 parameters, 1872157 gradients, 4.5 GFLOPs
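Assuming LmkDetect has been registered in models/yolo.py as described above, a quick smoke test confirms that the yaml parses and that each detection layer emits nc + 15 channels per anchor (nc=1 for a single face class):

import torch
from models.yolo import Model

model = Model('models/landmark/yolov5n-landmark.yaml', ch=3, nc=1)
out = model(torch.zeros(1, 3, 640, 640))   # fresh modules are in training mode
for p in out:
    print(p.shape)  # (1, 3, 80, 80, 16), (1, 3, 40, 40, 16), (1, 3, 20, 20, 16)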

Hyperparameters: data/hyps/hyp.scratch.yaml

# Hyperparameters for COCO training from scratch
# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials

lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.2  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum/Adam beta1
weight_decay: 0.0005  # optimizer weight decay 5e-4
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.1  # warmup initial bias lr
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
landmark: 0.005  # landmark loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.20  # IoU training threshold
anchor_t: 4.0  # anchor-multiple threshold
# anchors: 3  # anchors per output layer (0 to ignore)
fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.4  # image HSV-Value augmentation (fraction)
degrees: 0.0  # image rotation (+/- deg)
translate: 0.1  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.5  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.0  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 0.5  # image mosaic (probability)
mixup: 0.0  # image mixup (probability)
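The only addition over the stock file is the landmark gain, which compute_loss in utils/landmark/loss.py is expected to consume the same way the box/obj/cls gains are consumed. A minimal sketch of how the gain weights the component losses (the component values here are placeholders; the actual loss file may differ in detail):

import yaml
import torch

with open('data/hyps/hyp.scratch.yaml') as f:
    hyp = yaml.safe_load(f)

lbox, lobj, lcls, lmark = torch.rand(4)  # placeholder per-component losses
loss = lbox * hyp['box'] + lobj * hyp['obj'] + lcls * hyp['cls'] + lmark * hyp['landmark']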

Detection results

Five keypoints (eyes, nose, mouth corners)
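To reproduce this, a bare-bones inference sketch using the pieces built above (the weights path, image name and 0.5 confidence threshold are examples; proper letterboxing and the landmark-aware NMS from landmark/predict.py are omitted for brevity):

import cv2
import torch
from models.experimental import attempt_loadf
from utils.general import xywh2xyxy
from utils.plots import Annotator

model = attempt_loadf('runs/train/exp/weights/best.pt', device='cpu')
img = cv2.resize(cv2.imread('face.jpg'), (640, 640))  # example input
x = torch.from_numpy(img[:, :, ::-1].copy()).permute(2, 0, 1).float()[None] / 255
with torch.no_grad():
    pred = model(x)[0][0]             # (n, 16) for nc=1: xywh, conf, 10 kpts, cls
cand = pred[pred[:, 4] > 0.5]         # confidence filter (NMS omitted here)
boxes = xywh2xyxy(cand[:, :4])
ann = Annotator(img, line_width=2)
for box, det in zip(boxes.tolist(), cand.tolist()):
    ann.box_label(box, f'face {det[4]:.2f}')
    ann.landmark(det[5:15])
cv2.imwrite('result.jpg', ann.result())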

Full code: GitHub

