"""
Validate a trained YOLOv5 segment model on a segment dataset.
Usage:
$ bash data/scripts/get_coco.sh --val --segments # download COCO-segments val split (1G, 5000 images)
$ python segment/val.py --weights yolov5s-seg.pt --data coco.yaml --img 640 # validate COCO-segments
Usage - formats:
$ python segment/val.py --weights yolov5s-seg.pt # PyTorch
yolov5s-seg.torchscript # TorchScript
yolov5s-seg.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s-seg_openvino_model # OpenVINO
yolov5s-seg.engine # TensorRT
yolov5s-seg.mlmodel # CoreML (macOS-only)
yolov5s-seg_saved_model # TensorFlow SavedModel
yolov5s-seg.pb # TensorFlow GraphDef
yolov5s-seg.tflite # TensorFlow Lite
yolov5s-seg_edgetpu.tflite # TensorFlow Edge TPU
yolov5s-seg_paddle_model # PaddlePaddle
"""
import argparse
import json
import os
import subprocess
import sys
from multiprocessing.pool import ThreadPool
from pathlib import Path
import numpy as np
import torch
from tqdm import tqdm
# Resolved absolute path of this script.
FILE = Path(__file__).resolve()
# Directory one level above the directory that contains this script.
ROOT = FILE.parents[1]
# Make ROOT importable; presumably this is what lets the `models.*` and `utils.*` imports below resolve.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))
# Re-express ROOT relative to the current working directory (used for default paths further down).
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))
import torch.nn.functional as F
from models.common import DetectMultiBackend
from models.yolo import SegmentationModel
from utils.callbacks import Callbacks
from utils.general import (
LOGGER,
NUM_THREADS,
TQDM_BAR_FORMAT,
Profile,
check_dataset,
check_img_size,
check_requirements,
check_yaml,
coco80_to_coco91_class,
colorstr,
increment_path,
non_max_suppression,
print_args,
scale_boxes,
xywh2xyxy,
xyxy2xywh,
)
from utils.metrics import ConfusionMatrix, box_iou
from utils.plots import output_to_target, plot_val_study
from utils.segment.dataloaders import create_dataloader
from utils.segment.general import mask_iou, process_mask, process_mask_native, scale_image
from utils.segment.metrics import Metrics, ap_per_class_box_and_mask
from utils.segment.plots import plot_images_and_masks
from utils.torch_utils import de_parallel, select_device, smart_inference_mode
def save_one_txt(predn, save_conf, shape, file):
    """
    Append one image's detections to a text file, one detection per line.

    Each line is written as `cls x y w h` (plus a trailing `conf` when `save_conf` is True), with the box values
    produced by `xyxy2xywh` and divided by the image size so they are normalized.

    Arguments:
        predn: tensor whose rows unpack as (*xyxy, conf, cls).
        save_conf (bool): append the confidence as the last column when True.
        shape: image size; indexed as shape[[1, 0, 1, 0]] to build the per-column divisor
            (assumes (height, width) ordering -- TODO confirm against caller).
        file: path of the text file, opened in append mode.
    """
    gn = torch.tensor(shape)[[1, 0, 1, 0]]  # per-column normalization divisor
    rows = []
    for *xyxy, conf, cls in predn.tolist():
        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)
        rows.append(("%g " * len(line)).rstrip() % line + "\n")
    # Open the file once for all rows instead of once per detection. Skip the open entirely when there is
    # nothing to write so that, as before, no empty file is created.
    if rows:
        with open(file, "a") as f:
            f.writelines(rows)
def save_one_json(predn, jdict, path, class_map, pred_masks):
    """
    Append one COCO-style result record per detection of a single image to `jdict`.

    Every record carries the image id, mapped category id, rounded bbox, rounded score and an RLE-encoded mask, e.g.
    {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236, "segmentation": ...}.

    Arguments:
        predn: tensor whose columns 0-3 are the box, column 4 the score and column 5 the class index.
        jdict (list): output list, extended in place.
        path (Path): image path; its stem becomes the image id (as int when it is numeric).
        class_map: lookup from class index to the category id that is written out.
        pred_masks: array of masks with the instance axis last (it is transposed to axis 0 before encoding).
    """
    from pycocotools.mask import encode

    def single_encode(x):
        """Run-length encode one binary mask and make its `counts` field a str so it can be JSON-serialized."""
        fortran_mask = np.asarray(x[:, :, None], order="F", dtype="uint8")
        rle = encode(fortran_mask)[0]
        rle["counts"] = rle["counts"].decode("utf-8")
        return rle

    stem = path.stem
    image_id = int(stem) if stem.isnumeric() else stem

    box = xyxy2xywh(predn[:, :4])
    # Shift the first two columns back by half the last two (presumably center -> top-left corner).
    box[:, :2] -= box[:, 2:] / 2

    # Move the instance axis to the front so the pool maps over one mask at a time.
    pred_masks = np.transpose(pred_masks, (2, 0, 1))
    with ThreadPool(NUM_THREADS) as pool:
        rles = pool.map(single_encode, pred_masks)

    pairs = zip(predn.tolist(), box.tolist())
    for idx, (det, xywh) in enumerate(pairs):
        record = {
            "image_id": image_id,
            "category_id": class_map[int(det[5])],
            "bbox": [round(v, 3) for v in xywh],
            "score": round(det[4], 5),
            "segmentation": rles[idx],
        }
        jdict.append(record)
def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
    """
    Build the boolean "correct prediction" matrix for one image.

    IoU is computed between boxes by default, or between masks when `masks` is True.

    Arguments:
        detections (array[N, 6]): x1, y1, x2, y2, conf, class
        labels (array[M, 5]): class, x1, y1, x2, y2
        iouv: 1-D tensor of IoU thresholds; one output column per threshold.
        pred_masks: predicted masks, used only when `masks` is True.
        gt_masks: ground-truth masks, used only when `masks` is True.
        overlap (bool): when True, `gt_masks` is expanded into one binary mask per label by comparing it against
            the values 1..M (assumes a single index-encoded mask is passed in -- TODO confirm).
        masks (bool): select mask IoU instead of box IoU.

    Returns:
        correct (bool tensor[N, len(iouv)]) on `iouv.device`; entry [d, t] is True when detection d was matched to
        a label of the same class at threshold t.
    """
    if not masks:
        iou = box_iou(labels[:, 1:], detections[:, :4])
    else:
        if overlap:
            nl = len(labels)
            index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
            gt_masks = gt_masks.repeat(nl, 1, 1)
            gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
        if gt_masks.shape[1:] != pred_masks.shape[1:]:
            # Resize ground truth to the prediction resolution, then re-binarize at 0.5.
            resized = F.interpolate(gt_masks[None], pred_masks.shape[1:], mode="bilinear", align_corners=False)
            gt_masks = resized[0]
            gt_masks = gt_masks.gt_(0.5)
        flat_gt = gt_masks.view(gt_masks.shape[0], -1)
        flat_pred = pred_masks.view(pred_masks.shape[0], -1)
        iou = mask_iou(flat_gt, flat_pred)

    correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
    correct_class = labels[:, 0:1] == detections[:, 5]
    for col in range(len(iouv)):
        # Index pairs (label row, detection column) that pass this threshold with matching class.
        hits = torch.where((iou >= iouv[col]) & correct_class)
        n_hits = hits[0].shape[0]
        if not n_hits:
            continue
        hit_iou = iou[hits[0], hits[1]][:, None]
        matches = torch.cat((torch.stack(hits, 1), hit_iou), 1).cpu().numpy()  # rows: [label, detection, iou]
        if n_hits > 1:
            order = matches[:, 2].argsort()[::-1]
            matches = matches[order]  # highest IoU first
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]  # one row per detection
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]  # one row per label
        correct[matches[:, 1].astype(int), col] = True
    return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
@smart_inference_mode()
def run(
    data,
    weights=None,
    batch_size=32,
    imgsz=640,
    conf_thres=0.001,
    iou_thres=0.6,
    max_det=300,
    task="val",
    device="",
    workers=8,
    single_cls=False,
    augment=False,
    verbose=False,
    save_txt=False,
    save_hybrid=False,
    save_conf=False,
    save_json=False,
    project=ROOT / "runs/val-seg",
    name="exp",
    exist_ok=False,
    half=True,
    dnn=False,
    model=None,
    dataloader=None,
    save_dir=Path(""),
    plots=True,
    overlap=False,
    mask_downsample_ratio=1,
    compute_loss=None,
    callbacks=Callbacks(),
):
    """
    Validate a YOLOv5 segmentation model on a dataset and report box and mask metrics.

    The function runs in one of two modes. When `model` is given, the caller supplies the model, `dataloader` and
    `save_dir`. When `model` is None, the model is loaded from `weights`, the dataset is resolved from `data`, and
    the dataloader and run directory are created here.

    Arguments:
        data: dataset description; passed through `check_dataset` when `model` is None, otherwise used as a dict.
        weights: model path(s), used only when `model` is None.
        batch_size, imgsz: inference batch size and image size (may be overridden by the loaded backend).
        conf_thres, iou_thres, max_det: forwarded to `non_max_suppression`.
        task: dataset split key; any value other than "train"/"val"/"test" falls back to "val"
            ("speed" additionally disables padding and rectangular inference).
        device, workers: device selector and dataloader worker count.
        single_cls: treat every class as class 0.
        augment: forwarded to the model call when no `compute_loss` is given.
        verbose: log per-class results.
        save_txt, save_conf: write per-image label files (optionally with confidences).
        save_hybrid: pass the ground-truth labels into NMS.
        save_json: collect COCO-JSON results and evaluate them with pycocotools.
        project, name, exist_ok: control the run directory when `model` is None.
        half: use FP16 where supported.
        dnn: forwarded to `DetectMultiBackend`.
        model, dataloader, save_dir: supplied by the caller when validating an in-memory model.
        plots: produce the confusion matrix and the first three batches of label/prediction images.
        overlap, mask_downsample_ratio: mask handling options forwarded to the dataloader / `process_batch`.
        compute_loss: optional callable; when given its second return value is accumulated into the loss.
        callbacks: accepted for interface compatibility; not used in this function.

    Returns:
        ((mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask, *losses),
         per-class maps from `metrics.get_maps(nc)`,
         (pre-process, inference, NMS) time per image in milliseconds).
    """
    if save_json:
        check_requirements("pycocotools>=2.0.6")
        process = process_mask_native
    else:
        process = process_mask

    # Initialize/load model and set device
    training = model is not None
    if training:  # model supplied by the caller
        device, pt, jit, engine = next(model.parameters()).device, True, False, False
        half &= device.type != "cpu"  # FP16 is only kept on non-CPU devices
        model.half() if half else model.float()
        nm = de_parallel(model).model[-1].nm
    else:  # standalone run
        device = select_device(device, batch_size=batch_size)

        # Directories
        save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)
        (save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True)

        # Load model
        model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
        stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine
        imgsz = check_img_size(imgsz, s=stride)
        half = model.fp16
        nm = de_parallel(model).model.model[-1].nm if isinstance(model, SegmentationModel) else 32
        if engine:
            batch_size = model.batch_size
        else:
            device = model.device
            if not (pt or jit):
                batch_size = 1
                LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models")

        # Data
        data = check_dataset(data)

    # Configure
    model.eval()
    cuda = device.type != "cpu"
    is_coco = isinstance(data.get("val"), str) and data["val"].endswith(f"coco{os.sep}val2017.txt")
    nc = 1 if single_cls else int(data["nc"])
    iouv = torch.linspace(0.5, 0.95, 10, device=device)  # 10 IoU thresholds from 0.5 to 0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        if pt and not single_cls:  # the weights and the dataset must agree on the class count
            ncm = model.model.nc
            assert ncm == nc, (
                f"{weights} ({ncm} classes) trained on different --data than what you passed ({nc} "
                f"classes). Pass correct combination of --weights and --data that are trained together."
            )
        model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz))
        pad, rect = (0.0, False) if task == "speed" else (0.5, pt)
        task = task if task in ("train", "val", "test") else "val"
        dataloader = create_dataloader(
            data[task],
            imgsz,
            batch_size,
            stride,
            single_cls,
            pad=pad,
            rect=rect,
            workers=workers,
            prefix=colorstr(f"{task}: "),
            overlap_mask=overlap,
            mask_downsample_ratio=mask_downsample_ratio,
        )[0]

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = model.names if hasattr(model, "names") else model.module.names
    if isinstance(names, (list, tuple)):  # normalize list/tuple names to an index -> name dict
        names = dict(enumerate(names))
    class_map = coco80_to_coco91_class() if is_coco else list(range(1000))
    s = ("%22s" + "%11s" * 10) % (
        "Class",
        "Images",
        "Instances",
        "Box(P",
        "R",
        "mAP50",
        "mAP50-95)",
        "Mask(P",
        "R",
        "mAP50",
        "mAP50-95)",
    )
    dt = Profile(device=device), Profile(device=device), Profile(device=device)  # pre-process, inference, NMS
    metrics = Metrics()
    loss = torch.zeros(4, device=device)
    jdict, stats = [], []
    pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT)
    for batch_i, (im, targets, paths, shapes, masks) in enumerate(pbar):
        # Pre-process
        with dt[0]:
            if cuda:
                im = im.to(device, non_blocking=True)
                targets = targets.to(device)
                masks = masks.to(device)
            masks = masks.float()
            im = im.half() if half else im.float()
            im /= 255  # scale pixel values by 1/255
            nb, _, height, width = im.shape

        # Inference
        with dt[1]:
            preds, protos, train_out = model(im) if compute_loss else (*model(im, augment=augment)[:2], None)

        # Loss
        if compute_loss:
            loss += compute_loss((train_out, protos), targets, masks)[1]

        # NMS
        targets[:, 2:] *= torch.tensor((width, height, width, height), device=device)  # scale boxes to pixels
        lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []
        with dt[2]:
            preds = non_max_suppression(
                preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det, nm=nm
            )

        # Metrics
        plot_masks = []
        for si, (pred, proto) in enumerate(zip(preds, protos)):
            labels = targets[targets[:, 0] == si, 1:]
            nl, npr = labels.shape[0], pred.shape[0]  # number of labels, number of predictions
            path, shape = Path(paths[si]), shapes[si][0]
            correct_masks = torch.zeros(npr, niou, dtype=torch.bool, device=device)
            correct_bboxes = torch.zeros(npr, niou, dtype=torch.bool, device=device)
            seen += 1

            if npr == 0:
                # No predictions: record the labels (if any) so they still count as targets.
                if nl:
                    stats.append((correct_masks, correct_bboxes, *torch.zeros((2, 0), device=device), labels[:, 0]))
                    if plots:
                        confusion_matrix.process_batch(detections=None, labels=labels[:, 0])
                continue

            # Masks
            midx = [si] if overlap else targets[:, 0] == si
            gt_masks = masks[midx]
            pred_masks = process(proto, pred[:, 6:], pred[:, :4], shape=im[si].shape[1:])

            # Predictions
            if single_cls:
                pred[:, 5] = 0
            predn = pred.clone()
            scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])

            # Evaluate
            if nl:
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1])
                labelsn = torch.cat((labels[:, 0:1], tbox), 1)
                correct_bboxes = process_batch(predn, labelsn, iouv)
                correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=overlap, masks=True)
                if plots:
                    confusion_matrix.process_batch(predn, labelsn)
            stats.append((correct_masks, correct_bboxes, pred[:, 4], pred[:, 5], labels[:, 0]))

            pred_masks = torch.as_tensor(pred_masks, dtype=torch.uint8)
            if plots and batch_i < 3:
                plot_masks.append(pred_masks[:15])  # keep at most 15 masks per image for plotting

            # Save/log
            if save_txt:
                save_one_txt(predn, save_conf, shape, file=save_dir / "labels" / f"{path.stem}.txt")
            if save_json:
                pred_masks = scale_image(
                    im[si].shape[1:], pred_masks.permute(1, 2, 0).contiguous().cpu().numpy(), shape, shapes[si][1]
                )
                save_one_json(predn, jdict, path, class_map, pred_masks)

        # Plot images (first three batches only)
        if plots and batch_i < 3:
            if len(plot_masks):
                plot_masks = torch.cat(plot_masks, dim=0)
            plot_images_and_masks(im, targets, masks, paths, save_dir / f"val_batch{batch_i}_labels.jpg", names)
            plot_images_and_masks(
                im,
                output_to_target(preds, max_det=15),
                plot_masks,
                paths,
                save_dir / f"val_batch{batch_i}_pred.jpg",
                names,
            )

    # Compute metrics
    stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)]
    if len(stats) and stats[0].any():
        results = ap_per_class_box_and_mask(*stats, plot=plots, save_dir=save_dir, names=names)
        metrics.update(results)
    # `stats` is empty when no image produced a label or a prediction; indexing stats[4] would then raise
    # IndexError, so fall back to an all-zero per-class target count and let the warning below fire instead.
    nt = np.bincount(stats[4].astype(int), minlength=nc) if len(stats) else np.zeros(nc, dtype=int)

    # Print results
    pf = "%22s" + "%11i" * 2 + "%11.3g" * 8
    LOGGER.info(pf % ("all", seen, nt.sum(), *metrics.mean_results()))
    if nt.sum() == 0:
        LOGGER.warning(f"WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels")

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(metrics.ap_class_index):
            LOGGER.info(pf % (names[c], seen, nt[c], *metrics.class_result(i)))

    # Print speeds
    t = tuple(x.t / seen * 1e3 for x in dt)  # milliseconds per image
    if not training:
        shape = (batch_size, 3, imgsz, imgsz)
        LOGGER.info(f"Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {shape}" % t)

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))

    mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask = metrics.mean_results()

    # Save JSON
    if save_json and len(jdict):
        w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else ""
        anno_json = str(Path("../datasets/coco/annotations/instances_val2017.json"))
        pred_json = str(save_dir / f"{w}_predictions.json")
        LOGGER.info(f"\nEvaluating pycocotools mAP... saving {pred_json}...")
        with open(pred_json, "w") as f:
            json.dump(jdict, f)

        try:
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)
            pred = anno.loadRes(pred_json)
            results = []
            # Loop variable is named `coco_eval` so the builtin `eval` is not shadowed.
            for coco_eval in COCOeval(anno, pred, "bbox"), COCOeval(anno, pred, "segm"):
                if is_coco:
                    coco_eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.im_files]
                coco_eval.evaluate()
                coco_eval.accumulate()
                coco_eval.summarize()
                results.extend(coco_eval.stats[:2])  # first two summary stats of each evaluator
            map_bbox, map50_bbox, map_mask, map50_mask = results
        except Exception as e:
            # Best-effort: pycocotools evaluation is optional, keep the metrics computed above.
            LOGGER.info(f"pycocotools unable to run: {e}")

    # Return results
    model.float()  # restore FP32 weights before handing the model back
    if not training:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ""
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    final_metric = mp_bbox, mr_bbox, map50_bbox, map_bbox, mp_mask, mr_mask, map50_mask, map_mask
    return (*final_metric, *(loss.cpu() / len(dataloader)).tolist()), metrics.get_maps(nc), t
def parse_opt():
    """
    Build the command-line parser for segmentation validation, parse the arguments and post-process them.

    After parsing, `opt.data` is passed through `check_yaml`, `--save-hybrid` forces `--save-txt` on, and the
    resulting options are printed with `print_args`.

    Returns:
        argparse.Namespace with the parsed options.
    """
    cli = argparse.ArgumentParser()
    add = cli.add_argument

    add("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
    add("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
    add("--batch-size", type=int, default=32, help="batch size")
    add("--imgsz", "--img", "--img-size", type=int, default=640, help="inference size (pixels)")
    add("--conf-thres", type=float, default=0.001, help="confidence threshold")
    add("--iou-thres", type=float, default=0.6, help="NMS IoU threshold")
    add("--max-det", type=int, default=300, help="maximum detections per image")
    add("--task", default="val", help="train, val, test, speed or study")
    add("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
    add("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")

    # Boolean switches, registered in the same order as they appear in --help.
    leading_switches = (
        ("--single-cls", "treat as single-class dataset"),
        ("--augment", "augmented inference"),
        ("--verbose", "report mAP by class"),
        ("--save-txt", "save results to *.txt"),
        ("--save-hybrid", "save label+prediction hybrid results to *.txt"),
        ("--save-conf", "save confidences in --save-txt labels"),
        ("--save-json", "save a COCO-JSON results file"),
    )
    for flag, description in leading_switches:
        add(flag, action="store_true", help=description)

    add("--project", default=ROOT / "runs/val-seg", help="save results to project/name")
    add("--name", default="exp", help="save to project/name")

    trailing_switches = (
        ("--exist-ok", "existing project/name ok, do not increment"),
        ("--half", "use FP16 half-precision inference"),
        ("--dnn", "use OpenCV DNN for ONNX inference"),
    )
    for flag, description in trailing_switches:
        add(flag, action="store_true", help=description)

    opt = cli.parse_args()
    opt.data = check_yaml(opt.data)
    if opt.save_hybrid:  # hybrid output implies writing txt labels
        opt.save_txt = True
    print_args(vars(opt))
    return opt
def main(opt):
    """
    Dispatch a validation job according to `opt.task`.

    - "train" / "val" / "test": a single `run` call with the parsed options.
    - "speed": one `run` per weights file with conf/iou thresholds 0.25/0.45, JSON saving and plots disabled.
    - "study": for each weights file, `run` at image sizes 256..1536 in steps of 128, save the results to
      `study_<data>_<weights>.txt`, zip all study files and plot them.

    Arguments:
        opt (argparse.Namespace): options as produced by `parse_opt`; mutated in place for speed/study tasks.

    Raises:
        NotImplementedError: if `opt.task` is not one of the supported tasks.
    """
    check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))

    if opt.task in ("train", "val", "test"):  # run normally
        if opt.conf_thres > 0.001:
            LOGGER.warning(f"WARNING ⚠️ confidence threshold {opt.conf_thres} > 0.001 produces invalid results")
        if opt.save_hybrid:
            LOGGER.warning("WARNING ⚠️ --save-hybrid returns high mAP from hybrid labels, not from predictions alone")
        run(**vars(opt))

    else:
        weights = opt.weights if isinstance(opt.weights, list) else [opt.weights]
        opt.half = torch.cuda.is_available() and opt.device != "cpu"  # FP16 only when CUDA is usable
        if opt.task == "speed":
            opt.conf_thres, opt.iou_thres, opt.save_json = 0.25, 0.45, False
            for opt.weights in weights:
                run(**vars(opt), plots=False)

        elif opt.task == "study":
            for opt.weights in weights:
                f = f"study_{Path(opt.data).stem}_{Path(opt.weights).stem}.txt"
                x, y = list(range(256, 1536 + 128, 128)), []  # x axis (image sizes), y axis (results)
                for opt.imgsz in x:
                    LOGGER.info(f"\nRunning {f} --imgsz {opt.imgsz}...")
                    r, _, t = run(**vars(opt), plots=False)
                    y.append(r + t)  # results and times
                np.savetxt(f, y, fmt="%10.4g")
            # subprocess.run with an argument list does not go through a shell, so a literal "study_*.txt"
            # would never be expanded; resolve the pattern here and pass the concrete file names to zip.
            study_files = sorted(str(p) for p in Path().glob("study_*.txt"))
            if study_files:
                subprocess.run(["zip", "-r", "study.zip", *study_files])
            plot_val_study(x=x)
        else:
            raise NotImplementedError(f'--task {opt.task} not in ("train", "val", "test", "speed", "study")')
# Script entry point: parse the command-line options and hand them to main().
if __name__ == "__main__":
    opt = parse_opt()
    main(opt)