1. 目标检测评估核心概念

在人脸检测等单类别目标检测任务中,我们需要通过多个置信度阈值来全面评估模型性能。关键指标包括:

  • 精确率 (Precision):正确预测占全部预测的比例
  • 召回率 (Recall):正确预测占全部真实目标的比例
  • AP (Average Precision):PR曲线下的面积
  • PR曲线:展示不同阈值下的精度-召回平衡关系

2. 数据准备与格式

2.1 真实标注格式

# Ground-truth annotations, one list per image; each box is [xmin, ymin, xmax, ymax]
true_boxes = [
    [[50, 60, 120, 180], [200, 210, 280, 350]],  # image 1
    [[80, 90, 150, 200]],                        # image 2
    []                                            # image 3 (no faces)
]

2.2 预测结果格式

# Predictions, one list per image; each entry is [xmin, ymin, xmax, ymax, confidence]
pred_boxes = [
    [[55, 65, 118, 178, 0.98], [195, 205, 285, 355, 0.76]],
    [[70, 85, 155, 195, 0.89], [300, 320, 400, 450, 0.65]],
    [[10, 15, 80, 100, 0.91]]
]

3. 核心评估函数实现

3.1 IoU计算

def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1, box2: sequences [xmin, ymin, xmax, ymax].

    Returns:
        IoU in [0, 1]; 0.0 when the union area is zero (both boxes
        degenerate), which previously raised ZeroDivisionError.
    """
    # Corners of the intersection rectangle.
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # max(0, ...) clamps to zero when the boxes do not overlap.
    inter_area = max(0, x2 - x1) * max(0, y2 - y1)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union = box1_area + box2_area - inter_area
    # Guard against division by zero for zero-area inputs.
    return inter_area / union if union > 0 else 0.0

3.2 多阈值评估函数

def evaluate_detections(true_boxes, pred_boxes, iou_threshold=0.5, conf_thresholds=None):
    """Compute precision/recall at a sweep of confidence thresholds.

    Fixes over the draft version:
      * the image index was left as a literal ``...`` placeholder, so the
        function could not run — each prediction now records the index of
        the image it came from;
      * ``used_gt`` stored bare ground-truth indices, so matched boxes in
        different images collided — keys are now (image, gt) pairs;
      * the unused ``'matched'`` flag was dropped and the ground-truth
        total is computed once outside the threshold loop.

    Args:
        true_boxes: per-image lists of ground-truth boxes
            [xmin, ymin, xmax, ymax].
        pred_boxes: per-image lists of predictions
            [xmin, ymin, xmax, ymax, confidence].
        iou_threshold: minimum IoU for a prediction to count as a true
            positive.
        conf_thresholds: iterable of confidence cutoffs; defaults to
            101 evenly spaced values in [0, 1].

    Returns:
        List of dicts with keys 'threshold', 'precision', 'recall',
        'tp', 'fp' — one per confidence threshold.
    """
    if conf_thresholds is None:
        conf_thresholds = np.linspace(0, 1, 101)

    # Flatten predictions, remembering which image each one belongs to.
    all_preds = []
    for img_idx, img_preds in enumerate(pred_boxes):
        for pred in img_preds:
            all_preds.append({
                'img_idx': img_idx,
                'box': pred[:4],
                'confidence': pred[4],
            })
    # Greedy matching must consider high-confidence predictions first.
    all_preds.sort(key=lambda p: -p['confidence'])

    total_gt = sum(len(img_boxes) for img_boxes in true_boxes)

    results = []
    for thresh in conf_thresholds:
        # Keep only predictions at or above this confidence cutoff.
        filtered_preds = [p for p in all_preds if p['confidence'] >= thresh]

        tp = 0
        fp = 0
        # Matched ground-truth boxes, keyed by (image index, gt index)
        # so boxes from different images never collide.
        used_gt = set()

        for pred in filtered_preds:
            best_iou = 0.0
            best_gt = None

            # Only compare against ground truth from the same image.
            for gt_idx, gt_box in enumerate(true_boxes[pred['img_idx']]):
                key = (pred['img_idx'], gt_idx)
                if key in used_gt:
                    continue
                iou = calculate_iou(pred['box'], gt_box)
                if iou > best_iou:
                    best_iou = iou
                    best_gt = key

            if best_gt is not None and best_iou >= iou_threshold:
                tp += 1
                used_gt.add(best_gt)
            else:
                fp += 1

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / total_gt if total_gt > 0 else 0

        results.append({
            'threshold': thresh,
            'precision': precision,
            'recall': recall,
            'tp': tp,
            'fp': fp,
        })

    return results

4. 可视化与分析工具

4.1 PR曲线绘制

import matplotlib.pyplot as plt
import numpy as np

def plot_pr_curve(eval_results):
    """Draw the precision-recall curve from evaluate_detections() output."""
    recall_vals = []
    precision_vals = []
    for entry in eval_results:
        recall_vals.append(entry['recall'])
        precision_vals.append(entry['precision'])

    plt.figure(figsize=(10, 6))
    plt.plot(recall_vals, precision_vals, 'b-', linewidth=2)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.grid(True)
    plt.show()

4.2 阈值趋势分析

def plot_metrics_trend(eval_results):
    """Plot precision and recall as functions of the confidence threshold."""
    xs = [entry['threshold'] for entry in eval_results]

    plt.figure(figsize=(10, 6))
    plt.plot(xs, [entry['precision'] for entry in eval_results],
             'r-', label='Precision')
    plt.plot(xs, [entry['recall'] for entry in eval_results],
             'g--', label='Recall')
    plt.xlabel('Confidence Threshold')
    plt.ylabel('Value')
    plt.title('Metrics vs Confidence Threshold')
    plt.legend()
    plt.grid(True)
    plt.show()

5. AP计算与结果解读

def calculate_ap(eval_results):
    """Compute Average Precision (area under the PR curve).

    Uses all-point interpolation: the precision at each recall level is
    the maximum precision achieved at any recall >= that level. The
    previous raw summation used whichever precision happened to sort
    first, which underestimates AP whenever the PR curve is
    non-monotone and makes the result depend on tie ordering.

    Args:
        eval_results: list of dicts with 'precision' and 'recall' keys,
            as produced by evaluate_detections().

    Returns:
        AP as a float in [0, 1]; 0 for empty input.
    """
    if not eval_results:
        return 0

    # Walk the operating points from low to high recall.
    points = sorted((r['recall'], r['precision']) for r in eval_results)

    ap = 0
    prev_recall = 0
    for i, (recall, _) in enumerate(points):
        if recall == prev_recall:
            continue
        # Interpolated precision: best precision at recall >= this level.
        interp_precision = max(p for _, p in points[i:])
        ap += interp_precision * (recall - prev_recall)
        prev_recall = recall

    return ap

6. 完整使用示例

# Evaluation configuration (requires `import numpy as np` at the top of the file).
conf_thresholds = np.linspace(0, 1, 101)
iou_threshold = 0.5

# Run the evaluation across all confidence thresholds.
eval_results = evaluate_detections(
    true_boxes,
    pred_boxes,
    iou_threshold=iou_threshold,
    conf_thresholds=conf_thresholds
)

# Visualize the results.
plot_pr_curve(eval_results)
plot_metrics_trend(eval_results)

# Compute AP (area under the PR curve).
ap_score = calculate_ap(eval_results)
print(f"Average Precision: {ap_score:.4f}")

典型输出结果:

Average Precision: 0.8523

更多推荐