性能指标详解

选择合适的评估指标对于正确评估模型性能至关重要。不同的问题类型需要不同的评估指标，本章将详细介绍各种性能指标的含义、计算方法和使用场景。

分类问题评估指标

1. 基础指标：准确率、精确率、召回率、F1分数

python

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report
)
import matplotlib.pyplot as plt
import seaborn as sns

# Build a synthetic 3-class dataset: 1000 samples, 20 features
# (10 informative + 10 redundant), seeded so the run is repeatable.
X, y = make_classification(
    n_samples=1000, n_features=20, n_informative=10,
    n_redundant=10, n_classes=3, random_state=42
)

# Hold out 20% of the samples as the test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest and keep both the hard predictions and the
# per-class probabilities; the sections below consume both.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)

def calculate_basic_metrics(y_true, y_pred):
    """Print and return accuracy plus macro- and weighted-average P/R/F1.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.

    Returns:
        dict with the keys ``accuracy``, ``precision_macro``, ``recall_macro``,
        ``f1_macro``, ``precision_weighted``, ``recall_weighted`` and
        ``f1_weighted``.
    """
    scorers = (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )

    # Fill the result table in the same key order the report is printed in:
    # accuracy first, then the macro block, then the weighted block.
    results = {'accuracy': accuracy_score(y_true, y_pred)}
    for averaging in ('macro', 'weighted'):
        for prefix, scorer in scorers:
            results[f'{prefix}_{averaging}'] = scorer(
                y_true, y_pred, average=averaging
            )

    print("=== 基础分类指标 ===")
    print(f"准确率 (Accuracy): {results['accuracy']:.4f}")

    headings = (
        ('macro', "\n宏平均 (Macro Average):"),
        ('weighted', "\n加权平均 (Weighted Average):"),
    )
    for averaging, heading in headings:
        print(heading)
        print(f"  精确率 (Precision): {results['precision_' + averaging]:.4f}")
        print(f"  召回率 (Recall): {results['recall_' + averaging]:.4f}")
        print(f"  F1分数: {results['f1_' + averaging]:.4f}")

    return results

# Compute the basic metrics for the test-split predictions.
basic_metrics = calculate_basic_metrics(y_test, y_pred)

2. 混淆矩阵

python

def plot_confusion_matrix(y_true, y_pred, class_names=None):
    """Draw the confusion matrix as a heatmap and print per-class metrics.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned with ``y_true``.
        class_names: Optional display names, one per row/column of the
            matrix. Defaults to ``'类别 {i}'`` for each matrix row.

    Returns:
        The confusion matrix produced by ``confusion_matrix(y_true, y_pred)``.
    """
    cm = confusion_matrix(y_true, y_pred)

    if class_names is None:
        # Size the default names from the matrix itself rather than from
        # y_true alone: a label that only shows up in y_pred still gets a
        # row/column, and the names must stay in step with it.
        class_names = [f'类别 {i}' for i in range(cm.shape[0])]

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('混淆矩阵')
    plt.xlabel('预测标签')
    plt.ylabel('真实标签')
    plt.show()

    # Per-class breakdown, looked up by display name in the report dict.
    print("\n=== 各类别详细指标 ===")
    report = classification_report(
        y_true, y_pred, target_names=class_names, output_dict=True
    )

    for class_name in class_names:
        metrics = report[class_name]
        print(f"{class_name}:")
        print(f"  精确率: {metrics['precision']:.4f}")
        print(f"  召回率: {metrics['recall']:.4f}")
        print(f"  F1分数: {metrics['f1-score']:.4f}")
        print(f"  支持度: {metrics['support']}")

    return cm

# Plot the confusion matrix for the test-split predictions.
cm = plot_confusion_matrix(y_test, y_pred, ['类别A', '类别B', '类别C'])

3. ROC曲线和AUC

python

from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.preprocessing import label_binarize
from itertools import cycle

def plot_roc_curves(y_true, y_pred_proba, class_names=None):
    """Plot one-vs-rest ROC curves per class plus a micro-average, and print AUCs.

    Args:
        y_true: Ground-truth labels. They are binarized with
            ``classes=range(n_classes)``, so this assumes the labels are the
            integers ``0..n_classes-1`` in the same order as the probability
            columns -- TODO confirm before using other label encodings.
        y_pred_proba: 2-D array of per-class scores; its column count is
            taken as the number of classes.
        class_names: Optional display names, one per class. Defaults to
            ``'类别 {i}'``.

    Returns:
        dict mapping each class index (int) and the key ``"micro"`` to its AUC.
    """
    n_classes = y_pred_proba.shape[1]

    if class_names is None:
        class_names = [f'类别 {i}' for i in range(n_classes)]

    # One indicator column per class.
    y_true_bin = label_binarize(y_true, classes=range(n_classes))
    if n_classes == 2:
        # For two classes label_binarize returns a single column (the
        # indicator of class 1). Rebuild the class-0 column so the
        # per-column indexing below lines up with y_pred_proba.
        y_true_bin = np.hstack((1 - y_true_bin, y_true_bin))

    # Per-class ROC curve and AUC.
    fpr = {}
    tpr = {}
    roc_auc = {}

    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], y_pred_proba[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Micro-average: pool every (sample, class) decision into one curve.
    fpr["micro"], tpr["micro"], _ = roc_curve(y_true_bin.ravel(), y_pred_proba.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    plt.figure(figsize=(10, 8))
    colors = cycle(['aqua', 'darkorange', 'cornflowerblue', 'red', 'green'])

    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i], tpr[i], color=color, lw=2,
                 label=f'{class_names[i]} (AUC = {roc_auc[i]:.2f})')

    plt.plot(fpr["micro"], tpr["micro"], color='deeppink', linestyle=':', linewidth=4,
             label=f'微平均 (AUC = {roc_auc["micro"]:.2f})')

    # Diagonal reference line for a random classifier.
    plt.plot([0, 1], [0, 1], 'k--', lw=2, label='随机分类器')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('假正率 (False Positive Rate)')
    plt.ylabel('真正率 (True Positive Rate)')
    plt.title('ROC曲线')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()

    print("=== AUC 分数 ===")
    for i, class_name in enumerate(class_names):
        print(f"{class_name}: {roc_auc[i]:.4f}")
    print(f"微平均: {roc_auc['micro']:.4f}")

    # Multi-class one-vs-rest AUC. Only ValueError is treated as "cannot be
    # computed"; anything else (including KeyboardInterrupt) propagates.
    try:
        macro_auc = roc_auc_score(y_true, y_pred_proba, multi_class='ovr', average='macro')
        weighted_auc = roc_auc_score(y_true, y_pred_proba, multi_class='ovr', average='weighted')
        print(f"宏平均 AUC: {macro_auc:.4f}")
        print(f"加权平均 AUC: {weighted_auc:.4f}")
    except ValueError:
        print("无法计算多类别AUC")

    return roc_auc

# Plot ROC curves from the test-split class probabilities.
roc_results = plot_roc_curves(y_test, y_pred_proba, ['类别A', '类别B', '类别C'])

4. 精确率-召回率曲线

python

from sklearn.metrics import precision_recall_curve, average_precision_score

def plot_precision_recall_curves(y_true, y_pred_proba, class_names=None):
    """Plot per-class and micro-averaged precision-recall curves and print APs.

    Args:
        y_true: Ground-truth labels. They are binarized with
            ``classes=range(n_classes)``, so this assumes the labels are the
            integers ``0..n_classes-1`` in the same order as the probability
            columns -- TODO confirm before using other label encodings.
        y_pred_proba: 2-D array of per-class scores; its column count is
            taken as the number of classes.
        class_names: Optional display names, one per class. Defaults to
            ``'类别 {i}'``.

    Returns:
        dict mapping each class index (int) and the key ``"micro"`` to its
        average precision.
    """
    n_classes = y_pred_proba.shape[1]

    if class_names is None:
        class_names = [f'类别 {i}' for i in range(n_classes)]

    # One indicator column per class.
    y_true_bin = label_binarize(y_true, classes=range(n_classes))
    if n_classes == 2:
        # For two classes label_binarize returns a single column (the
        # indicator of class 1). Rebuild the class-0 column so the
        # per-column indexing below lines up with y_pred_proba.
        y_true_bin = np.hstack((1 - y_true_bin, y_true_bin))

    # Per-class PR curve and average precision.
    precision = {}
    recall = {}
    average_precision = {}

    for i in range(n_classes):
        precision[i], recall[i], _ = precision_recall_curve(y_true_bin[:, i], y_pred_proba[:, i])
        average_precision[i] = average_precision_score(y_true_bin[:, i], y_pred_proba[:, i])

    # Micro-average: pool every (sample, class) decision into one curve.
    precision["micro"], recall["micro"], _ = precision_recall_curve(
        y_true_bin.ravel(), y_pred_proba.ravel()
    )
    average_precision["micro"] = average_precision_score(y_true_bin, y_pred_proba, average="micro")

    plt.figure(figsize=(10, 8))
    colors = cycle(['navy', 'turquoise', 'darkorange', 'cornflowerblue', 'teal'])

    for i, color in zip(range(n_classes), colors):
        plt.plot(recall[i], precision[i], color=color, lw=2,
                 label=f'{class_names[i]} (AP = {average_precision[i]:.2f})')

    plt.plot(recall["micro"], precision["micro"], color='gold', linestyle=':', linewidth=4,
             label=f'微平均 (AP = {average_precision["micro"]:.2f})')

    plt.xlabel('召回率 (Recall)')
    plt.ylabel('精确率 (Precision)')
    plt.title('精确率-召回率曲线')
    plt.legend(loc="lower left")
    plt.grid(True)
    plt.show()

    print("=== 平均精确率 (Average Precision) ===")
    for i, class_name in enumerate(class_names):
        print(f"{class_name}: {average_precision[i]:.4f}")
    print(f"微平均: {average_precision['micro']:.4f}")

    return average_precision

# Plot precision-recall curves from the test-split class probabilities.
pr_results = plot_precision_recall_curves(y_test, y_pred_proba, ['类别A', '类别B', '类别C'])

回归问题评估指标

python

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score,
    mean_absolute_percentage_error, explained_variance_score
)

# Build a synthetic regression dataset: 1000 samples, 10 features,
# noise=10, seeded so the run is repeatable.
X_reg, y_reg = make_regression(
    n_samples=1000, n_features=10, noise=10, random_state=42
)

# Hold out 20% of the samples as the test split.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# Fit a 100-tree random-forest regressor and predict on the test split.
reg_model = RandomForestRegressor(n_estimators=100, random_state=42)
reg_model.fit(X_train_reg, y_train_reg)
y_pred_reg = reg_model.predict(X_test_reg)

def calculate_regression_metrics(y_true, y_pred):
    """Print and return standard regression metrics plus the residuals.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        dict with keys ``mse``, ``rmse``, ``mae``, ``r2``, ``mape``,
        ``explained_variance`` and ``residuals`` (``y_true - y_pred``).
        ``mape`` is a fraction (0.25 means 25%), following scikit-learn's
        convention; it is scaled by 100 only when printed.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    try:
        mape = mean_absolute_percentage_error(y_true, y_pred)
    except NameError:
        # Fallback for environments where the scikit-learn helper is not in
        # scope. Kept in the same unit (a fraction) as the helper, and the
        # denominator is floored at machine epsilon so a zero target does
        # not produce inf/nan.
        true_arr = np.asarray(y_true, dtype=float)
        pred_arr = np.asarray(y_pred, dtype=float)
        denom = np.maximum(np.abs(true_arr), np.finfo(np.float64).eps)
        mape = np.mean(np.abs(true_arr - pred_arr) / denom)

    explained_var = explained_variance_score(y_true, y_pred)

    print("=== 回归评估指标 ===")
    print(f"均方误差 (MSE): {mse:.4f}")
    print(f"均方根误差 (RMSE): {rmse:.4f}")
    print(f"平均绝对误差 (MAE): {mae:.4f}")
    print(f"决定系数 (R²): {r2:.4f}")
    # mape is a fraction; convert to a percentage to match the "%" suffix.
    print(f"平均绝对百分比误差 (MAPE): {mape * 100:.4f}%")
    print(f"解释方差分数: {explained_var:.4f}")

    # Residual summary statistics.
    residuals = y_true - y_pred
    print(f"\n=== 残差分析 ===")
    print(f"残差均值: {np.mean(residuals):.4f}")
    print(f"残差标准差: {np.std(residuals):.4f}")
    print(f"残差最大值: {np.max(residuals):.4f}")
    print(f"残差最小值: {np.min(residuals):.4f}")

    return {
        'mse': mse, 'rmse': rmse, 'mae': mae, 'r2': r2,
        'mape': mape, 'explained_variance': explained_var,
        'residuals': residuals
    }

# Compute the regression metrics for the test-split predictions.
reg_metrics = calculate_regression_metrics(y_test_reg, y_pred_reg)

回归可视化分析

python

def plot_regression_analysis(y_true, y_pred, metrics):
    """Draw a 2x2 diagnostic figure for a regression model.

    Panels: predicted vs. true values, residuals vs. predictions, a residual
    histogram, and a normal Q-Q plot of the residuals.

    Args:
        y_true: Ground-truth target values (must support ``.min()``/``.max()``).
        y_pred: Predicted target values, aligned with ``y_true``.
        metrics: Mapping that provides at least ``"r2"`` and ``"residuals"``.
    """
    from scipy import stats

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    (ax_fit, ax_resid), (ax_hist, ax_qq) = axes
    residuals = metrics['residuals']

    # Panel 1: predictions against truth, with the y = x reference line.
    lo, hi = y_true.min(), y_true.max()
    ax_fit.scatter(y_true, y_pred, alpha=0.6)
    ax_fit.plot([lo, hi], [lo, hi], 'r--', lw=2)
    ax_fit.set_xlabel('真实值')
    ax_fit.set_ylabel('预测值')
    ax_fit.set_title(f'预测值 vs 真实值 (R² = {metrics["r2"]:.3f})')
    ax_fit.grid(True)

    # Panel 2: residuals against predictions, with a zero line.
    ax_resid.scatter(y_pred, residuals, alpha=0.6)
    ax_resid.axhline(y=0, color='r', linestyle='--')
    ax_resid.set_xlabel('预测值')
    ax_resid.set_ylabel('残差')
    ax_resid.set_title('残差图')
    ax_resid.grid(True)

    # Panel 3: residual histogram.
    ax_hist.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
    ax_hist.set_xlabel('残差')
    ax_hist.set_ylabel('频数')
    ax_hist.set_title('残差分布')
    ax_hist.grid(True)

    # Panel 4: Q-Q plot of the residuals against a normal distribution.
    stats.probplot(residuals, dist="norm", plot=ax_qq)
    ax_qq.set_title('残差Q-Q图')
    ax_qq.grid(True)

    plt.tight_layout()
    plt.show()

# Draw the regression diagnostics for the test-split predictions.
plot_regression_analysis(y_test_reg, y_pred_reg, reg_metrics)

聚类评估指标

python

from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import (
    adjusted_rand_score, normalized_mutual_info_score,
    silhouette_score, calinski_harabasz_score, davies_bouldin_score
)

# Build a synthetic 2-D dataset of 300 points around 4 centers, seeded so
# the run is repeatable.
X_cluster, y_cluster_true = make_blobs(
    n_samples=300, centers=4, n_features=2,
    random_state=42, cluster_std=0.60
)

# Cluster into 4 groups with k-means and keep the assigned labels.
kmeans = KMeans(n_clusters=4, random_state=42)
y_cluster_pred = kmeans.fit_predict(X_cluster)

def calculate_clustering_metrics(X, y_true, y_pred):
    """Print and return external and internal clustering quality scores.

    Args:
        X: Feature matrix that was clustered.
        y_true: Reference cluster labels (used by ARI and NMI only).
        y_pred: Cluster labels assigned by the algorithm.

    Returns:
        dict with keys ``ari``, ``nmi``, ``silhouette``,
        ``calinski_harabasz`` and ``davies_bouldin``.
    """
    scores = {
        # External scores: compare against the reference labels.
        'ari': adjusted_rand_score(y_true, y_pred),
        'nmi': normalized_mutual_info_score(y_true, y_pred),
        # Internal scores: computed from the data and assigned labels only.
        'silhouette': silhouette_score(X, y_pred),
        'calinski_harabasz': calinski_harabasz_score(X, y_pred),
        'davies_bouldin': davies_bouldin_score(X, y_pred),
    }

    captions = (
        ("调整兰德指数 (ARI)", 'ari'),
        ("标准化互信息 (NMI)", 'nmi'),
        ("轮廓系数 (Silhouette)", 'silhouette'),
        ("Calinski-Harabasz指数", 'calinski_harabasz'),
        ("Davies-Bouldin指数", 'davies_bouldin'),
    )

    print("=== 聚类评估指标 ===")
    for caption, key in captions:
        print(f"{caption}: {scores[key]:.4f}")

    return scores

# Score the k-means labels against the generated reference labels.
cluster_metrics = calculate_clustering_metrics(X_cluster, y_cluster_true, y_cluster_pred)

# Visualize the clustering result.
def plot_clustering_results(X, y_true, y_pred):
    """Show reference and predicted cluster labels side by side.

    Args:
        X: 2-D feature matrix; columns 0 and 1 are used as the plot axes.
        y_true: Reference cluster labels (left panel colors).
        y_pred: Predicted cluster labels (right panel colors).
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    panels = (
        (axes[0], y_true, '真实聚类'),
        (axes[1], y_pred, '预测聚类'),
    )
    for ax, labels, title in panels:
        ax.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
        ax.set_title(title)
        ax.set_xlabel('特征1')
        ax.set_ylabel('特征2')

    plt.tight_layout()
    plt.show()

# Compare the reference labels with the k-means assignment visually.
plot_clustering_results(X_cluster, y_cluster_true, y_cluster_pred)

交叉验证评估

python

from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.metrics import make_scorer

def comprehensive_cross_validation(X, y, model, problem_type='classification'):
    """Run 5-fold cross-validation with several metrics and print a summary.

    Args:
        X: Feature matrix.
        y: Targets.
        model: Estimator to evaluate.
        problem_type: ``'classification'`` selects accuracy and macro
            precision/recall/F1 on shuffled stratified folds; any other
            value selects R², negative MSE and negative MAE on 5 plain folds.

    Returns:
        The dict returned by ``cross_validate`` (train and test scores plus
        fit and score times).
    """
    if problem_type == 'classification':
        macro_metrics = (
            ('precision', precision_score),
            ('recall', recall_score),
            ('f1', f1_score),
        )
        scoring = {'accuracy': 'accuracy'}
        for label, metric_fn in macro_metrics:
            scoring[label] = make_scorer(metric_fn, average='macro')
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        n_folds = cv.n_splits
    else:
        scoring = dict(
            r2='r2',
            neg_mse='neg_mean_squared_error',
            neg_mae='neg_mean_absolute_error',
        )
        cv = 5
        n_folds = cv

    cv_results = cross_validate(
        model, X, y, cv=cv, scoring=scoring,
        return_train_score=True, n_jobs=-1
    )

    print(f"=== {problem_type.upper()} 交叉验证结果 ===")
    print(f"交叉验证折数: {n_folds}")

    for metric in scoring:
        held_out = cv_results[f'test_{metric}']
        fitted = cv_results[f'train_{metric}']
        held_out_mean = held_out.mean()
        fitted_mean = fitted.mean()

        print(f"\n{metric.upper()}:")
        print(f"  测试集: {held_out_mean:.4f} (+/- {held_out.std() * 2:.4f})")
        print(f"  训练集: {fitted_mean:.4f} (+/- {fitted.std() * 2:.4f})")
        # Train-minus-test gap, reported as the degree of overfitting.
        print(f"  过拟合程度: {fitted_mean - held_out_mean:.4f}")

    # Timing summary.
    fit_times = cv_results['fit_time']
    score_times = cv_results['score_time']

    print(f"\n时间分析:")
    print(f"  平均训练时间: {fit_times.mean():.4f}s (+/- {fit_times.std() * 2:.4f}s)")
    print(f"  平均评估时间: {score_times.mean():.4f}s (+/- {score_times.std() * 2:.4f}s)")

    return cv_results

# Cross-validate a random-forest classifier on the classification dataset.
print("=== 分类模型评估 ===")
clf_cv_results = comprehensive_cross_validation(
    X, y, RandomForestClassifier(n_estimators=100, random_state=42), 'classification'
)

# Visual separator between the two reports.
print("\n" + "="*50)

# Cross-validate a random-forest regressor on the regression dataset.
print("=== 回归模型评估 ===")
reg_cv_results = comprehensive_cross_validation(
    X_reg, y_reg, RandomForestRegressor(n_estimators=100, random_state=42), 'regression'
)

模型比较和统计检验

python

from scipy import stats
from sklearn.model_selection import cross_val_score

def compare_models_statistically(X, y, models, model_names, cv=5, scoring='accuracy'):
    """Cross-validate several models and compare them with paired t-tests.

    Args:
        X: Feature matrix.
        y: Targets.
        models: Sequence of estimators to compare.
        model_names: Display names, one per entry in ``models``.
        cv: Cross-validation spec passed to ``cross_val_score``. The paired
            test presumes every model is scored on the same folds -- confirm
            this holds if you pass a shuffling splitter without a fixed seed.
        scoring: Scoring name passed to ``cross_val_score``.

    Returns:
        Tuple ``(results_df, p_values_df)``: per-fold scores with one column
        per model, and a symmetric model-by-model matrix of paired t-test
        p-values. The diagonal is NaN because a model is not tested against
        itself.
    """
    # Per-fold scores for every model.
    all_scores = [
        cross_val_score(model, X, y, cv=cv, scoring=scoring)
        for model in models
    ]

    results_df = pd.DataFrame(all_scores, index=model_names).T

    print(f"=== 模型性能比较 ({scoring}) ===")
    print(results_df.describe())

    print(f"\n=== 配对t检验 (p值) ===")
    n_models = len(models)
    # Start from NaN so the untested diagonal cannot be mistaken for p = 0.
    p_values = np.full((n_models, n_models), np.nan)

    # The two-sided paired test is symmetric in its arguments, so test each
    # unordered pair once and mirror the result.
    for i in range(n_models):
        for j in range(i + 1, n_models):
            _, p_value = stats.ttest_rel(all_scores[i], all_scores[j])
            p_values[i, j] = p_value
            p_values[j, i] = p_value

    p_values_df = pd.DataFrame(p_values, index=model_names, columns=model_names)
    print(p_values_df)

    # Box plot of the per-fold scores.
    plt.figure(figsize=(10, 6))
    results_df.boxplot()
    plt.title(f'模型性能比较 ({scoring})')
    plt.ylabel(scoring)
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    return results_df, p_values_df

# Compare several classifiers on the classification dataset.
# The listing uses these two estimators without importing them above, so
# bring them into scope here to avoid a NameError.
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC

models_to_compare = [
    RandomForestClassifier(n_estimators=100, random_state=42),
    GradientBoostingClassifier(n_estimators=100, random_state=42),
    SVC(random_state=42)
]

model_names = ['随机森林', '梯度提升', 'SVM']

comparison_results, p_values = compare_models_statistically(
    X, y, models_to_compare, model_names, cv=5, scoring='accuracy'
)

自定义评估指标

python

from sklearn.metrics import make_scorer

def custom_business_metric(y_true, y_pred):
    """Return the total business value of a set of binary predictions.

    Example payoff scheme:
      - true positive:  +10
      - true negative:  +1
      - false positive: -5
      - false negative: -2

    Args:
        y_true: Ground-truth labels, where 1 is the positive class and 0 the
            negative class. Any array-like is accepted.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        The summed payoff over all samples. Entries whose labels are neither
        0 nor 1 contribute nothing.
    """
    # Coerce to arrays so the element-wise comparisons below also work for
    # plain Python lists (``[1, 0] == 1`` is a single False, not a mask).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    tp = np.sum((y_true == 1) & (y_pred == 1))  # true positives
    tn = np.sum((y_true == 0) & (y_pred == 0))  # true negatives
    fp = np.sum((y_true == 0) & (y_pred == 1))  # false positives
    fn = np.sum((y_true == 1) & (y_pred == 0))  # false negatives

    business_value = tp * 10 + tn * 1 + fp * (-5) + fn * (-2)
    return business_value

# Build a binary classification dataset for the demonstration.
X_binary, y_binary = make_classification(
    n_samples=1000, n_features=10, n_classes=2, random_state=42
)

# Wrap the business metric as a scorer; higher values are better.
business_scorer = make_scorer(custom_business_metric, greater_is_better=True)

# Evaluate the model with the custom scorer under 5-fold cross-validation.
model_binary = RandomForestClassifier(n_estimators=100, random_state=42)
business_scores = cross_val_score(model_binary, X_binary, y_binary, 
                                cv=5, scoring=business_scorer)

print("=== 自定义业务指标评估 ===")
print(f"业务价值分数: {business_scores.mean():.2f} (+/- {business_scores.std() * 2:.2f})")

# Same model and folds count, scored with plain accuracy for comparison.
accuracy_scores = cross_val_score(model_binary, X_binary, y_binary, 
                                cv=5, scoring='accuracy')
print(f"准确率: {accuracy_scores.mean():.4f} (+/- {accuracy_scores.std() * 2:.4f})")

评估指标选择指南

python

def metric_selection_guide() -> None:
    """Print a decision tree for choosing evaluation metrics.

    The guide covers classification, regression and clustering problems.
    Nothing is returned; the text is written to standard output.
    """
    
    # The guide is a fixed, pre-formatted block of text printed verbatim.
    guide = """
    === 评估指标选择指南 ===
    
    分类问题:
    ├── 平衡数据集
    │   ├── 整体性能 → 准确率 (Accuracy)
    │   ├── 各类别性能 → 宏平均 F1分数
    │   └── 概率预测 → AUC-ROC
    │
    ├── 不平衡数据集
    │   ├── 关注少数类 → 召回率, AUC-PR
    │   ├── 精确预测 → 精确率
    │   └── 平衡考虑 → F1分数, 加权平均指标
    │
    ├── 多类别问题
    │   ├── 宏平均 → 各类别等权重
    │   ├── 微平均 → 样本等权重
    │   └── 加权平均 → 按类别样本数加权
    │
    └── 业务场景
        ├── 医疗诊断 → 召回率 (避免漏诊)
        ├── 垃圾邮件 → 精确率 (避免误判)
        └── 推荐系统 → AUC, Top-K准确率
    
    回归问题:
    ├── 误差大小
    │   ├── 平均误差 → MAE
    │   ├── 大误差敏感 → MSE, RMSE
    │   └── 相对误差 → MAPE
    │
    ├── 解释性
    │   ├── 拟合优度 → R²
    │   ├── 方差解释 → 解释方差分数
    │   └── 基线比较 → 相对改进
    │
    └── 业务场景
        ├── 价格预测 → MAPE (相对误差重要)
        ├── 销量预测 → MAE (绝对误差重要)
        └── 风险评估 → MSE (大误差代价高)
    
    聚类问题:
    ├── 有真实标签
    │   ├── 聚类质量 → ARI, NMI
    │   └── 标签一致性 → 调整互信息
    │
    ├── 无真实标签
    │   ├── 簇内紧密度 → 轮廓系数
    │   ├── 簇间分离度 → Calinski-Harabasz指数
    │   └── 簇的紧凑性 → Davies-Bouldin指数
    │
    └── 选择聚类数
        ├── 肘部法则 → 簇内平方和
        ├── 轮廓分析 → 轮廓系数
        └── Gap统计 → 与随机数据比较
    """
    
    print(guide)

# Print the metric selection guide.
metric_selection_guide()

总结

选择合适的评估指标是机器学习项目成功的关键：

关键原则：

- 问题导向：根据具体问题类型选择指标
- 业务相关：考虑实际业务场景和成本
- 数据特征：考虑数据平衡性、噪声等
- 多指标评估：使用多个指标全面评估
- 统计显著性：进行统计检验确保结果可靠

常用组合：

- 分类：准确率 + F1分数 + AUC
- 回归：R² + RMSE + MAE
- 聚类：轮廓系数 + Calinski-Harabasz指数

注意事项：

- 避免在测试集上反复调优
- 使用交叉验证获得稳定估计
- 考虑计算成本和解释性
- 根据业务需求自定义指标

下一章我们将学习管道与工作流，了解如何构建高效的机器学习管道。

性能指标详解 ​

分类问题评估指标 ​

1. 基础指标：准确率、精确率、召回率、F1分数 ​

2. 混淆矩阵 ​

3. ROC曲线和AUC ​

4. 精确率-召回率曲线 ​

回归问题评估指标 ​

回归可视化分析 ​

聚类评估指标 ​

交叉验证评估 ​

模型比较和统计检验 ​

自定义评估指标 ​

评估指标选择指南 ​

总结 ​

关键原则： ​

常用组合： ​

注意事项： ​

性能指标详解

分类问题评估指标

1. 基础指标：准确率、精确率、召回率、F1分数

2. 混淆矩阵

3. ROC曲线和AUC

4. 精确率-召回率曲线

回归问题评估指标

回归可视化分析

聚类评估指标

交叉验证评估

模型比较和统计检验

自定义评估指标

评估指标选择指南

总结

关键原则：

常用组合：

注意事项：