"""Evaluate saved cross-validation fold weights on the independent test set.

For every fold checkpoint that exists on disk the script runs inference over
the test set, prints overall accuracy plus per-class accuracy / precision /
recall / F1, and finally prints the mean and standard deviation of those
per-class metrics across all evaluated folds.
"""
from experiments.experiment import experiments
import torch
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from Load_process.LoadData import Loding_Data_Root
import os
import seaborn as sns
import datetime
from Load_process.file_processing import Process_File
from Load_process.Load_Indepentend import Load_Indepentend_Data
from Training_Tools.PreProcess import Training_Precesses
from Training_Tools.Tools import Tool
import matplotlib.figure as figure
import matplotlib.backends.backend_agg as agg
from Calculate_Process.Calculate import Calculate
import argparse
import json
from utils.Stomach_Config import Training_Config, Loading_Config
from model_data_processing.processing import shuffle_data

# Directory holding the best checkpoint of each cross-validation fold.
_WEIGHT_DIR = (
    "../Result/save_the_best_model/"
    "Xception Skin trains Stomach Cancer Dataset, and uses WeightRandomSampler "
    "Change HSV Channel of V is -150/Xception"
)
# Number of fold checkpoints expected in _WEIGHT_DIR (fold0 .. fold4).
_FOLD_COUNT = 5


def _parse_args():
    """Parse the command-line arguments of the evaluation script."""
    parser = argparse.ArgumentParser(description='评估单一类别的准确度、精确度、召回率和F1值')
    parser.add_argument('--target_class', type=int, default=0,
                        help='要评估的目标类别索引 (0, 1, 2)')
    return parser.parse_args()


def _predict(model, loader, device):
    """Run *model* over *loader* and return ``(predictions, true_labels)``.

    Both return values are 1-D NumPy arrays of class indices. Each batch
    yielded by *loader* is unpacked as four items, of which only the first
    two (inputs, labels) are used.

    Raises:
        ValueError: if *loader* yields no batches.
    """
    predicted_batches = []
    label_batches = []
    with torch.no_grad():  # inference only, no gradients needed
        for inputs, labels, _, _ in loader:
            outputs = model(inputs.to(device))
            _, predicted = torch.max(outputs, 1)
            predicted_batches.append(predicted.cpu().numpy())
            # Labels are reduced with argmax over axis 1
            # (presumably one-hot encoded -- confirm against the dataset).
            label_batches.append(np.argmax(labels.cpu().numpy(), axis=1))
    if not predicted_batches:
        raise ValueError("test loader produced no batches")
    return np.concatenate(predicted_batches), np.concatenate(label_batches)


def _per_class_accuracy(y_true, y_pred, num_classes):
    """Return, for each class, the accuracy over samples whose true label is that class.

    A class with no samples in *y_true* gets 0.0 instead of calling
    ``accuracy_score`` on empty arrays.
    """
    accuracies = []
    for class_idx in range(num_classes):
        class_mask = (y_true == class_idx)
        if class_mask.any():
            accuracies.append(accuracy_score(y_true[class_mask], y_pred[class_mask]))
        else:
            accuracies.append(0.0)
    return accuracies


def main():
    """Evaluate every available fold checkpoint and print aggregated metrics."""
    args = _parse_args()

    # Pick the compute device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"使用设备: {device}")
    if torch.cuda.is_available():
        torch.cuda.set_device(0)
        print(f"GPU: {torch.cuda.get_device_name(0)}")

    training_labels = Loading_Config["Training_Labels"]
    num_classes = len(training_labels)
    class_indices = list(range(num_classes))

    # Build the one-hot label encoding used by the data loader.
    tool = Tool()
    tool.Set_OneHotEncording(training_labels)
    Encording_Label = tool.Get_OneHot_Encording_Label()

    # Validate the requested class index before doing any heavy work.
    target_class = args.target_class
    if not 0 <= target_class < num_classes:
        print(f"错误: 目标类别索引 {target_class} 无效,必须在 0 到 {num_classes - 1} 之间")
        raise SystemExit(1)
    print(f"正在评估类别: {training_labels[target_class]}")

    # NOTE(review): the constructed object is never used afterwards; the call
    # is kept in case the constructor has side effects -- confirm and remove.
    Loding_Data_Root(training_labels, Loading_Config["Train_Data_Root"],
                     Loading_Config["ImageGenerator_Data_Root"])
    cut_image = Load_Indepentend_Data(training_labels, Encording_Label)

    # Build the model and make sure it lives on the same device as the inputs.
    # NOTE(review): the literal 3 is passed through unchanged; presumably the
    # class count -- confirm against experiments.__init__.
    experiment = experiments(Training_Config, Loading_Config, tool, 3, "Test")
    model = experiment.construct_model()
    model.to(device)

    # The test set does not depend on the fold, so load it once and reuse the
    # loader for every checkpoint.
    preprocess = Training_Precesses(256)
    cut_image.process_main(Loading_Config["Test_Data_Root"])
    test_dataset = preprocess.Setting_DataSet(cut_image.test, cut_image.test_label, "Transform")
    test_loader = preprocess.Dataloader_Sampler(test_dataset, 1, False)

    Model_Weight = [
        f"{_WEIGHT_DIR}/best_model( 2025-05-19 )-_fold{fold}.pt"
        for fold in range(_FOLD_COUNT)
    ]
    # One statistics accumulator per class.
    Calculate_Tool = [Calculate() for _ in class_indices]

    evaluated_runs = 0
    for run_number, path in enumerate(Model_Weight, start=1):
        if not os.path.exists(path):
            # Evaluating without loading the weights would report metrics for
            # whatever parameters the model currently holds, so skip instead.
            print(f"警告: 找不到权重文件,已跳过: {path}\n")
            continue

        # map_location lets GPU-saved checkpoints load on a CPU-only host.
        model.load_state_dict(torch.load(path, map_location=device))
        print("读取权重完成\n")
        model.eval()

        Predict, y_test = _predict(model, test_loader, device)
        print(f"{training_labels[target_class]} 预测结果: {Predict}\n")

        # Passing labels= keeps every metric array aligned with
        # training_labels even when a class is absent from this run.
        accuracy = accuracy_score(y_test, Predict)
        precision = precision_score(y_test, Predict, labels=class_indices,
                                    average=None, zero_division=0)
        recall = recall_score(y_test, Predict, labels=class_indices,
                              average=None, zero_division=0)
        F1 = f1_score(y_test, Predict, labels=class_indices,
                      average=None, zero_division=0)
        class_accuracies = _per_class_accuracy(y_test, Predict, num_classes)

        print(f"运行 {run_number}:\n")
        print(f"整体准确率 (Accuracy): {accuracy:.4f}")
        for class_idx in class_indices:
            print(f"类别 {training_labels[class_idx]} 的评估指标:")
            print(f" 准确率 (Accuracy): {class_accuracies[class_idx]:.4f}")
            print(f" 精确率 (Precision): {precision[class_idx]:.4f}")
            print(f" 召回率 (Recall): {recall[class_idx]:.4f}")
            print(f" F1值: {F1[class_idx]:.4f}\n")
            Calculate_Tool[class_idx].Append_numbers(
                0, class_accuracies[class_idx], precision[class_idx],
                recall[class_idx], 0, F1[class_idx])
        evaluated_runs += 1

    if evaluated_runs == 0:
        print("错误: 没有找到任何可用的模型权重,无法计算评估结果")
        raise SystemExit(1)

    # Mean and standard deviation across folds, reported per class under that
    # class's own name.
    for label_name, stats in zip(training_labels, Calculate_Tool):
        stats.Calculate_Mean()
        stats.Calculate_Std()
        print(f"\n{label_name} 类别的评估结果:")
        print(stats.Output_Style())


if __name__ == "__main__":
    main()