# Files
# Stomach_Cancer_Pytorch/experiments/experiment.py
#
# 202 lines
# 9.9 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from torchinfo import summary
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from Training_Tools.PreProcess import Training_Precesses
from experiments.pytorch_Model import ModifiedXception
from experiments.Model_All_Step import All_Step
from Load_process.Load_Indepentend import Load_Indepentend_Data
from _validation.ValidationTheEnterData import validation_the_enter_data
from Load_process.file_processing import Process_File
from draw_tools.Grad_cam import GradCAM
from draw_tools.draw import plot_history, draw_heatmap
from Calculate_Process.Calculate import Calculate
import numpy as np
import torch
import torch.nn as nn
import time
import pandas as pd
import datetime
class experiments():
    '''
    K-fold training / evaluation driver for the PyTorch stomach-cancer models.

    Method overview:
    * processing_main: entry point; loads the test set, runs K-fold training,
      evaluates every fold on the test set and records the results.
    * construct_model: builds the network used for one fold.
    * record_matrix_image: draws the confusion matrix as a heat map.
    * record_everyTime_test_result: writes the metrics of one fold to a CSV file.
    '''

    def __init__(self, Image_Size, Model_Name, Experiment_Name, Epoch, Train_Batch_Size, tools, Number_Of_Classes, status):
        '''
        Store the experiment configuration and create the helper objects.

        Parameters:
        * Image_Size: edge length used for the model summary input
          (3 x Image_Size x Image_Size) and forwarded to Training_Precesses.
        * Model_Name: name of the model, forwarded to All_Step / Training_Step.
        * Experiment_Name: name of the experiment, used when saving plots and results.
        * Epoch: forwarded to All_Step (number of training epochs).
        * Train_Batch_Size: batch size of the training and validation loaders.
        * tools: project helper providing Get_Data_Label,
          Get_OneHot_Encording_Label and Get_Save_Roots.
        * Number_Of_Classes: number of label classes.
        * status: forwarded to tools.Get_Save_Roots to select the dataset roots.
        '''
        self.Topic_Tool = tools
        self.validation_obj = validation_the_enter_data()  # input-validation helper object
        # Loader object that reads the independent test data.
        self.cut_image = Load_Indepentend_Data(self.Topic_Tool.Get_Data_Label(), self.Topic_Tool.Get_OneHot_Encording_Label())
        self.model_name = Model_Name
        self.experiment_name = Experiment_Name
        self.epoch = Epoch
        self.train_batch_size = Train_Batch_Size
        self.Number_Of_Classes = Number_Of_Classes
        self.Image_Size = Image_Size
        self.Grad = ""
        self.Status = status
        # Use the GPU when one is available, otherwise fall back to the CPU.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    @staticmethod
    def _unwrap_model(model):
        '''Return the network held by an nn.DataParallel wrapper, or the model itself when it is not wrapped.'''
        if isinstance(model, nn.DataParallel):
            return model.module
        return model

    def processing_main(self, Training_Data, Training_Label):
        '''
        Run the whole experiment: load the test set, train with K-fold
        cross-validation, evaluate each fold on the test set and save the results.

        Parameters:
        * Training_Data: training samples, handed to Training_Precesses.Setting_DataSet.
        * Training_Label: labels matching Training_Data.
        '''
        # Only the test root is used here; the training data is passed in by the caller.
        _, Test = self.Topic_Tool.Get_Save_Roots(self.Status)
        Calculate_Process = Calculate()
        print(f"Training Data Content: {Training_Data[0]}")

        start = time.time()
        self.cut_image.process_main(Test)  # load the test data
        end = time.time()
        print("讀取testing與validation資料(154)執行時間:%f\n" % (end - start))

        # Keep the loaded test data and labels on this object.
        self.test, self.test_label = self.cut_image.test, self.cut_image.test_label

        PreProcess = Training_Precesses(self.Image_Size)
        File = Process_File()

        # The test loader uses batch size 1.
        Testing_Dataset = PreProcess.Setting_DataSet(self.test, self.test_label, "Transform")
        self.Test_Dataloader = PreProcess.Dataloader_Sampler(Testing_Dataset, 1, False)

        # Per-fold metric histories.
        all_fold_train_losses = []
        all_fold_val_losses = []
        all_fold_train_accuracies = []
        all_fold_val_accuracies = []

        # Single source of truth for the number of folds (also used in the progress message).
        n_splits = 5
        K_Fold = KFold(n_splits = n_splits, shuffle = True, random_state = 42)

        training_dataset = PreProcess.Setting_DataSet(Training_Data, Training_Label, "Transform")

        for fold, (train_idx, val_idx) in enumerate(K_Fold.split(training_dataset)):
            cnn_model = self.construct_model()  # fresh model for every fold

            # Summarise with half the training batch size, but never with an
            # empty batch (a batch size of 1 would otherwise give 0).
            summary_batch = max(1, int(self.train_batch_size / 2))
            print(summary(cnn_model, input_size=(summary_batch, 3, self.Image_Size, self.Image_Size)))
            for name, parameters in cnn_model.named_parameters():
                print(f"Layer Name: {name}, Parameters: {parameters.size()}")

            # construct_model may wrap the network in nn.DataParallel, which
            # exposes the real network only through `.module`; unwrap it before
            # reaching into its layers for the Grad-CAM target.
            TargetLayer = self._unwrap_model(cnn_model).base_model.conv4.pointwise
            Grad = GradCAM(cnn_model, TargetLayer)

            step = All_Step(cnn_model, self.epoch, self.Number_Of_Classes, self.model_name, self.experiment_name)

            # NOTE(review): this prints the test-data load time measured before
            # the loop, although the message is labelled as the training-data
            # read time - confirm what was intended.
            print("\n\n\n讀取訓練資料執行時間:%f\n\n" % (end - start))
            print(f"\nStarting Fold {fold + 1}/{n_splits}")

            # Training / validation subsets and loaders for this fold.
            train_subset = torch.utils.data.Subset(training_dataset, train_idx)
            val_subset = torch.utils.data.Subset(training_dataset, val_idx)
            train_loader = PreProcess.Dataloader_Sampler(train_subset, self.train_batch_size, False)
            val_loader = PreProcess.Dataloader_Sampler(val_subset, self.train_batch_size, False)

            cnn_model, model_path, train_losses, val_losses, train_accuracies, val_accuracies, Total_Epoch = step.Training_Step(train_subset, val_subset, train_loader, val_loader, self.model_name, fold, TargetLayer)

            # Store the histories of this fold.
            all_fold_train_losses.append(train_losses)
            all_fold_val_losses.append(val_losses)
            all_fold_train_accuracies.append(train_accuracies)
            all_fold_val_accuracies.append(val_accuracies)

            # Plot the training curves of this fold and save the figure.
            Losses = [train_losses, val_losses]
            Accuracies = [train_accuracies, val_accuracies]
            plot_history(Total_Epoch, Losses, Accuracies, "train" + str(fold), self.experiment_name)

            # Reload the weights saved at model_path before evaluating on the test set.
            cnn_model.load_state_dict(torch.load(model_path))
            True_Label, Predict_Label, loss, accuracy, precision, recall, AUC, f1 = step.Evaluate_Model(cnn_model, self.Test_Dataloader)

            Grad.Processing_Main(self.Test_Dataloader, f"../Result/GradCAM_Image/Testing/GradCAM_Image({str(datetime.date.today())})/fold-{str(fold)}")
            Calculate_Process.Append_numbers(loss, accuracy, precision, recall, AUC, f1)

            self.record_matrix_image(True_Label, Predict_Label, self.experiment_name, fold)
            # Record the test metrics of this fold and write them to the CSV file.
            print(self.record_everyTime_test_result(loss, accuracy, precision, recall, AUC, f1, fold, self.experiment_name))

        # Average the last recorded value of every fold.
        avg_train_losses = np.mean([losses[-1] for losses in all_fold_train_losses])
        avg_val_losses = np.mean([losses[-1] for losses in all_fold_val_losses])
        avg_train_accuracies = np.mean([acc[-1] for acc in all_fold_train_accuracies])
        avg_val_accuracies = np.mean([acc[-1] for acc in all_fold_val_accuracies])

        print("\nCross-Validation Results:")
        print(f"Avg Train Loss: {avg_train_losses:.4f}, Avg Val Loss: {avg_val_losses:.4f}")
        print(f"Avg Train Acc: {avg_train_accuracies:.4f}, Avg Val Acc: {avg_val_accuracies:.4f}")

        Calculate_Process.Calculate_Mean()
        Calculate_Process.Calculate_Std()
        print(Calculate_Process.Output_Style())

        File.Save_TXT_File(content = f"\nCross-Validation Results:\nAvg Train Loss: {avg_train_losses:.4f}, Avg Val Loss: {avg_val_losses:.4f}\nAvg Train Acc: {avg_train_accuracies:.4f}, Avg Val Acc: {avg_val_accuracies:.4f}\n", File_Name = "Training_Average_Result")

    def construct_model(self):
        '''Build the model for this run: a ModifiedXception moved to self.device, wrapped in nn.DataParallel when more than one GPU is visible.'''
        cnn_model = ModifiedXception(self.Number_Of_Classes)

        if torch.cuda.device_count() > 1:
            cnn_model = nn.DataParallel(cnn_model)

        cnn_model = cnn_model.to(self.device)
        return cnn_model

    def record_matrix_image(self, True_Labels, Predict_Labels, model_name, index):
        '''Compute the confusion matrix of the predictions and draw it as a heat map.'''
        matrix = confusion_matrix(True_Labels, Predict_Labels)
        draw_heatmap(matrix, model_name, index)  # project helper that renders the matrix

    def record_everyTime_test_result(self, loss, accuracy, precision, recall, auc, f, indexs, model_name):
        '''
        Format the metrics of a single run as a one-row DataFrame, save it
        through Process_File.Save_CSV_File and return it.

        The loss is formatted with two decimals; the other metrics are
        multiplied by 100 and formatted as percentages. `indexs` becomes the
        row index of the DataFrame.
        '''
        File = Process_File()

        Dataframe = pd.DataFrame(
            {
                "model_name" : str(model_name),
                "loss" : "{:.2f}".format(loss),
                "precision" : "{:.2f}%".format(precision * 100),
                "recall" : "{:.2f}%".format(recall * 100),
                "accuracy" : "{:.2f}%".format(accuracy * 100),
                "f" : "{:.2f}%".format(f * 100),
                "AUC" : "{:.2f}%".format(auc * 100)
            }, index = [indexs])
        File.Save_CSV_File("train_result", Dataframe)

        return Dataframe