"""Training-experiment driver: 5-fold cross-validation of a modified Xception model in PyTorch."""
import datetime
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from torchinfo import summary

from Calculate_Process.Calculate import Calculate
from Load_process.Load_Indepentend import Load_Indepentend_Data
from Load_process.file_processing import Process_File
from Training_Tools.PreProcess import Training_Precesses
from _validation.ValidationTheEnterData import validation_the_enter_data
from draw_tools.Grad_cam import GradCAM
from draw_tools.draw import plot_history, draw_heatmap
from experiments.Model_All_Step import All_Step
from experiments.pytorch_Model import ModifiedXception
class experiments():

    def __init__(self, Image_Size, Model_Name, Experiment_Name, Epoch, Train_Batch_Size, tools, Number_Of_Classes, status):
        '''
        # Experiment object

        ## Description:
        * Drives a PyTorch training run: it owns the data-loading helpers and
          the hyper-parameters, and exposes methods that carry out each stage
          of the experiment.

        ## Parameters:
        * Topic_Tool: helper that exposes the train/validation/test data roots, labels, etc.
        * cut_image: loader object for the independent test/validation images
        * model_name: which model this run uses (pretrained or self-designed)
        * experiment_name: name of the experiment
        * epoch: number of training epochs
        * train_batch_size: batch size for the training data
        * Number_Of_Classes: number of label classes
        * Status: which dataset state is currently selected
        * device: whether to run on GPU or CPU

        ## Methods:
        * processing_main: entry point of the experiment
        * construct_model: decides which model the experiment uses
        * Training_Step: training/validation loop
        * Evaluate_Model: measures model accuracy
        * record_matrix_image: draws the confusion matrix (heat map)
        * record_everyTime_test_result: records one run's result and writes it to a file
        '''
        # Dataset/label bookkeeping helper shared by the other collaborators.
        self.Topic_Tool = tools

        # Input-validation helper.
        self.validation_obj = validation_the_enter_data()
        # Loader for the independent test/validation images.
        self.cut_image = Load_Indepentend_Data(self.Topic_Tool.Get_Data_Label(), self.Topic_Tool.Get_OneHot_Encording_Label())

        # Run configuration.
        self.model_name = Model_Name
        self.experiment_name = Experiment_Name
        self.epoch = Epoch
        self.train_batch_size = Train_Batch_Size
        self.Number_Of_Classes = Number_Of_Classes
        self.Image_Size = Image_Size

        # Placeholder; a GradCAM instance is created per fold during training.
        self.Grad = ""
        self.Status = status
        # Prefer the GPU whenever CUDA is available.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def processing_main(self, Training_Data, Training_Label):
|
||
Train, Test = self.Topic_Tool.Get_Save_Roots(self.Status)
|
||
Calculate_Process = Calculate()
|
||
|
||
print(f"Training Data Content: {Training_Data[0]}")
|
||
|
||
start = time.time()
|
||
self.cut_image.process_main(Test) # 呼叫處理test Data與Validation Data的function
|
||
end = time.time()
|
||
print("讀取testing與validation資料(154)執行時間:%f 秒\n" % (end - start))
|
||
|
||
# 將處理好的test Data 與 Validation Data 丟給這個物件的變數
|
||
self.test, self.test_label = self.cut_image.test, self.cut_image.test_label
|
||
# self.test = self.test.permute(0, 3, 1, 2)
|
||
# Training_Data = Training_Data.permute(0, 3, 1, 2)
|
||
|
||
PreProcess = Training_Precesses(self.Image_Size)
|
||
File = Process_File()
|
||
|
||
# print(f"Dataset_Data: \n{self.test}\nLabel: \n{self.test_label}\n")
|
||
Testing_Dataset = PreProcess.Setting_DataSet(self.test, self.test_label, "Transform")
|
||
self.Test_Dataloader = PreProcess.Dataloader_Sampler(Testing_Dataset, 1, False)
|
||
# for images, labels in self.Test_Dataloader:
|
||
# print(images.shape)
|
||
|
||
# Lists to store metrics across all folds
|
||
all_fold_train_losses = []
|
||
all_fold_val_losses = []
|
||
all_fold_train_accuracies = []
|
||
all_fold_val_accuracies = []
|
||
|
||
# Define K-fold cross-validator
|
||
K_Fold = KFold(n_splits = 5, shuffle = True, random_state = 42)
|
||
# Get the underlying dataset from PreProcess_Classes_Data
|
||
training_dataset = PreProcess.Setting_DataSet(Training_Data, Training_Label, "Transform")
|
||
# K-Fold loop
|
||
for fold, (train_idx, val_idx) in enumerate(K_Fold.split(training_dataset)):
|
||
|
||
cnn_model = self.construct_model() # 呼叫讀取模型的function
|
||
print(summary(cnn_model, input_size=(int(self.train_batch_size / 2), 3, self.Image_Size, self.Image_Size)))
|
||
for name, parameters in cnn_model.named_parameters():
|
||
print(f"Layer Name: {name}, Parameters: {parameters.size()}")
|
||
|
||
TargetLayer = cnn_model.base_model.conv4.pointwise
|
||
Grad = GradCAM(cnn_model, TargetLayer)
|
||
|
||
step = All_Step(cnn_model, self.epoch, self.Number_Of_Classes, self.model_name, self.experiment_name)
|
||
print("\n\n\n讀取訓練資料執行時間:%f 秒\n\n" % (end - start))
|
||
print(f"\nStarting Fold {fold + 1}/5")
|
||
|
||
# Create training and validation subsets for this fold
|
||
train_subset = torch.utils.data.Subset(training_dataset, train_idx)
|
||
val_subset = torch.utils.data.Subset(training_dataset, val_idx)
|
||
|
||
# print(f"Dataset_Data: \n{train_subset.dataset.data}\nLabel: \n{train_subset.dataset.labels}\n")
|
||
|
||
# Wrap subsets in DataLoaders (use same batch size as original)
|
||
train_loader = PreProcess.Dataloader_Sampler(train_subset , self.train_batch_size, False)
|
||
val_loader = PreProcess.Dataloader_Sampler(val_subset, self.train_batch_size, False)
|
||
|
||
cnn_model, model_path, train_losses, val_losses, train_accuracies, val_accuracies, Total_Epoch = step.Training_Step(train_subset, val_subset, train_loader, val_loader, self.model_name, fold, TargetLayer)
|
||
|
||
# Store fold results
|
||
all_fold_train_losses.append(train_losses)
|
||
all_fold_val_losses.append(val_losses)
|
||
all_fold_train_accuracies.append(train_accuracies)
|
||
all_fold_val_accuracies.append(val_accuracies)
|
||
|
||
Losses = [train_losses, val_losses]
|
||
Accuracies = [train_accuracies, val_accuracies]
|
||
plot_history(Total_Epoch, Losses, Accuracies, "train" + str(fold), self.experiment_name) # 將訓練結果化成圖,並將化出來的圖丟出去儲存
|
||
|
||
cnn_model.load_state_dict(torch.load(model_path))
|
||
True_Label, Predict_Label, loss, accuracy, precision, recall, AUC, f1 = step.Evaluate_Model(cnn_model, self.Test_Dataloader)
|
||
|
||
Grad.Processing_Main(self.Test_Dataloader, f"../Result/GradCAM_Image/Testing/GradCAM_Image({str(datetime.date.today())})/fold-{str(fold)}")
|
||
Calculate_Process.Append_numbers(loss, accuracy, precision, recall, AUC, f1)
|
||
|
||
self.record_matrix_image(True_Label, Predict_Label, self.experiment_name, fold)
|
||
print(self.record_everyTime_test_result(loss, accuracy, precision, recall, AUC, f1, fold, self.experiment_name)) # 紀錄當前訓練完之後的預測結果,並輸出成csv檔
|
||
|
||
# Aggregate results across folds
|
||
avg_train_losses = np.mean([losses[-1] for losses in all_fold_train_losses])
|
||
avg_val_losses = np.mean([losses[-1] for losses in all_fold_val_losses])
|
||
avg_train_accuracies = np.mean([acc[-1] for acc in all_fold_train_accuracies])
|
||
avg_val_accuracies = np.mean([acc[-1] for acc in all_fold_val_accuracies])
|
||
|
||
print(f"\nCross-Validation Results:")
|
||
print(f"Avg Train Loss: {avg_train_losses:.4f}, Avg Val Loss: {avg_val_losses:.4f}")
|
||
print(f"Avg Train Acc: {avg_train_accuracies:.4f}, Avg Val Acc: {avg_val_accuracies:.4f}")
|
||
|
||
Calculate_Process.Calculate_Mean()
|
||
Calculate_Process.Calculate_Std()
|
||
|
||
print(Calculate_Process.Output_Style())
|
||
|
||
File.Save_TXT_File(content = f"\nCross-Validation Results:\nAvg Train Loss: {avg_train_losses:.4f}, Avg Val Loss: {avg_val_losses:.4f}\nAvg Train Acc: {avg_train_accuracies:.4f}, Avg Val Acc: {avg_val_accuracies:.4f}\n", File_Name = "Training_Average_Result")
|
||
|
||
pass
|
||
|
||
def construct_model(self):
|
||
'''決定我這次訓練要用哪個model'''
|
||
cnn_model = ModifiedXception(self.Number_Of_Classes)
|
||
|
||
if torch.cuda.device_count() > 1:
|
||
cnn_model = nn.DataParallel(cnn_model)
|
||
|
||
cnn_model = cnn_model.to(self.device)
|
||
return cnn_model
|
||
|
||
def record_matrix_image(self, True_Labels, Predict_Labels, model_name, index):
|
||
'''劃出混淆矩陣(熱力圖)'''
|
||
# 計算混淆矩陣
|
||
matrix = confusion_matrix(True_Labels, Predict_Labels)
|
||
draw_heatmap(matrix, model_name, index) # 呼叫畫出confusion matrix的function
|
||
|
||
def record_everyTime_test_result(self, loss, accuracy, precision, recall, auc, f, indexs, model_name):
|
||
'''記錄我單次的訓練結果並將它輸出到檔案中'''
|
||
File = Process_File()
|
||
|
||
Dataframe = pd.DataFrame(
|
||
{
|
||
"model_name" : str(model_name),
|
||
"loss" : "{:.2f}".format(loss),
|
||
"precision" : "{:.2f}%".format(precision * 100),
|
||
"recall" : "{:.2f}%".format(recall * 100),
|
||
"accuracy" : "{:.2f}%".format(accuracy * 100),
|
||
"f" : "{:.2f}%".format(f * 100),
|
||
"AUC" : "{:.2f}%".format(auc * 100)
|
||
}, index = [indexs])
|
||
File.Save_CSV_File("train_result", Dataframe)
|
||
|
||
return Dataframe |