20250308 Commits: K-Fold is finished, but the sampler still has some issues to solve

This commit is contained in:
2025-03-08 13:38:01 +00:00
parent f78cc738fb
commit ea8d08acc7
21 changed files with 251 additions and 196 deletions

View File

@@ -3,7 +3,7 @@ from _validation.ValidationTheEnterData import validation_the_enter_data
from Load_process.file_processing import Process_File
from Load_process.LoadData import Load_Data_Prepare
from torchvision import transforms
from Training_Tools.Tools import Tool
from Training_Tools.PreProcess import Training_Precesses
import numpy as np
from PIL import Image
import torch
@@ -51,7 +51,7 @@ class Image_generator():
'''
File = Process_File()
image_processing = Read_image_and_Process_image(self.Image_Size)
tool = Tool()
tool = Training_Precesses("", "", "", "")
Classes = []
Transform = self.Generator_Content(stardand)
@@ -60,7 +60,7 @@ class Image_generator():
save_root = File.Make_Save_Root(label, save_roots) # 合併路徑
Classes = image_processing.make_label_list(len(image), "1")
Training_Dataset = tool.Convert_Data_To_DataSet_And_Put_To_Dataloader(image, Classes, 1, False)
Training_Dataset = tool.Combine_Signal_Dataset_To_DataLoader(image, Classes, 1, False)
if File.JudgeRoot_MakeDir(save_root): # 判斷要存的資料夾存不存在,不存在則創立
print("The file is exist.This Script is not creating new fold.")

View File

@@ -19,10 +19,9 @@ Parmeter
herpes_data: 合併herpes Dataset的資料成一個List
MonkeyPox_data: 合併MonkeyPox DataSet 的資料成一個List
'''
def __init__(self, Training_Root,Test_Root, Validation_Root, Generator_Root, Labels, Image_Size) -> None:
def __init__(self, Training_Root,Test_Root, Generator_Root, Labels, Image_Size) -> None:
self.Training_Root = Training_Root
self.TestRoot = Test_Root
self.ValidationRoot = Validation_Root
self.GeneratoRoot = Generator_Root
self.Labels = Labels
self.Image_Size = Image_Size
@@ -37,13 +36,9 @@ Parmeter
Generator = Image_generator(self.GeneratoRoot, self.Labels, self.Image_Size)
# 將測試資料獨立出來
test_size = 0.1
test_size = 0.2
Indepentend.IndependentData_main(self.TestRoot, test_size)
# 將驗證資料獨立出來
test_size = 0.1
Indepentend.IndependentData_main(self.ValidationRoot, test_size)
if not File.Judge_File_Exist(self.GeneratoRoot): # 檔案若不存在
# 確定我要多少個List
Prepare.Set_Data_Content([], Data_Length)

View File

@@ -17,7 +17,7 @@ class Load_Indepentend_Data():
self.OneHot_Encording = OneHot_Encording
pass
def process_main(self, Test_data_root, Validation_data_root):
def process_main(self, Test_data_root):
self.test, self.test_label = self.get_Independent_image(Test_data_root)
print("\ntest_labels有" + str(len(self.test_label)) + "筆資料\n")

View File

@@ -14,7 +14,7 @@ if __name__ == "__main__":
tool.Set_Labels()
tool.Set_Save_Roots()
Labels = tool.Get_Data_Label()
Trainig_Root, Testing_Root, Validation_Root = tool.Get_Save_Roots(2)
Trainig_Root, Testing_Root = tool.Get_Save_Roots(2)
load = Loding_Data_Root(Labels, Trainig_Root, "")
Data_Root = load.get_Image_data_roots(Trainig_Root)

View File

@@ -1,4 +1,4 @@
from torch.utils.data import Dataset, DataLoader, RandomSampler
from torch.utils.data import Dataset, DataLoader, RandomSampler, WeightedRandomSampler
import torchvision.transforms as transforms
import torch
@@ -29,6 +29,12 @@ class Training_Precesses:
self.Training_Labels = Training_Labels
self.Testing_Datas = Testing_Datas
self.Testing_Labels = Testing_Labels
seed = 42 # 設定任意整數作為種子
# 產生隨機種子產生器
self.generator = torch.Generator()
self.generator.manual_seed(seed)
pass
def Total_Data_Combine_To_DataLoader(self, Batch_Size):
@@ -39,15 +45,16 @@ class Training_Precesses:
Testing_DataLoader = DataLoader(dataset = Testing_Dataset, batch_size = 1, num_workers = 0, pin_memory=True, shuffle = True)
return Training_DataLoader, Testing_DataLoader
def Combine_Signal_Dataset_To_DataLoader(self, datas : list, Labels : list, Batch_Size, status : bool = True):
    '''
    Wrap one list of samples and its labels in a DataLoader driven by a seeded WeightedRandomSampler.

    Parameters
        datas: the samples, forwarded to Convert_Data_To_DataSet
        Labels: one label per sample; plain values (str / number) or vectors such as one-hot rows
        Batch_Size: batch size handed to the DataLoader
        status: forwarded unchanged to Convert_Data_To_DataSet

    Returns
        A DataLoader over the dataset. Every sample is weighted by 1 / (size of its class), so each
        class is drawn with the same total probability. When more than one class is present the
        sampler draws with replacement (minority classes are oversampled); when only one class is
        present there is nothing to rebalance, so it draws without replacement and every sample is
        visited exactly once per pass. An empty input yields a DataLoader without a sampler.
    '''
    from collections import Counter

    def label_key(label):
        # Plain labels are already hashable; vector labels (one-hot rows, tensors) become tuples
        if isinstance(label, (str, bytes, int, float)):
            return label
        return tuple(torch.as_tensor(label).flatten().tolist())

    dataset = self.Convert_Data_To_DataSet(datas, Labels, status)

    keys = [label_key(label) for label in Labels]
    if not keys:
        # WeightedRandomSampler rejects num_samples == 0, so fall back to the default sampler
        return DataLoader(dataset = dataset, batch_size = Batch_Size, num_workers = 0, pin_memory=True)

    class_counts = Counter(keys)
    # WeightedRandomSampler expects one weight per sample, not the dataset itself
    weights = torch.tensor([1.0 / class_counts[key] for key in keys], dtype = torch.double)
    sampler = WeightedRandomSampler(
        weights = weights,
        num_samples = len(keys),
        replacement = len(class_counts) > 1,
        generator = self.generator,
    ) # 創建Sampler
    Dataloader = DataLoader(dataset = dataset, batch_size = Batch_Size, num_workers = 0, pin_memory=True, sampler = sampler)
    return Dataloader
def Convert_Data_To_DataSet(self, Datas : list, Labels : list, status : bool = True):
seed = 42 # 設定任意整數作為種子
# 產生隨機種子產生器
generator = torch.Generator()
generator.manual_seed(seed)
# 創建 Dataset
list_dataset = ListDataset(Datas, Labels, status)
# sampler = RandomSampler(list_dataset, generator = generator) # 創建Sampler
return list_dataset

View File

@@ -13,10 +13,6 @@ class Tool:
self.__Normal_Test_Data_Root = ""
self.__Comprehensive_Testing_Root = ""
self.__ICG_Validation_Data_Root = ""
self.__Normal_Validation_Data_Root = ""
self.__Comprehensive_Validation_Root = ""
self.__ICG_ImageGenerator_Data_Root = ""
self.__Normal_ImageGenerator_Data_Root = ""
self.__Comprehensive_Generator_Root = ""
@@ -37,10 +33,6 @@ class Tool:
self.__Normal_Test_Data_Root = "../Dataset/Training/Normal_TestData"
self.__Comprehensive_Testing_Root = "../Dataset/Training/Comprehensive_TestData"
self.__ICG_Validation_Data_Root = "../Dataset/Training/CA_ICG_ValidationData"
self.__Normal_Validation_Data_Root = "../Dataset/Training/Normal_ValidationData"
self.__Comprehensive_Validation_Root = "../Dataset/Training/Comprehensive_ValidationData"
self.__ICG_ImageGenerator_Data_Root = "../Dataset/Training/ICG_ImageGenerator"
self.__Normal_ImageGenerator_Data_Root = "../Dataset/Training/Normal_ImageGenerator"
self.__Comprehensive_Generator_Root = "../Dataset/Training/Comprehensive_ImageGenerator"
@@ -68,11 +60,11 @@ class Tool:
若choose != 1 || choose != 2 => 會回傳四個結果
'''
if choose == 1:
return self.__ICG_Training_Root, self.__ICG_Test_Data_Root, self.__ICG_Validation_Data_Root
return self.__ICG_Training_Root, self.__ICG_Test_Data_Root
if choose == 2:
return self.__Normal_Training_Root, self.__Normal_Test_Data_Root, self.__Normal_Validation_Data_Root
return self.__Normal_Training_Root, self.__Normal_Test_Data_Root
else:
return self.__Comprehensive_Training_Root, self.__Comprehensive_Testing_Root, self.__Comprehensive_Validation_Root
return self.__Comprehensive_Training_Root, self.__Comprehensive_Testing_Root
def Get_Generator_Save_Roots(self, choose):
'''回傳結果為Train, test, validation'''

View File

@@ -50,7 +50,7 @@ def call_back(model_name, index, optimizer):
model_dir = '../Result/save_the_best_model/' + model_name
File.JudgeRoot_MakeDir(model_dir)
modelfiles = File.Make_Save_Root('best_model( ' + str(datetime.date.today()) + " )-" + str(index) + ".pt", model_dir)
modelfiles = File.Make_Save_Root('best_model( ' + str(datetime.date.today()) + " )-" + index + ".pt", model_dir)
# model_mckp = ModelCheckpoint(modelfiles, monitor='val_loss', save_best_only=True, save_weights_only = True, mode='auto')

View File

@@ -1,20 +1,26 @@
from tqdm import tqdm
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from torchmetrics.functional import auroc
import torch.optim as optim
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from all_models_tools.all_model_tools import call_back
from Model_Loss.Loss import Entropy_Loss
from merge_class.merge import merge
from draw_tools.Grad_cam import GradCAM
from torch.utils.data import Subset, DataLoader
from Training_Tools.PreProcess import ListDataset
from Load_process.file_processing import Process_File
from draw_tools.draw import plot_history, draw_heatmap
from Load_process.file_processing import Process_File
import time
import torch.optim as optim
import numpy as np
import torch
import pandas as pd
class All_Step:
def __init__(self, PreProcess_Classes_Data, Batch, Model, Epoch, Number_Of_Classes, Model_Name):
def __init__(self, PreProcess_Classes_Data, Batch, Model, Epoch, Number_Of_Classes, Model_Name, Experiment_Name):
self.PreProcess_Classes_Data = PreProcess_Classes_Data
self.Training_DataLoader, self.Test_Dataloader = self.PreProcess_Classes_Data.Total_Data_Combine_To_DataLoader(Batch)
@@ -25,48 +31,67 @@ class All_Step:
self.Number_Of_Classes = Number_Of_Classes
self.Model_Name = Model_Name
pass
self.Experiment_Name = Experiment_Name
def Training_Step(self, model_name, counter):
# 定義優化器,並設定 weight_decay 參數來加入 L2 正則化
Optimizer = optim.SGD(self.Model.parameters(), lr=0.045, momentum = 0.9, weight_decay=0.1)
model_path, early_stopping, scheduler = call_back(model_name, counter, Optimizer)
# Lists to store metrics across all folds
all_fold_train_losses = []
all_fold_val_losses = []
all_fold_train_accuracies = []
all_fold_val_accuracies = []
criterion = Entropy_Loss() # 使用自定義的損失函數
Merge_Function = merge()
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
Total_Epoch = 0
# Define K-fold cross-validator
K_Fold = KFold(n_splits=5, shuffle=True, random_state=42)
K_Flod = KFold(n_splits = 5, shuffle = True, random_state = 42)
File = Process_File()
for epoch in range(self.Epoch): # 訓練迴圈
self.Model.train() # 開始訓練
running_loss = 0.0
all_train_preds = []
all_train_labels = []
processed_samples = 0
# Get the underlying dataset from PreProcess_Classes_Data
training_dataset = ListDataset(data_list = self.PreProcess_Classes_Data.Training_Datas, labels_list = self.PreProcess_Classes_Data.Training_Labels, status = True)
# 計算每個 epoch 的起始時間
start_time = time.time()
total_samples = len(self.Training_DataLoader)
train_subset = ""
val_subset = ""
# K-Fold loop
for fold, (train_idx, val_idx) in enumerate(K_Fold.split(training_dataset)):
print(f"\nStarting Fold {fold + 1}/5")
for fold, (train_idx, vali_idx) in enumerate( K_Flod.split(self.PreProcess_Classes_Data.Training_Datas)):
# Create training and validation subsets for this fold
train_subset = Subset(self.Training_DataLoader, train_idx)
val_subset = Subset(self.Training_DataLoader, vali_idx)
# Create training and validation subsets for this fold
train_subset = torch.utils.data.Subset(training_dataset, train_idx)
val_subset = torch.utils.data.Subset(training_dataset, val_idx)
Training_Data = DataLoader(train_subset, self.Training_DataLoader.batch_size, num_workers = 0, pin_memory=True, shuffle = True)
# Wrap subsets in DataLoaders (use same batch size as original)
batch_size = self.Training_DataLoader.batch_size
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_subset, batch_size=batch_size, shuffle=False)
epoch_iterator = tqdm(Training_Data, desc=f"Epoch [{epoch}/{self.Epoch}]")
# Reinitialize model and optimizer for each fold
self.Model = self.Model.__class__(self.Number_Of_Classes).to(self.device) # Reinitialize model
Optimizer = optim.SGD(self.Model.parameters(), lr=0.045, momentum=0.9, weight_decay=0.1)
model_path, early_stopping, scheduler = call_back(model_name, str(counter) + f"_fold{fold}", Optimizer)
criterion = Entropy_Loss() # Custom loss function
Merge_Function = merge()
# Lists to store metrics for this fold
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
# Epoch loop
for epoch in range(self.Epoch):
self.Model.train() # Start training
running_loss = 0.0
all_train_preds = []
all_train_labels = []
processed_samples = 0
# Calculate epoch start time
start_time = time.time()
total_samples = len(train_subset) # Total samples in subset, not DataLoader
# Progress bar for training batches
epoch_iterator = tqdm(train_loader, desc=f"Fold {fold + 1}/5, Epoch [{epoch + 1}/{self.Epoch}]")
for inputs, labels in epoch_iterator:
inputs, labels = torch.as_tensor(inputs).to(self.device), torch.as_tensor(labels).to(self.device)
inputs, labels = inputs.to(self.device), labels.to(self.device) # Already tensors from DataLoader
Optimizer.zero_grad()
outputs = self.Model(inputs)
@@ -75,85 +100,112 @@ class All_Step:
Optimizer.step()
running_loss += loss.item()
# 收集訓練預測和標籤
Output_Values, Output_Indexs = torch.max(outputs, dim = 1)
True_Indexs = np.argmax(labels.cpu().numpy(), 1)
# Collect training predictions and labels
Output_Values, Output_Indexs = torch.max(outputs, dim=1)
True_Indexs = np.argmax(labels.cpu().numpy(), axis=1)
all_train_preds.append(Output_Indexs.cpu().numpy())
all_train_labels.append(True_Indexs)
processed_samples += len(inputs)
processed_samples += inputs.size(0) # Use size(0) for batch size
# 計算當前進度
# Calculate progress and timing
progress = (processed_samples / total_samples) * 100
# 計算經過時間和剩餘時間
elapsed_time = time.time() - start_time
iterations_per_second = processed_samples / elapsed_time if elapsed_time > 0 else 0
eta = (total_samples - processed_samples) / iterations_per_second if iterations_per_second > 0 else 0
time_str = f"{int(elapsed_time//60):02d}:{int(elapsed_time%60):02d}<{int(eta//60):02d}:{int(eta%60):02d}"
# 計算當前批次的精確度(這裡需要根據你的具體需求調整)
# Calculate batch accuracy
batch_accuracy = (Output_Indexs.cpu().numpy() == True_Indexs).mean()
# 更新進度條顯示
epoch_iterator.set_description(f"Epoch [{epoch}/{self.Epoch}]")
# Update progress bar
epoch_iterator.set_postfix_str(
f"{processed_samples}/{total_samples} [{time_str}, {iterations_per_second:.2f}it/s, " +
f"acc={batch_accuracy:.3f}, loss={loss.item():.3f}, ]"
f"{processed_samples}/{total_samples} [{time_str}, {iterations_per_second:.2f}it/s, "
f"acc={batch_accuracy:.3f}, loss={loss.item():.3f}]"
)
epoch_iterator.close()
# Merge predictions and labels
all_train_preds = Merge_Function.merge_data_main(all_train_preds, 0, len(all_train_preds))
all_train_labels = Merge_Function.merge_data_main(all_train_labels, 0, len(all_train_labels))
Training_Loss = running_loss / len(self.Training_DataLoader)
Training_Loss = running_loss / len(train_loader)
train_accuracy = accuracy_score(all_train_labels, all_train_preds)
train_losses.append(Training_Loss)
train_accuracies.append(train_accuracy)
# Validation step
self.Model.eval()
val_loss = 0.0
all_val_preds = []
all_val_labels = []
with torch.no_grad():
for inputs, labels in val_subset:
inputs, labels = torch.as_tensor(inputs).to(self.device), torch.as_tensor(labels).to(self.device)
for inputs, labels in val_loader:
inputs, labels = inputs.to(self.device), labels.to(self.device)
outputs = self.Model(inputs)
loss = criterion(outputs, labels)
val_loss += loss.item()
# 收集訓練預測和標籤
Output_Values, Output_Indexs = torch.max(outputs, dim = 1)
True_Indexs = np.argmax(labels.cpu().numpy(), 1)
# Collect validation predictions and labels
Output_Values, Output_Indexs = torch.max(outputs, dim=1)
True_Indexs = np.argmax(labels.cpu().numpy(), axis=1)
all_val_preds.append(Output_Indexs.cpu().numpy())
all_val_labels.append(True_Indexs)
val_loss /= len(val_subset)
# Merge predictions and labels
all_val_preds = Merge_Function.merge_data_main(all_val_preds, 0, len(all_val_preds))
all_val_labels = Merge_Function.merge_data_main(all_val_labels, 0, len(all_val_labels))
val_loss /= len(val_loader)
val_accuracy = accuracy_score(all_val_labels, all_val_preds)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
# print(f"Val_loss: {val_loss:.4f}, Val_accuracy: {val_accuracy:0.2f}\n")
# Early stopping
early_stopping(val_loss, self.Model, model_path)
if early_stopping.early_stop:
print("Early stopping triggered. Training stopped.")
Total_Epoch = epoch
print(f"Early stopping triggered in Fold {fold + 1} at epoch {epoch + 1}")
Total_Epoch = epoch + 1
break
# 學習率調整
# Learning rate adjustment
scheduler.step(val_loss)
return train_losses, val_losses, train_accuracies, val_accuracies, Total_Epoch
else: # If no early stopping
Total_Epoch = self.Epoch
def Evaluate_Model(self, cnn_model, counter):
# 測試模型
# Store fold results
all_fold_train_losses.append(train_losses)
all_fold_val_losses.append(val_losses)
all_fold_train_accuracies.append(train_accuracies)
all_fold_val_accuracies.append(val_accuracies)
Losses = [train_losses, val_losses]
Accuracies = [train_accuracies, val_accuracies]
plot_history(Total_Epoch, Losses, Accuracies, "train" + str(fold), self.Experiment_Name) # 將訓練結果化成圖,並將化出來的圖丟出去儲存
# Aggregate results across folds
avg_train_losses = np.mean([losses[-1] for losses in all_fold_train_losses])
avg_val_losses = np.mean([losses[-1] for losses in all_fold_val_losses])
avg_train_accuracies = np.mean([acc[-1] for acc in all_fold_train_accuracies])
avg_val_accuracies = np.mean([acc[-1] for acc in all_fold_val_accuracies])
print(f"\nCross-Validation Results:")
print(f"Avg Train Loss: {avg_train_losses:.4f}, Avg Val Loss: {avg_val_losses:.4f}")
print(f"Avg Train Acc: {avg_train_accuracies:.4f}, Avg Val Acc: {avg_val_accuracies:.4f}")
File.Save_TXT_File(content = f"\nCross-Validation Results:\nAvg Train Loss: {avg_train_losses:.4f}, Avg Val Loss: {avg_val_losses:.4f}\nAvg Train Acc: {avg_train_accuracies:.4f}, Avg Val Acc: {avg_val_accuracies:.4f}\n", File_Name = "Training_Average_Result")
pass
def Evaluate_Model(self, cnn_model, Model_Name, counter):
# (Unchanged Evaluate_Model method)
cnn_model.eval()
True_Label, Predict_Label = [], []
True_Label_OneHot, Predict_Label_OneHot = [], []
@@ -162,33 +214,54 @@ class All_Step:
with torch.no_grad():
for images, labels in self.Test_Dataloader:
images, labels = torch.tensor(images).to(self.device), torch.tensor(labels).to(self.device)
outputs = cnn_model(images)
# 收集訓練預測和標籤
Output_Values, Output_Indexs = torch.max(outputs, 1)
True_Indexs = np.argmax(labels.cpu().numpy(), 1)
True_Label.append(Output_Indexs.cpu().numpy())
Predict_Label.append(True_Indexs)
Predict_Label_OneHot.append(torch.tensor(outputs, dtype = torch.float32).cpu().numpy()[0])
True_Label_OneHot.append(torch.tensor(labels, dtype = torch.int).cpu().numpy()[0])
# # 創建 GradCAM 實例
# Layers = cnn_model.base_model.body.conv4.pointwise
# grad_cam = GradCAM(cnn_model, target_layer="base_model")
# # 可視化 Grad-CAM
# grad_cam.visualize(outputs, images, target_class = 3, File_Name = counter, model_name = self.Model_Name)
Predict_Label_OneHot.append(torch.tensor(outputs, dtype=torch.float32).cpu().numpy()[0])
True_Label_OneHot.append(torch.tensor(labels, dtype=torch.int).cpu().numpy()[0])
loss /= len(self.Test_Dataloader)
True_Label_OneHot = torch.tensor(True_Label_OneHot, dtype = torch.int)
Predict_Label_OneHot = torch.tensor(Predict_Label_OneHot, dtype = torch.float32)
True_Label_OneHot = torch.tensor(True_Label_OneHot, dtype=torch.int)
Predict_Label_OneHot = torch.tensor(Predict_Label_OneHot, dtype=torch.float32)
accuracy = accuracy_score(True_Label, Predict_Label)
precision = precision_score(True_Label, Predict_Label, average = "macro")
recall = recall_score(True_Label, Predict_Label, average = "macro")
AUC = auroc(Predict_Label_OneHot, True_Label_OneHot, num_labels = self.Number_Of_Classes, task = "multilabel", average = "macro")
f1 = f1_score(True_Label, Predict_Label, average = "macro")
return loss, accuracy, precision, recall, AUC, f1, True_Label, Predict_Label
precision = precision_score(True_Label, Predict_Label, average="macro")
recall = recall_score(True_Label, Predict_Label, average="macro")
AUC = auroc(Predict_Label_OneHot, True_Label_OneHot, num_labels=self.Number_Of_Classes, task="multilabel", average="macro")
f1 = f1_score(True_Label, Predict_Label, average="macro")
Matrix = self.record_matrix_image(True_Label, Predict_Label, Model_Name, counter)
print(self.record_everyTime_test_result(loss, accuracy, precision, recall, AUC, f1, counter, self.Experiment_Name, Matrix)) # 紀錄當前訓練完之後的預測結果並輸出成csv檔
pass
def record_matrix_image(self, True_Labels, Predict_Labels, model_name, index):
    '''Compute the confusion matrix for the given labels, draw it as a heat map, and return it.'''
    # The same matrix object is handed to the drawing helper and returned to the caller
    confusion = confusion_matrix(True_Labels, Predict_Labels)
    draw_heatmap(confusion, model_name, index)
    return confusion
def record_everyTime_test_result(self, loss, accuracy, precision, recall, auc, f, indexs, model_name, Matrix):
    '''
    Format one run's test metrics into a single-row DataFrame, save it as CSV, and return it.

    loss is written with two decimals; every other metric is multiplied by 100 and written as a
    percentage with two decimals. The row is indexed by indexs. Matrix is accepted but not used.
    '''
    csv_writer = Process_File()

    # One formatted string per column; the keys are the CSV column names
    row = {
        "model_name" : str(model_name),
        "loss" : f"{loss:.2f}",
        "precision" : f"{precision * 100:.2f}%",
        "recall" : f"{recall * 100:.2f}%",
        "accuracy" : f"{accuracy * 100:.2f}%",
        "f" : f"{f * 100:.2f}%",
        "AUC" : f"{auc * 100:.2f}%",
    }
    result_frame = pd.DataFrame(row, index = [indexs])

    csv_writer.Save_CSV_File("train_result", result_frame)
    return result_frame

View File

@@ -1,13 +1,11 @@
from draw_tools.draw import plot_history, draw_heatmap
from Load_process.Load_Indepentend import Load_Indepentend_Data
from _validation.ValidationTheEnterData import validation_the_enter_data
from Load_process.file_processing import Process_File
from sklearn.metrics import confusion_matrix
from torchinfo import summary
from Training_Tools.PreProcess import Training_Precesses
from experiments.pytorch_Model import ModifiedXception
from experiments.Model_All_Step import All_Step
from Training_Tools.PreProcess import Training_Precesses
from torchinfo import summary
import pandas as pd
from Load_process.Load_Indepentend import Load_Indepentend_Data
from _validation.ValidationTheEnterData import validation_the_enter_data
import numpy as np
import torch
import torch.nn as nn
@@ -63,10 +61,10 @@ class experiments():
pass
def processing_main(self, Training_Data, Training_Label, counter):
Train, Test, Validation = self.Topic_Tool.Get_Save_Roots(self.Status) # 要換不同資料集就要改
Train, Test = self.Topic_Tool.Get_Save_Roots(self.Status) # 要換不同資料集就要改
start = time.time()
self.cut_image.process_main(Test, Validation) # 呼叫處理test Data與Validation Data的function
self.cut_image.process_main(Test) # 呼叫處理test Data與Validation Data的function
end = time.time()
print("讀取testing與validation資料(154)執行時間:%f\n" % (end - start))
@@ -80,21 +78,14 @@ class experiments():
for name, parameters in cnn_model.named_parameters():
print(f"Layer Name: {name}, Parameters: {parameters.size()}")
step = All_Step(PreProcess, self.train_batch_size, cnn_model, self.epoch, self.Number_Of_Classes, self.model_name)
step = All_Step(PreProcess, self.train_batch_size, cnn_model, self.epoch, self.Number_Of_Classes, self.model_name, self.experiment_name)
print("\n\n\n讀取訓練資料(70000)執行時間:%f\n\n" % (end - start))
train_losses, val_losses, train_accuracies, val_accuracies, Epoch = step.Training_Step(self.model_name, counter)
loss, accuracy, precision, recall, AUC, f1, True_Label, Predict_Label = step.Evaluate_Model(cnn_model, counter)
Matrix = self.record_matrix_image(True_Label, Predict_Label, self.model_name, counter)
print(self.record_everyTime_test_result(loss, accuracy, precision, recall, AUC, f1, counter, self.experiment_name, Matrix)) # 紀錄當前訓練完之後的預測結果並輸出成csv檔
Losses = [train_losses, val_losses]
Accuracies = [train_accuracies, val_accuracies]
plot_history(Epoch + 1, Losses, Accuracies, "train" + str(counter), self.experiment_name) # 將訓練結果化成圖,並將化出來的圖丟出去儲存
step.Training_Step(self.model_name, counter)
step.Evaluate_Model(cnn_model, self.model_name, counter)
# self.Grad.process_main(cnn_model, counter, Testing_Dataset)
return loss, accuracy, precision, recall, AUC, f1
pass
def construct_model(self):
'''決定我這次訓練要用哪個model'''
@@ -104,31 +95,4 @@ class experiments():
cnn_model = nn.DataParallel(cnn_model)
cnn_model = cnn_model.to(self.device)
return cnn_model
def record_matrix_image(self, True_Labels, Predict_Labels, model_name, index):
'''劃出混淆矩陣(熱力圖)'''
# 計算混淆矩陣
matrix = confusion_matrix(True_Labels, Predict_Labels)
draw_heatmap(matrix, model_name, index) # 呼叫畫出confusion matrix的function
return matrix
def record_everyTime_test_result(self, loss, accuracy, precision, recall, auc, f, indexs, model_name, Matrix):
'''記錄我單次的訓練結果並將它輸出到檔案中'''
File = Process_File()
Dataframe = pd.DataFrame(
{
"model_name" : str(model_name),
"loss" : "{:.2f}".format(loss),
"precision" : "{:.2f}%".format(precision * 100),
"recall" : "{:.2f}%".format(recall * 100),
"accuracy" : "{:.2f}%".format(accuracy * 100),
"f" : "{:.2f}%".format(f * 100),
"AUC" : "{:.2f}%".format(auc * 100)
}, index = [indexs])
File.Save_CSV_File("train_result", Dataframe)
# File.Save_TXT_File("Matrix_Result : " + str(Matrix), model_name + "_train" + str(indexs))
return Dataframe
return cnn_model

View File

@@ -1,8 +1,4 @@
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
import timm

27
main.py
View File

@@ -6,6 +6,7 @@ from model_data_processing.processing import Balance_Process
from Load_process.LoadData import Load_Data_Prepare
from Calculate_Process.Calculate import Calculate
from merge_class.merge import merge
import time
import torch
@@ -24,7 +25,7 @@ if __name__ == "__main__":
Status = 1 # 決定要使用什麼資料集
Labels = tool.Get_Data_Label()
Trainig_Root, Testing_Root, Validation_Root = tool.Get_Save_Roots(Status) # 一般的
Trainig_Root, Testing_Root = tool.Get_Save_Roots(Status) # 一般的
Generator_Root = tool.Get_Generator_Save_Roots(Status)
# 取得One-hot encording 的資料
@@ -41,21 +42,25 @@ if __name__ == "__main__":
Image_Size = 256
Prepare = Load_Data_Prepare()
loading_data = Load_ImageGenerator(Trainig_Root, Testing_Root, Validation_Root, Generator_Root, Labels, Image_Size)
loading_data = Load_ImageGenerator(Trainig_Root, Testing_Root, Generator_Root, Labels, Image_Size)
experiment = experiments(Image_Size, Model_Name, Experiment_Name, Epoch, Train_Batch_Size, tool, Classification, Status)
image_processing = Read_image_and_Process_image(Image_Size)
Merge = merge()
Calculate_Tool = Calculate()
counter = 5
counter = 1
Batch_Size = 128
Train_Size = 0
for Run_Range in range(0, counter, 1): # 做規定次數的訓練
# 讀取資料
Data_Dict_Data = loading_data.process_main(Label_Length)
Data_Dict_Data, Train_Size = Balance_Process(Data_Dict_Data, Labels)
# Data_Dict_Data, Train_Size = Balance_Process(Data_Dict_Data, Labels)
for label in Labels:
Train_Size += len(Data_Dict_Data[label])
print("總共有 " + str(Train_Size * 3) + " 筆資料")
print("總共有 " + str(Train_Size) + " 筆資料")
# 做出跟資料相同數量的Label
Classes = []
@@ -88,15 +93,5 @@ if __name__ == "__main__":
end = time.time()
print("\n\n\n讀取訓練資料(70000)執行時間:%f\n\n" % (end - start))
loss, accuracy, precision, recall, AUC, f = experiment.processing_main(Training_Data, Training_Label, Run_Range) # 執行訓練方法
Calculate_Tool.Append_numbers(loss, accuracy, precision, recall, AUC, f)
print("實驗結果")
print("--------------------------------------------")
print("平均值: ")
print(Calculate_Tool.Calculate_Mean())
print("標準差: ")
print(Calculate_Tool.Calculate_Std())
print("結果: ")
print(Calculate_Tool.Output_Style())
experiment.processing_main(Training_Data, Training_Label, Run_Range) # 執行訓練方法

View File

@@ -1859,19 +1859,19 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(array([0, 3, 4, 5, 6, 7]), array([1, 2])), (array([0, 1, 2, 4, 6, 7]), array([3, 5])), (array([1, 2, 3, 5, 6, 7]), array([0, 4])), (array([0, 1, 2, 3, 4, 5, 6]), array([7])), (array([0, 1, 2, 3, 4, 5, 7]), array([6]))]\n",
"[0 1 2 3 4 7] [5 6]\n",
"[0 1 2 5 6 7] [3 4]\n",
"[1 2 3 4 5 6] [0 7]\n",
"[0 2 3 4 5 6 7] [1]\n",
"[0 1 3 4 5 6 7] [2]\n"
"[(array([0, 2, 3, 4, 5, 6]), array([1, 7])), (array([0, 1, 2, 4, 5, 7]), array([3, 6])), (array([0, 1, 3, 4, 6, 7]), array([2, 5])), (array([0, 1, 2, 3, 5, 6, 7]), array([4])), (array([1, 2, 3, 4, 5, 6, 7]), array([0]))]\n",
"0 [0 2 3 4 5 6] [1 7]\n",
"1 [0 1 3 4 6 7] [2 5]\n",
"2 [1 2 4 5 6 7] [0 3]\n",
"3 [0 1 2 3 5 6 7] [4]\n",
"4 [0 1 2 3 4 5 7] [6]\n"
]
}
],
@@ -1881,10 +1881,8 @@
"k = KFold(n_splits = 5, shuffle = True)\n",
"a = [1, 2, 3, 4 ,5, 6,7, 8]\n",
"\n",
"print(list(k.split(a)))\n",
"\n",
"for d, b in k.split(a):\n",
" print(d, b)"
"for d, (b, c) in enumerate(k.split(a)):\n",
" print(d, b, c)"
]
},
{
@@ -1892,7 +1890,42 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"from torch.utils.data import Subset, DataLoader, Dataset\n",
"\n",
"class ListDataset(Dataset):\n",
" def __init__(self, data_list, labels_list, status):\n",
" self.data = data_list\n",
" self.labels = labels_list\n",
" self.status = status\n",
"\n",
" def __len__(self):\n",
" return len(self.data)\n",
"\n",
" def __getitem__(self, idx):\n",
" sample = self.data[idx] \n",
"\n",
" if self.status:\n",
" from Image_Process.Image_Generator import Image_generator\n",
" ImageGenerator = Image_generator(\"\", \"\", 12)\n",
" Transform = ImageGenerator.Generator_Content(5)\n",
" sample = Transform(sample)\n",
"\n",
" label = self.labels[idx]\n",
" return sample, label\n",
"\n",
"k = KFold(n_splits = 5, shuffle = True)\n",
"a = [1, 2, 3, 4 ,5, 6,7, 8]\n",
"label = [10, 20, 30, 40, 50,60, 70, 80]\n",
"\n",
"f = ListDataset(a, label, True)\n",
"\n",
"for d, (b, c) in enumerate(k.split(a)):\n",
" # Create training and validation subsets for this fold\n",
" train_subset = torch.utils.data.Subset(training_dataset, train_idx)\n",
" val_subset = torch.utils.data.Subset(training_dataset, val_idx)\n",
"\n"
]
}
],
"metadata": {