20250214 Commits: Modify the PyTorch scripts to use DataLoader and Sampler

This commit is contained in:
whitekirin 2025-02-14 03:10:21 +08:00
parent c9d9e7882e
commit bff6e4251f
13 changed files with 80 additions and 102 deletions

View File

@ -1,5 +1,4 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from model_data_processing.processing import shuffle_data
from merge_class.merge import merge from merge_class.merge import merge
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
@ -29,8 +28,11 @@ class Load_Indepentend_Data():
classify_image = [] classify_image = []
Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot) # 讀取測試資料集的資料 Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot) # 讀取測試資料集的資料
Total_Dict_Data_Root = self.Specified_Amount_Of_Data(Total_Dict_Data_Root) # 打亂並取出指定資料筆數的資料 # Total_Dict_Data_Root = Balance_Process(Total_Dict_Data_Root, self.Labels) # 打亂並取出指定資料筆數的資料
Total_List_Data_Root = [Total_Dict_Data_Root[self.Labels[0]], Total_Dict_Data_Root[self.Labels[1]]]
Total_List_Data_Root = []
for Label in self.Labels:
Total_List_Data_Root.append(Total_Dict_Data_Root[Label])
test_label, Classify_Label = [], [] test_label, Classify_Label = [], []
i = 0 # 計算classify_image的counter且計算總共有幾筆資料 i = 0 # 計算classify_image的counter且計算總共有幾筆資料
@ -48,7 +50,7 @@ class Load_Indepentend_Data():
test = [] test = []
test = image_processing.Data_Augmentation_Image(original_test_root) test = image_processing.Data_Augmentation_Image(original_test_root)
test, test_label = image_processing.image_data_processing(test, original_test_label) test, test_label = image_processing.image_data_processing(test, original_test_label)
test = image_processing.normalization(test) # test = image_processing.normalization(test)
return test, test_label return test, test_label
@ -62,17 +64,4 @@ class Load_Indepentend_Data():
Get_Data_Dict_Content = Prepare.Get_Data_Dict() Get_Data_Dict_Content = Prepare.Get_Data_Dict()
Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, self.Labels) Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, self.Labels)
return Total_Data_Roots return Total_Data_Roots
def Specified_Amount_Of_Data(self, Data): # 打亂資料後重新處理
Data = shuffle_data(Data, self.Labels, 2)
tmp = []
if len(Data[self.Labels[0]]) >= len(Data[self.Labels[1]]):
for i in range(len(Data[self.Labels[1]])):
tmp.append(Data[self.Labels[0]][i])
Data[self.Labels[0]] = tmp
else:
for i in range(len(Data[self.Labels[0]])):
tmp.append(Data[self.Labels[1]][i])
Data[self.Labels[1]] = tmp
return Data

View File

@ -1,7 +1,7 @@
import pandas as pd import pandas as pd
from torch.nn import functional from torch.nn import functional
import torch import torch
from torch.utils.data import Dataset, DataLoader from torch.utils.data import Dataset, DataLoader, RandomSampler
class ListDataset(Dataset): class ListDataset(Dataset):
def __init__(self, data_list, labels_list): def __init__(self, data_list, labels_list):
@ -34,10 +34,6 @@ class Tool:
self.__Normal_ImageGenerator_Data_Root = "" self.__Normal_ImageGenerator_Data_Root = ""
self.__Comprehensive_Generator_Root = "" self.__Comprehensive_Generator_Root = ""
self.Training_Zip = ""
self.Validation_Zip = ""
self.Testing_Zip = ""
self.__Labels = [] self.__Labels = []
self.__OneHot_Encording = [] self.__OneHot_Encording = []
pass pass
@ -70,15 +66,7 @@ class Tool:
Counter = torch.tensor(Counter) Counter = torch.tensor(Counter)
self.__OneHot_Encording = functional.one_hot(Counter, len(content)) self.__OneHot_Encording = functional.one_hot(Counter, len(content))
pass pass
def Set_Data_To_DataSet(self, Datas : list, Labels : list, Batch_Size : int):
# 創建 Dataset
dataset = ListDataset(Datas, Labels)
return DataLoader(dataset = dataset, batch_size = Batch_Size, shuffle=True, num_workers = 0, pin_memory=True)
def Get_Data_Label(self): def Get_Data_Label(self):
''' '''
@ -112,5 +100,14 @@ class Tool:
def Get_OneHot_Encording_Label(self): def Get_OneHot_Encording_Label(self):
return self.__OneHot_Encording return self.__OneHot_Encording
def Get_Zip(self): def Convert_Data_To_DataSet(self, Datas : list, Labels : list, Batch_Size : int):
return self.Training_Zip, self.Testing_Zip, self.Validation_Zip seed = 42 # 設定任意整數作為種子
# 產生隨機種子產生器
generator = torch.Generator()
generator.manual_seed(seed)
# 創建 Dataset
list_dataset = ListDataset(Datas, Labels)
sampler = RandomSampler(list_dataset, generator = generator) # 創建Sampler
return DataLoader(dataset = list_dataset, batch_size = Batch_Size, num_workers = 0, pin_memory=True, sampler = sampler)

View File

@ -1,5 +1,4 @@
from tqdm import tqdm from tqdm import tqdm
from torch.nn import functional
import torch import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torchmetrics.functional import auroc from torchmetrics.functional import auroc
@ -21,10 +20,6 @@ class All_Step:
self.Epoch = Epoch self.Epoch = Epoch
self.Number_Of_Classes = Number_Of_Classes self.Number_Of_Classes = Number_Of_Classes
self.Training_Data_Length = len(list(zip(*Training_Data_And_Label))[1])
self.Testing_Data_Length = len(list(zip(*Test_Data_And_Label))[1])
self.Validation_Data_Length = len(list(zip(*Validation_Data_And_Label))[1])
pass pass
def Training_Step(self, model_name, counter): def Training_Step(self, model_name, counter):
@ -50,7 +45,7 @@ class All_Step:
for inputs, labels in epoch_iterator: for inputs, labels in epoch_iterator:
# print(inputs.shape) # print(inputs.shape)
# 輸入的維度為3維 但模型要的是4維 所以要再多加一維 # 輸入的維度為3維 但模型要的是4維 所以要再多加一維
inputs = np.expand_dims(inputs, axis = 0) # inputs = np.expand_dims(inputs, axis = 0)
# print(inputs.shape) # print(inputs.shape)
inputs, labels = torch.tensor(inputs).to(self.device), torch.tensor(labels).to(self.device) inputs, labels = torch.tensor(inputs).to(self.device), torch.tensor(labels).to(self.device)
@ -70,7 +65,7 @@ class All_Step:
all_train_preds.append(preds.cpu().numpy()) all_train_preds.append(preds.cpu().numpy())
all_train_labels.append(labels) all_train_labels.append(labels)
Training_Loss = running_loss / self.Training_Data_Length Training_Loss = running_loss / len(self.Training_Data_And_Label)
train_accuracy = accuracy_score(all_train_labels, all_train_preds) train_accuracy = accuracy_score(all_train_labels, all_train_preds)
train_losses.append(Training_Loss) train_losses.append(Training_Loss)
@ -84,8 +79,8 @@ class All_Step:
all_val_labels = [] all_val_labels = []
with torch.no_grad(): with torch.no_grad():
for inputs, labels in self.Validation_Data_And_Label: for batch_Index, (inputs, labels) in self.Validation_Data_And_Label:
inputs = np.expand_dims(inputs, axis = 0) # inputs = np.expand_dims(inputs, axis = 0)
inputs, labels = torch.tensor(inputs).to(self.device), torch.tensor(labels).to(self.device) inputs, labels = torch.tensor(inputs).to(self.device), torch.tensor(labels).to(self.device)
outputs = self.Model(inputs) outputs = self.Model(inputs)
@ -103,7 +98,7 @@ class All_Step:
all_val_labels.append(labels) all_val_labels.append(labels)
# 計算驗證損失與準確率 # 計算驗證損失與準確率
val_loss /= self.Validation_Data_Length val_loss /= len(self.Validation_Data_And_Label)
val_accuracy = accuracy_score(all_val_labels, all_val_preds) val_accuracy = accuracy_score(all_val_labels, all_val_preds)
val_losses.append(val_loss) val_losses.append(val_loss)
@ -126,8 +121,8 @@ class All_Step:
True_Label, Predict_Label = [], [] True_Label, Predict_Label = [], []
loss = 0.0 loss = 0.0
with torch.no_grad(): with torch.no_grad():
for images, labels in self.Test_Data_And_Label: for batch_Index, (images, labels) in self.Test_Data_And_Label:
images = np.expand_dims(images, axis = 0) # images = np.expand_dims(images, axis = 0)
images, labels = torch.tensor(images).to(self.device), torch.tensor(labels).to(self.device) images, labels = torch.tensor(images).to(self.device), torch.tensor(labels).to(self.device)
outputs = cnn_model(images) outputs = cnn_model(images)
@ -141,7 +136,7 @@ class All_Step:
# Predict_Label.extend(predicted.cpu().numpy()) # Predict_Label.extend(predicted.cpu().numpy())
# True_Label.extend(labels.cpu().numpy()) # True_Label.extend(labels.cpu().numpy())
loss /= self.Testing_Data_Length loss /= len(self.Test_Data_And_Label)
accuracy = accuracy_score(True_Label, Predict_Label) accuracy = accuracy_score(True_Label, Predict_Label)
precision = precision_score(True_Label, Predict_Label) precision = precision_score(True_Label, Predict_Label)

View File

@ -16,7 +16,7 @@ import torch.nn as nn
import time import time
class experiments(): class experiments():
def __init__(self, tools, Number_Of_Classes, status): def __init__(self, Model_Name, Experiment_Name, Generator_Batch_Size, Epoch, Train_Batch_Size, Convolution_Name, tools, Number_Of_Classes, status):
''' '''
# 實驗物件 # 實驗物件
@ -53,13 +53,13 @@ class experiments():
self.ImageGenerator = Image_generator("", "") self.ImageGenerator = Image_generator("", "")
self.merge = merge() self.merge = merge()
self.model_name = "Xception" # 取名,告訴我我是用哪個模型(可能是預處理模型/自己設計的模型) self.model_name = Model_Name # 取名,告訴我我是用哪個模型(可能是預處理模型/自己設計的模型)
self.experiment_name = "Xception Skin to train Normal stomach cancer" self.experiment_name = Experiment_Name
self.generator_batch_size = 50 self.generator_batch_size = Generator_Batch_Size
self.epoch = 10000 self.epoch = Epoch
self.train_batch_size = 128 self.train_batch_size = Train_Batch_Size
self.layers = 1 self.layers = 1
self.convolution_name = "block14_sepconv2" self.convolution_name = Convolution_Name
self.Number_Of_Classes = Number_Of_Classes self.Number_Of_Classes = Number_Of_Classes
self.Grad = "" self.Grad = ""
@ -68,7 +68,7 @@ class experiments():
pass pass
def processing_main(self, train, train_label, counter): def processing_main(self, Training_Dataset, counter):
Train, Test, Validation = self.Topic_Tool.Get_Save_Roots(self.Status) # 要換不同資料集就要改 Train, Test, Validation = self.Topic_Tool.Get_Save_Roots(self.Status) # 要換不同資料集就要改
start = time.time() start = time.time()
@ -82,20 +82,13 @@ class experiments():
self.test, self.test_label = self.cut_image.test, self.cut_image.test_label self.test, self.test_label = self.cut_image.test, self.cut_image.test_label
self.validation, self.validation_label = self.cut_image.validation, self.cut_image.validation_label self.validation, self.validation_label = self.cut_image.validation, self.cut_image.validation_label
train = self.Preprocess_Image_Data(train) Testing_Dataset = self.Topic_Tool.Convert_Data_To_DataSet(self.test, self.test_label, 1)
self.test = self.Preprocess_Image_Data(self.test) Validation_Dataset = self.Topic_Tool.Convert_Data_To_DataSet(self.validation, self.validation_label, self.train_batch_size)
self.validation = self.Preprocess_Image_Data(self.validation)
self.Topic_Tool.Set_Data_To_DataSet(train, train_label, "Training")
self.Topic_Tool.Set_Data_To_DataSet(self.test, self.test_label, "Testing")
self.Topic_Tool.Set_Data_To_DataSet(self.validation, self.validation_label, "Validation")
self.Training_Zip, self.Testing_Zip, self.Validation_Zip = self.Topic_Tool.Get_Zip()
# self.Grad = Grad_CAM(self.Topic_Tool.Get_Data_Label(), self.test_label, self.experiment_name, self.convolution_name) # self.Grad = Grad_CAM(self.Topic_Tool.Get_Data_Label(), self.test_label, self.experiment_name, self.convolution_name)
cnn_model = self.construct_model() # 呼叫讀取模型的function cnn_model = self.construct_model() # 呼叫讀取模型的function
step = All_Step(self.Training_Zip, self.Testing_Zip, self.Validation_Zip, cnn_model, self.epoch, self.Number_Of_Classes) step = All_Step(Training_Dataset, Testing_Dataset, Validation_Dataset, cnn_model, self.epoch, self.Number_Of_Classes)
# model_dir = '../save_the_best_model/Topic/Remove background with Normal image/best_model( 2023-10-17 )-2.h5' # 這是一個儲存模型權重的路徑,每一個模型都有一個自己權重儲存的檔 # model_dir = '../save_the_best_model/Topic/Remove background with Normal image/best_model( 2023-10-17 )-2.h5' # 這是一個儲存模型權重的路徑,每一個模型都有一個自己權重儲存的檔
# if os.path.exists(model_dir): # 如果這個檔案存在 # if os.path.exists(model_dir): # 如果這個檔案存在
@ -125,20 +118,6 @@ class experiments():
cnn_model = cnn_model.to(self.device) cnn_model = cnn_model.to(self.device)
return cnn_model return cnn_model
def Preprocess_Image_Data(self, Image_Datas):
transform = self.ImageGenerator.Generator_Content(5)
Transform_Image = []
for Image in Image_Datas:
Image = transform(Image)
Transform_Image.append(Image)
# Transform_Image.append(self.ImageGenerator._apply_transform(transform, Image))
Transform_Image = torch.tensor(np.array(Transform_Image))
print(Transform_Image.shape)
return Transform_Image
def record_matrix_image(self, True_Labels, Predict_Labels, model_name, index): def record_matrix_image(self, True_Labels, Predict_Labels, model_name, index):
'''劃出混淆矩陣(熱力圖)''' '''劃出混淆矩陣(熱力圖)'''

52
main.py
View File

@ -2,7 +2,7 @@ from experiments.experiment import experiments
from Image_Process.load_and_ImageGenerator import Load_ImageGenerator from Image_Process.load_and_ImageGenerator import Load_ImageGenerator
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Training_Tools.Tools import Tool from Training_Tools.Tools import Tool
from model_data_processing.processing import shuffle_data, Balance_Process from model_data_processing.processing import Balance_Process
from Load_process.LoadData import Load_Data_Prepare from Load_process.LoadData import Load_Data_Prepare
from Calculate_Process.Calculate import Calculate from Calculate_Process.Calculate import Calculate
from merge_class.merge import merge from merge_class.merge import merge
@ -18,12 +18,12 @@ if __name__ == "__main__":
else: else:
print(f"CUDA可用數量為{torch.cuda.device_count()}\n") print(f"CUDA可用數量為{torch.cuda.device_count()}\n")
Status = 2 # 決定要使用什麼資料集 # 參數設定
# 要換不同資料集就要改
tool = Tool() tool = Tool()
tool.Set_Labels() tool.Set_Labels()
tool.Set_Save_Roots() tool.Set_Save_Roots()
Status = 2 # 決定要使用什麼資料集
Labels = tool.Get_Data_Label() Labels = tool.Get_Data_Label()
Trainig_Root, Testing_Root, Validation_Root = tool.Get_Save_Roots(Status) # 一般的 Trainig_Root, Testing_Root, Validation_Root = tool.Get_Save_Roots(Status) # 一般的
Generator_Root = tool.Get_Generator_Save_Roots(Status) Generator_Root = tool.Get_Generator_Save_Roots(Status)
@ -34,31 +34,47 @@ if __name__ == "__main__":
Label_Length = len(Labels) Label_Length = len(Labels)
Classification = 2 # 分類數量 Classification = 2 # 分類數量
Model_Name = "Xception" # 取名,告訴我我是用哪個模型(可能是預處理模型/自己設計的模型)
Experiment_Name = "Xception Skin to train Normal stomach cancer"
Generator_Batch_Size = 50
Epoch = 10000
Train_Batch_Size = 128
Convolution_Name = "block14_sepconv2"
Prepare = Load_Data_Prepare() Prepare = Load_Data_Prepare()
loading_data = Load_ImageGenerator(Trainig_Root, Testing_Root, Validation_Root, Generator_Root, Labels) loading_data = Load_ImageGenerator(Trainig_Root, Testing_Root, Validation_Root, Generator_Root, Labels)
experiment = experiments(tool, Classification, Status) experiment = experiments(Model_Name, Experiment_Name, Generator_Batch_Size, Epoch, Train_Batch_Size, Convolution_Name, tool, Classification, Status)
image_processing = Read_image_and_Process_image() image_processing = Read_image_and_Process_image()
Merge = merge() Merge = merge()
Calculate_Tool = Calculate() Calculate_Tool = Calculate()
counter = 5 counter = 5
Batch_Size = 128
for i in range(0, counter, 1): # 做規定次數的訓練 for i in range(0, counter, 1): # 做規定次數的訓練
# 讀取資料 # 讀取資料
Data_Dict_Data = loading_data.process_main(Label_Length) Data_Dict_Data = loading_data.process_main(Label_Length)
Data_Dict_Data, Train_Size = Balance_Process(Data_Dict_Data, Labels) # Data_Dict_Data, Train_Size = Balance_Process(Data_Dict_Data, Labels)
# 輸出內容 # 輸出內容
print("每個類別各有 " + str(Train_Size) + " 筆資料") i = 0
total = 0 total = 0
for j in range(Label_Length): Length_Array = []
total += Train_Size for label in Labels:
length = len(Data_Dict_Data[label])
Length_Array.append(length)
print(f"類別 {i}{str(length)} 筆資料")
total += length
i += 1
print("總共有 " + str(total) + " 筆資料") print("總共有 " + str(total) + " 筆資料")
# 做出跟資料相同數量的Label # 做出跟資料相同數量的Label
Classes = [] Classes = []
i = 0
for encording in Encording_Label: for encording in Encording_Label:
Classes.append(image_processing.make_label_list(Train_Size, encording)) Classes.append(image_processing.make_label_list(Length_Array[i], encording))
i += 1
# 將資料做成Dict的資料型態 # 將資料做成Dict的資料型態
Prepare.Set_Final_Dict_Data(Labels, Data_Dict_Data, Classes, Label_Length) Prepare.Set_Final_Dict_Data(Labels, Data_Dict_Data, Classes, Label_Length)
@ -74,17 +90,17 @@ if __name__ == "__main__":
training_label = Merge.merge_all_image_data(training_label, Final_Dict_Data[keys[i]]) # 將訓練資料合併成一個list training_label = Merge.merge_all_image_data(training_label, Final_Dict_Data[keys[i]]) # 將訓練資料合併成一個list
start = time.time() start = time.time()
trains_Data_Image = image_processing.Data_Augmentation_Image(training_data) # 多執行續讀檔 trains_Data_Image = image_processing.Data_Augmentation_Image(training_data) # 讀檔
total_trains, train_label = shuffle_data(trains_Data_Image, training_label) # 將資料打亂
training_data = list(total_trains) # 轉換資料型態 # total_trains, train_label = shuffle_data(trains_Data_Image, training_label) # 將資料打亂
# training_data = list(total_trains) # 轉換資料型態
training_data, train_label = image_processing.image_data_processing(trains_Data_Image, training_label) # 將讀出來的檔做正規化。降label轉成numpy array 格式
Training_Dataset = tool.Convert_Data_To_DataSet(training_data, train_label, Train_Batch_Size)
print(len(training_data))
training_data, train_label = image_processing.image_data_processing(training_data, train_label) # 將讀出來的檔做正規化。降label轉成numpy array 格式
# training_data = image_processing.normalization(training_data) # training_data = image_processing.normalization(training_data)
# training_data = training_data.permute(0, 3, 1, 2) # training_data = training_data.permute(0, 3, 1, 2)
print(training_data.shape)
end = time.time() end = time.time()
print("\n\n\n讀取訓練資料(70000)執行時間:%f\n\n" % (end - start)) print("\n\n\n讀取訓練資料(70000)執行時間:%f\n\n" % (end - start))
@ -117,7 +133,7 @@ if __name__ == "__main__":
# trains.append(All_Monkeypox_Data) # trains.append(All_Monkeypox_Data)
# training_label.append([0, 0, 0, 0, 1, 0, 0]) # training_label.append([0, 0, 0, 0, 1, 0, 0])
loss, accuracy, precision, recall, AUC, f = experiment.processing_main(training_data, train_label, i) # 執行訓練方法 loss, accuracy, precision, recall, AUC, f = experiment.processing_main(Training_Dataset, i) # 執行訓練方法
Calculate_Tool.Append_numbers(loss, accuracy, precision, recall, AUC, f) Calculate_Tool.Append_numbers(loss, accuracy, precision, recall, AUC, f)
print("實驗結果") print("實驗結果")

View File

@ -29,17 +29,20 @@ def shuffle_data(image, label, mode = 1):
return shuffle_image, shuffle_label return shuffle_image, shuffle_label
else: else:
shuffle_image = { shuffle_image = {
label[0] : [], label[0] : []
label[1] : [],
} }
for i in range(1, len(label)):
shuffle_image.update({label[i] : []})
for Label in label: for Label in label:
shuffle_image[Label] = image[Label] shuffle_image[Label] = image[Label]
random.shuffle(shuffle_image[Label]) random.shuffle(shuffle_image[Label])
return shuffle_image return shuffle_image
def Balance_Process(Data_Content, Labels): def Balance_Process(Data_Dict_Data, Labels):
Data_Dict_Data = shuffle_data(Data_Content, Labels, 2) # Data_Dict_Data = shuffle_data(Data_Content, Labels, 2)
Train_Size = 0 Train_Size = 0
Train_Size = min(len(Data_Dict_Data[Labels[0]]), len(Data_Dict_Data[Labels[1]])) Train_Size = min(len(Data_Dict_Data[Labels[0]]), len(Data_Dict_Data[Labels[1]]))

View File

@ -1,6 +1,5 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from model_data_processing.processing import shuffle_data
from merge_class.merge import merge from merge_class.merge import merge
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Load_process.LoadData import Load_Data_Prepare, Process_File, Load_Data_Tools from Load_process.LoadData import Load_Data_Prepare, Process_File, Load_Data_Tools