20250214 Commits: Modify the PyTorch scripts to use DataLoader and Sampler

This commit is contained in:
2025-02-14 03:10:21 +08:00
parent c9d9e7882e
commit bff6e4251f
13 changed files with 80 additions and 102 deletions

View File

@@ -1,5 +1,4 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from model_data_processing.processing import shuffle_data
from merge_class.merge import merge
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
@@ -29,8 +28,11 @@ class Load_Indepentend_Data():
classify_image = []
Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot) # 讀取測試資料集的資料
Total_Dict_Data_Root = self.Specified_Amount_Of_Data(Total_Dict_Data_Root) # 打亂並取出指定資料筆數的資料
Total_List_Data_Root = [Total_Dict_Data_Root[self.Labels[0]], Total_Dict_Data_Root[self.Labels[1]]]
# Total_Dict_Data_Root = Balance_Process(Total_Dict_Data_Root, self.Labels) # 打亂並取出指定資料筆數的資料
Total_List_Data_Root = []
for Label in self.Labels:
Total_List_Data_Root.append(Total_Dict_Data_Root[Label])
test_label, Classify_Label = [], []
i = 0 # 計算classify_image的counter且計算總共有幾筆資料
@@ -48,7 +50,7 @@ class Load_Indepentend_Data():
test = []
test = image_processing.Data_Augmentation_Image(original_test_root)
test, test_label = image_processing.image_data_processing(test, original_test_label)
test = image_processing.normalization(test)
# test = image_processing.normalization(test)
return test, test_label
@@ -62,17 +64,4 @@ class Load_Indepentend_Data():
Get_Data_Dict_Content = Prepare.Get_Data_Dict()
Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, self.Labels)
return Total_Data_Roots
def Specified_Amount_Of_Data(self, Data):
    """Shuffle each label's data list, then balance the classes.

    Every per-label list is truncated to the length of the smallest
    list so that each label contributes the same number of samples.

    Parameters
    ----------
    Data : dict
        Maps each label in ``self.Labels`` to a list of data roots.

    Returns
    -------
    dict
        The same mapping, with every list shuffled and cut to the
        minimum per-label length.
    """
    # mode 2: shuffle the list stored under each label key
    Data = shuffle_data(Data, self.Labels, 2)
    # Generalized from the original two-label if/else: truncating every
    # label to the smallest class size is identical for 2 labels and
    # also works for any number of labels.
    minimum = min(len(Data[label]) for label in self.Labels)
    for label in self.Labels:
        Data[label] = Data[label][:minimum]
    return Data
return Total_Data_Roots

View File

@@ -1,7 +1,7 @@
import pandas as pd
from torch.nn import functional
import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import Dataset, DataLoader, RandomSampler
class ListDataset(Dataset):
def __init__(self, data_list, labels_list):
@@ -34,10 +34,6 @@ class Tool:
self.__Normal_ImageGenerator_Data_Root = ""
self.__Comprehensive_Generator_Root = ""
self.Training_Zip = ""
self.Validation_Zip = ""
self.Testing_Zip = ""
self.__Labels = []
self.__OneHot_Encording = []
pass
@@ -70,15 +66,7 @@ class Tool:
Counter = torch.tensor(Counter)
self.__OneHot_Encording = functional.one_hot(Counter, len(content))
pass
def Set_Data_To_DataSet(self, Datas : list, Labels : list, Batch_Size : int):
    """Wrap paired data/label lists in a shuffling DataLoader.

    Parameters
    ----------
    Datas : list
        Sample data, index-aligned with ``Labels``.
    Labels : list
        Label for each sample in ``Datas``.
    Batch_Size : int
        Number of samples per batch.

    Returns
    -------
    DataLoader
        Shuffled loader over a ``ListDataset`` of the given pairs.
    """
    wrapped = ListDataset(Datas, Labels)
    loader = DataLoader(
        dataset=wrapped,
        batch_size=Batch_Size,
        shuffle=True,
        num_workers=0,
        pin_memory=True,
    )
    return loader
def Get_Data_Label(self):
'''
@@ -112,5 +100,14 @@ class Tool:
def Get_OneHot_Encording_Label(self):
return self.__OneHot_Encording
def Get_Zip(self):
return self.Training_Zip, self.Testing_Zip, self.Validation_Zip
def Convert_Data_To_DataSet(self, Datas : list, Labels : list, Batch_Size : int):
    """Build a DataLoader whose shuffle order is reproducible.

    A ``RandomSampler`` driven by a seeded ``torch.Generator`` replaces
    ``shuffle=True`` so every run draws batches in the same order.

    Parameters
    ----------
    Datas : list
        Sample data, index-aligned with ``Labels``.
    Labels : list
        Label for each sample in ``Datas``.
    Batch_Size : int
        Number of samples per batch.

    Returns
    -------
    DataLoader
        Loader over a ``ListDataset`` with deterministic random sampling.
    """
    SEED = 42  # arbitrary fixed seed — any integer works, it only needs to be constant
    rng = torch.Generator()
    rng.manual_seed(SEED)
    dataset = ListDataset(Datas, Labels)
    shuffler = RandomSampler(dataset, generator=rng)
    return DataLoader(
        dataset=dataset,
        batch_size=Batch_Size,
        num_workers=0,
        pin_memory=True,
        sampler=shuffler,
    )

View File

@@ -1,5 +1,4 @@
from tqdm import tqdm
from torch.nn import functional
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torchmetrics.functional import auroc
@@ -21,10 +20,6 @@ class All_Step:
self.Epoch = Epoch
self.Number_Of_Classes = Number_Of_Classes
self.Training_Data_Length = len(list(zip(*Training_Data_And_Label))[1])
self.Testing_Data_Length = len(list(zip(*Test_Data_And_Label))[1])
self.Validation_Data_Length = len(list(zip(*Validation_Data_And_Label))[1])
pass
def Training_Step(self, model_name, counter):
@@ -50,7 +45,7 @@ class All_Step:
for inputs, labels in epoch_iterator:
# print(inputs.shape)
# 輸入的維度為3維 但模型要的是4維 所以要再多加一維
inputs = np.expand_dims(inputs, axis = 0)
# inputs = np.expand_dims(inputs, axis = 0)
# print(inputs.shape)
inputs, labels = torch.tensor(inputs).to(self.device), torch.tensor(labels).to(self.device)
@@ -70,7 +65,7 @@ class All_Step:
all_train_preds.append(preds.cpu().numpy())
all_train_labels.append(labels)
Training_Loss = running_loss / self.Training_Data_Length
Training_Loss = running_loss / len(self.Training_Data_And_Label)
train_accuracy = accuracy_score(all_train_labels, all_train_preds)
train_losses.append(Training_Loss)
@@ -84,8 +79,8 @@ class All_Step:
all_val_labels = []
with torch.no_grad():
for inputs, labels in self.Validation_Data_And_Label:
inputs = np.expand_dims(inputs, axis = 0)
for batch_Index, (inputs, labels) in self.Validation_Data_And_Label:
# inputs = np.expand_dims(inputs, axis = 0)
inputs, labels = torch.tensor(inputs).to(self.device), torch.tensor(labels).to(self.device)
outputs = self.Model(inputs)
@@ -103,7 +98,7 @@ class All_Step:
all_val_labels.append(labels)
# 計算驗證損失與準確率
val_loss /= self.Validation_Data_Length
val_loss /= len(self.Validation_Data_And_Label)
val_accuracy = accuracy_score(all_val_labels, all_val_preds)
val_losses.append(val_loss)
@@ -126,8 +121,8 @@ class All_Step:
True_Label, Predict_Label = [], []
loss = 0.0
with torch.no_grad():
for images, labels in self.Test_Data_And_Label:
images = np.expand_dims(images, axis = 0)
for batch_Index, (images, labels) in self.Test_Data_And_Label:
# images = np.expand_dims(images, axis = 0)
images, labels = torch.tensor(images).to(self.device), torch.tensor(labels).to(self.device)
outputs = cnn_model(images)
@@ -141,7 +136,7 @@ class All_Step:
# Predict_Label.extend(predicted.cpu().numpy())
# True_Label.extend(labels.cpu().numpy())
loss /= self.Testing_Data_Length
loss /= len(self.Test_Data_And_Label)
accuracy = accuracy_score(True_Label, Predict_Label)
precision = precision_score(True_Label, Predict_Label)

View File

@@ -16,7 +16,7 @@ import torch.nn as nn
import time
class experiments():
def __init__(self, tools, Number_Of_Classes, status):
def __init__(self, Model_Name, Experiment_Name, Generator_Batch_Size, Epoch, Train_Batch_Size, Convolution_Name, tools, Number_Of_Classes, status):
'''
# 實驗物件
@@ -53,13 +53,13 @@ class experiments():
self.ImageGenerator = Image_generator("", "")
self.merge = merge()
self.model_name = "Xception" # 取名,告訴我我是用哪個模型(可能是預處理模型/自己設計的模型)
self.experiment_name = "Xception Skin to train Normal stomach cancer"
self.generator_batch_size = 50
self.epoch = 10000
self.train_batch_size = 128
self.model_name = Model_Name # 取名,告訴我我是用哪個模型(可能是預處理模型/自己設計的模型)
self.experiment_name = Experiment_Name
self.generator_batch_size = Generator_Batch_Size
self.epoch = Epoch
self.train_batch_size = Train_Batch_Size
self.layers = 1
self.convolution_name = "block14_sepconv2"
self.convolution_name = Convolution_Name
self.Number_Of_Classes = Number_Of_Classes
self.Grad = ""
@@ -68,7 +68,7 @@ class experiments():
pass
def processing_main(self, train, train_label, counter):
def processing_main(self, Training_Dataset, counter):
Train, Test, Validation = self.Topic_Tool.Get_Save_Roots(self.Status) # 要換不同資料集就要改
start = time.time()
@@ -82,20 +82,13 @@ class experiments():
self.test, self.test_label = self.cut_image.test, self.cut_image.test_label
self.validation, self.validation_label = self.cut_image.validation, self.cut_image.validation_label
train = self.Preprocess_Image_Data(train)
self.test = self.Preprocess_Image_Data(self.test)
self.validation = self.Preprocess_Image_Data(self.validation)
self.Topic_Tool.Set_Data_To_DataSet(train, train_label, "Training")
self.Topic_Tool.Set_Data_To_DataSet(self.test, self.test_label, "Testing")
self.Topic_Tool.Set_Data_To_DataSet(self.validation, self.validation_label, "Validation")
self.Training_Zip, self.Testing_Zip, self.Validation_Zip = self.Topic_Tool.Get_Zip()
Testing_Dataset = self.Topic_Tool.Convert_Data_To_DataSet(self.test, self.test_label, 1)
Validation_Dataset = self.Topic_Tool.Convert_Data_To_DataSet(self.validation, self.validation_label, self.train_batch_size)
# self.Grad = Grad_CAM(self.Topic_Tool.Get_Data_Label(), self.test_label, self.experiment_name, self.convolution_name)
cnn_model = self.construct_model() # 呼叫讀取模型的function
step = All_Step(self.Training_Zip, self.Testing_Zip, self.Validation_Zip, cnn_model, self.epoch, self.Number_Of_Classes)
step = All_Step(Training_Dataset, Testing_Dataset, Validation_Dataset, cnn_model, self.epoch, self.Number_Of_Classes)
# model_dir = '../save_the_best_model/Topic/Remove background with Normal image/best_model( 2023-10-17 )-2.h5' # 這是一個儲存模型權重的路徑,每一個模型都有一個自己權重儲存的檔
# if os.path.exists(model_dir): # 如果這個檔案存在
@@ -125,20 +118,6 @@ class experiments():
cnn_model = cnn_model.to(self.device)
return cnn_model
def Preprocess_Image_Data(self, Image_Datas):
transform = self.ImageGenerator.Generator_Content(5)
Transform_Image = []
for Image in Image_Datas:
Image = transform(Image)
Transform_Image.append(Image)
# Transform_Image.append(self.ImageGenerator._apply_transform(transform, Image))
Transform_Image = torch.tensor(np.array(Transform_Image))
print(Transform_Image.shape)
return Transform_Image
def record_matrix_image(self, True_Labels, Predict_Labels, model_name, index):
'''劃出混淆矩陣(熱力圖)'''

52
main.py
View File

@@ -2,7 +2,7 @@ from experiments.experiment import experiments
from Image_Process.load_and_ImageGenerator import Load_ImageGenerator
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Training_Tools.Tools import Tool
from model_data_processing.processing import shuffle_data, Balance_Process
from model_data_processing.processing import Balance_Process
from Load_process.LoadData import Load_Data_Prepare
from Calculate_Process.Calculate import Calculate
from merge_class.merge import merge
@@ -18,12 +18,12 @@ if __name__ == "__main__":
else:
print(f"CUDA可用數量為{torch.cuda.device_count()}\n")
Status = 2 # 決定要使用什麼資料集
# 要換不同資料集就要改
# 參數設定
tool = Tool()
tool.Set_Labels()
tool.Set_Save_Roots()
Status = 2 # 決定要使用什麼資料集
Labels = tool.Get_Data_Label()
Trainig_Root, Testing_Root, Validation_Root = tool.Get_Save_Roots(Status) # 一般的
Generator_Root = tool.Get_Generator_Save_Roots(Status)
@@ -34,31 +34,47 @@ if __name__ == "__main__":
Label_Length = len(Labels)
Classification = 2 # 分類數量
Model_Name = "Xception" # 取名,告訴我我是用哪個模型(可能是預處理模型/自己設計的模型)
Experiment_Name = "Xception Skin to train Normal stomach cancer"
Generator_Batch_Size = 50
Epoch = 10000
Train_Batch_Size = 128
Convolution_Name = "block14_sepconv2"
Prepare = Load_Data_Prepare()
loading_data = Load_ImageGenerator(Trainig_Root, Testing_Root, Validation_Root, Generator_Root, Labels)
experiment = experiments(tool, Classification, Status)
experiment = experiments(Model_Name, Experiment_Name, Generator_Batch_Size, Epoch, Train_Batch_Size, Convolution_Name, tool, Classification, Status)
image_processing = Read_image_and_Process_image()
Merge = merge()
Calculate_Tool = Calculate()
counter = 5
Batch_Size = 128
for i in range(0, counter, 1): # 做規定次數的訓練
# 讀取資料
Data_Dict_Data = loading_data.process_main(Label_Length)
Data_Dict_Data, Train_Size = Balance_Process(Data_Dict_Data, Labels)
# Data_Dict_Data, Train_Size = Balance_Process(Data_Dict_Data, Labels)
# 輸出內容
print("每個類別各有 " + str(Train_Size) + " 筆資料")
# 輸出內容
i = 0
total = 0
for j in range(Label_Length):
total += Train_Size
Length_Array = []
for label in Labels:
length = len(Data_Dict_Data[label])
Length_Array.append(length)
print(f"類別 {i}{str(length)} 筆資料")
total += length
i += 1
print("總共有 " + str(total) + " 筆資料")
# 做出跟資料相同數量的Label
Classes = []
i = 0
for encording in Encording_Label:
Classes.append(image_processing.make_label_list(Train_Size, encording))
Classes.append(image_processing.make_label_list(Length_Array[i], encording))
i += 1
# 將資料做成Dict的資料型態
Prepare.Set_Final_Dict_Data(Labels, Data_Dict_Data, Classes, Label_Length)
@@ -74,17 +90,17 @@ if __name__ == "__main__":
training_label = Merge.merge_all_image_data(training_label, Final_Dict_Data[keys[i]]) # 將訓練資料合併成一個list
start = time.time()
trains_Data_Image = image_processing.Data_Augmentation_Image(training_data) # 多執行續讀檔
total_trains, train_label = shuffle_data(trains_Data_Image, training_label) # 將資料打亂
training_data = list(total_trains) # 轉換資料型態
trains_Data_Image = image_processing.Data_Augmentation_Image(training_data) # 讀檔
# total_trains, train_label = shuffle_data(trains_Data_Image, training_label) # 將資料打亂
# training_data = list(total_trains) # 轉換資料型態
training_data, train_label = image_processing.image_data_processing(trains_Data_Image, training_label) # 將讀出來的檔做正規化。降label轉成numpy array 格式
Training_Dataset = tool.Convert_Data_To_DataSet(training_data, train_label, Train_Batch_Size)
print(len(training_data))
training_data, train_label = image_processing.image_data_processing(training_data, train_label) # 將讀出來的檔做正規化。降label轉成numpy array 格式
# training_data = image_processing.normalization(training_data)
# training_data = training_data.permute(0, 3, 1, 2)
print(training_data.shape)
end = time.time()
print("\n\n\n讀取訓練資料(70000)執行時間:%f\n\n" % (end - start))
@@ -117,7 +133,7 @@ if __name__ == "__main__":
# trains.append(All_Monkeypox_Data)
# training_label.append([0, 0, 0, 0, 1, 0, 0])
loss, accuracy, precision, recall, AUC, f = experiment.processing_main(training_data, train_label, i) # 執行訓練方法
loss, accuracy, precision, recall, AUC, f = experiment.processing_main(Training_Dataset, i) # 執行訓練方法
Calculate_Tool.Append_numbers(loss, accuracy, precision, recall, AUC, f)
print("實驗結果")

View File

@@ -29,17 +29,20 @@ def shuffle_data(image, label, mode = 1):
return shuffle_image, shuffle_label
else:
shuffle_image = {
label[0] : [],
label[1] : [],
label[0] : []
}
for i in range(1, len(label)):
shuffle_image.update({label[i] : []})
for Label in label:
shuffle_image[Label] = image[Label]
random.shuffle(shuffle_image[Label])
return shuffle_image
def Balance_Process(Data_Content, Labels):
Data_Dict_Data = shuffle_data(Data_Content, Labels, 2)
def Balance_Process(Data_Dict_Data, Labels):
# Data_Dict_Data = shuffle_data(Data_Content, Labels, 2)
Train_Size = 0
Train_Size = min(len(Data_Dict_Data[Labels[0]]), len(Data_Dict_Data[Labels[1]]))

View File

@@ -1,6 +1,5 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from sklearn.model_selection import train_test_split
from model_data_processing.processing import shuffle_data
from merge_class.merge import merge
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Load_process.LoadData import Load_Data_Prepare, Process_File, Load_Data_Tools