20250214 Commit: Modify the PyTorch scripts to use DataLoader and Sampler
@@ -1,5 +1,4 @@
 from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
 from model_data_processing.processing import shuffle_data
 from merge_class.merge import merge
-from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
 from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
@@ -29,8 +28,11 @@ class Load_Indepentend_Data():

         classify_image = []
         Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot) # read the test dataset roots
+        Total_Dict_Data_Root = self.Specified_Amount_Of_Data(Total_Dict_Data_Root) # shuffle and keep the specified number of samples
-        Total_List_Data_Root = [Total_Dict_Data_Root[self.Labels[0]], Total_Dict_Data_Root[self.Labels[1]]]
+        # Total_Dict_Data_Root = Balance_Process(Total_Dict_Data_Root, self.Labels) # shuffle and keep the specified number of samples
+
+        Total_List_Data_Root = []
+        for Label in self.Labels:
+            Total_List_Data_Root.append(Total_Dict_Data_Root[Label])

         test_label, Classify_Label = [], []
         i = 0 # counter for classify_image; also tracks the total number of samples
@@ -48,7 +50,7 @@ class Load_Indepentend_Data():
         test = []
         test = image_processing.Data_Augmentation_Image(original_test_root)
         test, test_label = image_processing.image_data_processing(test, original_test_label)
-        test = image_processing.normalization(test)
+        # test = image_processing.normalization(test)

         return test, test_label
@@ -62,4 +64,17 @@ class Load_Indepentend_Data():
         Get_Data_Dict_Content = Prepare.Get_Data_Dict()
         Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, self.Labels)

         return Total_Data_Roots

+    def Specified_Amount_Of_Data(self, Data): # re-process the data after shuffling it
+        Data = shuffle_data(Data, self.Labels, 2)
+        tmp = []
+        if len(Data[self.Labels[0]]) >= len(Data[self.Labels[1]]):
+            for i in range(len(Data[self.Labels[1]])):
+                tmp.append(Data[self.Labels[0]][i])
+            Data[self.Labels[0]] = tmp
+        else:
+            for i in range(len(Data[self.Labels[0]])):
+                tmp.append(Data[self.Labels[1]][i])
+            Data[self.Labels[1]] = tmp
+        return Data
Binary file not shown.
@@ -1,7 +1,7 @@
 import pandas as pd
 from torch.nn import functional
 import torch
-from torch.utils.data import Dataset, DataLoader
+from torch.utils.data import Dataset, DataLoader, RandomSampler

 class ListDataset(Dataset):
     def __init__(self, data_list, labels_list):
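The diff shows only the first lines of `ListDataset`. For context, a minimal list-backed `Dataset` of this shape would typically look like the sketch below; the `__len__` and `__getitem__` bodies are assumptions, not the repository's actual code:

```python
from torch.utils.data import Dataset

class ListDataset(Dataset):
    """Wraps parallel lists of samples and labels so a DataLoader can index them."""
    def __init__(self, data_list, labels_list):
        self.data_list = data_list
        self.labels_list = labels_list

    def __len__(self):
        # the DataLoader and Sampler use this to know how many samples exist
        return len(self.data_list)

    def __getitem__(self, index):
        # returns one (sample, label) pair; the DataLoader collates pairs into batches
        return self.data_list[index], self.labels_list[index]
```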
@@ -34,10 +34,6 @@ class Tool:
         self.__Normal_ImageGenerator_Data_Root = ""
         self.__Comprehensive_Generator_Root = ""

-        self.Training_Zip = ""
-        self.Validation_Zip = ""
-        self.Testing_Zip = ""
-
         self.__Labels = []
         self.__OneHot_Encording = []
         pass
@@ -70,15 +66,7 @@ class Tool:
         Counter = torch.tensor(Counter)

         self.__OneHot_Encording = functional.one_hot(Counter, len(content))
         pass
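`functional.one_hot` turns integer class indices into one-hot vectors; the second argument is the number of classes. A quick illustration with made-up values:

```python
import torch
from torch.nn import functional

counter = torch.tensor([0, 1, 1, 0])          # integer class indices
one_hot = functional.one_hot(counter, 2)      # 2 = number of classes
print(one_hot)
# tensor([[1, 0],
#         [0, 1],
#         [0, 1],
#         [1, 0]])
```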
-    def Set_Data_To_DataSet(self, Datas : list, Labels : list, Batch_Size : int):
-        # create the Dataset
-        dataset = ListDataset(Datas, Labels)
-
-        return DataLoader(dataset = dataset, batch_size = Batch_Size, shuffle=True, num_workers = 0, pin_memory=True)
-
-        pass
-
     def Get_Data_Label(self):
         '''
@@ -112,5 +100,14 @@ class Tool:
     def Get_OneHot_Encording_Label(self):
         return self.__OneHot_Encording

-    def Get_Zip(self):
-        return self.Training_Zip, self.Testing_Zip, self.Validation_Zip
+    def Convert_Data_To_DataSet(self, Datas : list, Labels : list, Batch_Size : int):
+        seed = 42 # any fixed integer works as the seed
+        # build a seeded random-number generator
+        generator = torch.Generator()
+        generator.manual_seed(seed)
+
+        # create the Dataset
+        list_dataset = ListDataset(Datas, Labels)
+        sampler = RandomSampler(list_dataset, generator = generator) # create the Sampler
+
+        return DataLoader(dataset = list_dataset, batch_size = Batch_Size, num_workers = 0, pin_memory=True, sampler = sampler)
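The point of `Convert_Data_To_DataSet` over the removed `Set_Data_To_DataSet` is reproducibility: `shuffle=True` reshuffles differently on every run, while a `RandomSampler` driven by an explicitly seeded `torch.Generator` produces the same order each time. Note that `DataLoader` forbids combining `shuffle=True` with `sampler=`, which is why the new method drops the `shuffle` flag. A self-contained sketch of the same pattern (dummy tensors and an illustrative seed, not the repository's data):

```python
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

dataset = TensorDataset(torch.arange(10).float().unsqueeze(1), torch.arange(10))

generator = torch.Generator()
generator.manual_seed(42)                      # fixed seed -> identical order on every run

sampler = RandomSampler(dataset, generator=generator)
loader = DataLoader(dataset, batch_size=4, sampler=sampler)

for _, batch_labels in loader:
    print(batch_labels)                        # same shuffled sequence on each execution
```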
Binary file not shown.
@@ -1,5 +1,4 @@
 from tqdm import tqdm
 from torch.nn import functional
 import torch
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 from torchmetrics.functional import auroc
@@ -21,10 +20,6 @@ class All_Step:
         self.Epoch = Epoch
         self.Number_Of_Classes = Number_Of_Classes

-        self.Training_Data_Length = len(list(zip(*Training_Data_And_Label))[1])
-        self.Testing_Data_Length = len(list(zip(*Test_Data_And_Label))[1])
-        self.Validation_Data_Length = len(list(zip(*Validation_Data_And_Label))[1])
-
         pass

     def Training_Step(self, model_name, counter):
@@ -50,7 +45,7 @@ class All_Step:
         for inputs, labels in epoch_iterator:
             # print(inputs.shape)
             # the input is 3-D but the model expects 4-D, so add one more dimension
-            inputs = np.expand_dims(inputs, axis = 0)
+            # inputs = np.expand_dims(inputs, axis = 0)
             # print(inputs.shape)
             inputs, labels = torch.tensor(inputs).to(self.device), torch.tensor(labels).to(self.device)
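Commenting out `np.expand_dims` follows from the switch to DataLoader: the loader collates individual `[C, H, W]` samples into `[B, C, H, W]` batches, so the extra dimension the old iteration needed is now supplied automatically. A small sketch (the shapes are illustrative):

```python
import torch
from torch.utils.data import TensorDataset, DataLoader

images = torch.randn(8, 3, 224, 224)           # eight [C, H, W] samples
labels = torch.randint(0, 2, (8,))
loader = DataLoader(TensorDataset(images, labels), batch_size=4)

inputs, targets = next(iter(loader))
print(inputs.shape)                            # torch.Size([4, 3, 224, 224]): already 4-D
```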
@@ -70,7 +65,7 @@ class All_Step:
             all_train_preds.append(preds.cpu().numpy())
             all_train_labels.append(labels)

-        Training_Loss = running_loss / self.Training_Data_Length
+        Training_Loss = running_loss / len(self.Training_Data_And_Label)
         train_accuracy = accuracy_score(all_train_labels, all_train_preds)

         train_losses.append(Training_Loss)
@@ -84,8 +79,8 @@ class All_Step:
         all_val_labels = []

         with torch.no_grad():
-            for inputs, labels in self.Validation_Data_And_Label:
-                inputs = np.expand_dims(inputs, axis = 0)
+            for batch_Index, (inputs, labels) in self.Validation_Data_And_Label:
+                # inputs = np.expand_dims(inputs, axis = 0)
                 inputs, labels = torch.tensor(inputs).to(self.device), torch.tensor(labels).to(self.device)

                 outputs = self.Model(inputs)
@@ -103,7 +98,7 @@ class All_Step:
                 all_val_labels.append(labels)

         # compute the validation loss and accuracy
-        val_loss /= self.Validation_Data_Length
+        val_loss /= len(self.Validation_Data_And_Label)
         val_accuracy = accuracy_score(all_val_labels, all_val_preds)

         val_losses.append(val_loss)
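One caveat on the new loop header: unpacking `batch_Index, (inputs, labels)` only works if the iterable yields index/batch pairs, which a plain DataLoader does not; the conventional way to get a batch index is `enumerate`. A hedged sketch of that pattern (the helper name and criterion are illustrative, not from the repository):

```python
import torch

def run_validation(model, val_loader, criterion):
    """Illustrative helper: enumerate() supplies the batch index a DataLoader lacks."""
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_index, (inputs, labels) in enumerate(val_loader):
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
    return val_loss / len(val_loader)   # average over batches, as in the commit
```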
@@ -126,8 +121,8 @@ class All_Step:
         True_Label, Predict_Label = [], []
         loss = 0.0
         with torch.no_grad():
-            for images, labels in self.Test_Data_And_Label:
-                images = np.expand_dims(images, axis = 0)
+            for batch_Index, (images, labels) in self.Test_Data_And_Label:
+                # images = np.expand_dims(images, axis = 0)
                 images, labels = torch.tensor(images).to(self.device), torch.tensor(labels).to(self.device)

                 outputs = cnn_model(images)
@@ -141,7 +136,7 @@ class All_Step:
                 # Predict_Label.extend(predicted.cpu().numpy())
                 # True_Label.extend(labels.cpu().numpy())

-        loss /= self.Testing_Data_Length
+        loss /= len(self.Test_Data_And_Label)

         accuracy = accuracy_score(True_Label, Predict_Label)
         precision = precision_score(True_Label, Predict_Label)
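The divisor change is consistent across training, validation, and testing: the accumulated loss is now divided by `len(dataloader)` (the number of batches) instead of the dataset length. Since loss functions such as `nn.CrossEntropyLoss` average within each batch by default, dividing the sum of batch losses by the batch count gives (up to a smaller final batch) the mean per-sample loss, whereas dividing by the sample count would understate it. A sketch with dummy values:

```python
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

dataset = TensorDataset(torch.randn(10, 4), torch.randint(0, 2, (10,)))
loader = DataLoader(dataset, batch_size=4)       # three batches: 4 + 4 + 2 samples
model = nn.Linear(4, 2)
criterion = nn.CrossEntropyLoss()                # averages within each batch

running_loss = sum(criterion(model(x), y).item() for x, y in loader)
print(running_loss / len(loader))                # mean loss per batch: what the commit reports
```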
Binary file not shown.
Binary file not shown.
@@ -16,7 +16,7 @@ import torch.nn as nn
 import time

 class experiments():
-    def __init__(self, tools, Number_Of_Classes, status):
+    def __init__(self, Model_Name, Experiment_Name, Generator_Batch_Size, Epoch, Train_Batch_Size, Convolution_Name, tools, Number_Of_Classes, status):
         '''
         # the experiment object

@@ -53,13 +53,13 @@ class experiments():
         self.ImageGenerator = Image_generator("", "")
         self.merge = merge()

-        self.model_name = "Xception" # a name that tells me which model is used (a pretrained model or one I designed myself)
-        self.experiment_name = "Xception Skin to train Normal stomach cancer"
-        self.generator_batch_size = 50
-        self.epoch = 10000
-        self.train_batch_size = 128
+        self.model_name = Model_Name # a name that tells me which model is used (a pretrained model or one I designed myself)
+        self.experiment_name = Experiment_Name
+        self.generator_batch_size = Generator_Batch_Size
+        self.epoch = Epoch
+        self.train_batch_size = Train_Batch_Size
         self.layers = 1
-        self.convolution_name = "block14_sepconv2"
+        self.convolution_name = Convolution_Name
         self.Number_Of_Classes = Number_Of_Classes

         self.Grad = ""
@@ -68,7 +68,7 @@ class experiments():

         pass

-    def processing_main(self, train, train_label, counter):
+    def processing_main(self, Training_Dataset, counter):
         Train, Test, Validation = self.Topic_Tool.Get_Save_Roots(self.Status) # change this when switching to a different dataset

         start = time.time()
@@ -82,20 +82,13 @@ class experiments():
         self.test, self.test_label = self.cut_image.test, self.cut_image.test_label
         self.validation, self.validation_label = self.cut_image.validation, self.cut_image.validation_label

-        train = self.Preprocess_Image_Data(train)
-        self.test = self.Preprocess_Image_Data(self.test)
-        self.validation = self.Preprocess_Image_Data(self.validation)
-
-        self.Topic_Tool.Set_Data_To_DataSet(train, train_label, "Training")
-        self.Topic_Tool.Set_Data_To_DataSet(self.test, self.test_label, "Testing")
-        self.Topic_Tool.Set_Data_To_DataSet(self.validation, self.validation_label, "Validation")
-
-        self.Training_Zip, self.Testing_Zip, self.Validation_Zip = self.Topic_Tool.Get_Zip()
+        Testing_Dataset = self.Topic_Tool.Convert_Data_To_DataSet(self.test, self.test_label, 1)
+        Validation_Dataset = self.Topic_Tool.Convert_Data_To_DataSet(self.validation, self.validation_label, self.train_batch_size)

         # self.Grad = Grad_CAM(self.Topic_Tool.Get_Data_Label(), self.test_label, self.experiment_name, self.convolution_name)

         cnn_model = self.construct_model() # call the function that loads the model
-        step = All_Step(self.Training_Zip, self.Testing_Zip, self.Validation_Zip, cnn_model, self.epoch, self.Number_Of_Classes)
+        step = All_Step(Training_Dataset, Testing_Dataset, Validation_Dataset, cnn_model, self.epoch, self.Number_Of_Classes)

         # model_dir = '../save_the_best_model/Topic/Remove background with Normal image/best_model( 2023-10-17 )-2.h5' # the path where model weights are saved; every model has its own weights file
         # if os.path.exists(model_dir): # if this file exists
@@ -125,20 +118,6 @@ class experiments():

         cnn_model = cnn_model.to(self.device)
         return cnn_model

-    def Preprocess_Image_Data(self, Image_Datas):
-        transform = self.ImageGenerator.Generator_Content(5)
-
-        Transform_Image = []
-        for Image in Image_Datas:
-            Image = transform(Image)
-            Transform_Image.append(Image)
-
-            # Transform_Image.append(self.ImageGenerator._apply_transform(transform, Image))
-        Transform_Image = torch.tensor(np.array(Transform_Image))
-        print(Transform_Image.shape)
-        return Transform_Image
-
-
     def record_matrix_image(self, True_Labels, Predict_Labels, model_name, index):
         '''Plot the confusion matrix (heat map)'''
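The deleted `Preprocess_Image_Data` transformed every image eagerly and stacked the results into one tensor. An alternative, shown here only as a sketch of common PyTorch practice rather than the repository's approach, is to apply the transform lazily inside `__getitem__`, so the work runs per sample as batches are drawn:

```python
from torch.utils.data import Dataset

class TransformListDataset(Dataset):
    """Illustrative variant of ListDataset that transforms each sample on access."""
    def __init__(self, data_list, labels_list, transform=None):
        self.data_list = data_list
        self.labels_list = labels_list
        self.transform = transform            # any torchvision-style callable

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, index):
        sample = self.data_list[index]
        if self.transform is not None:
            sample = self.transform(sample)   # applied per sample, not in a pre-pass
        return sample, self.labels_list[index]
```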
main.py (52 changed lines)
@@ -2,7 +2,7 @@ from experiments.experiment import experiments
 from Image_Process.load_and_ImageGenerator import Load_ImageGenerator
 from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
 from Training_Tools.Tools import Tool
-from model_data_processing.processing import shuffle_data, Balance_Process
+from model_data_processing.processing import Balance_Process
 from Load_process.LoadData import Load_Data_Prepare
 from Calculate_Process.Calculate import Calculate
 from merge_class.merge import merge
@@ -18,12 +18,12 @@ if __name__ == "__main__":
     else:
         print(f"CUDA is available; device count: {torch.cuda.device_count()}\n")

-    Status = 2 # decide which dataset to use
-    # change this when switching to a different dataset
+    # parameter settings
     tool = Tool()
     tool.Set_Labels()
     tool.Set_Save_Roots()

+    Status = 2 # decide which dataset to use
     Labels = tool.Get_Data_Label()
     Trainig_Root, Testing_Root, Validation_Root = tool.Get_Save_Roots(Status) # the standard ones
     Generator_Root = tool.Get_Generator_Save_Roots(Status)
@@ -34,31 +34,47 @@ if __name__ == "__main__":
     Label_Length = len(Labels)
     Classification = 2 # number of classes

+    Model_Name = "Xception" # a name that tells me which model is used (a pretrained model or one I designed myself)
+    Experiment_Name = "Xception Skin to train Normal stomach cancer"
+    Generator_Batch_Size = 50
+    Epoch = 10000
+    Train_Batch_Size = 128
+    Convolution_Name = "block14_sepconv2"
+
     Prepare = Load_Data_Prepare()
     loading_data = Load_ImageGenerator(Trainig_Root, Testing_Root, Validation_Root, Generator_Root, Labels)
-    experiment = experiments(tool, Classification, Status)
+    experiment = experiments(Model_Name, Experiment_Name, Generator_Batch_Size, Epoch, Train_Batch_Size, Convolution_Name, tool, Classification, Status)
     image_processing = Read_image_and_Process_image()
     Merge = merge()
     Calculate_Tool = Calculate()

     counter = 5
-    Batch_Size = 128

     for i in range(0, counter, 1): # run the training the specified number of times
         # load the data
         Data_Dict_Data = loading_data.process_main(Label_Length)
-        Data_Dict_Data, Train_Size = Balance_Process(Data_Dict_Data, Labels)
+        # Data_Dict_Data, Train_Size = Balance_Process(Data_Dict_Data, Labels)

-        # print a summary
-        print("Each class has " + str(Train_Size) + " samples")
+        # print a summary
+        i = 0
         total = 0
-        for j in range(Label_Length):
-            total += Train_Size
+        Length_Array = []
+        for label in Labels:
+            length = len(Data_Dict_Data[label])
+            Length_Array.append(length)
+            print(f"Class {i} has {str(length)} samples")
+            total += length
+            i += 1

         print("Total: " + str(total) + " samples")

         # create a label list matching the amount of data
         Classes = []
+        i = 0
         for encording in Encording_Label:
-            Classes.append(image_processing.make_label_list(Train_Size, encording))
+            Classes.append(image_processing.make_label_list(Length_Array[i], encording))
+            i += 1

         # pack the data into a dict
         Prepare.Set_Final_Dict_Data(Labels, Data_Dict_Data, Classes, Label_Length)
@@ -74,17 +90,17 @@ if __name__ == "__main__":
             training_label = Merge.merge_all_image_data(training_label, Final_Dict_Data[keys[i]]) # merge the training data into one list

         start = time.time()
-        trains_Data_Image = image_processing.Data_Augmentation_Image(training_data) # multithreaded file reading
-        total_trains, train_label = shuffle_data(trains_Data_Image, training_label) # shuffle the data
-        training_data = list(total_trains) # convert the data type
+        trains_Data_Image = image_processing.Data_Augmentation_Image(training_data) # read the files

+        # total_trains, train_label = shuffle_data(trains_Data_Image, training_label) # shuffle the data
+        # training_data = list(total_trains) # convert the data type

+        training_data, train_label = image_processing.image_data_processing(trains_Data_Image, training_label) # normalize the loaded files and convert the labels to numpy arrays
+        Training_Dataset = tool.Convert_Data_To_DataSet(training_data, train_label, Train_Batch_Size)

-        print(len(training_data))
-        training_data, train_label = image_processing.image_data_processing(training_data, train_label) # normalize the loaded files and convert the labels to numpy arrays
         # training_data = image_processing.normalization(training_data)
         # training_data = training_data.permute(0, 3, 1, 2)

+        print(training_data.shape)

         end = time.time()
         print("\n\n\nTime to read the training data (70000): %f seconds\n\n" % (end - start))

@@ -117,7 +133,7 @@ if __name__ == "__main__":
         # trains.append(All_Monkeypox_Data)
         # training_label.append([0, 0, 0, 0, 1, 0, 0])

-        loss, accuracy, precision, recall, AUC, f = experiment.processing_main(training_data, train_label, i) # run the training routine
+        loss, accuracy, precision, recall, AUC, f = experiment.processing_main(Training_Dataset, i) # run the training routine
         Calculate_Tool.Append_numbers(loss, accuracy, precision, recall, AUC, f)

         print("Experiment results")
Binary file not shown.
Binary file not shown.
@@ -29,17 +29,20 @@ def shuffle_data(image, label, mode = 1):
         return shuffle_image, shuffle_label
     else:
         shuffle_image = {
-            label[0] : [],
-            label[1] : [],
+            label[0] : []
         }

+        for i in range(1, len(label)):
+            shuffle_image.update({label[i] : []})

         for Label in label:
             shuffle_image[Label] = image[Label]
             random.shuffle(shuffle_image[Label])

         return shuffle_image

-def Balance_Process(Data_Content, Labels):
-    Data_Dict_Data = shuffle_data(Data_Content, Labels, 2)
+def Balance_Process(Data_Dict_Data, Labels):
+    # Data_Dict_Data = shuffle_data(Data_Content, Labels, 2)
     Train_Size = 0

     Train_Size = min(len(Data_Dict_Data[Labels[0]]), len(Data_Dict_Data[Labels[1]]))
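`Balance_Process` balances classes by truncating every class to the size of the smallest, which discards data. Since this commit already introduces samplers, a `WeightedRandomSampler` is a common alternative that keeps all samples and instead draws the classes with equal probability; this is a sketch of that option, not what the repository does:

```python
import torch
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler

labels = torch.tensor([0] * 90 + [1] * 10)         # imbalanced: 90 vs 10 samples
data = torch.randn(100, 4)

class_counts = torch.bincount(labels).float()      # tensor([90., 10.])
weights = 1.0 / class_counts[labels]               # rarer class -> larger weight
sampler = WeightedRandomSampler(weights, num_samples=len(labels), replacement=True)

loader = DataLoader(TensorDataset(data, labels), batch_size=20, sampler=sampler)
batch_data, batch_labels = next(iter(loader))
print(batch_labels.float().mean())                 # ~0.5: both classes drawn about equally
```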
@@ -1,6 +1,5 @@
 from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
 from sklearn.model_selection import train_test_split
 from model_data_processing.processing import shuffle_data
 from merge_class.merge import merge
-from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
 from Load_process.LoadData import Load_Data_Prepare, Process_File, Load_Data_Tools