20250215 Commits: The training step can now run through a DataLoader, and Image_generator is modified to use PyTorch structures

whitekirin 2025-02-15 02:16:38 +08:00
parent bff6e4251f
commit 96245bd095
11 changed files with 39 additions and 36 deletions

View File

@@ -3,6 +3,7 @@ from _validation.ValidationTheEnterData import validation_the_enter_data
 from Load_process.file_processing import Process_File
 from Load_process.LoadData import Load_Data_Prepare
 from torchvision import transforms
+from Training_Tools.Tools import Tool

 class Image_generator():
     '''Performs data augmentation'''
@@ -70,42 +71,27 @@ class Image_generator():
         stardand = which image augmentation to use
         '''
         File = Process_File()
+        image_processing = Read_image_and_Process_image()
+        tool = Tool()
+        Classes = []
+        Transform = self.Generator_Content(stardand)

         for label in self.Labels:  # augment every class separately
-            image = self.load_data(label)  # fetch the data
+            image = self.load_data(stardand)  # fetch the data
             save_root = File.Make_Save_Root(label, save_roots)  # join the paths
+            Classes = image_processing.make_label_list(len(image), "1")
+            Training_Dataset = tool.Convert_Data_To_DataSet_And_Put_To_Dataloader(image, Classes, 1)

             if File.JudgeRoot_MakeDir(save_root):  # create the target folder if it does not exist yet
                 print("The file is exist")

-            train_Generator = self.Generator_Content(stardand)  # choose which augmentation to apply
-            stop_counter = 0
-            for batches in train_Generator.flow(image, batch_size = 16):  # run the augmentation
-                '''
-                The batch_size parameter is set to 12, so every batch drawn from the training set
-                holds 12 images; each iteration over train_Generator fetches one batch and
-                processes its images one by one.
-                While iterating, counter counts the saved NumPy array files and i counts the
-                iterations; when i is a multiple of 417, the if i % 417 == 0: condition fires and
-                break exits the outer loop, so at most 417 batches are produced. If the number of
-                images is not divisible by 12, the last batch holds fewer than 12 images, so the
-                number of saved NumPy files may be smaller than the image count divided by 12.
-                In short, by design the code saves at most 417 batches of images as NumPy array
-                files, and the final batch may contain fewer than 12 images.
-                * train_Generator: an ImageDataGenerator that yields image data from the training
-                  set, usable for training a neural network.
-                * flow(): an ImageDataGenerator method that yields one batch of images; image is
-                  the input data and batch_size the number of images per batch.
-                * for batch in train_Generator.flow(image, batch_size = 12): iterates over the
-                  generated batches, binding each one to batch.
-                * for batches in batch: iterates over the images in a batch, binding each to batches.
-                * self.save_dir("image_" + label + str(counter) + ".npy", batches): saves each image
-                  as a NumPy array file named with the label and the counter.
-                * counter += 1: advances the counter that numbers the saved NumPy files.
-                * i += 1: advances the iteration count so the loop can stop after 417 batches.
-                * if i % 417 == 0: break checks whether i reached a multiple of 417 and, if so,
-                  exits the outer loop to stop saving.
-                '''
-                for batch in batches:  # save every image in the batch
-                    File.Save_CV2_File("image_" + label + str(data_size) + ".png", save_root, batch)  # save to disk
-                    data_size += 1
-                stop_counter += 1
-                if stop_counter >= self.stop:  # stop once the configured number of batches is reached
-                    break
-            print(str(label) + ": " + str(stop_counter) + " records")
+            for batch_idx, (images, labels) in enumerate(Training_Dataset):
+                for i, img in enumerate(images):
+                    img = Transform(img)
+                    img_pil = transforms.ToPILImage()(img)
+                    File.Save_PIL_File("image_" + label + str(data_size) + ".png", save_root, img_pil)  # save to disk
         return data_size
@@ -113,6 +99,7 @@ class Image_generator():
         '''Images are read by myself'''
         image_processing = Read_image_and_Process_image()
         img = image_processing.Data_Augmentation_Image(self.get_data_roots[label])
+
         self.stop = len(img) * 1.5
         return img
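
Note on the rewritten save loop above: it follows the standard PyTorch pattern of wrapping in-memory images in a Dataset, iterating them through a DataLoader, applying a torchvision transform, and converting each tensor back to a PIL image before saving. A minimal self-contained sketch of that pattern, with made-up helper names and save paths standing in for the project's Process_File API:

    import os
    from torch.utils.data import Dataset, DataLoader
    from torchvision import transforms

    class ImageListDataset(Dataset):
        """Minimal dataset over an in-memory list of HxWxC numpy images."""
        def __init__(self, images):
            self.images = images
            self.to_tensor = transforms.ToTensor()  # HWC uint8 -> CHW float in [0, 1]

        def __len__(self):
            return len(self.images)

        def __getitem__(self, idx):
            return self.to_tensor(self.images[idx])

    def augment_and_save(images, transform, save_dir, label):
        """Run `transform` over every image and write the results out as PNGs."""
        os.makedirs(save_dir, exist_ok=True)
        loader = DataLoader(ImageListDataset(images), batch_size=1)
        to_pil = transforms.ToPILImage()
        count = 0
        for batch in loader:               # batch has shape (1, C, H, W)
            augmented = transform(batch[0])
            to_pil(augmented).save(os.path.join(save_dir, f"image_{label}{count}.png"))
            count += 1
        return count

For example, augment_and_save(images, transforms.RandomHorizontalFlip(p=1.0), "augmented", "cancer") would flip and save every image of one class.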

View File

@@ -15,5 +15,5 @@ class Entropy_Loss(nn.Module):
         # the input shape is in question
         # print(f"Label result: {labels}, result: {outputs}")
         labels = labels.float()
-        loss = functional.binary_cross_entropy(outputs[0], labels)
+        loss = functional.binary_cross_entropy(outputs, labels)
         return loss
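
The fix above stops indexing the output batch: functional.binary_cross_entropy requires its input and target to have the same shape, and the input must already be probabilities (post-sigmoid). A small shape-contract sketch with made-up tensors:

    import torch
    from torch.nn import functional

    outputs = torch.sigmoid(torch.randn(8, 1))    # probabilities, shape (8, 1)
    labels = torch.randint(0, 2, (8, 1)).float()  # targets: same shape, float dtype

    loss = functional.binary_cross_entropy(outputs, labels)  # scalar, mean over batch
    print(loss.item())

    # outputs[0] has shape (1,), which does not match labels' (8, 1);
    # that mismatch is presumably what the old line tripped over.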

View File

@@ -2,17 +2,27 @@ import pandas as pd
 from torch.nn import functional
 import torch
 from torch.utils.data import Dataset, DataLoader, RandomSampler
+import torchvision.transforms as transforms

 class ListDataset(Dataset):
     def __init__(self, data_list, labels_list):
         self.data = data_list
         self.labels = labels_list

+    def Transform(self):
+        return transforms.Compose([
+            transforms.ToTensor()  # converts a PIL Image or numpy array to a tensor and reorders the channels to (C, H, W)
+        ])

     def __len__(self):
         return len(self.data)

     def __getitem__(self, idx):
         sample = self.data[idx]
+        Transform_Content = self.Transform()
+        sample = Transform_Content(sample)
         label = self.labels[idx]
         return sample, label
@@ -100,7 +110,7 @@ class Tool:
     def Get_OneHot_Encording_Label(self):
         return self.__OneHot_Encording

-    def Convert_Data_To_DataSet(self, Datas : list, Labels : list, Batch_Size : int):
+    def Convert_Data_To_DataSet_And_Put_To_Dataloader(self, Datas : list, Labels : list, Batch_Size : int):
         seed = 42  # any integer used as the seed
         # build the random-number generator
         generator = torch.Generator()
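
The renamed helper now both builds the dataset and wraps it in a DataLoader. Only the three visible body lines come from the commit, so the rest of this sketch is assumed; it pairs the ListDataset above with the seeded generator and RandomSampler the hunk shows:

    import torch
    from torch.utils.data import DataLoader, RandomSampler

    def convert_data_to_dataset_and_put_to_dataloader(datas, labels, batch_size):
        dataset = ListDataset(datas, labels)  # the Dataset subclass defined earlier in this file
        seed = 42
        generator = torch.Generator()
        generator.manual_seed(seed)           # fixed seed -> reproducible shuffling
        sampler = RandomSampler(dataset, generator=generator)
        return DataLoader(dataset, batch_size=batch_size, sampler=sampler)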

View File

@@ -43,7 +43,7 @@ class All_Step:
         for inputs, labels in epoch_iterator:
-            # print(inputs.shape)
+            print(inputs.shape)
             # the input is 3-dimensional but the model expects 4 dimensions, so add one more
             # inputs = np.expand_dims(inputs, axis = 0)
             # print(inputs.shape)

View File

@@ -82,8 +82,8 @@ class experiments():
         self.test, self.test_label = self.cut_image.test, self.cut_image.test_label
         self.validation, self.validation_label = self.cut_image.validation, self.cut_image.validation_label
-        Testing_Dataset = self.Topic_Tool.Convert_Data_To_DataSet(self.test, self.test_label, 1)
-        Validation_Dataset = self.Topic_Tool.Convert_Data_To_DataSet(self.validation, self.validation_label, self.train_batch_size)
+        Testing_Dataset = self.Topic_Tool.Convert_Data_To_DataSet_And_Put_To_Dataloader(self.test, self.test_label, 1)
+        Validation_Dataset = self.Topic_Tool.Convert_Data_To_DataSet_And_Put_To_Dataloader(self.validation, self.validation_label, self.train_batch_size)
         # self.Grad = Grad_CAM(self.Topic_Tool.Get_Data_Label(), self.test_label, self.experiment_name, self.convolution_name)

main.py
View File

@@ -38,7 +38,7 @@ if __name__ == "__main__":
     Experiment_Name = "Xception Skin to train Normal stomach cancer"
     Generator_Batch_Size = 50
     Epoch = 10000
-    Train_Batch_Size = 128
+    Train_Batch_Size = 50
     Convolution_Name = "block14_sepconv2"

     Prepare = Load_Data_Prepare()
@@ -96,7 +96,13 @@ if __name__ == "__main__":
     # training_data = list(total_trains)  # convert the data type
     training_data, train_label = image_processing.image_data_processing(trains_Data_Image, training_label)  # normalize the loaded files and convert the labels to numpy array format
-    Training_Dataset = tool.Convert_Data_To_DataSet(training_data, train_label, Train_Batch_Size)
+    Training_Dataset = tool.Convert_Data_To_DataSet_And_Put_To_Dataloader(training_data, train_label, Train_Batch_Size)
+
+    for idx, data in enumerate(Training_Dataset):
+        datas = data[0]
+        print(f"Shape: {datas.shape}")
+
     # training_data = image_processing.normalization(training_data)
     # training_data = training_data.permute(0, 3, 1, 2)
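
The shape-printing loop added above doubles as a check that the two commented-out lines at the bottom are now redundant: ToTensor inside ListDataset already scales pixel values to [0, 1] and moves the channel axis, so no manual normalization or permute(0, 3, 1, 2) is needed. A two-line demonstration (image size is illustrative):

    import numpy as np
    from torchvision import transforms

    img_hwc = np.zeros((224, 224, 3), dtype=np.uint8)  # the HWC layout cv2/numpy images use
    tensor_chw = transforms.ToTensor()(img_hwc)
    print(tensor_chw.shape)  # torch.Size([3, 224, 224]) -- already channels-first

With Train_Batch_Size = 50, the loop should therefore print Shape: torch.Size([50, 3, H, W]) for every full batch (the final batch may be smaller).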