The stage is Xception + U-Net + Xception, and the U-Net estimates the convolution

This commit is contained in:
whitekirin 2025-10-19 16:56:00 +08:00
parent 54bb9b0072
commit 780559d77b
133 changed files with 7619 additions and 1121 deletions

View File

@ -22,11 +22,11 @@ class Calculate():
DataFrame = pd.DataFrame( DataFrame = pd.DataFrame(
{ {
"loss" : "{:.2f}".format(Loss), "loss" : "{:.2f}".format(Loss),
"precision" : "{:.2f}".format(Precision * 100), "precision" : "{:.2f}".format(Precision),
"recall" : "{:.2f}".format(Recall * 100), "recall" : "{:.2f}".format(Recall),
"accuracy" : "{:.2f}".format(Accuracy * 100), "accuracy" : "{:.2f}".format(Accuracy),
"f1" : "{:.2f}".format(F1 * 100), "f1" : "{:.2f}".format(F1),
"AUC" : "{:.2f}".format(AUC * 100) "AUC" : "{:.2f}".format(AUC)
}, index = [0] }, index = [0]
) )
self.History.append(DataFrame) self.History.append(DataFrame)
@ -40,7 +40,7 @@ class Calculate():
F1_Mean = np.mean(self.F1_Record) F1_Mean = np.mean(self.F1_Record)
AUC_Mean = np.mean(self.AUC_Record) AUC_Mean = np.mean(self.AUC_Record)
Mean_DataFram = self.Construction_To_DataFrame(Loss_Mean, Accuracy_Mean, Precision_Mean, Recall_Mean, F1_Mean, AUC_Mean) Mean_DataFram = self.Construction_To_DataFrame(Loss_Mean, Accuracy_Mean * 100, Precision_Mean * 100, Recall_Mean * 100, F1_Mean * 100, AUC_Mean * 100)
return Mean_DataFram return Mean_DataFram
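The hunk above moves the ×100 percentage scaling from each per-epoch record to the point where the fold means are assembled, so the history stores raw fractions and scales exactly once. A minimal sketch of the intent (values hypothetical):

```python
# Store raw fractions per fold; convert to percent only when reporting the mean.
import numpy as np

precision_record = [0.91, 0.88, 0.93]         # raw fractions, one per fold
precision_mean = np.mean(precision_record)    # ~0.9067
print("{:.2f}".format(precision_mean * 100))  # "90.67", scaled exactly once
```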

Binary file not shown.

View File

@ -1,39 +1,51 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image from model_data_processing.processing import make_label_list
from _validation.ValidationTheEnterData import validation_the_enter_data from _validation.ValidationTheEnterData import validation_the_enter_data
from Load_process.file_processing import Process_File from Load_process.file_processing import Process_File
from Load_process.LoadData import Load_Data_Prepare from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
from torchvision import transforms
from Training_Tools.PreProcess import Training_Precesses from Training_Tools.PreProcess import Training_Precesses
import numpy as np
from PIL import Image from torchvision import transforms
import torch
import cv2
class Image_generator(): class Image_generator():
'''Builds the augmented data''' '''Builds the augmented data'''
def __init__(self, Generator_Root, Labels, Image_Size) -> None: def __init__(self, Training_Root, Generator_Root, Labels, Image_Size, Class_Count) -> None:
self._validation = validation_the_enter_data() self._validation = validation_the_enter_data()
self.stop = 0 self.stop = 0
self.Labels = Labels self.Labels = Labels
self.Training_Root = Training_Root
self.Generator_Root = Generator_Root self.Generator_Root = Generator_Root
self.Image_Size = Image_Size self.Image_Size = Image_Size
self.Class_Count = 904 self.Class_Count = Class_Count
pass pass
def Processing_Main(self, Training_Dict_Data_Root): def Processing_Main(self):
data_size = 2712 data_size = 2712
File = Process_File()
Prepare = Load_Data_Prepare()
Load_Tool = Load_Data_Tools()
# 製作標準資料增強 if not File.Judge_File_Exist(self.Generator_Root): # 檔案若不存在
''' # 確定我要多少個List
這裡我想要做的是依照paper上的資料強化IMAGE DATA COLLECTION AND IMPLEMENTATION OF DEEP LEARNING-BASED MODEL IN DETECTING MONKEYPOX DISEASE USING MODIFIED VGG16 Prepare.Set_Data_Content([], len(self.Labels))
產生出資料強化後的影像
'''
for i in range(1, 5, 1):
print("\nAugmentation one Generator image")
data_size = self.get_processing_Augmentation(Training_Dict_Data_Root, i, data_size)
self.stop += data_size
print() # 製作讀檔字典並回傳檔案路徑
Prepare.Set_Label_List(self.Labels)
Prepare.Set_Data_Dictionary(Prepare.Get_Label_List(), Prepare.Get_Data_Content(), len(self.Labels))
Original_Dict_Data_Root = Prepare.Get_Data_Dict()
get_all_original_image_data = Load_Tool.get_data_root(self.Training_Root, Original_Dict_Data_Root, Prepare.Get_Label_List())
# 儲存資料強化後資料
# 製作標準資料增強
'''
Here I follow the data augmentation described in the paper "IMAGE DATA COLLECTION AND IMPLEMENTATION OF DEEP LEARNING-BASED MODEL IN DETECTING MONKEYPOX DISEASE USING MODIFIED VGG16"
to produce the augmented images
'''
for i in range(1, 5, 1):
print(f"\nAugmentation {i} Generator image")
data_size = self.get_processing_Augmentation(get_all_original_image_data, i, data_size)
self.stop += data_size
else: # 若檔案存在
print("standard data and myself data are exist\n")
def get_processing_Augmentation(self, original_image_root : dict, Augment_choose, data_size): def get_processing_Augmentation(self, original_image_root : dict, Augment_choose, data_size):
Prepaer = Load_Data_Prepare() Prepaer = Load_Data_Prepare()
@ -51,7 +63,6 @@ class Image_generator():
strardand = 要使用哪種Image Augmentation strardand = 要使用哪種Image Augmentation
''' '''
File = Process_File() File = Process_File()
image_processing = Read_image_and_Process_image(self.Image_Size)
tool = Training_Precesses(self.Image_Size) tool = Training_Precesses(self.Image_Size)
Classes = [] Classes = []
Transform = self.Generator_Content(stardand) Transform = self.Generator_Content(stardand)
@ -60,15 +71,15 @@ class Image_generator():
Image_Roots = self.get_data_roots[label] Image_Roots = self.get_data_roots[label]
save_root = File.Make_Save_Root(label, save_roots) # 合併路徑 save_root = File.Make_Save_Root(label, save_roots) # 合併路徑
Classes = image_processing.make_label_list(len(Image_Roots), "1") Classes = make_label_list(len(Image_Roots), "1")
Training_Dataset = tool.Setting_DataSet(Image_Roots, Classes) Training_Dataset = tool.Setting_DataSet(Image_Roots, Classes, "Generator")
Training_DataLoader = tool.Dataloader_Sampler(Training_Dataset, 1, False) Training_DataLoader = tool.Dataloader_Sampler(Training_Dataset, 1, False)
if File.JudgeRoot_MakeDir(save_root): # 判斷要存的資料夾存不存在,不存在則創立 if File.JudgeRoot_MakeDir(save_root): # 判斷要存的資料夾存不存在,不存在則創立
print("The file is exist.This Script is not creating new fold.") print("The file is exist.This Script is not creating new fold.")
for i in range(1, int(self.Class_Count / len(Image_Roots)) + 1, 1): for i in range(1, int(self.Class_Count / len(Image_Roots)) + 1, 1):
for batch_idx, (images, labels) in enumerate(Training_DataLoader): for batch_idx, (images, labels, File_Name, File_Classes) in enumerate(Training_DataLoader):
for j, img in enumerate(images): for j, img in enumerate(images):
# if i == self.stop: # if i == self.stop:
# break # break
@ -78,7 +89,6 @@ class Image_generator():
# 轉換為 NumPy 陣列並從 BGR 轉為 RGB # 轉換為 NumPy 陣列並從 BGR 轉為 RGB
img_np = img.numpy().transpose(1, 2, 0) # 轉回 HWC 格式 img_np = img.numpy().transpose(1, 2, 0) # 轉回 HWC 格式
img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB) # BGR 轉 RGB
img_pil = transforms.ToPILImage()(img_np) img_pil = transforms.ToPILImage()(img_np)
File.Save_PIL_File("image_" + label + str(data_size) + ".png", save_root, img_pil) # 存檔 File.Save_PIL_File("image_" + label + str(data_size) + ".png", save_root, img_pil) # 存檔
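The pass count in `get_processing_Augmentation`, `int(self.Class_Count / len(Image_Roots)) + 1`, re-augments each class until it reaches roughly `Class_Count` images. A sketch of that arithmetic under assumed numbers:

```python
# Illustrative only: a class with 226 originals and a Class_Count target of 904.
class_count = 904
originals = 226
passes = int(class_count / originals) + 1  # 4 + 1 = 5 full passes over the class
print(passes * originals)                  # 1130 generated candidates before any stop
```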

View File

@ -0,0 +1,262 @@
import xml.etree.ElementTree as ET
import cv2
import os
import numpy as np
from typing import List, Dict, Optional, Tuple
from utils.Stomach_Config import Loading_Config
class XMLAnnotationProcessor:
"""
XML annotation file processor
Handles XML files containing bounding-box information and draws the boxes on the corresponding images
"""
def __init__(self, dataset_root: str):
"""
Initialize the XML processor
Args:
    dataset_root: root directory of the image dataset
"""
self.dataset_root = dataset_root
self.box_color = (0, 255, 0) # 綠色邊界框
self.text_color = (0, 255, 0) # 綠色文字
self.box_thickness = 2
self.font_scale = 0.5
self.font = cv2.FONT_HERSHEY_SIMPLEX
def _ensure_output_folder(self, Save_Root: str) -> None:
"""確保輸出資料夾存在"""
if not os.path.exists(Save_Root):
os.makedirs(Save_Root)
def parse_xml(self, xml_file_path: str, Label: str) -> Optional[Dict]:
"""
解析XML檔案並提取所有相關資訊
Args:
xml_file_path: XML檔案路徑
Returns:
Dict: 包含檔案資訊和bounding box的字典解析失敗時返回None
"""
try:
tree = ET.parse(xml_file_path)
root = tree.getroot()
# 提取基本資訊
filename_element = root.find('filename')
if filename_element is None:
print(f"找不到path元素在 {xml_file_path}")
return None
filename = filename_element.text if filename_element is not None else "Unknown"
Original_Image_Data_Root = os.path.join(self.dataset_root, Label)
Original_Image_Data_Root = os.path.join(Original_Image_Data_Root, filename)
# 提取圖片尺寸
size_element = root.find('size')
width = int(size_element.find('width').text) if size_element is not None else 0
height = int(size_element.find('height').text) if size_element is not None else 0
depth = int(size_element.find('depth').text) if size_element is not None else 3
# 提取所有bounding box
bounding_boxes = []
objects = root.findall('object')
for obj in objects:
bndbox = obj.find('bndbox')
if bndbox is not None:
bbox_info = {
'name': obj.find('name').text if obj.find('name') is not None else "Unknown",
'pose': obj.find('pose').text if obj.find('pose') is not None else "Unspecified",
'truncated': int(obj.find('truncated').text) if obj.find('truncated') is not None else 0,
'difficult': int(obj.find('difficult').text) if obj.find('difficult') is not None else 0,
'xmin': int(bndbox.find('xmin').text),
'ymin': int(bndbox.find('ymin').text),
'xmax': int(bndbox.find('xmax').text),
'ymax': int(bndbox.find('ymax').text)
}
bounding_boxes.append(bbox_info)
return {
'filename': filename,
'image_path': Original_Image_Data_Root,
'width': width,
'height': height,
'depth': depth,
'bounding_boxes': bounding_boxes
}
except Exception as e:
print(f"解析XML檔案 {xml_file_path} 時發生錯誤: {str(e)}")
return None
def load_image(self, image_path: str) -> Optional[np.ndarray]:
"""
載入圖片檔案
Args:
image_path: 圖片檔案路徑
Returns:
np.ndarray: 圖片陣列載入失敗時返回None
"""
if not os.path.exists(image_path):
print(f"圖片檔案不存在: {image_path}")
return None
image = cv2.imread(image_path)
if image is None:
print(f"無法讀取圖片: {image_path}")
return None
return image
def draw_bounding_boxes(self, image: np.ndarray, bounding_boxes: List[Dict]) -> np.ndarray:
"""
創建遮罩圖片bounding box內保持原圖外部為黑色
Args:
image: 圖片陣列
bounding_boxes: bounding box資訊列表
Returns:
np.ndarray: 處理後的遮罩圖片陣列
"""
# 創建黑色背景圖片
height, width = image.shape[:2]
result_image = np.zeros((height, width, 3), dtype=np.uint8)
for i, bbox in enumerate(bounding_boxes):
xmin, ymin = bbox['xmin'], bbox['ymin']
xmax, ymax = bbox['xmax'], bbox['ymax']
object_name = bbox['name']
# 確保座標在圖片範圍內
xmin = max(0, min(xmin, width-1))
ymin = max(0, min(ymin, height-1))
xmax = max(0, min(xmax, width-1))
ymax = max(0, min(ymax, height-1))
# 將bounding box範圍內的原圖複製到結果圖像中
result_image[ymin:ymax, xmin:xmax] = image[ymin:ymax, xmin:xmax]
print(f"Object {i+1}: {object_name} - 座標: ({xmin}, {ymin}, {xmax}, {ymax})")
return result_image
def save_annotated_image(self, image: np.ndarray, original_filename: str, Annotation_Root : str, Label : str) -> str:
"""
儲存標註後的圖片
Args:
image: 標註後的圖片陣列
original_filename: 原始檔案名稱
Returns:
str: 儲存的檔案路徑
"""
output_filename = f"annotated_{original_filename}"
output_path = os.path.join(Annotation_Root, Label)
Save_Image_Roots = os.path.join(output_path, output_filename)
# 確保輸出資料夾存在
self._ensure_output_folder(output_path)
cv2.imwrite(Save_Image_Roots, image)
print(f"已儲存標註圖片至: {Save_Image_Roots}")
return Save_Image_Roots
def process_single_xml(self, xml_file_path: str, Annotation_Root : str, Label : str) -> Optional[Tuple[np.ndarray, str]]:
"""
處理單一XML檔案
Args:
xml_file_path: XML檔案路徑
Returns:
Tuple[np.ndarray, str]: (標註後的圖片, 輸出路徑)處理失敗時返回None
"""
# 解析XML
xml_data = self.parse_xml(xml_file_path, Label)
if xml_data is None:
return None
# 載入圖片
image = self.load_image(xml_data['image_path'])
if image is None:
return None
# 繪製bounding box
annotated_image = self.draw_bounding_boxes(image, xml_data['bounding_boxes'])
# 儲存結果
output_path = self.save_annotated_image(annotated_image, xml_data['filename'], Annotation_Root, Label)
return annotated_image, output_path
def process_multiple_xml(self, xml_folder_path: str, Annotation_Root : str, Label : str) -> List[Tuple[str, bool]]:
"""
批量處理多個XML檔案
Args:
xml_folder_path: 包含XML檔案的資料夾路徑
Returns:
List[Tuple[str, bool]]: [(檔案名稱, 處理成功與否), ...]
"""
if not os.path.exists(xml_folder_path):
    print(f"XML folder does not exist: {xml_folder_path}")
    return []
xml_files = [f for f in os.listdir(xml_folder_path) if f.endswith('.xml')]
if not xml_files:
    print(f"No XML files found in {xml_folder_path}")
    return []
print(f"Found {len(xml_files)} XML files")
results = []
for xml_file in xml_files:
    try:
        Read_XML_File = os.path.join(xml_folder_path, xml_file)
        self.process_single_xml(Read_XML_File, Annotation_Root, Label)
        print(f"\nProcessed file: {xml_file}")
        results.append((xml_file, True))
    except Exception as e:
        print(f"Error while processing {xml_file}: {str(e)}")
        results.append((xml_file, False))
return results  # matches the documented List[Tuple[str, bool]] return type
def get_bounding_boxes_info(self, xml_file_path: str, Label: str) -> Optional[Dict]:
    """
    Extract only the bounding-box information from the XML, without any image processing
    Args:
        xml_file_path: path to the XML file
        Label: class sub-folder of the corresponding image (parse_xml requires it)
    Returns:
        Dict: file information and bounding-box coordinates
    """
    return self.parse_xml(xml_file_path, Label)
def set_drawing_style(self, box_color: Tuple[int, int, int] = None,
text_color: Tuple[int, int, int] = None,
box_thickness: int = None,
font_scale: float = None) -> None:
"""
設定繪圖樣式
Args:
box_color: 邊界框顏色 (B, G, R)
text_color: 文字顏色 (B, G, R)
box_thickness: 邊界框粗細
font_scale: 字體大小
"""
if box_color is not None:
self.box_color = box_color
if text_color is not None:
self.text_color = text_color
if box_thickness is not None:
self.box_thickness = box_thickness
if font_scale is not None:
self.font_scale = font_scale
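A hedged usage sketch of the processor above; the directory layout and the "CA" label are assumptions, not taken from the source:

```python
# Assumed layout: ../Dataset/Training/<Label>/<image> plus one Pascal-VOC-style
# XML per image under ../Label_Image/<Label>. All paths are illustrative.
processor = XMLAnnotationProcessor(dataset_root="../Dataset/Training")
results = processor.process_multiple_xml(
    xml_folder_path="../Label_Image/CA",   # folder containing the .xml files
    Annotation_Root="../Dataset/Masked",   # output root: <root>/<Label>/annotated_*.png
    Label="CA",
)
print(results)  # [(xml file name, success flag), ...]
```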

View File

@ -1,87 +1,309 @@
import cv2 import cv2
import numpy as np import numpy as np
import torch
from PIL import Image
import torchvision
import functools
import inspect
def shapen(image): # 銳化處理 # 套用裝飾器到現有函數
sigma = 100 def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):
blur_img = cv2.GaussianBlur(image, (0, 0), sigma) """使用OpenCV實現的Unsharp Mask銳化處理
usm = cv2.addWeighted(image, 1.5, blur_img, -0.5, 0) 參數:
image: PIL.Image對象(RGB格式)
return usm kernel_size: 高斯模糊的核大小必須是奇數
sigma: 高斯模糊的標準差
def increase_contrast(image): # 增加資料對比度 amount: 銳化程度值越大效果越強
output = image # 建立 output 變數 threshold: 邊緣檢測閾值僅在邊緣處進行銳化
alpha = 2 返回:
beta = 10 銳化後的PIL.Image對象
cv2.convertScaleAbs(image, output, alpha, beta) # 套用 convertScaleAbs """
# 轉換PIL圖像為numpy數組
return output numpy_img = np.array(image, dtype=np.uint8)
def adaptive_histogram_equalization(image):
ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB)
channels = cv2.split(ycrcb)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe.apply(channels[0], channels[0])
ycrcb = cv2.merge(channels)
Change_image = cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR)
return Change_image # 對原圖進行高斯模糊
blurred = cv2.GaussianBlur(numpy_img, kernel_size, sigma)
# 計算銳化後的圖像
sharpened = cv2.addWeighted(numpy_img, 1 + amount, blurred, -amount, 0)
# 如果設置了threshold只在邊緣處應用銳化
if threshold > 0:
low_contrast_mask = np.absolute(numpy_img.astype(np.int16) - blurred.astype(np.int16)) < threshold  # cast first: uint8 subtraction would wrap around
np.copyto(sharpened, numpy_img, where=low_contrast_mask)
# 確保像素值在有效範圍內
sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)
# 轉回PIL圖像
return Image.fromarray(sharpened)
def Remove_Background(image, Matrix_Size): def histogram_equalization(image):
skinCrCbHist = np.zeros((256,256), dtype= np.uint8) """GPU加速的一般直方圖等化
cv2.ellipse(skinCrCbHist, (113,155),(23,25), 43, 0, 360, (255, 255, 255), -1) #繪製橢圓弧線 參數:
image: PIL.Image對象(RGB格式)
返回:
直方圖等化後的PIL.Image對象
"""
# 轉換為numpy數組並轉為PyTorch張量
numpy_img = np.array(image)
tensor_img = torch.from_numpy(numpy_img).float().to('cuda' if torch.cuda.is_available() else 'cpu')  # fall back to CPU when no GPU is available
# 分離通道並進行直方圖等化
result = torch.zeros_like(tensor_img)
for i in range(3): # 對RGB三個通道分別處理
channel = tensor_img[..., i]
# 計算直方圖
hist = torch.histc(channel, bins=256, min=0, max=255)
# 計算累積分布函數(CDF)
cdf = torch.cumsum(hist, dim=0)
cdf_normalized = ((cdf - cdf.min()) * 255) / (cdf.max() - cdf.min())
# 應用直方圖等化
result[..., i] = cdf_normalized[channel.long()]
# 轉回CPU和numpy數組
result = torch.clamp(result, 0, 255).byte()
result_np = result.cpu().numpy()
return Image.fromarray(result_np)
img_ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB) def Contrast_Limited_Adaptive_Histogram_Equalization(image, clip_limit=3.0, tile_size=(8, 8)):
y,cr,cb = cv2.split(img_ycrcb) #拆分出Y,Cr,Cb值 """使用OpenCV實現的對比度限制自適應直方圖均衡化(CLAHE)
參數:
image: PIL.Image對象(RGB格式)
clip_limit: 剪切限制用於限制對比度增強的程度較大的值會產生更強的對比度
tile_size: 圖像分塊大小的元組(height, width)較小的值會產生更局部的增強效果
返回:
CLAHE處理後的PIL.Image對象
"""
# 將PIL圖像轉換為OpenCV格式BGR
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# 轉換到LAB色彩空間
lab_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2LAB)
# 創建CLAHE對象
clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_size)
# 分離LAB通道
l, a, b = cv2.split(lab_img)
# 對L通道應用CLAHE
l_clahe = clahe.apply(l)
# 合併處理後的L通道與原始的a和b通道
lab_output = cv2.merge([l_clahe, a, b])
# 將LAB轉回BGR然後轉換為RGB
bgr_output = cv2.cvtColor(lab_output, cv2.COLOR_LAB2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# 轉換為PIL圖像並返回
return Image.fromarray(rgb_output)
skin = np.zeros(cr.shape, dtype = np.uint8) #掩膜 def adaptive_histogram_equalization_without_limit(image, tile_size=(8, 8)):
(x,y) = cr.shape """使用OpenCV實現的自適應直方圖均衡化(AHE)
參數:
image: PIL.Image對象(RGB格式)
tile_size: 圖像分塊大小的元組(height, width)較小的值會產生更局部的增強效果
返回:
AHE處理後的PIL.Image對象
"""
# 將PIL圖像轉換為OpenCV格式BGR
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# 轉換到LAB色彩空間
lab_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2LAB)
# 分離LAB通道
l, a, b = cv2.split(lab_img)
# Approximate AHE with an effectively unlimited clip limit
# (cv2.createCLAHE requires a numeric clipLimit; passing None raises a TypeError)
clahe = cv2.createCLAHE(clipLimit=1000.0, tileGridSize=tile_size)
# 對L通道應用AHE
l_ahe = clahe.apply(l)
# 合併處理後的L通道與原始的a和b通道
lab_output = cv2.merge([l_ahe, a, b])
# 將LAB轉回BGR然後轉換為RGB
bgr_output = cv2.cvtColor(lab_output, cv2.COLOR_LAB2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# 轉換為PIL圖像並返回
return Image.fromarray(rgb_output)
# 依序取出圖片中每個像素 def laplacian_sharpen(image):
for i in range(x): """
for j in range(y): GPU加速的拉普拉斯銳化處理函數
if skinCrCbHist [cr[i][j], cb[i][j]] > 0: #若不在橢圓區間中 參數:
skin[i][j] = 255 image: PIL.Image對象(RGB格式)
# 如果該像素的灰階度大於 200調整該像素的透明度 返回:
# 使用 255 - gray[y, x] 可以將一些邊緣的像素變成半透明,避免太過鋸齒的邊緣 銳化後的PIL.Image對象
# img_change = cv2.cvtColor(img_change, cv2.COLOR_BGRA2BGR) """
img = cv2.bitwise_and(image, image, mask = skin) # 轉換為numpy數組並轉為PyTorch張量
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) numpy_img = np.array(image)
tensor_img = torch.from_numpy(numpy_img).float().to('cuda' if torch.cuda.is_available() else 'cpu')
# 創建拉普拉斯算子
laplacian_kernel = torch.tensor([
[0, 1, 0],
[1, -4, 1],
[0, 1, 0]
], dtype=torch.float32, device=tensor_img.device).unsqueeze(0).unsqueeze(0)
# 對每個通道進行處理
result = torch.zeros_like(tensor_img)
for i in range(3): # RGB三個通道
channel = tensor_img[..., i]
# 添加批次和通道維度
channel = channel.unsqueeze(0).unsqueeze(0)
# 應用拉普拉斯算子
laplacian = torch.nn.functional.conv2d(channel, laplacian_kernel, padding=1)
# 移除批次和通道維度
laplacian = laplacian.squeeze()
# 銳化處理:原圖 - 拉普拉斯
result[..., i] = channel.squeeze() - laplacian
# 確保像素值在合理範圍內
result = torch.clamp(result, 0, 255).byte()
# 轉回CPU和numpy數組
result_np = result.cpu().numpy()
return Image.fromarray(result_np)
h = image.shape[0] # 取得圖片高度 def adjust_hsv(image, v_adjustment=0):
w = image.shape[1] # 取得圖片寬度 """調整圖像的HSV色彩空間中的H和V通道
參數:
image: PIL.Image對象(RGB格式)
v_adjustment: V通道的調整值範圍建議在[-255, 255]之間
返回:
HSV調整後的PIL.Image對象
"""
# 將PIL圖像轉換為OpenCV格式BGR
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# 轉換到HSV色彩空間
hsv_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2HSV)
# 調整V通道
hsv_img[..., 2] = np.clip(hsv_img[..., 2] + v_adjustment, 0, 255)
# 將HSV轉回BGR然後轉換為RGB
bgr_output = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# 轉換為PIL圖像並返回
return Image.fromarray(rgb_output)
for x in range(w): def gamma_correction(image, gamma=1.0):
for y in range(h): """對圖像進行伽馬校正
if img_gray[y, x] == 0:
# if x == 0 and y == 0: # 當X Y都在左上角時 參數:
# image[y, x] = Add(1, Matrix_Size, image[y, x]) / Matrix_Size image: PIL.Image對象(RGB格式)
# if x == w - 1 and y == 0: # 當X Y都在右上角時 gamma: 伽馬值gamma > 1 時圖像變暗gamma < 1 時圖像變亮gamma = 1 時保持不變
# image[y, x] = Add(w - Matrix_Size, w, image[y, x]) / Matrix_Size
# if x == 0 and y == h - 1: # 當X Y都在左下角時 返回:
# image[y, x] = (image[y - 1, x] + image[y - 1, x + 1] + image[y, x + 1]) / 3 伽馬校正後的PIL.Image對象
# if x == w - 1 and y == h - 1: # 當X Y都在右下角時 """
# image[y, x] = (image[y, x - 1] + image[y - 1, x - 1] + image[y - 1, x]) / 3 # 將PIL圖像轉換為numpy數組
numpy_img = np.array(image)
# 將像素值歸一化到[0, 1]範圍
normalized = numpy_img.astype(float) / 255.0
# 應用伽馬校正
corrected = np.power(normalized, gamma)
# 將值縮放回[0, 255]範圍
output = np.clip(corrected * 255.0, 0, 255).astype(np.uint8)
# 轉換回PIL圖像並返回
return Image.fromarray(output)
# if (x > 0 and x < w - 1) and y == 0: # 當上面的X Y從左到右 def Hight_Light(image, Threshold):
# image[y, x] = (image[y, x - 1] + image[y + 1, x - 1] + image[y + 1, x] + image[y, x + 1] + image[y + 1, x + 1]) / 5 image = np.array(image)
# if (x > 0 and x < w - 1) and y == h - 1: # 當下面的X Y從左到右
# image[y, x] = (image[y, x - 1] + image[y - 1, x - 1] + image[y - 1, x] + image[y, x + 1] + image[y - 1, x + 1]) / 5
# if x == 0 and (y > 0 and y < h - 1): # 當左邊的X Y從上到下
# image[y, x] = (image[y - 1, x] + image[y - 1, x + 1] + image[y, x + 1] + image[y + 1, x + 1] + image[y + 1, x]) / 5
# if x == w - 1 and (y > 0 and y < h - 1): # 當右邊X Y從上到下
# image[y, x] = (image[y - 1, x] + image[y - 1, x - 1] + image[y, x - 1] + image[y + 1, x - 1] + image[y + 1, x]) / 5
if (x >= 1 and x < w - 1) and (y >= 1 and y < h - 1): # 當y >= 2 且 X >= 2 gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
image[y, x] = Add(x, y, image, Matrix_Size) / Matrix_Size # 使用閾值檢測高光點(白色液滴)
# BGRA_image[y, x, 3] = 255 - gray[y, x] _, thresh = cv2.threshold(gray, Threshold, 255, cv2.THRESH_BINARY)
return image # 使用形態學操作(膨脹)來擴大遮罩區域
kernel = np.ones((5, 5), np.uint8)
dilated = cv2.dilate(thresh, kernel, iterations=1)
# 使用 inpaint 修復高光點
image_inpaint = cv2.inpaint(image, dilated, 3, cv2.INPAINT_TELEA)
return Image.fromarray(image_inpaint)
def Add(width_Center, Height_Center, image, Mask_Size): def median_filter(image: Image.Image, kernel_size: int = 3):
total = 0 """
for i in range(Mask_Size): 中值濾波Median Filter實現
for j in range(Mask_Size):
total += image[width_Center - ((Mask_Size - 1) / 2) + j, Height_Center - ((Mask_Size - 1) / 2) + i] 參數:
image: PIL.Image對象(RGB格式)
kernel_size: 濾波核大小必須是奇數
返回:
濾波後的PIL.Image對象
"""
# 確保kernel_size是奇數
if kernel_size % 2 == 0:
kernel_size += 1
# 轉換PIL圖像為numpy數組
numpy_img = np.array(image, dtype=np.uint8)
# 對每個通道應用中值濾波
result = np.zeros_like(numpy_img)
for i in range(3): # 對RGB三個通道分別處理
result[:, :, i] = cv2.medianBlur(numpy_img[:, :, i], kernel_size)
# 確保像素值在有效範圍內
result = np.clip(result, 0, 255).astype(np.uint8)
# 轉回PIL圖像
return Image.fromarray(result)
return total def mean_filter(image: Image.Image, kernel_size: int = 3):
"""
均質濾波Mean Filter實現
參數:
image: PIL.Image對象(RGB格式)
kernel_size: 濾波核大小必須是奇數
返回:
濾波後的PIL.Image對象
"""
# 確保kernel_size是奇數
if kernel_size % 2 == 0:
kernel_size += 1
# 轉換PIL圖像為numpy數組
numpy_img = np.array(image, dtype=np.uint8)
# 創建均質濾波核所有元素都是1/(kernel_size*kernel_size)
kernel = np.ones((kernel_size, kernel_size), np.float32) / (kernel_size * kernel_size)
# 對每個通道應用均質濾波
result = np.zeros_like(numpy_img)
for i in range(3): # 對RGB三個通道分別處理
result[:, :, i] = cv2.filter2D(numpy_img[:, :, i], -1, kernel)
# 確保像素值在有效範圍內
result = np.clip(result, 0, 255).astype(np.uint8)
# 轉回PIL圖像
return Image.fromarray(result)
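A hedged usage sketch of the enhancement functions above; the function names are real, but the chaining order and file paths are assumptions:

```python
# All functions above take and return PIL.Image (RGB); the pipeline order is illustrative.
from PIL import Image

img = Image.open("sample.png").convert("RGB")                        # path illustrative
img = Hight_Light(img, Threshold=200)                                # inpaint specular highlights
img = Contrast_Limited_Adaptive_Histogram_Equalization(img, clip_limit=3.0)
img = unsharp_mask(img, kernel_size=(5, 5), sigma=1.0, amount=1.0)
img = median_filter(img, kernel_size=3)
img.save("enhanced.png")
```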

View File

@ -1,58 +0,0 @@
from Load_process.LoadData import Loding_Data_Root
from Image_Process.Image_Generator import Image_generator
from Load_process.file_processing import Process_File
from model_data_processing.processing_for_cut_image import Cut_Indepentend_Data
from Load_process.Loading_Tools import Load_Data_Prepare, Load_Data_Tools
class Load_ImageGenerator():
'''
This object performs data augmentation; it mainly combines the augmentation scheme provided by a senior labmate with augmentation settings of my own.
It first loads the data, augments each class separately, and uses the augmentation to compensate for class imbalance.
This is only one of the experiments.
Parmeter
standard_root: the same kind of augmentation as the senior labmate's scheme
myself_root: augmentation whose parameters I set myself
IndependentDataRoot: folder path the data is written back to
Herpeslabels: herpes classes
MonKeyPoxlabels: monkeypox classes (monkeypox, chickenpox, normal)
herpes_data: the herpes dataset merged into a single list
MonkeyPox_data: the MonkeyPox dataset merged into a single list
'''
def __init__(self, Training_Root,Test_Root, Generator_Root, Labels, Image_Size) -> None:
self.Training_Root = Training_Root
self.TestRoot = Test_Root
self.GeneratoRoot = Generator_Root
self.Labels = Labels
self.Image_Size = Image_Size
pass
def process_main(self, Data_Length : int):
File = Process_File()
Prepare = Load_Data_Prepare()
load = Loding_Data_Root(self.Labels, self.Training_Root, self.GeneratoRoot)
Indepentend = Cut_Indepentend_Data(self.Training_Root, self.Labels)
Load_Tool = Load_Data_Tools()
Generator = Image_generator(self.GeneratoRoot, self.Labels, self.Image_Size)
# 將測試資料獨立出來
test_size = 0.2
Indepentend.IndependentData_main(self.TestRoot, test_size)
if not File.Judge_File_Exist(self.GeneratoRoot): # 檔案若不存在
# 確定我要多少個List
Prepare.Set_Data_Content([], Data_Length)
# 製作讀檔字典並回傳檔案路徑
Prepare.Set_Label_List(self.Labels)
Prepare.Set_Data_Dictionary(Prepare.Get_Label_List(), Prepare.Get_Data_Content(), Data_Length)
Original_Dict_Data_Root = Prepare.Get_Data_Dict()
get_all_original_image_data = Load_Tool.get_data_root(self.Training_Root, Original_Dict_Data_Root, Prepare.Get_Label_List())
# 儲存資料強化後資料
Generator.Processing_Main(get_all_original_image_data) # 執行資料強化
else: # 若檔案存在
print("standard data and myself data are exist\n")
# 執行讀檔
return load.process_main()

View File

@ -11,15 +11,16 @@ class Loding_Data_Root(Process_File):
super().__init__() super().__init__()
pass pass
def process_main(self): def process_main(self, status):
'''處理讀Training、Image Generator檔資料''' '''處理讀Training、Image Generator檔資料'''
Merge = merge() Merge = merge()
get_Image_Data = self.get_Image_data_roots(self.Train_Root) get_Image_Data = self.get_Image_data_roots(self.Train_Root)
Get_ImageGenerator_Image_Data = self.get_Image_data_roots(self.Generator_Root)
# Get_Total_Image_Data_Root = Merge.merge_dict_to_dict(get_Image_Data, Get_ImageGenerator_Image_Data) if status:
# Get_Total_Image_Data_Root = Merge.merge_data_main(get_Image_Data, 0, len(self.Label_List)) Get_ImageGenerator_Image_Data = self.get_Image_data_roots(self.Generator_Root)
Get_Total_Image_Data_Root = Merge.merge_dict_to_dict(get_Image_Data, Get_ImageGenerator_Image_Data)
return Get_Total_Image_Data_Root
return get_Image_Data return get_Image_Data
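A hedged sketch of the new `status` flag; the constructor call mirrors the one in the removed Load_ImageGenerator file, and the labels and paths are illustrative:

```python
# status=True merges the generator output into the training dict; status=False
# loads the original training images only. Labels and paths are assumptions.
labels = ["stomach_cancer_Crop", "Normal_Crop", "Have_Question_Crop"]
loader = Loding_Data_Root(labels, "../Dataset/Training", "../Dataset/ImageGenerator")
original_only = loader.process_main(False)   # dict: label -> original image paths
with_augmented = loader.process_main(True)   # original merged with augmented paths
```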

View File

@ -1,11 +1,10 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from merge_class.merge import merge from merge_class.merge import merge
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
from model_data_processing.processing import Balance_Process from model_data_processing.processing import make_label_list
from utils.Stomach_Config import Loading_Config
class Load_Indepentend_Data(): class Load_Indepentend_Data():
def __init__(self, Labels, OneHot_Encording): def __init__(self, OneHot_Encording):
''' '''
影像切割物件 影像切割物件
label有2類,會將其轉成one-hot-encoding的形式 label有2類,會將其轉成one-hot-encoding的形式
@ -13,33 +12,33 @@ class Load_Indepentend_Data():
[1, 0] = NPC_positive [1, 0] = NPC_positive
''' '''
self.merge = merge() self.merge = merge()
self.Labels = Labels
self.OneHot_Encording = OneHot_Encording self.OneHot_Encording = OneHot_Encording
pass pass
def process_main(self, Test_data_root): def process_main(self, Test_data_root, Test_mask_root):
self.test, self.test_label = self.get_Independent_image(Test_data_root) self.test, self.test_label, self.test_mask = self.get_Independent_image(Test_data_root, Test_mask_root)
print("\ntest_labels有" + str(len(self.test_label)) + "筆資料\n") print("\ntest_labels有" + str(len(self.test_label)) + "筆資料\n")
# self.validation, self.validation_label = self.get_Independent_image(Validation_data_root) def get_Independent_image(self, independent_DataRoot, independent_MaskRoot):
# print("validation_labels有 " + str(len(self.validation_label)) + " 筆資料\n")
def get_Independent_image(self, independent_DataRoot):
image_processing = Read_image_and_Process_image(123)
classify_image = []
Total_Size_List = [] Total_Size_List = []
Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot) # 讀取測試資料集的資料 Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot, Loading_Config["Training_Labels"], len(Loading_Config["Training_Labels"])) # 讀取測試資料集的資料
Total_Dict_Mask_Root = self.Get_Independent_data_Root(independent_MaskRoot, Loading_Config["XML_Loading_Label"], len(Loading_Config["XML_Loading_Label"])) # 讀取測試資料集的mask資料
# 將測試資料字典轉成列表,並且將其排序
Total_List_Data_Root = [] Total_List_Data_Root = []
for Label in self.Labels: for Label in Loading_Config["Training_Labels"]:
Total_List_Data_Root.append(Total_Dict_Data_Root[Label]) Total_List_Data_Root.append(Total_Dict_Data_Root[Label])
# 將測試資料字典轉成列表,並且將其排序
Total_List_Mask_Data_Root = []
for Label in Loading_Config["XML_Loading_Label"]:
Total_List_Mask_Data_Root.append(Total_Dict_Mask_Root[Label])
test_label, Classify_Label = [], [] classify_image, Classify_Label = [], []
i = 0 # 計算classify_image的counter且計算總共有幾筆資料 i = 0 # 計算classify_image的counter且計算總共有幾筆資料
for test_title in Total_List_Data_Root: # 藉由讀取所有路徑來進行讀檔 for test_title in Total_List_Data_Root: # 藉由讀取所有路徑來進行讀檔
test_label = image_processing.make_label_list(len(test_title), self.OneHot_Encording[i]) # 製作對應圖片數量的label出來+ test_label = make_label_list(len(test_title), self.OneHot_Encording[i]) # 製作對應圖片數量的label出來+
print(self.Labels[i] + "" + str(len(test_label)) + " 筆資料 ") print(Loading_Config["Training_Labels"][i] + "" + str(len(test_label)) + " 筆資料 ")
Total_Size_List.append(len(test_label)) Total_Size_List.append(len(test_label))
@ -47,28 +46,28 @@ class Load_Indepentend_Data():
Classify_Label.append(test_label) Classify_Label.append(test_label)
i += 1 i += 1
test = self.merge.merge_data_main(classify_image, 0, len(self.Labels)) classify_Mask_image = []
test_label = self.merge.merge_data_main(Classify_Label, 0, len(self.Labels)) i = 0 # 計算classify_image的counter且計算總共有幾筆資料
for test_title in Total_List_Mask_Data_Root: # 藉由讀取所有路徑來進行讀檔
print(Loading_Config["XML_Loading_Label"][i] + "" + str(len(test_title)) + " 筆資料 ")
# test = [] classify_Mask_image.append(test_title)
# test = image_processing.Data_Augmentation_Image(original_test_root) i += 1
# test, test_label = image_processing.image_data_processing(test, original_test_label)
# Balance_Data = list(zip(test, test_label)) test = self.merge.merge_data_main(classify_image, 0, len(Loading_Config["Training_Labels"]))
# test, test_label = Balance_Process(Balance_Data, Total_Size_List) # 打亂並取出指定資料筆數的資料 test_label = self.merge.merge_data_main(Classify_Label, 0, len(Loading_Config["Training_Labels"]))
# test = image_processing.normalization(test) test_Mask = self.merge.merge_data_main(classify_Mask_image, 0, len(Loading_Config["XML_Loading_Label"]))
return test, test_label, test_Mask
return test, test_label
def Get_Independent_data_Root(self, load_data_root): def Get_Independent_data_Root(self, load_data_root, Dictory_Keys, Length):
Prepare = Load_Data_Prepare() Prepare = Load_Data_Prepare()
Load_Tool = Load_Data_Tools() Load_Tool = Load_Data_Tools()
Prepare.Set_Data_Content([], len(self.Labels)) Prepare.Set_Data_Content([], Length)
Prepare.Set_Data_Dictionary(self.Labels, Prepare.Get_Data_Content(), 2) Prepare.Set_Data_Dictionary(Dictory_Keys, Prepare.Get_Data_Content(), Length)
Get_Data_Dict_Content = Prepare.Get_Data_Dict() Get_Data_Dict_Content = Prepare.Get_Data_Dict()
Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, self.Labels) Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, Dictory_Keys)
return Total_Data_Roots return Total_Data_Roots
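A hedged usage sketch of the reworked loader; the one-hot rows and roots are illustrative, and the `Loading_Config` keys are the ones referenced above:

```python
# process_main now also loads a parallel mask set and stores three attributes.
onehot_rows = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]        # one row per training label (assumed)
loader = Load_Indepentend_Data(onehot_rows)
loader.process_main("../Dataset/Testing", "../Dataset/Testing_Mask")  # mask root assumed
images, labels, masks = loader.test, loader.test_label, loader.test_mask
```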

Binary file not shown.

Binary file not shown.

View File

@ -35,16 +35,14 @@ class Process_File():
save_root = self.Make_Save_Root(FileName, save_root) save_root = self.Make_Save_Root(FileName, save_root)
np.save(save_root, image) np.save(save_root, image)
def Save_CSV_File(self, file_name, data): # 儲存訓練結果 def Save_CSV_File(self, Save_Root, file_name, data): # 儲存訓練結果
Save_Root = '../Result/Training_Result/save_the_train_result(' + str(datetime.date.today()) + ")"
self.JudgeRoot_MakeDir(Save_Root) self.JudgeRoot_MakeDir(Save_Root)
modelfiles = self.Make_Save_Root(file_name + ".csv", Save_Root) # 將檔案名稱及路徑字串合併成完整路徑 modelfiles = self.Make_Save_Root(file_name + ".csv", Save_Root) # 將檔案名稱及路徑字串合併成完整路徑
data.to_csv(modelfiles, mode = "a") data.to_csv(modelfiles, mode = "a")
def Save_TXT_File(self, content, File_Name): def Save_TXT_File(self, content, Save_Root, File_Name):
model_dir = '../Result/save_the_train_result(' + str(datetime.date.today()) + ")" # 儲存的檔案路徑由save_the_train_result + 當天日期 self.JudgeRoot_MakeDir(Save_Root)
self.JudgeRoot_MakeDir(model_dir) modelfiles = self.Make_Save_Root(f"{File_Name}.txt", Save_Root) # 將檔案名稱及路徑字串合併成完整路徑
modelfiles = self.Make_Save_Root(File_Name + ".txt", model_dir) # 將檔案名稱及路徑字串合併成完整路徑
with open(modelfiles, mode = 'a') as file: with open(modelfiles, mode = 'a') as file:
file.write(content) file.write(content)
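A hedged sketch of the reworked save methods: the dated `../Result` directory is now built by the caller instead of being hard-coded inside `Save_CSV_File`:

```python
# Caller-supplied Save_Root; the "fold_metrics" / "training_log" names are illustrative.
import datetime

file = Process_File()
save_root = f"../Result/Training_Result/save_the_train_result({datetime.date.today()})"
file.Save_CSV_File(save_root, "fold_metrics", mean_dataframe)  # mean_dataframe: pandas DataFrame (assumed)
file.Save_TXT_File("best epoch: 42\n", save_root, "training_log")
```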

315
Model_Loss/CIOU_Loss.py Normal file
View File

@ -0,0 +1,315 @@
import torch
import torch.nn as nn
import math
class CIOULoss(nn.Module):
"""
Complete Intersection over Union (CIOU) Loss
Suitable for bounding-box regression in object detection
CIOU Loss accounts for three geometric factors:
1. Overlap area
2. Central point distance
3. Aspect ratio consistency
"""
def __init__(self, eps=1e-7):
super(CIOULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
"""
計算 CIOU Loss
Args:
pred_boxes: 預測邊界框 [N, 4] (x1, y1, x2, y2) [N, 4] (cx, cy, w, h) 或分割掩碼 [B, 1, H, W]
target_boxes: 真實邊界框 [N, 4] (x1, y1, x2, y2) [N, 4] (cx, cy, w, h) 或分割掩碼 [B, 1, H, W]
Returns:
CIOU loss value
"""
# 檢查輸入是否為分割掩碼格式
if len(pred_boxes.shape) == 4 and pred_boxes.shape[1] == 1:
# 將分割掩碼轉換為邊界框格式
pred_boxes = self._mask_to_boxes(pred_boxes)
target_boxes = self._mask_to_boxes(target_boxes)
# 如果無法從掩碼中提取有效的邊界框,則返回一個小的損失值
if pred_boxes is None or target_boxes is None:
return torch.tensor(0.01, device=pred_boxes.device if pred_boxes is not None else target_boxes.device)
# 確保輸入為浮點數
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# 檢查邊界框維度是否正確
if pred_boxes.dim() == 1:
# 如果是單個邊界框擴展為批次大小為1的張量
pred_boxes = pred_boxes.unsqueeze(0)
if target_boxes.dim() == 1:
target_boxes = target_boxes.unsqueeze(0)
# 確保邊界框有4個坐標
if pred_boxes.shape[1] != 4 or target_boxes.shape[1] != 4:
# 如果坐標數量不正確,返回一個小的損失值
return torch.tensor(0.01, device=pred_boxes.device)
# 如果輸入是 (cx, cy, w, h) 格式,轉換為 (x1, y1, x2, y2)
if self._is_center_format(pred_boxes, target_boxes):
pred_boxes = self._center_to_corner(pred_boxes)
target_boxes = self._center_to_corner(target_boxes)
# 計算交集區域
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# 計算各自的面積
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# 計算聯集面積
union = pred_area + target_area - intersection + self.eps
# 計算 IoU
iou = intersection / union
# 計算最小外接矩形
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
# 計算最小外接矩形的對角線距離平方
enclose_diagonal_sq = (enclose_x2 - enclose_x1) ** 2 + (enclose_y2 - enclose_y1) ** 2 + self.eps
# 計算兩個邊界框中心點之間的距離平方
pred_center_x = (pred_boxes[:, 0] + pred_boxes[:, 2]) / 2
pred_center_y = (pred_boxes[:, 1] + pred_boxes[:, 3]) / 2
target_center_x = (target_boxes[:, 0] + target_boxes[:, 2]) / 2
target_center_y = (target_boxes[:, 1] + target_boxes[:, 3]) / 2
center_distance_sq = (pred_center_x - target_center_x) ** 2 + (pred_center_y - target_center_y) ** 2
# 計算長寬比一致性項
pred_w = pred_boxes[:, 2] - pred_boxes[:, 0]
pred_h = pred_boxes[:, 3] - pred_boxes[:, 1]
target_w = target_boxes[:, 2] - target_boxes[:, 0]
target_h = target_boxes[:, 3] - target_boxes[:, 1]
# 避免除零
pred_w = torch.clamp(pred_w, min=self.eps)
pred_h = torch.clamp(pred_h, min=self.eps)
target_w = torch.clamp(target_w, min=self.eps)
target_h = torch.clamp(target_h, min=self.eps)
v = (4 / (math.pi ** 2)) * torch.pow(torch.atan(target_w / target_h) - torch.atan(pred_w / pred_h), 2)
# 計算 alpha 參數
with torch.no_grad():
alpha = v / (1 - iou + v + self.eps)
# 計算 CIOU
ciou = iou - (center_distance_sq / enclose_diagonal_sq) - alpha * v
# 返回 CIOU Loss (1 - CIOU)
ciou_loss = 1 - ciou
return ciou_loss.mean()
def _is_center_format(self, pred_boxes, target_boxes):
"""
判斷輸入格式是否為中心點格式 (cx, cy, w, h)
簡單的啟發式判斷如果第三四列的值都是正數且相對較小可能是寬高
"""
# 這裡使用簡單的判斷邏輯,實際使用時可能需要更精確的判斷
return False # 預設假設輸入為 (x1, y1, x2, y2) 格式
def _center_to_corner(self, boxes):
"""
將中心點格式 (cx, cy, w, h) 轉換為角點格式 (x1, y1, x2, y2)
"""
cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
x1 = cx - w / 2
y1 = cy - h / 2
x2 = cx + w / 2
y2 = cy + h / 2
return torch.stack([x1, y1, x2, y2], dim=1)
def _mask_to_boxes(self, masks):
"""
將分割掩碼轉換為邊界框格式 [N, 4] (x1, y1, x2, y2)
Args:
masks: 分割掩碼 [B, 1, H, W]
Returns:
boxes: 邊界框 [B, 4] (x1, y1, x2, y2)
"""
batch_size = masks.size(0)
device = masks.device
# 將掩碼轉換為二值掩碼
binary_masks = (torch.sigmoid(masks) > 0.5).float()
# 初始化邊界框張量
boxes = torch.zeros(batch_size, 4, device=device)
# 對每個批次處理
for b in range(batch_size):
mask = binary_masks[b, 0] # [H, W]
# 找出非零元素的索引
non_zero_indices = torch.nonzero(mask, as_tuple=True)
# 如果掩碼中沒有非零元素,則使用默認的小邊界框
if len(non_zero_indices[0]) == 0:
# 返回一個默認的小邊界框
boxes[b] = torch.tensor([0, 0, 1, 1], device=device)
continue
# 計算邊界框坐標
y_min = torch.min(non_zero_indices[0])
y_max = torch.max(non_zero_indices[0])
x_min = torch.min(non_zero_indices[1])
x_max = torch.max(non_zero_indices[1])
# 存儲邊界框 [x1, y1, x2, y2]
boxes[b] = torch.tensor([x_min, y_min, x_max, y_max], device=device)
return boxes
def _calculate_intersection(self, pred_boxes, target_boxes):
"""
計算兩個邊界框的交集面積
"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
# 計算交集的寬度和高度
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
class DIoULoss(nn.Module):
"""
Distance Intersection over Union (DIoU) Loss
CIOU 的簡化版本只考慮重疊面積和中心點距離
"""
def __init__(self, eps=1e-7):
super(DIoULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
# 確保輸入為浮點數
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# 計算交集區域
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# 計算各自的面積
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# 計算聯集面積
union = pred_area + target_area - intersection + self.eps
# 計算 IoU
iou = intersection / union
# 計算最小外接矩形的對角線距離平方
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
enclose_diagonal_sq = (enclose_x2 - enclose_x1) ** 2 + (enclose_y2 - enclose_y1) ** 2 + self.eps
# 計算中心點距離平方
pred_center_x = (pred_boxes[:, 0] + pred_boxes[:, 2]) / 2
pred_center_y = (pred_boxes[:, 1] + pred_boxes[:, 3]) / 2
target_center_x = (target_boxes[:, 0] + target_boxes[:, 2]) / 2
target_center_y = (target_boxes[:, 1] + target_boxes[:, 3]) / 2
center_distance_sq = (pred_center_x - target_center_x) ** 2 + (pred_center_y - target_center_y) ** 2
# 計算 DIoU
diou = iou - (center_distance_sq / enclose_diagonal_sq)
# 返回 DIoU Loss
diou_loss = 1 - diou
return diou_loss.mean()
def _calculate_intersection(self, pred_boxes, target_boxes):
"""計算交集面積"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
class GIoULoss(nn.Module):
"""
Generalized Intersection over Union (GIoU) Loss
IoU 的泛化版本考慮了最小外接矩形
"""
def __init__(self, eps=1e-7):
super(GIoULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
# 確保輸入為浮點數
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# 計算交集
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# 計算各自面積
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# 計算聯集
union = pred_area + target_area - intersection + self.eps
# 計算 IoU
iou = intersection / union
# 計算最小外接矩形面積
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
enclose_area = (enclose_x2 - enclose_x1) * (enclose_y2 - enclose_y1) + self.eps
# 計算 GIoU
giou = iou - (enclose_area - union) / enclose_area
# 返回 GIoU Loss
giou_loss = 1 - giou
return giou_loss.mean()
def _calculate_intersection(self, pred_boxes, target_boxes):
"""計算交集面積"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
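For reference, the three classes above implement the standard formulations (Zheng et al., 2020). With B and B^gt the predicted and ground-truth boxes, C the smallest enclosing box, ρ the distance between box centers, and c the diagonal of C:

```latex
v = \frac{4}{\pi^{2}}\left(\arctan\frac{w^{gt}}{h^{gt}} - \arctan\frac{w}{h}\right)^{2},
\qquad
\alpha = \frac{v}{(1 - \mathrm{IoU}) + v}
```

```latex
\mathcal{L}_{\mathrm{GIoU}} = 1 - \mathrm{IoU} + \frac{|C| - |B \cup B^{gt}|}{|C|},
\qquad
\mathcal{L}_{\mathrm{DIoU}} = 1 - \mathrm{IoU} + \frac{\rho^{2}(\mathbf{b}, \mathbf{b}^{gt})}{c^{2}},
\qquad
\mathcal{L}_{\mathrm{CIoU}} = \mathcal{L}_{\mathrm{DIoU}} + \alpha v
```

The `forward` methods match these definitions up to numerical safeguards (the `eps` terms and the width/height clamps).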

View File

@ -8,10 +8,20 @@ class Entropy_Loss(nn.Module):
super(Entropy_Loss, self).__init__() super(Entropy_Loss, self).__init__()
def forward(self, outputs, labels): def forward(self, outputs, labels):
# 範例: 使用均方誤差作為損失計算 # 转换为张量
# outputs = torch.argmax(outputs, 1)
outputs_New = torch.as_tensor(outputs, dtype=torch.float32) outputs_New = torch.as_tensor(outputs, dtype=torch.float32)
labels_New = torch.as_tensor(labels, dtype=torch.float32) labels_New = torch.as_tensor(labels, dtype=torch.float32)
loss = functional.cross_entropy(outputs_New, labels_New) # 检查输出和标签的维度是否匹配
if outputs_New.shape[1] != labels_New.shape[1]:
# 如果维度不匹配,使用交叉熵损失函数
# 对于交叉熵损失标签需要是类别索引而不是one-hot编码
# 将one-hot编码转换为类别索引
_, labels_indices = torch.max(labels_New, dim=1)
loss = functional.cross_entropy(outputs_New, labels_indices)
else:
# 如果维度匹配始终使用binary_cross_entropy_with_logits
# 它会自动应用sigmoid函数避免输入值超出[0,1]范围
loss = functional.binary_cross_entropy_with_logits(outputs_New, labels_New)
return torch.as_tensor(loss, dtype = torch.float32) return torch.as_tensor(loss, dtype = torch.float32)
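A hedged sketch of the dispatch above: when the label width differs from the logits, the one-hot targets are collapsed to class indices for `cross_entropy`; when the shapes match, `binary_cross_entropy_with_logits` applies the sigmoid internally:

```python
# Minimal demonstration of both branches; shapes are illustrative.
import torch
from torch.nn import functional

logits = torch.randn(4, 3)                                          # [batch, classes]
one_hot = functional.one_hot(torch.tensor([0, 2, 1, 2]), 3).float()
matched = functional.binary_cross_entropy_with_logits(logits, one_hot)
_, indices = torch.max(one_hot, dim=1)                              # one-hot -> class indices
mismatched = functional.cross_entropy(logits, indices)
```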

View File

@ -0,0 +1,116 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
class VGGPerceptualLoss(nn.Module):
"""
基於VGG19的感知損失函數
使用預訓練的VGG19網絡提取特徵計算特徵空間中的損失
"""
def __init__(self, feature_layers=[2, 7, 12, 21, 30], use_normalization=True):
super(VGGPerceptualLoss, self).__init__()
# 載入預訓練的VGG19模型
vgg = models.vgg19(pretrained=True).features
# 凍結VGG參數
for param in vgg.parameters():
param.requires_grad = False
# 將模型移到與輸入相同的設備上在forward中處理
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 選擇要使用的特徵層
self.feature_layers = feature_layers
self.vgg_layers = nn.ModuleList()
# 分割VGG網絡到指定層
layer_idx = 0
current_layer = 0
for i, layer in enumerate(vgg):
if layer_idx < len(feature_layers) and i <= feature_layers[layer_idx]:
self.vgg_layers.append(layer)
if i == feature_layers[layer_idx]:
layer_idx += 1
else:
break
# 是否使用ImageNet標準化
self.use_normalization = use_normalization
if use_normalization:
self.normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
# 損失權重
self.weights = [1.0, 1.0, 1.0, 1.0, 1.0] # 可以調整不同層的權重
def extract_features(self, x):
"""
提取VGG特徵
"""
# 確保輸入在[0,1]範圍內
if x.min() < 0 or x.max() > 1:
x = torch.clamp(x, 0, 1)
# 標準化
if self.use_normalization:
# 確保normalize在與輸入相同的設備上
if hasattr(self, 'normalize') and not isinstance(self.normalize, torch.nn.Module):
self.normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
).to(x.device)
x = self.normalize(x)
features = []
layer_idx = 0
# 確保所有VGG層都在與輸入相同的設備上
device = x.device
for i, layer in enumerate(self.vgg_layers):
layer = layer.to(device) # 確保層在正確的設備上
x = layer(x)
# 檢查是否到達目標特徵層
if layer_idx < len(self.feature_layers) and i == self.feature_layers[layer_idx]:
features.append(x)
layer_idx += 1
return features
def forward(self, pred, target):
"""
計算感知損失
pred: 預測圖像 [B, C, H, W]
target: 目標圖像 [B, C, H, W]
"""
# 確保模型在與輸入相同的設備上
device = pred.device
self.vgg_layers = nn.ModuleList([layer.to(device) for layer in self.vgg_layers])
# 確保輸入尺寸匹配
if pred.shape != target.shape:
pred = F.interpolate(pred, size=target.shape[2:], mode='bilinear', align_corners=False)
# 如果是單通道,轉換為三通道
if pred.shape[1] == 1:
pred = pred.repeat(1, 3, 1, 1)
if target.shape[1] == 1:
target = target.repeat(1, 3, 1, 1)
# 提取特徵
pred_features = self.extract_features(pred)
target_features = self.extract_features(target)
# 計算特徵損失
perceptual_loss = 0
for i, (pred_feat, target_feat) in enumerate(zip(pred_features, target_features)):
# 使用MSE計算特徵差異
feat_loss = F.mse_loss(pred_feat, target_feat)
perceptual_loss += self.weights[i] * feat_loss
return perceptual_loss
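A hedged usage sketch of the perceptual loss; VGG19 weights download on first use, and inputs are expected in [0, 1] (the module clamps and ImageNet-normalizes internally):

```python
import torch

perceptual = VGGPerceptualLoss()
pred = torch.rand(2, 1, 256, 256)     # single-channel masks are repeated to 3 channels
target = torch.rand(2, 1, 256, 256)
print(perceptual(pred, target).item())
```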

View File

@ -0,0 +1,22 @@
from multiprocessing import Value
import pstats
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms
from Model_Loss.CIOU_Loss import CIOULoss
from Model_Loss.Perceptual_Loss import VGGPerceptualLoss
class Segmentation_Loss(nn.Module):
def __init__(self) -> None:
super(Segmentation_Loss, self).__init__()
self.Perceptual_Loss = VGGPerceptualLoss()
self.CIOU = CIOULoss()
pass
def forward(self, Output_Result, GroundTruth_Result):
Perceptual_Loss = self.Perceptual_Loss(Output_Result, GroundTruth_Result)
CIOU_Loss = self.CIOU(Output_Result, GroundTruth_Result)
return Perceptual_Loss + CIOU_Loss
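A hedged sketch of the combined criterion; the mask shapes are illustrative and come from the segmentation stage:

```python
# The total is a plain unweighted sum: VGG perceptual term + CIOU term.
import torch

criterion = Segmentation_Loss()
pred_mask = torch.randn(2, 1, 256, 256)                  # logits (assumed)
gt_mask = torch.randint(0, 2, (2, 1, 256, 256)).float()  # binary ground truth
print(criterion(pred_mask, gt_mask).item())
```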

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,145 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class BinaryCrossEntropy(nn.Module):
"""
基本的二元交叉熵損失函數
"""
def __init__(self, reduction='mean'):
"""
初始化
Args:
reduction (str): 'mean', 'sum' 'none'指定如何減少損失
"""
super(BinaryCrossEntropy, self).__init__()
def forward(self, predictions, targets):
"""
計算二元交叉熵損失
Args:
predictions (torch.Tensor): 模型的預測輸出形狀為 [batch_size, ...]
targets (torch.Tensor): 目標標籤形狀與 predictions 相同
Returns:
torch.Tensor: 計算得到的損失值
"""
# 確保輸入是張量
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
return F.binary_cross_entropy_with_logits(predictions, targets)
# # 檢查輸出和標籤的維度是否匹配
# if predictions.shape[1] != targets.shape[1]:
# # 如果維度不匹配,使用交叉熵損失函數
# # 對於交叉熵損失標籤需要是類別索引而不是one-hot編碼
# # 將one-hot編碼轉換為類別索引
# _, targets_indices = torch.max(targets, dim=1)
# return F.cross_entropy(predictions, targets_indices, reduction=self.reduction)
# else:
# # 如果維度匹配,使用二元交叉熵損失函數
# # 使用 PyTorch 內建的 binary_cross_entropy_with_logits 函數
# # 它會自動應用 sigmoid 函數,避免輸入值超出 [0,1] 範圍
# return F.binary_cross_entropy_with_logits(predictions, targets, reduction=self.reduction)
class WeightedBinaryCrossEntropy(nn.Module):
"""
帶權重的二元交叉熵損失函數
"""
def __init__(self, pos_weight=1.0, neg_weight=1.0, reduction='mean'):
"""
初始化
Args:
pos_weight (float): 正樣本的權重
neg_weight (float): 負樣本的權重
reduction (str): 'mean', 'sum' 'none'指定如何減少損失
"""
super(WeightedBinaryCrossEntropy, self).__init__()
self.pos_weight = pos_weight
self.neg_weight = neg_weight
self.reduction = reduction
def forward(self, predictions, targets):
"""
計算帶權重的二元交叉熵損失
Args:
predictions (torch.Tensor): 模型的預測輸出形狀為 [batch_size, ...]
targets (torch.Tensor): 目標標籤形狀與 predictions 相同
Returns:
torch.Tensor: 計算得到的損失值
"""
# 確保輸入是張量
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
# 使用 sigmoid 確保預測值在 [0,1] 範圍內
predictions = torch.sigmoid(predictions)
# 計算帶權重的二元交叉熵損失
loss = -self.pos_weight * targets * torch.log(predictions + 1e-7) - \
self.neg_weight * (1 - targets) * torch.log(1 - predictions + 1e-7)
# 根據 reduction 方式返回損失
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
return loss
class LabelSmoothingBCE(nn.Module):
"""
帶標籤平滑的二元交叉熵損失函數
"""
def __init__(self, smoothing=0.1, reduction='mean'):
"""
初始化
Args:
smoothing (float): 標籤平滑係數範圍 [0, 1]
reduction (str): 'mean', 'sum' 'none'指定如何減少損失
"""
super(LabelSmoothingBCE, self).__init__()
self.smoothing = smoothing
self.reduction = reduction
def forward(self, predictions, targets):
"""
計算帶標籤平滑的二元交叉熵損失
Args:
predictions (torch.Tensor): 模型的預測輸出形狀為 [batch_size, ...]
targets (torch.Tensor): 目標標籤形狀與 predictions 相同
Returns:
torch.Tensor: 計算得到的損失值
"""
# 確保輸入是張量
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
# 應用標籤平滑
targets = targets * (1 - self.smoothing) + 0.5 * self.smoothing
# 使用 sigmoid 確保預測值在 [0,1] 範圍內
predictions = torch.sigmoid(predictions)
# 計算二元交叉熵損失
loss = -targets * torch.log(predictions + 1e-7) - (1 - targets) * torch.log(1 - predictions + 1e-7)
# 根據 reduction 方式返回損失
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
return loss
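In formulas (a summary of the code above, with ŷ = σ(z) the sigmoid of the logits, w₊ and w₋ the class weights, and ε the smoothing coefficient applied before the standard BCE):

```latex
\mathcal{L}_{\mathrm{wBCE}} = -\,w_{+}\,y\,\log(\hat{y} + 10^{-7}) \;-\; w_{-}\,(1 - y)\,\log(1 - \hat{y} + 10^{-7}),
\qquad
\tilde{y} = y\,(1 - \varepsilon) + 0.5\,\varepsilon
```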

122
README.md
View File

@ -1,16 +1,116 @@
main.py: main program file # Gastroscopy Disease Diagnosis System
## Project Overview
This project is a deep-learning-based system for automatic diagnosis of gastroscopy images, used to detect and classify stomach diseases, in particular gastric cancer (CA). It takes a two-stage segment-then-classify approach: gastroscopy images are first segmented to locate suspicious regions, and those regions are then classified to determine whether disease is present.
* Dataset classes: 3 (gastric cancer, diseased but non-cancerous, normal)
* Base model: Xception
## Main Features
- **Image preprocessing**: histogram equalization, adaptive histogram equalization, sharpening, HSV adjustment, gamma correction, and other enhancement methods
- **Data augmentation**: the training set is expanded with an image generator
- **Disease segmentation**: a GastroSegNet model segments suspicious regions in gastroscopy images
- **Disease classification**: a modified Xception model classifies the segmented regions into three classes: Normal, gastric cancer (CA), and to-be-confirmed (Have_Question)
- **Result visualization**: confusion matrices, training curves, and other visualization tools
## System Architecture
The system uses a two-stage pipeline:
1. **Segmentation stage**: a GastroSegNet model segments the input image to locate suspicious regions
2. **Classification stage**:
- the first classifier separates Normal from Others
- the second classifier separates gastric cancer (CA) from to-be-confirmed (Have_Question)
## Requirements
- Python 3.8+
- PyTorch 1.8+
- CUDA (recommended, to accelerate training)
- Other dependencies: torchvision, numpy, opencv-python, scikit-image, pandas, etc.
## Usage
### Data Preparation
1. Place the training data under `../Dataset/Training`
2. Place the test data under `../Dataset/Testing`
3. Place the annotation data (XML format) under `../Label_Image`
### Training the Model
```bash
uv run main.py
```
Training automatically performs the following steps:
1. Data preprocessing and augmentation
2. Train the segmentation model (GastroSegNet)
3. Process the images with the segmentation model
4. Train the classification model (modified Xception)
### Viewing Results
Training results are saved under `../Result`, including:
- training curves: `../Result/Training_Image`
- confusion matrices: `../Result/Matrix_Image`
- training result data: `../Result/Training_Result`
- best models: `../Result/save_the_best_model`
## Project Structure
- `main.py`: program entry point
- `experiments/`: experiment code
- `experiment.py`: main experiment flow
- `Training/`: training code
- `Models/`: model definitions
- `Image_Process/`: image-processing code
- `Model_Loss/`: loss-function definitions
- `Training_Tools/`: training utilities
- `utils/`: helper functions and configuration
## Configuration
System configuration is defined in `utils/Stomach_Config.py` (see the sketch below) and mainly includes:
- `Image_Enhance`: image enhancement methods
- `Loading_Config`: data-loading configuration
- `Training_Config`: training parameters
- `Model_Config`: model parameters
- `Save_Result_File_Config`: result-saving paths
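A hypothetical sketch of that file: the `Loading_Config` key names are the ones referenced by the loading code in this commit, everything else is illustrative.

```python
# Hypothetical sketch of utils/Stomach_Config.py; values are illustrative only.
Loading_Config = {
    "Training_Labels": ["stomach_cancer_Crop", "Normal_Crop", "Have_Question_Crop"],
    "XML_Loading_Label": ["CA"],          # labels for the XML/mask data (assumed)
}
Training_Config = {"Epochs": 100, "Batch_Size": 8, "Learning_Rate": 1e-4}
Model_Config = {"Image_Size": 256, "Class_Count": 904}
Save_Result_File_Config = {"Training_Result": "../Result/Training_Result"}
Image_Enhance = {"CLAHE": True, "Unsharp_Mask": True}  # enhancement switches (assumed)
```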
## Models
### Segmentation model (GastroSegNet)
Identifies suspicious regions in gastroscopy images and outputs a segmentation mask.
### Classification model (modified Xception)
Based on the Xception architecture and modified for the gastroscopy classification task; it mainly distinguishes the Normal, gastric cancer, and to-be-confirmed classes.
* Main entry point: main.py
## load_process ## load_process
### Handles reading image files, splitting out independent data (test, validation), loading independent data, and general file operations ### Handles reading image files, splitting out independent data (test, validation), loading independent data, and general file operations
File_Process : the main file-handling program; opening files, creating files, and checking whether a file exists all fall under its responsibility. A plain object and also the parent of LoadData * File_Process : the main file-handling program; opening files, creating files, and checking whether a file exists all fall under its responsibility. A plain object and also the parent of LoadData
LoadData : the main loading program; every file-reading action starts from it. Inherits File_Process (child object) * LoadData : the main loading program; every file-reading action starts from it. Inherits File_Process (child object)
Cutting_Indepentend_Image : object that loads the independent data (testing, Validation) * Cutting_Indepentend_Image : object that loads the independent data (testing, Validation)
## Calculate_Process
### Computes the model's evaluation metrics
* Calculate: computes the mean and standard deviation of the model's evaluation metrics and saves the results to file
## Image_Process ## Image_Process
### Handles data augmentation, image processing, and related operations ### Handles data augmentation, image processing, and related operations
* Generator_Content : builds the base generator items; the parent class of Image_Generator * Image_Generator : produces the augmented data and saves it to file.
* Image_Generator : produces the augmented data and saves it to file. Inherits Generator_Content (child object) * image_enhancement : performs image processing to enhance the data.
* image_enhancement : performs image processing and returns the data *
## all_models_tools
### Model control details such as early stopping, learning-rate reduction, and saving the best model
* all_model_tools: the callback methods
## Model_Tools ## Model_Tools
### Provides the model building blocks, including Convolution, Dense, and other model components ### Provides the model building blocks, including Convolution, Dense, and other model components
@ -42,11 +142,13 @@ Cutting_Indepentend_Image : object that loads the independent data (testing, Validation)
### Validates data types, input errors, and similar problems in the code ### Validates data types, input errors, and similar problems in the code
* Validation : validates code errors * Validation : validates code errors
## Draw ## draw_tools
### Plotting utilities ### Plotting utilities
* Draw_Tools : tool that draws confusion matrices and trend plots * draw : tool that draws confusion matrices and trend plots
* Grad_CAM : tool that draws heatmaps for model visualization * Grad_CAM : tool that draws heatmaps for model visualization
## Experiment ## Experiment
### Main program that runs the experiments ### Main program that runs the experiments
* Experiment : handles loading data, setting the model-compile details, running training, and validating results * Experiment : handles loading data, setting model and experiment details, running training, and validating results
* Model_All_Step : sets up the model training flow and its detailed parameter settings
* pytorch_Model: defines the model architecture

View File

@ -7,32 +7,68 @@ import numpy as np
import cv2 import cv2
class ListDataset(Dataset): class ListDataset(Dataset):
def __init__(self, data_list, labels_list, transform): def __init__(self, data_list, labels_list, Mask_List, transform):
self.data = data_list self.data = data_list
self.labels = labels_list self.labels = labels_list
self.Mask_Truth_List = Mask_List
self.transform = transform self.transform = transform
self.roots = []
def __len__(self): def __len__(self):
return len(self.data) return len(self.data)
def __getitem__(self, idx): def __getitem__(self, idx):
Image_Root = self.data[idx] Image_Root = self.data[idx]
Mask_Ground_Truth = None
if self.Mask_Truth_List is not None:
mask_path = self.Mask_Truth_List[idx]
if mask_path is not None: # 確保掩碼路徑不為None
try:
Mask_Ground_Truth = Image.open(mask_path).convert("RGB")
# 先不轉換為 tensor等待 transform 處理完後再轉換
except Exception as e:
print(e)
Split_Roots = Image_Root.split("/")
# Split_Roots = Split_Roots[-1].split("\\")
File_Name = Split_Roots[-1]
classes = Split_Roots[-2]
try: try:
with open(Image_Root, 'rb') as file: Images = Image.open(Image_Root).convert("RGB")
Images = Image.open(file).convert("RGB")
# Image = cv2.imread(Image_Root, cv2.IMREAD_COLOR) # 讀檔(彩色)
# Image = cv2.cvtColor(Image, cv2.COLOR_BGR2RGB)
except Exception as e: except Exception as e:
print(e) raise RuntimeError(f"Error loading image {Image_Root}") from e  # fail loudly; the previous assert always passed and left Images unbound
if self.transform is not "Generator": if self.transform != "Generator":
Images = self.transform(Images) Images = self.transform(Images)
if self.Mask_Truth_List is not None and Mask_Ground_Truth is not None and not isinstance(Mask_Ground_Truth, torch.Tensor):
Mask_Ground_Truth = self.transform(Mask_Ground_Truth)
Images = torch.tensor(np.array(Images)) # 確保 Images 是 tensor
if not isinstance(Images, torch.Tensor):
Images = torch.tensor(np.array(Images))
# 確保 Mask_Ground_Truth 是 tensor
if self.Mask_Truth_List is not None and Mask_Ground_Truth is not None and not isinstance(Mask_Ground_Truth, torch.Tensor):
Mask_Ground_Truth = torch.tensor(np.array(Mask_Ground_Truth))
label = self.labels[idx] label = self.labels[idx]
if self.Mask_Truth_List is not None:
# 如果掩碼為None創建一個與圖像相同大小的空掩碼
if Mask_Ground_Truth is None:
if isinstance(Images, torch.Tensor):
# 創建與圖像相同大小的空掩碼張量
Mask_Ground_Truth = torch.zeros_like(Images)
else:
# 如果圖像不是張量創建一個空的PIL圖像
Mask_Ground_Truth = Image.new('RGB', Images.size, (0, 0, 0))
if self.transform != "Generator":
Mask_Ground_Truth = self.transform(Mask_Ground_Truth)
return Images, Mask_Ground_Truth, label, File_Name, classes
# print(f"Dataset_Data: \n{sample}\n") # print(f"Dataset_Data: \n{sample}\n")
return Images, label return Images, label, File_Name, classes
class Training_Precesses: class Training_Precesses:
def __init__(self, ImageSize): def __init__(self, ImageSize):
@ -43,27 +79,19 @@ class Training_Precesses:
def Dataloader_Sampler(self, SubDataSet, Batch_Size, Sampler=True): def Dataloader_Sampler(self, SubDataSet, Batch_Size, Sampler=True):
if Sampler: if Sampler:
# Data_Loader = DataLoader(
# dataset=SubDataSet,
# batch_size=Batch_Size,
# num_workers=0,
# pin_memory=True,
# sampler=self.Setting_RandomSampler_Content(SubDataSet)
# )
Data_Loader = DataLoader( Data_Loader = DataLoader(
dataset=SubDataSet, dataset=SubDataSet,
batch_size=Batch_Size, batch_size=Batch_Size,
num_workers=0, num_workers=0,
pin_memory=True, pin_memory=True,
sampler=self.Setting_RandomSampler_Content(SubDataSet) sampler=self.Setting_WeightedRandomSampler_Content(SubDataSet)
) )
else: else:
Data_Loader = DataLoader( Data_Loader = DataLoader(
dataset=SubDataSet, dataset=SubDataSet,
batch_size=Batch_Size, batch_size=Batch_Size,
num_workers=0, num_workers=0,
pin_memory=True, pin_memory=True
shuffle=True
) )
return Data_Loader return Data_Loader
@ -84,6 +112,16 @@ class Training_Precesses:
if labels.ndim > 1: # If one-hot encoded if labels.ndim > 1: # If one-hot encoded
labels = np.argmax(labels, axis=1) labels = np.argmax(labels, axis=1)
# 確保標籤是整數類型
try:
# 嘗試將標籤轉換為整數
labels = labels.astype(np.int64)
except ValueError:
# 如果標籤是字符串,先將其映射到整數
unique_labels = np.unique(labels)
label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}
labels = np.array([label_to_idx[label] for label in labels])
# Count occurrences of each class # Count occurrences of each class
class_counts = np.bincount(labels) class_counts = np.bincount(labels)
class_weights = 1.0 / class_counts # Inverse frequency as weight class_weights = 1.0 / class_counts # Inverse frequency as weight
@ -98,20 +136,28 @@ class Training_Precesses:
def Setting_RandomSampler_Content(self, Dataset): def Setting_RandomSampler_Content(self, Dataset):
return RandomSampler(Dataset, generator = self.generator) return RandomSampler(Dataset, generator = self.generator)
def Setting_DataSet(self, Datas, Labels, transform = None): def Setting_DataSet(self, Datas, Labels, Mask_List, transform = None):
# Data preprocessing # Data preprocessing
if transform == None: if transform == None:
transform = transforms.Compose([ transform = transforms.Compose([
transforms.Resize((256, 256)) transforms.Resize((self.ImageSize, self.ImageSize))
]) ])
elif transform == "Transform": elif transform == "Transform":
transform = transforms.Compose([ transform = transforms.Compose([
transforms.Resize((256, 256)), transforms.Resize((self.ImageSize, self.ImageSize)),
transforms.ToTensor() transforms.ToTensor()
]) ])
elif transform == "Generator": elif transform == "Generator":
transform = "Generator" transform = "Generator"
# Create Dataset # Create Dataset
list_dataset = ListDataset(Datas, Labels , transform) list_dataset = ListDataset(Datas, Labels, Mask_List, transform)
return list_dataset return list_dataset
def Setting_SubsetRandomSampler_Content(self, SubDataSet):
# Calculate subset indices (example: using a fraction of the dataset)
dataset_size = len(SubDataSet)
subset_size = int(0.8 * dataset_size) # Use 80% of the dataset as an example
subset_indices = torch.randperm(dataset_size, generator=self.generator)[:subset_size]
return SubsetRandomSampler(subset_indices, generator=self.generator)
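The inverse-frequency weights computed from class_counts above are what a WeightedRandomSampler consumes, so minority classes are drawn more often. A minimal self-contained sketch with toy labels (illustrative only, not repository code):

import numpy as np
import torch
from torch.utils.data import WeightedRandomSampler

labels = np.array([0, 0, 0, 1, 2, 2])         # toy class indices
class_counts = np.bincount(labels)            # [3, 1, 2]
class_weights = 1.0 / class_counts            # rarer classes weigh more
sample_weights = class_weights[labels]        # one weight per sample

sampler = WeightedRandomSampler(
    weights=torch.as_tensor(sample_weights, dtype=torch.double),
    num_samples=len(sample_weights),          # draw one epoch's worth
    replacement=True,                         # allows oversampling minorities
)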

View File

@ -5,39 +5,9 @@ import torch
class Tool: class Tool:
def __init__(self) -> None: def __init__(self) -> None:
self.__ICG_Training_Root = ""
self.__Normal_Training_Root = ""
self.__Comprehensive_Training_Root = ""
self.__ICG_Test_Data_Root = ""
self.__Normal_Test_Data_Root = ""
self.__Comprehensive_Testing_Root = ""
self.__ICG_ImageGenerator_Data_Root = ""
self.__Normal_ImageGenerator_Data_Root = ""
self.__Comprehensive_Generator_Root = ""
self.__Labels = []
self.__OneHot_Encording = [] self.__OneHot_Encording = []
pass pass
def Set_Labels(self):
self.__Labels = ["stomach_cancer_Crop", "Normal_Crop", "Have_Question_Crop"]
# self.__Labels = ["NPC_negative", "NPC_positive"]
def Set_Save_Roots(self):
self.__ICG_Training_Root = "../Dataset/Training"
self.__Normal_Training_Root = "../Dataset/Training/CA"
self.__Comprehensive_Training_Root = "../Dataset/Training/Mixed"
self.__ICG_Test_Data_Root = "../Dataset/Testing"
self.__Normal_Test_Data_Root = "../Dataset/Training/Normal_TestData"
self.__Comprehensive_Testing_Root = "../Dataset/Training/Comprehensive_TestData"
self.__ICG_ImageGenerator_Data_Root = "../Dataset/ImageGenerator"
self.__Normal_ImageGenerator_Data_Root = "../Dataset/Training/Normal_ImageGenerator"
self.__Comprehensive_Generator_Root = "../Dataset/Training/Comprehensive_ImageGenerator"
def Set_OneHotEncording(self, content): def Set_OneHotEncording(self, content):
Counter = [] Counter = []
for i in range(len(content)): for i in range(len(content)):
@ -46,35 +16,6 @@ class Tool:
Counter = torch.tensor(Counter) Counter = torch.tensor(Counter)
self.__OneHot_Encording = functional.one_hot(Counter, len(content)) self.__OneHot_Encording = functional.one_hot(Counter, len(content))
pass pass
def Get_Data_Label(self):
'''
Get the Labels of the required data
'''
return self.__Labels
def Get_Save_Roots(self, choose):
'''Returns the Train and Test roots
choose = 1 => use the white-light (ICG) Labels
choose = 2 => use the filtered-light (Normal) Labels
otherwise => the comprehensive roots are returned
'''
if choose == 1:
return self.__ICG_Training_Root, self.__ICG_Test_Data_Root
if choose == 2:
return self.__Normal_Training_Root, self.__Normal_Test_Data_Root
else:
return self.__Comprehensive_Training_Root, self.__Comprehensive_Testing_Root
def Get_Generator_Save_Roots(self, choose):
'''Returns the ImageGenerator root for the chosen modality'''
if choose == 1:
return self.__ICG_ImageGenerator_Data_Root
if choose == 2:
return self.__Normal_ImageGenerator_Data_Root
else:
return self.__Comprehensive_Generator_Root
def Get_OneHot_Encording_Label(self): def Get_OneHot_Encording_Label(self):
return self.__OneHot_Encording return self.__OneHot_Encording

a00.xml Normal file
View File

@ -0,0 +1,26 @@
<annotation>
<folder>Processing_Image</folder>
<filename>a00.jpg</filename>
<path>D:\Programing\stomach_cancer\Processing_Image\a00.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>1074</width>
<height>1074</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>Have_Question</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>263</xmin>
<ymin>740</ymin>
<xmax>333</xmax>
<ymax>814</ymax>
</bndbox>
</object>
</annotation>
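A minimal sketch of consuming this Pascal VOC annotation with the standard library, assuming a00.xml is on disk as committed:

import xml.etree.ElementTree as ET

root = ET.parse("a00.xml").getroot()
for obj in root.iter("object"):
    name = obj.find("name").text              # e.g. "Have_Question"
    box = obj.find("bndbox")
    xmin, ymin = int(box.find("xmin").text), int(box.find("ymin").text)
    xmax, ymax = int(box.find("xmax").text), int(box.find("ymax").text)
    print(name, (xmin, ymin, xmax, ymax))     # -> Have_Question (263, 740, 333, 814)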

View File

@ -2,19 +2,6 @@ from Load_process.file_processing import Process_File
import datetime import datetime
import torch import torch
# def attention_block(input):
# channel = input.shape[-1]
# GAP = GlobalAveragePooling2D()(input)
# block = Dense(units = channel // 16, activation = "relu")(GAP)
# block = Dense(units = channel, activation = "sigmoid")(block)
# block = Reshape((1, 1, channel))(block)
# block = Multiply()([input, block])
# return block
class EarlyStopping: class EarlyStopping:
def __init__(self, patience=74, verbose=False, delta=0): def __init__(self, patience=74, verbose=False, delta=0):
self.patience = patience self.patience = patience
@ -45,12 +32,11 @@ class EarlyStopping:
print(f"Validation loss decreased ({self.best_loss:.6f} --> {val_loss:.6f}). Saving model to {save_path}") print(f"Validation loss decreased ({self.best_loss:.6f} --> {val_loss:.6f}). Saving model to {save_path}")
def call_back(model_name, index, optimizer): def call_back(Save_Root, index, optimizer):
File = Process_File() File = Process_File()
model_dir = '../Result/save_the_best_model/' + model_name File.JudgeRoot_MakeDir(Save_Root)
File.JudgeRoot_MakeDir(model_dir) modelfiles = File.Make_Save_Root('best_model( ' + str(datetime.date.today()) + " )-" + index + ".pt", Save_Root)
modelfiles = File.Make_Save_Root('best_model( ' + str(datetime.date.today()) + " )-" + index + ".pt", model_dir)
# model_mckp = ModelCheckpoint(modelfiles, monitor='val_loss', save_best_only=True, save_weights_only = True, mode='auto') # model_mckp = ModelCheckpoint(modelfiles, monitor='val_loss', save_best_only=True, save_weights_only = True, mode='auto')
@ -60,7 +46,6 @@ def call_back(model_name, index, optimizer):
optimizer, optimizer,
factor = 0.94, # amount by which the learning rate is reduced: new_lr = lr * factor factor = 0.94, # amount by which the learning rate is reduced: new_lr = lr * factor
patience = 2, # number of epochs with no improvement, after which the learning rate is reduced patience = 2, # number of epochs with no improvement, after which the learning rate is reduced
verbose = 0,
min_lr = 0 # lower bound on the learning rate min_lr = 0 # lower bound on the learning rate
) )
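A minimal sketch of the scheduler configured above: the learning rate is multiplied by factor once the monitored loss has stalled for more than patience epochs (the dummy linear model and constant losses are illustrative assumptions; the lr and scheduler arguments match the values in this commit):

import torch

model = torch.nn.Linear(4, 2)                 # dummy model
opt = torch.optim.SGD(model.parameters(), lr=0.045)
sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, factor=0.94, patience=2, min_lr=0)

for epoch, val_loss in enumerate([1.0, 1.0, 1.0, 1.0, 1.0]):
    sched.step(val_loss)                      # a stalled loss eventually triggers decay
    print(epoch, opt.param_groups[0]["lr"])   # stays 0.045, then drops to 0.045 * 0.94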

View File

@ -1,116 +0,0 @@
from all_models_tools.all_model_tools import attention_block
from keras.activations import softmax, sigmoid
from keras.applications import VGG16,VGG19, ResNet50, ResNet50V2, ResNet101, ResNet101V2, ResNet152, ResNet152V2, InceptionV3, InceptionResNetV2, MobileNet, MobileNetV2, DenseNet121, NASNetLarge, Xception
from keras.layers import GlobalAveragePooling2D, Dense, Flatten
from keras import regularizers
from keras.layers import Add
from application.Xception_indepentment import Xception_indepentment
def Original_VGG19_Model():
vgg19 = VGG19(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(vgg19.output)
dense = Dense(units = 4096, activation = "relu")(GAP)
dense = Dense(units = 4096, activation = "relu")(dense)
output = Dense(units = 2, activation = "softmax")(dense)
return vgg19.input, output
def Original_ResNet50_model():
xception = ResNet50(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(xception.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return xception.input, dense
def Original_NASNetLarge_model():
nasnetlarge = NASNetLarge(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(nasnetlarge.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return nasnetlarge.input, dense
def Original_DenseNet121_model():
Densenet201 = DenseNet121(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(Densenet201.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return Densenet201.input, dense
def Original_Xception_model():
xception = Xception(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(xception.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return xception.input, dense
def Original_VGG16_Model():
vgg16 = VGG16(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
flatten = Flatten()(vgg16.output)
dense = Dense(units = 4096, activation = "relu")(flatten)
dense = Dense(units = 4096, activation = "relu")(dense)
output = Dense(units = 2, activation = "softmax")(dense)
return vgg16.input, output
def Original_ResNet50v2_model():
resnet50v2 = ResNet50V2(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(resnet50v2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet50v2.input, dense
def Original_ResNet101_model():
resnet101 = ResNet101(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(resnet101.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet101.input, dense
def Original_ResNet101V2_model():
resnet101v2 = ResNet101V2(include_top = False, weights = "imagenet", input_shape = (512, 512, 3))
GAP = GlobalAveragePooling2D()(resnet101v2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet101v2.input, dense
def Original_ResNet152_model():
resnet152 = ResNet152(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(resnet152.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet152.input, dense
def Original_ResNet152V2_model():
resnet152v2 = ResNet152V2(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(resnet152v2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet152v2.input, dense
def Original_InceptionV3_model():
inceptionv3 = InceptionV3(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(inceptionv3.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return inceptionv3.input, dense
def Original_InceptionResNetV2_model():
inceptionResnetv2 = InceptionResNetV2(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(inceptionResnetv2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return inceptionResnetv2.input, dense
def Original_MobileNet_model():
mobilenet = MobileNet(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(mobilenet.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return mobilenet.input, dense
def Original_MobileNetV2_model():
mobilenetv2 = MobileNetV2(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(mobilenetv2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return mobilenetv2.input, dense

annotation_files.txt Normal file

View File

@ -23,7 +23,7 @@ class GradCAM:
def Processing_Main(self, Test_Dataloader, File_Path): def Processing_Main(self, Test_Dataloader, File_Path):
File = Process_File() File = Process_File()
for batch_idx, (images, labels) in enumerate(Test_Dataloader): for batch_idx, (images, labels, File_Name, File_Classes) in enumerate(Test_Dataloader):
# Move data to device # Move data to device
images = images.to(self.device, dtype=torch.float32) # [64, C, H, W] images = images.to(self.device, dtype=torch.float32) # [64, C, H, W]
labels = labels.to(self.device, dtype=torch.float32) # [64, num_classes] labels = labels.to(self.device, dtype=torch.float32) # [64, num_classes]
@ -36,14 +36,18 @@ class GradCAM:
# Process each image in the batch # Process each image in the batch
for i in range(images.size(0)): # Loop over batch size (64) for i in range(images.size(0)): # Loop over batch size (64)
class_idx = label_classes[i]
heatmap = heatmaps[i] # Extract heatmap for this image heatmap = heatmaps[i] # Extract heatmap for this image
overlaid_image = self.overlay_heatmap(heatmap, images[i]) overlaid_image = self.overlay_heatmap(heatmap, images[i], alpha=0.5)
# Create file path based on class # Create file path based on class
path = f"{File_Path}/class_{class_idx}" path = f"{File_Path}/{File_Classes[i]}"
File.JudgeRoot_MakeDir(path) File.JudgeRoot_MakeDir(path)
File.Save_CV2_File(f"batch_{batch_idx}_img_{i}.png", path, overlaid_image) # Save overlaid image
File.Save_CV2_File(f"batch_{batch_idx}_{File_Name[i]}", path, overlaid_image)
# # Save raw heatmap separately
# heatmap_resized = cv2.resize(heatmap, (images[i].shape[2], images[i].shape[1]), interpolation=cv2.INTER_CUBIC)
# heatmap_colored = (plt.cm.viridis(heatmap_resized)[:, :, :3] * 255).astype(np.uint8)
# File.Save_CV2_File(f"batch_{batch_idx}_img_{i}_heatmap.png", path, heatmap_colored)
def save_activations(self, module, input, output): def save_activations(self, module, input, output):
self.activations = output.detach() # [64, C, H', W'] self.activations = output.detach() # [64, C, H', W']
@ -86,19 +90,29 @@ class GradCAM:
grad_cam = torch.sum(weights * activations, dim=1)[0] # [64, H', W'] -> [H', W'] grad_cam = torch.sum(weights * activations, dim=1)[0] # [64, H', W'] -> [H', W']
grad_cam = F.relu(grad_cam) # Apply ReLU grad_cam = F.relu(grad_cam) # Apply ReLU
grad_cam = grad_cam / (grad_cam.max() + 1e-8) # Normalize to [0, 1] grad_cam = grad_cam / (grad_cam.max() + 1e-8) # Normalize to [0, 1]
return grad_cam.cpu().numpy()
# Apply Gaussian smoothing to reduce artifacts
grad_cam_np = grad_cam.cpu().numpy()
grad_cam_np = cv2.GaussianBlur(grad_cam_np, (5, 5), 0)
# Re-normalize after blur
grad_cam_np = grad_cam_np / (grad_cam_np.max() + 1e-8)
return grad_cam_np
def overlay_heatmap(self, heatmap, image, alpha=0.5): def overlay_heatmap(self, heatmap, image, alpha=0.5):
# Resize heatmap to match input image spatial dimensions # Resize heatmap to match input image spatial dimensions using INTER_CUBIC for smoother results
heatmap = np.uint8(255 * heatmap) # Scale to [0, 255] heatmap = np.uint8(255 * heatmap) # Scale to [0, 255]
heatmap = Image.fromarray(heatmap).resize((image.shape[1], image.shape[2]), Image.BILINEAR) heatmap = cv2.resize(heatmap, (image.shape[2], image.shape[1]), interpolation=cv2.INTER_CUBIC)
heatmap = np.array(heatmap) # Use viridis colormap for better interpretability
heatmap = plt.cm.jet(heatmap)[:, :, :3] # Apply colormap (jet) heatmap = plt.cm.viridis(heatmap)[:, :, :3] # Apply viridis colormap
# Convert image tensor to numpy and denormalize (assuming ImageNet stats) # Convert image tensor to numpy and denormalize (assuming ImageNet stats)
image_np = image.detach().cpu().permute(1, 2, 0).numpy() # [H, W, C] image_np = image.detach().cpu().permute(1, 2, 0).numpy() # [H, W, C]
# Ensure image is in [0, 1] range (if not already)
if image_np.max() > 1.0:
image_np = (image_np - image_np.min()) / (image_np.max() - image_np.min())
# Overlay # Overlay heatmap on the image
overlay = alpha * heatmap + (1 - alpha) * image_np / 255.0 overlay = alpha * heatmap + (1 - alpha) * image_np
overlay = np.clip(overlay, 0, 1) * 255 overlay = np.clip(overlay, 0, 1) * 255
return overlay.astype(np.uint8) # Return uint8 for cv2 return overlay.astype(np.uint8) # Return uint8 for cv2
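For clarity, the gradient-weighted combination that produces the class-activation map above, as a standalone sketch with dummy tensors (the shapes are illustrative assumptions):

import torch
import torch.nn.functional as F

activations = torch.randn(1, 8, 7, 7)         # [N, C, H', W'] feature maps
gradients = torch.randn(1, 8, 7, 7)           # d(score)/d(activations)

weights = gradients.mean(dim=(2, 3), keepdim=True)    # GAP over spatial dims
cam = F.relu((weights * activations).sum(dim=1))[0]   # weighted sum -> [H', W']
cam = cam / (cam.max() + 1e-8)                        # normalize to [0, 1]
print(cam.shape)                                      # torch.Size([7, 7])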

draw_tools/Saliency_Map.py Normal file
View File

@ -0,0 +1,195 @@
import torch
import torch.nn as nn
import numpy as np
import cv2
import matplotlib.pyplot as plt
from Load_process.file_processing import Process_File
class SaliencyMap:
def __init__(self, model):
self.model = model
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.model.to(self.device)
self.model.eval() # set to evaluation mode
def Processing_Main(self, Test_Dataloader, File_Path):
"""Process the test dataset and generate saliency maps"""
File = Process_File()
for batch_idx, (images, labels, File_Name, File_Classes) in enumerate(Test_Dataloader):
# Move the data to the device
images = images.to(self.device, dtype=torch.float32)
labels = labels.to(self.device, dtype=torch.float32)
# Get the ground-truth class indices
label_classes = torch.argmax(labels, dim=1).cpu().numpy()
# Generate a saliency map for every image in the batch
for i in range(images.size(0)):
# Take a single image and its class
image = images[i:i+1] # keep the batch dimension
target_class = label_classes[i]
# Generate the saliency map
saliency_map = self.generate_saliency(image, target_class)
# Overlay the saliency map on the original image
overlaid_image = self.overlay_saliency(saliency_map, image[0])
# Create the save path
path = f"{File_Path}/{File_Classes[i]}"
File.JudgeRoot_MakeDir(path)
# Save the result
File.Save_CV2_File(f"saliency_{batch_idx}_{File_Name[i]}", path, overlaid_image)
def generate_saliency(self, image, target_class):
"""Generate the saliency map for a single image"""
# Make sure gradients are required
image.requires_grad_(True)
# Forward pass
output = self.model(image)
# Clear any previous gradients
self.model.zero_grad()
# Build the one-hot encoded target
one_hot = torch.zeros_like(output)
one_hot[0, target_class] = 1
# Backward pass
output.backward(gradient=one_hot)
# Fetch the gradients
gradients = image.grad.data
# Compute the saliency map (absolute value, then max over the channel dimension)
saliency, _ = torch.max(gradients.abs(), dim=1)
# Convert to numpy and normalize
saliency_np = saliency.cpu().numpy()[0]
saliency_np = self._normalize(saliency_np)
# Apply smoothing to reduce noise
saliency_np = cv2.GaussianBlur(saliency_np, (5, 5), 0)
saliency_np = self._normalize(saliency_np) # normalize again
return saliency_np
def _normalize(self, x):
"""Normalize an array to the [0, 1] range"""
# Add a small epsilon to avoid division by zero
return (x - x.min()) / (x.max() - x.min() + 1e-8)
def overlay_saliency(self, saliency, image, alpha=0.5):
"""Overlay the saliency map on the original image"""
# Scale the saliency map to the [0, 255] range
saliency_uint8 = np.uint8(255 * saliency)
# Apply a colormap
heatmap = cv2.applyColorMap(saliency_uint8, cv2.COLORMAP_JET)
# Convert the image tensor to a numpy array
image_np = image.detach().cpu().permute(1, 2, 0).numpy()
# Make sure the image is within the [0, 1] range
if image_np.max() > 1.0:
image_np = (image_np - image_np.min()) / (image_np.max() - image_np.min())
# Convert the image to uint8
image_uint8 = np.uint8(255 * image_np)
# If the image is single-channel, convert it to 3 channels
if len(image_uint8.shape) == 2 or image_uint8.shape[2] == 1:
image_uint8 = cv2.cvtColor(image_uint8, cv2.COLOR_GRAY2BGR)
# Blend the saliency map with the original image
overlaid = cv2.addWeighted(heatmap, alpha, image_uint8, 1-alpha, 0)
return overlaid
def generate_smooth_saliency(self, image, target_class, n_samples=20, noise_level=0.1):
"""Generate a smoother saliency map using the SmoothGrad technique"""
# Standard deviation derived from the input image's value range
stdev = noise_level * (torch.max(image) - torch.min(image)).item()
# Accumulated gradients
accumulated_gradients = None
# Generate several noisy samples and compute their gradients
for _ in range(n_samples):
# Add Gaussian noise
noisy_image = image + torch.randn_like(image) * stdev
noisy_image.requires_grad_(True)
# Forward pass
output = self.model(noisy_image)
# Backward pass
self.model.zero_grad()
one_hot = torch.zeros_like(output)
one_hot[0, target_class] = 1
output.backward(gradient=one_hot)
# Fetch the gradients
gradients = noisy_image.grad.data
# Accumulate the gradients
if accumulated_gradients is None:
accumulated_gradients = gradients
else:
accumulated_gradients += gradients
# Average the gradients
avg_gradients = accumulated_gradients / n_samples
# Compute the saliency map
saliency, _ = torch.max(avg_gradients.abs(), dim=1)
# Convert to numpy and normalize
saliency_np = saliency.cpu().numpy()[0]
saliency_np = self._normalize(saliency_np)
return saliency_np
def generate_guided_saliency(self, image, target_class):
"""Generate a saliency map with Guided Backpropagation"""
# Save the original ReLU backward functions
relu_backward_functions = {}
for module in self.model.modules():
if isinstance(module, nn.ReLU):
relu_backward_functions[module] = module.backward
module.backward = self._guided_relu_backward
# Generate the saliency map
image.requires_grad_(True)
output = self.model(image)
self.model.zero_grad()
one_hot = torch.zeros_like(output)
one_hot[0, target_class] = 1
output.backward(gradient=one_hot)
# Fetch the gradients
gradients = image.grad.data
# Restore the original ReLU backward functions
for module in relu_backward_functions:
module.backward = relu_backward_functions[module]
# Compute the saliency map (keep positive gradients only)
saliency = torch.clamp(gradients, min=0)
saliency, _ = torch.max(saliency, dim=1)
# Convert to numpy and normalize
saliency_np = saliency.cpu().numpy()[0]
saliency_np = self._normalize(saliency_np)
return saliency_np
def _guided_relu_backward(self, grad_output):
"""Backward function for guided ReLU"""
# Only let positive gradients flow through
positive_grad_output = torch.clamp(grad_output, min=0)
return positive_grad_output
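A hypothetical usage sketch for the class above; the torchvision stand-in model and the input size are assumptions, and the import path follows this commit's layout:

import torch
import torchvision
from draw_tools.Saliency_Map import SaliencyMap

model = torchvision.models.resnet18(weights=None)      # stand-in classifier
sal = SaliencyMap(model)

image = torch.rand(1, 3, 224, 224, device=sal.device)  # one RGB image, batch dim kept
plain = sal.generate_saliency(image.clone(), target_class=0)
smooth = sal.generate_smooth_saliency(image.clone(), target_class=0,
                                      n_samples=8, noise_level=0.1)
print(plain.shape, smooth.shape)                       # both (224, 224)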

View File

@ -5,33 +5,51 @@ import matplotlib.figure as figure
import matplotlib.backends.backend_agg as agg import matplotlib.backends.backend_agg as agg
from Load_process.file_processing import Process_File from Load_process.file_processing import Process_File
def plot_history(Epochs, Losses, Accuracys, file_name, model_name): def plot_history(Losses, Accuracys, Save_Root, File_Name):
File = Process_File() File = Process_File()
plt.figure(figsize=(16,4)) plt.figure(figsize=(16,4))
plt.subplot(1,2,1) plt.subplot(1,2,1)
plt.plot(range(1, Epochs + 1), Losses[0])
plt.plot(range(1, Epochs + 1), Losses[1]) # Fix the dimension-mismatch problem
train_losses = Losses[0]
val_losses = Losses[1]
# Plot the training loss and validation loss separately
train_epochs = range(1, len(train_losses) + 1)
plt.plot(train_epochs, train_losses, label='Train')
val_epochs = range(1, len(val_losses) + 1)
plt.plot(val_epochs, val_losses, label='Validation')
plt.ylabel('Losses') plt.ylabel('Losses')
plt.xlabel('epoch') plt.xlabel('epoch')
plt.legend(['Train','Validation'], loc='upper left') plt.legend(loc='upper left')
plt.title('Model Loss') plt.title('Model Loss')
plt.subplot(1,2,2) if Accuracys is not None:
plt.plot(range(1, Epochs + 1), Accuracys[0]) plt.subplot(1,2,2)
plt.plot(range(1, Epochs + 1), Accuracys[1]) train_acc = Accuracys[0]
plt.ylabel('Accuracies') val_acc = Accuracys[1]
plt.xlabel('epoch')
plt.legend(['Train','Validation'], loc='upper left') # Plot the training accuracy and validation accuracy separately
plt.title('Model Accuracy') train_epochs_acc = range(1, len(train_acc) + 1)
plt.plot(train_epochs_acc, train_acc, label='Train')
val_epochs_acc = range(1, len(val_acc) + 1)
plt.plot(val_epochs_acc, val_acc, label='Validation')
plt.ylabel('Accuracies')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.title('Model Accuracy')
model_dir = '../Result/Training_Image/save_the_train_image( ' + str(datetime.date.today()) + " )" File.JudgeRoot_MakeDir(Save_Root)
File.JudgeRoot_MakeDir(model_dir) modelfiles = File.Make_Save_Root(f"{str(File_Name)}.png", Save_Root)
modelfiles = File.Make_Save_Root(str(model_name) + " " + str(file_name) + ".png", model_dir)
plt.savefig(modelfiles) plt.savefig(modelfiles)
plt.close("all") # 關閉圖表 plt.close("all") # 關閉圖表
def draw_heatmap(matrix, model_name, index): # confusion-matrix approach for two or more classes def draw_heatmap(matrix, Save_Root, File_Name, index): # confusion-matrix approach for two or more classes
File = Process_File() File = Process_File()
# Create the heatmap # Create the heatmap
@ -40,20 +58,19 @@ def draw_heatmap(matrix, model_name, index): # 二分類以上混淆矩陣做法
Ax = fig.add_subplot(111) Ax = fig.add_subplot(111)
sns.heatmap(matrix, square = True, annot = True, fmt = 'd', linecolor = 'white', cmap = "Purples", ax = Ax) # draw the heatmap; cmap sets the color palette sns.heatmap(matrix, square = True, annot = True, fmt = 'd', linecolor = 'white', cmap = "Purples", ax = Ax) # draw the heatmap; cmap sets the color palette
model_dir = '../Result/Matrix_Image/model_matrix_image ( ' + str(datetime.date.today()) + " )" File.JudgeRoot_MakeDir(Save_Root)
File.JudgeRoot_MakeDir(model_dir) modelfiles = File.Make_Save_Root(f"{File_Name}-{str(index)}.png", Save_Root)
modelfiles = File.Make_Save_Root(str(model_name) + "-" + str(index) + ".png", model_dir)
# confusion.figure.savefig(modelfiles) # confusion.figure.savefig(modelfiles)
# Configure the figure # Configure the figure
Ax.set_title(str(model_name) + " confusion matrix") Ax.set_title(f"{File_Name} confusion matrix")
Ax.set_xlabel("X-Predict label of the model") Ax.set_xlabel("X-Predict label of the model")
Ax.set_ylabel("Y-True label of the model") Ax.set_ylabel("Y-True label of the model")
# Save the figure to a file # Save the figure to a file
canvas.print_figure(modelfiles) canvas.print_figure(modelfiles)
def Confusion_Matrix_of_Two_Classification(Model_Name, Matrix, index): def Confusion_Matrix_of_Two_Classification(Matrix, Save_Root, File_Name, index):
File = Process_File() File = Process_File()
fx = sns.heatmap(Matrix, annot=True, cmap='turbo') fx = sns.heatmap(Matrix, annot=True, cmap='turbo')
@ -63,13 +80,20 @@ def Confusion_Matrix_of_Two_Classification(Model_Name, Matrix, index):
fx.set_xlabel('answer Values ') fx.set_xlabel('answer Values ')
fx.set_ylabel('Predicted Values') fx.set_ylabel('Predicted Values')
# labels the boxes # Set the tick labels dynamically from the matrix dimensions
fx.xaxis.set_ticklabels(['False','True']) n_classes = Matrix.shape[0]
fx.yaxis.set_ticklabels(['False','True']) # For a 2-class problem, use False/True labels
if n_classes == 2:
labels = ['False', 'True']
else:
# For multi-class problems, use numeric labels
labels = [str(i) for i in range(n_classes)]
fx.xaxis.set_ticklabels(labels)
fx.yaxis.set_ticklabels(labels)
model_dir = '../Result/model_matrix_image ( ' + str(datetime.date.today()) + " )" File.JudgeRoot_MakeDir(Save_Root)
File.JudgeRoot_MakeDir(model_dir) modelfiles = File.Make_Save_Root(f"{File_Name}-{str(index)}.png", Save_Root)
modelfiles = File.Make_Save_Root(str(Model_Name) + "-" + str(index) + ".png", model_dir)
plt.savefig(modelfiles) plt.savefig(modelfiles)
plt.close("all") # 關閉圖表 plt.close("all") # 關閉圖表

View File

@ -1,210 +0,0 @@
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torchmetrics.functional import auroc
from torch.nn import functional
from all_models_tools.all_model_tools import call_back
from Model_Loss.Loss import Entropy_Loss
from merge_class.merge import merge
from draw_tools.Grad_cam import GradCAM
import time
import torch.optim as optim
import numpy as np
import torch
import pandas as pd
import datetime
class All_Step:
def __init__(self, Model, Epoch, Number_Of_Classes, Model_Name, Experiment_Name):
self.Model = Model
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.Epoch = Epoch
self.Number_Of_Classes = Number_Of_Classes
self.Model_Name = Model_Name
self.Experiment_Name = Experiment_Name
def Training_Step(self, train_subset, val_subset, train_loader, val_loader, model_name, fold, TargetLayer):
# Reinitialize model and optimizer for each fold
# self.Model = self.Model.__class__(self.Number_Of_Classes).to(self.device) # Reinitialize model
Optimizer = optim.SGD(self.Model.parameters(), lr=0.045, momentum=0.9, weight_decay=0.01)
model_path, early_stopping, scheduler = call_back(model_name, f"_fold{fold}", Optimizer)
criterion = Entropy_Loss() # Custom loss function
Merge_Function = merge()
# Lists to store metrics for this fold
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
epoch = 0
# Epoch loop
for epoch in range(self.Epoch):
self.Model.train() # Start training
running_loss = 0.0
all_train_preds = []
all_train_labels = []
processed_samples = 0
# Calculate epoch start time
start_time = time.time()
total_samples = len(train_subset) # Total samples in subset, not DataLoader
total_Validation_samples = len(val_subset)
# Progress bar for training batches
epoch_iterator = tqdm(train_loader, desc=f"Fold {fold + 1}/5, Epoch [{epoch + 1}/{self.Epoch}]")
for inputs, labels in epoch_iterator:
inputs, labels = inputs.to(self.device), labels.to(self.device) # Already tensors from DataLoader
Optimizer.zero_grad()
outputs = self.Model(inputs)
loss = criterion(outputs, labels)
loss.backward()
Optimizer.step()
running_loss += loss.item()
# Collect training predictions and labels
Output_Values, Output_Indexs = torch.max(outputs, dim=1)
True_Indexs = np.argmax(labels.cpu().numpy(), axis=1)
all_train_preds.append(Output_Indexs.cpu().numpy())
all_train_labels.append(True_Indexs)
processed_samples += inputs.size(0) # Use size(0) for batch size
# Calculate progress and timing
progress = (processed_samples / total_samples) * 100
elapsed_time = time.time() - start_time
iterations_per_second = processed_samples / elapsed_time if elapsed_time > 0 else 0
eta = (total_samples - processed_samples) / iterations_per_second if iterations_per_second > 0 else 0
time_str = f"{int(elapsed_time//60):02d}:{int(elapsed_time%60):02d}<{int(eta//60):02d}:{int(eta%60):02d}"
# Calculate batch accuracy (number of correct labels / total number of labels in this batch)
batch_accuracy = (Output_Indexs.cpu().numpy() == True_Indexs).mean()
# Update progress bar
epoch_iterator.set_postfix_str(
f"{processed_samples}/{total_samples} [{time_str}, {iterations_per_second:.2f}it/s, "
f"acc={batch_accuracy:.3f}, loss={loss.item():.3f}]"
)
epoch_iterator.close()
# Merge predictions and labels
all_train_preds = Merge_Function.merge_data_main(all_train_preds, 0, len(all_train_preds))
all_train_labels = Merge_Function.merge_data_main(all_train_labels, 0, len(all_train_labels))
Training_Loss = running_loss / len(train_loader)
train_accuracy = accuracy_score(all_train_labels, all_train_preds)
train_losses.append(Training_Loss)
train_accuracies.append(train_accuracy)
# Validation step
self.Model.eval()
val_loss = 0.0
all_val_preds = []
all_val_labels = []
start_Validation_time = time.time()
epoch_iterator = tqdm(val_loader, desc=f"\tValidation-Fold {fold + 1}/5, Epoch [{epoch + 1}/{self.Epoch}]")
with torch.no_grad():
for inputs, labels in epoch_iterator:
inputs, labels = inputs.to(self.device), labels.to(self.device)
outputs = self.Model(inputs)
loss = criterion(outputs, labels)
val_loss += loss.item()
# Collect validation predictions and labels
Output_Values, Output_Indexs = torch.max(outputs, dim=1)
True_Indexs = np.argmax(labels.cpu().numpy(), axis=1)
all_val_preds.append(Output_Indexs.cpu().numpy())
all_val_labels.append(True_Indexs)
processed_samples += inputs.size(0) # Use size(0) for batch size
# Calculate progress and timing
progress = (processed_samples / total_Validation_samples) * 100
elapsed_time = time.time() - start_Validation_time
iterations_per_second = processed_samples / elapsed_time if elapsed_time > 0 else 0
eta = (total_Validation_samples - processed_samples) / iterations_per_second if iterations_per_second > 0 else 0
time_str = f"{int(elapsed_time//60):02d}:{int(elapsed_time%60):02d}<{int(eta//60):02d}:{int(eta%60):02d}"
# Calculate batch accuracy
batch_accuracy = (Output_Indexs.cpu().numpy() == True_Indexs).mean()
# Update progress bar
epoch_iterator.set_postfix_str(
f"{processed_samples}/{total_Validation_samples} [{time_str}, {iterations_per_second:.2f}it/s, "
f"acc={batch_accuracy:.3f}, loss={loss.item():.3f}]"
)
epoch_iterator.close()
print("\n")
# Merge predictions and labels
all_val_preds = Merge_Function.merge_data_main(all_val_preds, 0, len(all_val_preds))
all_val_labels = Merge_Function.merge_data_main(all_val_labels, 0, len(all_val_labels))
val_loss /= len(val_loader)
val_accuracy = accuracy_score(all_val_labels, all_val_preds)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
print(f"Traini Loss: {Training_Loss:.4f}, Accuracy: {train_accuracy:0.2f}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:0.2f}\n")
if epoch % 10 == 0:
Grad = GradCAM(self.Model, TargetLayer)
Grad.Processing_Main(val_loader, f"../Result/GradCAM_Image/Validation/GradCAM_Image({str(datetime.date.today())})/fold-{str(fold)}/")
# Early stopping
early_stopping(val_loss, self.Model, model_path)
if early_stopping.early_stop:
print(f"Early stopping triggered in Fold {fold + 1} at epoch {epoch + 1}")
break
# Learning rate adjustment
scheduler.step(val_loss)
Total_Epoch = epoch + 1
return self.Model, model_path, train_losses, val_losses, train_accuracies, val_accuracies, Total_Epoch
def Evaluate_Model(self, cnn_model, Test_Dataloader):
# (Unchanged Evaluate_Model method)
cnn_model.eval()
True_Label, Predict_Label = [], []
True_Label_OneHot, Predict_Label_OneHot = [], []
loss = 0.0
with torch.no_grad():
for images, labels in Test_Dataloader:
images, labels = torch.as_tensor(images).to(self.device), torch.as_tensor(labels).to(self.device)
outputs = cnn_model(images)
Output_Values, Output_Indexs = torch.max(outputs, 1)
True_Indexs = np.argmax(labels.cpu().numpy(), 1)
True_Label.append(Output_Indexs.cpu().numpy())
Predict_Label.append(True_Indexs)
Predict_Label_OneHot.append(torch.tensor(functional.one_hot(Output_Indexs, self.Number_Of_Classes), dtype=torch.float32).cpu().numpy()[0])
True_Label_OneHot.append(torch.tensor(labels, dtype=torch.int).cpu().numpy()[0])
loss /= len(Test_Dataloader)
True_Label_OneHot = torch.as_tensor(True_Label_OneHot, dtype=torch.int)
Predict_Label_OneHot = torch.as_tensor(Predict_Label_OneHot, dtype=torch.float32)
accuracy = accuracy_score(True_Label, Predict_Label)
precision = precision_score(True_Label, Predict_Label, average="macro")
recall = recall_score(True_Label, Predict_Label, average="macro")
AUC = auroc(Predict_Label_OneHot, True_Label_OneHot, num_labels=self.Number_Of_Classes, task="multilabel", average="macro")
f1 = f1_score(True_Label, Predict_Label, average="macro")
return True_Label, Predict_Label, loss, accuracy, precision, recall, AUC, f1

View File

@ -0,0 +1,91 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class Encode_Block(nn.Module):
"""基本的卷積塊Conv2d + BatchNorm + ReLU"""
def __init__(self, in_channels, out_channels):
super(Encode_Block, self).__init__()
self.conv = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
return self.conv(x)
class GastroSegNet(nn.Module):
"""簡單的U-Net實現"""
def __init__(self, in_channels=3, out_channels=3, features=[32, 64, 128, 256]):
super(GastroSegNet, self).__init__()
# Encoder (downsampling path)
self.encoder = nn.ModuleList()
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
# First encoder layer
self.encoder.append(Encode_Block(in_channels, features[0]))
# Remaining encoder layers
for i in range(1, len(features)):
self.encoder.append(Encode_Block(features[i-1], features[i]))
# Bottleneck (deepest layer)
self.bottleneck = Encode_Block(features[-1], features[-1] * 2)
# Decoder (upsampling path)
self.decoder = nn.ModuleList()
self.upconv = nn.ModuleList()
# Create the upsampling and decoding layers
for i in range(len(features)):
self.upconv.append(
nn.ConvTranspose2d(features[-1-i] * 2, features[-1-i], kernel_size=2, stride=2)
)
self.decoder.append(
Encode_Block(features[-1-i] * 2, features[-1-i])
)
# Final output layer
self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)
def forward(self, x):
# Store the skip connections
skip_connections = []
# Encoder path
for encoder_layer in self.encoder:
x = encoder_layer(x)
skip_connections.append(x)
x = self.pool(x)
# Bottleneck
x = self.bottleneck(x)
# Reverse the skip-connection list
skip_connections = skip_connections[::-1]
# Decoder path
for i, (upconv, decoder) in enumerate(zip(self.upconv, self.decoder)):
# Upsample
x = upconv(x)
# Fetch the matching skip connection
skip = skip_connections[i]
# Resize if the shapes do not match
if x.shape != skip.shape:
x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=False)
# Concatenate the skip connection
x = torch.cat([skip, x], dim=1)
# Pass through the decoder block
x = decoder(x)
# Final output
return self.final_conv(x)
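A quick shape sanity check for the network above; the absolute import path is an assumption based on this commit's layout, and the input size just needs H and W divisible by 2**4:

import torch
from experiments.Models.GastroSegNet_Model import GastroSegNet

net = GastroSegNet(in_channels=3, out_channels=3)
x = torch.randn(1, 3, 256, 256)               # H, W divisible by 2**4
with torch.no_grad():
    y = net(x)
print(y.shape)                                # torch.Size([1, 3, 256, 256])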

View File

@ -0,0 +1,148 @@
import torch.nn as nn
import torch.nn.functional as F
import torch
class SeparableConv2d(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True):
super(SeparableConv2d, self).__init__()
self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride,
padding=padding, groups=in_channels, bias=bias)
self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
padding=0, bias=bias)
def forward(self, x):
x = self.depthwise(x)
x = self.pointwise(x)
return x
class EntryFlow(nn.Module):
def __init__(self, in_channels=3):
super(EntryFlow, self).__init__()
self.conv1 = nn.Conv2d(in_channels, 32, 3, stride=2, padding=1, bias=False, dilation = 2)
self.bn1 = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 64, 3, padding=1, bias=False, dilation = 2)
self.bn2 = nn.BatchNorm2d(64)
self.conv3_residual = nn.Sequential(
SeparableConv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(inplace=False), # changed here
SeparableConv2d(128, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.MaxPool2d(3, stride=2, padding=1)
)
self.conv3_shortcut = nn.Conv2d(64, 128, 1, stride=2, bias=False)
self.bn3 = nn.BatchNorm2d(128)
self.conv4_residual = nn.Sequential(
nn.ReLU(inplace=False), # changed here
SeparableConv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(inplace=False), # changed here
SeparableConv2d(256, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.MaxPool2d(3, stride=2, padding=1)
)
self.conv4_shortcut = nn.Conv2d(128, 256, 1, stride=2, bias=False)
self.bn4 = nn.BatchNorm2d(256)
self.conv5_residual = nn.Sequential(
nn.ReLU(inplace=False), # changed here
SeparableConv2d(256, 728, 3, padding=1),
nn.BatchNorm2d(728),
nn.ReLU(inplace=False), # changed here
SeparableConv2d(728, 728, 3, padding=1),
nn.BatchNorm2d(728),
nn.MaxPool2d(3, stride=2, padding=1)
)
self.conv5_shortcut = nn.Conv2d(256, 728, 1, stride=2, bias=False)
self.bn5 = nn.BatchNorm2d(728)
def forward(self, x):
x = F.relu(self.bn1(self.conv1(x)))
x = F.relu(self.bn2(self.conv2(x)))
residual = self.conv3_residual(x)
shortcut = self.conv3_shortcut(x)
x = F.relu(self.bn3(residual + shortcut))
residual = self.conv4_residual(x)
shortcut = self.conv4_shortcut(x)
x = F.relu(self.bn4(residual + shortcut))
residual = self.conv5_residual(x)
shortcut = self.conv5_shortcut(x)
x = F.relu(self.bn5(residual + shortcut))
return x
class MiddleFlow(nn.Module):
def __init__(self):
super(MiddleFlow, self).__init__()
self.conv_residual = nn.Sequential(
nn.ReLU(inplace=False), # changed here
SeparableConv2d(728, 728, 3, padding=1),
nn.BatchNorm2d(728),
nn.ReLU(inplace=False), # changed here
SeparableConv2d(728, 728, 3, padding=1),
nn.BatchNorm2d(728),
nn.ReLU(inplace=False), # changed here
SeparableConv2d(728, 728, 3, padding=1),
nn.BatchNorm2d(728)
)
def forward(self, x):
return self.conv_residual(x) + x
class ExitFlow(nn.Module):
def __init__(self, num_classes=2):
super(ExitFlow, self).__init__()
self.conv1_residual = nn.Sequential(
nn.ReLU(inplace=False), # changed here
SeparableConv2d(728, 1024, 3, padding=1),
nn.BatchNorm2d(1024),
nn.ReLU(inplace=False), # changed here
SeparableConv2d(1024, 1024, 3, padding=1),
nn.BatchNorm2d(1024),
nn.MaxPool2d(3, stride=2, padding=1)
)
self.conv1_shortcut = nn.Conv2d(728, 1024, 1, stride=2, bias=False)
self.bn1 = nn.BatchNorm2d(1024)
self.conv2 = nn.Sequential(
SeparableConv2d(1024, 1536, 3, padding=1),
nn.BatchNorm2d(1536),
nn.ReLU(inplace=False), # changed here
SeparableConv2d(1536, 2048, 3, padding=1),
nn.BatchNorm2d(2048),
nn.ReLU(inplace=False) # changed here
)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.Hidden = nn.Linear(2048, 1025)
self.fc = nn.Linear(1025, num_classes)
def forward(self, x):
residual = self.conv1_residual(x)
shortcut = self.conv1_shortcut(x)
x = F.relu(self.bn1(residual + shortcut))
x = self.conv2(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.Hidden(x)
x = self.fc(x)
return x
class Xception(nn.Module):
def __init__(self, num_classes=2):
super(Xception, self).__init__()
self.entry_flow = EntryFlow(in_channels=3) # input channels default to 3
self.middle_flow = nn.Sequential(*[MiddleFlow() for _ in range(8)])
self.exit_flow = ExitFlow(num_classes)
def forward(self, x):
# Standard forward pass
x = self.entry_flow(x)
x = self.middle_flow(x)
x = self.exit_flow(x)
return x
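A forward-pass sketch for the model above (the import path matches this commit; batch size, class count, and image size are illustrative, and the adaptive average pool makes the head size-agnostic):

import torch
from experiments.Models.Xception_Model_Modification import Xception

net = Xception(num_classes=3)
x = torch.randn(2, 3, 299, 299)
with torch.no_grad():
    logits = net(x)
print(logits.shape)                           # torch.Size([2, 3])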

View File

@ -0,0 +1,30 @@
import torch.nn as nn
import timm
from utils.Stomach_Config import Model_Config
class ModifiedXception(nn.Module):
def __init__(self, num_classes):
super(ModifiedXception, self).__init__()
# Load the pretrained Xception model and drop its final (fc) layer
self.base_model = timm.create_model(Model_Config["Model Name"], pretrained=True)
self.base_model.fc = nn.Identity() # remove the original fully connected layer
# Add a global average pooling layer, a hidden layer, and an output layer
self.global_avg_pool = nn.AdaptiveAvgPool1d(Model_Config["GPA Output Nodes"]) # global average pooling
self.hidden_layer = nn.Linear(Model_Config["GPA Output Nodes"], Model_Config["Linear Hidden Nodes"]) # hidden layer; its input size depends on Xception's output size
self.output_layer = nn.Linear(Model_Config["Linear Hidden Nodes"], Model_Config["Output Linear Nodes"]) # output layer, sized by the number of classes
# Activation functions and dropout
self.relu = nn.ReLU()
self.softmax = nn.Softmax(1)
self.dropout = nn.Dropout(Model_Config["Dropout Rate"])
def forward(self, x):
x = self.base_model(x) # Xception backbone
x = self.global_avg_pool(x) # global average pooling
x = self.relu(self.hidden_layer(x)) # hidden layer + ReLU
x = self.dropout(x) # Dropout
x = self.output_layer(x) # output layer
return x
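Model_Config is not shown in this commit, so as a hedged sketch assume "Model Name" maps to timm's legacy "xception", whose classifier attribute is fc; with literal values the wiring then looks like this:

import timm
import torch
import torch.nn as nn

base = timm.create_model("xception", pretrained=False)  # assumed model name
base.fc = nn.Identity()                       # expose the pooled 2048-d features

x = torch.randn(1, 3, 299, 299)
with torch.no_grad():
    feats = base(x)
print(feats.shape)                            # torch.Size([1, 2048])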

View File

@ -0,0 +1,461 @@
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torchmetrics.functional import auroc
from torch.nn import functional
from torch import nn
import torch
from sklearn.model_selection import KFold
from torchinfo import summary
from sklearn.metrics import confusion_matrix
from all_models_tools.all_model_tools import call_back
from Model_Loss.Loss import Entropy_Loss
from Model_Loss.binary_cross_entropy import BinaryCrossEntropy
from merge_class.merge import merge
from draw_tools.Saliency_Map import SaliencyMap
from utils.Stomach_Config import Training_Config, Loading_Config, Save_Result_File_Config
from experiments.Models.Xception_Model_Modification import Xception
from Load_process.LoadData import Loding_Data_Root
from Training_Tools.PreProcess import Training_Precesses
from Calculate_Process.Calculate import Calculate
from Load_process.file_processing import Process_File
from draw_tools.draw import plot_history, draw_heatmap
import time
import torch.optim as optim
import numpy as np
import pandas as pd
import datetime
import argparse
import os
class Identification_Block_Training_Step(Loding_Data_Root, Training_Precesses):
def __init__(self, Experiment_Name, Best_Model_Save_Root):
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.Model = self.Construct_Identification_Model_CUDA() # model variable
self.train_subset = None # subset of the training dataset
self.val_subset = None # subset of the validation dataset
self.train_loader = None # training DataLoader
self.val_loader = None # validation DataLoader
self.Mask = None # mask variable; receives the Mask produced by GastroSegNet
self.Grad = None # gradient variable, used later to run Grad-CAM
self.model_name = Training_Config["Model_Name"] # which model is used (a pretrained model or a custom design)
self.Epoch = Training_Config["Epoch"] # number of epochs to train this model
self.train_batch_size = Training_Config["Train_Batch_Size"] # batch size for training the model
self.Experiment_Name = Experiment_Name
self.Number_Of_Classes = Loading_Config["Identification_Label_Length"]
self.Best_Model_Save_Root = Best_Model_Save_Root
# Initialize the inherited parent classes
Training_Precesses.__init__(self, Training_Config["Image_Size"])
Loding_Data_Root.__init__(self, Loading_Config["Training_Labels"], Loading_Config["Train_Data_Root"], Loading_Config["Test_Data_Root"])
pass
def Processing_Main(self, training_dataset, Test_Dataloader=None):
# Lists to store metrics across all folds
all_fold_train_losses = []
all_fold_val_losses = []
all_fold_train_accuracies = []
all_fold_val_accuracies = []
Calculate_Process = Calculate()
File = Process_File()
Calculate_Tool = [Calculate() for i in range(3)]
Best_Model_Path = None
Best_Validation_Loss = 100000000
# K-Fold loop
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(training_dataset)))): # K-Fold cross-validation loop
print(f"\nStarting Fold {fold + 1}/5")
# Create training and validation subsets for this fold
self.train_subset = torch.utils.data.Subset(training_dataset, train_idx)
self.val_subset = torch.utils.data.Subset(training_dataset, val_idx)
# Wrap subsets in DataLoaders (use same batch size as original)
self.train_loader = self.Dataloader_Sampler(self.train_subset , self.train_batch_size, True)
self.val_loader = self.Dataloader_Sampler(self.val_subset, self.train_batch_size, True)
# Model training and validation
model_path, train_losses, Validation_losses, train_accuracies, Validation_accuracies, Total_Epoch, best_val_loss = self.Training_And_Validation(fold)
# Store fold results
all_fold_train_losses.append(train_losses)
all_fold_val_losses.append(Validation_losses)
all_fold_train_accuracies.append(train_accuracies)
all_fold_val_accuracies.append(Validation_accuracies)
# Make sure the tensors are on the CPU so they can be converted to NumPy arrays
if torch.is_tensor(train_losses):
train_losses = train_losses.cpu().detach().numpy()
if torch.is_tensor(Validation_losses):
Validation_losses = Validation_losses.cpu().detach().numpy()
if torch.is_tensor(train_accuracies):
train_accuracies = train_accuracies.cpu().detach().numpy()
if torch.is_tensor(Validation_accuracies):
Validation_accuracies = Validation_accuracies.cpu().detach().numpy()
Losses = [train_losses, Validation_losses]
Accuracies = [train_accuracies, Validation_accuracies]
plot_history(Losses, Accuracies, f"{Save_Result_File_Config['Identification_Plot_Image']}/{self.Experiment_Name}", f"train-{str(fold)}") # plot the training curves and hand the figure off to be saved
# Evaluate on the test data
True_Label, Predict_Label, loss, accuracy, precision, recall, AUC, f1 = self.Evaluate_Model(self.Model, Test_Dataloader, fold)
# Record this fold's results
Calculate_Process.Append_numbers(loss, accuracy, precision, recall, AUC, f1)
self.record_matrix_image(True_Label, Predict_Label, fold)
print(self.record_everyTime_test_result(loss, accuracy, precision, recall, AUC, f1, fold, self.Experiment_Name)) # record the predictions after this round of training and export them to a CSV file
# Per-class evaluation with the identification model
Calculate_Tool = self.Evaluate_Per_Class_Metrics(self.Model, Test_Dataloader, Loading_Config["Training_Labels"], Calculate_Tool)
if best_val_loss < Best_Validation_Loss:
Best_Validation_Loss = best_val_loss
Best_Model_Path = model_path
Calculate_Process.Calculate_Mean()
Calculate_Process.Calculate_Std()
File.Save_CSV_File(f"../Result/Experiment_Result/{self.Experiment_Name}/Total/{str(datetime.date.today())}", f"Total_Training_Result-{fold}", Calculate_Process.Output_Style())
for Calculate_Every_Class in Calculate_Tool:
Calculate_Every_Class.Calculate_Mean()
Calculate_Every_Class.Calculate_Std()
# Aggregate results across folds
avg_train_losses = np.mean([losses[-1] for losses in all_fold_train_losses])
avg_val_losses = np.mean([losses[-1] for losses in all_fold_val_losses])
avg_train_accuracies = np.mean([acc[-1] for acc in all_fold_train_accuracies])
avg_val_accuracies = np.mean([acc[-1] for acc in all_fold_val_accuracies])
print(f"\nCross-Validation Results:")
print(f"Avg Train Loss: {avg_train_losses:.4f}, Avg Val Loss: {avg_val_losses:.4f}")
print(f"Avg Train Acc: {avg_train_accuracies:.4f}, Avg Val Acc: {avg_val_accuracies:.4f}")
File.Save_TXT_File(content = f"\nCross-Validation Results:\nAvg Train Loss: {avg_train_losses:.4f}, Avg Val Loss: {avg_val_losses:.4f}\nAvg Train Acc: {avg_train_accuracies:.4f}, Avg Val Acc: {avg_val_accuracies:.4f}\n", Save_Root = Save_Result_File_Config["Identification_Average_Result"], File_Name = "Training_Average_Result")
# Return the best model path and the aggregated metrics
return Best_Model_Path, Calculate_Process, Calculate_Tool
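For reference, a self-contained sketch of the fold/subset/DataLoader wiring used in Processing_Main, with TensorDataset standing in for the repository's dataset (all sizes are toy values):

import torch
from torch.utils.data import DataLoader, Subset, TensorDataset
from sklearn.model_selection import KFold

dataset = TensorDataset(torch.randn(20, 3, 8, 8), torch.randint(0, 2, (20,)))
kf = KFold(n_splits=5, shuffle=True, random_state=42)

for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(dataset)))):
    train_loader = DataLoader(Subset(dataset, train_idx), batch_size=4)
    val_loader = DataLoader(Subset(dataset, val_idx), batch_size=4)
    print(fold, len(train_loader.dataset), len(val_loader.dataset))  # 16 and 4 per fold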
def Training_And_Validation(self, Fold):
# Reinitialize model and optimizer for each fold
# self.Model = self.Model.__class__(self.Number_Of_Classes).to(self.device) # Reinitialize model
self.Model = self.Construct_Identification_Model_CUDA() # model initialization
Optimizer = optim.SGD(self.Model.parameters(), lr=0.045, momentum=0.9, weight_decay = Training_Config["weight_decay"])
model_path, early_stopping, scheduler = call_back(self.Best_Model_Save_Root, f"fold{Fold}", Optimizer)
# Lists to store metrics for this fold
train_losses = []
Validation_losses = []
train_accuracies = []
Validation_accuracies = []
# Epoch loop
for epoch in range(self.Epoch):
self.Model.train() # Start training
Training_Loss = 0.0
All_Predict_List, All_Label_List = [], []
# Progress bar for training batches
epoch_iterator = tqdm(self.train_loader, desc=f"Fold {Fold + 1}/5, Epoch [{epoch + 1}/{self.Epoch}]")
Start_Time = time.time()
for inputs, labels, File_Name, File_Classes in epoch_iterator:
Optimizer.zero_grad() # zero the gradients to prevent accumulation
Total_Losses, Training_Loss, All_Predict_List, All_Label_List, Predict_Indexs, Truth_Indexs = self.Model_Branch(inputs, labels, All_Predict_List, All_Label_List, Training_Loss)
Total_Losses.backward()
Optimizer.step()
self.Calculate_Progress_And_Timing(inputs, Predict_Indexs, Truth_Indexs, self.train_subset, Total_Losses, epoch_iterator, Start_Time)
train_losses, train_accuracies, Training_Loss, Train_accuracy = self.Calculate_Average_Scores(self.train_loader, Training_Loss, All_Predict_List, All_Label_List, train_losses, train_accuracies)
# Validation step
self.Model.eval()
val_loss = 0.0
all_val_preds = []
all_val_labels = []
start_Validation_time = time.time()
epoch_iterator = tqdm(self.val_loader, desc=f"\tValidation-Fold {Fold + 1}/5, Epoch [{epoch + 1}/{self.Epoch}]")
with torch.no_grad():
for inputs, labels, File_Name, File_Classes in epoch_iterator:
Validation_Total_Loss, val_loss, all_val_preds, all_val_labels, Predict_Indexs, Truth_Indexs = self.Model_Branch(inputs, labels, all_val_preds, all_val_labels, val_loss)
self.Calculate_Progress_And_Timing(inputs, Predict_Indexs, Truth_Indexs, self.val_subset, Validation_Total_Loss, epoch_iterator, start_Validation_time)
Validation_losses, Validation_accuracies, val_loss, val_accuracy = self.Calculate_Average_Scores(self.val_loader, val_loss, all_val_preds, all_val_labels, Validation_losses, Validation_accuracies)
print(f"Traini Loss: {Training_Loss:.4f}, Accuracy: {Train_accuracy:0.2f}, Validation Loss: {val_loss:.4f}, Accuracy: {val_accuracy:0.2f}\n")
# if epoch % 5 == 0:
# # Grad = GradCAM(self.Model, TargetLayer)
# # Grad.Processing_Main(val_loader, f"../Result/GradCAM_Image/Validation/GradCAM_Image({str(datetime.date.today())})/{self.Experiment_Name}/fold-{str(fold)}/")
# # 創建SaliencyMap實例
# saliency_map = SaliencyMap(self.Model)
# # 處理測試數據集
# saliency_map.Processing_Main(self.val_loader, f"../Result/Saliency_Image/Validation/Saliency_Image({str(datetime.date.today())})/{self.Experiment_Name}/fold-{str(Fold)}/")
# Early stopping
early_stopping(val_loss, self.Model, model_path)
best_val_loss = early_stopping.best_loss
if early_stopping.early_stop:
print(f"Early stopping triggered in Fold {Fold + 1} at epoch {epoch + 1}")
break
# Learning rate adjustment
scheduler.step(val_loss)
Total_Epoch = epoch + 1
# Make sure the model path is returned
return model_path, train_losses, Validation_losses, train_accuracies, Validation_accuracies, Total_Epoch, best_val_loss
def Construct_Identification_Model_CUDA(self):
# Get the number of output nodes from the configuration
Model = Xception(num_classes=Loading_Config["Identification_Label_Length"])
# The summary() call is omitted to avoid the Mask-argument problem;
# print the model structure directly instead
print(f"Model structure: {Model}")
# Print the model parameters
for name, parameters in Model.named_parameters():
print(f"Layer Name: {name}, Parameters: {parameters.size()}")
return self.Convert_Model_To_CUDA(Model)
def Convert_Model_To_CUDA(self, model):
if torch.cuda.device_count() > 1:
model = nn.DataParallel(model)
model = model.to(self.device)
return model
def Model_Branch(self, Input_Images, Labels, All_Predict_List : list, All_Label_List : list, running_loss):
# Move the tensors to the device directly; no need to recreate them
Input_Images.requires_grad = False
Input_Images, Labels = Input_Images.to(self.device), Labels.to(self.device)
# Keep the input images within a valid range
Input_Images = torch.clamp(Input_Images, 0.0, 1.0)
Predict = self.Model(Input_Images)
# Collect training predictions and labels
Output_Values, Output_Indexs = torch.max(Predict, dim=1)
True_Indexs = np.argmax(Labels.cpu().numpy(), axis=1)
All_Predict_List.append(Output_Indexs.cpu().numpy())
All_Label_List.append(True_Indexs)
# No need to clamp Predict, since the loss function was changed to use binary_cross_entropy_with_logits
Losses = self.Losses(Predict, Labels)
running_loss += Losses.item()
return Losses, running_loss, All_Predict_List, All_Label_List, Output_Indexs, True_Indexs
def Losses(self, Predicts, Labels):
criterion = BinaryCrossEntropy()
Loss = criterion(Predicts, Labels)
return Loss
def Evaluate_Model(self, cnn_model, Test_Dataloader, index, identification_model_path=None):
# Load the identification model weights (if a path was provided)
if identification_model_path is not None:
cnn_model.load_state_dict(torch.load(identification_model_path))
else:
print("No identification model path provided; evaluating the current in-memory weights.")
# Evaluate the model
cnn_model.eval()
True_Label, Predict_Label = [], []
True_Label_OneHot, Predict_Label_OneHot = [], []
loss = 0.0
with torch.no_grad():
for images, labels, File_Name, File_Classes in Test_Dataloader:
Total_Loss, Running_Loss, Predict_Label, True_Label, Output_Indexs, Truth_Index = self.Model_Branch(images, labels, Predict_Label, True_Label, 0)
Predict_Label_OneHot.append(torch.tensor(functional.one_hot(Output_Indexs, self.Number_Of_Classes), dtype=torch.float32).cpu().numpy()[0])
True_Label_OneHot.append(torch.tensor(labels, dtype=torch.int).cpu().numpy()[0])
loss /= len(Test_Dataloader)
True_Label_OneHot = torch.as_tensor(True_Label_OneHot, dtype=torch.int)
Predict_Label_OneHot = torch.as_tensor(Predict_Label_OneHot, dtype=torch.float32)
accuracy = accuracy_score(True_Label, Predict_Label)
precision = precision_score(True_Label, Predict_Label, average="macro")
recall = recall_score(True_Label, Predict_Label, average="macro")
AUC = auroc(Predict_Label_OneHot, True_Label_OneHot, num_labels=self.Number_Of_Classes, task="multilabel", average="macro")
f1 = f1_score(True_Label, Predict_Label, average="macro")
# Compute the confusion matrix
matrix = confusion_matrix(True_Label, Predict_Label)
draw_heatmap(matrix, f"{Save_Result_File_Config['Identification_Marix_Image']}/{self.Experiment_Name}/Identification_Test_Marix_Image", f"confusion_matrix", index) # call the confusion-matrix drawing function
return True_Label, Predict_Label, loss, accuracy, precision, recall, AUC, f1
def Evaluate_Per_Class_Metrics(self, cnn_model, Test_Dataloader, Labels, Calculate_Tool, identification_model_path=None):
"""
Evaluate the model on the test dataloader and compute binary classification metrics for each class.
Parameters:
- cnn_model: The trained model to evaluate.
- Test_Dataloader: DataLoader for the test dataset.
- Labels: List of class names for better readability.
- Calculate_Tool: Tool for recording metrics.
- identification_model_path: Path to the trained model weights (optional).
Returns:
- Calculate_Tool: Updated with binary classification metrics for each class.
"""
# Load the identification model weights (if a path was provided)
if identification_model_path is not None:
cnn_model.load_state_dict(torch.load(identification_model_path))
# Check whether a GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# Switch to evaluation mode
cnn_model.eval()
all_results = []
all_labels = []
# Run predictions the PyTorch way
with torch.no_grad(): # no gradient computation
for inputs, labels, _, _ in Test_Dataloader:
inputs = inputs.to(device)
labels = labels.to(device)
outputs = cnn_model(inputs)
_, predicted = torch.max(outputs, 1)
all_results.append(predicted.cpu().numpy())
all_labels.append(np.argmax(labels.cpu().numpy(), axis=1))
# Merge the per-batch results into single arrays
Predict = np.concatenate(all_results)
y_test = np.concatenate(all_labels)
print(f"Predictions: {Predict}\n")
# Compute the overall evaluation metrics
accuracy = accuracy_score(y_test, Predict)
# Print the overall accuracy
print(f"Overall accuracy: {accuracy:.4f}")
# Assume there are only two classes, 0 and 1
# Compute binary metrics for class 0 (class 0 as positive, class 1 as negative)
print(f"Binary classification metrics for class {Labels[0]}:")
y_binary_0 = (y_test == 0).astype(int)
predict_binary_0 = (Predict == 0).astype(int)
# Compute the binary metrics
binary_accuracy_0 = accuracy_score(y_binary_0, predict_binary_0)
binary_precision_0 = precision_score(y_binary_0, predict_binary_0, zero_division=0)
binary_recall_0 = recall_score(y_binary_0, predict_binary_0, zero_division=0)
binary_f1_0 = f1_score(y_binary_0, predict_binary_0, zero_division=0)
# Print the binary metrics
print(f" Accuracy: {binary_accuracy_0:.4f}")
print(f" Precision: {binary_precision_0:.4f}")
print(f" Recall: {binary_recall_0:.4f}")
print(f" F1: {binary_f1_0:.4f}\n")
# Record the metrics for class 0
Calculate_Tool[0].Append_numbers(0, binary_accuracy_0, binary_precision_0, binary_recall_0, 0, binary_f1_0)
# Compute binary metrics for class 1 (class 1 as positive, class 0 as negative)
print(f"Binary classification metrics for class {Labels[1]}:")
y_binary_1 = (y_test == 1).astype(int)
predict_binary_1 = (Predict == 1).astype(int)
# Compute the binary metrics
binary_accuracy_1 = accuracy_score(y_binary_1, predict_binary_1)
binary_precision_1 = precision_score(y_binary_1, predict_binary_1, zero_division=0)
binary_recall_1 = recall_score(y_binary_1, predict_binary_1, zero_division=0)
binary_f1_1 = f1_score(y_binary_1, predict_binary_1, zero_division=0)
# Print the binary metrics
print(f" Accuracy: {binary_accuracy_1:.4f}")
print(f" Precision: {binary_precision_1:.4f}")
print(f" Recall: {binary_recall_1:.4f}")
print(f" F1: {binary_f1_1:.4f}\n")
# Record the metrics for class 1
Calculate_Tool[1].Append_numbers(0, binary_accuracy_1, binary_precision_1, binary_recall_1, 0, binary_f1_1)
return Calculate_Tool
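The per-class evaluation above is a one-vs-rest reduction; a standalone sketch with made-up labels (not results from this repository):

import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_test = np.array([0, 0, 1, 1, 1, 0])
predict = np.array([0, 1, 1, 1, 0, 0])

for cls in (0, 1):                            # treat `cls` as the positive class
    y_bin = (y_test == cls).astype(int)
    p_bin = (predict == cls).astype(int)
    print(cls,
          accuracy_score(y_bin, p_bin),
          precision_score(y_bin, p_bin, zero_division=0),
          recall_score(y_bin, p_bin, zero_division=0),
          f1_score(y_bin, p_bin, zero_division=0))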
def Calculate_Progress_And_Timing(self, inputs, Predict_Labels, Truth_Labels, Subset, loss, epoch_iterator, Start_Time):
# Progress bookkeeping; note the counter is local to this call, so it
# reflects only the current batch rather than a running total across the epoch
total_samples = len(Subset)
processed_samples = inputs.size(0) # size(0) is the batch size
# Timing: elapsed wall-clock time, throughput, and estimated time remaining
elapsed_time = time.time() - Start_Time
iterations_per_second = processed_samples / elapsed_time if elapsed_time > 0 else 0
eta = (total_samples - processed_samples) / iterations_per_second if iterations_per_second > 0 else 0
time_str = f"{int(elapsed_time//60):02d}:{int(elapsed_time%60):02d}<{int(eta//60):02d}:{int(eta%60):02d}"
# loss may be a tensor or a plain float; call item() only on tensors
batch_loss = loss.item() if torch.is_tensor(loss) else loss
# Batch accuracy
batch_accuracy = (Predict_Labels.cpu().numpy() == Truth_Labels).mean()
# Update progress bar
epoch_iterator.set_postfix_str(
f"{processed_samples}/{total_samples} [{time_str}, {iterations_per_second:.2f}it/s, "
f"acc={batch_accuracy:.3f}, loss={batch_loss:.3f}]"
)
return epoch_iterator
def Calculate_Average_Scores(self, Data_Loader, Running_Losses, All_Predict_Labels, All_Truth_Labels, Losses, Accuracies):
Merge_Function = merge()
All_Predicts = Merge_Function.merge_data_main(All_Predict_Labels, 0, len(All_Predict_Labels))
All_Truths = Merge_Function.merge_data_main(All_Truth_Labels, 0, len(All_Truth_Labels))
Running_Losses /= len(Data_Loader)
Accuracy = accuracy_score(All_Truths, All_Predicts)
Losses.append(Running_Losses)
Accuracies.append(Accuracy)
return Losses, Accuracies, Running_Losses, Accuracy
def record_matrix_image(self, True_Labels, Predict_Labels, index):
'''Plot the confusion matrix as a heatmap'''
# Compute the confusion matrix
matrix = confusion_matrix(True_Labels, Predict_Labels)
# Confusion_Matrix_of_Two_Classification(matrix, Save_Result_File_Config["Identification_Marix_Image"], Experiment_Name, index)
draw_heatmap(matrix, Save_Result_File_Config["Identification_Marix_Image"], self.Experiment_Name, index) # plot the confusion matrix
def record_everyTime_test_result(self, loss, accuracy, precision, recall, auc, f, indexs, model_name):
'''Record the result of a single test run and write it out to a file'''
File = Process_File()
Dataframe = pd.DataFrame(
{
"model_name" : str(model_name),
"loss" : "{:.2f}".format(loss),
"precision" : "{:.2f}%".format(precision * 100),
"recall" : "{:.2f}%".format(recall * 100),
"accuracy" : "{:.2f}%".format(accuracy * 100),
"f" : "{:.2f}%".format(f * 100),
"AUC" : "{:.2f}%".format(auc * 100)
}, index = [indexs])
File.Save_CSV_File(Save_Result_File_Config["Identification_Every_Fold_Training_Result"], "train_result", Dataframe)
return Dataframe
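# Small standalone sketch of accumulating per-fold rows like the DataFrame
# above; the file name and append behaviour are assumptions, since
# Save_CSV_File's implementation is not shown here.
import pandas as pd

rows = [
    pd.DataFrame({"model_name": "fold0", "loss": "0.31", "accuracy": "91.20%"}, index=[0]),
    pd.DataFrame({"model_name": "fold1", "loss": "0.28", "accuracy": "92.40%"}, index=[1]),
]
pd.concat(rows).to_csv("train_result.csv", index_label="fold")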

View File

@ -0,0 +1,467 @@
from tqdm import tqdm
from torch import nn
from sklearn.model_selection import KFold
from skimage import measure
from all_models_tools.all_model_tools import call_back
from utils.Stomach_Config import Training_Config, Loading_Config, Save_Result_File_Config
from Load_process.LoadData import Loding_Data_Root
from Training_Tools.PreProcess import Training_Precesses
from ..Models.GastroSegNet_Model import GastroSegNet
from Model_Loss.Segmentation_Loss import Segmentation_Loss
from draw_tools.draw import plot_history
import time
import torch.optim as optim
import torch
import torch.nn.functional as F
import numpy as np
import cv2
import os
class Segmentation_Block_Training_Step(Loding_Data_Root, Training_Precesses):
def __init__(self, Best_Model_Save_Root):
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # use the GPU when available, otherwise the CPU
self.Model = self.Construct_Segment_Model_CUDA() # model handle
self.train_subset = None # training subset for the current fold
self.val_subset = None # validation subset for the current fold
self.train_loader = None # training DataLoader
self.val_loader = None # validation DataLoader
self.Mask = None # mask produced by the segmentation network (RD Net output)
self.Grad = None # gradients, used later for Grad-CAM
self.model_name = Training_Config["Mask_Experiment_Name"] # experiment/model name (pre-trained or custom design)
self.epoch = Training_Config["Epoch"] # number of training epochs
self.train_batch_size = Training_Config["Train_Batch_Size"] # training batch size
self.Experiment_Name = Training_Config["Mask_Experiment_Name"] # experiment/model name (pre-trained or custom design)
self.Best_Model_Save_Root = Best_Model_Save_Root
# Initialise the inherited classes
Training_Precesses.__init__(self, Training_Config["Image_Size"])
Loding_Data_Root.__init__(self, Loading_Config["Training_Labels"], Loading_Config["Train_Data_Root"], Loading_Config["Test_Data_Root"])
def Processing_Main(self, training_dataset, return_processed_images=False, test_dataloader=None):
Best_Model_Path = None
Best_Validation_Loss = float("inf") # best (lowest) validation loss seen across folds
# K-Fold loop
kf = KFold(n_splits=5, shuffle=True, random_state=42)
Training_Data_Lader = self.Dataloader_Sampler(training_dataset, self.train_batch_size, True)
for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(training_dataset)))): # K-fold cross-validation loop
print(f"\nStarting Fold {fold + 1}/5")
# Create training and validation subsets for this fold
self.train_subset = torch.utils.data.Subset(training_dataset, train_idx)
self.val_subset = torch.utils.data.Subset(training_dataset, val_idx)
# Wrap subsets in DataLoaders (use same batch size as original)
self.train_loader = self.Dataloader_Sampler(self.train_subset , self.train_batch_size, True)
self.val_loader = self.Dataloader_Sampler(self.val_subset, self.train_batch_size, True)
# Train and validate the model for this fold
model_path, Training_Losses, Validation_Losses, Total_Epoch, best_val_loss = self.Training_And_Validation(fold)
Losses = [Training_Losses, Validation_Losses]
if best_val_loss < Best_Validation_Loss:
Best_Validation_Loss = best_val_loss
Best_Model_Path = model_path
# Plot the training history and hand the figure off for saving
plot_history(Losses, None, f"{Save_Result_File_Config['Segument_Plot_Image']}/{self.Experiment_Name}", f"train-{str(fold)}")
# Optionally return the processed images (for the later identification training)
if return_processed_images: # the flag is a boolean, so test it directly rather than against None
# Load the best model
self.Model = self.Construct_Segment_Model_CUDA()
self.Model.load_state_dict(torch.load(Best_Model_Path))
self.Model.eval()
# Process the data
with torch.no_grad():
for Input_Images, Mask_Ground_Truth_Image, Labels, File_Name, File_Classes in Training_Data_Lader:
# Use Model_Branch to segment each image, passing the file names so the bounding-box images can be saved
self.Model_Branch(Input_Images, Mask_Ground_Truth_Image, 0.0, Save_Result_File_Config["Segument_Bounding_Box_Image"], return_processed_image=True, file_names=File_Name, Classes=File_Classes)
avg_test_loss = self.evaluate_on_test(Best_Model_Path, test_dataloader)
return Best_Model_Path, avg_test_loss
return Best_Model_Path
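# Minimal sketch of the fold-wise Subset/DataLoader pattern used in
# Processing_Main; the dummy dataset and batch size below are illustrative.
import torch
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, Subset, TensorDataset

dataset = TensorDataset(torch.randn(20, 3, 32, 32), torch.zeros(20))  # dummy data
kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(dataset)))):
    train_loader = DataLoader(Subset(dataset, train_idx), batch_size=4, shuffle=True)
    val_loader = DataLoader(Subset(dataset, val_idx), batch_size=4, shuffle=True)
    print(f"fold {fold}: {len(train_loader.dataset)} train / {len(val_loader.dataset)} val")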
def Training_And_Validation(self, Fold):
self.Model = self.Construct_Segment_Model_CUDA() # re-initialise the model for this fold
Optimizer = optim.SGD(self.Model.parameters(), lr=0.045, momentum=0.9, weight_decay=0.01) # initialise the optimiser
model_path, early_stopping, scheduler = call_back(f"{self.Best_Model_Save_Root}/{self.Experiment_Name}", f"fold{Fold}", Optimizer) # initialise the anti-overfitting callbacks
epoch = 0
Training_Losses, Validation_Losses = [], []
Training_Running_Losses, Validation_Running_Losses = 0.0, 0.0
# Epoch loop
for epoch in range(self.epoch):
self.Model.train() # Start training
# Progress bar for training batches
epoch_iterator = tqdm(self.train_loader, desc=f"Fold {Fold + 1}/5, Epoch [{epoch + 1}/{self.epoch}]")
Start_Time = time.time()
for Input_Images, Mask_Ground_Truth_Image, Labels, File_Name, File_Classes in epoch_iterator:
Optimizer.zero_grad() # zero the gradients so they do not accumulate across batches
Training_Total_Losses, Training_Running_Losses = self.Model_Branch(Input_Images, Mask_Ground_Truth_Image, Training_Running_Losses)
Training_Total_Losses.backward()
Optimizer.step()
epoch_iterator = self.Calculate_Progress_And_Timing(Input_Images, self.train_subset, Training_Total_Losses, epoch_iterator, Start_Time)
Training_Losses, Training_Running_Losses = self.Calculate_Average_Scores(self.train_loader, Training_Running_Losses, Training_Losses)
# Validation step
self.Model.eval()
epoch_iterator_Validation = tqdm(self.val_loader, desc=f"\tValidation-Fold {Fold + 1}/5, Epoch [{epoch + 1}/{self.epoch}]")
with torch.no_grad():
for Input_Images, Mask_Ground_Truth_Image, Labels, File_Name, File_Classes in epoch_iterator_Validation:
Validation_Total_Losses, Validation_Running_Losses = self.Model_Branch(Input_Images, Mask_Ground_Truth_Image, Validation_Running_Losses)
# Reset Start_Time for each batch, so validation timing reflects the current batch only
Start_Time = time.time()
epoch_iterator_Validation = self.Calculate_Progress_And_Timing(Input_Images, self.val_subset, Validation_Total_Losses, epoch_iterator_Validation, Start_Time)
Validation_Losses, Validation_Running_Losses = self.Calculate_Average_Scores(self.val_loader, Validation_Running_Losses, Validation_Losses)
print(f"Traini Loss: {Training_Running_Losses:.4f}, Validation Loss: {Validation_Running_Losses:.4f}\n")
# Early stopping
early_stopping(Validation_Running_Losses, self.Model, model_path)
if early_stopping.early_stop:
print(f"Early stopping triggered in Fold {Fold + 1} at epoch {epoch + 1}")
break
# Scheduler step
scheduler.step(Validation_Running_Losses)
Total_Epoch = epoch + 1
best_val_loss = early_stopping.best_loss
return model_path, Training_Losses, Validation_Losses, Total_Epoch, best_val_loss
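# call_back(...) is assumed to return (model_path, early_stopping, scheduler);
# below is a minimal sketch of a compatible early-stopping helper under that
# assumption -- it is not the project's actual implementation.
import torch

class EarlyStoppingSketch:
    def __init__(self, patience=10):
        self.patience = patience
        self.best_loss = float("inf")
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss, model, model_path):
        if val_loss < self.best_loss:
            self.best_loss = val_loss                    # new best: reset patience
            self.counter = 0
            torch.save(model.state_dict(), model_path)   # checkpoint the best weights
        else:
            self.counter += 1                            # no improvement this epoch
            if self.counter >= self.patience:
                self.early_stop = True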
def Construct_Segment_Model_CUDA(self):
GaSeg = GastroSegNet()
# Print a model summary
print("\n==== GastroSegNet model summary ====\n")
print(f"Input channels: {GaSeg.encoder[0].conv[0].in_channels}")
print(f"Output channels: {GaSeg.final_conv.out_channels}")
# Total number of trainable parameters
total_params = sum(p.numel() for p in GaSeg.parameters() if p.requires_grad)
print(f"Trainable parameters: {total_params:,}")
# Model structure
print("\nModel structure:")
print(f"- Encoder stages: {len(GaSeg.encoder)}")
print(f"- Decoder stages: {len(GaSeg.decoder)}")
print("\nFeature-channel configuration:")
features_str = ", ".join([str(GaSeg.encoder[i].conv[0].out_channels) for i in range(len(GaSeg.encoder))])
print(f" - Encoder feature channels: {features_str}")
print(f" - Bottleneck feature channels: {GaSeg.bottleneck.conv[0].out_channels}")
print("\n==== End of summary ====\n")
return self.Convert_Model_To_CUDA(GaSeg)
def Convert_Model_To_CUDA(self, model):
if torch.cuda.device_count() > 1:
model = nn.DataParallel(model)
model = model.to(self.device)
return model
def Model_Branch(self, Input_Images, Mask_Ground_Truth_Image, running_loss, Save_Dir = None, return_processed_image=False, file_names=None, Classes=None):
# Move the tensors to the device directly; there is no need to rebuild them
Input_Images.requires_grad = False
Input_Images = Input_Images.to(self.device)
Segmentation_Output = self.Model(Input_Images)
# If the caller asked for the processed images (inference stage)
if return_processed_image:
# Resize the model output to match the input
Segmentation_Output = self.Compare_Image_And_Resize_It(Segmentation_Output, Input_Images)
# Post-process the segmentation output: pick the candidate box, black out pixels outside it, and save the bounding-box images
return self.process_segmentation_output(Input_Images, Segmentation_Output, Save_Dir, save_bbox_images=True, file_names=file_names, Classes=Classes)
Mask_Ground_Truth_Image = Mask_Ground_Truth_Image.to(self.device)
# Resize the model output to match the ground-truth mask
Segmentation_Output = self.Compare_Image_And_Resize_It(Segmentation_Output, Mask_Ground_Truth_Image)
Losses = self.Losses(Segmentation_Output, Mask_Ground_Truth_Image)
# No need to set requires_grad manually; the loss computation tracks gradients on its own
running_loss += Losses.item()
return Losses, running_loss
def Compare_Image_And_Resize_It(self, Image, Target): # resize one image to match the other
# Check the dimensionality of Target
if Target.dim() < 3:
# A 2-D tensor is expanded to 4-D: [batch_size, channels, height, width]
Target = Target.unsqueeze(0).unsqueeze(0)
elif Target.dim() == 3:
# A 3-D tensor is expanded to 4-D: [batch_size, channels, height, width]
Target = Target.unsqueeze(0)
# Target spatial size
target_height = Target.size(-2) # second-to-last dimension is the height
target_width = Target.size(-1) # last dimension is the width
# Resize Image to the target size
Image = torch.nn.functional.interpolate(Image, size=(target_height, target_width), mode='nearest')
# Match the channel count dynamically; note this builds a fresh, randomly
# initialised 1x1 convolution on every call, so the projection is not learned
if Image.size(1) != Target.size(1) and Target.dim() >= 3:
conv = torch.nn.Conv2d(Image.size(1), Target.size(1), kernel_size=1).to(self.device)
Image = conv(Image)
return Image
def Losses(self, Segmentation_Output_Image, Segmentation_Mask_GroundTruth_Image):
criterion = Segmentation_Loss()
Loss = criterion(Segmentation_Output_Image, Segmentation_Mask_GroundTruth_Image)
return Loss
def Record_Average_Losses(self, DataLoader, running_loss=0.0):
'''Average an accumulated loss over the loader; the summed loss must be passed in via running_loss, otherwise this returns [0.0] as the original hard-coded loss = 0.0 did'''
losses = []
# Average the accumulated loss over the number of batches
running_loss /= len(DataLoader)
losses.append(running_loss)
return losses
def Calculate_Progress_And_Timing(self, inputs, Subset, loss, epoch_iterator, Start_Time):
# Progress bookkeeping; the counter is local to this call, so it reflects only the current batch
total_samples = len(Subset)
processed_samples = inputs.size(0) # size(0) is the batch size
# Timing: elapsed wall-clock time, throughput, and estimated time remaining
elapsed_time = time.time() - Start_Time
iterations_per_second = processed_samples / elapsed_time if elapsed_time > 0 else 0
eta = (total_samples - processed_samples) / iterations_per_second if iterations_per_second > 0 else 0
time_str = f"{int(elapsed_time//60):02d}:{int(elapsed_time%60):02d}<{int(eta//60):02d}:{int(eta%60):02d}"
# Calculate batch metrics using PSNR/SSIM loss
batch_loss = loss.item()
# Update progress bar with PSNR/SSIM loss
epoch_iterator.set_postfix_str(
f"{processed_samples}/{total_samples} [{time_str}, {iterations_per_second:.2f}it/s, "
f"loss={batch_loss:.3f}]"
)
return epoch_iterator
def Calculate_Average_Scores(self, Data_Loader, Running_Losses, Losses):
Running_Losses /= len(Data_Loader)
Losses.append(Running_Losses)
return Losses, Running_Losses
def process_segmentation_output(self, input_images, segmentation_output, bbox_save_dir = None, save_bbox_images=True, file_names=None, Classes = None):
"""處理分割輸出,選擇候選框並將框外像素變黑,同時保存邊界框圖像
Args:
input_images: 原始輸入圖像 [B, C, H, W]
segmentation_output: 分割模型輸出 [B, 1, H, W]
save_bbox_images: 是否保存邊界框圖像
file_names: 圖像文件名列表用於保存邊界框圖像
Returns:
processed_images: 處理後的圖像框外像素變黑 [B, 3, H, W] (始終確保輸出是3通道)
"""
# Convert the output to a binary mask (threshold 0.5)
Batch_Size_Of_Image = segmentation_output.size(0)
binary_masks = (torch.sigmoid(segmentation_output) > 0.5).float()
# Build an output tensor that is always 3-channel
# Batch size, height and width of the input
Batch_Size_Of_Image, _, height, width = input_images.size()
# The output tensor has 3 channels regardless of the input channel count
processed_images = torch.zeros(Batch_Size_Of_Image, 3, height, width, device=input_images.device)
# Process each image in the batch
for Batch_Size in range(Batch_Size_Of_Image):
# Create the directory for the bounding-box images
new_bbox_save_dir = bbox_save_dir
new_bbox_save_dir = os.path.join(new_bbox_save_dir, Classes[Batch_Size])
if save_bbox_images and not os.path.exists(new_bbox_save_dir):
os.makedirs(new_bbox_save_dir, exist_ok=True)
print(f"Created bounding-box image directory: {new_bbox_save_dir}")
# Binary mask of the current image, converted to a numpy array
mask = binary_masks[Batch_Size, 0].cpu().numpy().astype(np.uint8)
# Connected-component analysis to find all candidate regions
labeled_mask, num_labels = measure.label(mask, return_num=True, connectivity=2)
if num_labels > 0:
# Compute the area of each region
regions = measure.regionprops(labeled_mask)
# Sort the regions by area (largest first)
regions.sort(key=lambda x: x.area, reverse=True)
# Keep the largest region as the final mask
if len(regions) > 0:
# Build a new mask containing only the largest region
final_mask = np.zeros_like(mask)
for coords in regions[0].coords:
final_mask[coords[0], coords[1]] = 1
# Bounding box of the largest region
bbox = regions[0].bbox # (min_row, min_col, max_row, max_col)
# Convert the final mask back to a PyTorch tensor
final_mask_tensor = torch.from_numpy(final_mask).float().to(self.device)
# Make sure the mask matches the input image size
if final_mask_tensor.shape != input_images[Batch_Size, 0].shape:
# Resize the mask to match the input image
final_mask_tensor = torch.nn.functional.interpolate(
final_mask_tensor.unsqueeze(0).unsqueeze(0),
size=input_images[Batch_Size, 0].shape,
mode='nearest'
).squeeze(0).squeeze(0)
# Apply the mask to the original image (keep pixels inside the box, black out the rest)
# Handle the different input channel counts
if input_images.size(1) == 1: # single-channel input
# Copy the single channel into all 3 channels
for Channel in range(3):
processed_images[Batch_Size, Channel] = input_images[Batch_Size, 0] * final_mask_tensor
elif input_images.size(1) == 3: # three-channel input
# Copy each of the three channels
for Channel in range(3):
processed_images[Batch_Size, Channel] = input_images[Batch_Size, Channel] * final_mask_tensor
else: # any other channel count (unlikely, but kept for robustness)
# Take the first three channels, or fall back to the first channel
for Channel in range(3):
if Channel < input_images.size(1):
processed_images[Batch_Size, Channel] = input_images[Batch_Size, Channel] * final_mask_tensor
else:
processed_images[Batch_Size, Channel] = input_images[Batch_Size, 0] * final_mask_tensor
# Save the image with its bounding box drawn
if save_bbox_images:
# Convert the input image to a numpy array in a displayable format
img_tensor = input_images[Batch_Size].clone().detach().cpu()
img_np = img_tensor.permute(1, 2, 0).numpy()
# Rescale from [0, 1] to [0, 255]
img_np = (img_np * 255).astype(np.uint8)
# Make sure the array is contiguous in memory
img_np = np.ascontiguousarray(img_np)
# Convert single-channel images to three channels
if img_np.shape[2] == 1:
img_np = cv2.cvtColor(img_np, cv2.COLOR_GRAY2BGR)
elif img_np.shape[2] == 3:
# Ensure BGR order (OpenCV's default)
img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
# Draw the bounding box
min_row, min_col, max_row, max_col = bbox
cv2.rectangle(img_np, (min_col, min_row), (max_col, max_row), (0, 255, 0), 2)
# Build the output file name
if file_names is not None and Batch_Size < len(file_names):
# Use the provided file name
file_name = os.path.basename(file_names[Batch_Size])
save_path = os.path.join(new_bbox_save_dir, f"bbox_{file_name}.png")
else:
# Fall back to the batch index (joining file_names itself would fail when it is None or a list)
save_path = os.path.join(new_bbox_save_dir, f"bbox_{Batch_Size}.png")
# Save the image (already in BGR order, so write it directly)
cv2.imwrite(save_path, img_np)
print(f"Saved bounding-box image: {save_path}")
else:
# No region survived the selection: keep the original image, but still force 3 channels
if input_images.size(1) == 1: # single-channel input
# Copy the single channel into all 3 channels
for Channel in range(3):
processed_images[Batch_Size, Channel] = input_images[Batch_Size, 0]
elif input_images.size(1) == 3: # three-channel input
# Copy the three channels directly
processed_images[Batch_Size] = input_images[Batch_Size]
else: # any other channel count
# Take the first three channels, or fall back to the first channel
for Channel in range(3):
if Channel < input_images.size(1):
processed_images[Batch_Size, Channel] = input_images[Batch_Size, Channel]
else:
processed_images[Batch_Size, Channel] = input_images[Batch_Size, 0]
else:
# No region was found at all: keep the original image, but still force 3 channels
if input_images.size(1) == 1: # single-channel input
# Copy the single channel into all 3 channels
for Channel in range(3):
processed_images[Batch_Size, Channel] = input_images[Batch_Size, 0]
elif input_images.size(1) == 3: # three-channel input
# Copy each of the three channels
for Channel in range(3):
processed_images[Batch_Size, Channel] = input_images[Batch_Size, Channel]
else: # any other channel count
# Take the first three channels, or fall back to the first channel
for Channel in range(3):
if Channel < input_images.size(1):
processed_images[Batch_Size, Channel] = input_images[Batch_Size, Channel]
else:
processed_images[Batch_Size, Channel] = input_images[Batch_Size, 0]
# Save the processed image even when there is no candidate region
if save_bbox_images:
# Convert to a saveable numpy format (same flow as the bounding-box branch above)
img_tensor = processed_images[Batch_Size].clone().detach().cpu()
img_np = img_tensor.permute(1, 2, 0).numpy()
img_np = (img_np * 255).astype(np.uint8)
img_np = np.ascontiguousarray(img_np)
# Guarantee three-channel BGR format
if img_np.shape[2] == 1:
img_np = cv2.cvtColor(img_np, cv2.COLOR_GRAY2BGR)
elif img_np.shape[2] == 3:
img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
# Build the output file name (from the provided name or the index) and mark it as having no bounding box
if file_names is not None and isinstance(file_names, (list, tuple)) and Batch_Size < len(file_names):
file_name = os.path.basename(file_names[Batch_Size])
save_path = os.path.join(new_bbox_save_dir, f"no_bbox_{file_name}.png")
else:
base_name = file_names if isinstance(file_names, str) and len(file_names) > 0 else f"no_bbox_{Batch_Size}.png"
save_path = os.path.join(new_bbox_save_dir, base_name)
cv2.imwrite(save_path, img_np)
print(f"Saved image without bounding box: {save_path}")
def evaluate_on_test(self, model_path, test_dataloader):
if test_dataloader is None:
raise ValueError("Test dataloader is required for evaluation.")
self.Model = self.Construct_Segment_Model_CUDA()
self.Model.load_state_dict(torch.load(model_path))
self.Model.eval()
Test_Losses = []
test_loss = 0.0
with torch.no_grad():
for Input_Images, Mask_Ground_Truth_Image, _, _, _ in test_dataloader:
losses, test_loss = self.Model_Branch(Input_Images, Mask_Ground_Truth_Image, test_loss)
losses, test_loss = self.Calculate_Average_Scores(test_dataloader, test_loss, Test_Losses)
print(f"Average Test Loss: {test_loss}")
return test_loss
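# Hypothetical end-to-end usage of Segmentation_Block_Training_Step, assuming
# a dataset whose items are (image, mask, label, file_name, file_class) and a
# prepared test DataLoader; all names below are placeholders, so the call is
# shown commented out.
# trainer = Segmentation_Block_Training_Step(Best_Model_Save_Root="weights")
# best_path, avg_test_loss = trainer.Processing_Main(
#     training_dataset,
#     return_processed_images=True,
#     test_dataloader=test_loader,
# )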

View File

@ -0,0 +1,2 @@
from ..Models.GastroSegNet_Model import GastroSegNet
from ..Models.Xception_Model_Modification import Xception

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff