Compare commits

...

6 Commits

152 changed files with 8365 additions and 2010 deletions

1
.python-version Normal file
View File

@@ -0,0 +1 @@
3.11

View File

@@ -22,11 +22,11 @@ class Calculate():
DataFrame = pd.DataFrame(
{
"loss" : "{:.2f}".format(Loss),
"precision" : "{:.2f}".format(Precision * 100),
"recall" : "{:.2f}".format(Recall * 100),
"accuracy" : "{:.2f}".format(Accuracy * 100),
"f1" : "{:.2f}".format(F1 * 100),
"AUC" : "{:.2f}".format(AUC * 100)
"precision" : "{:.2f}".format(Precision),
"recall" : "{:.2f}".format(Recall),
"accuracy" : "{:.2f}".format(Accuracy),
"f1" : "{:.2f}".format(F1),
"AUC" : "{:.2f}".format(AUC)
}, index = [0]
)
self.History.append(DataFrame)
@@ -40,7 +40,7 @@ class Calculate():
F1_Mean = np.mean(self.F1_Record)
AUC_Mean = np.mean(self.AUC_Record)
Mean_DataFram = self.Construction_To_DataFrame(Loss_Mean, Accuracy_Mean, Precision_Mean, Recall_Mean, F1_Mean, AUC_Mean)
Mean_DataFram = self.Construction_To_DataFrame(Loss_Mean, Accuracy_Mean * 100, Precision_Mean * 100, Recall_Mean * 100, F1_Mean * 100, AUC_Mean * 100)
return Mean_DataFram
@@ -58,12 +58,12 @@ class Calculate():
def Output_Style(self):
Result = pd.DataFrame(
{
"loss" : "{}±{}".format(self.History[0]["loss"][0], self.History[1]["loss"][0]),
"precision" : "{}±{}".format(self.History[0]["precision"][0], self.History[1]["precision"][0]),
"recall" : "{}±{}".format(self.History[0]["recall"][0], self.History[1]["recall"][0]),
"accuracy" : "{}±{}".format(self.History[0]["accuracy"][0], self.History[1]["accuracy"][0]),
"f1" : "{}±{}".format(self.History[0]["f1"][0], self.History[1]["f1"][0]),
"AUC" : "{}±{}".format(self.History[0]["AUC"][0], self.History[1]["AUC"][0])
"loss" : "{}%±{}".format(self.History[0]["loss"][0], self.History[1]["loss"][0]),
"precision" : "{}%±{}".format(self.History[0]["precision"][0], self.History[1]["precision"][0]),
"recall" : "{}%±{}".format(self.History[0]["recall"][0], self.History[1]["recall"][0]),
"accuracy" : "{}%±{}".format(self.History[0]["accuracy"][0], self.History[1]["accuracy"][0]),
"f1" : "{}%±{}".format(self.History[0]["f1"][0], self.History[1]["f1"][0]),
"AUC" : "{}%±{}".format(self.History[0]["AUC"][0], self.History[1]["AUC"][0])
}, index = [0]
)
return Result
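
For context on the `Calculate` change above: each fold's metrics are now stored as plain fractions and only scaled by 100 when the mean is built, and `Output_Style` renders each cell as `mean%±std`. A minimal sketch of the resulting formatting, with hypothetical values rather than real results:

```python
# Hypothetical fold statistics; the real values come from Construction_To_DataFrame.
accuracy_mean = 0.9321 * 100   # mean scaled to a percentage, as in the new code path
accuracy_std = 1.87

# Mirrors the "{:.2f}" formatting and the "{}%±{}" output style above.
cell = "{}%±{}".format("{:.2f}".format(accuracy_mean), "{:.2f}".format(accuracy_std))
print(cell)  # 93.21%±1.87
```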

Binary file not shown.

View File

@@ -1,38 +1,51 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from model_data_processing.processing import make_label_list
from _validation.ValidationTheEnterData import validation_the_enter_data
from Load_process.file_processing import Process_File
from Load_process.LoadData import Load_Data_Prepare
from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
from Training_Tools.PreProcess import Training_Precesses
from torchvision import transforms
from Training_Tools.Tools import Tool
import numpy as np
from PIL import Image
import torch
import cv2
class Image_generator():
'''Builds the data augmentation data.'''
def __init__(self, Generator_Root, Labels, Image_Size) -> None:
def __init__(self, Training_Root, Generator_Root, Labels, Image_Size, Class_Count) -> None:
self._validation = validation_the_enter_data()
self.stop = 0
self.Labels = Labels
self.Training_Root = Training_Root
self.Generator_Root = Generator_Root
self.Image_Size = Image_Size
self.Image_Size = Image_Size
self.Class_Count = Class_Count
pass
def Processing_Main(self, Training_Dict_Data_Root):
data_size = 0
def Processing_Main(self):
data_size = 2712
File = Process_File()
Prepare = Load_Data_Prepare()
Load_Tool = Load_Data_Tools()
# Build the standard data augmentation
'''
The goal here is to follow the augmentation scheme from the paper "IMAGE DATA COLLECTION AND IMPLEMENTATION OF DEEP LEARNING-BASED MODEL IN DETECTING MONKEYPOX DISEASE USING MODIFIED VGG16"
and generate the augmented images.
'''
for i in range(1, 5, 1):
print("\nAugmentation one Generator image")
data_size = self.get_processing_Augmentation(Training_Dict_Data_Root, i, data_size)
self.stop += data_size
if not File.Judge_File_Exist(self.Generator_Root): # if the folder does not exist
# Decide how many lists are needed
Prepare.Set_Data_Content([], len(self.Labels))
print()
# Build the file-loading dictionary and get back the file paths
Prepare.Set_Label_List(self.Labels)
Prepare.Set_Data_Dictionary(Prepare.Get_Label_List(), Prepare.Get_Data_Content(), len(self.Labels))
Original_Dict_Data_Root = Prepare.Get_Data_Dict()
get_all_original_image_data = Load_Tool.get_data_root(self.Training_Root, Original_Dict_Data_Root, Prepare.Get_Label_List())
# Save the augmented data
# Build the standard data augmentation
'''
The goal here is to follow the augmentation scheme from the paper "IMAGE DATA COLLECTION AND IMPLEMENTATION OF DEEP LEARNING-BASED MODEL IN DETECTING MONKEYPOX DISEASE USING MODIFIED VGG16"
and generate the augmented images.
'''
for i in range(1, 5, 1):
print(f"\nAugmentation {i} Generator image")
data_size = self.get_processing_Augmentation(get_all_original_image_data, i, data_size)
self.stop += data_size
else: # if the folder already exists
print("standard data and self-made data already exist\n")
def get_processing_Augmentation(self, original_image_root : dict, Augment_choose, data_size):
Prepaer = Load_Data_Prepare()
@@ -50,84 +63,75 @@ class Image_generator():
stardand = which type of image augmentation to use
'''
File = Process_File()
image_processing = Read_image_and_Process_image(self.Image_Size)
tool = Tool()
tool = Training_Precesses(self.Image_Size)
Classes = []
Transform = self.Generator_Content(stardand)
for label in self.Labels: # augment every class in turn
image = self.load_data(label) # fetch the data
Image_Roots = self.get_data_roots[label]
save_root = File.Make_Save_Root(label, save_roots) # join the paths
Classes = image_processing.make_label_list(len(image), "1")
Training_Dataset = tool.Convert_Data_To_DataSet_And_Put_To_Dataloader(image, Classes, 1, False)
Classes = make_label_list(len(Image_Roots), "1")
Training_Dataset = tool.Setting_DataSet(Image_Roots, Classes, "Generator")
Training_DataLoader = tool.Dataloader_Sampler(Training_Dataset, 1, False)
if File.JudgeRoot_MakeDir(save_root): # create the target folder if it does not already exist
print("The folder already exists. This script will not create a new one.")
for batch_idx, (images, labels) in enumerate(Training_Dataset):
for i, img in enumerate(images):
if i == self.stop:
break
for i in range(1, int(self.Class_Count / len(Image_Roots)) + 1, 1):
for batch_idx, (images, labels, File_Name, File_Classes) in enumerate(Training_DataLoader):
for j, img in enumerate(images):
# if i == self.stop:
# break
img = img.permute(2, 0, 1)
img = Transform(img)
img = img.permute(2, 0, 1)
img = Transform(img)
# Convert to a NumPy array and change BGR to RGB
img_np = img.numpy().transpose(1, 2, 0) # back to HWC layout
img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB) # BGR to RGB
# Convert to a NumPy array and change BGR to RGB
img_np = img.numpy().transpose(1, 2, 0) # back to HWC layout
img_pil = transforms.ToPILImage()(img_np)
File.Save_PIL_File("image_" + label + str(data_size) + ".png", save_root, img_pil) # save to disk
data_size += 1
img_pil = transforms.ToPILImage()(img_np)
File.Save_PIL_File("image_" + label + str(data_size) + ".png", save_root, img_pil) # save to disk
data_size += 1
return data_size
def load_data(self, label):
'''Images are read by this helper itself.'''
image_processing = Read_image_and_Process_image(self.Image_Size)
img = image_processing.Data_Augmentation_Image(self.get_data_roots[label])
img = torch.tensor(img)
self.stop = len(img) * 5
return img
def Generator_Content(self, judge): # image data augmentation
'''
ImageDataGenerator parameters:
featurewise_center : Boolean. Set the input mean to 0 over the dataset, feature-wise.
samplewise_center : Boolean. Set each sample's mean to 0.
featurewise_std_normalization : Boolean. Divide the inputs by the dataset's standard deviation, feature-wise.
samplewise_std_normalization : Boolean. Divide each input by its own standard deviation.
zca_epsilon : Epsilon for ZCA whitening. The default is 1e-6.
zca_whitening : Boolean. Whether to apply ZCA whitening.
rotation_range : Integer. Degree range for random rotations.
width_shift_range : Float, 1-D array-like, or integer
float: fraction of the total width if < 1, or number of pixels if >= 1.
1-D array: random elements drawn from the array.
int: integer number of pixels drawn from the interval (-width_shift_range, +width_shift_range).
With width_shift_range=2 the possible values are the integers [-1, 0, +1], the same as width_shift_range=[-1, 0, +1]; with width_shift_range=1.0 the possible values are floats in [-1.0, +1.0).
height_shift_range : Float, 1-D array-like, or integer
float: fraction of the total height if < 1, or number of pixels if >= 1.
1-D array-like: random elements drawn from the array.
int: integer number of pixels drawn from the interval (-height_shift_range, +height_shift_range).
With height_shift_range=2 the possible values are the integers [-1, 0, +1], the same as height_shift_range=[-1, 0, +1]; with height_shift_range=1.0 the possible values are floats in [-1.0, +1.0).
shear_range : Float. Shear intensity (shear angle, counter-clockwise).
zoom_range : Float or [lower, upper]. Range for random zoom. If a float, [lower, upper] = [1-zoom_range, 1+zoom_range].
channel_shift_range : Float. Range for random channel shifts.
fill_mode : One of {"constant", "nearest", "reflect", "wrap"}. Defaults to 'nearest'. Points outside the input boundaries are filled according to the given mode:
'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)
'nearest': aaaaaaaa|abcd|dddddddd
'reflect': abcddcba|abcd|dcbaabcd
'wrap': abcdabcd|abcd|abcdabcd
cval : Float or integer. Value used for points outside the boundaries when fill_mode = "constant".
horizontal_flip : Boolean. Randomly flip inputs horizontally.
vertical_flip : Boolean. Randomly flip inputs vertically.
rescale : Rescaling factor. Defaults to None. If None or 0, no rescaling is applied; otherwise the data is multiplied by the given value (before any other transformation).
preprocessing_function : Function applied to each input. It runs before any other modification. It takes one argument, an image (a rank-3 NumPy tensor), and should output a NumPy tensor of the same shape.
data_format : Image data format, one of {"channels_first", "channels_last"}. "channels_last" means the inputs are shaped (samples, height, width, channels); "channels_first" means (samples, channels, height, width). Defaults to the image_data_format value in the Keras config file ~/.keras/keras.json; if you never set it, it is "channels_last".
validation_split : Float. Fraction of the images reserved for validation (strictly between 0 and 1).
dtype : Data type used for the generated arrays.
## Parameters:
<b>featurewise_center</b> : Boolean. Set the input mean to 0 over the dataset, feature-wise.<br/>
<b>samplewise_center</b> : Boolean. Set each sample's mean to 0.<br/>
<b>featurewise_std_normalization</b> : Boolean. Divide the inputs by the dataset's standard deviation, feature-wise.<br/>
<b>samplewise_std_normalization</b> : Boolean. Divide each input by its own standard deviation.<br/>
<b>zca_epsilon</b> : Epsilon for ZCA whitening. The default is 1e-6.<br/>
<b>zca_whitening</b> : Boolean. Whether to apply ZCA whitening.<br/>
<b>rotation_range</b> : Integer. Degree range for random rotations.<br/>
<b>width_shift_range</b> : Float, 1-D array-like, or integer<br/>
float: fraction of the total width if < 1, or number of pixels if >= 1.
1-D array: random elements drawn from the array.
int: integer number of pixels drawn from the interval (-width_shift_range, +width_shift_range).
With width_shift_range=2 the possible values are the integers [-1, 0, +1], the same as width_shift_range=[-1, 0, +1]; with width_shift_range=1.0 the possible values are floats in [-1.0, +1.0).
<b>height_shift_range</b> : Float, 1-D array-like, or integer<br/>
float: fraction of the total height if < 1, or number of pixels if >= 1.
1-D array-like: random elements drawn from the array.
int: integer number of pixels drawn from the interval (-height_shift_range, +height_shift_range).
With height_shift_range=2 the possible values are the integers [-1, 0, +1], the same as height_shift_range=[-1, 0, +1]; with height_shift_range=1.0 the possible values are floats in [-1.0, +1.0).
<b>shear_range</b> : Float. Shear intensity (shear angle, counter-clockwise).<br/>
<b>zoom_range</b> : Float or [lower, upper]. Range for random zoom. If a float, [lower, upper] = [1-zoom_range, 1+zoom_range].<br/>
<b>channel_shift_range</b> : Float. Range for random channel shifts.<br/>
<b>fill_mode</b> : One of {"constant", "nearest", "reflect", "wrap"}. Defaults to 'nearest'. Points outside the input boundaries are filled according to the given mode:<br/>
'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)
'nearest': aaaaaaaa|abcd|dddddddd
'reflect': abcddcba|abcd|dcbaabcd
'wrap': abcdabcd|abcd|abcdabcd
<b>cval</b> : Float or integer. Value used for points outside the boundaries when fill_mode = "constant".<br/>
<b>horizontal_flip</b> : Boolean. Randomly flip inputs horizontally.<br/>
<b>vertical_flip</b> : Boolean. Randomly flip inputs vertically.<br/>
<b>rescale</b> : Rescaling factor. Defaults to None. If None or 0, no rescaling is applied; otherwise the data is multiplied by the given value (before any other transformation).<br/>
<b>preprocessing_function</b> : Function applied to each input. It runs before any other modification. It takes one argument, an image (a rank-3 NumPy tensor), and should output a NumPy tensor of the same shape.<br/>
<b>data_format</b> : Image data format, one of {"channels_first", "channels_last"}. "channels_last" means the inputs are shaped (samples, height, width, channels); "channels_first" means (samples, channels, height, width). Defaults to the image_data_format value in the Keras config file ~/.keras/keras.json; if you never set it, it is "channels_last"<br/>
<b>validation_split</b> : Float. Fraction of the images reserved for validation (strictly between 0 and 1).<br/>
<b>dtype</b> : Data type used for the generated arrays.<br/>
'''
if judge == 1:
return transforms.Compose([
@@ -159,6 +163,4 @@ class Image_generator():
transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.2),
transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),
])
else:
return transforms.ToTensor() # normalizes the values into [0, 1]
])
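
The tail of the diff above shows one branch of `Generator_Content` built from torchvision transforms. A minimal sketch of such a branch, assuming the paper-style augmentation is rotation plus colour jitter plus flips; the rotation angle is an assumption, and the repository's actual parameters may differ:

```python
from torchvision import transforms

# One plausible augmentation branch; degrees=45 is an assumed value.
augmentation = transforms.Compose([
    transforms.RandomRotation(degrees=45),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.2),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
])
```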

View File

@@ -0,0 +1,262 @@
import xml.etree.ElementTree as ET
import cv2
import os
import numpy as np
from typing import List, Dict, Optional, Tuple
from utils.Stomach_Config import Loading_Config
class XMLAnnotationProcessor:
"""
XML annotation file processor
Handles XML files that contain bounding-box information and draws the boxes onto the matching images
"""
def __init__(self, dataset_root: str):
"""
Initialize the XML processor
Args:
dataset_root: root directory of the image dataset
"""
self.dataset_root = dataset_root
self.box_color = (0, 255, 0) # green bounding box
self.text_color = (0, 255, 0) # green text
self.box_thickness = 2
self.font_scale = 0.5
self.font = cv2.FONT_HERSHEY_SIMPLEX
def _ensure_output_folder(self, Save_Root: str) -> None:
"""Make sure the output folder exists"""
if not os.path.exists(Save_Root):
os.makedirs(Save_Root)
def parse_xml(self, xml_file_path: str, Label: str) -> Optional[Dict]:
"""
Parse an XML file and extract all the relevant information
Args:
xml_file_path: path to the XML file
Label: class label used to locate the matching image folder
Returns:
Dict: dictionary with the file information and bounding boxes, or None when parsing fails
"""
try:
tree = ET.parse(xml_file_path)
root = tree.getroot()
# Extract the basic information
filename_element = root.find('filename')
if filename_element is None:
print(f"No filename element found in {xml_file_path}")
return None
filename = filename_element.text if filename_element is not None else "Unknown"
Original_Image_Data_Root = os.path.join(self.dataset_root, Label)
Original_Image_Data_Root = os.path.join(Original_Image_Data_Root, filename)
# Extract the image dimensions
size_element = root.find('size')
width = int(size_element.find('width').text) if size_element is not None else 0
height = int(size_element.find('height').text) if size_element is not None else 0
depth = int(size_element.find('depth').text) if size_element is not None else 3
# Extract every bounding box
bounding_boxes = []
objects = root.findall('object')
for obj in objects:
bndbox = obj.find('bndbox')
if bndbox is not None:
bbox_info = {
'name': obj.find('name').text if obj.find('name') is not None else "Unknown",
'pose': obj.find('pose').text if obj.find('pose') is not None else "Unspecified",
'truncated': int(obj.find('truncated').text) if obj.find('truncated') is not None else 0,
'difficult': int(obj.find('difficult').text) if obj.find('difficult') is not None else 0,
'xmin': int(bndbox.find('xmin').text),
'ymin': int(bndbox.find('ymin').text),
'xmax': int(bndbox.find('xmax').text),
'ymax': int(bndbox.find('ymax').text)
}
bounding_boxes.append(bbox_info)
return {
'filename': filename,
'image_path': Original_Image_Data_Root,
'width': width,
'height': height,
'depth': depth,
'bounding_boxes': bounding_boxes
}
except Exception as e:
print(f"Error while parsing XML file {xml_file_path}: {str(e)}")
return None
def load_image(self, image_path: str) -> Optional[np.ndarray]:
"""
載入圖片檔案
Args:
image_path: 圖片檔案路徑
Returns:
np.ndarray: 圖片陣列載入失敗時返回None
"""
if not os.path.exists(image_path):
print(f"圖片檔案不存在: {image_path}")
return None
image = cv2.imread(image_path)
if image is None:
print(f"無法讀取圖片: {image_path}")
return None
return image
def draw_bounding_boxes(self, image: np.ndarray, bounding_boxes: List[Dict]) -> np.ndarray:
"""
創建遮罩圖片bounding box內保持原圖外部為黑色
Args:
image: 圖片陣列
bounding_boxes: bounding box資訊列表
Returns:
np.ndarray: 處理後的遮罩圖片陣列
"""
# 創建黑色背景圖片
height, width = image.shape[:2]
result_image = np.zeros((height, width, 3), dtype=np.uint8)
for i, bbox in enumerate(bounding_boxes):
xmin, ymin = bbox['xmin'], bbox['ymin']
xmax, ymax = bbox['xmax'], bbox['ymax']
object_name = bbox['name']
# Clamp the coordinates to the image bounds
xmin = max(0, min(xmin, width-1))
ymin = max(0, min(ymin, height-1))
xmax = max(0, min(xmax, width-1))
ymax = max(0, min(ymax, height-1))
# Copy the original pixels inside the bounding box into the result image
result_image[ymin:ymax, xmin:xmax] = image[ymin:ymax, xmin:xmax]
print(f"Object {i+1}: {object_name} - coordinates: ({xmin}, {ymin}, {xmax}, {ymax})")
return result_image
def save_annotated_image(self, image: np.ndarray, original_filename: str, Annotation_Root : str, Label : str) -> str:
"""
儲存標註後的圖片
Args:
image: 標註後的圖片陣列
original_filename: 原始檔案名稱
Returns:
str: 儲存的檔案路徑
"""
output_filename = f"annotated_{original_filename}"
output_path = os.path.join(Annotation_Root, Label)
Save_Image_Roots = os.path.join(output_path, output_filename)
# Make sure the output folder exists
self._ensure_output_folder(output_path)
cv2.imwrite(Save_Image_Roots, image)
print(f"Annotated image saved to: {Save_Image_Roots}")
return Save_Image_Roots
def process_single_xml(self, xml_file_path: str, Annotation_Root : str, Label : str) -> Optional[Tuple[np.ndarray, str]]:
"""
處理單一XML檔案
Args:
xml_file_path: XML檔案路徑
Returns:
Tuple[np.ndarray, str]: (標註後的圖片, 輸出路徑)處理失敗時返回None
"""
# 解析XML
xml_data = self.parse_xml(xml_file_path, Label)
if xml_data is None:
return None
# Load the image
image = self.load_image(xml_data['image_path'])
if image is None:
return None
# Draw the bounding boxes
annotated_image = self.draw_bounding_boxes(image, xml_data['bounding_boxes'])
# Save the result
output_path = self.save_annotated_image(annotated_image, xml_data['filename'], Annotation_Root, Label)
return annotated_image, output_path
def process_multiple_xml(self, xml_folder_path: str, Annotation_Root : str, Label : str) -> List[Tuple[str, bool]]:
"""
批量處理多個XML檔案
Args:
xml_folder_path: 包含XML檔案的資料夾路徑
Returns:
List[Tuple[str, bool]]: [(檔案名稱, 處理成功與否), ...]
"""
if not os.path.exists(xml_folder_path):
print(f"XML資料夾不存在: {xml_folder_path}")
return []
xml_files = [f for f in os.listdir(xml_folder_path) if f.endswith('.xml')]
if not xml_files:
print(f"{xml_folder_path} 中找不到XML檔案")
return []
print(f"找到 {len(xml_files)} 個XML檔案")
results = []
for xml_file in xml_files:
try:
Read_XML_File = os.path.join(xml_folder_path, xml_file)
self.process_single_xml(Read_XML_File, Annotation_Root, Label)
print(f"\nProcessed file: {xml_file}")
results.append((xml_file, True))
except Exception as e:
print(f"Error while processing {xml_file}: {str(e)}")
results.append((xml_file, False))
return results
def get_bounding_boxes_info(self, xml_file_path: str, Label: str) -> Optional[Dict]:
"""
Extract only the bounding-box information from the XML, without any image processing
Args:
xml_file_path: path to the XML file
Label: class label passed through to parse_xml
Returns:
Dict: dictionary with the file information and bounding-box coordinates
"""
return self.parse_xml(xml_file_path, Label)
def set_drawing_style(self, box_color: Tuple[int, int, int] = None,
text_color: Tuple[int, int, int] = None,
box_thickness: int = None,
font_scale: float = None) -> None:
"""
設定繪圖樣式
Args:
box_color: 邊界框顏色 (B, G, R)
text_color: 文字顏色 (B, G, R)
box_thickness: 邊界框粗細
font_scale: 字體大小
"""
if box_color is not None:
self.box_color = box_color
if text_color is not None:
self.text_color = text_color
if box_thickness is not None:
self.box_thickness = box_thickness
if font_scale is not None:
self.font_scale = font_scale
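
A minimal usage sketch for the class above; the paths and the `CA` label are hypothetical placeholders and should be adjusted to the actual dataset layout:

```python
# Hypothetical paths for illustration only.
processor = XMLAnnotationProcessor(dataset_root="../Dataset/Training")
processor.set_drawing_style(box_color=(0, 0, 255), box_thickness=3)

# Masks every image referenced by the XML files under the given folder,
# keeping only the pixels inside the annotated bounding boxes.
results = processor.process_multiple_xml("../Label_Image/CA", "../Annotated", "CA")
```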

View File

@@ -1,87 +1,309 @@
import cv2
import numpy as np
import torch
from PIL import Image
import torchvision
import functools
import inspect
def shapen(image): # sharpening
sigma = 100
blur_img = cv2.GaussianBlur(image, (0, 0), sigma)
usm = cv2.addWeighted(image, 1.5, blur_img, -0.5, 0)
return usm
def increase_contrast(image): # increase the image contrast
output = image # create the output variable
alpha = 2
beta = 10
cv2.convertScaleAbs(image, output, alpha, beta) # apply convertScaleAbs
return output
def adaptive_histogram_equalization(image):
ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB)
channels = cv2.split(ycrcb)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe.apply(channels[0], channels[0])
ycrcb = cv2.merge(channels)
Change_image = cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR)
# Apply the decorator to the existing functions
def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):
"""Unsharp-mask sharpening implemented with OpenCV
Parameters:
image: PIL.Image object (RGB)
kernel_size: Gaussian-blur kernel size; must be odd
sigma: standard deviation of the Gaussian blur
amount: sharpening strength; larger values give a stronger effect
threshold: edge threshold; when set, sharpening is only applied at edges
Returns:
the sharpened PIL.Image object
"""
# Convert the PIL image to a NumPy array
numpy_img = np.array(image, dtype=np.uint8)
return Change_image
# Gaussian-blur the original image
blurred = cv2.GaussianBlur(numpy_img, kernel_size, sigma)
# Compute the sharpened image
sharpened = cv2.addWeighted(numpy_img, 1 + amount, blurred, -amount, 0)
# If a threshold is set, only apply the sharpening at edges
if threshold > 0:
low_contrast_mask = np.absolute(numpy_img - blurred) < threshold
np.copyto(sharpened, numpy_img, where=low_contrast_mask)
# Keep the pixel values in the valid range
sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)
# Convert back to a PIL image
return Image.fromarray(sharpened)
def Remove_Background(image, Matrix_Size):
skinCrCbHist = np.zeros((256,256), dtype= np.uint8)
cv2.ellipse(skinCrCbHist, (113,155),(23,25), 43, 0, 360, (255, 255, 255), -1) # draw the elliptical arc
def histogram_equalization(image):
"""GPU-accelerated plain histogram equalization
Parameters:
image: PIL.Image object (RGB)
Returns:
the histogram-equalized PIL.Image object
"""
# Convert to a NumPy array and then to a PyTorch tensor
numpy_img = np.array(image)
tensor_img = torch.from_numpy(numpy_img).float().to('cuda')
# Split the channels and equalize each histogram
result = torch.zeros_like(tensor_img)
for i in range(3): # process the R, G and B channels separately
channel = tensor_img[..., i]
# Compute the histogram
hist = torch.histc(channel, bins=256, min=0, max=255)
# Compute the cumulative distribution function (CDF)
cdf = torch.cumsum(hist, dim=0)
cdf_normalized = ((cdf - cdf.min()) * 255) / (cdf.max() - cdf.min())
# Apply the equalization
result[..., i] = cdf_normalized[channel.long()]
# Move back to the CPU and NumPy
result = torch.clamp(result, 0, 255).byte()
result_np = result.cpu().numpy()
return Image.fromarray(result_np)
img_ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB)
y,cr,cb = cv2.split(img_ycrcb) # split out the Y, Cr and Cb values
def Contrast_Limited_Adaptive_Histogram_Equalization(image, clip_limit=3.0, tile_size=(8, 8)):
"""Contrast-limited adaptive histogram equalization (CLAHE) implemented with OpenCV
Parameters:
image: PIL.Image object (RGB)
clip_limit: clipping limit that bounds the contrast enhancement; larger values give stronger contrast
tile_size: (height, width) tuple for the tile grid; smaller values give a more local enhancement
Returns:
the CLAHE-processed PIL.Image object
"""
# Convert the PIL image to OpenCV format (BGR)
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# Convert to the LAB colour space
lab_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2LAB)
# Create the CLAHE object
clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_size)
# Split the LAB channels
l, a, b = cv2.split(lab_img)
# Apply CLAHE to the L channel
l_clahe = clahe.apply(l)
# Merge the processed L channel with the original a and b channels
lab_output = cv2.merge([l_clahe, a, b])
# Convert LAB back to BGR, then to RGB
bgr_output = cv2.cvtColor(lab_output, cv2.COLOR_LAB2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# Convert to a PIL image and return
return Image.fromarray(rgb_output)
skin = np.zeros(cr.shape, dtype = np.uint8) # the mask
(x,y) = cr.shape
def adaptive_histogram_equalization_without_limit(image, tile_size=(8, 8)):
"""Adaptive histogram equalization (AHE) implemented with OpenCV
Parameters:
image: PIL.Image object (RGB)
tile_size: (height, width) tuple for the tile grid; smaller values give a more local enhancement
Returns:
the AHE-processed PIL.Image object
"""
# Convert the PIL image to OpenCV format (BGR)
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# Convert to the LAB colour space
lab_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2LAB)
# Split the LAB channels
l, a, b = cv2.split(lab_img)
# Create the AHE object (no clip limit set)
clahe = cv2.createCLAHE(clipLimit=None, tileGridSize=tile_size)
# Apply AHE to the L channel
l_ahe = clahe.apply(l)
# Merge the processed L channel with the original a and b channels
lab_output = cv2.merge([l_ahe, a, b])
# Convert LAB back to BGR, then to RGB
bgr_output = cv2.cvtColor(lab_output, cv2.COLOR_LAB2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# Convert to a PIL image and return
return Image.fromarray(rgb_output)
# Walk over every pixel in the image
for i in range(x):
for j in range(y):
if skinCrCbHist [cr[i][j], cb[i][j]] > 0: # if the (Cr, Cb) value falls inside the ellipse region
skin[i][j] = 255
# If the pixel's grey level is above 200, adjust its transparency
# Using 255 - gray[y, x] turns some edge pixels semi-transparent and avoids overly jagged edges
# img_change = cv2.cvtColor(img_change, cv2.COLOR_BGRA2BGR)
img = cv2.bitwise_and(image, image, mask = skin)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
def laplacian_sharpen(image):
"""
GPU-accelerated Laplacian sharpening
Parameters:
image: PIL.Image object (RGB)
Returns:
the sharpened PIL.Image object
"""
# Convert to a NumPy array and then to a PyTorch tensor
numpy_img = np.array(image)
tensor_img = torch.from_numpy(numpy_img).float().to('cuda')
# Create the Laplacian kernel
laplacian_kernel = torch.tensor([
[0, 1, 0],
[1, -4, 1],
[0, 1, 0]
], dtype=torch.float32, device='cuda').unsqueeze(0).unsqueeze(0)
# Process each channel
result = torch.zeros_like(tensor_img)
for i in range(3): # the R, G and B channels
channel = tensor_img[..., i]
# Add batch and channel dimensions
channel = channel.unsqueeze(0).unsqueeze(0)
# Apply the Laplacian kernel
laplacian = torch.nn.functional.conv2d(channel, laplacian_kernel, padding=1)
# Remove the batch and channel dimensions
laplacian = laplacian.squeeze()
# Sharpen: original minus Laplacian
result[..., i] = channel.squeeze() - laplacian
# Keep the pixel values in a sensible range
result = torch.clamp(result, 0, 255).byte()
# Move back to the CPU and NumPy
result_np = result.cpu().numpy()
return Image.fromarray(result_np)
h = image.shape[0] # image height
w = image.shape[1] # image width
def adjust_hsv(image, v_adjustment=0):
"""Adjust the V channel of the image in HSV colour space
Parameters:
image: PIL.Image object (RGB)
v_adjustment: adjustment added to the V channel; a range within [-255, 255] is recommended
Returns:
the HSV-adjusted PIL.Image object
"""
# Convert the PIL image to OpenCV format (BGR)
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# Convert to the HSV colour space
hsv_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2HSV)
# Adjust the V channel
hsv_img[..., 2] = np.clip(hsv_img[..., 2] + v_adjustment, 0, 255)
# Convert HSV back to BGR, then to RGB
bgr_output = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# Convert to a PIL image and return
return Image.fromarray(rgb_output)
for x in range(w):
for y in range(h):
if img_gray[y, x] == 0:
# if x == 0 and y == 0: # when X and Y are both at the top-left corner
# image[y, x] = Add(1, Matrix_Size, image[y, x]) / Matrix_Size
# if x == w - 1 and y == 0: # when X and Y are both at the top-right corner
# image[y, x] = Add(w - Matrix_Size, w, image[y, x]) / Matrix_Size
# if x == 0 and y == h - 1: # when X and Y are both at the bottom-left corner
# image[y, x] = (image[y - 1, x] + image[y - 1, x + 1] + image[y, x + 1]) / 3
# if x == w - 1 and y == h - 1: # when X and Y are both at the bottom-right corner
# image[y, x] = (image[y, x - 1] + image[y - 1, x - 1] + image[y - 1, x]) / 3
def gamma_correction(image, gamma=1.0):
"""Apply gamma correction to the image
Parameters:
image: PIL.Image object (RGB)
gamma: gamma value; gamma > 1 darkens the image, gamma < 1 brightens it, gamma = 1 leaves it unchanged
Returns:
the gamma-corrected PIL.Image object
"""
# Convert the PIL image to a NumPy array
numpy_img = np.array(image)
# Normalize the pixel values into [0, 1]
normalized = numpy_img.astype(float) / 255.0
# Apply the gamma correction
corrected = np.power(normalized, gamma)
# Scale the values back into [0, 255]
output = np.clip(corrected * 255.0, 0, 255).astype(np.uint8)
# Convert back to a PIL image and return
return Image.fromarray(output)
# if (x > 0 and x < w - 1) and y == 0: # the top row, left to right
# image[y, x] = (image[y, x - 1] + image[y + 1, x - 1] + image[y + 1, x] + image[y, x + 1] + image[y + 1, x + 1]) / 5
# if (x > 0 and x < w - 1) and y == h - 1: # the bottom row, left to right
# image[y, x] = (image[y, x - 1] + image[y - 1, x - 1] + image[y - 1, x] + image[y, x + 1] + image[y - 1, x + 1]) / 5
# if x == 0 and (y > 0 and y < h - 1): # the left column, top to bottom
# image[y, x] = (image[y - 1, x] + image[y - 1, x + 1] + image[y, x + 1] + image[y + 1, x + 1] + image[y + 1, x]) / 5
# if x == w - 1 and (y > 0 and y < h - 1): # the right column, top to bottom
# image[y, x] = (image[y - 1, x] + image[y - 1, x - 1] + image[y, x - 1] + image[y + 1, x - 1] + image[y + 1, x]) / 5
def Hight_Light(image, Threshold):
image = np.array(image)
if (x >= 1 and x < w - 1) and (y >= 1 and y < h - 1): # when y >= 2 and x >= 2
image[y, x] = Add(x, y, image, Matrix_Size) / Matrix_Size
# BGRA_image[y, x, 3] = 255 - gray[y, x]
return image
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# Detect the highlight spots (white droplets) with a threshold
_, thresh = cv2.threshold(gray, Threshold, 255, cv2.THRESH_BINARY)
# Use a morphological dilation to enlarge the mask region
kernel = np.ones((5, 5), np.uint8)
dilated = cv2.dilate(thresh, kernel, iterations=1)
# Repair the highlight spots with inpainting
image_inpaint = cv2.inpaint(image, dilated, 3, cv2.INPAINT_TELEA)
return Image.fromarray(image_inpaint)
def Add(width_Center, Height_Center, image, Mask_Size):
total = 0
for i in range(Mask_Size):
for j in range(Mask_Size):
total += image[width_Center - ((Mask_Size - 1) / 2) + j, Height_Center - ((Mask_Size - 1) / 2) + i]
def median_filter(image: Image.Image, kernel_size: int = 3):
"""
Median filter implementation
Parameters:
image: PIL.Image object (RGB)
kernel_size: filter kernel size; must be odd
Returns:
the filtered PIL.Image object
"""
# Make sure kernel_size is odd
if kernel_size % 2 == 0:
kernel_size += 1
# Convert the PIL image to a NumPy array
numpy_img = np.array(image, dtype=np.uint8)
# Apply the median filter to each channel
result = np.zeros_like(numpy_img)
for i in range(3): # process the R, G and B channels separately
result[:, :, i] = cv2.medianBlur(numpy_img[:, :, i], kernel_size)
# Keep the pixel values in the valid range
result = np.clip(result, 0, 255).astype(np.uint8)
# Convert back to a PIL image
return Image.fromarray(result)
return total
def mean_filter(image: Image.Image, kernel_size: int = 3):
"""
Mean (box) filter implementation
Parameters:
image: PIL.Image object (RGB)
kernel_size: filter kernel size; must be odd
Returns:
the filtered PIL.Image object
"""
# Make sure kernel_size is odd
if kernel_size % 2 == 0:
kernel_size += 1
# Convert the PIL image to a NumPy array
numpy_img = np.array(image, dtype=np.uint8)
# Create the mean-filter kernel: every element is 1/(kernel_size*kernel_size)
kernel = np.ones((kernel_size, kernel_size), np.float32) / (kernel_size * kernel_size)
# Apply the mean filter to each channel
result = np.zeros_like(numpy_img)
for i in range(3): # process the R, G and B channels separately
result[:, :, i] = cv2.filter2D(numpy_img[:, :, i], -1, kernel)
# Keep the pixel values in the valid range
result = np.clip(result, 0, 255).astype(np.uint8)
# Convert back to a PIL image
return Image.fromarray(result)
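
A minimal sketch of chaining the new PIL-based helpers above; the file name and parameter values are placeholders, not settings from the repository:

```python
from PIL import Image

img = Image.open("sample.png").convert("RGB")  # hypothetical input image
img = unsharp_mask(img, kernel_size=(5, 5), sigma=1.0, amount=1.0)
img = Contrast_Limited_Adaptive_Histogram_Equalization(img, clip_limit=3.0, tile_size=(8, 8))
img = gamma_correction(img, gamma=0.8)
img.save("sample_enhanced.png")
```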

View File

@@ -1,63 +0,0 @@
from Load_process.LoadData import Loding_Data_Root
from Image_Process.Image_Generator import Image_generator
from Load_process.file_processing import Process_File
from model_data_processing.processing_for_cut_image import Cut_Indepentend_Data
from Load_process.Loading_Tools import Load_Data_Prepare, Load_Data_Tools
class Load_ImageGenerator():
'''
This object performs the data augmentation. It mainly combines the augmentation provided by a senior labmate with the augmentation I configured myself.
The object first loads the data, augments each part separately, and uses the augmentation to compensate for the data imbalance.
This is just one of the experiments.
Parameters
standard_root: runs the same augmentation as the one provided by the senior labmate
myself_root: augmentation whose parameters I configured myself
IndependentDataRoot: folder path the data is written back to
Herpeslabels: the herpes classes
MonKeyPoxlabels: the monkeypox classes (monkeypox, chickenpox, normal)
herpes_data: merges the herpes dataset into a single list
MonkeyPox_data: merges the MonkeyPox dataset into a single list
'''
def __init__(self, Training_Root,Test_Root, Validation_Root, Generator_Root, Labels, Image_Size) -> None:
self.Training_Root = Training_Root
self.TestRoot = Test_Root
self.ValidationRoot = Validation_Root
self.GeneratoRoot = Generator_Root
self.Labels = Labels
self.Image_Size = Image_Size
pass
def process_main(self, Data_Length : int):
File = Process_File()
Prepare = Load_Data_Prepare()
load = Loding_Data_Root(self.Labels, self.Training_Root, self.GeneratoRoot)
Indepentend = Cut_Indepentend_Data(self.Training_Root, self.Labels)
Load_Tool = Load_Data_Tools()
Generator = Image_generator(self.GeneratoRoot, self.Labels, self.Image_Size)
# Split off the test data
test_size = 0.1
Indepentend.IndependentData_main(self.TestRoot, test_size)
# Split off the validation data
test_size = 0.1
Indepentend.IndependentData_main(self.ValidationRoot, test_size)
if not File.Judge_File_Exist(self.GeneratoRoot): # if the folder does not exist
# Decide how many lists are needed
Prepare.Set_Data_Content([], Data_Length)
# Build the file-loading dictionary and get back the file paths
Prepare.Set_Label_List(self.Labels)
Prepare.Set_Data_Dictionary(Prepare.Get_Label_List(), Prepare.Get_Data_Content(), Data_Length)
Original_Dict_Data_Root = Prepare.Get_Data_Dict()
get_all_original_image_data = Load_Tool.get_data_root(self.Training_Root, Original_Dict_Data_Root, Prepare.Get_Label_List())
# Save the augmented data
Generator.Processing_Main(get_all_original_image_data) # run the augmentation
else: # if the folder already exists
print("standard data and self-made data already exist\n")
# Load the files
return load.process_main()

View File

@@ -11,16 +11,18 @@ class Loding_Data_Root(Process_File):
super().__init__()
pass
def process_main(self):
def process_main(self, status):
'''Load the Training and Image Generator file data'''
Merge = merge()
get_Image_Data = self.get_Image_data_roots(self.Train_Root)
Get_ImageGenerator_Image_Data = self.get_Image_data_roots(self.Generator_Root)
Get_Total_Image_Data_Root = Merge.merge_dict_to_dict(get_Image_Data, Get_ImageGenerator_Image_Data)
if status:
Get_ImageGenerator_Image_Data = self.get_Image_data_roots(self.Generator_Root)
Get_Total_Image_Data_Root = Merge.merge_dict_to_dict(get_Image_Data, Get_ImageGenerator_Image_Data)
return Get_Total_Image_Data_Root
return Get_Total_Image_Data_Root
return get_Image_Data
def get_Image_data_roots(self, DataRoot) -> dict:
Prepare = Load_Data_Prepare()

View File

@@ -1,11 +1,10 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from merge_class.merge import merge
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
from model_data_processing.processing import Balance_Process
from model_data_processing.processing import make_label_list
from utils.Stomach_Config import Loading_Config
class Load_Indepentend_Data():
def __init__(self, Labels, OneHot_Encording):
def __init__(self, OneHot_Encording):
'''
Image-splitting object
There are 2 label classes, converted into the one-hot-encoding form:
@@ -13,55 +12,62 @@ class Load_Indepentend_Data():
[1, 0] = NPC_positive
'''
self.merge = merge()
self.Labels = Labels
self.OneHot_Encording = OneHot_Encording
pass
def process_main(self, Test_data_root, Validation_data_root):
self.test, self.test_label = self.get_Independent_image(Test_data_root)
def process_main(self, Test_data_root, Test_mask_root):
self.test, self.test_label, self.test_mask = self.get_Independent_image(Test_data_root, Test_mask_root)
print("\ntest_labels有" + str(len(self.test_label)) + "筆資料\n")
self.validation, self.validation_label = self.get_Independent_image(Validation_data_root)
print("validation_labels有 " + str(len(self.validation_label)) + " 筆資料\n")
def get_Independent_image(self, independent_DataRoot):
image_processing = Read_image_and_Process_image(123)
classify_image = []
Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot) # load the test-set data
Total_Dict_Data_Root, Size = Balance_Process(Total_Dict_Data_Root, self.Labels) # shuffle and take the requested number of entries
def get_Independent_image(self, independent_DataRoot, independent_MaskRoot):
Total_Size_List = []
Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot, Loading_Config["Training_Labels"], len(Loading_Config["Training_Labels"])) # load the test-set data
Total_Dict_Mask_Root = self.Get_Independent_data_Root(independent_MaskRoot, Loading_Config["XML_Loading_Label"], len(Loading_Config["XML_Loading_Label"])) # load the test-set mask data
# Convert the test-data dictionary into a list and keep it ordered
Total_List_Data_Root = []
for Label in self.Labels:
for Label in Loading_Config["Training_Labels"]:
Total_List_Data_Root.append(Total_Dict_Data_Root[Label])
# Convert the mask dictionary into a list and keep it ordered
Total_List_Mask_Data_Root = []
for Label in Loading_Config["XML_Loading_Label"]:
Total_List_Mask_Data_Root.append(Total_Dict_Mask_Root[Label])
test_label, Classify_Label = [], []
classify_image, Classify_Label = [], []
i = 0 # counter for classify_image; also counts the total number of entries
for test_title in Total_List_Data_Root: # load the files by iterating over all the paths
test_label = image_processing.make_label_list(len(test_title), self.OneHot_Encording[i]) # build labels matching the number of images
print(self.Labels[i] + " has " + str(len(test_label)) + " entries ")
test_label = make_label_list(len(test_title), self.OneHot_Encording[i]) # build labels matching the number of images
print(Loading_Config["Training_Labels"][i] + " has " + str(len(test_label)) + " entries ")
Total_Size_List.append(len(test_label))
classify_image.append(test_title)
Classify_Label.append(test_label)
i += 1
original_test_root = self.merge.merge_data_main(classify_image, 0)
original_test_label = self.merge.merge_data_main(Classify_Label, 0)
classify_Mask_image = []
i = 0 # counter for classify_image; also counts the total number of entries
for test_title in Total_List_Mask_Data_Root: # load the files by iterating over all the paths
print(Loading_Config["XML_Loading_Label"][i] + " has " + str(len(test_title)) + " entries ")
test = []
test = image_processing.Data_Augmentation_Image(original_test_root)
test, test_label = image_processing.image_data_processing(test, original_test_label)
classify_Mask_image.append(test_title)
i += 1
return test, test_label
test = self.merge.merge_data_main(classify_image, 0, len(Loading_Config["Training_Labels"]))
test_label = self.merge.merge_data_main(Classify_Label, 0, len(Loading_Config["Training_Labels"]))
test_Mask = self.merge.merge_data_main(classify_Mask_image, 0, len(Loading_Config["XML_Loading_Label"]))
return test, test_label, test_Mask
def Get_Independent_data_Root(self, load_data_root):
def Get_Independent_data_Root(self, load_data_root, Dictory_Keys, Length):
Prepare = Load_Data_Prepare()
Load_Tool = Load_Data_Tools()
Prepare.Set_Data_Content([], len(self.Labels))
Prepare.Set_Data_Dictionary(self.Labels, Prepare.Get_Data_Content(), 2)
Prepare.Set_Data_Content([], Length)
Prepare.Set_Data_Dictionary(Dictory_Keys, Prepare.Get_Data_Content(), Length)
Get_Data_Dict_Content = Prepare.Get_Data_Dict()
Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, self.Labels)
Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, Dictory_Keys)
return Total_Data_Roots
return Total_Data_Roots

Binary file not shown.

Binary file not shown.

View File

@@ -35,16 +35,14 @@ class Process_File():
save_root = self.Make_Save_Root(FileName, save_root)
np.save(save_root, image)
def Save_CSV_File(self, file_name, data): # save the training results
Save_Root = '../Result/save_the_train_result(' + str(datetime.date.today()) + ")"
def Save_CSV_File(self, Save_Root, file_name, data): # save the training results
self.JudgeRoot_MakeDir(Save_Root)
modelfiles = self.Make_Save_Root(file_name + ".csv", Save_Root) # join the file name and the path into a full path
data.to_csv(modelfiles, mode = "a")
def Save_TXT_File(self, content, File_Name):
model_dir = '../Result/save_the_train_result(' + str(datetime.date.today()) + ")" # the save path is save_the_train_result plus today's date
self.JudgeRoot_MakeDir(model_dir)
modelfiles = self.Make_Save_Root(File_Name + ".txt", model_dir) # join the file name and the path into a full path
def Save_TXT_File(self, content, Save_Root, File_Name):
self.JudgeRoot_MakeDir(Save_Root)
modelfiles = self.Make_Save_Root(f"{File_Name}.txt", Save_Root) # join the file name and the path into a full path
with open(modelfiles, mode = 'a') as file:
file.write(content)

315
Model_Loss/CIOU_Loss.py Normal file
View File

@@ -0,0 +1,315 @@
import torch
import torch.nn as nn
import math
class CIOULoss(nn.Module):
"""
Complete Intersection over Union (CIOU) Loss
Intended for bounding-box regression in object detection
CIOU Loss accounts for three geometric factors:
1. Overlap area
2. Central point distance
3. Aspect ratio consistency
"""
def __init__(self, eps=1e-7):
super(CIOULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
"""
Compute the CIOU Loss
Args:
pred_boxes: predicted boxes [N, 4] (x1, y1, x2, y2) or [N, 4] (cx, cy, w, h), or segmentation masks [B, 1, H, W]
target_boxes: ground-truth boxes [N, 4] (x1, y1, x2, y2) or [N, 4] (cx, cy, w, h), or segmentation masks [B, 1, H, W]
Returns:
CIOU loss value
"""
# Check whether the input is in segmentation-mask format
if len(pred_boxes.shape) == 4 and pred_boxes.shape[1] == 1:
# Convert the segmentation masks into bounding boxes
pred_boxes = self._mask_to_boxes(pred_boxes)
target_boxes = self._mask_to_boxes(target_boxes)
# If no valid boxes can be extracted from the masks, return a small loss value
if pred_boxes is None or target_boxes is None:
return torch.tensor(0.01, device=pred_boxes.device if pred_boxes is not None else target_boxes.device)
# Make sure the inputs are floats
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# Check that the box dimensions are correct
if pred_boxes.dim() == 1:
# A single box: expand into a batch of size 1
pred_boxes = pred_boxes.unsqueeze(0)
if target_boxes.dim() == 1:
target_boxes = target_boxes.unsqueeze(0)
# Make sure each box has 4 coordinates
if pred_boxes.shape[1] != 4 or target_boxes.shape[1] != 4:
# Wrong number of coordinates: return a small loss value
return torch.tensor(0.01, device=pred_boxes.device)
# If the input is in (cx, cy, w, h) format, convert it to (x1, y1, x2, y2)
if self._is_center_format(pred_boxes, target_boxes):
pred_boxes = self._center_to_corner(pred_boxes)
target_boxes = self._center_to_corner(target_boxes)
# Compute the intersection area
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# Compute each box's area
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# Compute the union area
union = pred_area + target_area - intersection + self.eps
# Compute the IoU
iou = intersection / union
# Compute the smallest enclosing box
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
# Squared diagonal length of the smallest enclosing box
enclose_diagonal_sq = (enclose_x2 - enclose_x1) ** 2 + (enclose_y2 - enclose_y1) ** 2 + self.eps
# Squared distance between the two box centers
pred_center_x = (pred_boxes[:, 0] + pred_boxes[:, 2]) / 2
pred_center_y = (pred_boxes[:, 1] + pred_boxes[:, 3]) / 2
target_center_x = (target_boxes[:, 0] + target_boxes[:, 2]) / 2
target_center_y = (target_boxes[:, 1] + target_boxes[:, 3]) / 2
center_distance_sq = (pred_center_x - target_center_x) ** 2 + (pred_center_y - target_center_y) ** 2
# Aspect-ratio consistency term
pred_w = pred_boxes[:, 2] - pred_boxes[:, 0]
pred_h = pred_boxes[:, 3] - pred_boxes[:, 1]
target_w = target_boxes[:, 2] - target_boxes[:, 0]
target_h = target_boxes[:, 3] - target_boxes[:, 1]
# Avoid division by zero
pred_w = torch.clamp(pred_w, min=self.eps)
pred_h = torch.clamp(pred_h, min=self.eps)
target_w = torch.clamp(target_w, min=self.eps)
target_h = torch.clamp(target_h, min=self.eps)
v = (4 / (math.pi ** 2)) * torch.pow(torch.atan(target_w / target_h) - torch.atan(pred_w / pred_h), 2)
# Compute the alpha weight
with torch.no_grad():
alpha = v / (1 - iou + v + self.eps)
# Compute the CIOU
ciou = iou - (center_distance_sq / enclose_diagonal_sq) - alpha * v
# Return the CIOU Loss (1 - CIOU)
ciou_loss = 1 - ciou
return ciou_loss.mean()
def _is_center_format(self, pred_boxes, target_boxes):
"""
Decide whether the input is in center format (cx, cy, w, h)
A simple heuristic: if the third and fourth columns are positive and relatively small, they may be width and height
"""
# Simple decision logic; a more precise check may be needed in practice
return False # assume (x1, y1, x2, y2) format by default
def _center_to_corner(self, boxes):
"""
Convert center format (cx, cy, w, h) into corner format (x1, y1, x2, y2)
"""
cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
x1 = cx - w / 2
y1 = cy - h / 2
x2 = cx + w / 2
y2 = cy + h / 2
return torch.stack([x1, y1, x2, y2], dim=1)
def _mask_to_boxes(self, masks):
"""
Convert segmentation masks into bounding boxes [N, 4] (x1, y1, x2, y2)
Args:
masks: segmentation masks [B, 1, H, W]
Returns:
boxes: bounding boxes [B, 4] (x1, y1, x2, y2)
"""
batch_size = masks.size(0)
device = masks.device
# Binarize the masks
binary_masks = (torch.sigmoid(masks) > 0.5).float()
# Initialize the box tensor
boxes = torch.zeros(batch_size, 4, device=device)
# Process each batch element
for b in range(batch_size):
mask = binary_masks[b, 0] # [H, W]
# Find the indices of the non-zero elements
non_zero_indices = torch.nonzero(mask, as_tuple=True)
# If the mask has no non-zero elements, fall back to a small default box
if len(non_zero_indices[0]) == 0:
# Return a small default box
boxes[b] = torch.tensor([0, 0, 1, 1], device=device)
continue
# Compute the box coordinates
y_min = torch.min(non_zero_indices[0])
y_max = torch.max(non_zero_indices[0])
x_min = torch.min(non_zero_indices[1])
x_max = torch.max(non_zero_indices[1])
# Store the box [x1, y1, x2, y2]
boxes[b] = torch.tensor([x_min, y_min, x_max, y_max], device=device)
return boxes
def _calculate_intersection(self, pred_boxes, target_boxes):
"""
Compute the intersection area of two sets of boxes
"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
# Width and height of the intersection
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
class DIoULoss(nn.Module):
"""
Distance Intersection over Union (DIoU) Loss
A simplified version of CIOU that only considers the overlap area and the center-point distance
"""
def __init__(self, eps=1e-7):
super(DIoULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
# Make sure the inputs are floats
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# Compute the intersection area
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# Compute each box's area
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# Compute the union area
union = pred_area + target_area - intersection + self.eps
# Compute the IoU
iou = intersection / union
# Squared diagonal length of the smallest enclosing box
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
enclose_diagonal_sq = (enclose_x2 - enclose_x1) ** 2 + (enclose_y2 - enclose_y1) ** 2 + self.eps
# Squared distance between the centers
pred_center_x = (pred_boxes[:, 0] + pred_boxes[:, 2]) / 2
pred_center_y = (pred_boxes[:, 1] + pred_boxes[:, 3]) / 2
target_center_x = (target_boxes[:, 0] + target_boxes[:, 2]) / 2
target_center_y = (target_boxes[:, 1] + target_boxes[:, 3]) / 2
center_distance_sq = (pred_center_x - target_center_x) ** 2 + (pred_center_y - target_center_y) ** 2
# Compute the DIoU
diou = iou - (center_distance_sq / enclose_diagonal_sq)
# Return the DIoU Loss
diou_loss = 1 - diou
return diou_loss.mean()
def _calculate_intersection(self, pred_boxes, target_boxes):
"""Compute the intersection area"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
class GIoULoss(nn.Module):
"""
Generalized Intersection over Union (GIoU) Loss
A generalization of IoU that also considers the smallest enclosing box
"""
def __init__(self, eps=1e-7):
super(GIoULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
# Make sure the inputs are floats
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# Compute the intersection
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# Compute each box's area
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# Compute the union
union = pred_area + target_area - intersection + self.eps
# Compute the IoU
iou = intersection / union
# Area of the smallest enclosing box
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
enclose_area = (enclose_x2 - enclose_x1) * (enclose_y2 - enclose_y1) + self.eps
# Compute the GIoU
giou = iou - (enclose_area - union) / enclose_area
# Return the GIoU Loss
giou_loss = 1 - giou
return giou_loss.mean()
def _calculate_intersection(self, pred_boxes, target_boxes):
"""Compute the intersection area"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
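
A minimal sketch comparing the three losses above on a single hand-made box pair; the coordinate values are illustrative only:

```python
import torch

pred = torch.tensor([[10.0, 10.0, 50.0, 60.0]])    # (x1, y1, x2, y2)
target = torch.tensor([[12.0, 8.0, 48.0, 62.0]])

for loss_fn in (CIOULoss(), DIoULoss(), GIoULoss()):
    # Each returns 1 minus its IoU variant, averaged over the batch.
    print(type(loss_fn).__name__, loss_fn(pred, target).item())
```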

View File

@@ -8,10 +8,20 @@ class Entropy_Loss(nn.Module):
super(Entropy_Loss, self).__init__()
def forward(self, outputs, labels):
# Loss computation: cross entropy or BCE, depending on the label shape
# outputs = torch.argmax(outputs, 1)
# Convert to tensors
outputs_New = torch.as_tensor(outputs, dtype=torch.float32)
labels_New = torch.as_tensor(labels, dtype=torch.float32)
loss = functional.cross_entropy(outputs_New, labels_New)
# Check whether the output and label dimensions match
if outputs_New.shape[1] != labels_New.shape[1]:
# Dimensions differ: use the cross-entropy loss.
# Cross entropy expects class indices rather than one-hot encodings,
# so convert the one-hot encoding into class indices
_, labels_indices = torch.max(labels_New, dim=1)
loss = functional.cross_entropy(outputs_New, labels_indices)
else:
# Dimensions match: always use binary_cross_entropy_with_logits,
# which applies the sigmoid itself and avoids inputs outside [0, 1]
loss = functional.binary_cross_entropy_with_logits(outputs_New, labels_New)
return torch.as_tensor(loss, dtype = torch.float32)
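
A minimal sketch of the branch logic above: with matching shapes the loss falls through to `binary_cross_entropy_with_logits`; with mismatched shapes the one-hot labels are collapsed to class indices for `cross_entropy`. The tensors below are random placeholders:

```python
import torch

loss_fn = Entropy_Loss()
logits = torch.randn(4, 3)
one_hot = torch.nn.functional.one_hot(torch.tensor([0, 2, 1, 0]), num_classes=3).float()

# Shapes match (both [4, 3]) -> the BCE-with-logits branch is taken.
print(loss_fn(logits, one_hot))
```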

View File

@@ -0,0 +1,116 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
class VGGPerceptualLoss(nn.Module):
"""
Perceptual loss based on VGG19
Extracts features with a pretrained VGG19 network and computes the loss in feature space
"""
def __init__(self, feature_layers=[2, 7, 12, 21, 30], use_normalization=True):
super(VGGPerceptualLoss, self).__init__()
# Load the pretrained VGG19 model
vgg = models.vgg19(pretrained=True).features
# Freeze the VGG parameters
for param in vgg.parameters():
param.requires_grad = False
# The model is moved to the input's device inside forward
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Select the feature layers to use
self.feature_layers = feature_layers
self.vgg_layers = nn.ModuleList()
# Slice the VGG network up to the requested layers
layer_idx = 0
current_layer = 0
for i, layer in enumerate(vgg):
if layer_idx < len(feature_layers) and i <= feature_layers[layer_idx]:
self.vgg_layers.append(layer)
if i == feature_layers[layer_idx]:
layer_idx += 1
else:
break
# Whether to use ImageNet normalization
self.use_normalization = use_normalization
if use_normalization:
self.normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
# Loss weights
self.weights = [1.0, 1.0, 1.0, 1.0, 1.0] # the per-layer weights can be tuned
def extract_features(self, x):
"""
Extract the VGG features
"""
# Make sure the input lies in [0, 1]
if x.min() < 0 or x.max() > 1:
x = torch.clamp(x, 0, 1)
# Normalize
if self.use_normalization:
# Make sure normalize is on the same device as the input
if hasattr(self, 'normalize') and not isinstance(self.normalize, torch.nn.Module):
self.normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
).to(x.device)
x = self.normalize(x)
features = []
layer_idx = 0
# Make sure every VGG layer is on the same device as the input
device = x.device
for i, layer in enumerate(self.vgg_layers):
layer = layer.to(device) # make sure the layer is on the right device
x = layer(x)
# Check whether a target feature layer has been reached
if layer_idx < len(self.feature_layers) and i == self.feature_layers[layer_idx]:
features.append(x)
layer_idx += 1
return features
def forward(self, pred, target):
"""
Compute the perceptual loss
pred: predicted image [B, C, H, W]
target: target image [B, C, H, W]
"""
# Make sure the model is on the same device as the input
device = pred.device
self.vgg_layers = nn.ModuleList([layer.to(device) for layer in self.vgg_layers])
# Make sure the input sizes match
if pred.shape != target.shape:
pred = F.interpolate(pred, size=target.shape[2:], mode='bilinear', align_corners=False)
# Single-channel inputs are tiled to three channels
if pred.shape[1] == 1:
pred = pred.repeat(1, 3, 1, 1)
if target.shape[1] == 1:
target = target.repeat(1, 3, 1, 1)
# Extract the features
pred_features = self.extract_features(pred)
target_features = self.extract_features(target)
# Compute the feature loss
perceptual_loss = 0
for i, (pred_feat, target_feat) in enumerate(zip(pred_features, target_features)):
# Use MSE for the feature difference
feat_loss = F.mse_loss(pred_feat, target_feat)
perceptual_loss += self.weights[i] * feat_loss
return perceptual_loss
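
A minimal usage sketch for the loss above; the first call downloads the pretrained VGG19 weights, and single-channel inputs are tiled to three channels inside `forward`. The tensor shapes below are placeholders:

```python
import torch

loss_fn = VGGPerceptualLoss()
pred = torch.rand(2, 1, 128, 128)    # e.g. predicted segmentation masks
target = torch.rand(2, 1, 128, 128)  # e.g. ground-truth masks

print(loss_fn(pred, target))  # weighted sum of per-layer MSE feature losses
```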

View File

@@ -0,0 +1,22 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms
from Model_Loss.CIOU_Loss import CIOULoss
from Model_Loss.Perceptual_Loss import VGGPerceptualLoss
class Segmentation_Loss(nn.Module):
def __init__(self) -> None:
super(Segmentation_Loss, self).__init__()
self.Perceptual_Loss = VGGPerceptualLoss()
self.CIOU = CIOULoss()
pass
def forward(self, Output_Result, GroundTruth_Result):
Perceptual_Loss = self.Perceptual_Loss(Output_Result, GroundTruth_Result)
CIOU_Loss = self.CIOU(Output_Result, GroundTruth_Result)
return Perceptual_Loss + CIOU_Loss
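
A minimal sketch of the combined loss above on placeholder mask tensors; `Output_Result` and `GroundTruth_Result` are assumed to be [B, 1, H, W] segmentation outputs, which is the shape `CIOULoss._mask_to_boxes` expects:

```python
import torch

criterion = Segmentation_Loss()
output = torch.rand(2, 1, 64, 64)        # hypothetical model output
ground_truth = torch.rand(2, 1, 64, 64)  # hypothetical ground-truth masks

# Sum of the VGG perceptual loss and the CIOU loss derived from the masks.
print(criterion(output, ground_truth))
```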

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,145 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class BinaryCrossEntropy(nn.Module):
"""
基本的二元交叉熵損失函數
"""
def __init__(self, reduction='mean'):
"""
初始化
Args:
reduction (str): 'mean', 'sum''none',指定如何減少損失
"""
super(BinaryCrossEntropy, self).__init__()
def forward(self, predictions, targets):
"""
Compute the binary cross-entropy loss
Args:
predictions (torch.Tensor): model outputs, shape [batch_size, ...]
targets (torch.Tensor): target labels, same shape as predictions
Returns:
torch.Tensor: the computed loss value
"""
# Make sure the inputs are tensors
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
return F.binary_cross_entropy_with_logits(predictions, targets)
# # Check whether the output and label dimensions match
# if predictions.shape[1] != targets.shape[1]:
# # Dimensions differ: use the cross-entropy loss.
# # Cross entropy expects class indices rather than one-hot encodings,
# # so convert the one-hot encoding into class indices
# _, targets_indices = torch.max(targets, dim=1)
# return F.cross_entropy(predictions, targets_indices, reduction=self.reduction)
# else:
# # Dimensions match: use the binary cross-entropy loss.
# # PyTorch's built-in binary_cross_entropy_with_logits
# # applies the sigmoid itself and avoids inputs outside [0, 1]
# return F.binary_cross_entropy_with_logits(predictions, targets, reduction=self.reduction)
class WeightedBinaryCrossEntropy(nn.Module):
"""
Weighted binary cross-entropy loss
"""
def __init__(self, pos_weight=1.0, neg_weight=1.0, reduction='mean'):
"""
Initialize
Args:
pos_weight (float): weight for the positive samples
neg_weight (float): weight for the negative samples
reduction (str): 'mean', 'sum' or 'none'; how to reduce the loss
"""
super(WeightedBinaryCrossEntropy, self).__init__()
self.pos_weight = pos_weight
self.neg_weight = neg_weight
self.reduction = reduction
def forward(self, predictions, targets):
"""
Compute the weighted binary cross-entropy loss
Args:
predictions (torch.Tensor): model outputs, shape [batch_size, ...]
targets (torch.Tensor): target labels, same shape as predictions
Returns:
torch.Tensor: the computed loss value
"""
# Make sure the inputs are tensors
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
# Apply the sigmoid so the predictions lie in [0, 1]
predictions = torch.sigmoid(predictions)
# Compute the weighted binary cross-entropy loss
loss = -self.pos_weight * targets * torch.log(predictions + 1e-7) - \
self.neg_weight * (1 - targets) * torch.log(1 - predictions + 1e-7)
# Return the loss according to the reduction mode
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
return loss
class LabelSmoothingBCE(nn.Module):
"""
Binary cross-entropy loss with label smoothing
"""
def __init__(self, smoothing=0.1, reduction='mean'):
"""
Initialize
Args:
smoothing (float): label-smoothing coefficient in [0, 1]
reduction (str): 'mean', 'sum' or 'none'; how to reduce the loss
"""
super(LabelSmoothingBCE, self).__init__()
self.smoothing = smoothing
self.reduction = reduction
def forward(self, predictions, targets):
"""
Compute the label-smoothed binary cross-entropy loss
Args:
predictions (torch.Tensor): model outputs, shape [batch_size, ...]
targets (torch.Tensor): target labels, same shape as predictions
Returns:
torch.Tensor: the computed loss value
"""
# Make sure the inputs are tensors
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
# Apply the label smoothing
targets = targets * (1 - self.smoothing) + 0.5 * self.smoothing
# Apply the sigmoid so the predictions lie in [0, 1]
predictions = torch.sigmoid(predictions)
# Compute the binary cross-entropy loss
loss = -targets * torch.log(predictions + 1e-7) - (1 - targets) * torch.log(1 - predictions + 1e-7)
# Return the loss according to the reduction mode
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
return loss
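
A minimal sketch exercising the three variants above on the same random batch; the tensors are placeholders:

```python
import torch

logits = torch.randn(8, 1)
targets = torch.randint(0, 2, (8, 1)).float()

for loss_fn in (BinaryCrossEntropy(),
                WeightedBinaryCrossEntropy(pos_weight=2.0, neg_weight=1.0),
                LabelSmoothingBCE(smoothing=0.1)):
    print(type(loss_fn).__name__, loss_fn(logits, targets).item())
```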

View File

@@ -14,7 +14,7 @@ if __name__ == "__main__":
tool.Set_Labels()
tool.Set_Save_Roots()
Labels = tool.Get_Data_Label()
Trainig_Root, Testing_Root, Validation_Root = tool.Get_Save_Roots(2)
Trainig_Root, Testing_Root = tool.Get_Save_Roots(2)
load = Loding_Data_Root(Labels, Trainig_Root, "")
Data_Root = load.get_Image_data_roots(Trainig_Root)

122
README.md
View File

@@ -1,16 +1,116 @@
main.py: the main program file
# Gastroscopy Disease Diagnosis System
## Project Overview
This project is a deep-learning-based automatic diagnosis system for gastroscopy images. It detects and classifies stomach diseases, in particular gastric cancer (CA). The system uses a two-stage segment-then-classify approach: it first segments the gastroscopy image to find suspicious regions, then classifies those regions to decide whether disease is present.
* Dataset classes: 3 (gastric cancer, diseased but not cancer, normal)
* Base model: Xception
## Main Features
- **Image preprocessing**: histogram equalization, adaptive histogram equalization, sharpening, HSV adjustment, gamma correction, and other enhancement methods
- **Data augmentation**: expands the training set with an image generator
- **Disease segmentation**: a GastroSegNet model segments suspicious regions in the gastroscopy image
- **Disease classification**: a modified Xception model classifies the segmented regions into three classes: Normal, gastric cancer (CA) and to-be-confirmed (Have_Question)
- **Result visualization**: confusion matrices, training curves and other visualization tools
## System Architecture
The system uses a two-stage pipeline:
1. **Segmentation stage**: a GastroSegNet model segments the input image and finds the suspicious regions
2. **Classification stage**:
- the first classifier separates Normal from Others
- the second classifier separates gastric cancer (CA) from to-be-confirmed (Have_Question)
## Requirements
- Python 3.8+
- PyTorch 1.8+
- CUDA (recommended, to speed up training)
- Other dependencies: torchvision, numpy, opencv-python, scikit-image, pandas, etc.
## Usage
### Data Preparation
1. Put the training data under `../Dataset/Training`
2. Put the test data under `../Dataset/Testing`
3. Put the annotation data (XML format) under `../Label_Image`
### Training
```bash
uv run main.py
```
Training automatically runs the following steps:
1. Data preprocessing and augmentation
2. Training the segmentation model (GastroSegNet)
3. Processing the images with the segmentation model
4. Training the classification model (modified Xception)
### Results
The training results are saved under `../Result`, including:
- Training curves: `../Result/Training_Image`
- Confusion matrices: `../Result/Matrix_Image`
- Training result data: `../Result/Training_Result`
- Best model: `../Result/save_the_best_model`
## Project Structure
- `main.py`: program entry point
- `experiments/`: experiment code
- `experiment.py`: main experiment flow
- `Training/`: training code
- `Models/`: model definitions
- `Image_Process/`: image-processing code
- `Model_Loss/`: loss-function definitions
- `Training_Tools/`: training utilities
- `utils/`: utility functions and configuration
## Configuration
The system configuration is defined in `utils/Stomach_Config.py` and mainly includes (a hedged sketch of the layout follows this list):
- `Image_Enhance`: image-enhancement method
- `Loading_Config`: data-loading configuration
- `Training_Config`: training parameters
- `Model_Config`: model parameters
- `Save_Result_File_Config`: result-saving paths
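
A hedged sketch of what `utils/Stomach_Config.py` might look like; the key names come from this README and the diffs above (`Training_Labels`, `XML_Loading_Label`), but the concrete values are assumptions, not the repository's actual settings:

```python
# Hypothetical layout of utils/Stomach_Config.py; values are illustrative only.
Loading_Config = {
    "Training_Labels": ["Normal", "CA", "Have_Question"],
    "XML_Loading_Label": ["CA"],
}

Training_Config = {
    "Batch_Size": 16,
    "Epochs": 100,
    "Learning_Rate": 1e-4,
}
```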
## Models
### Segmentation model (GastroSegNet)
Identifies suspicious regions in gastroscopy images and outputs segmentation masks.
### Classification model (modified Xception)
Based on the Xception architecture, adapted to the gastroscopy classification task; it mainly separates the normal, gastric cancer and to-be-confirmed classes.
* Main executable: main.py
## load_process
### Loads image files, splits off the independent data (test, validation), loads the independent data, and handles general file operations
File_Process : the main file-handling program; opening files, creating files, checking whether a file exists and so on are its responsibility. A plain object and also LoadData's parent
LoadData : the main loading program; every load starts here. Inherits from File_Process (child object)
Cutting_Indepentend_Image : object that loads the independent data (testing, validation)
* File_Process : the main file-handling program; opening files, creating files, checking whether a file exists and so on are its responsibility. A plain object and also LoadData's parent
* LoadData : the main loading program; every load starts here. Inherits from File_Process (child object)
* Cutting_Indepentend_Image : object that loads the independent data (testing, validation)
## Calculate_Process
### Computes the model's evaluation metrics
* Calculate: computes the mean and standard deviation of the model's evaluation metrics and saves the results to a file
## Image_Process
### Handles data augmentation, image processing and related operations
* Generator_Content : builds the basic generator items; parent class of Image_Generator
* Image_Generator : produces the augmented data and saves it to files. Inherits from Generator_Content (child object)
* image_enhancement : performs the image processing and returns the data
* Image_Generator : produces the augmented data and saves it to files.
* image_enhancement : performs the image processing that enhances the data.
## all_models_tools
### Model-control details such as early stopping, learning-rate reduction and saving the best model
* all_model_tools: the callback methods
## Model_Tools
### The model's basic building blocks, including Convolution, Dense and other model components
@@ -42,11 +142,13 @@ Cutting_Indepentend_Image : 讀取獨立資料(testing、Validation)的物件
### Validates data types, input errors and similar problems inside the code
* Validation : validates code errors
## Draw
## draw_tools
### Drawing utilities
* Draw_Tools : tool that draws confusion matrices and trend plots
* draw : tool that draws confusion matrices and trend plots
* Grad_CAM : tool that draws the Grad-CAM heat maps for model visualization
## Experiment
### The main program that runs the experiments
* Experiment : loads the data, configures the model-compile details, runs the training and validates the results
* Experiment : loads the data, configures the model and experiment details, runs the training and validates the results
* Model_All_Step : sets up the model's training flow and its detailed parameters
* pytorch_Model: defines the model architecture

View File

@@ -1,74 +0,0 @@
import cv2
import numpy as np
import torch
class Read_image_and_Process_image:
def __init__(self, Image_Size) -> None:
self.Image_Size = Image_Size
pass
def get_data(self, path):
'''Load a file'''
try:
img_arr = cv2.imread(path, cv2.IMREAD_COLOR) # read the file (colour)
# img_arr = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # read the file (grayscale)
resized_arr = cv2.resize(img_arr, (self.Image_Size, self.Image_Size)) # resize the image
except Exception as e:
print(e)
return resized_arr
def Data_Augmentation_Image(self, path):
resized_arr = []
for p in path:
try:
img_arr = cv2.imread(p, cv2.IMREAD_COLOR) # read the file (colour)
# img_arr = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # read the file (grayscale)
resized_arr.append(cv2.resize(img_arr, (self.Image_Size, self.Image_Size))) # resize the image
except Exception as e:
print(e)
return np.array(resized_arr)
def image_data_processing(self, data, label):
'''Process the images after loading'''
data = np.asarray(data).astype(np.float32) # convert the image list into an np.array
data = data.reshape(-1, self.Image_Size, self.Image_Size, 3) # reshape the array
label = np.array(label) # convert the labels from a list into a numpy array
return data, label
def normalization(self, images):
imgs = []
for img in images:
img = np.asarray(img).astype(np.float32) # convert the image list into an np.array
img = img / 255 # normalize the image data
imgs.append(img)
return np.array(imgs)
# def load_numpy_data(self, file_names):
# '''Load numpy image files and run image processing to improve feature extraction'''
# i = 0
# numpy_image = []
# original_image = []
# for file_name in file_names:
# compare = str(file_name).split(".")
# if compare[-1] == "npy":
# image = np.load(file_name) # read the image file
# numpy_image.append(image) # collect into one array
# else:
# original_image.append(file_name)
# original_image = self.get_data(original_image)
# for file in original_image:
# numpy_image.append(file)
# return numpy_image
def make_label_list(self, length, content):
'''Build the label list'''
label_list = []
for i in range(length):
label_list.append(content)
return label_list

View File

@@ -0,0 +1,163 @@
from torch.utils.data import Dataset, DataLoader, RandomSampler, WeightedRandomSampler, SubsetRandomSampler, Subset
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from PIL import Image
import torch
import numpy as np
import cv2
class ListDataset(Dataset):
def __init__(self, data_list, labels_list, Mask_List, transform):
self.data = data_list
self.labels = labels_list
self.Mask_Truth_List = Mask_List
self.transform = transform
self.roots = []
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
Image_Root = self.data[idx]
Mask_Ground_Truth = None
if self.Mask_Truth_List is not None:
mask_path = self.Mask_Truth_List[idx]
if mask_path is not None: # make sure the mask path is not None
try:
Mask_Ground_Truth = Image.open(mask_path).convert("RGB")
# do not convert to a tensor yet; convert after the transform has run
except Exception as e:
print(e)
Split_Roots = Image_Root.split("/")
# Split_Roots = Split_Roots[-1].split("\\")
File_Name = Split_Roots[-1]
classes = Split_Roots[-2]
try:
Images = Image.open(Image_Root).convert("RGB")
except Exception as e:
raise RuntimeError(f"Error loading image {Image_Root}: {e}") from e # fail loudly on a bad image
if self.transform != "Generator":
Images = self.transform(Images)
if self.Mask_Truth_List is not None and Mask_Ground_Truth is not None and not isinstance(Mask_Ground_Truth, torch.Tensor):
Mask_Ground_Truth = self.transform(Mask_Ground_Truth)
# make sure Images is a tensor
if not isinstance(Images, torch.Tensor):
Images = torch.tensor(np.array(Images))
# make sure Mask_Ground_Truth is a tensor
if self.Mask_Truth_List is not None and Mask_Ground_Truth is not None and not isinstance(Mask_Ground_Truth, torch.Tensor):
Mask_Ground_Truth = torch.tensor(np.array(Mask_Ground_Truth))
label = self.labels[idx]
if self.Mask_Truth_List is not None:
# if the mask is None, create an empty mask the same size as the image
if Mask_Ground_Truth is None:
if isinstance(Images, torch.Tensor):
# create an empty mask tensor the same size as the image
Mask_Ground_Truth = torch.zeros_like(Images)
else:
# if the image is not a tensor, create an empty PIL image
Mask_Ground_Truth = Image.new('RGB', Images.size, (0, 0, 0))
if self.transform != "Generator":
Mask_Ground_Truth = self.transform(Mask_Ground_Truth)
return Images, Mask_Ground_Truth, label, File_Name, classes
return Images, label, File_Name, classes
class Training_Precesses:
def __init__(self, ImageSize):
seed = 42 # Set an arbitrary integer as the seed
self.ImageSize = ImageSize
self.generator = torch.Generator()
self.generator.manual_seed(seed)
def Dataloader_Sampler(self, SubDataSet, Batch_Size, Sampler=True):
if Sampler:
Data_Loader = DataLoader(
dataset=SubDataSet,
batch_size=Batch_Size,
num_workers=0,
pin_memory=True,
sampler=self.Setting_WeightedRandomSampler_Content(SubDataSet)
)
else:
Data_Loader = DataLoader(
dataset=SubDataSet,
batch_size=Batch_Size,
num_workers=0,
pin_memory=True
)
return Data_Loader
def Setting_WeightedRandomSampler_Content(self, SubDataSet):
# Check if SubDataSet is a Subset or a full dataset
if isinstance(SubDataSet, Subset):
# Get the underlying dataset and subset indices
base_dataset = SubDataSet.dataset
subset_indices = SubDataSet.indices
# Extract labels for the subset
labels = [base_dataset.labels[i] for i in subset_indices]
else:
# Assume SubDataSet is a ListDataset or similar
labels = SubDataSet.labels
# Convert labels to class indices if they are one-hot encoded
labels = np.array(labels)
if labels.ndim > 1: # If one-hot encoded
labels = np.argmax(labels, axis=1)
# make sure the labels are integers
try:
# try to cast the labels to integers
labels = labels.astype(np.int64)
except ValueError:
# if the labels are strings, map them to integers first
unique_labels = np.unique(labels)
label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}
labels = np.array([label_to_idx[label] for label in labels])
# Count occurrences of each class
class_counts = np.bincount(labels)
class_weights = 1.0 / class_counts # Inverse frequency as weight
sample_weights = class_weights[labels] # Assign weight to each sample
return WeightedRandomSampler(
weights=sample_weights,
num_samples=len(sample_weights),
replacement=True
)
def Setting_RandomSampler_Content(self, Dataset):
return RandomSampler(Dataset, generator = self.generator)
def Setting_DataSet(self, Datas, Labels, Mask_List, transform = None):
# data preprocessing
if transform is None:
transform = transforms.Compose([
transforms.Resize((self.ImageSize, self.ImageSize))
])
elif transform == "Transform":
transform = transforms.Compose([
transforms.Resize((self.ImageSize, self.ImageSize)),
transforms.ToTensor()
])
elif transform == "Generator":
transform = "Generator"
# Create Dataset
list_dataset = ListDataset(Datas, Labels, Mask_List, transform)
return list_dataset
def Setting_SubsetRandomSampler_Content(self, SubDataSet):
# Calculate subset indices (example: using a fraction of the dataset)
dataset_size = len(SubDataSet)
subset_size = int(0.8 * dataset_size) # Use 80% of the dataset as an example
subset_indices = torch.randperm(dataset_size, generator=self.generator)[:subset_size]
return SubsetRandomSampler(subset_indices, generator=self.generator)
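# Usage sketch (illustrative; the paths, labels, and batch size are assumptions;
# "Transform" selects the Resize+ToTensor pipeline defined above):
# prep = Training_Precesses(ImageSize=224)
# dataset = prep.Setting_DataSet(image_paths, onehot_labels, Mask_List=None, transform="Transform")
# loader = prep.Dataloader_Sampler(dataset, Batch_Size=16, Sampler=True)  # class-balanced batches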

View File

@@ -1,72 +1,13 @@
import pandas as pd
from torch.nn import functional
import torch
from torch.utils.data import Dataset, DataLoader, RandomSampler
import torchvision.transforms as transforms
class ListDataset(Dataset):
def __init__(self, data_list, labels_list, status):
self.data = data_list
self.labels = labels_list
self.status = status
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
sample = self.data[idx]
if self.status:
from Image_Process.Image_Generator import Image_generator
ImageGenerator = Image_generator("", "", 12)
Transform = ImageGenerator.Generator_Content(5)
sample = Transform(sample)
label = self.labels[idx]
return sample, label
class Tool:
def __init__(self) -> None:
self.__ICG_Training_Root = ""
self.__Normal_Training_Root = ""
self.__Comprehensive_Training_Root = ""
self.__ICG_Test_Data_Root = ""
self.__Normal_Test_Data_Root = ""
self.__Comprehensive_Testing_Root = ""
self.__ICG_Validation_Data_Root = ""
self.__Normal_Validation_Data_Root = ""
self.__Comprehensive_Validation_Root = ""
self.__ICG_ImageGenerator_Data_Root = ""
self.__Normal_ImageGenerator_Data_Root = ""
self.__Comprehensive_Generator_Root = ""
self.__Labels = []
self.__OneHot_Encording = []
pass
def Set_Labels(self):
self.__Labels = ["stomach_cancer_Crop", "Normal_Crop", "Have_Question_Crop"]
def Set_Save_Roots(self):
self.__ICG_Training_Root = "../Dataset/Training/CA_ICG"
self.__Normal_Training_Root = "../Dataset/Training/CA"
self.__Comprehensive_Training_Root = "../Dataset/Training/Mixed"
self.__ICG_Test_Data_Root = "../Dataset/Training/CA_ICG_TestData"
self.__Normal_Test_Data_Root = "../Dataset/Training/Normal_TestData"
self.__Comprehensive_Testing_Root = "../Dataset/Training/Comprehensive_TestData"
self.__ICG_Validation_Data_Root = "../Dataset/Training/CA_ICG_ValidationData"
self.__Normal_Validation_Data_Root = "../Dataset/Training/Normal_ValidationData"
self.__Comprehensive_Validation_Root = "../Dataset/Training/Comprehensive_ValidationData"
self.__ICG_ImageGenerator_Data_Root = "../Dataset/Training/ICG_ImageGenerator"
self.__Normal_ImageGenerator_Data_Root = "../Dataset/Training/Normal_ImageGenerator"
self.__Comprehensive_Generator_Root = "../Dataset/Training/Comprehensive_ImageGenerator"
def Set_OneHotEncording(self, content):
Counter = []
for i in range(len(content)):
@@ -75,47 +16,6 @@ class Tool:
Counter = torch.tensor(Counter)
self.__OneHot_Encording = functional.one_hot(Counter, len(content))
pass
def Get_Data_Label(self):
'''
Return the labels for the required data
'''
return self.__Labels
def Get_Save_Roots(self, choose):
'''Returns the Train, test, and validation roots
choose = 1 => the ICG roots
choose = 2 => the Normal roots
any other value => the comprehensive roots
'''
if choose == 1:
return self.__ICG_Training_Root, self.__ICG_Test_Data_Root, self.__ICG_Validation_Data_Root
if choose == 2:
return self.__Normal_Training_Root, self.__Normal_Test_Data_Root, self.__Normal_Validation_Data_Root
else:
return self.__Comprehensive_Training_Root, self.__Comprehensive_Testing_Root, self.__Comprehensive_Validation_Root
def Get_Generator_Save_Roots(self, choose):
'''Returns the image-generator data root for the chosen label set'''
if choose == 1:
return self.__ICG_ImageGenerator_Data_Root
if choose == 2:
return self.__Normal_ImageGenerator_Data_Root
else:
return self.__Comprehensive_Generator_Root
def Get_OneHot_Encording_Label(self):
return self.__OneHot_Encording
def Convert_Data_To_DataSet_And_Put_To_Dataloader(self, Datas : list, Labels : list, Batch_Size : int, status : bool = True):
seed = 42 # use an arbitrary integer as the seed
# create the random-number generator
generator = torch.Generator()
generator.manual_seed(seed)
# create the Dataset
list_dataset = ListDataset(Datas, Labels, status)
# sampler = RandomSampler(list_dataset, generator = generator) # create the sampler
return DataLoader(dataset = list_dataset, batch_size = Batch_Size, num_workers = 0, pin_memory=True, shuffle = True)
return self.__OneHot_Encording

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

26
a00.xml Normal file
View File

@@ -0,0 +1,26 @@
<annotation>
<folder>Processing_Image</folder>
<filename>a00.jpg</filename>
<path>D:\Programing\stomach_cancer\Processing_Image\a00.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>1074</width>
<height>1074</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>Have_Question</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>263</xmin>
<ymin>740</ymin>
<xmax>333</xmax>
<ymax>814</ymax>
</bndbox>
</object>
</annotation>

Binary file not shown.

View File

@@ -2,19 +2,6 @@ from Load_process.file_processing import Process_File
import datetime
import torch
# def attention_block(input):
# channel = input.shape[-1]
# GAP = GlobalAveragePooling2D()(input)
# block = Dense(units = channel // 16, activation = "relu")(GAP)
# block = Dense(units = channel, activation = "sigmoid")(block)
# block = Reshape((1, 1, channel))(block)
# block = Multiply()([input, block])
# return block
class EarlyStopping:
def __init__(self, patience=74, verbose=False, delta=0):
self.patience = patience
@@ -45,12 +32,11 @@ class EarlyStopping:
print(f"Validation loss decreased ({self.best_loss:.6f} --> {val_loss:.6f}). Saving model to {save_path}")
def call_back(model_name, index, optimizer):
def call_back(Save_Root, index, optimizer):
File = Process_File()
model_dir = '../Result/save_the_best_model/' + model_name
File.JudgeRoot_MakeDir(model_dir)
modelfiles = File.Make_Save_Root('best_model( ' + str(datetime.date.today()) + " )-" + str(index) + ".pt", model_dir)
File.JudgeRoot_MakeDir(Save_Root)
modelfiles = File.Make_Save_Root('best_model( ' + str(datetime.date.today()) + " )-" + index + ".pt", Save_Root)
# model_mckp = ModelCheckpoint(modelfiles, monitor='val_loss', save_best_only=True, save_weights_only = True, mode='auto')
@@ -60,8 +46,6 @@ def call_back(model_name, index, optimizer):
optimizer,
factor = 0.94, # amount the learning rate is reduced by: new_lr = lr * factor
patience = 2, # number of epochs with no improvement, after which the learning rate is reduced
verbose = 0,
mode = 'min',
min_lr = 0 # lower bound on the learning rate
)

View File

@@ -1,116 +0,0 @@
from all_models_tools.all_model_tools import attention_block
from keras.activations import softmax, sigmoid
from keras.applications import VGG16,VGG19, ResNet50, ResNet50V2, ResNet101, ResNet101V2, ResNet152, ResNet152V2, InceptionV3, InceptionResNetV2, MobileNet, MobileNetV2, DenseNet121, NASNetLarge, Xception
from keras.layers import GlobalAveragePooling2D, Dense, Flatten
from keras import regularizers
from keras.layers import Add
from application.Xception_indepentment import Xception_indepentment
def Original_VGG19_Model():
vgg19 = VGG19(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(vgg19.output)
dense = Dense(units = 4096, activation = "relu")(GAP)
dense = Dense(units = 4096, activation = "relu")(dense)
output = Dense(units = 2, activation = "softmax")(dense)
return vgg19.input, output
def Original_ResNet50_model():
xception = ResNet50(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(xception.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return xception.input, dense
def Original_NASNetLarge_model():
nasnetlarge = NASNetLarge(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(nasnetlarge.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return nasnetlarge.input, dense
def Original_DenseNet121_model():
Densenet201 = DenseNet121(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(Densenet201.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return Densenet201.input, dense
def Original_Xception_model():
xception = Xception(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(xception.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return xception.input, dense
def Original_VGG16_Model():
vgg16 = VGG16(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
flatten = Flatten()(vgg16.output)
dense = Dense(units = 4096, activation = "relu")(flatten)
dense = Dense(units = 4096, activation = "relu")(dense)
output = Dense(units = 2, activation = "softmax")(dense)
return vgg16.input, output
def Original_ResNet50v2_model():
resnet50v2 = ResNet50V2(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(resnet50v2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet50v2.input, dense
def Original_ResNet101_model():
resnet101 = ResNet101(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(resnet101.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet101.input, dense
def Original_ResNet101V2_model():
resnet101v2 = ResNet101V2(include_top = False, weights = "imagenet", input_shape = (512, 512, 3))
GAP = GlobalAveragePooling2D()(resnet101v2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet101v2.input, dense
def Original_ResNet152_model():
resnet152 = ResNet152(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(resnet152.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet152.input, dense
def Original_ResNet152V2_model():
resnet152v2 = ResNet152V2(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(resnet152v2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return resnet152v2.input, dense
def Original_InceptionV3_model():
inceptionv3 = InceptionV3(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(inceptionv3.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return inceptionv3.input, dense
def Original_InceptionResNetV2_model():
inceptionResnetv2 = InceptionResNetV2(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(inceptionResnetv2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return inceptionResnetv2.input, dense
def Original_MobileNet_model():
mobilenet = MobileNet(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(mobilenet.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return mobilenet.input, dense
def Original_MobileNetV2_model():
mobilenetv2 = MobileNetV2(include_top = False, weights = "imagenet", input_shape = (200, 200, 3))
GAP = GlobalAveragePooling2D()(mobilenetv2.output)
dense = Dense(units = 2, activation = "softmax")(GAP)
return mobilenetv2.input, dense

BIN
annotation_files.txt Normal file

Binary file not shown.

Binary file not shown.

View File

@@ -1,256 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# SENet
# block = layers.GlobalAveragePooling2D()(residual)
# block = layers.Dense(units = residual.shape[-1] // 16, activation = "relu")(block)
# block = layers.Dense(units = residual.shape[-1], activation = "sigmoid")(block)
# block = Reshape((1, 1, residual.shape[-1]))(block)
# residual = Multiply()([residual, block])
from keras import backend
from keras import layers
from keras.layers import Reshape, Multiply, Conv1D
import math
def Xception_indepentment(input_shape=None):
channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
img_input = layers.Input(shape=input_shape)
x = layers.Conv2D(
32, (3, 3), strides=(2, 2), use_bias=False, name="block1_conv1"
)(img_input)
x = layers.BatchNormalization(axis=channel_axis, name="block1_conv1_bn")(x)
x = layers.Activation("relu", name="block1_conv1_act")(x)
x = layers.Conv2D(64, (3, 3), use_bias=False, name="block1_conv2")(x)
x = layers.BatchNormalization(axis=channel_axis, name="block1_conv2_bn")(x)
x = layers.Activation("relu", name="block1_conv2_act")(x)
residual = layers.Conv2D(
128, (1, 1), strides=(2, 2), padding="same", use_bias=False
)(x)
residual = layers.BatchNormalization(axis=channel_axis)(residual)
# attention-mechanism block
kernel = int(abs((math.log(residual.shape[-1], 2) + 1) / 2))
if kernel % 2:
kernel_size = kernel
else:
kernel_size = kernel + 1
block = layers.GlobalAveragePooling2D()(residual)
block = Reshape(target_shape = (residual.shape[-1], 1))(block)
block = Conv1D(filters = 1, kernel_size = kernel_size, padding = "same", use_bias = False, activation = "sigmoid")(block)
block = Reshape((1, 1, residual.shape[-1]))(block)
residual = Multiply()([residual, block])
x = layers.SeparableConv2D(
128, (3, 3), padding="same", use_bias=False, name="block2_sepconv1"
)(x)
x = layers.BatchNormalization(axis=channel_axis, name="block2_sepconv1_bn")(
x
)
x = layers.Activation("relu", name="block2_sepconv2_act")(x)
x = layers.SeparableConv2D(
128, (3, 3), padding="same", use_bias=False, name="block2_sepconv2"
)(x)
x = layers.BatchNormalization(axis=channel_axis, name="block2_sepconv2_bn")(
x
)
x = layers.MaxPooling2D(
(3, 3), strides=(2, 2), padding="same", name="block2_pool"
)(x)
x = layers.add([x, residual])
residual = layers.Conv2D(
256, (1, 1), strides=(2, 2), padding="same", use_bias=False
)(x)
residual = layers.BatchNormalization(axis=channel_axis)(residual)
# attention-mechanism block
kernel = int(abs((math.log(residual.shape[-1], 2) + 1) / 2))
if kernel % 2:
kernel_size = kernel
else:
kernel_size = kernel + 1
block = layers.GlobalAveragePooling2D()(residual)
block = Reshape(target_shape = (residual.shape[-1], 1))(block)
block = Conv1D(filters = 1, kernel_size = kernel_size, padding = "same", use_bias = False, activation = "sigmoid")(block)
block = Reshape((1, 1, residual.shape[-1]))(block)
residual = Multiply()([residual, block])
x = layers.Activation("relu", name="block3_sepconv1_act")(x)
x = layers.SeparableConv2D(
256, (3, 3), padding="same", use_bias=False, name="block3_sepconv1"
)(x)
x = layers.BatchNormalization(axis=channel_axis, name="block3_sepconv1_bn")(
x
)
x = layers.Activation("relu", name="block3_sepconv2_act")(x)
x = layers.SeparableConv2D(
256, (3, 3), padding="same", use_bias=False, name="block3_sepconv2"
)(x)
x = layers.BatchNormalization(axis=channel_axis, name="block3_sepconv2_bn")(x)
x = layers.MaxPooling2D(
(3, 3), strides=(2, 2), padding="same", name="block3_pool"
)(x)
x = layers.add([x, residual])
residual = layers.Conv2D(
728, (1, 1), strides=(2, 2), padding="same", use_bias=False
)(x)
residual = layers.BatchNormalization(axis=channel_axis)(residual)
# attention-mechanism block
kernel = int(abs((math.log(residual.shape[-1], 2) + 1) / 2))
if kernel % 2:
kernel_size = kernel
else:
kernel_size = kernel + 1
block = layers.GlobalAveragePooling2D()(residual)
block = Reshape(target_shape = (residual.shape[-1], 1))(block)
block = Conv1D(filters = 1, kernel_size = kernel_size, padding = "same", use_bias = False, activation = "sigmoid")(block)
block = Reshape((1, 1, residual.shape[-1]))(block)
residual = Multiply()([residual, block])
x = layers.Activation("relu", name="block4_sepconv1_act")(x)
x = layers.SeparableConv2D(
728, (3, 3), padding="same", use_bias=False, name="block4_sepconv1"
)(x)
x = layers.BatchNormalization(axis=channel_axis, name="block4_sepconv1_bn")(
x
)
x = layers.Activation("relu", name="block4_sepconv2_act")(x)
x = layers.SeparableConv2D(
728, (3, 3), padding="same", use_bias=False, name="block4_sepconv2"
)(x)
x = layers.BatchNormalization(axis=channel_axis, name="block4_sepconv2_bn")(
x
)
x = layers.MaxPooling2D(
(3, 3), strides=(2, 2), padding="same", name="block4_pool"
)(x)
x = layers.add([x, residual])
for i in range(8):
residual = x
prefix = "block" + str(i + 5)
x = layers.Activation("relu", name=prefix + "_sepconv1_act")(x)
x = layers.SeparableConv2D(
728,
(3, 3),
padding="same",
use_bias=False,
name=prefix + "_sepconv1",
)(x)
x = layers.BatchNormalization(
axis=channel_axis, name=prefix + "_sepconv1_bn"
)(x)
x = layers.Activation("relu", name=prefix + "_sepconv2_act")(x)
x = layers.SeparableConv2D(
728,
(3, 3),
padding="same",
use_bias=False,
name=prefix + "_sepconv2",
)(x)
x = layers.BatchNormalization(
axis=channel_axis, name=prefix + "_sepconv2_bn"
)(x)
x = layers.Activation("relu", name=prefix + "_sepconv3_act")(x)
x = layers.SeparableConv2D(
728,
(3, 3),
padding="same",
use_bias=False,
name=prefix + "_sepconv3",
)(x)
x = layers.BatchNormalization(
axis=channel_axis, name=prefix + "_sepconv3_bn"
)(x)
x = layers.add([x, residual])
residual = layers.Conv2D(
1024, (1, 1), strides=(2, 2), padding="same", use_bias=False
)(x)
residual = layers.BatchNormalization(axis=channel_axis)(residual)
# attention-mechanism block
kernel = int(abs((math.log(residual.shape[-1], 2) + 1) / 2))
if kernel % 2:
kernel_size = kernel
else:
kernel_size = kernel + 1
block = layers.GlobalAveragePooling2D()(residual)
block = Reshape(target_shape = (residual.shape[-1], 1))(block)
block = Conv1D(filters = 1, kernel_size = kernel_size, padding = "same", use_bias = False, activation = "sigmoid")(block)
block = Reshape((1, 1, residual.shape[-1]))(block)
residual = Multiply()([residual, block])
x = layers.Activation("relu", name="block13_sepconv1_act")(x)
x = layers.SeparableConv2D(
728, (3, 3), padding="same", use_bias=False, name="block13_sepconv1"
)(x)
x = layers.BatchNormalization(
axis=channel_axis, name="block13_sepconv1_bn"
)(x)
x = layers.Activation("relu", name="block13_sepconv2_act")(x)
x = layers.SeparableConv2D(
1024, (3, 3), padding="same", use_bias=False, name="block13_sepconv2"
)(x)
x = layers.BatchNormalization(
axis=channel_axis, name="block13_sepconv2_bn"
)(x)
x = layers.MaxPooling2D(
(3, 3), strides=(2, 2), padding="same", name="block13_pool"
)(x)
x = layers.add([x, residual])
x = layers.SeparableConv2D(
1536, (3, 3), padding="same", use_bias=False, name="block14_sepconv1"
)(x)
x = layers.BatchNormalization(
axis=channel_axis, name="block14_sepconv1_bn"
)(x)
x = layers.Activation("relu", name="block14_sepconv1_act")(x)
x = layers.SeparableConv2D(
2048, (3, 3), padding="same", use_bias=False, name="block14_sepconv2"
)(x)
x = layers.BatchNormalization(
axis=channel_axis, name="block14_sepconv2_bn"
)(x)
x = layers.Activation("relu", name="block14_sepconv2_act")(x)
return img_input, block

View File

@@ -10,142 +10,109 @@ from Load_process.file_processing import Process_File
class GradCAM:
def __init__(self, model, target_layer):
"""
Initialize Grad-CAM
Args:
model: the trained ModifiedXception model
target_layer: the name of the target layer for Grad-CAM (e.g. 'base_model')
"""
self.model = model
self.target_layer = target_layer
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.model.eval()
self.model.to(self.device)
# used to store the feature maps and gradients
self.features = None
self.activations = None
self.gradients = None
# register the hooks
self._register_hooks()
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.model.to(self.device) # Ensure model is on the correct device
def _register_hooks(self):
"""註冊前向和反向傳播的 hook"""
def forward_hook(module, input, output):
self.features = output
def backward_hook(module, grad_in, grad_out):
self.gradients = grad_out[0]
# get the target layer
target_module = dict(self.model.named_modules())[self.target_layer]
target_module.register_forward_hook(forward_hook)
target_module.register_backward_hook(backward_hook)
# Register hooks
self.target_layer.register_forward_hook(self.save_activations)
self.target_layer.register_backward_hook(self.save_gradients)
def generate_cam(self, input_image, target_class=None):
"""
Generate the Grad-CAM heat map
Args:
input_image: input image (torch.Tensor, shape: [1, C, H, W])
target_class: target class index (if None, the class with the highest predicted score is used)
Returns:
cam: Grad-CAM heat map (numpy array)
"""
input_image = input_image.to(self.device)
# forward pass
output = self.model(input_image)
if target_class is None:
target_class = torch.argmax(output, dim=1).item()
# clear the gradients
self.model.zero_grad()
# backward pass to compute the gradients
one_hot = torch.zeros_like(output)
one_hot[0][target_class] = 1
output.backward(gradient=one_hot, retain_graph=True)
# compute Grad-CAM
gradients = self.gradients.data.cpu().numpy()[0]
features = self.features.data.cpu().numpy()[0]
# global average pooling of the gradients
weights = np.mean(gradients, axis=(1, 2))
# weighted sum of the feature maps
cam = np.zeros(features.shape[1:], dtype=np.float32)
for i, w in enumerate(weights):
cam += w * features[i]
# ReLU activation
cam = np.maximum(cam, 0)
# normalize to [0, 1]
cam = cam - np.min(cam)
cam = cam / np.max(cam)
# resize to the input-image size
h, w = input_image.shape[2:]
cam = cv2.resize(cam, (w, h))
return cam
def overlay_cam(self, original_image, cam, alpha=0.5):
"""
Overlay the Grad-CAM heat map on the original image
Args:
original_image: original image (numpy array, shape: [H, W, C])
cam: Grad-CAM heat map
alpha: transparency
Returns:
overlay_img: the blended image
"""
# convert the heat map to RGB
heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
heatmap = np.float32(heatmap) / 255
# make sure the original image is in the expected range
if original_image.max() > 1:
original_image = original_image / 255.0
# blend the heat map with the image
overlay_img = heatmap * alpha + original_image * (1 - alpha)
overlay_img = np.clip(overlay_img, 0, 1)
return overlay_img
def visualize(self, input_image, original_image, target_class=None, File_Name=None, model_name = None):
"""
Visualize the Grad-CAM result
Args:
input_image: input image (torch.Tensor)
original_image: original image (numpy array)
target_class: target class index
save_path: save path (optional)
"""
def Processing_Main(self, Test_Dataloader, File_Path):
File = Process_File()
# generate the CAM
cam = self.generate_cam(input_image, target_class)
# overlay it on the original image
overlay = self.overlay_cam(original_image, cam)
# display the result
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.imshow(original_image)
plt.title('Original Image')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(overlay)
plt.title(f'Grad-CAM (Class {target_class})')
plt.axis('off')
model_dir = '../Result/Grad-CAM( ' + str(datetime.date.today()) + " )"
File.JudgeRoot_MakeDir(model_dir)
modelfiles = File.Make_Save_Root(str(model_name) + " " + File_Name + ".png", model_dir)
plt.savefig(modelfiles)
plt.close("all") # close the figure
for batch_idx, (images, labels, File_Name, File_Classes) in enumerate(Test_Dataloader):
# Move data to device
images = images.to(self.device, dtype=torch.float32) # [64, C, H, W]
labels = labels.to(self.device, dtype=torch.float32) # [64, num_classes]
# Get ground-truth class indices
label_classes = torch.argmax(labels, dim=1).cpu().numpy() # [64]
# Generate Grad-CAM heatmaps for the entire batch
heatmaps = self.generate(images, label_classes)
# Process each image in the batch
for i in range(images.size(0)): # Loop over batch size (64)
heatmap = heatmaps[i] # Extract heatmap for this image
overlaid_image = self.overlay_heatmap(heatmap, images[i], alpha=0.5)
# Create file path based on class
path = f"{File_Path}/{File_Classes[i]}"
File.JudgeRoot_MakeDir(path)
# Save overlaid image
File.Save_CV2_File(f"batch_{batch_idx}_{File_Name[i]}", path, overlaid_image)
# # Save raw heatmap separately
# heatmap_resized = cv2.resize(heatmap, (images[i].shape[2], images[i].shape[1]), interpolation=cv2.INTER_CUBIC)
# heatmap_colored = (plt.cm.viridis(heatmap_resized)[:, :, :3] * 255).astype(np.uint8)
# File.Save_CV2_File(f"batch_{batch_idx}_img_{i}_heatmap.png", path, heatmap_colored)
def save_activations(self, module, input, output):
self.activations = output.detach() # [64, C, H', W']
def save_gradients(self, module, grad_input, grad_output):
self.gradients = grad_output[0].detach() # [64, C, H', W']
def generate(self, input_images, class_indices=None):
self.model.eval()
input_images.requires_grad = True # [64, C, H, W]
# Forward pass
outputs = self.model(input_images) # [64, num_classes]
if class_indices is None:
class_indices = torch.argmax(outputs, dim=1).cpu().numpy() # [64]
# Zero gradients
self.model.zero_grad()
# Backward pass for each image in the batch
heatmaps = []
for i in range(input_images.size(0)):
self.model.zero_grad()
outputs[i, class_indices[i]].backward(retain_graph=True) # Backward for specific image/class
heatmap = self._compute_heatmap()
heatmaps.append(heatmap)
return np.stack(heatmaps) # [64, H', W']
def _compute_heatmap(self):
# Get gradients and activations
gradients = self.gradients # [64, C, H', W']
activations = self.activations # [64, C, H', W']
# Compute weights (global average pooling of gradients)
weights = torch.mean(gradients, dim=[2, 3], keepdim=True) # [64, C, 1, 1]
# Compute Grad-CAM heatmap for one image (after single backward)
grad_cam = torch.sum(weights * activations, dim=1)[0] # [64, H', W'] -> [H', W']
grad_cam = F.relu(grad_cam) # Apply ReLU
grad_cam = grad_cam / (grad_cam.max() + 1e-8) # Normalize to [0, 1]
# Apply Gaussian smoothing to reduce artifacts
grad_cam_np = grad_cam.cpu().numpy()
grad_cam_np = cv2.GaussianBlur(grad_cam_np, (5, 5), 0)
# Re-normalize after blur
grad_cam_np = grad_cam_np / (grad_cam_np.max() + 1e-8)
return grad_cam_np
def overlay_heatmap(self, heatmap, image, alpha=0.5):
# Resize heatmap to match input image spatial dimensions using INTER_CUBIC for smoother results
heatmap = np.uint8(255 * heatmap) # Scale to [0, 255]
heatmap = cv2.resize(heatmap, (image.shape[2], image.shape[1]), interpolation=cv2.INTER_CUBIC)
# Use viridis colormap for better interpretability
heatmap = plt.cm.viridis(heatmap)[:, :, :3] # Apply viridis colormap
# Convert image tensor to numpy and denormalize (assuming ImageNet stats)
image_np = image.detach().cpu().permute(1, 2, 0).numpy() # [H, W, C]
# Ensure image is in [0, 1] range (if not already)
if image_np.max() > 1.0:
image_np = (image_np - image_np.min()) / (image_np.max() - image_np.min())
# Overlay heatmap on the image
overlay = alpha * heatmap + (1 - alpha) * image_np
overlay = np.clip(overlay, 0, 1) * 255
return overlay.astype(np.uint8) # Return uint8 for cv2

195
draw_tools/Saliency_Map.py Normal file
View File

@@ -0,0 +1,195 @@
import torch
import torch.nn as nn
import numpy as np
import cv2
import matplotlib.pyplot as plt
from Load_process.file_processing import Process_File
class SaliencyMap:
def __init__(self, model):
self.model = model
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.model.to(self.device)
self.model.eval() # 設置為評估模式
def Processing_Main(self, Test_Dataloader, File_Path):
"""處理測試數據集並生成顯著性圖"""
File = Process_File()
for batch_idx, (images, labels, File_Name, File_Classes) in enumerate(Test_Dataloader):
# 將數據移至設備
images = images.to(self.device, dtype=torch.float32)
labels = labels.to(self.device, dtype=torch.float32)
# 獲取真實類別索引
label_classes = torch.argmax(labels, dim=1).cpu().numpy()
# 為批次中的每個圖像生成顯著性圖
for i in range(images.size(0)):
# 獲取單個圖像和類別
image = images[i:i+1] # 保持批次維度
target_class = label_classes[i]
# 生成顯著性圖
saliency_map = self.generate_saliency(image, target_class)
# 將顯著性圖疊加到原始圖像上
overlaid_image = self.overlay_saliency(saliency_map, image[0])
# 創建保存路徑
path = f"{File_Path}/{File_Classes[i]}"
File.JudgeRoot_MakeDir(path)
# 保存結果
File.Save_CV2_File(f"saliency_{batch_idx}_{File_Name[i]}", path, overlaid_image)
def generate_saliency(self, image, target_class):
"""生成單個圖像的顯著性圖"""
# 確保需要梯度
image.requires_grad_(True)
# 前向傳播
output = self.model(image)
# 清除之前的梯度
self.model.zero_grad()
# 創建one-hot編碼的目標
one_hot = torch.zeros_like(output)
one_hot[0, target_class] = 1
# 反向傳播
output.backward(gradient=one_hot)
# 獲取梯度
gradients = image.grad.data
# 計算顯著性圖 (取絕對值並在通道維度上取最大值)
saliency, _ = torch.max(gradients.abs(), dim=1)
# 轉換為numpy並歸一化
saliency_np = saliency.cpu().numpy()[0]
saliency_np = self._normalize(saliency_np)
# 應用平滑處理以減少噪聲
saliency_np = cv2.GaussianBlur(saliency_np, (5, 5), 0)
saliency_np = self._normalize(saliency_np) # 再次歸一化
return saliency_np
def _normalize(self, x):
"""將數組歸一化到[0,1]範圍"""
# 添加小的epsilon以避免除以零
return (x - x.min()) / (x.max() - x.min() + 1e-8)
def overlay_saliency(self, saliency, image, alpha=0.5):
"""將顯著性圖疊加到原始圖像上"""
# 將顯著性圖縮放到[0,255]範圍
saliency_uint8 = np.uint8(255 * saliency)
# 應用顏色映射
heatmap = cv2.applyColorMap(saliency_uint8, cv2.COLORMAP_JET)
# 將圖像張量轉換為numpy數組
image_np = image.detach().cpu().permute(1, 2, 0).numpy()
# 確保圖像在[0,1]範圍內
if image_np.max() > 1.0:
image_np = (image_np - image_np.min()) / (image_np.max() - image_np.min())
# 將圖像轉換為uint8
image_uint8 = np.uint8(255 * image_np)
# 如果圖像是單通道的轉換為3通道
if len(image_uint8.shape) == 2 or image_uint8.shape[2] == 1:
image_uint8 = cv2.cvtColor(image_uint8, cv2.COLOR_GRAY2BGR)
# 疊加顯著性圖和原始圖像
overlaid = cv2.addWeighted(heatmap, alpha, image_uint8, 1-alpha, 0)
return overlaid
def generate_smooth_saliency(self, image, target_class, n_samples=20, noise_level=0.1):
"""使用SmoothGrad技術生成更平滑的顯著性圖"""
# 獲取輸入圖像的標準差
stdev = noise_level * (torch.max(image) - torch.min(image)).item()
# 累積梯度
accumulated_gradients = None
# 生成多個帶噪聲的樣本並計算梯度
for _ in range(n_samples):
# 添加高斯噪聲
noisy_image = image + torch.randn_like(image) * stdev
noisy_image.requires_grad_(True)
# 前向傳播
output = self.model(noisy_image)
# 反向傳播
self.model.zero_grad()
one_hot = torch.zeros_like(output)
one_hot[0, target_class] = 1
output.backward(gradient=one_hot)
# 獲取梯度
gradients = noisy_image.grad.data
# 累積梯度
if accumulated_gradients is None:
accumulated_gradients = gradients
else:
accumulated_gradients += gradients
# 計算平均梯度
avg_gradients = accumulated_gradients / n_samples
# 計算顯著性圖
saliency, _ = torch.max(avg_gradients.abs(), dim=1)
# 轉換為numpy並歸一化
saliency_np = saliency.cpu().numpy()[0]
saliency_np = self._normalize(saliency_np)
return saliency_np
def generate_guided_saliency(self, image, target_class):
"""Generate a saliency map with Guided Backpropagation"""
# register hooks so that only positive gradients flow back through the ReLUs
hook_handles = []
for module in self.model.modules():
if isinstance(module, nn.ReLU):
hook_handles.append(module.register_full_backward_hook(self._guided_relu_backward))
# generate the saliency map
image.requires_grad_(True)
output = self.model(image)
self.model.zero_grad()
one_hot = torch.zeros_like(output)
one_hot[0, target_class] = 1
output.backward(gradient=one_hot)
# fetch the gradients
gradients = image.grad.data
# remove the hooks to restore normal ReLU backpropagation
for handle in hook_handles:
handle.remove()
# compute the saliency map (keep only positive gradients)
saliency = torch.clamp(gradients, min=0)
saliency, _ = torch.max(saliency, dim=1)
# convert to numpy and normalize
saliency_np = saliency.cpu().numpy()[0]
saliency_np = self._normalize(saliency_np)
return saliency_np
def _guided_relu_backward(self, module, grad_input, grad_output):
"""Backward hook for guided ReLU: only positive gradients flow through"""
return (torch.clamp(grad_input[0], min=0),)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -5,33 +5,51 @@ import matplotlib.figure as figure
import matplotlib.backends.backend_agg as agg
from Load_process.file_processing import Process_File
def plot_history(Epochs, Losses, Accuracys, file_name, model_name):
def plot_history(Losses, Accuracys, Save_Root, File_Name):
File = Process_File()
plt.figure(figsize=(16,4))
plt.subplot(1,2,1)
plt.plot(range(1, Epochs + 1), Losses[0])
plt.plot(range(1, Epochs + 1), Losses[1])
# fix the dimension mismatch between the train and validation curves
train_losses = Losses[0]
val_losses = Losses[1]
# plot the training and validation losses separately
train_epochs = range(1, len(train_losses) + 1)
plt.plot(train_epochs, train_losses, label='Train')
val_epochs = range(1, len(val_losses) + 1)
plt.plot(val_epochs, val_losses, label='Validation')
plt.ylabel('Losses')
plt.xlabel('epoch')
plt.legend(['Train','Validation'], loc='upper left')
plt.legend(loc='upper left')
plt.title('Model Loss')
plt.subplot(1,2,2)
plt.plot(range(1, Epochs + 1), Accuracys[0])
plt.plot(range(1, Epochs + 1), Accuracys[1])
plt.ylabel('Accuracies')
plt.xlabel('epoch')
plt.legend(['Train','Validation'], loc='upper left')
plt.title('Model Accuracy')
if Accuracys is not None:
plt.subplot(1,2,2)
train_acc = Accuracys[0]
val_acc = Accuracys[1]
# plot the training and validation accuracies separately
train_epochs_acc = range(1, len(train_acc) + 1)
plt.plot(train_epochs_acc, train_acc, label='Train')
val_epochs_acc = range(1, len(val_acc) + 1)
plt.plot(val_epochs_acc, val_acc, label='Validation')
plt.ylabel('Accuracies')
plt.xlabel('epoch')
plt.legend(loc='upper left')
plt.title('Model Accuracy')
model_dir = '../Result/save_the_train_image( ' + str(datetime.date.today()) + " )"
File.JudgeRoot_MakeDir(model_dir)
modelfiles = File.Make_Save_Root(str(model_name) + " " + str(file_name) + ".png", model_dir)
File.JudgeRoot_MakeDir(Save_Root)
modelfiles = File.Make_Save_Root(f"{str(File_Name)}.png", Save_Root)
plt.savefig(modelfiles)
plt.close("all") # close the figure
def draw_heatmap(matrix, model_name, index): # confusion-matrix plot for two or more classes
def draw_heatmap(matrix, Save_Root, File_Name, index): # confusion-matrix plot for two or more classes
File = Process_File()
# create the heat map
@@ -40,20 +58,19 @@ def draw_heatmap(matrix, Save_Root, File_Name, index): # confusion-matrix plot for two or more classes
Ax = fig.add_subplot(111)
sns.heatmap(matrix, square = True, annot = True, fmt = 'd', linecolor = 'white', cmap = "Purples", ax = Ax) # draw the heat map; cmap sets the color palette
model_dir = '../Result/model_matrix_image ( ' + str(datetime.date.today()) + " )"
File.JudgeRoot_MakeDir(model_dir)
modelfiles = File.Make_Save_Root(str(model_name) + "-" + str(index) + ".png", model_dir)
File.JudgeRoot_MakeDir(Save_Root)
modelfiles = File.Make_Save_Root(f"{File_Name}-{str(index)}.png", Save_Root)
# confusion.figure.savefig(modelfiles)
# set the figure parameters
Ax.set_title(str(model_name) + " confusion matrix")
Ax.set_title(f"{File_Name} confusion matrix")
Ax.set_xlabel("X-Predict label of the model")
Ax.set_ylabel("Y-True label of the model")
# save the figure to a file
canvas.print_figure(modelfiles)
def Confusion_Matrix_of_Two_Classification(Model_Name, Matrix, index):
def Confusion_Matrix_of_Two_Classification(Matrix, Save_Root, File_Name, index):
File = Process_File()
fx = sns.heatmap(Matrix, annot=True, cmap='turbo')
@@ -63,13 +80,20 @@ def Confusion_Matrix_of_Two_Classification(Model_Name, Matrix, index):
fx.set_xlabel('answer Values ')
fx.set_ylabel('Predicted Values')
# labels the boxes
fx.xaxis.set_ticklabels(['False','True'])
fx.yaxis.set_ticklabels(['False','True'])
# set the labels dynamically from the matrix dimension
n_classes = Matrix.shape[0]
# for a 2-class problem, use False/True labels
if n_classes == 2:
labels = ['False', 'True']
else:
# for multi-class problems, use numeric labels
labels = [str(i) for i in range(n_classes)]
fx.xaxis.set_ticklabels(labels)
fx.yaxis.set_ticklabels(labels)
model_dir = '../Result/model_matrix_image ( ' + str(datetime.date.today()) + " )"
File.JudgeRoot_MakeDir(model_dir)
modelfiles = File.Make_Save_Root(str(Model_Name) + "-" + str(index) + ".png", model_dir)
File.JudgeRoot_MakeDir(Save_Root)
modelfiles = File.Make_Save_Root(f"{File_Name}-{str(index)}.png", Save_Root)
plt.savefig(modelfiles)
plt.close("all") # close the figure

View File

@@ -1,182 +0,0 @@
from tqdm import tqdm
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torchmetrics.functional import auroc
import torch.optim as optim
import numpy as np
from all_models_tools.all_model_tools import call_back
from Model_Loss.Loss import Entropy_Loss
from merge_class.merge import merge
from draw_tools.Grad_cam import GradCAM
import time
class All_Step:
def __init__(self, Training_Data_And_Label, Test_Data_And_Label, Validation_Data_And_Label, Model, Epoch, Number_Of_Classes, Model_Name):
self.Training_Data_And_Label = Training_Data_And_Label
self.Test_Data_And_Label = Test_Data_And_Label
self.Validation_Data_And_Label = Validation_Data_And_Label
self.Model = Model
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
self.Epoch = Epoch
self.Number_Of_Classes = Number_Of_Classes
self.Model_Name = Model_Name
pass
def Training_Step(self, model_name, counter):
# define the optimizer; weight_decay adds L2 regularization
Optimizer = optim.SGD(self.Model.parameters(), lr=0.045, momentum = 0.9, weight_decay=0.1)
model_path, early_stopping, scheduler = call_back(model_name, counter, Optimizer)
criterion = Entropy_Loss() # use the custom loss function
Merge_Function = merge()
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
Total_Epoch = 0
for epoch in range(self.Epoch): # training loop
self.Model.train() # switch to training mode
running_loss = 0.0
all_train_preds = []
all_train_labels = []
processed_samples = 0
# record the start time of the epoch
start_time = time.time()
total_samples = len(self.Training_Data_And_Label)
epoch_iterator = tqdm(self.Training_Data_And_Label, desc=f"Epoch [{epoch}/{self.Epoch}]")
for inputs, labels in epoch_iterator:
inputs, labels = torch.as_tensor(inputs).to(self.device), torch.as_tensor(labels).to(self.device)
Optimizer.zero_grad()
outputs = self.Model(inputs)
loss = criterion(outputs, labels)
loss.backward()
Optimizer.step()
running_loss += loss.item()
# collect the training predictions and labels
Output_Values, Output_Indexs = torch.max(outputs, dim = 1)
True_Indexs = np.argmax(labels.cpu().numpy(), 1)
all_train_preds.append(Output_Indexs.cpu().numpy())
all_train_labels.append(True_Indexs)
processed_samples += len(inputs)
# compute the current progress
progress = (processed_samples / total_samples) * 100
# compute the elapsed and remaining time
elapsed_time = time.time() - start_time
iterations_per_second = processed_samples / elapsed_time if elapsed_time > 0 else 0
eta = (total_samples - processed_samples) / iterations_per_second if iterations_per_second > 0 else 0
time_str = f"{int(elapsed_time//60):02d}:{int(elapsed_time%60):02d}<{int(eta//60):02d}:{int(eta%60):02d}"
# compute the accuracy of the current batch (adjust to your own needs)
batch_accuracy = (Output_Indexs.cpu().numpy() == True_Indexs).mean()
# update the progress-bar display
epoch_iterator.set_description(f"Epoch [{epoch}/{self.Epoch}]")
epoch_iterator.set_postfix_str(
f"{processed_samples}/{total_samples} [{time_str}, {iterations_per_second:.2f}it/s, " +
f"acc={batch_accuracy:.3f}, loss={loss.item():.3f}, ]"
)
epoch_iterator.close()
all_train_preds = Merge_Function.merge_data_main(all_train_preds, 0, len(all_train_preds))
all_train_labels = Merge_Function.merge_data_main(all_train_labels, 0, len(all_train_labels))
Training_Loss = running_loss / len(self.Training_Data_And_Label)
train_accuracy = accuracy_score(all_train_labels, all_train_preds)
train_losses.append(Training_Loss)
train_accuracies.append(train_accuracy)
self.Model.eval()
val_loss = 0.0
all_val_preds = []
all_val_labels = []
with torch.no_grad():
for inputs, labels in self.Validation_Data_And_Label:
inputs, labels = torch.as_tensor(inputs).to(self.device), torch.as_tensor(labels).to(self.device)
outputs = self.Model(inputs)
loss = criterion(outputs, labels)
val_loss += loss.item()
# collect the validation predictions and labels
Output_Values, Output_Indexs = torch.max(outputs, dim = 1)
True_Indexs = np.argmax(labels.cpu().numpy(), 1)
all_val_preds.append(Output_Indexs.cpu().numpy())
all_val_labels.append(True_Indexs)
val_loss /= len(self.Validation_Data_And_Label)
val_accuracy = accuracy_score(all_val_labels, all_val_preds)
val_losses.append(val_loss)
val_accuracies.append(val_accuracy)
# print(f"Val_loss: {val_loss:.4f}, Val_accuracy: {val_accuracy:0.2f}\n")
early_stopping(val_loss, self.Model, model_path)
if early_stopping.early_stop:
print("Early stopping triggered. Training stopped.")
Total_Epoch = epoch
break
# learning-rate adjustment
scheduler.step(val_loss)
return train_losses, val_losses, train_accuracies, val_accuracies, Total_Epoch
def Evaluate_Model(self, cnn_model, counter):
# evaluate the model on the test set
cnn_model.eval()
True_Label, Predict_Label = [], []
True_Label_OneHot, Predict_Label_OneHot = [], []
loss = 0.0
with torch.no_grad():
for images, labels in self.Test_Data_And_Label:
images, labels = torch.tensor(images).to(self.device), torch.tensor(labels).to(self.device)
outputs = cnn_model(images)
# collect the test predictions and labels
Output_Values, Output_Indexs = torch.max(outputs, 1)
True_Indexs = np.argmax(labels.cpu().numpy(), 1)
True_Label.append(Output_Indexs.cpu().numpy())
Predict_Label.append(True_Indexs)
Predict_Label_OneHot.append(torch.tensor(outputs, dtype = torch.float32).cpu().numpy()[0])
True_Label_OneHot.append(torch.tensor(labels, dtype = torch.int).cpu().numpy()[0])
# create a GradCAM instance
Layers = cnn_model.base_model.body.conv4.pointwise
grad_cam = GradCAM(cnn_model, target_layer="base_model")
# visualize Grad-CAM
grad_cam.visualize(outputs, images, target_class = 3, File_Name = counter, model_name = self.Model_Name)
loss /= len(self.Test_Data_And_Label)
True_Label_OneHot = torch.tensor(True_Label_OneHot, dtype = torch.int)
Predict_Label_OneHot = torch.tensor(Predict_Label_OneHot, dtype = torch.float32)
accuracy = accuracy_score(True_Label, Predict_Label)
precision = precision_score(True_Label, Predict_Label, average = "macro")
recall = recall_score(True_Label, Predict_Label, average = "macro")
AUC = auroc(Predict_Label_OneHot, True_Label_OneHot, num_labels = self.Number_Of_Classes, task = "multilabel", average = "macro")
f1 = f1_score(True_Label, Predict_Label, average = "macro")
return loss, accuracy, precision, recall, AUC, f1, True_Label, Predict_Label

View File

@@ -0,0 +1,91 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class Encode_Block(nn.Module):
"""基本的卷積塊Conv2d + BatchNorm + ReLU"""
def __init__(self, in_channels, out_channels):
super(Encode_Block, self).__init__()
self.conv = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
return self.conv(x)
class GastroSegNet(nn.Module):
"""簡單的U-Net實現"""
def __init__(self, in_channels=3, out_channels=3, features=[32, 64, 128, 256]):
super(GastroSegNet, self).__init__()
# encoder (downsampling path)
self.encoder = nn.ModuleList()
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
# first encoder layer
self.encoder.append(Encode_Block(in_channels, features[0]))
# remaining encoder layers
for i in range(1, len(features)):
self.encoder.append(Encode_Block(features[i-1], features[i]))
# bottleneck (deepest layer)
self.bottleneck = Encode_Block(features[-1], features[-1] * 2)
# decoder (upsampling path)
self.decoder = nn.ModuleList()
self.upconv = nn.ModuleList()
# build the upsampling and decoder layers
for i in range(len(features)):
self.upconv.append(
nn.ConvTranspose2d(features[-1-i] * 2, features[-1-i], kernel_size=2, stride=2)
)
self.decoder.append(
Encode_Block(features[-1-i] * 2, features[-1-i])
)
# final output layer
self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)
def forward(self, x):
# store the skip connections
skip_connections = []
# encoder path
for encoder_layer in self.encoder:
x = encoder_layer(x)
skip_connections.append(x)
x = self.pool(x)
# bottleneck
x = self.bottleneck(x)
# reverse the skip-connection list
skip_connections = skip_connections[::-1]
# decoder path
for i, (upconv, decoder) in enumerate(zip(self.upconv, self.decoder)):
# upsample
x = upconv(x)
# fetch the matching skip connection
skip = skip_connections[i]
# resize if the shapes do not match
if x.shape != skip.shape:
x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=False)
# concatenate the skip connection
x = torch.cat([skip, x], dim=1)
# run it through the decoder block
x = decoder(x)
# final output
return self.final_conv(x)
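# Shape smoke test (illustrative, not part of the original file): with the default
# features [32, 64, 128, 256] the predicted mask keeps the input's spatial size.
# model = GastroSegNet(in_channels=3, out_channels=3)
# out = model(torch.randn(1, 3, 224, 224))
# assert out.shape == (1, 3, 224, 224)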

View File

@@ -0,0 +1,148 @@
import torch.nn as nn
import torch.nn.functional as F
import torch
class SeparableConv2d(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True):
super(SeparableConv2d, self).__init__()
self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride,
padding=padding, groups=in_channels, bias=bias)
self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
padding=0, bias=bias)
def forward(self, x):
x = self.depthwise(x)
x = self.pointwise(x)
return x
class EntryFlow(nn.Module):
def __init__(self, in_channels=3):
super(EntryFlow, self).__init__()
self.conv1 = nn.Conv2d(in_channels, 32, 3, stride=2, padding=1, bias=False, dilation = 2)
self.bn1 = nn.BatchNorm2d(32)
self.conv2 = nn.Conv2d(32, 64, 3, padding=1, bias=False, dilation = 2)
self.bn2 = nn.BatchNorm2d(64)
self.conv3_residual = nn.Sequential(
SeparableConv2d(64, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.ReLU(inplace=False), # modified here
SeparableConv2d(128, 128, 3, padding=1),
nn.BatchNorm2d(128),
nn.MaxPool2d(3, stride=2, padding=1)
)
self.conv3_shortcut = nn.Conv2d(64, 128, 1, stride=2, bias=False)
self.bn3 = nn.BatchNorm2d(128)
self.conv4_residual = nn.Sequential(
nn.ReLU(inplace=False), # modified here
SeparableConv2d(128, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.ReLU(inplace=False), # modified here
SeparableConv2d(256, 256, 3, padding=1),
nn.BatchNorm2d(256),
nn.MaxPool2d(3, stride=2, padding=1)
)
self.conv4_shortcut = nn.Conv2d(128, 256, 1, stride=2, bias=False)
self.bn4 = nn.BatchNorm2d(256)
self.conv5_residual = nn.Sequential(
nn.ReLU(inplace=False), # modified here
SeparableConv2d(256, 728, 3, padding=1),
nn.BatchNorm2d(728),
nn.ReLU(inplace=False), # modified here
SeparableConv2d(728, 728, 3, padding=1),
nn.BatchNorm2d(728),
nn.MaxPool2d(3, stride=2, padding=1)
)
self.conv5_shortcut = nn.Conv2d(256, 728, 1, stride=2, bias=False)
self.bn5 = nn.BatchNorm2d(728)
def forward(self, x):
x = F.relu(self.bn1(self.conv1(x)))
x = F.relu(self.bn2(self.conv2(x)))
residual = self.conv3_residual(x)
shortcut = self.conv3_shortcut(x)
x = F.relu(self.bn3(residual + shortcut))
residual = self.conv4_residual(x)
shortcut = self.conv4_shortcut(x)
x = F.relu(self.bn4(residual + shortcut))
residual = self.conv5_residual(x)
shortcut = self.conv5_shortcut(x)
x = F.relu(self.bn5(residual + shortcut))
return x
class MiddleFlow(nn.Module):
def __init__(self):
super(MiddleFlow, self).__init__()
self.conv_residual = nn.Sequential(
nn.ReLU(inplace=False), # modified here
SeparableConv2d(728, 728, 3, padding=1),
nn.BatchNorm2d(728),
nn.ReLU(inplace=False), # modified here
SeparableConv2d(728, 728, 3, padding=1),
nn.BatchNorm2d(728),
nn.ReLU(inplace=False), # modified here
SeparableConv2d(728, 728, 3, padding=1),
nn.BatchNorm2d(728)
)
def forward(self, x):
return self.conv_residual(x) + x
class ExitFlow(nn.Module):
def __init__(self, num_classes=2):
super(ExitFlow, self).__init__()
self.conv1_residual = nn.Sequential(
nn.ReLU(inplace=False), # modified here
SeparableConv2d(728, 1024, 3, padding=1),
nn.BatchNorm2d(1024),
nn.ReLU(inplace=False), # modified here
SeparableConv2d(1024, 1024, 3, padding=1),
nn.BatchNorm2d(1024),
nn.MaxPool2d(3, stride=2, padding=1)
)
self.conv1_shortcut = nn.Conv2d(728, 1024, 1, stride=2, bias=False)
self.bn1 = nn.BatchNorm2d(1024)
self.conv2 = nn.Sequential(
SeparableConv2d(1024, 1536, 3, padding=1),
nn.BatchNorm2d(1536),
nn.ReLU(inplace=False), # modified here
SeparableConv2d(1536, 2048, 3, padding=1),
nn.BatchNorm2d(2048),
nn.ReLU(inplace=False) # modified here
)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.Hidden = nn.Linear(2048, 1025)
self.fc = nn.Linear(1025, num_classes)
def forward(self, x):
residual = self.conv1_residual(x)
shortcut = self.conv1_shortcut(x)
x = F.relu(self.bn1(residual + shortcut))
x = self.conv2(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.Hidden(x)
x = self.fc(x)
return x
class Xception(nn.Module):
def __init__(self, num_classes=2):
super(Xception, self).__init__()
self.entry_flow = EntryFlow(in_channels=3) # default input channels: 3
self.middle_flow = nn.Sequential(*[MiddleFlow() for _ in range(8)])
self.exit_flow = ExitFlow(num_classes)
def forward(self, x):
# standard forward pass
x = self.entry_flow(x)
x = self.middle_flow(x)
x = self.exit_flow(x)
return x
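# Shape smoke test (illustrative, not part of the original file): the adaptive
# average pooling in ExitFlow makes the input size flexible.
# model = Xception(num_classes=2)
# logits = model(torch.randn(1, 3, 299, 299))
# assert logits.shape == (1, 2)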

Some files were not shown because too many files have changed in this diff.