20251104: The three classes are finished, and the result run is in progress

This commit is contained in:
whitekirin
2025-11-04 18:59:52 +08:00
parent 54bb9b0072
commit e835cd4bce
193 changed files with 10152 additions and 1221 deletions

.python-version Normal file (1 line added)

@@ -0,0 +1 @@
3.13


@@ -5,28 +5,26 @@ from decimal import Decimal, ROUND_HALF_UP
class Calculate():
def __init__(self) -> None:
self.Loss, self.Accuracy, self.Precision, self.Recall, self.F1, self.AUC = 0, 0, 0, 0, 0, 0
self.Loss_Record, self.Accuracy_Record, self.Precision_Record, self.Recall_Record, self.F1_Record, self.AUC_Record = [], [], [], [], [], []
self.Loss_Record, self.Accuracy_Record, self.Precision_Record, self.Recall_Record, self.F1_Record = [], [], [], [], []
self.History = []
pass
def Append_numbers(self, Loss, Accuracy, Precision, Recall, AUC, F1):
def Append_numbers(self, Loss, Accuracy, Precision, Recall, F1):
self.Loss_Record.append(Loss)
self.Accuracy_Record.append(Accuracy)
self.Precision_Record.append(Precision)
self.Recall_Record.append(Recall)
self.F1_Record.append(F1)
self.AUC_Record.append(AUC)
pass
def Construction_To_DataFrame(self, Loss, Accuracy, Precision, Recall, F1, AUC):
def Construction_To_DataFrame(self, Loss, Accuracy, Precision, Recall, F1):
DataFrame = pd.DataFrame(
{
"loss" : "{:.2f}".format(Loss),
"precision" : "{:.2f}".format(Precision * 100),
"recall" : "{:.2f}".format(Recall * 100),
"accuracy" : "{:.2f}".format(Accuracy * 100),
"f1" : "{:.2f}".format(F1 * 100),
"AUC" : "{:.2f}".format(AUC * 100)
"precision" : "{:.2f}".format(Precision),
"recall" : "{:.2f}".format(Recall),
"accuracy" : "{:.2f}".format(Accuracy),
"f1" : "{:.2f}".format(F1),
}, index = [0]
)
self.History.append(DataFrame)
@@ -38,9 +36,8 @@ class Calculate():
Precision_Mean = np.mean(self.Precision_Record)
Recall_Mean = np.mean(self.Recall_Record)
F1_Mean = np.mean(self.F1_Record)
AUC_Mean = np.mean(self.AUC_Record)
Mean_DataFram = self.Construction_To_DataFrame(Loss_Mean, Accuracy_Mean, Precision_Mean, Recall_Mean, F1_Mean, AUC_Mean)
Mean_DataFram = self.Construction_To_DataFrame(Loss_Mean, Accuracy_Mean * 100, Precision_Mean * 100, Recall_Mean * 100, F1_Mean * 100)
return Mean_DataFram
@@ -50,20 +47,18 @@ class Calculate():
Precision_Std = Decimal(str(np.std(self.Precision_Record))).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
Recall_Std = Decimal(str(np.std(self.Recall_Record))).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
F1_Std = Decimal(str(np.std(self.F1_Record))).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
AUC_Std = Decimal(str(np.std(self.AUC_Record))).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
Std_DataFram = self.Construction_To_DataFrame(Loss_Std, Accuracy_Std, Precision_Std, Recall_Std, F1_Std, AUC_Std)
Std_DataFram = self.Construction_To_DataFrame(Loss_Std, Accuracy_Std, Precision_Std, Recall_Std, F1_Std)
return Std_DataFram
def Output_Style(self):
Result = pd.DataFrame(
{
"loss" : "{}%±{}".format(self.History[0]["loss"][0], self.History[1]["loss"][0]),
"loss" : "{}±{}".format(self.History[0]["loss"][0], self.History[1]["loss"][0]),
"precision" : "{}%±{}".format(self.History[0]["precision"][0], self.History[1]["precision"][0]),
"recall" : "{}%±{}".format(self.History[0]["recall"][0], self.History[1]["recall"][0]),
"accuracy" : "{}%±{}".format(self.History[0]["accuracy"][0], self.History[1]["accuracy"][0]),
"f1" : "{}%±{}".format(self.History[0]["f1"][0], self.History[1]["f1"][0]),
"AUC" : "{}%±{}".format(self.History[0]["AUC"][0], self.History[1]["AUC"][0])
}, index = [0]
)
return Result
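For orientation, the bookkeeping above reduces to: record per-fold metrics, take their mean (scaled to percent) and their standard deviation (quantized to two decimals, half-up), then format the pair as one table cell. A minimal self-contained sketch of that formatting (an editor's illustration with made-up numbers; note the std row is quantized on the raw scale, exactly as in the hunk above):

import numpy as np
import pandas as pd
from decimal import Decimal, ROUND_HALF_UP

acc_record = [0.91, 0.93, 0.89]  # per-fold accuracies collected via Append_numbers
mean_pct = "{:.2f}".format(np.mean(acc_record) * 100)  # mean, scaled to percent
std = Decimal(str(np.std(acc_record))).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
# Output_Style keeps the percent sign on the mean only ("loss" now uses a plain "±")
row = pd.DataFrame({"accuracy": "{}%±{}".format(mean_pct, std)}, index=[0])
print(row)  # the accuracy cell reads like 91.00%±0.02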

Binary file not shown.

Binary file not shown.

Density_Peak_Algorithm.py Normal file (760 lines added)

@@ -0,0 +1,760 @@
import torch
import matplotlib
matplotlib.use('Agg') # Use a non-GUI backend to avoid Tkinter errors
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
from skimage.segmentation import slic
from skimage.util import img_as_float
from skimage.color import label2rgb
from scipy.spatial.distance import cdist
# Set a global font list with CJK support
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei', 'SimHei', 'Arial Unicode MS', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False
from utils.Stomach_Config import Loading_Config, Save_Result_File_Config
from Load_process.LoadData import Loding_Data_Root
from Load_process.file_processing import Process_File
from merge_class.merge import merge
from model_data_processing.processing import make_label_list
from Load_process.LoadData import Load_Data_Prepare
def save_superpixel_regions(image_array, segments, save_dir, image_name, max_regions=50):
"""
Save individual superpixel regions as separate images.
Args:
- image_array: Original image array (H, W, 3)
- segments: Superpixel segmentation labels (H, W)
- save_dir: Directory to save superpixel region images
- image_name: Base name for the image files
- max_regions: Maximum number of regions to save (to avoid too many files)
Returns:
- saved_regions: List of saved region information
"""
os.makedirs(save_dir, exist_ok=True)
saved_regions = []
unique_segments = np.unique(segments)
# Limit the number of regions to save
if len(unique_segments) > max_regions:
print(f"⚠️ Superpixel count ({len(unique_segments)}) exceeds the limit ({max_regions}); saving only the first {max_regions}")
unique_segments = unique_segments[:max_regions]
print(f"💾 Saving {len(unique_segments)} superpixel regions to: {save_dir}")
for i, segment_id in enumerate(unique_segments):
# Get mask for current superpixel
mask = segments == segment_id
# Get bounding box of the region
y_coords, x_coords = np.where(mask)
if len(y_coords) == 0:
continue
min_y, max_y = y_coords.min(), y_coords.max()
min_x, max_x = x_coords.min(), x_coords.max()
# Extract the region with some padding
padding = 5
min_y = max(0, min_y - padding)
max_y = min(image_array.shape[0], max_y + padding + 1)
min_x = max(0, min_x - padding)
max_x = min(image_array.shape[1], max_x + padding + 1)
# Extract region from original image
region_image = image_array[min_y:max_y, min_x:max_x].copy()
region_mask = mask[min_y:max_y, min_x:max_x]
# Apply mask to make background transparent/black
region_image[~region_mask] = [0, 0, 0] # Set non-region pixels to black
# Convert to PIL Image and save
region_pil = Image.fromarray(region_image.astype(np.uint8))
# Create filename
region_filename = f"{image_name}_superpixel_{segment_id:03d}_region_{i+1:03d}.png"
region_path = os.path.join(save_dir, region_filename)
# Save the region
region_pil.save(region_path)
# Calculate region statistics
region_pixels = image_array[mask]
mean_color = np.mean(region_pixels, axis=0)
region_area = np.sum(mask)
centroid_y = np.mean(y_coords)
centroid_x = np.mean(x_coords)
region_info = {
'segment_id': segment_id,
'filename': region_filename,
'path': region_path,
'area': region_area,
'centroid': (centroid_x, centroid_y),
'mean_color': mean_color,
'bbox': (min_x, min_y, max_x, max_y)
}
saved_regions.append(region_info)
print(f"✅ 成功保存 {len(saved_regions)} 個超像素區域影像")
return saved_regions
def calculate_optimal_superpixel_params(image_size):
"""
Calculate optimal superpixel parameters based on image size.
Args:
- image_size: Total number of pixels (width * height)
Returns:
- n_segments: Number of superpixel segments
- compactness: Compactness parameter for SLIC
"""
# Dynamically adjust the parameters by image size
if image_size < 50000: # small images (< 224x224)
n_segments = min(100, max(50, image_size // 500))
compactness = 15
elif image_size < 200000: # medium images (< 447x447)
n_segments = min(300, max(100, image_size // 800))
compactness = 12
elif image_size < 500000: # large images (< 707x707)
n_segments = min(500, max(200, image_size // 1000))
compactness = 10
else: # very large images
n_segments = min(800, max(300, image_size // 1500))
compactness = 8
return int(n_segments), compactness
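# Editor's illustration of the branch arithmetic above (not part of the original
# script): a 224x224 input has 50,176 pixels and lands in the "medium" branch,
# giving n_segments = min(300, max(100, 50176 // 800)) = 100 and compactness = 12;
# a 512x512 input (262,144 pixels) lands in the "large" branch, giving
# n_segments = min(500, max(200, 262144 // 1000)) = 262 and compactness = 10.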
def extract_superpixel_features(image_array, segments):
"""
Extract features for each superpixel region.
Args:
- image_array: Original image array (H, W, 3)
- segments: Superpixel segmentation labels (H, W)
Returns:
- features: Array of features for each superpixel (N_superpixels, 5)
[mean_R, mean_G, mean_B, norm_centroid_x, norm_centroid_y]
- centroids: Array of centroid positions for each superpixel (N_superpixels, 2)
"""
n_segments = len(np.unique(segments))
features = []
centroids = []
for segment_id in np.unique(segments):
# Get mask for current superpixel
mask = segments == segment_id
# Extract color features (mean RGB)
region_pixels = image_array[mask]
mean_color = np.mean(region_pixels, axis=0)
# Extract position features (centroid)
y_coords, x_coords = np.where(mask)
centroid_y = np.mean(y_coords)
centroid_x = np.mean(x_coords)
# Combine features: [mean_R, mean_G, mean_B, centroid_x, centroid_y]
# Normalize centroid coordinates to [0, 1] range
norm_centroid_x = centroid_x / image_array.shape[1]
norm_centroid_y = centroid_y / image_array.shape[0]
feature_vector = np.concatenate([mean_color, [norm_centroid_x, norm_centroid_y]])
features.append(feature_vector)
centroids.append([centroid_x, centroid_y])
return np.array(features), np.array(centroids)
def fuzzy_c_means(data, n_clusters, m=2.0, max_iter=100, tol=1e-4, random_state=None):
"""
Fuzzy C-means clustering algorithm implementation.
Args:
- data: Input data array (N_samples, N_features)
- n_clusters: Number of clusters
- m: Fuzziness parameter (m > 1, typically 2.0)
- max_iter: Maximum number of iterations
- tol: Tolerance for convergence
- random_state: Random seed for reproducibility
Returns:
- centers: Cluster centers (n_clusters, N_features)
- membership: Membership matrix (N_samples, n_clusters)
- labels: Hard cluster assignments (N_samples,)
- objective: Final objective function value
- n_iter: Number of iterations performed
"""
if random_state is not None:
np.random.seed(random_state)
torch.manual_seed(random_state)
# Convert to torch tensor
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X = torch.from_numpy(data).float().to(device)
n_samples, n_features = X.shape
# Initialize membership matrix randomly
U = torch.rand(n_samples, n_clusters, device=device)
U = U / U.sum(dim=1, keepdim=True) # Normalize so each row sums to 1
print(f"🔄 開始 Fuzzy C-means 聚類: {n_clusters} 個聚類中心, 模糊度參數 m={m}")
for iteration in range(max_iter):
U_old = U.clone()
# Update cluster centers
Um = U ** m # Membership matrix raised to power m
centers = (Um.T @ X) / Um.sum(dim=0, keepdim=True).T
# Update membership matrix
# Calculate distances from each point to each center
distances = torch.cdist(X, centers) # (n_samples, n_clusters)
# Avoid division by zero
distances = torch.clamp(distances, min=1e-10)
# Calculate new membership values
power = 2.0 / (m - 1.0)
distance_matrix = distances ** power
# Membership update, vectorized: u_ij = 1 / sum_k (D_ij / D_ik), where D = distances ** power.
# Since sum_k (D_ij / D_ik) = D_ij * sum_k (1 / D_ik), one row-wise reduction per sample suffices.
U = 1.0 / (distance_matrix * (1.0 / distance_matrix).sum(dim=1, keepdim=True))
# Check for convergence
diff = torch.norm(U - U_old)
if diff < tol:
print(f"✅ Fuzzy C-means converged at iteration {iteration + 1} (diff: {diff:.6f})")
break
if (iteration + 1) % 20 == 0:
print(f" iteration {iteration + 1}/{max_iter}, diff: {diff:.6f}")
# Calculate final objective function value
Um = U ** m
distances_squared = torch.cdist(X, centers) ** 2
objective = torch.sum(Um * distances_squared).item()
# Get hard cluster assignments (highest membership)
labels = torch.argmax(U, dim=1)
# Convert back to numpy
centers_np = centers.cpu().numpy()
membership_np = U.cpu().numpy()
labels_np = labels.cpu().numpy()
print(f"🎯 Fuzzy C-means 完成: 目標函數值 = {objective:.4f}")
return centers_np, membership_np, labels_np, objective, iteration + 1
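# Editor's note, for reference: the two alternating updates above are the standard
# FCM iteration. With fuzziness m > 1,
#   centers:    c_j = sum_i u_ij^m * x_i / sum_i u_ij^m
#   membership: u_ij = 1 / sum_k ( d_ij / d_ik )^(2/(m-1)),  d_ij = ||x_i - c_j||
# and the objective J = sum_i sum_j u_ij^m * d_ij^2 decreases until the membership
# matrix stops changing (within tol).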
def determine_optimal_clusters(data, gamma_values, max_clusters=10, min_clusters=2):
"""
Determine optimal number of clusters using gamma values from density peak analysis
and fuzzy clustering validation indices.
Args:
- data: Input data array (N_samples, N_features)
- gamma_values: Gamma values from density peak analysis
- max_clusters: Maximum number of clusters to test
- min_clusters: Minimum number of clusters to test
Returns:
- optimal_k: Optimal number of clusters
- scores: Dictionary containing validation scores for each k
"""
n_samples = len(data)
max_clusters = min(max_clusters, n_samples - 1)
# Method 1: Use gamma values to estimate cluster centers
# Sort gamma values and look for significant drops
sorted_gamma = np.sort(gamma_values)[::-1] # Descending order
gamma_diffs = np.diff(sorted_gamma)
# Find the largest drop in gamma values (elbow method)
if len(gamma_diffs) > 0:
elbow_idx = np.argmax(np.abs(gamma_diffs)) + 1
gamma_suggested_k = min(max_clusters, max(min_clusters, elbow_idx))
else:
gamma_suggested_k = min_clusters
print(f"📊 基於 Gamma 值分析建議的聚類數: {gamma_suggested_k}")
# Method 2: Test different k values with fuzzy clustering validation
scores = {}
best_k = gamma_suggested_k
best_score = -np.inf
print(f"🔍 測試聚類數從 {min_clusters}{max_clusters}...")
for k in range(min_clusters, max_clusters + 1):
try:
# Perform fuzzy c-means clustering
centers, membership, labels, objective, n_iter = fuzzy_c_means(
data, k, m=2.0, max_iter=50, random_state=42
)
# Calculate Partition Coefficient (PC) - higher is better
pc = np.mean(np.sum(membership ** 2, axis=1))
# Calculate Partition Entropy (PE) - lower is better
pe = -np.mean(np.sum(membership * np.log(membership + 1e-10), axis=1))
# Calculate Modified Partition Coefficient (MPC) - higher is better
mpc = 1 - k / (k - 1) * (1 - pc) if k > 1 else pc
# Combined score (higher is better)
combined_score = pc - 0.1 * pe + 0.5 * mpc
scores[k] = {
'pc': pc,
'pe': pe,
'mpc': mpc,
'combined_score': combined_score,
'objective': objective,
'n_iter': n_iter
}
print(f" K={k}: PC={pc:.3f}, PE={pe:.3f}, MPC={mpc:.3f}, 組合分數={combined_score:.3f}")
if combined_score > best_score:
best_score = combined_score
best_k = k
except Exception as e:
print(f" K={k}: 聚類失敗 - {e}")
scores[k] = {'error': str(e)}
print(f"🎯 最佳聚類數: {best_k} (組合分數: {best_score:.3f})")
return best_k, scores
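# Editor's note, for reference: the validity indices computed above are
#   PC  = (1/N) * sum_i sum_j u_ij^2            (partition coefficient, higher is better)
#   PE  = -(1/N) * sum_i sum_j u_ij * ln(u_ij)  (partition entropy, lower is better)
#   MPC = 1 - K/(K-1) * (1 - PC)                (modified PC, rescaled to [0, 1])
# The combined score PC - 0.1*PE + 0.5*MPC is this script's own weighting, not a
# standard index.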
def visualize_clustering_results(image_array, segments, labels, centers, membership, save_path):
"""
Visualize fuzzy clustering results on the original image.
Args:
- image_array: Original image array (H, W, 3)
- segments: Superpixel segmentation labels (H, W)
- labels: Hard cluster assignments for each superpixel
- centers: Cluster centers
- membership: Fuzzy membership matrix
- save_path: Path to save the visualization
"""
# Set a CJK-capable font list for the plot
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei', 'SimHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
# Original image
axes[0, 0].imshow(image_array)
axes[0, 0].set_title('Original image', fontsize=14)
axes[0, 0].axis('off')
# Superpixel segmentation
superpixel_img = label2rgb(segments, image_array, kind='avg', bg_label=0)
axes[0, 1].imshow(superpixel_img)
axes[0, 1].set_title(f'Superpixel segmentation ({len(np.unique(segments))} regions)', fontsize=14)
axes[0, 1].axis('off')
# Hard clustering result
n_clusters = len(centers)
colors = plt.cm.Set3(np.linspace(0, 1, n_clusters))
# Create clustering visualization. Each superpixel's hard label is indexed by the
# position of its segment id in np.unique(segments) (segment ids are 1-based after
# SLIC with start_label=1, so indexing labels by segment_id directly would be off by one)
cluster_img = np.zeros_like(image_array)
for idx, segment_id in enumerate(np.unique(segments)):
if idx < len(labels):
cluster_id = labels[idx]
mask = segments == segment_id
cluster_img[mask] = colors[cluster_id][:3]
axes[0, 2].imshow(cluster_img)
axes[0, 2].set_title(f'Hard clustering result ({n_clusters} clusters)', fontsize=14)
axes[0, 2].axis('off')
# Fuzzy membership visualization for the top 3 clusters
for i in range(min(3, n_clusters)):
fuzzy_img = np.zeros(image_array.shape[:2])
for idx, segment_id in enumerate(np.unique(segments)):
if idx < len(membership):
mask = segments == segment_id
fuzzy_img[mask] = membership[idx, i]
im = axes[1, i].imshow(fuzzy_img, cmap='hot', vmin=0, vmax=1)
axes[1, i].set_title(f'Fuzzy membership of cluster {i+1}', fontsize=14)
axes[1, i].axis('off')
plt.colorbar(im, ax=axes[1, i], fraction=0.046, pad=0.04)
plt.tight_layout()
plt.savefig(save_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"📊 Clustering visualization saved to: {save_path}")
def compute_decision_graph(image_path, Save_Root, dc=None, use_gaussian=False, threshold_factor=2.0, use_superpixels=True, n_segments=None, compactness=None, save_regions=True, max_regions=50):
"""
Process a single image to compute the decision graph using Density Peak Clustering principles.
Identifies potential cluster centers without performing full clustering.
Also computes gamma (rho * delta) and n (index in descending order of gamma) values.
Args:
- image_path: Path to the image file.
- dc: Cut-off distance. If None, approximates it as 2% of average neighbors.
- use_gaussian: If True, uses Gaussian kernel for density; else cut-off kernel.
- threshold_factor: Factor for std deviation to determine thresholds for centers.
- use_superpixels: If True, uses SLIC superpixel segmentation instead of raw pixels.
- n_segments: Number of superpixel segments (only used if use_superpixels=True).
- compactness: Compactness parameter for SLIC algorithm (only used if use_superpixels=True).
Returns:
- dict: Contains center_indices, center_points, rho, delta, gamma, and n arrays.
gamma = rho * delta (product of local density and minimum distance)
n = index in descending order of gamma (1-indexed as per Density Peak convention)
If use_superpixels=True, also contains 'segments' and 'superpixel_features'.
"""
# Load image
img = Image.open(image_path).convert('RGB')
img_array = np.array(img) / 255.0 # Normalize to [0, 1]
image_size = img_array.shape[0] * img_array.shape[1]
# Superpixel segmentation is always used; compute its parameters dynamically
if n_segments is None or compactness is None:
n_segments, compactness = calculate_optimal_superpixel_params(image_size)
print(f"🖼️ Image size: {img.size} ({image_size:,} pixels)")
print(f"🔧 Using dynamic superpixel parameters: n_segments={n_segments}, compactness={compactness}")
# Apply SLIC superpixel segmentation (always on)
print(f"🎯 Applying SLIC superpixel segmentation, targeting {n_segments} regions...")
segments = slic(img_array, n_segments=n_segments, compactness=compactness,
start_label=1, enforce_connectivity=True)
# Extract features for each superpixel
superpixel_features, superpixel_centroids = extract_superpixel_features(img_array, segments)
points = torch.from_numpy(superpixel_features).float()
print(f"✅ 成功從 {img_array.shape[0] * img_array.shape[1]:,} 像素壓縮到 {len(superpixel_features)} 個超像素")
# Save superpixel regions if requested
if save_regions:
# Get image name without extension
image_name = os.path.splitext(os.path.basename(image_path))[0]
superpixel_regions_dir = os.path.join(Save_Root, f"{image_name}_superpixel_regions")
# Convert back to 0-255 range for saving
img_array_255 = (img_array * 255).astype(np.uint8)
saved_regions = save_superpixel_regions(img_array_255, segments, superpixel_regions_dir, image_name, max_regions)
print(f"💾 已保存 {len(saved_regions)} 個超像素區域影像到: {superpixel_regions_dir}")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
points = points.to(device)
N = points.shape[0]
print(f"🔄 處理圖像: {os.path.basename(image_path)}{device} 上 (使用超像素分割)")
# Compute pairwise distances
dist = torch.cdist(points, points)
# Approximate dc if not provided (using a sample for efficiency)
if dc is None:
# Sample 1000 points to estimate dc
sample_size = min(1000, N)
sample_idx = torch.randperm(N)[:sample_size]
sample_dist = torch.cdist(points[sample_idx], points)
sample_dist_flat = sample_dist.flatten()
sample_dist_flat = sample_dist_flat[sample_dist_flat > 0] # Exclude zeros
sorted_dist = torch.sort(sample_dist_flat)[0]
pos = int(len(sorted_dist) * 0.02)
dc = sorted_dist[pos].item()
print(f"Approximated dc: {dc}")
# Compute local density rho
if use_gaussian:
rho = torch.exp(-(dist ** 2) / (2 * dc ** 2)).sum(dim=1) - 1
else:
rho = (dist < dc).float().sum(dim=1) - 1
# Compute delta
sorted_rho, sorted_idx = torch.sort(rho, descending=True)
delta = torch.full((N,), 0.0, device=device)
nn = torch.full((N,), -1, dtype=torch.long, device=device)
# For the highest density point
delta[sorted_idx[0]] = dist[sorted_idx[0]].max()
# For others
for i in range(1, N):
higher_idx = sorted_idx[:i]
cur_idx = sorted_idx[i]
dists_to_higher = dist[cur_idx, higher_idx]
min_dist_idx = torch.argmin(dists_to_higher)
delta[cur_idx] = dists_to_higher[min_dist_idx]
nn[cur_idx] = higher_idx[min_dist_idx]
# Calculate gamma (rho * delta) and n (index in descending order of gamma)
gamma = rho * delta
sorted_gamma_indices = torch.argsort(gamma, descending=True)
n = torch.empty_like(sorted_gamma_indices)
n[sorted_gamma_indices] = torch.arange(1, N + 1, device=device)
# Plot decision graph with two subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
# Decision graph (rho vs delta)
rho_np = rho.cpu().numpy()
delta_np = delta.cpu().numpy()
# Color-code the points by gamma (rho x delta)
scatter1 = ax1.scatter(rho_np, delta_np, c=gamma.cpu().numpy(), s=8, alpha=0.7, cmap='viridis')
ax1.set_xlabel('Rho (Local Density)', fontsize=12)
ax1.set_ylabel('Delta (Min Distance to Higher Density)', fontsize=12)
ax1.set_title(f'Decision Graph - {os.path.basename(image_path)}\n(Superpixels: {len(rho_np)})', fontsize=14)
ax1.grid(True, alpha=0.3)
# Add a colorbar
cbar1 = plt.colorbar(scatter1, ax=ax1)
cbar1.set_label('Gamma (Rho × Delta)', fontsize=10)
# Gamma vs n plot - with data validation for log scale
gamma_np = gamma.cpu().numpy()
n_np = n.cpu().numpy()
# Filter out non-positive values for log scale
positive_mask = gamma_np > 0
if positive_mask.sum() > 0:
scatter2 = ax2.scatter(n_np[positive_mask], gamma_np[positive_mask],
c=rho_np[positive_mask], s=8, alpha=0.7, cmap='plasma')
ax2.set_yscale('log')
# Add a colorbar
cbar2 = plt.colorbar(scatter2, ax=ax2)
cbar2.set_label('Rho (Local Density)', fontsize=10)
else:
# Fallback to linear scale if no positive values
ax2.scatter(n_np, gamma_np, s=8, alpha=0.7, color='blue')
print("⚠️ Warning: No positive gamma values found, using linear scale instead of log scale")
ax2.set_xlabel('N (Index in descending order of Gamma)', fontsize=12)
ax2.set_ylabel('Gamma (Rho × Delta)', fontsize=12)
ax2.set_title('Gamma vs N Plot\n(Cluster Center Selection)', fontsize=14)
ax2.grid(True, alpha=0.3)
# Apply tight_layout with error handling
try:
plt.tight_layout()
except Exception as e:
print(f"Warning: tight_layout failed ({e}), using default layout")
# Use manual spacing as fallback
plt.subplots_adjust(left=0.1, right=0.95, top=0.9, bottom=0.1, wspace=0.3)
file = Process_File()
file.JudgeRoot_MakeDir(Save_Root)
# Use the original file name instead of an index number
original_filename = os.path.basename(image_path)
path = file.Make_Save_Root(original_filename, Save_Root)
plt.savefig(path, dpi=300, bbox_inches='tight')
plt.close()
print(f"Decision graph and gamma vs n plot saved to: {path}")
# Identify potential cluster centers (high rho and high delta)
mean_rho = rho.mean()
std_rho = rho.std()
threshold_rho = mean_rho + threshold_factor * std_rho
mean_delta = delta.mean()
std_delta = delta.std()
threshold_delta = mean_delta + threshold_factor * std_delta
is_center = (rho > threshold_rho) & (delta > threshold_delta)
# Properly handle torch.nonzero result to avoid issues with empty tensors
center_nonzero = torch.nonzero(is_center)
if center_nonzero.numel() > 0:
center_indices = center_nonzero.squeeze().cpu().numpy()
# Ensure center_indices is always a 1D array, even for single element
if center_indices.ndim == 0:
center_indices = np.array([center_indices.item()])
else:
center_indices = np.array([])
# Identify the potential cluster-center superpixels
center_points = superpixel_features[center_indices] if len(center_indices) > 0 else np.array([])
print(f"🎯 Found potential cluster centers: {len(center_indices)} superpixels")
for idx in center_indices:
print(f" Center superpixel {idx}: RGB({superpixel_features[idx][0]:.3f}, {superpixel_features[idx][1]:.3f}, {superpixel_features[idx][2]:.3f})")
# ========== Fuzzy C-means clustering analysis ==========
print(f"\n🔄 Starting the Fuzzy C-means clustering analysis...")
# Determine the optimal number of clusters
gamma_np = gamma.cpu().numpy()
optimal_k, cluster_scores = determine_optimal_clusters(
superpixel_features, gamma_np, max_clusters=8, min_clusters=2
)
# Run Fuzzy C-means clustering
print(f"\n🎯 Running Fuzzy C-means with the optimal cluster count {optimal_k}...")
cluster_centers, membership_matrix, cluster_labels, objective_value, n_iterations = fuzzy_c_means(
superpixel_features, optimal_k, m=2.0, max_iter=100, random_state=42
)
# Compute per-cluster statistics
cluster_stats = {}
for cluster_id in range(optimal_k):
cluster_mask = cluster_labels == cluster_id
cluster_size = np.sum(cluster_mask)
avg_membership = np.mean(membership_matrix[cluster_mask, cluster_id])
cluster_stats[cluster_id] = {
'size': cluster_size,
'avg_membership': avg_membership,
'center': cluster_centers[cluster_id]
}
print(f" Cluster {cluster_id}: {cluster_size} superpixels, mean membership: {avg_membership:.3f}")
# Generate the clustering-result visualization
print(f"\n📊 Generating the clustering visualization...")
# Convert the image array back to the 0-255 range for visualization
img_array_255 = (img_array * 255).astype(np.uint8)
# Build the save path for the visualization
original_filename = os.path.basename(image_path)
clustering_viz_path = os.path.join(Save_Root, f"clustering_results_{original_filename}")
# Generate the visualization
visualize_clustering_results(
img_array_255, segments, cluster_labels, cluster_centers,
membership_matrix, clustering_viz_path
)
# Prepare the return dictionary (density-peak analysis + fuzzy clustering results)
result = {
# Original density-peak analysis results
'center_indices': center_indices,
'center_points': center_points,
'rho': rho.cpu().numpy(),
'delta': delta.cpu().numpy(),
'gamma': gamma.cpu().numpy(),
'n': n.cpu().numpy(),
'segments': segments,
'superpixel_features': superpixel_features,
'superpixel_centroids': superpixel_centroids,
'n_superpixels': len(superpixel_features),
'compression_ratio': len(superpixel_features) / image_size,
# New Fuzzy C-means clustering results
'optimal_clusters': optimal_k,
'cluster_centers': cluster_centers,
'membership_matrix': membership_matrix,
'cluster_labels': cluster_labels,
'cluster_stats': cluster_stats,
'clustering_objective': objective_value,
'clustering_iterations': n_iterations,
'cluster_scores': cluster_scores,
'clustering_viz_path': clustering_viz_path
}
return result
# Example usage:
if __name__ == "__main__":
Label_Length = len(Loading_Config["Training_Labels"])
Merge = merge()
Prepare = Load_Data_Prepare()
load = Loding_Data_Root(Loading_Config["Training_Labels"], Loading_Config["Train_Data_Root"], Loading_Config["ImageGenerator_Data_Root"])
Data_Dict_Data = load.process_main(False)
Total_Size_List = []
for label in Loading_Config["Training_Labels"]:
Total_Size_List.append(len(Data_Dict_Data[label]))
# Build label lists matching the number of data records
Classes = []
i = 0
for encording in Loading_Config["Training_Labels"]:
Classes.append(make_label_list(Total_Size_List[i], encording))
i += 1
# Pack the data into a dict
Prepare.Set_Final_Dict_Data(Loading_Config["Training_Labels"], Data_Dict_Data, Classes, Label_Length)
Final_Dict_Data = Prepare.Get_Final_Data_Dict()
keys = list(Final_Dict_Data.keys())
Training_Data = Merge.merge_all_image_data(Final_Dict_Data[keys[0]], Final_Dict_Data[keys[1]]) # merge the training data into one list
for i in range(2, Label_Length):
Training_Data = Merge.merge_all_image_data(Training_Data, Final_Dict_Data[keys[i]]) # merge the training data into one list
Training_Label = Merge.merge_all_image_data(Final_Dict_Data[keys[Label_Length]], Final_Dict_Data[keys[Label_Length + 1]]) # merge the training labels into one list
for i in range(Label_Length + 2, 2 * Label_Length):
Training_Label = Merge.merge_all_image_data(Training_Label, Final_Dict_Data[keys[i]]) # merge the training labels into one list
for i in range(len(Training_Data)):
print(f"\n{'='*60}")
print(f"🖼️ Processing image {i+1}/{len(Training_Data)}: {Training_Data[i]}")
# Every image goes through superpixel-based Density Peak analysis
print("🎯 Running Density Peak analysis with SLIC superpixel segmentation")
try:
result_superpixels = compute_decision_graph(
Training_Data[i],
f'{Save_Result_File_Config["Density_Peak_Save_Root"]}/{Training_Label[i]}_superpixels',
use_superpixels=True,
save_regions=True,
max_regions=50
)
# Collect the image info used by the statistics below
test_image = Image.open(Training_Data[i])
image_size = test_image.size[0] * test_image.size[1]
print(f"\n✅ Superpixel processing succeeded:")
print(f"📊 Superpixel count: {len(result_superpixels['rho'])} data points")
print(f"📈 Compression ratio: {len(result_superpixels['rho']) / image_size:.6f}")
print(f"🔄 Compression factor: {image_size / len(result_superpixels['rho']):.1f}x")
print(f"🎯 Decision graph generated and saved")
# Report the Fuzzy C-means clustering results
print(f"\n🎯 Fuzzy C-means clustering results:")
print(f"📊 Optimal cluster count: {result_superpixels['optimal_clusters']}")
print(f"🔄 Clustering iterations: {result_superpixels['clustering_iterations']}")
print(f"📈 Objective value: {result_superpixels['clustering_objective']:.4f}")
# Report the per-cluster details
print(f"📋 Per-cluster details:")
for cluster_id, stats in result_superpixels['cluster_stats'].items():
print(f" Cluster {cluster_id}: {stats['size']} superpixels (mean membership: {stats['avg_membership']:.3f})")
center = stats['center']
print(f" Center features: RGB({center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f}), position ({center[3]:.3f}, {center[4]:.3f})")
print(f"📊 Clustering visualization saved to: {result_superpixels['clustering_viz_path']}")
# Report the clustering quality metrics
if 'cluster_scores' in result_superpixels and result_superpixels['optimal_clusters'] in result_superpixels['cluster_scores']:
scores = result_superpixels['cluster_scores'][result_superpixels['optimal_clusters']]
print(f"📈 Clustering quality assessment:")
print(f" Partition Coefficient (PC): {scores['pc']:.3f}")
print(f" Partition Entropy (PE): {scores['pe']:.3f}")
print(f" Modified Partition Coefficient (MPC): {scores['mpc']:.3f}")
print(f" Combined score: {scores['combined_score']:.3f}")
except Exception as e:
print(f"❌ Superpixel processing failed: {e}")
continue
print(f"\n💾 Results saved to: {Save_Result_File_Config['Density_Peak_Save_Root']}")


@@ -1,39 +1,51 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from model_data_processing.processing import make_label_list
from _validation.ValidationTheEnterData import validation_the_enter_data
from Load_process.file_processing import Process_File
from Load_process.LoadData import Load_Data_Prepare
from torchvision import transforms
from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
from Training_Tools.PreProcess import Training_Precesses
import numpy as np
from PIL import Image
import torch
import cv2
from torchvision import transforms
class Image_generator():
'''Perform data augmentation'''
def __init__(self, Generator_Root, Labels, Image_Size) -> None:
def __init__(self, Training_Root, Generator_Root, Labels, Image_Size, Class_Count) -> None:
self._validation = validation_the_enter_data()
self.stop = 0
self.Labels = Labels
self.Training_Root = Training_Root
self.Generator_Root = Generator_Root
self.Image_Size = Image_Size
self.Class_Count = 904
self.Class_Count = Class_Count
pass
def Processing_Main(self, Training_Dict_Data_Root):
def Processing_Main(self):
data_size = 2712
File = Process_File()
Prepare = Load_Data_Prepare()
Load_Tool = Load_Data_Tools()
# Build the standard data augmentation
'''
Here I follow the augmentation from the paper IMAGE DATA COLLECTION AND IMPLEMENTATION OF DEEP LEARNING-BASED MODEL IN DETECTING MONKEYPOX DISEASE USING MODIFIED VGG16
to produce the augmented images
'''
for i in range(1, 5, 1):
print("\nAugmentation one Generator image")
data_size = self.get_processing_Augmentation(Training_Dict_Data_Root, i, data_size)
self.stop += data_size
if not File.Judge_File_Exist(self.Generator_Root): # if the folder does not exist
# Decide how many lists are needed
Prepare.Set_Data_Content([], len(self.Labels))
print()
# Build the loading dictionary and collect the file paths
Prepare.Set_Label_List(self.Labels)
Prepare.Set_Data_Dictionary(Prepare.Get_Label_List(), Prepare.Get_Data_Content(), len(self.Labels))
Original_Dict_Data_Root = Prepare.Get_Data_Dict()
get_all_original_image_data = Load_Tool.get_data_root(self.Training_Root, Original_Dict_Data_Root, Prepare.Get_Label_List())
# Save the augmented data
# Build the standard data augmentation
'''
Here I follow the augmentation from the paper IMAGE DATA COLLECTION AND IMPLEMENTATION OF DEEP LEARNING-BASED MODEL IN DETECTING MONKEYPOX DISEASE USING MODIFIED VGG16
to produce the augmented images
'''
for i in range(1, 5, 1):
print(f"\nAugmentation {i} Generator image")
data_size = self.get_processing_Augmentation(get_all_original_image_data, i, data_size)
self.stop += data_size
else: # if the folder exists
print("standard data and myself data are exist\n")
def get_processing_Augmentation(self, original_image_root : dict, Augment_choose, data_size):
Prepaer = Load_Data_Prepare()
@@ -51,7 +63,6 @@ class Image_generator():
stardand = which image augmentation to use
'''
File = Process_File()
image_processing = Read_image_and_Process_image(self.Image_Size)
tool = Training_Precesses(self.Image_Size)
Classes = []
Transform = self.Generator_Content(stardand)
@@ -60,15 +71,15 @@ class Image_generator():
Image_Roots = self.get_data_roots[label]
save_root = File.Make_Save_Root(label, save_roots) # join the paths
Classes = image_processing.make_label_list(len(Image_Roots), "1")
Training_Dataset = tool.Setting_DataSet(Image_Roots, Classes)
Classes = make_label_list(len(Image_Roots), "1")
Training_Dataset = tool.Setting_DataSet(Image_Roots, Classes, "Generator")
Training_DataLoader = tool.Dataloader_Sampler(Training_Dataset, 1, False)
if File.JudgeRoot_MakeDir(save_root): # create the save folder if it does not exist yet
print("The folder already exists; the script is not creating a new one.")
for i in range(1, int(self.Class_Count / len(Image_Roots)) + 1, 1):
for batch_idx, (images, labels) in enumerate(Training_DataLoader):
for batch_idx, (images, labels, File_Name, File_Classes) in enumerate(Training_DataLoader):
for j, img in enumerate(images):
# if i == self.stop:
# break
@@ -78,7 +89,6 @@ class Image_generator():
# Convert to a NumPy array and from BGR to RGB
img_np = img.numpy().transpose(1, 2, 0) # back to HWC layout
img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB) # BGR to RGB
img_pil = transforms.ToPILImage()(img_np)
File.Save_PIL_File("image_" + label + str(data_size) + ".png", save_root, img_pil) # save the file


@@ -0,0 +1,262 @@
import xml.etree.ElementTree as ET
import cv2
import os
import numpy as np
from typing import List, Dict, Optional, Tuple
from utils.Stomach_Config import Loading_Config
class XMLAnnotationProcessor:
"""
XML annotation processor.
Parses XML files that contain bounding-box information and applies the boxes to the corresponding images.
"""
def __init__(self, dataset_root: str):
"""
Initialize the XML processor.
Args:
dataset_root: root directory of the image dataset
"""
self.dataset_root = dataset_root
self.box_color = (0, 255, 0) # green bounding box
self.text_color = (0, 255, 0) # green text
self.box_thickness = 2
self.font_scale = 0.5
self.font = cv2.FONT_HERSHEY_SIMPLEX
def _ensure_output_folder(self, Save_Root: str) -> None:
"""Make sure the output folder exists"""
if not os.path.exists(Save_Root):
os.makedirs(Save_Root)
def parse_xml(self, xml_file_path: str, Label: str) -> Optional[Dict]:
"""
Parse an XML file and extract all relevant information.
Args:
xml_file_path: path to the XML file
Returns:
Dict: file info and bounding boxes; None when parsing fails
"""
try:
tree = ET.parse(xml_file_path)
root = tree.getroot()
# Extract the basic info
filename_element = root.find('filename')
if filename_element is None:
print(f"No filename element found in {xml_file_path}")
return None
filename = filename_element.text
Original_Image_Data_Root = os.path.join(self.dataset_root, Label)
Original_Image_Data_Root = os.path.join(Original_Image_Data_Root, filename)
# Extract the image dimensions
size_element = root.find('size')
width = int(size_element.find('width').text) if size_element is not None else 0
height = int(size_element.find('height').text) if size_element is not None else 0
depth = int(size_element.find('depth').text) if size_element is not None else 3
# Extract all the bounding boxes
bounding_boxes = []
objects = root.findall('object')
for obj in objects:
bndbox = obj.find('bndbox')
if bndbox is not None:
bbox_info = {
'name': obj.find('name').text if obj.find('name') is not None else "Unknown",
'pose': obj.find('pose').text if obj.find('pose') is not None else "Unspecified",
'truncated': int(obj.find('truncated').text) if obj.find('truncated') is not None else 0,
'difficult': int(obj.find('difficult').text) if obj.find('difficult') is not None else 0,
'xmin': int(bndbox.find('xmin').text),
'ymin': int(bndbox.find('ymin').text),
'xmax': int(bndbox.find('xmax').text),
'ymax': int(bndbox.find('ymax').text)
}
bounding_boxes.append(bbox_info)
return {
'filename': filename,
'image_path': Original_Image_Data_Root,
'width': width,
'height': height,
'depth': depth,
'bounding_boxes': bounding_boxes
}
except Exception as e:
print(f"Error while parsing XML file {xml_file_path}: {str(e)}")
return None
def load_image(self, image_path: str) -> Optional[np.ndarray]:
"""
Load an image file.
Args:
image_path: path to the image file
Returns:
np.ndarray: the image array; None when loading fails
"""
if not os.path.exists(image_path):
print(f"Image file does not exist: {image_path}")
return None
image = cv2.imread(image_path)
if image is None:
print(f"Cannot read image: {image_path}")
return None
return image
def draw_bounding_boxes(self, image: np.ndarray, bounding_boxes: List[Dict]) -> np.ndarray:
"""
Create a masked image: keep the original pixels inside each bounding box, black outside.
Args:
image: image array
bounding_boxes: list of bounding-box dicts
Returns:
np.ndarray: the masked image array
"""
# Start from a black background
height, width = image.shape[:2]
result_image = np.zeros((height, width, 3), dtype=np.uint8)
for i, bbox in enumerate(bounding_boxes):
xmin, ymin = bbox['xmin'], bbox['ymin']
xmax, ymax = bbox['xmax'], bbox['ymax']
object_name = bbox['name']
# Clamp the coordinates to the image bounds
xmin = max(0, min(xmin, width-1))
ymin = max(0, min(ymin, height-1))
xmax = max(0, min(xmax, width-1))
ymax = max(0, min(ymax, height-1))
# Copy the original pixels inside the bounding box into the result
result_image[ymin:ymax, xmin:xmax] = image[ymin:ymax, xmin:xmax]
print(f"Object {i+1}: {object_name} - coordinates: ({xmin}, {ymin}, {xmax}, {ymax})")
return result_image
def save_annotated_image(self, image: np.ndarray, original_filename: str, Annotation_Root : str, Label : str) -> str:
"""
Save the annotated image.
Args:
image: the annotated image array
original_filename: the original file name
Returns:
str: path of the saved file
"""
output_filename = f"annotated_{original_filename}"
output_path = os.path.join(Annotation_Root, Label)
Save_Image_Roots = os.path.join(output_path, output_filename)
# Make sure the output folder exists
self._ensure_output_folder(output_path)
cv2.imwrite(Save_Image_Roots, image)
print(f"Annotated image saved to: {Save_Image_Roots}")
return Save_Image_Roots
def process_single_xml(self, xml_file_path: str, Annotation_Root : str, Label : str) -> Optional[Tuple[np.ndarray, str]]:
"""
Process a single XML file.
Args:
xml_file_path: path to the XML file
Returns:
Tuple[np.ndarray, str]: (annotated image, output path); None when processing fails
"""
# Parse the XML
xml_data = self.parse_xml(xml_file_path, Label)
if xml_data is None:
return None
# Load the image
image = self.load_image(xml_data['image_path'])
if image is None:
return None
# Apply the bounding boxes
annotated_image = self.draw_bounding_boxes(image, xml_data['bounding_boxes'])
# Save the result
output_path = self.save_annotated_image(annotated_image, xml_data['filename'], Annotation_Root, Label)
return annotated_image, output_path
def process_multiple_xml(self, xml_folder_path: str, Annotation_Root : str, Label : str) -> List[Tuple[str, bool]]:
"""
Batch-process multiple XML files.
Args:
xml_folder_path: folder containing the XML files
Returns:
List[Tuple[str, bool]]: [(file name, success flag), ...]
"""
if not os.path.exists(xml_folder_path):
print(f"XML folder does not exist: {xml_folder_path}")
return []
xml_files = [f for f in os.listdir(xml_folder_path) if f.endswith('.xml')]
if not xml_files:
print(f"No XML files found in {xml_folder_path}")
return []
print(f"Found {len(xml_files)} XML files")
results = []
for xml_file in xml_files:
try:
Read_XML_File = os.path.join(xml_folder_path, xml_file)
print(f"\nProcessing file: {xml_file}")
outcome = self.process_single_xml(Read_XML_File, Annotation_Root, Label)
results.append((xml_file, outcome is not None))
except Exception as e:
print(f"Error while processing {xml_file}: {str(e)}")
results.append((xml_file, False))
return results # match the annotated return type instead of returning None
def get_bounding_boxes_info(self, xml_file_path: str, Label: str) -> Optional[Dict]:
"""
Extract only the bounding-box info from an XML file, without touching the image.
Args:
xml_file_path: path to the XML file
Returns:
Dict: file info and bounding-box coordinates
"""
return self.parse_xml(xml_file_path, Label) # parse_xml requires the class label, so pass it through here too
def set_drawing_style(self, box_color: Tuple[int, int, int] = None,
text_color: Tuple[int, int, int] = None,
box_thickness: int = None,
font_scale: float = None) -> None:
"""
設定繪圖樣式
Args:
box_color: 邊界框顏色 (B, G, R)
text_color: 文字顏色 (B, G, R)
box_thickness: 邊界框粗細
font_scale: 字體大小
"""
if box_color is not None:
self.box_color = box_color
if text_color is not None:
self.text_color = text_color
if box_thickness is not None:
self.box_thickness = box_thickness
if font_scale is not None:
self.font_scale = font_scale
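A minimal usage sketch for this processor (an editor's illustration; the folder names and the "tumor" label are hypothetical placeholders, not paths taken from the repository):

# Hypothetical layout: ./dataset/<Label>/<image>.jpg plus a folder of
# PASCAL-VOC-style XML files for that label.
processor = XMLAnnotationProcessor(dataset_root="./dataset")
processor.set_drawing_style(box_color=(0, 0, 255), box_thickness=3)
results = processor.process_multiple_xml(
    xml_folder_path="./annotations/tumor",  # hypothetical
    Annotation_Root="./masked_output",      # hypothetical
    Label="tumor",                          # hypothetical class name
)
print(results)  # [(filename, True/False), ...]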


@@ -1,87 +1,309 @@
import cv2
import numpy as np
import torch
from PIL import Image
import torchvision
import functools
import inspect
def shapen(image): # sharpening
sigma = 100
blur_img = cv2.GaussianBlur(image, (0, 0), sigma)
usm = cv2.addWeighted(image, 1.5, blur_img, -0.5, 0)
return usm
def increase_contrast(image): # increase the image contrast
output = image # create the output variable
alpha = 2
beta = 10
cv2.convertScaleAbs(image, output, alpha, beta) # apply convertScaleAbs
return output
def adaptive_histogram_equalization(image):
ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB)
channels = cv2.split(ycrcb)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe.apply(channels[0], channels[0])
ycrcb = cv2.merge(channels)
Change_image = cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR)
return Change_image
def Remove_Background(image, Matrix_Size):
skinCrCbHist = np.zeros((256,256), dtype= np.uint8)
cv2.ellipse(skinCrCbHist, (113,155),(23,25), 43, 0, 360, (255, 255, 255), -1) # draw the elliptical arc
img_ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB)
y,cr,cb = cv2.split(img_ycrcb) # split out the Y, Cr and Cb values
skin = np.zeros(cr.shape, dtype = np.uint8) # mask
(x,y) = cr.shape
# Visit every pixel in the image in turn
for i in range(x):
for j in range(y):
if skinCrCbHist [cr[i][j], cb[i][j]] > 0: # if the (Cr, Cb) value falls inside the skin ellipse
skin[i][j] = 255
# If the pixel's gray level is above 200, adjust that pixel's transparency;
# using 255 - gray[y, x] makes some edge pixels semi-transparent and avoids overly jagged edges
# img_change = cv2.cvtColor(img_change, cv2.COLOR_BGRA2BGR)
img = cv2.bitwise_and(image, image, mask = skin)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
h = image.shape[0] # image height
w = image.shape[1] # image width
for x in range(w):
for y in range(h):
if img_gray[y, x] == 0:
# if x == 0 and y == 0: # when X and Y are both at the top-left corner
# image[y, x] = Add(1, Matrix_Size, image[y, x]) / Matrix_Size
# if x == w - 1 and y == 0: # when X and Y are both at the top-right corner
# image[y, x] = Add(w - Matrix_Size, w, image[y, x]) / Matrix_Size
# if x == 0 and y == h - 1: # when X and Y are both at the bottom-left corner
# image[y, x] = (image[y - 1, x] + image[y - 1, x + 1] + image[y, x + 1]) / 3
# if x == w - 1 and y == h - 1: # when X and Y are both at the bottom-right corner
# image[y, x] = (image[y, x - 1] + image[y - 1, x - 1] + image[y - 1, x]) / 3
# if (x > 0 and x < w - 1) and y == 0: # along the top edge, left to right
# image[y, x] = (image[y, x - 1] + image[y + 1, x - 1] + image[y + 1, x] + image[y, x + 1] + image[y + 1, x + 1]) / 5
# if (x > 0 and x < w - 1) and y == h - 1: # along the bottom edge, left to right
# image[y, x] = (image[y, x - 1] + image[y - 1, x - 1] + image[y - 1, x] + image[y, x + 1] + image[y - 1, x + 1]) / 5
# if x == 0 and (y > 0 and y < h - 1): # along the left edge, top to bottom
# image[y, x] = (image[y - 1, x] + image[y - 1, x + 1] + image[y, x + 1] + image[y + 1, x + 1] + image[y + 1, x]) / 5
# if x == w - 1 and (y > 0 and y < h - 1): # along the right edge, top to bottom
# image[y, x] = (image[y - 1, x] + image[y - 1, x - 1] + image[y, x - 1] + image[y + 1, x - 1] + image[y + 1, x]) / 5
if (x >= 1 and x < w - 1) and (y >= 1 and y < h - 1): # when y >= 2 and x >= 2
image[y, x] = Add(x, y, image, Matrix_Size) / Matrix_Size
# BGRA_image[y, x, 3] = 255 - gray[y, x]
return image
def Add(width_Center, Height_Center, image, Mask_Size):
total = 0
for i in range(Mask_Size):
for j in range(Mask_Size):
total += image[width_Center - ((Mask_Size - 1) / 2) + j, Height_Center - ((Mask_Size - 1) / 2) + i]
return total
# Apply the decorator to the existing functions
def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=1.0, threshold=0):
"""Unsharp-mask sharpening implemented with OpenCV
Args:
image: PIL.Image object (RGB)
kernel_size: Gaussian-blur kernel size, must be odd
sigma: Gaussian-blur standard deviation
amount: sharpening strength; larger values sharpen more
threshold: edge-detection threshold; sharpen only at edges
Returns:
the sharpened PIL.Image object
"""
# Convert the PIL image to a numpy array
numpy_img = np.array(image, dtype=np.uint8)
# Gaussian-blur the original
blurred = cv2.GaussianBlur(numpy_img, kernel_size, sigma)
# Compute the sharpened image
sharpened = cv2.addWeighted(numpy_img, 1 + amount, blurred, -amount, 0)
# If a threshold is set, only sharpen near edges
if threshold > 0:
low_contrast_mask = np.absolute(numpy_img - blurred) < threshold
np.copyto(sharpened, numpy_img, where=low_contrast_mask)
# Clamp pixel values to the valid range
sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)
# Convert back to a PIL image
return Image.fromarray(sharpened)
def histogram_equalization(image):
"""GPU-accelerated plain histogram equalization
Args:
image: PIL.Image object (RGB)
Returns:
the histogram-equalized PIL.Image object
"""
# Convert to a numpy array, then a PyTorch tensor
numpy_img = np.array(image)
tensor_img = torch.from_numpy(numpy_img).float().to('cuda')
# Equalize each channel separately
result = torch.zeros_like(tensor_img)
for i in range(3): # process the R, G, B channels separately
channel = tensor_img[..., i]
# Compute the histogram
hist = torch.histc(channel, bins=256, min=0, max=255)
# Compute the cumulative distribution function (CDF)
cdf = torch.cumsum(hist, dim=0)
cdf_normalized = ((cdf - cdf.min()) * 255) / (cdf.max() - cdf.min())
# Apply the equalization
result[..., i] = cdf_normalized[channel.long()]
# Back to CPU and numpy
result = torch.clamp(result, 0, 255).byte()
result_np = result.cpu().numpy()
return Image.fromarray(result_np)
def Contrast_Limited_Adaptive_Histogram_Equalization(image, clip_limit=3.0, tile_size=(8, 8)):
"""Contrast-limited adaptive histogram equalization (CLAHE) with OpenCV
Args:
image: PIL.Image object (RGB)
clip_limit: clipping limit that caps the contrast enhancement; larger values give stronger contrast
tile_size: (height, width) tile size; smaller tiles give a more local enhancement
Returns:
the CLAHE-processed PIL.Image object
"""
# Convert the PIL image to OpenCV format (BGR)
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# Convert to the LAB color space
lab_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2LAB)
# Create the CLAHE object
clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_size)
# Split the LAB channels
l, a, b = cv2.split(lab_img)
# Apply CLAHE to the L channel
l_clahe = clahe.apply(l)
# Merge the processed L channel with the original a and b channels
lab_output = cv2.merge([l_clahe, a, b])
# LAB back to BGR, then to RGB
bgr_output = cv2.cvtColor(lab_output, cv2.COLOR_LAB2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# Return as a PIL image
return Image.fromarray(rgb_output)
def adaptive_histogram_equalization_without_limit(image, tile_size=(8, 8)):
"""Adaptive histogram equalization (AHE) with OpenCV
Args:
image: PIL.Image object (RGB)
tile_size: (height, width) tile size; smaller tiles give a more local enhancement
Returns:
the AHE-processed PIL.Image object
"""
# Convert the PIL image to OpenCV format (BGR)
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# Convert to the LAB color space
lab_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2LAB)
# Split the LAB channels
l, a, b = cv2.split(lab_img)
# Create the AHE object (no clip limit)
clahe = cv2.createCLAHE(clipLimit=None, tileGridSize=tile_size)
# Apply AHE to the L channel
l_ahe = clahe.apply(l)
# Merge the processed L channel with the original a and b channels
lab_output = cv2.merge([l_ahe, a, b])
# LAB back to BGR, then to RGB
bgr_output = cv2.cvtColor(lab_output, cv2.COLOR_LAB2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# Return as a PIL image
return Image.fromarray(rgb_output)
def laplacian_sharpen(image):
"""
GPU-accelerated Laplacian sharpening
Args:
image: PIL.Image object (RGB)
Returns:
the sharpened PIL.Image object
"""
# Convert to a numpy array, then a PyTorch tensor
numpy_img = np.array(image)
tensor_img = torch.from_numpy(numpy_img).float().to('cuda')
# Build the Laplacian kernel
laplacian_kernel = torch.tensor([
[0, 1, 0],
[1, -4, 1],
[0, 1, 0]
], dtype=torch.float32, device='cuda').unsqueeze(0).unsqueeze(0)
# Process each channel
result = torch.zeros_like(tensor_img)
for i in range(3): # R, G, B channels
channel = tensor_img[..., i]
# Add batch and channel dimensions
channel = channel.unsqueeze(0).unsqueeze(0)
# Apply the Laplacian operator
laplacian = torch.nn.functional.conv2d(channel, laplacian_kernel, padding=1)
# Remove the batch and channel dimensions
laplacian = laplacian.squeeze()
# Sharpen: original minus Laplacian
result[..., i] = channel.squeeze() - laplacian
# Clamp pixel values to the valid range
result = torch.clamp(result, 0, 255).byte()
# Back to CPU and numpy
result_np = result.cpu().numpy()
return Image.fromarray(result_np)
def adjust_hsv(image, v_adjustment=0):
"""Adjust the V channel in the HSV color space
Args:
image: PIL.Image object (RGB)
v_adjustment: adjustment applied to the V channel, suggested range [-255, 255]
Returns:
the HSV-adjusted PIL.Image object
"""
# Convert the PIL image to OpenCV format (BGR)
numpy_img = np.array(image)
bgr_img = cv2.cvtColor(numpy_img, cv2.COLOR_RGB2BGR)
# Convert to the HSV color space
hsv_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2HSV)
# Adjust the V channel
hsv_img[..., 2] = np.clip(hsv_img[..., 2] + v_adjustment, 0, 255)
# HSV back to BGR, then to RGB
bgr_output = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)
rgb_output = cv2.cvtColor(bgr_output, cv2.COLOR_BGR2RGB)
# Return as a PIL image
return Image.fromarray(rgb_output)
def gamma_correction(image, gamma=1.0):
"""Apply gamma correction to an image
Args:
image: PIL.Image object (RGB)
gamma: gamma value; > 1 darkens, < 1 brightens, = 1 leaves the image unchanged
Returns:
the gamma-corrected PIL.Image object
"""
# Convert the PIL image to a numpy array
numpy_img = np.array(image)
# Normalize pixel values to [0, 1]
normalized = numpy_img.astype(float) / 255.0
# Apply the gamma correction
corrected = np.power(normalized, gamma)
# Scale the values back to [0, 255]
output = np.clip(corrected * 255.0, 0, 255).astype(np.uint8)
# Return as a PIL image
return Image.fromarray(output)
def Hight_Light(image, Threshold):
image = np.array(image)
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# Threshold to detect the highlights (white droplets)
_, thresh = cv2.threshold(gray, Threshold, 255, cv2.THRESH_BINARY)
# Dilate to grow the mask region
kernel = np.ones((5, 5), np.uint8)
dilated = cv2.dilate(thresh, kernel, iterations=1)
# Repair the highlights with inpainting
image_inpaint = cv2.inpaint(image, dilated, 3, cv2.INPAINT_TELEA)
return Image.fromarray(image_inpaint)
def median_filter(image: Image.Image, kernel_size: int = 3):
"""
Median filter
Args:
image: PIL.Image object (RGB)
kernel_size: filter kernel size, must be odd
Returns:
the filtered PIL.Image object
"""
# Make sure kernel_size is odd
if kernel_size % 2 == 0:
kernel_size += 1
# Convert the PIL image to a numpy array
numpy_img = np.array(image, dtype=np.uint8)
# Apply the median filter per channel
result = np.zeros_like(numpy_img)
for i in range(3): # process the R, G, B channels separately
result[:, :, i] = cv2.medianBlur(numpy_img[:, :, i], kernel_size)
# Clamp pixel values to the valid range
result = np.clip(result, 0, 255).astype(np.uint8)
# Convert back to a PIL image
return Image.fromarray(result)
def mean_filter(image: Image.Image, kernel_size: int = 3):
"""
Mean filter
Args:
image: PIL.Image object (RGB)
kernel_size: filter kernel size, must be odd
Returns:
the filtered PIL.Image object
"""
# Make sure kernel_size is odd
if kernel_size % 2 == 0:
kernel_size += 1
# Convert the PIL image to a numpy array
numpy_img = np.array(image, dtype=np.uint8)
# Build the mean kernel: every element is 1/(kernel_size*kernel_size)
kernel = np.ones((kernel_size, kernel_size), np.float32) / (kernel_size * kernel_size)
# Apply the mean filter per channel
result = np.zeros_like(numpy_img)
for i in range(3): # process the R, G, B channels separately
result[:, :, i] = cv2.filter2D(numpy_img[:, :, i], -1, kernel)
# Clamp pixel values to the valid range
result = np.clip(result, 0, 255).astype(np.uint8)
# Convert back to a PIL image
return Image.fromarray(result)
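A minimal usage sketch chaining a few of the new helpers on a synthetic image (an editor's illustration; it assumes the functions defined above are in scope, since their module path is not shown in this diff, and it exercises only the CPU-based functions):

import numpy as np
from PIL import Image

# Synthetic 128x128 RGB test image with one bright square standing in for a highlight
arr = np.full((128, 128, 3), 90, dtype=np.uint8)
arr[40:60, 40:60] = 250
img = Image.fromarray(arr)

out = unsharp_mask(img, kernel_size=(5, 5), sigma=1.0, amount=1.0)
out = gamma_correction(out, gamma=0.8)    # brighten slightly
out = median_filter(out, kernel_size=3)   # suppress salt-and-pepper noise
out = Hight_Light(out, Threshold=240)     # inpaint the bright droplet region
out.save("preview.png")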


@@ -1,58 +0,0 @@
from Load_process.LoadData import Loding_Data_Root
from Image_Process.Image_Generator import Image_generator
from Load_process.file_processing import Process_File
from model_data_processing.processing_for_cut_image import Cut_Indepentend_Data
from Load_process.Loading_Tools import Load_Data_Prepare, Load_Data_Tools
class Load_ImageGenerator():
'''
An object that performs data augmentation. It combines the augmentation recipe handed down by a senior labmate with an augmentation I configured myself.
The object first loads the data, then augments each subset separately, using the augmentation to mask the class imbalance.
This is just one of the experiments.
Parameters
standard_root: apply the same augmentation as the senior labmate's recipe
myself_root: augmentation whose parameters I set myself
IndependentDataRoot: folder path the results are written back to
Herpeslabels: the herpes classes
MonKeyPoxlabels: the monkeypox classes (monkeypox, chickenpox, normal)
herpes_data: the herpes dataset merged into one list
MonkeyPox_data: the MonkeyPox dataset merged into one list
'''
def __init__(self, Training_Root,Test_Root, Generator_Root, Labels, Image_Size) -> None:
self.Training_Root = Training_Root
self.TestRoot = Test_Root
self.GeneratoRoot = Generator_Root
self.Labels = Labels
self.Image_Size = Image_Size
pass
def process_main(self, Data_Length : int):
File = Process_File()
Prepare = Load_Data_Prepare()
load = Loding_Data_Root(self.Labels, self.Training_Root, self.GeneratoRoot)
Indepentend = Cut_Indepentend_Data(self.Training_Root, self.Labels)
Load_Tool = Load_Data_Tools()
Generator = Image_generator(self.GeneratoRoot, self.Labels, self.Image_Size)
# Split the test data out on its own
test_size = 0.2
Indepentend.IndependentData_main(self.TestRoot, test_size)
if not File.Judge_File_Exist(self.GeneratoRoot): # if the folder does not exist
# Decide how many lists are needed
Prepare.Set_Data_Content([], Data_Length)
# Build the loading dictionary and collect the file paths
Prepare.Set_Label_List(self.Labels)
Prepare.Set_Data_Dictionary(Prepare.Get_Label_List(), Prepare.Get_Data_Content(), Data_Length)
Original_Dict_Data_Root = Prepare.Get_Data_Dict()
get_all_original_image_data = Load_Tool.get_data_root(self.Training_Root, Original_Dict_Data_Root, Prepare.Get_Label_List())
# Save the augmented data
Generator.Processing_Main(get_all_original_image_data) # run the augmentation
else: # if the folder exists
print("standard data and myself data are exist\n")
# Load the files
return load.process_main()


@@ -11,15 +11,16 @@ class Loding_Data_Root(Process_File):
super().__init__()
pass
def process_main(self):
def process_main(self, status):
'''Load the Training and Image Generator data'''
Merge = merge()
get_Image_Data = self.get_Image_data_roots(self.Train_Root)
Get_ImageGenerator_Image_Data = self.get_Image_data_roots(self.Generator_Root)
# Get_Total_Image_Data_Root = Merge.merge_dict_to_dict(get_Image_Data, Get_ImageGenerator_Image_Data)
# Get_Total_Image_Data_Root = Merge.merge_data_main(get_Image_Data, 0, len(self.Label_List))
if status:
Get_ImageGenerator_Image_Data = self.get_Image_data_roots(self.Generator_Root)
Get_Total_Image_Data_Root = Merge.merge_dict_to_dict(get_Image_Data, Get_ImageGenerator_Image_Data)
return Get_Total_Image_Data_Root
return get_Image_Data
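The new status flag controls whether the augmented (Image Generator) roots are merged into the training roots before loading. A small self-contained sketch of that switch (an editor's illustration; merge_dict_to_dict itself is not shown in this diff, so the per-label list concatenation below is an assumption about its behavior):

def merge_dict_to_dict(a, b):
    # Assumed behavior: concatenate the per-label path lists
    return {label: a.get(label, []) + b.get(label, []) for label in set(a) | set(b)}

train = {"tumor": ["t1.png", "t2.png"], "normal": ["n1.png"]}
generated = {"tumor": ["g1.png"], "normal": ["g2.png"]}
status = True
roots = merge_dict_to_dict(train, generated) if status else train
print(roots["tumor"])  # ['t1.png', 't2.png', 'g1.png']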


@@ -1,11 +1,10 @@
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from merge_class.merge import merge
from Read_and_process_image.ReadAndProcess import Read_image_and_Process_image
from Load_process.LoadData import Load_Data_Prepare, Load_Data_Tools
from model_data_processing.processing import Balance_Process
from model_data_processing.processing import make_label_list
from utils.Stomach_Config import Loading_Config
class Load_Indepentend_Data():
def __init__(self, Labels, OneHot_Encording):
def __init__(self, OneHot_Encording):
'''
Image-splitting object.
The labels have 2 classes, converted to one-hot-encoding form:
@@ -13,33 +12,33 @@ class Load_Indepentend_Data():
[1, 0] = NPC_positive
'''
self.merge = merge()
self.Labels = Labels
self.OneHot_Encording = OneHot_Encording
pass
def process_main(self, Test_data_root):
self.test, self.test_label = self.get_Independent_image(Test_data_root)
def process_main(self, Test_data_root, Test_mask_root):
self.test, self.test_label, self.test_mask = self.get_Independent_image(Test_data_root, Test_mask_root)
print("\ntest_labels有" + str(len(self.test_label)) + "筆資料\n")
# self.validation, self.validation_label = self.get_Independent_image(Validation_data_root)
# print("validation_labels有 " + str(len(self.validation_label)) + " 筆資料\n")
def get_Independent_image(self, independent_DataRoot):
image_processing = Read_image_and_Process_image(123)
classify_image = []
def get_Independent_image(self, independent_DataRoot, independent_MaskRoot):
Total_Size_List = []
Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot) # load the test dataset
Total_Dict_Data_Root = self.Get_Independent_data_Root(independent_DataRoot, Loading_Config["Training_Labels"], len(Loading_Config["Training_Labels"])) # load the test dataset
Total_Dict_Mask_Root = self.Get_Independent_data_Root(independent_MaskRoot, Loading_Config["XML_Loading_Label"], len(Loading_Config["XML_Loading_Label"])) # load the test dataset's mask data
# Convert the test-data dict to a list, keeping label order
Total_List_Data_Root = []
for Label in self.Labels:
for Label in Loading_Config["Training_Labels"]:
Total_List_Data_Root.append(Total_Dict_Data_Root[Label])
# Convert the mask dict to a list, keeping label order
Total_List_Mask_Data_Root = []
for Label in Loading_Config["XML_Loading_Label"]:
Total_List_Mask_Data_Root.append(Total_Dict_Mask_Root[Label])
test_label, Classify_Label = [], []
classify_image, Classify_Label = [], []
i = 0 # counter into classify_image, and a running total of the record count
for test_title in Total_List_Data_Root: # read the files from all the collected paths
test_label = image_processing.make_label_list(len(test_title), self.OneHot_Encording[i]) # build labels matching the image count
print(self.Labels[i] + " has " + str(len(test_label)) + " records ")
test_label = make_label_list(len(test_title), self.OneHot_Encording[i]) # build labels matching the image count
print(Loading_Config["Training_Labels"][i] + " has " + str(len(test_label)) + " records ")
Total_Size_List.append(len(test_label))
@@ -47,28 +46,28 @@ class Load_Indepentend_Data():
Classify_Label.append(test_label)
i += 1
test = self.merge.merge_data_main(classify_image, 0, len(self.Labels))
test_label = self.merge.merge_data_main(Classify_Label, 0, len(self.Labels))
classify_Mask_image = []
i = 0 # counter into classify_Mask_image, and a running total of the record count
for test_title in Total_List_Mask_Data_Root: # read the files from all the collected paths
print(Loading_Config["XML_Loading_Label"][i] + " has " + str(len(test_title)) + " records ")
# test = []
# test = image_processing.Data_Augmentation_Image(original_test_root)
# test, test_label = image_processing.image_data_processing(test, original_test_label)
classify_Mask_image.append(test_title)
i += 1
# Balance_Data = list(zip(test, test_label))
# test, test_label = Balance_Process(Balance_Data, Total_Size_List) # shuffle and take the requested number of records
# test = image_processing.normalization(test)
return test, test_label
test = self.merge.merge_data_main(classify_image, 0, len(Loading_Config["Training_Labels"]))
test_label = self.merge.merge_data_main(Classify_Label, 0, len(Loading_Config["Training_Labels"]))
test_Mask = self.merge.merge_data_main(classify_Mask_image, 0, len(Loading_Config["XML_Loading_Label"]))
return test, test_label, test_Mask
def Get_Independent_data_Root(self, load_data_root):
def Get_Independent_data_Root(self, load_data_root, Dictory_Keys, Length):
Prepare = Load_Data_Prepare()
Load_Tool = Load_Data_Tools()
Prepare.Set_Data_Content([], len(self.Labels))
Prepare.Set_Data_Dictionary(self.Labels, Prepare.Get_Data_Content(), 2)
Prepare.Set_Data_Content([], Length)
Prepare.Set_Data_Dictionary(Dictory_Keys, Prepare.Get_Data_Content(), Length)
Get_Data_Dict_Content = Prepare.Get_Data_Dict()
Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, self.Labels)
Total_Data_Roots = Load_Tool.get_data_root(load_data_root, Get_Data_Dict_Content, Dictory_Keys)
return Total_Data_Roots


@@ -0,0 +1,87 @@
from Load_process.LoadData import Loding_Data_Root
from utils.Stomach_Config import Loading_Config
from merge_class.merge import merge
from Load_process.LoadData import Load_Data_Prepare
from Training_Tools.Tools import Tool
class Training_Preprocessing():
def __init__(self):
pass
def Process_Main(self, Loading_Data_Roots, Labels, Have_Label_Lists = True, Generator_Roots = None):
"""
Load the data into a dict and count the per-label and total sample counts
Returns: Loading_Data_Dict_Data
"""
Loading_Data_Dict_Data, Total_Size_Lists = self.Loading_Data_Dicts(Loading_Data_Roots, Labels, Generator_Roots)
# build labels matching the sample counts
# get the one-hot encoded labels
tool = Tool()
Label_Length = len(Labels)
if Have_Label_Lists:
tool.Set_OneHotEncording(Loading_Config["Training_Labels"])
Encording_Label = tool.Get_OneHot_Encording_Label()
Classes = self.make_label_list(Encording_Label, Total_Size_Lists)
return self.Merge_Total_Data(Loading_Data_Dict_Data, Classes, Label_Length)
return self.Merge_Total_Data(Loading_Data_Dict_Data, None, Label_Length)
def Loading_Data_Dicts(self, Loading_Data_Roots, Labels, Generator_Roots):
"""
Load the data into a dict and count the per-label and total sample counts
Returns: Loading_Data_Dict_Data
"""
Load_Data = Loding_Data_Root(Loading_Data_Roots, Labels, Generator_Roots)
Loading_Data_Dict_Data = Load_Data.process_main(False)
Total_Size_Lists = []
Train_Size = 0
print("Totals for the loaded dataset")
for label in Labels:
Train_Size += len(Loading_Data_Dict_Data[label])
Total_Size_Lists.append(len(Loading_Data_Dict_Data[label]))
print(f"Label: {label}, count: {len(Loading_Data_Dict_Data[label])}")
print(f"{Train_Size} samples in total")
return Loading_Data_Dict_Data, Total_Size_Lists
def make_label_list(self, Contents, Lengths):
'''Build the label lists: repeat each one-hot label by its class's sample count'''
Classes = []
for Content, Length in zip(Contents, Lengths):
label_list = []
for i in range(Length):
label_list.append(Content)
Classes.append(label_list)
return Classes
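# Worked example (illustrative, not part of the original file): with
# Contents = [[1, 0], [0, 1]] and Lengths = [2, 3], the fixed method returns
# [[[1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], i.e. one label list per class,
# sized by that class's sample count.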
def Merge_Total_Data(self, Data_Dict, Classes, Label_Length):
"""
Merge all of the data into a single list
"""
Prepare = Load_Data_Prepare()
Merge = merge()
# pack the data into a dict
if Classes is None:
Classes = [[], [], []]
Prepare.Set_Final_Dict_Data(Loading_Config["Training_Labels"], Data_Dict, Classes, Label_Length)
Final_Dict_Data = Prepare.Get_Final_Data_Dict()
keys = Loading_Config["Training_Labels"]
Training_Data = Merge.merge_all_image_data(Final_Dict_Data[keys[0]], Final_Dict_Data[keys[1]]) # merge the training data into one list
for i in range(2, Label_Length):
Training_Data = Merge.merge_all_image_data(Training_Data, Final_Dict_Data[keys[i]]) # merge the training data into one list
if Classes != [[], [], []]:
Training_Label = Merge.merge_all_image_data(Final_Dict_Data[keys[Label_Length]], Final_Dict_Data[keys[Label_Length + 1]]) # merge the training labels into one label list
for i in range(Label_Length + 2, 2 * Label_Length):
Training_Label = Merge.merge_all_image_data(Training_Label, Final_Dict_Data[keys[i]]) # merge the training labels into one list
return Training_Data, Training_Label
return Training_Data

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -35,16 +35,14 @@ class Process_File():
save_root = self.Make_Save_Root(FileName, save_root)
np.save(save_root, image)
def Save_CSV_File(self, file_name, data): # save the training results
Save_Root = '../Result/Training_Result/save_the_train_result(' + str(datetime.date.today()) + ")"
def Save_CSV_File(self, Save_Root, file_name, data): # save the training results
self.JudgeRoot_MakeDir(Save_Root)
modelfiles = self.Make_Save_Root(file_name + ".csv", Save_Root) # join the file name and directory into the full path
data.to_csv(modelfiles, mode = "a")
def Save_TXT_File(self, content, File_Name):
model_dir = '../Result/save_the_train_result(' + str(datetime.date.today()) + ")" # save path: save_the_train_result + today's date
self.JudgeRoot_MakeDir(model_dir)
modelfiles = self.Make_Save_Root(File_Name + ".txt", model_dir) # join the file name and directory into the full path
def Save_TXT_File(self, content, Save_Root, File_Name):
self.JudgeRoot_MakeDir(Save_Root)
modelfiles = self.Make_Save_Root(f"{File_Name}.txt", Save_Root) # 將檔案名稱及路徑字串合併成完整路徑
with open(modelfiles, mode = 'a') as file:
file.write(content)

315
Model_Loss/CIOU_Loss.py Normal file
View File

@@ -0,0 +1,315 @@
import torch
import torch.nn as nn
import math
class CIOULoss(nn.Module):
"""
Complete Intersection over Union (CIOU) Loss
Suited to bounding-box regression in object detection
CIOU Loss accounts for three geometric factors:
1. Overlap area
2. Central point distance
3. Aspect ratio consistency
"""
def __init__(self, eps=1e-7):
super(CIOULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
"""
Compute the CIOU Loss
Args:
pred_boxes: predicted boxes [N, 4] as (x1, y1, x2, y2) or (cx, cy, w, h), or segmentation masks [B, 1, H, W]
target_boxes: ground-truth boxes [N, 4] in the same formats, or segmentation masks [B, 1, H, W]
Returns:
CIOU loss value
"""
# check whether the inputs are segmentation masks
if len(pred_boxes.shape) == 4 and pred_boxes.shape[1] == 1:
# convert the masks into bounding boxes
pred_boxes = self._mask_to_boxes(pred_boxes)
target_boxes = self._mask_to_boxes(target_boxes)
# if no valid boxes can be extracted from the masks, return a small loss value
if pred_boxes is None or target_boxes is None:
return torch.tensor(0.01, device=pred_boxes.device if pred_boxes is not None else target_boxes.device)
# make sure the inputs are floats
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# check that the box dimensions are correct
if pred_boxes.dim() == 1:
# a single box: expand to a batch of size 1
pred_boxes = pred_boxes.unsqueeze(0)
if target_boxes.dim() == 1:
target_boxes = target_boxes.unsqueeze(0)
# boxes must have 4 coordinates
if pred_boxes.shape[1] != 4 or target_boxes.shape[1] != 4:
# wrong coordinate count: return a small loss value
return torch.tensor(0.01, device=pred_boxes.device)
# if the input is (cx, cy, w, h), convert to (x1, y1, x2, y2)
if self._is_center_format(pred_boxes, target_boxes):
pred_boxes = self._center_to_corner(pred_boxes)
target_boxes = self._center_to_corner(target_boxes)
# intersection area
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# per-box areas
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# union area
union = pred_area + target_area - intersection + self.eps
# IoU
iou = intersection / union
# smallest enclosing box
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
# squared diagonal length of the enclosing box
enclose_diagonal_sq = (enclose_x2 - enclose_x1) ** 2 + (enclose_y2 - enclose_y1) ** 2 + self.eps
# squared distance between the two box centers
pred_center_x = (pred_boxes[:, 0] + pred_boxes[:, 2]) / 2
pred_center_y = (pred_boxes[:, 1] + pred_boxes[:, 3]) / 2
target_center_x = (target_boxes[:, 0] + target_boxes[:, 2]) / 2
target_center_y = (target_boxes[:, 1] + target_boxes[:, 3]) / 2
center_distance_sq = (pred_center_x - target_center_x) ** 2 + (pred_center_y - target_center_y) ** 2
# aspect-ratio consistency term
pred_w = pred_boxes[:, 2] - pred_boxes[:, 0]
pred_h = pred_boxes[:, 3] - pred_boxes[:, 1]
target_w = target_boxes[:, 2] - target_boxes[:, 0]
target_h = target_boxes[:, 3] - target_boxes[:, 1]
# avoid division by zero
pred_w = torch.clamp(pred_w, min=self.eps)
pred_h = torch.clamp(pred_h, min=self.eps)
target_w = torch.clamp(target_w, min=self.eps)
target_h = torch.clamp(target_h, min=self.eps)
v = (4 / (math.pi ** 2)) * torch.pow(torch.atan(target_w / target_h) - torch.atan(pred_w / pred_h), 2)
# alpha weighting factor
with torch.no_grad():
alpha = v / (1 - iou + v + self.eps)
# CIOU
ciou = iou - (center_distance_sq / enclose_diagonal_sq) - alpha * v
# CIOU Loss = 1 - CIOU
ciou_loss = 1 - ciou
return ciou_loss.mean()
def _is_center_format(self, pred_boxes, target_boxes):
"""
Heuristic check for the center format (cx, cy, w, h):
if the third and fourth columns are positive and comparatively small, they may be width/height
"""
# simple heuristic only; real use may need a more precise check
return False # assume (x1, y1, x2, y2) by default
def _center_to_corner(self, boxes):
"""
Convert center format (cx, cy, w, h) to corner format (x1, y1, x2, y2)
"""
cx, cy, w, h = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
x1 = cx - w / 2
y1 = cy - h / 2
x2 = cx + w / 2
y2 = cy + h / 2
return torch.stack([x1, y1, x2, y2], dim=1)
def _mask_to_boxes(self, masks):
"""
Convert segmentation masks into bounding boxes [N, 4] (x1, y1, x2, y2)
Args:
masks: segmentation masks [B, 1, H, W]
Returns:
boxes: bounding boxes [B, 4] (x1, y1, x2, y2)
"""
batch_size = masks.size(0)
device = masks.device
# binarize the masks
binary_masks = (torch.sigmoid(masks) > 0.5).float()
# initialize the box tensor
boxes = torch.zeros(batch_size, 4, device=device)
# process each batch element
for b in range(batch_size):
mask = binary_masks[b, 0] # [H, W]
# indices of the non-zero elements
non_zero_indices = torch.nonzero(mask, as_tuple=True)
# if the mask has no non-zero elements, use a default small box
if len(non_zero_indices[0]) == 0:
# fall back to a default small box
boxes[b] = torch.tensor([0, 0, 1, 1], device=device)
continue
# box coordinates
y_min = torch.min(non_zero_indices[0])
y_max = torch.max(non_zero_indices[0])
x_min = torch.min(non_zero_indices[1])
x_max = torch.max(non_zero_indices[1])
# store the box [x1, y1, x2, y2]
boxes[b] = torch.tensor([x_min, y_min, x_max, y_max], device=device)
return boxes
def _calculate_intersection(self, pred_boxes, target_boxes):
"""
Compute the intersection area of two sets of boxes
"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
# intersection width and height
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
class DIoULoss(nn.Module):
"""
Distance Intersection over Union (DIoU) Loss
A simplified CIOU that only considers overlap area and center-point distance
"""
def __init__(self, eps=1e-7):
super(DIoULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
# make sure the inputs are floats
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# intersection area
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# per-box areas
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# union area
union = pred_area + target_area - intersection + self.eps
# IoU
iou = intersection / union
# squared diagonal of the smallest enclosing box
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
enclose_diagonal_sq = (enclose_x2 - enclose_x1) ** 2 + (enclose_y2 - enclose_y1) ** 2 + self.eps
# squared center distance
pred_center_x = (pred_boxes[:, 0] + pred_boxes[:, 2]) / 2
pred_center_y = (pred_boxes[:, 1] + pred_boxes[:, 3]) / 2
target_center_x = (target_boxes[:, 0] + target_boxes[:, 2]) / 2
target_center_y = (target_boxes[:, 1] + target_boxes[:, 3]) / 2
center_distance_sq = (pred_center_x - target_center_x) ** 2 + (pred_center_y - target_center_y) ** 2
# DIoU
diou = iou - (center_distance_sq / enclose_diagonal_sq)
# DIoU Loss = 1 - DIoU
diou_loss = 1 - diou
return diou_loss.mean()
def _calculate_intersection(self, pred_boxes, target_boxes):
"""計算交集面積"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
class GIoULoss(nn.Module):
"""
Generalized Intersection over Union (GIoU) Loss
A generalization of IoU that accounts for the smallest enclosing box
"""
def __init__(self, eps=1e-7):
super(GIoULoss, self).__init__()
self.eps = eps
def forward(self, pred_boxes, target_boxes):
# make sure the inputs are floats
pred_boxes = pred_boxes.float()
target_boxes = target_boxes.float()
# intersection area
intersection = self._calculate_intersection(pred_boxes, target_boxes)
# per-box areas
pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
target_area = (target_boxes[:, 2] - target_boxes[:, 0]) * (target_boxes[:, 3] - target_boxes[:, 1])
# union area
union = pred_area + target_area - intersection + self.eps
# IoU
iou = intersection / union
# area of the smallest enclosing box
enclose_x1 = torch.min(pred_boxes[:, 0], target_boxes[:, 0])
enclose_y1 = torch.min(pred_boxes[:, 1], target_boxes[:, 1])
enclose_x2 = torch.max(pred_boxes[:, 2], target_boxes[:, 2])
enclose_y2 = torch.max(pred_boxes[:, 3], target_boxes[:, 3])
enclose_area = (enclose_x2 - enclose_x1) * (enclose_y2 - enclose_y1) + self.eps
# GIoU
giou = iou - (enclose_area - union) / enclose_area
# GIoU Loss = 1 - GIoU
giou_loss = 1 - giou
return giou_loss.mean()
def _calculate_intersection(self, pred_boxes, target_boxes):
"""計算交集面積"""
x1 = torch.max(pred_boxes[:, 0], target_boxes[:, 0])
y1 = torch.max(pred_boxes[:, 1], target_boxes[:, 1])
x2 = torch.min(pred_boxes[:, 2], target_boxes[:, 2])
y2 = torch.min(pred_boxes[:, 3], target_boxes[:, 3])
intersection_w = torch.clamp(x2 - x1, min=0)
intersection_h = torch.clamp(y2 - y1, min=0)
return intersection_w * intersection_h
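For reference, the three criteria implemented above reduce to the standard closed forms below; this is a restatement of the usual definitions, matching the `iou`, center-distance, enclosing-box, and `alpha * v` terms computed in the code:

```latex
\mathrm{IoU} = \frac{|B_p \cap B_t|}{|B_p \cup B_t|}, \qquad
\mathcal{L}_{\mathrm{GIoU}} = 1 - \mathrm{IoU} + \frac{|C| - |B_p \cup B_t|}{|C|}
\mathcal{L}_{\mathrm{DIoU}} = 1 - \mathrm{IoU} + \frac{\rho^2(b_p, b_t)}{c^2}, \qquad
\mathcal{L}_{\mathrm{CIoU}} = \mathcal{L}_{\mathrm{DIoU}} + \alpha v
v = \frac{4}{\pi^2} \left( \arctan\frac{w_t}{h_t} - \arctan\frac{w_p}{h_p} \right)^2, \qquad
\alpha = \frac{v}{(1 - \mathrm{IoU}) + v}
```

Here C is the smallest box enclosing both boxes, rho(b_p, b_t) is the distance between their centers, and c is the diagonal of C.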

View File

@@ -1,6 +1,7 @@
from torch import nn
from torch.nn import functional
import torch
import numpy as np
class Entropy_Loss(nn.Module):
@@ -8,10 +9,10 @@ class Entropy_Loss(nn.Module):
super(Entropy_Loss, self).__init__()
def forward(self, outputs, labels):
# compute the loss with cross-entropy
# outputs = torch.argmax(outputs, 1)
# convert to tensors and make sure they share a device
outputs_New = torch.as_tensor(outputs, dtype=torch.float32)
labels_New = torch.as_tensor(labels, dtype=torch.float32)
labels_New = torch.as_tensor(labels, dtype=torch.float32) # (labels would need dtype long if used as class indices)
loss = functional.cross_entropy(outputs_New, labels_New)
return torch.as_tensor(loss, dtype = torch.float32)
return torch.as_tensor(loss, dtype=torch.float32)

View File

@@ -0,0 +1,116 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, transforms
class VGGPerceptualLoss(nn.Module):
"""
VGG19-based perceptual loss
Extracts features with a pretrained VGG19 and computes the loss in feature space
"""
def __init__(self, feature_layers=[2, 7, 12, 21, 30], use_normalization=True):
super(VGGPerceptualLoss, self).__init__()
# load the pretrained VGG19
vgg = models.vgg19(pretrained=True).features
# freeze the VGG parameters
for param in vgg.parameters():
param.requires_grad = False
# the model is moved to the input's device inside forward
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# feature layers to tap
self.feature_layers = feature_layers
self.vgg_layers = nn.ModuleList()
# keep VGG layers up to the last tapped layer
layer_idx = 0
current_layer = 0
for i, layer in enumerate(vgg):
if layer_idx < len(feature_layers) and i <= feature_layers[layer_idx]:
self.vgg_layers.append(layer)
if i == feature_layers[layer_idx]:
layer_idx += 1
else:
break
# whether to apply ImageNet normalization
self.use_normalization = use_normalization
if use_normalization:
self.normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)
# loss weights
self.weights = [1.0, 1.0, 1.0, 1.0, 1.0] # per-layer weights, tunable
def extract_features(self, x):
"""
Extract the VGG features
"""
# keep the input within [0, 1]
if x.min() < 0 or x.max() > 1:
x = torch.clamp(x, 0, 1)
# normalization
if self.use_normalization:
# make sure normalize lives on the same device as the input
if hasattr(self, 'normalize') and not isinstance(self.normalize, torch.nn.Module):
self.normalize = transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
).to(x.device)
x = self.normalize(x)
features = []
layer_idx = 0
# move every VGG layer onto the input's device
device = x.device
for i, layer in enumerate(self.vgg_layers):
layer = layer.to(device) # keep the layer on the right device
x = layer(x)
# check whether we reached a tapped feature layer
if layer_idx < len(self.feature_layers) and i == self.feature_layers[layer_idx]:
features.append(x)
layer_idx += 1
return features
def forward(self, pred, target):
"""
Compute the perceptual loss
pred: predicted images [B, C, H, W]
target: target images [B, C, H, W]
"""
# keep the model on the input's device
device = pred.device
self.vgg_layers = nn.ModuleList([layer.to(device) for layer in self.vgg_layers])
# make the input sizes match
if pred.shape != target.shape:
pred = F.interpolate(pred, size=target.shape[2:], mode='bilinear', align_corners=False)
# tile single-channel inputs to three channels
if pred.shape[1] == 1:
pred = pred.repeat(1, 3, 1, 1)
if target.shape[1] == 1:
target = target.repeat(1, 3, 1, 1)
# extract features
pred_features = self.extract_features(pred)
target_features = self.extract_features(target)
# feature-space loss
perceptual_loss = 0
for i, (pred_feat, target_feat) in enumerate(zip(pred_features, target_features)):
# MSE between feature maps
feat_loss = F.mse_loss(pred_feat, target_feat)
perceptual_loss += self.weights[i] * feat_loss
return perceptual_loss
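A minimal usage sketch for the loss above (the tensors and shapes here are illustrative assumptions, not taken from this commit):

```python
import torch
from Model_Loss.Perceptual_Loss import VGGPerceptualLoss

criterion = VGGPerceptualLoss()  # downloads the VGG19 weights on first use
pred = torch.rand(2, 1, 256, 256, requires_grad=True)  # single-channel maps are tiled to RGB internally
target = torch.rand(2, 1, 256, 256)
loss = criterion(pred, target)  # weighted MSE over the tapped VGG feature maps
loss.backward()  # gradients reach pred only; the VGG backbone stays frozen
```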

View File

@@ -0,0 +1,22 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms
from Model_Loss.CIOU_Loss import CIOULoss
from Model_Loss.Perceptual_Loss import VGGPerceptualLoss
class Segmentation_Loss(nn.Module):
def __init__(self) -> None:
super(Segmentation_Loss, self).__init__()
self.Perceptual_Loss = VGGPerceptualLoss()
self.CIOU = CIOULoss()
pass
def forward(self, Output_Result, GroundTruth_Result):
Perceptual_Loss = self.Perceptual_Loss(Output_Result, GroundTruth_Result)
CIOU_Loss = self.CIOU(Output_Result, GroundTruth_Result)
return Perceptual_Loss + CIOU_Loss
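A hedged sketch of how this combined criterion would be called (shapes and values are illustrative):

```python
import torch
from Model_Loss.Segmentation_Loss import Segmentation_Loss

criterion = Segmentation_Loss()
logits = torch.randn(2, 1, 224, 224, requires_grad=True)  # raw mask outputs
truth = torch.randn(2, 1, 224, 224)                       # ground-truth masks
total = criterion(logits, truth)  # perceptual term + CIOU term
total.backward()
```

One observation worth verifying: `_mask_to_boxes` builds its boxes through thresholding, `torch.nonzero`, and fresh `torch.tensor` calls, so the CIOU term appears to be non-differentiable here and gradients would flow through the perceptual term only.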

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,146 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class BinaryCrossEntropy(nn.Module):
"""
Basic binary cross-entropy loss
"""
def __init__(self, reduction='mean'):
"""
Initialize
Args:
reduction (str): 'mean', 'sum', or 'none'; how to reduce the loss
"""
super(BinaryCrossEntropy, self).__init__()
self.reduction = reduction
def forward(self, predictions, targets):
"""
Compute the binary cross-entropy loss
Args:
predictions (torch.Tensor): model outputs, shape [batch_size, ...]
targets (torch.Tensor): target labels, same shape as predictions
Returns:
torch.Tensor: the computed loss value
"""
# make sure the inputs are tensors
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
Loss = nn.BCELoss(reduction=self.reduction)
return Loss(predictions, targets)
# # check whether the output and label dimensions match
# if predictions.shape[1] != targets.shape[1]:
# # if they do not match, use the cross-entropy loss
# # cross-entropy expects class indices rather than one-hot labels,
# # so convert the one-hot encoding into class indices
# _, targets_indices = torch.max(targets, dim=1)
# return F.cross_entropy(predictions, targets_indices, reduction=self.reduction)
# else:
# # if the dimensions match, use the binary cross-entropy loss
# # PyTorch's built-in binary_cross_entropy_with_logits applies the sigmoid
# # itself, which keeps the inputs from leaving the [0,1] range
# return F.binary_cross_entropy_with_logits(predictions, targets, reduction=self.reduction)
class WeightedBinaryCrossEntropy(nn.Module):
"""
Weighted binary cross-entropy loss
"""
def __init__(self, pos_weight=1.0, neg_weight=1.0, reduction='mean'):
"""
Initialize
Args:
pos_weight (float): weight for positive samples
neg_weight (float): weight for negative samples
reduction (str): 'mean', 'sum', or 'none'; how to reduce the loss
"""
super(WeightedBinaryCrossEntropy, self).__init__()
self.pos_weight = pos_weight
self.neg_weight = neg_weight
self.reduction = reduction
def forward(self, predictions, targets):
"""
Compute the weighted binary cross-entropy loss
Args:
predictions (torch.Tensor): model outputs, shape [batch_size, ...]
targets (torch.Tensor): target labels, same shape as predictions
Returns:
torch.Tensor: the computed loss value
"""
# make sure the inputs are tensors
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
# sigmoid keeps the predictions within [0,1]
predictions = torch.sigmoid(predictions)
# weighted binary cross-entropy
loss = -self.pos_weight * targets * torch.log(predictions + 1e-7) - \
self.neg_weight * (1 - targets) * torch.log(1 - predictions + 1e-7)
# reduce according to the configured mode
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
return loss
class LabelSmoothingBCE(nn.Module):
"""
Binary cross-entropy loss with label smoothing
"""
def __init__(self, smoothing=0.1, reduction='mean'):
"""
Initialize
Args:
smoothing (float): label-smoothing coefficient in [0, 1]
reduction (str): 'mean', 'sum', or 'none'; how to reduce the loss
"""
super(LabelSmoothingBCE, self).__init__()
self.smoothing = smoothing
self.reduction = reduction
def forward(self, predictions, targets):
"""
Compute the label-smoothed binary cross-entropy loss
Args:
predictions (torch.Tensor): model outputs, shape [batch_size, ...]
targets (torch.Tensor): target labels, same shape as predictions
Returns:
torch.Tensor: the computed loss value
"""
# make sure the inputs are tensors
predictions = torch.as_tensor(predictions, dtype=torch.float32)
targets = torch.as_tensor(targets, dtype=torch.float32)
# apply label smoothing
targets = targets * (1 - self.smoothing) + 0.5 * self.smoothing
# sigmoid keeps the predictions within [0,1]
predictions = torch.sigmoid(predictions)
# binary cross-entropy
loss = -targets * torch.log(predictions + 1e-7) - (1 - targets) * torch.log(1 - predictions + 1e-7)
# reduce according to the configured mode
if self.reduction == 'mean':
return loss.mean()
elif self.reduction == 'sum':
return loss.sum()
else: # 'none'
return loss
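A quick numeric check of the label-smoothing step above; the import path is a guess, since this file's name is not shown in the diff. With smoothing = 0.1, hard targets 1/0 become soft targets 0.95/0.05 before the BCE is taken:

```python
import torch
from Model_Loss.Binary_Cross_Entropy import LabelSmoothingBCE  # hypothetical module path

criterion = LabelSmoothingBCE(smoothing=0.1)
logits = torch.tensor([2.0, -2.0])
targets = torch.tensor([1.0, 0.0])  # smoothed internally to 0.95 / 0.05
print(criterion(logits, targets))   # mean BCE against the softened targets
```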

122
README.md
View File

@@ -1,16 +1,116 @@
main.py: main program file
# Gastroscopy Disease Diagnosis System
## Project Overview
This project is a deep-learning system for automatic disease diagnosis on gastroscopy images, aimed at detecting and classifying stomach diseases, gastric cancer (CA) in particular. It uses a two-stage segment-then-classify approach: the gastroscopy image is first segmented to locate suspicious regions, which are then classified to decide whether disease is present.
* Dataset classes: 3 (gastric cancer, diseased but non-cancerous, normal)
* Base model: Xception
## Key Features
- **Image preprocessing**: histogram equalization, adaptive histogram equalization, sharpening, HSV adjustment, gamma correction, and other enhancement methods
- **Data augmentation**: expands the training set with an image generator
- **Lesion segmentation**: a GastroSegNet model segments suspicious regions in the gastroscopy images
- **Disease classification**: a modified Xception model sorts the segmented regions into three classes: Normal, gastric cancer (CA), and to-be-confirmed (Have_Question)
- **Result visualization**: confusion matrices, training curves, and other visualization tools
## System Architecture
The system processes images in two stages:
1. **Segmentation stage**: GastroSegNet segments the input image to locate suspicious regions
2. **Classification stage** (a hedged sketch of this cascade follows below):
   - the first classifier separates Normal from Others
   - the second classifier separates gastric cancer (CA) from to-be-confirmed (Have_Question)
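A minimal sketch of the cascade, assuming illustrative function names rather than the project's actual APIs:

```python
def diagnose(image, seg_model, clf_normal_vs_others, clf_ca_vs_question):
    """Two-stage pipeline: segment, then classify in two binary steps."""
    mask = seg_model(image)                       # stage 1: GastroSegNet mask
    region = image * (mask > 0.5)                 # keep only the suspicious region
    if clf_normal_vs_others(region) == "Normal":  # stage 2, first classifier
        return "Normal"
    return clf_ca_vs_question(region)             # stage 2, second classifier: "CA" or "Have_Question"
```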
## Requirements
- Python 3.8+
- PyTorch 1.8+
- CUDA (recommended, to speed up training)
- Other dependencies: torchvision, numpy, opencv-python, scikit-image, pandas, etc.
## Usage
### Data Preparation
1. Place the training data under `../Dataset/Training`
2. Place the test data under `../Dataset/Testing`
3. Place the annotations (XML format) under `../Label_Image`
### Training the Models
```bash
uv run main.py
```
Training automatically runs the following steps:
1. Data preprocessing and augmentation
2. Train the segmentation model (GastroSegNet)
3. Process the images with the segmentation model
4. Train the classification model (modified Xception)
### Viewing Results
Training results are saved under `../Result`, including:
- training curves: `../Result/Training_Image`
- confusion matrices: `../Result/Matrix_Image`
- training result data: `../Result/Training_Result`
- best models: `../Result/save_the_best_model`
## Project Layout
- `main.py`: program entry point
- `experiments/`: experiment code
- `experiment.py`: the main experiment flow
- `Training/`: training code
- `Models/`: model definitions
- `Image_Process/`: image-processing code
- `Model_Loss/`: loss-function definitions
- `Training_Tools/`: training utilities
- `utils/`: helper functions and configuration
## Configuration
The system configuration lives in `utils/Stomach_Config.py` (a hypothetical sketch follows below) and mainly includes:
- `Image_Enhance`: image-enhancement methods
- `Loading_Config`: data-loading configuration
- `Training_Config`: training parameters
- `Model_Config`: model parameters
- `Save_Result_File_Config`: result save paths
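A sketch of what this configuration might look like; the key names follow the list above and the label values mirror `Training_Tools/Tools.py`, but every value shown is an illustrative assumption rather than the project's real settings:

```python
# utils/Stomach_Config.py (hypothetical sketch)
Image_Enhance = {"Histogram_Equalization": True, "CLAHE": True, "Gamma": 1.2}
Loading_Config = {
    "Training_Labels": ["stomach_cancer_Crop", "Normal_Crop", "Have_Question_Crop"],
    "XML_Loading_Label": ["stomach_cancer", "Normal", "Have_Question"],  # assumed values
}
Training_Config = {"Epochs": 100, "Batch_Size": 16, "Learning_Rate": 1e-4}
Model_Config = {"Image_Size": 256, "Classes": 3}
Save_Result_File_Config = {"Training_Result": "../Result/Training_Result"}
```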
## Models
### Segmentation model: GastroSegNet
Identifies suspicious regions in the gastroscopy images and outputs segmentation masks.
### Classification model: modified Xception
Based on the Xception architecture, adapted for gastroscopy image classification; distinguishes Normal, gastric cancer, and to-be-confirmed.
* Main entry point: main.py
## load_process
### Reads image files, splits off the independent data (test, validation), loads the independent data, and handles general file operations
File_Process : the main file-handling class; opening files, creating files, and checking whether a file exists all fall under it. A plain class and the parent of LoadData
LoadData : the main loader; every read starts here. Inherits File_Process (child class)
Cutting_Indepentend_Image : loads the independent data (testing, validation)
* File_Process : the main file-handling class; opening files, creating files, and checking whether a file exists all fall under it. A plain class and the parent of LoadData
* LoadData : the main loader; every read starts here. Inherits File_Process (child class)
* Cutting_Indepentend_Image : loads the independent data (testing, validation)
## Calculate_Process
### Computes the model's evaluation metrics
* Calculate: computes the mean and standard deviation of the evaluation metrics and saves the results to file
## Image_Process
### Handles data augmentation and image processing
* Generator_Content : builds the basic generator items; parent class of Image_Generator
* Image_Generator : produces the augmented data and saves it to file. Inherits Generator_Content (child class)
* image_enhancement : performs the image processing and returns the data
* Image_Generator : produces the augmented data and saves it to file.
* image_enhancement : performs the image processing that enhances the data.
## all_models_tools
### Fine-grained model control such as early stopping, learning-rate reduction, and saving the best model
* all_model_tools: the callback methods
## Model_Tools
### Provides the basic model building blocks: Convolution, Dense, and other model components
@@ -42,11 +142,13 @@ Cutting_Indepentend_Image : loads the independent data (testing, validation)
### Validates data types, bad inputs, and similar problems in the code
* Validation : validates and flags coding errors
## Draw
## draw_tools
### Drawing utilities
* Draw_Tools : draws confusion matrices and trend curves
* draw : draws confusion matrices and trend curves
* Grad_CAM : draws Grad-CAM heatmaps for model visualization
## Experiment
### The main program that runs the experiments
* Experiment : loads data, sets the model-compilation details, runs training, and validates the results
* Experiment : loads data, sets the model and experiment details, runs training, and validates the results
* Model_All_Step : sets up the model training flow and its detailed parameters
* pytorch_Model: defines the model architecture

View File

@@ -1,74 +0,0 @@
import cv2
import numpy as np
import torch
class Read_image_and_Process_image:
def __init__(self, Image_Size) -> None:
self.Image_Size = Image_Size
pass
def get_data(self, path):
'''Load a file'''
try:
img_arr = cv2.imread(path, cv2.IMREAD_COLOR) # read the file (color)
# img_arr = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # read the file (grayscale)
resized_arr = cv2.resize(img_arr, (self.Image_Size, self.Image_Size)) # resize the image
except Exception as e:
print(e)
return resized_arr
def Data_Augmentation_Image(self, path):
resized_arr = []
for p in path:
try:
img_arr = cv2.imread(p, cv2.IMREAD_COLOR) # read the file (color)
# img_arr = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # read the file (grayscale)
resized_arr.append(cv2.resize(img_arr, (self.Image_Size, self.Image_Size))) # resize the image
except Exception as e:
print(e)
return np.array(resized_arr)
def image_data_processing(self, data, label):
'''Post-process the images after loading'''
data = np.asarray(data).astype(np.float32) # convert the image list into an np.array
data = data.reshape(-1, self.Image_Size, self.Image_Size, 3) # reshape the array
label = np.array(label) # convert the labels from a list into a numpy array
return data, label
def normalization(self, images):
imgs = []
for img in images:
img = np.asarray(img).astype(np.float32) # convert the image into an np.array
img = img / 255 # normalize the image data
imgs.append(img)
return torch.as_tensor(imgs)
# def load_numpy_data(self, file_names):
# '''Load numpy image files and apply processing to improve feature extraction'''
# i = 0
# numpy_image = []
# original_image = []
# for file_name in file_names:
# compare = str(file_name).split(".")
# if compare[-1] == "npy":
# image = np.load(file_name) # load the image file
# numpy_image.append(image) # collect into one array
# else:
# original_image.append(file_name)
# original_image = self.get_data(original_image)
# for file in original_image:
# numpy_image.append(file)
# return numpy_image
def make_label_list(self, length, content):
'''Build the label list'''
label_list = []
for i in range(length):
label_list.append(content)
return label_list

View File

@@ -7,32 +7,69 @@ import numpy as np
import cv2
class ListDataset(Dataset):
def __init__(self, data_list, labels_list, transform):
def __init__(self, data_list, labels_list, Mask_List, transform):
self.data = data_list
self.labels = labels_list
self.Mask_Truth_List = Mask_List
self.transform = transform
self.roots = []
def __len__(self):
return len(self.data)
def __getitem__(self, idx):
Image_Root = self.data[idx]
# Mask_Ground_Truth = None
# if self.Mask_Truth_List is not None:
# mask_path = self.Mask_Truth_List[idx]
# if mask_path is not None: # make sure the mask path is not None
# try:
# Mask_Ground_Truth = Image.open(mask_path).convert("RGB")
# # don't convert to a tensor yet; wait until the transform has run
# except Exception as e:
# print(e)
Split_Roots = Image_Root.split("/")
Split_Roots = Split_Roots[-1].split("\\")
File_Name = Split_Roots[-1]
classes = Split_Roots[-2]
try:
with open(Image_Root, 'rb') as file:
Images = Image.open(file).convert("RGB")
# Image = cv2.imread(Image_Root, cv2.IMREAD_COLOR) # read the file (color)
# Image = cv2.cvtColor(Image, cv2.COLOR_BGR2RGB)
Images = Image.open(Image_Root).convert("RGB")
except Exception as e:
print(e)
assert e is not None, f"Error loading image {Image_Root}: {e}"
if self.transform is not "Generator":
if self.transform != "Generator":
Images = self.transform(Images)
# if self.Mask_Truth_List is not None and Mask_Ground_Truth is not None and not isinstance(Mask_Ground_Truth, torch.Tensor):
# Mask_Ground_Truth = self.transform(Mask_Ground_Truth)
# # make sure Images is a tensor
# if not isinstance(Images, torch.Tensor):
# Images = torch.tensor(np.array(Images))
# # make sure Mask_Ground_Truth is a tensor
# if self.Mask_Truth_List is not None and Mask_Ground_Truth is not None and not isinstance(Mask_Ground_Truth, torch.Tensor):
# Mask_Ground_Truth = torch.tensor(np.array(Mask_Ground_Truth))
Images = torch.tensor(np.array(Images))
label = self.labels[idx]
# if self.Mask_Truth_List is not None:
# # if the mask is None, create an empty mask the same size as the image
# if Mask_Ground_Truth is None:
# if isinstance(Images, torch.Tensor):
# # create an empty mask tensor matching the image
# Mask_Ground_Truth = torch.zeros_like(Images)
# else:
# # if the image is not a tensor, create an empty PIL image
# Mask_Ground_Truth = Image.new('RGB', Images.size, (0, 0, 0))
# if self.transform != "Generator":
# Mask_Ground_Truth = self.transform(Mask_Ground_Truth)
# return Images, Mask_Ground_Truth, label, File_Name, classes
# print(f"Dataset_Data: \n{sample}\n")
return Images, label
return Images, label, File_Name, classes
class Training_Precesses:
def __init__(self, ImageSize):
@@ -43,27 +80,19 @@ class Training_Precesses:
def Dataloader_Sampler(self, SubDataSet, Batch_Size, Sampler=True):
if Sampler:
# Data_Loader = DataLoader(
# dataset=SubDataSet,
# batch_size=Batch_Size,
# num_workers=0,
# pin_memory=True,
# sampler=self.Setting_RandomSampler_Content(SubDataSet)
# )
Data_Loader = DataLoader(
dataset=SubDataSet,
batch_size=Batch_Size,
num_workers=0,
pin_memory=True,
sampler=self.Setting_RandomSampler_Content(SubDataSet)
sampler=self.Setting_WeightedRandomSampler_Content(SubDataSet)
)
else:
Data_Loader = DataLoader(
dataset=SubDataSet,
batch_size=Batch_Size,
num_workers=0,
pin_memory=True,
shuffle=True
pin_memory=True
)
return Data_Loader
@@ -84,6 +113,16 @@ class Training_Precesses:
if labels.ndim > 1: # If one-hot encoded
labels = np.argmax(labels, axis=1)
# make sure the labels are integers
try:
# try casting the labels to integers
labels = labels.astype(np.int64)
except ValueError:
# if the labels are strings, map them to integers first
unique_labels = np.unique(labels)
label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}
labels = np.array([label_to_idx[label] for label in labels])
# Count occurrences of each class
class_counts = np.bincount(labels)
class_weights = 1.0 / class_counts # Inverse frequency as weight
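A worked example of the inverse-frequency weighting above, with illustrative counts:

```python
import numpy as np

labels = np.array([0] * 100 + [1] * 300)
class_counts = np.bincount(labels)      # -> array([100, 300])
class_weights = 1.0 / class_counts      # -> array([0.01, 0.0033...])
sample_weights = class_weights[labels]  # one weight per sample
# minority-class samples are drawn about three times as often by a WeightedRandomSampler
```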
@@ -98,20 +137,28 @@ class Training_Precesses:
def Setting_RandomSampler_Content(self, Dataset):
return RandomSampler(Dataset, generator = self.generator)
def Setting_DataSet(self, Datas, Labels, transform = None):
def Setting_DataSet(self, Datas, Labels, Mask_List, transform = None):
# data preprocessing
if transform is None:
transform = transforms.Compose([
transforms.Resize((256, 256))
transforms.Resize((self.ImageSize, self.ImageSize))
])
elif transform == "Transform":
transform = transforms.Compose([
transforms.Resize((256, 256)),
transform = transforms.Compose([
transforms.Resize((self.ImageSize, self.ImageSize)),
transforms.ToTensor()
])
elif transform == "Generator":
transform = "Generator"
# Create Dataset
list_dataset = ListDataset(Datas, Labels , transform)
return list_dataset
list_dataset = ListDataset(Datas, Labels, Mask_List, transform)
return list_dataset
def Setting_SubsetRandomSampler_Content(self, SubDataSet):
# Calculate subset indices (example: using a fraction of the dataset)
dataset_size = len(SubDataSet)
subset_size = int(0.8 * dataset_size) # Use 80% of the dataset as an example
subset_indices = torch.randperm(dataset_size, generator=self.generator)[:subset_size]
return SubsetRandomSampler(subset_indices, generator=self.generator)

View File

@@ -5,39 +5,9 @@ import torch
class Tool:
def __init__(self) -> None:
self.__ICG_Training_Root = ""
self.__Normal_Training_Root = ""
self.__Comprehensive_Training_Root = ""
self.__ICG_Test_Data_Root = ""
self.__Normal_Test_Data_Root = ""
self.__Comprehensive_Testing_Root = ""
self.__ICG_ImageGenerator_Data_Root = ""
self.__Normal_ImageGenerator_Data_Root = ""
self.__Comprehensive_Generator_Root = ""
self.__Labels = []
self.__OneHot_Encording = []
pass
def Set_Labels(self):
self.__Labels = ["stomach_cancer_Crop", "Normal_Crop", "Have_Question_Crop"]
# self.__Labels = ["NPC_negative", "NPC_positive"]
def Set_Save_Roots(self):
self.__ICG_Training_Root = "../Dataset/Training"
self.__Normal_Training_Root = "../Dataset/Training/CA"
self.__Comprehensive_Training_Root = "../Dataset/Training/Mixed"
self.__ICG_Test_Data_Root = "../Dataset/Testing"
self.__Normal_Test_Data_Root = "../Dataset/Training/Normal_TestData"
self.__Comprehensive_Testing_Root = "../Dataset/Training/Comprehensive_TestData"
self.__ICG_ImageGenerator_Data_Root = "../Dataset/ImageGenerator"
self.__Normal_ImageGenerator_Data_Root = "../Dataset/Training/Normal_ImageGenerator"
self.__Comprehensive_Generator_Root = "../Dataset/Training/Comprehensive_ImageGenerator"
def Set_OneHotEncording(self, content):
Counter = []
for i in range(len(content)):
@@ -46,35 +16,6 @@ class Tool:
Counter = torch.tensor(Counter)
self.__OneHot_Encording = functional.one_hot(Counter, len(content))
pass
def Get_Data_Label(self):
'''
Get the labels for the required data
'''
return self.__Labels
def Get_Save_Roots(self, choose):
'''Returns the training and testing roots
choose = 1 => the white-light (ICG) roots
choose = 2 => the normal-light roots
any other value => the comprehensive roots
'''
if choose == 1:
return self.__ICG_Training_Root, self.__ICG_Test_Data_Root
if choose == 2:
return self.__Normal_Training_Root, self.__Normal_Test_Data_Root
else:
return self.__Comprehensive_Training_Root, self.__Comprehensive_Testing_Root
def Get_Generator_Save_Roots(self, choose):
'''Returns the generator data root'''
if choose == 1:
return self.__ICG_ImageGenerator_Data_Root
if choose == 2:
return self.__Normal_ImageGenerator_Data_Root
else:
return self.__Comprehensive_Generator_Root
def Get_OneHot_Encording_Label(self):
return self.__OneHot_Encording

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff