187 lines
6.5 KiB
Python
187 lines
6.5 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
測試梯度計算修復的腳本
|
||
用於驗證 RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn 是否已修復
|
||
"""
|
||
|
||
import torch
|
||
import torch.nn as nn
|
||
import numpy as np
|
||
from experiments.Models.Xception_Model_Modification import Xception
|
||
from Model_Loss.Loss import Entropy_Loss
|
||
|
||
def test_gradient_computation():
|
||
"""測試梯度計算是否正常工作"""
|
||
print("=== 測試梯度計算修復 ===")
|
||
|
||
# 設置設備
|
||
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||
print(f"使用設備: {device}")
|
||
|
||
# 創建模型
|
||
model = Xception().to(device)
|
||
model.train()
|
||
|
||
# 檢查模型參數
|
||
print("\n=== 模型參數檢查 ===")
|
||
total_params = 0
|
||
trainable_params = 0
|
||
|
||
for name, param in model.named_parameters():
|
||
total_params += param.numel()
|
||
if param.requires_grad:
|
||
trainable_params += param.numel()
|
||
else:
|
||
print(f"❌ 參數 {name} 不需要梯度!")
|
||
|
||
print(f"總參數數量: {total_params:,}")
|
||
print(f"可訓練參數數量: {trainable_params:,}")
|
||
print(f"可訓練參數比例: {trainable_params/total_params*100:.2f}%")
|
||
|
||
if trainable_params == 0:
|
||
print("❌ 錯誤:沒有可訓練的參數!")
|
||
return False
|
||
|
||
# 創建測試數據
|
||
batch_size = 2
|
||
input_images = torch.randn(batch_size, 3, 224, 224, device=device, requires_grad=True)
|
||
|
||
# 創建 one-hot 編碼的標籤
|
||
labels_onehot = torch.zeros(batch_size, 3, device=device)
|
||
labels_onehot[0, 1] = 1.0 # 第一個樣本屬於類別1
|
||
labels_onehot[1, 2] = 1.0 # 第二個樣本屬於類別2
|
||
|
||
print(f"\n=== 測試數據 ===")
|
||
print(f"輸入形狀: {input_images.shape}")
|
||
print(f"標籤形狀: {labels_onehot.shape}")
|
||
print(f"輸入 requires_grad: {input_images.requires_grad}")
|
||
print(f"標籤內容:\n{labels_onehot}")
|
||
|
||
try:
|
||
# 前向傳播
|
||
print("\n=== 前向傳播 ===")
|
||
outputs = model(input_images)
|
||
print(f"輸出形狀: {outputs.shape}")
|
||
print(f"輸出 requires_grad: {outputs.requires_grad}")
|
||
print(f"輸出 grad_fn: {outputs.grad_fn}")
|
||
|
||
if outputs.grad_fn is None:
|
||
print("❌ 錯誤:輸出沒有 grad_fn!")
|
||
return False
|
||
|
||
# 計算損失
|
||
print("\n=== 損失計算 ===")
|
||
criterion = Entropy_Loss()
|
||
loss = criterion(outputs, labels_onehot)
|
||
print(f"損失值: {loss.item():.6f}")
|
||
print(f"損失 requires_grad: {loss.requires_grad}")
|
||
print(f"損失 grad_fn: {loss.grad_fn}")
|
||
|
||
if loss.grad_fn is None:
|
||
print("❌ 錯誤:損失沒有 grad_fn!")
|
||
return False
|
||
|
||
# 反向傳播
|
||
print("\n=== 反向傳播 ===")
|
||
loss.backward()
|
||
print("✅ 反向傳播成功完成!")
|
||
|
||
# 檢查梯度
|
||
print("\n=== 梯度檢查 ===")
|
||
grad_count = 0
|
||
for name, param in model.named_parameters():
|
||
if param.grad is not None:
|
||
grad_count += 1
|
||
grad_norm = param.grad.norm().item()
|
||
if grad_norm > 0:
|
||
print(f"✅ {name}: 梯度範數 = {grad_norm:.6f}")
|
||
else:
|
||
print(f"⚠️ {name}: 梯度為零")
|
||
else:
|
||
print(f"❌ {name}: 沒有梯度")
|
||
|
||
print(f"\n有梯度的參數數量: {grad_count}")
|
||
|
||
if grad_count == 0:
|
||
print("❌ 錯誤:沒有參數有梯度!")
|
||
return False
|
||
|
||
print("\n✅ 所有測試通過!梯度計算修復成功!")
|
||
return True
|
||
|
||
except Exception as e:
|
||
print(f"\n❌ 測試失敗:{str(e)}")
|
||
print(f"錯誤類型: {type(e).__name__}")
|
||
return False
|
||
|
||
def test_losses_method():
|
||
"""測試 Losses 方法的張量處理"""
|
||
print("\n=== 測試 Losses 方法 ===")
|
||
|
||
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||
|
||
# 模擬 Losses 方法的邏輯
|
||
def test_losses(predicts, labels):
|
||
# 確保輸入是張量且在正確的設備上
|
||
if not isinstance(predicts, torch.Tensor):
|
||
predicts = torch.tensor(predicts, dtype=torch.float32, device=device, requires_grad=True)
|
||
if not isinstance(labels, torch.Tensor):
|
||
labels = torch.tensor(labels, dtype=torch.float32, device=device)
|
||
|
||
# 確保張量在同一設備上
|
||
predicts = predicts.to(device)
|
||
labels = labels.to(device)
|
||
|
||
print(f"Predicts: shape={predicts.shape}, requires_grad={predicts.requires_grad}, device={predicts.device}")
|
||
print(f"Labels: shape={labels.shape}, requires_grad={labels.requires_grad}, device={labels.device}")
|
||
|
||
criterion = Entropy_Loss()
|
||
loss = criterion(predicts, labels)
|
||
return loss
|
||
|
||
# 測試不同類型的輸入
|
||
batch_size = 2
|
||
num_classes = 3
|
||
|
||
# 測試1: 張量輸入
|
||
print("\n--- 測試1: 張量輸入 ---")
|
||
predicts_tensor = torch.randn(batch_size, num_classes, device=device, requires_grad=True)
|
||
labels_tensor = torch.zeros(batch_size, num_classes, device=device)
|
||
labels_tensor[0, 1] = 1.0
|
||
labels_tensor[1, 2] = 1.0
|
||
|
||
try:
|
||
loss1 = test_losses(predicts_tensor, labels_tensor)
|
||
print(f"✅ 張量輸入測試成功,損失: {loss1.item():.6f}")
|
||
except Exception as e:
|
||
print(f"❌ 張量輸入測試失敗: {e}")
|
||
|
||
# 測試2: NumPy 輸入
|
||
print("\n--- 測試2: NumPy 輸入 ---")
|
||
predicts_numpy = np.random.randn(batch_size, num_classes).astype(np.float32)
|
||
labels_numpy = np.zeros((batch_size, num_classes), dtype=np.float32)
|
||
labels_numpy[0, 1] = 1.0
|
||
labels_numpy[1, 2] = 1.0
|
||
|
||
try:
|
||
loss2 = test_losses(predicts_numpy, labels_numpy)
|
||
print(f"✅ NumPy 輸入測試成功,損失: {loss2.item():.6f}")
|
||
except Exception as e:
|
||
print(f"❌ NumPy 輸入測試失敗: {e}")
|
||
|
||
if __name__ == "__main__":
|
||
print("開始測試梯度計算修復...")
|
||
|
||
# 測試梯度計算
|
||
gradient_test_passed = test_gradient_computation()
|
||
|
||
# 測試 Losses 方法
|
||
test_losses_method()
|
||
|
||
print("\n" + "="*50)
|
||
if gradient_test_passed:
|
||
print("🎉 梯度計算修復驗證成功!可以開始訓練了。")
|
||
else:
|
||
print("❌ 梯度計算仍有問題,需要進一步調試。")
|
||
print("="*50) |