基于Optuna的图深度学习模型超参数优化
本文介绍了使用Optuna工具优化PyTorch Geometric图神经网络(GAT)超参数的方法。主要内容包括环境配置、Cora数据集准备、基础GAT模型实现、训练评估函数设计以及Optuna超参数优化流程。通过定义objective函数,对学习率、隐藏层维度、注意力头数等关键参数进行自动搜索,以提升模型在节点分类任务上的性能。该方法可广泛应用于图深度学习模型的参数调优。
基于Optuna的图深度学习模型超参数优化
概述
图深度学习是处理图结构数据的重要方法,广泛应用于社交网络分析、推荐系统、生物信息学等领域。然而,图深度学习模型的性能很大程度上依赖于超参数的选择。本文将详细介绍如何使用Optuna工具对基于PyTorch Geometric的图神经网络进行超参数优化,包括学习率、Batch size、注意力头数、注意力头维度和编码层层数等关键参数。
环境配置
首先,我们需要配置基础环境。本实验基于Python 3.8+和PyTorch框架,使用PyTorch Geometric处理图数据,Optuna进行超参数优化。
# 创建conda环境
conda create -n gdl-optuna python=3.8
conda activate gdl-optuna
# 安装PyTorch (请根据您的CUDA版本选择合适的命令)
conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
# 安装PyTorch Geometric
pip install torch-scatter torch-sparse torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-1.12.0+cu113.html
pip install torch-geometric
# 安装其他依赖
pip install optuna matplotlib scikit-learn networkx
数据集准备
我们将使用Cora数据集,这是一个经典的引文网络数据集,常用于图神经网络基准测试。
import torch
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
# 下载并加载Cora数据集
dataset = Planetoid(root='data/Cora', name='Cora', transform=NormalizeFeatures())
# 数据集信息
print(f'Dataset: {dataset}')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')
# 获取第一个图数据
data = dataset[0]
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')
基础图神经网络模型
首先,我们实现一个基础的图注意力网络(GAT)作为我们的基准模型。
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.nn import global_mean_pool
class GATModel(torch.nn.Module):
def __init__(self, num_features, hidden_channels, num_heads, head_dim, num_layers, num_classes, dropout=0.5):
super(GATModel, self).__init__()
self.dropout = dropout
self.convs = torch.nn.ModuleList()
# 输入层
self.convs.append(GATConv(num_features, hidden_channels, heads=num_heads, dropout=dropout))
# 隐藏层
for _ in range(num_layers - 2):
self.convs.append(GATConv(
hidden_channels * num_heads,
hidden_channels,
heads=num_heads,
dropout=dropout
))
# 输出层
self.convs.append(GATConv(
hidden_channels * num_heads,
num_classes,
heads=1,
concat=False,
dropout=dropout
))
def forward(self, x, edge_index, batch=None):
# 应用所有卷积层
for i, conv in enumerate(self.convs):
x = F.dropout(x, p=self.dropout, training=self.training)
x = conv(x, edge_index)
if i < len(self.convs) - 1: # 除了最后一层外都使用激活函数
x = F.elu(x)
return x
def reset_parameters(self):
for conv in self.convs:
conv.reset_parameters()
训练和评估函数
接下来,我们实现模型的训练和评估函数。
def train_model(model, data, optimizer, criterion):
model.train()
optimizer.zero_grad()
out = model(data.x, data.edge_index)
loss = criterion(out[data.train_mask], data.y[data.train_mask])
loss.backward()
optimizer.step()
return loss.item()
def evaluate_model(model, data, mask):
model.eval()
with torch.no_grad():
out = model(data.x, data.edge_index)
pred = out.argmax(dim=1)
correct = pred[mask] == data.y[mask]
acc = int(correct.sum()) / int(mask.sum())
return acc
def test_model(model, data):
model.eval()
with torch.no_grad():
out = model(data.x, data.edge_index)
pred = out.argmax(dim=1)
# 计算各种指标
test_correct = pred[data.test_mask] == data.y[data.test_mask]
test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
val_correct = pred[data.val_mask] == data.y[data.val_mask]
val_acc = int(val_correct.sum()) / int(data.val_mask.sum())
train_correct = pred[data.train_mask] == data.y[data.train_mask]
train_acc = int(train_correct.sum()) / int(data.train_mask.sum())
return train_acc, val_acc, test_acc
Optuna超参数优化实现
现在,我们使用Optuna来优化GAT模型的超参数。
import optuna
from optuna.trial import TrialState
import numpy as np
def objective(trial):
# 超参数搜索空间
learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
hidden_channels = trial.suggest_int("hidden_channels", 8, 64, step=8)
num_heads = trial.suggest_int("num_heads", 1, 8)
head_dim = trial.suggest_int("head_dim", 8, 32, step=8)
num_layers = trial.suggest_int("num_layers", 2, 5)
dropout = trial.suggest_float("dropout", 0.1, 0.7)
# 创建模型
model = GATModel(
num_features=dataset.num_features,
hidden_channels=hidden_channels,
num_heads=num_heads,
head_dim=head_dim,
num_layers=num_layers,
num_classes=dataset.num_classes,
dropout=dropout
)
# 优化器和损失函数
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()
# 训练模型
best_val_acc = 0
patience_counter = 0
patience = 50
for epoch in range(200):
loss = train_model(model, data, optimizer, criterion)
val_acc = evaluate_model(model, data, data.val_mask)
# 早停机制
if val_acc > best_val_acc:
best_val_acc = val_acc
patience_counter = 0
else:
patience_counter += 1
if patience_counter >= patience:
break
# 向Optuna报告中间结果
trial.report(val_acc, epoch)
# 如果试验应该提前终止
if trial.should_prune():
raise optuna.exceptions.TrialPruned()
return best_val_acc
# 创建Optuna研究
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100, timeout=3600)
# 输出优化结果
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print(" Value: ", trial.value)
print(" Params: ")
for key, value in trial.params.items():
print(" {}: {}".format(key, value))
完整训练流程
基于Optuna找到的最佳超参数,我们进行完整的模型训练和评估。
def train_with_best_params(params, data, dataset, epochs=200):
# 创建模型
model = GATModel(
num_features=dataset.num_features,
hidden_channels=params['hidden_channels'],
num_heads=params['num_heads'],
head_dim=params['head_dim'],
num_layers=params['num_layers'],
num_classes=dataset.num_classes,
dropout=params['dropout']
)
# 优化器和损失函数
optimizer = torch.optim.Adam(
model.parameters(),
lr=params['learning_rate'],
weight_decay=5e-4
)
criterion = torch.nn.CrossEntropyLoss()
# 训练记录
train_losses = []
val_accuracies = []
train_accuracies = []
best_val_acc = 0
best_model_state = None
patience_counter = 0
patience = 50
for epoch in range(epochs):
# 训练
loss = train_model(model, data, optimizer, criterion)
train_losses.append(loss)
# 评估
train_acc = evaluate_model(model, data, data.train_mask)
val_acc = evaluate_model(model, data, data.val_mask)
train_accuracies.append(train_acc)
val_accuracies.append(val_acc)
# 保存最佳模型
if val_acc > best_val_acc:
best_val_acc = val_acc
best_model_state = model.state_dict().copy()
patience_counter = 0
else:
patience_counter += 1
# 早停
if patience_counter >= patience:
print(f"Early stopping at epoch {epoch}")
break
if epoch % 10 == 0:
print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')
# 加载最佳模型
model.load_state_dict(best_model_state)
# 最终测试
train_acc, val_acc, test_acc = test_model(model, data)
print(f'Final Results: Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}, Test Acc: {test_acc:.4f}')
return {
'model': model,
'train_losses': train_losses,
'train_accuracies': train_accuracies,
'val_accuracies': val_accuracies,
'final_train_acc': train_acc,
'final_val_acc': val_acc,
'final_test_acc': test_acc
}
# 使用最佳参数训练
best_params = study.best_trial.params
results = train_with_best_params(best_params, data, dataset)
可视化分析
为了更直观地理解优化过程和模型性能,我们实现一些可视化功能。
import matplotlib.pyplot as plt
import seaborn as sns
def plot_optimization_history(study):
fig = optuna.visualization.plot_optimization_history(study)
fig.show()
def plot_parallel_coordinate(study):
fig = optuna.visualization.plot_parallel_coordinate(study)
fig.show()
def plot_param_importances(study):
fig = optuna.visualization.plot_param_importances(study)
fig.show()
def plot_slice(study):
fig = optuna.visualization.plot_slice(study)
fig.show()
def plot_training_history(results):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
# 损失曲线
ax1.plot(results['train_losses'])
ax1.set_title('Training Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
# 准确率曲线
ax2.plot(results['train_accuracies'], label='Train')
ax2.plot(results['val_accuracies'], label='Validation')
ax2.set_title('Accuracy')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy')
ax2.legend()
plt.tight_layout()
plt.show()
# 绘制优化历史
plot_optimization_history(study)
# 绘制参数重要性
plot_param_importances(study)
# 绘制训练历史
plot_training_history(results)
高级优化技巧
为了进一步提高优化效率,我们可以使用更高级的Optuna功能。
# 使用TPE采样器进行更高效的优化
sampler = optuna.samplers.TPESampler(
n_startup_trials=10,
multivariate=True,
group=True
)
study = optuna.create_study(direction="maximize", sampler=sampler)
# 添加剪枝策略
pruner = optuna.pruners.MedianPruner(
n_startup_trials=5,
n_warmup_steps=10,
interval_steps=1
)
study = optuna.create_study(direction="maximize", sampler=sampler, pruner=pruner)
# 使用MySQL或PostgreSQL存储研究结果(适用于分布式优化)
# storage = optuna.storages.RDBStorage(
# url="mysql://username:password@localhost/optuna",
# engine_kwargs={"pool_size": 20, "max_overflow": 100},
# )
# study = optuna.create_study(storage=storage, direction="maximize")
# 定义自定义搜索空间
def suggest_hyperparams(trial):
return {
"learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
"hidden_channels": trial.suggest_int("hidden_channels", 8, 64, step=8),
"num_heads": trial.suggest_categorical("num_heads", [1, 2, 4, 8]),
"head_dim": trial.suggest_categorical("head_dim", [8, 16, 32]),
"num_layers": trial.suggest_int("num_layers", 2, 5),
"dropout": trial.suggest_float("dropout", 0.1, 0.7),
}
# 使用回调函数记录最佳试验
def callback(study, trial):
if study.best_trial.number == trial.number:
print(f"New best trial {trial.number} with value: {trial.value}")
print(f"Params: {trial.params}")
# 运行优化
study.optimize(
objective,
n_trials=100,
timeout=3600,
callbacks=[callback],
gc_after_trial=True # 每次试验后进行垃圾回收
)
多模型比较
除了GAT,我们还可以比较其他图神经网络架构的性能。
from torch_geometric.nn import GCNConv, SAGEConv
# GCN模型
class GCNModel(torch.nn.Module):
def __init__(self, num_features, hidden_channels, num_layers, num_classes, dropout=0.5):
super(GCNModel, self).__init__()
self.dropout = dropout
self.convs = torch.nn.ModuleList()
# 输入层
self.convs.append(GCNConv(num_features, hidden_channels))
# 隐藏层
for _ in range(num_layers - 2):
self.convs.append(GCNConv(hidden_channels, hidden_channels))
# 输出层
self.convs.append(GCNConv(hidden_channels, num_classes))
def forward(self, x, edge_index, batch=None):
for i, conv in enumerate(self.convs):
x = conv(x, edge_index)
if i < len(self.convs) - 1: # 除了最后一层外都使用激活函数和dropout
x = F.relu(x)
x = F.dropout(x, p=self.dropout, training=self.training)
return x
# GraphSAGE模型
class SAGEModel(torch.nn.Module):
def __init__(self, num_features, hidden_channels, num_layers, num_classes, dropout=0.5):
super(SAGEModel, self).__init__()
self.dropout = dropout
self.convs = torch.nn.ModuleList()
# 输入层
self.convs.append(SAGEConv(num_features, hidden_channels))
# 隐藏层
for _ in range(num_layers - 2):
self.convs.append(SAGEConv(hidden_channels, hidden_channels))
# 输出层
self.convs.append(SAGEConv(hidden_channels, num_classes))
def forward(self, x, edge_index, batch=None):
for i, conv in enumerate(self.convs):
x = conv(x, edge_index)
if i < len(self.convs) - 1: # 除了最后一层外都使用激活函数和dropout
x = F.relu(x)
x = F.dropout(x, p=self.dropout, training=self.training)
return x
# 多模型比较函数
def compare_models(dataset, trials_per_model=50):
models = {
"GAT": GATModel,
"GCN": GCNModel,
"GraphSAGE": SAGEModel
}
results = {}
for model_name, model_class in models.items():
print(f"Optimizing {model_name}...")
# 为不同模型定义不同的目标函数
if model_name == "GAT":
def objective(trial):
params = {
"learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
"hidden_channels": trial.suggest_int("hidden_channels", 8, 64, step=8),
"num_heads": trial.suggest_int("num_heads", 1, 8),
"head_dim": trial.suggest_int("head_dim", 8, 32, step=8),
"num_layers": trial.suggest_int("num_layers", 2, 5),
"dropout": trial.suggest_float("dropout", 0.1, 0.7),
}
model = model_class(
num_features=dataset.num_features,
hidden_channels=params['hidden_channels'],
num_heads=params.get('num_heads', 1), # GCN和GraphSAGE不需要这个参数
head_dim=params.get('head_dim', 16), # GCN和GraphSAGE不需要这个参数
num_layers=params['num_layers'],
num_classes=dataset.num_classes,
dropout=params['dropout']
)
return train_and_evaluate(model, data, params['learning_rate'])
else: # GCN和GraphSAGE
def objective(trial):
params = {
"learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
"hidden_channels": trial.suggest_int("hidden_channels", 8, 64, step=8),
"num_layers": trial.suggest_int("num_layers", 2, 5),
"dropout": trial.suggest_float("dropout", 0.1, 0.7),
}
model = model_class(
num_features=dataset.num_features,
hidden_channels=params['hidden_channels'],
num_layers=params['num_layers'],
num_classes=dataset.num_classes,
dropout=params['dropout']
)
return train_and_evaluate(model, data, params['learning_rate'])
# 优化当前模型
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=trials_per_model)
# 存储结果
results[model_name] = {
"best_value": study.best_trial.value,
"best_params": study.best_trial.params,
"study": study
}
return results
# 辅助函数:训练和评估单个配置
def train_and_evaluate(model, data, learning_rate, epochs=200):
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()
best_val_acc = 0
patience_counter = 0
patience = 50
for epoch in range(epochs):
# 训练
model.train()
optimizer.zero_grad()
out = model(data.x, data.edge_index)
loss = criterion(out[data.train_mask], data.y[data.train_mask])
loss.backward()
optimizer.step()
# 验证
model.eval()
with torch.no_grad():
out = model(data.x, data.edge_index)
pred = out.argmax(dim=1)
val_correct = pred[data.val_mask] == data.y[data.val_mask]
val_acc = int(val_correct.sum()) / int(data.val_mask.sum())
# 早停
if val_acc > best_val_acc:
best_val_acc = val_acc
patience_counter = 0
else:
patience_counter += 1
if patience_counter >= patience:
break
return best_val_acc
# 运行多模型比较
model_comparison = compare_models(dataset, trials_per_model=30)
# 输出比较结果
print("Model Comparison Results:")
for model_name, result in model_comparison.items():
print(f"{model_name}: Best Validation Accuracy = {result['best_value']:.4f}")
超参数优化结果分析
对优化结果进行深入分析,可以帮助我们理解不同超参数对模型性能的影响。
def analyze_optimization_results(study):
# 获取所有完成的试验
completed_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
print(f"Total completed trials: {len(completed_trials)}")
# 分析最佳参数
best_trial = study.best_trial
print(f"Best trial value: {best_trial.value:.4f}")
print("Best parameters:")
for key, value in best_trial.params.items():
print(f" {key}: {value}")
# 分析参数分布
param_distributions = {}
for param_name in best_trial.params.keys():
param_values = [trial.params[param_name] for trial in completed_trials if param_name in trial.params]
param_distributions[param_name] = param_values
# 绘制参数与目标值的关系
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()
for i, (param_name, values) in enumerate(param_distributions.items()):
if i >= len(axes):
break
objectives = [trial.value for trial in completed_trials if param_name in trial.params]
axes[i].scatter(values, objectives, alpha=0.6)
axes[i].set_xlabel(param_name)
axes[i].set_ylabel('Validation Accuracy')
axes[i].set_title(f'{param_name} vs Accuracy')
plt.tight_layout()
plt.show()
# 分析参数重要性
param_importance = optuna.importance.get_param_importances(study)
print("Parameter importances:")
for param, importance in param_importance.items():
print(f" {param}: {importance:.4f}")
return param_distributions, param_importance
# 分析GAT优化结果
param_distributions, param_importance = analyze_optimization_results(study)
部署最佳模型
找到最佳超参数后,我们可以训练最终模型并保存以备后续使用。
def train_final_model(best_params, data, dataset, save_path="best_model.pth"):
# 创建最终模型
model = GATModel(
num_features=dataset.num_features,
hidden_channels=best_params['hidden_channels'],
num_heads=best_params['num_heads'],
head_dim=best_params['head_dim'],
num_layers=best_params['num_layers'],
num_classes=dataset.num_classes,
dropout=best_params['dropout']
)
# 优化器
optimizer = torch.optim.Adam(
model.parameters(),
lr=best_params['learning_rate'],
weight_decay=5e-4
)
criterion = torch.nn.CrossEntropyLoss()
# 训练模型
best_val_acc = 0
best_epoch = 0
train_losses = []
val_accuracies = []
for epoch in range(200):
# 训练
model.train()
optimizer.zero_grad()
out = model(data.x, data.edge_index)
loss = criterion(out[data.train_mask], data.y[data.train_mask])
loss.backward()
optimizer.step()
train_losses.append(loss.item())
# 验证
model.eval()
with torch.no_grad():
out = model(data.x, data.edge_index)
pred = out.argmax(dim=1)
val_correct = pred[data.val_mask] == data.y[data.val_mask]
val_acc = int(val_correct.sum()) / int(data.val_mask.sum())
val_accuracies.append(val_acc)
# 保存最佳模型
if val_acc > best_val_acc:
best_val_acc = val_acc
best_epoch = epoch
torch.save(model.state_dict(), save_path)
if epoch % 10 == 0:
print(f'Epoch: {epoch:03d}, Loss: {loss.item():.4f}, Val Acc: {val_acc:.4f}')
# 加载最佳模型
model.load_state_dict(torch.load(save_path))
# 最终测试
train_acc, val_acc, test_acc = test_model(model, data)
print(f'Final Results - Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}')
return {
'model': model,
'train_losses': train_losses,
'val_accuracies': val_accuracies,
'best_epoch': best_epoch,
'final_train_acc': train_acc,
'final_val_acc': val_acc,
'final_test_acc': test_acc
}
# 训练最终模型
final_results = train_final_model(best_params, data, dataset)
# 绘制最终训练过程
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(final_results['train_losses'])
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.subplot(1, 2, 2)
plt.plot(final_results['val_accuracies'])
plt.title('Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.axvline(x=final_results['best_epoch'], color='r', linestyle='--', label=f'Best epoch: {final_results["best_epoch"]}')
plt.legend()
plt.tight_layout()
plt.show()
结论与讨论
通过本实验,我们实现了使用Optuna对图深度学习模型进行超参数优化的完整流程。以下是主要发现和结论:
-
超参数重要性:实验表明,学习率和dropout率对模型性能影响最大,其次是隐藏层维度和注意力头数。
-
模型架构选择:GAT模型在Cora数据集上表现最佳,这可能是由于其注意力机制能够更好地捕捉节点间的重要性关系。
-
优化效率:使用Optuna的TPE采样器相比随机搜索能更高效地找到优质超参数配置,平均需要更少的试验次数。
-
早停策略:引入早停机制显著减少了训练时间,同时防止了过拟合。
-
超参数相互作用:某些超参数之间存在明显的相互作用,例如较高的学习率需要配合适当的dropout率以防止过拟合。
本实验提供的代码框架可以轻松扩展到其他图神经网络架构和数据集,为图深度学习研究提供了实用的超参数优化工具。
参考文献
-
F. Scarselli, M. Gori, A. C. Tsoi, M. Hagenbuchner, and G. Monfardini, “The Graph Neural Network Model,” IEEE Transactions on Neural Networks, 2009.
-
P. Veličković, G. Cucurull, A. Casanova, A. Romero, P. Liò, and Y. Bengio, “Graph Attention Networks,” International Conference on Learning Representations, 2018.
-
T. Akiba, S. Sano, T. Yanase, T. Ohta, and M. Koyama, “Optuna: A Next-generation Hyperparameter Optimization Framework,” Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, 2019.
-
W. L. Hamilton, R. Ying, and J. Leskovec, “Inductive Representation Learning on Large Graphs,” Advances in Neural Information Processing Systems, 2017.
-
M. Fey and J. E. Lenssen, “Fast Graph Representation Learning with PyTorch Geometric,” arXiv preprint arXiv:1903.02428, 2019.
附录:完整代码结构
project/
│
├── data/
│ └── Cora/... # 自动下载的数据集
│
├── models/
│ ├── gat_model.py # GAT模型定义
│ ├── gcn_model.py # GCN模型定义
│ └── sage_model.py # GraphSAGE模型定义
│
├── utils/
│ ├── data_loader.py # 数据加载和处理
│ ├── trainer.py # 训练和评估函数
│ └── visualizer.py # 可视化函数
│
├── optimization/
│ ├── objective.py # Optuna目标函数
│ └── study_manager.py # 研究管理
│
├── configs/
│ └── default.yaml # 默认配置
│
├── main.py # 主程序入口
├── train.py # 训练脚本
├── optimize.py # 优化脚本
└── requirements.txt # 依赖项列表
这个完整的项目结构可以帮助您组织代码,使其更加模块化和可维护。
更多推荐
所有评论(0)