
Commit e6edcad

linear:kaiming Init
1 parent 7c05fc1 commit e6edcad

6 files changed: +179 −20 lines changed

datasetloader/mnist/train/mnist_train.go

Lines changed: 7 additions & 7 deletions
@@ -19,19 +19,19 @@ func main() {
 	}
 	// set the hyperparameters

-	numClasses := 10      // number of classes
-	batchSize := 32       // batch size
-	learningRate := 0.001 // learning rate
-	epochs := 30          // number of training epochs
+	numClasses := 10     // number of classes
+	batchSize := 32      // batch size
+	learningRate := 0.01 // learning rate
+	epochs := 30         // number of training epochs
 	// create the model
 	m := &model.Model{
 		Optimizer: optimizer.NewSGD(learningRate), // learning rate set to 0.01
 	}
-	m.Layer(layer.Linear(mnist.TRAIN_MNIST.ImageSize, 128)).
+	m.Layer(layer.Linear(mnist.TRAIN_MNIST.ImageSize, 128, true)).
 		Layer(layer.Activation(layer.Relu, layer.ReluDerivative)).
-		Layer(layer.Linear(128, 64)).
+		Layer(layer.Linear(128, 64, true)).
 		Layer(layer.Activation(layer.Relu, layer.ReluDerivative)).
-		Layer(layer.Linear(64, numClasses)) // add each layer to the model
+		Layer(layer.Linear(64, numClasses, true)) // add each layer to the model

 	// define the forward-pass function
 	m.ForwardFunc = func(input *dl.Tensor) (output *dl.Tensor) {
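
Two changes land in this trainer: the learning rate moves from 0.001 to 0.01, which brings it in line with the existing comment on optimizer.NewSGD, and every layer.Linear call gains the new biasInit argument introduced in dl/layer/linear.go below.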
Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torchvision import datasets, transforms
+from torch.utils.data import DataLoader
+
+# select the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# hyperparameters
+num_classes = 10
+batch_size = 32
+learning_rate = 0.001
+epochs = 30
+
+# load the MNIST dataset
+transform = transforms.Compose([
+    transforms.ToTensor(),
+])
+
+
+train_dataset = datasets.MNIST(root='data', train=True, download=False, transform=transform)
+train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
+
+# define the validation set
+val_dataset = datasets.MNIST(root='data', train=False, download=False, transform=transform)
+val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
+
+# early-stopping parameters
+patience = 5  # number of epochs to tolerate without improvement
+best_loss = float('inf')
+trigger_times = 0
+
+# define the model
+class Net(nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.flatten = nn.Flatten()
+        self.fc1 = nn.Linear(28 * 28, 128)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Linear(128, 64)
+        self.relu2 = nn.ReLU()
+        self.fc3 = nn.Linear(64, num_classes)
+
+    def forward(self, x):
+        x = self.flatten(x)
+        x = self.relu1(self.fc1(x))
+        x = self.relu2(self.fc2(x))
+        x = self.fc3(x)
+        return x
+
+model = Net().to(device)
+
+# define the loss function and optimizer
+criterion = nn.CrossEntropyLoss()
+optimizer = optim.SGD(model.parameters(), lr=learning_rate)
+
+# training loop
+while True:  # no fixed epoch limit; loop until early stopping triggers
+    model.train()  # set the model to training mode
+    running_loss = 0.0
+    for batch_idx, (inputs, labels) in enumerate(train_loader):
+        inputs, labels = inputs.to(device), labels.to(device)
+
+        # forward pass
+        outputs = model(inputs)
+        loss = criterion(outputs, labels)
+
+        # backward pass and optimization
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        running_loss += loss.item()
+
+    # print the average training loss for each epoch
+    average_loss = running_loss / len(train_loader)
+    print(f"Average Training Loss: {average_loss:.4f}", end="")
+
+    # evaluate on the validation set
+    model.eval()  # set the model to evaluation mode
+    val_loss = 0.0
+    with torch.no_grad():
+        for inputs, labels in val_loader:
+            inputs, labels = inputs.to(device), labels.to(device)
+            outputs = model(inputs)
+            loss = criterion(outputs, labels)
+            val_loss += loss.item()
+    average_val_loss = val_loss / len(val_loader)
+    print(f" Validation Loss: {average_val_loss:.4f}")
+
+    # early-stopping logic
+    if average_val_loss < best_loss:
+        best_loss = average_val_loss
+        trigger_times = 0  # reset the trigger count
+    else:
+        trigger_times += 1
+        if trigger_times >= patience:
+            print("Early stopping triggered")
+            break
+print("Training complete")

dl/layer/linear.go

Lines changed: 16 additions & 7 deletions

@@ -2,10 +2,11 @@ package layer

 import (
 	"deepgo/dl"
+	"math"
 )

 // Linear creates a new linear layer that supports batching
-func Linear(in_features, out_features int) (l *ComputeGraphNode) {
+func Linear(in_features, out_features int, biasInit bool) (l *ComputeGraphNode) {
 	l = NewNode(nil, nil)

 	l.SetAttr("in_features", in_features)
@@ -14,10 +15,18 @@ func Linear(in_features, out_features int) (l *ComputeGraphNode) {
 	weight := dl.NewTensor([]int{out_features, in_features})
-	bias := dl.NewTensor([]int{out_features})

-	// use He initialization
-	weight.He(in_features)
-	bias.He(in_features)
+	// initialize the weights

+	weight.KaimingUniform(math.Sqrt(5))
+	l.RegisterParameter("weight", weight)
+
+	if biasInit {
+		// initialize the bias
+		biasT := dl.NewTensor([]int{out_features})
+		fanIn, _ := dl.CalculateFanInAndFanOut(weight)
+		bound := 1 / math.Sqrt(float64(fanIn))
+		biasT.Uniform(-bound, bound)
+		l.RegisterParameter("bias", biasT)
+	} else {
+		l.RegisterParameter("bias", nil)
+	}
-	l.RegisterParameter("weight", weight)
-	l.RegisterParameter("bias", bias)

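When biasInit is set, the bias bound is 1/√fan_in computed from the weight's shape; for the 784-input MNIST layer that is 1/√784 = 1/28 ≈ 0.0357. This matches PyTorch's nn.Linear.reset_parameters, which draws the bias from U(−1/√fan_in, 1/√fan_in).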

dl/layer/linear.py

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+import json
+import sys
+
+def create_linear_layer(in_features, out_features):
+    # create the linear layer
+    linear = nn.Linear(in_features, out_features)
+
+    # get the weight and bias
+    weight = linear.weight.data.tolist()
+    bias = linear.bias.data.tolist()
+
+    return {
+        "weight": weight,
+        "bias": bias
+    }
+
+if __name__ == "__main__":
+    # read the input and output feature counts from command-line arguments
+    in_features = int(sys.argv[1])
+    out_features = int(sys.argv[2])
+
+    # create the linear layer and get its parameters
+    params = create_linear_layer(in_features, out_features)
+
+    # print the weight and bias
+    print(json.dumps(params))
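
The script dumps the parameters of a freshly constructed PyTorch nn.Linear, presumably for cross-checking the Go initializer against PyTorch's defaults. For example, python dl/layer/linear.py 784 128 prints a JSON object whose "weight" is a 128×784 nested list and whose "bias" holds 128 values.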

dl/layer/linear_test.go

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ func TestLinear(t *testing.T) {
 	// create a linear layer with 2 input features and 3 output features
 	inFeatures := 2
 	outFeatures := 3
-	linearLayer := Linear(inFeatures, outFeatures)
+	linearLayer := Linear(inFeatures, outFeatures, true)

 	// create an input tensor of shape [batchSize, inFeatures]
 	inputTensor := dl.NewTensor([]int{1, inFeatures}, 1.0, 2.0) // input is [1, 2]

dl/tensor_initializer.go

Lines changed: 26 additions & 5 deletions
@@ -13,11 +13,32 @@ func (t *Tensor) Xavier(inFeatures int) {
 	t.Uniform(-stdv, stdv)
 }

-// He initializes the tensor using He initialization
-func (t *Tensor) He(inFeatures int) {
-	stdv := math.Sqrt(2.0 / float64(inFeatures))
-	// draw uniform random numbers in the range [-stdv, stdv]
-	t.Uniform(-stdv, stdv)
+// KaimingUniform initializes the tensor with Kaiming uniform initialization; note a is used directly as the gain (PyTorch instead derives gain = sqrt(2/(1+a^2)) from a)
+func (t *Tensor) KaimingUniform(a float64) {
+	fanIn, _ := CalculateFanInAndFanOut(t)
+	std := a / math.Sqrt(float64(fanIn))
+	bound := math.Sqrt(3.0) * std
+	t.Uniform(-bound, bound)
+}
+
+// CalculateFanInAndFanOut computes fan_in and fan_out from the tensor's shape
+func CalculateFanInAndFanOut(t *Tensor) (fanIn, fanOut int) {
+	dimensions := len(t.Shape)
+	if dimensions < 2 {
+		return 1, 1
+	}
+
+	numInputFmaps := t.Shape[1]
+	numOutputFmaps := t.Shape[0]
+	receptiveFieldSize := 1
+	if dimensions > 2 {
+		for _, s := range t.Shape[2:] {
+			receptiveFieldSize *= s
+		}
+	}
+	fanIn = numInputFmaps * receptiveFieldSize
+	fanOut = numOutputFmaps * receptiveFieldSize
+	return fanIn, fanOut
 }

 // Normal initializes the tensor with a normal distribution
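
A minimal usage sketch of the new initializer on the first MNIST layer's weight, assuming the deepgo/dl API exactly as added in this commit:

package main

import (
	"deepgo/dl"
	"fmt"
	"math"
)

func main() {
	// weight shape is [out_features, in_features], as in layer.Linear
	weight := dl.NewTensor([]int{128, 784})

	// fan_in = Shape[1] × receptive field size (1 for a 2-D tensor) = 784
	fanIn, fanOut := dl.CalculateFanInAndFanOut(weight)

	// bound used by KaimingUniform(√5): √3 · √5/√fan_in = √15/28 ≈ 0.1383
	a := math.Sqrt(5)
	bound := math.Sqrt(3.0) * a / math.Sqrt(float64(fanIn))
	fmt.Printf("fanIn=%d fanOut=%d bound=%.4f\n", fanIn, fanOut, bound)

	// every element of weight is now drawn from U(-bound, bound)
	weight.KaimingUniform(a)
}

Because a is applied directly as the gain, this bound is √15 ≈ 3.87× wider than the 1/√fan_in that PyTorch's kaiming_uniform_ produces for the same a = √5, since PyTorch first converts a to gain √(2/(1+a²)) = √(1/3).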
