Run this notebook online: Binder or Colab (the launch links were lost in conversion).

# 3.2. 线性回归的从零开始实现¶

%load ../utils/djl-imports


## 3.2.1. 生成数据集¶

(3.2.1)$\mathbf{y} = \mathbf{X} \mathbf{w} + b + \boldsymbol{\epsilon}.$

/** Immutable pair bundling a feature matrix {@code X} with its label vector {@code y}. */
class DataPoints {
    private final NDArray X;
    private final NDArray y;

    public DataPoints(NDArray X, NDArray y) {
        this.X = X;
        this.y = y;
    }

    /** Returns the feature matrix. */
    public NDArray getX() {
        return X;
    }

    /** Returns the label vector. */
    public NDArray getY() {
        return y;
    }
}

// Generate y = X w + b + noise
// Returns numExamples rows of standard-normal features X together with
// labels y = X·w + b perturbed by Gaussian noise (std 0.01).
public DataPoints syntheticData(NDManager manager, NDArray w, float b, int numExamples) {
    NDArray X = manager.randomNormal(new Shape(numExamples, w.size()));
    // Linear model; the scalar bias b broadcasts over all rows.
    NDArray y = X.dot(w).add(b);
    // Add small observation noise so the data is not perfectly linear.
    y = y.add(manager.randomNormal(0, 0.01f, y.getShape(), DataType.FLOAT32));
    return new DataPoints(X, y);
}

// Root manager that owns all NDArrays created below.
NDManager manager = NDManager.newBaseManager();

// Ground-truth parameters the training loop should recover: w = [2, -3.4], b = 4.2.
NDArray trueW = manager.create(new float[]{2, -3.4f});
float trueB = 4.2f;

// 1000 synthetic (feature, label) examples drawn from the true model plus noise.
DataPoints dp = syntheticData(manager, trueW, trueB, 1000);
NDArray features = dp.getX();
NDArray labels = dp.getY();


// Peek at the first example: its two features and its label.
System.out.printf("features: [%f, %f]\n", features.get(0).getFloat(0), features.get(0).getFloat(1));
System.out.println("label: " + labels.getFloat(0));

features: [0.292537, -0.718359]
label: 7.234216


// Extract the second feature column (index 1) and the labels as plain float arrays.
float[] X = features.get(new NDIndex(":, 1")).toFloatArray();
float[] y = labels.toFloatArray();

// Wrap them in a Tablesaw table; the original cell dropped the
// .addColumns(...) call, leaving a dangling argument list.
Table data = Table.create("Data").addColumns(
    FloatColumn.create("X", X),
    FloatColumn.create("y", y)
);

// Scatter plot of feature[1] vs. label — should show a clear linear trend.
ScatterPlot.create("Synthetic Data", data, "X", "y");


## 3.2.2. 读取数据集¶

import ai.djl.training.dataset.ArrayDataset;
import ai.djl.training.dataset.Batch;

// Number of examples drawn per training iteration.
int batchSize = 10;

// Wrap the in-memory arrays as a DJL dataset that yields minibatches.
ArrayDataset dataset = new ArrayDataset.Builder()
.setData(features) // Set the Features
.optLabels(labels) // Set the Labels
.setSampling(batchSize, false) // set the batch size and random sampling to false
.build();


for (Batch batch : dataset.getData(manager)) {
// Call head() to get the first NDArray
System.out.println(X);
System.out.println(y);
// Don't forget to close the batch!
batch.close();
break;
}

ND: (10, 2) gpu(0) float32
[[ 0.2925, -0.7184],
[ 0.1   , -0.3932],
[ 2.547 , -0.0034],
[ 0.0083, -0.251 ],
[ 0.129 ,  0.3728],
[ 1.0822, -0.665 ],
[ 0.5434, -0.7168],
[-1.4913,  1.4805],
[ 0.1374, -1.2208],
[ 0.3072,  1.1135],
]

ND: (10) gpu(0) float32
[ 7.2342,  5.7411,  9.3138,  5.0536,  3.1772,  8.6284,  7.7434, -3.808 ,  8.6185,  1.0259]


## 3.2.3. 初始化模型参数¶

// Initialize the weights from N(0, 0.01^2) and the bias at zero.
NDArray w = manager.randomNormal(0, 0.01f, new Shape(2, 1), DataType.FLOAT32);
NDArray b = manager.zeros(new Shape(1));
// Collect both parameters so the training loop can iterate over them.
NDList params = new NDList(w, b);


## 3.2.4. 定义模型¶

// Saved in Training.java for later use
// The linear regression model: returns X·w + b (b broadcasts over rows).
// The original cell had an empty body.
public NDArray linreg(NDArray X, NDArray w, NDArray b) {
    return X.dot(w).add(b);
}


## 3.2.5. 定义损失函数¶

// Saved in Training.java for later use
// Elementwise squared loss: (yHat - y)^2 / 2. y is reshaped to match yHat
// so a (n) label vector aligns with (n, 1) predictions.
public NDArray squaredLoss(NDArray yHat, NDArray y) {
    // Compute the residual once instead of twice.
    NDArray diff = yHat.sub(y.reshape(yHat.getShape()));
    return diff.mul(diff).div(2);
}


## 3.2.6. 定义优化算法¶

// Saved in Training.java for later use
// Minibatch stochastic gradient descent: updates each parameter in place.
// The original cell left the update as a comment, so parameters never moved.
public static void sgd(NDList params, float lr, int batchSize) {
    for (int i = 0; i < params.size(); i++) {
        NDArray param = params.get(i);
        // In-place update: param -= lr * param.gradient / batchSize
        param.subi(param.getGradient().mul(lr).div(batchSize));
    }
}


## 3.2.7. 训练¶

• 初始化参数

• 重复，直到完成

• 计算梯度 $$\mathbf{g} \leftarrow \partial_{(\mathbf{w},b)} \frac{1}{|\mathcal{B}|} \sum_{i \in \mathcal{B}} l(\mathbf{x}^{(i)}, y^{(i)}, \mathbf{w}, b)$$

• 更新参数 $$(\mathbf{w}, b) \leftarrow (\mathbf{w}, b) - \eta \mathbf{g}$$

float lr = 0.03f;  // Learning Rate
int numEpochs = 3;  // Number of Iterations

// Attach gradients: mark every parameter so backward() records its gradient.
// (The original loop body was empty, so no gradients would be computed.)
for (NDArray param : params) {
    param.setRequiresGradient(true);
}

for (int epoch = 0; epoch < numEpochs; epoch++) {
    // Assuming the number of examples can be divided by the batch size, all
    // the examples in the training dataset are used once in one epoch
    // iteration. The features and tags of minibatch examples are given by X
    // and y respectively.
    for (Batch batch : dataset.getData(manager)) {
        NDArray X = batch.getData().head();
        NDArray y = batch.getLabels().head();

        // Record the forward pass so gradients can flow back to w and b.
        try (GradientCollector gc = Engine.getInstance().newGradientCollector()) {
            // Minibatch loss in X and y
            NDArray l = squaredLoss(linreg(X, params.get(0), params.get(1)), y);
            gc.backward(l);  // Compute gradient on l with respect to w and b
        }
        sgd(params, lr, batchSize);  // Update parameters using their gradient

        batch.close();
    }
    // Report the mean loss over the full training set after each epoch.
    NDArray trainL = squaredLoss(linreg(features, params.get(0), params.get(1)), labels);
    System.out.printf("epoch %d, loss %f\n", epoch + 1, trainL.mean().getFloat());
}

epoch 1, loss 0.042579
epoch 2, loss 0.000161
epoch 3, loss 0.000052


float[] w = trueW.sub(params.get(0).reshape(trueW.getShape())).toFloatArray();
System.out.println(String.format("Error in estimating w: [%f, %f]", w[0], w[1]));
System.out.println(String.format("Error in estimating b: %f", trueB - params.get(1).getFloat()));

Error in estimating w: [-0.000233, -0.000601]
Error in estimating b: 0.000912


## 3.2.8. 小结¶

• 我们学习了深度网络是如何实现和优化的。在这一过程中只使用NDArray和自动微分，不需要定义层或复杂的优化器。

• 这一节只触及到了表面知识。在下面的部分中，我们将基于刚刚介绍的概念描述其他模型，并学习如何更简洁地实现其他模型。

## 3.2.9. 练习¶

1. 如果我们将权重初始化为零，会发生什么。算法仍然有效吗？

2. 假设你是 乔治·西蒙·欧姆 ，试图为电压和电流的关系建立一个模型。你能使用自动微分来学习模型的参数吗?

3. 您能基于 普朗克定律 使用光谱能量密度来确定物体的温度吗？

4. 如果你想计算二阶导数可能会遇到什么问题？你会如何解决这些问题？

5. 为什么在 squaredLoss() 函数中需要使用 reshape() 函数？

6. 尝试使用不同的学习率，观察损失函数值下降的快慢。

7. 如果样本个数不能被批量大小整除，dataset.getData()函数的行为会有什么变化？