DAY 17
AI & Data

## 【17】訓練到一半遇到 nan 嗎? 梯度爆炸與梯度消失的測試實驗

Colab連結

``````class PrintWeightsCallback(tf.keras.callbacks.Callback):
def on_epoch_begin(self, epoch, logs=None):
l1_w = self.model.layers[1].weights[0].numpy()[:6,0]
print(f'layer1: {l1_w}')
``````

``````x = np.linspace(0,1,100, dtype=np.float32)  # 介於[0,+1]之間的樣本
noise = np.random.normal(0, 0.05, x.shape).astype(np.float32)  # noise取樣本區間的5%, (1-0)*0.05=0.05
y = x
z = x + noise

fig = plt.figure(figsize=(12,8))
plt.scatter(x,z)
plt.plot(x,y,color='red')
plt.show()

train_data = tf.data.Dataset.from_tensor_slices((z,y))

ds_train = train_data.cache()
ds_train = ds_train.shuffle(SHUFFLE_SIZE)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)
``````

``````model = tf.keras.Sequential()
model.compile(loss=tf.keras.losses.mean_squared_error,
optimizer=tf.keras.optimizers.SGD(learning_rate=0.1))

history = model.fit(
ds_train,
epochs=EPOCHS,
verbose=True,
callbacks=[PrintWeightsCallback()])
``````

```text
Epoch 1/20
layer1: [ 0.07604259  0.27522993 -0.10235941  0.08207107 -0.04505447 -0.07462746]
5/5 [==============================] - 0s 2ms/step - loss: 0.1663
(略)
Epoch 20/20
layer1: [ 0.00553192  0.213019   -0.1658938   0.21581025 -0.08234034 -0.2084046 ]
5/5 [==============================] - 0s 2ms/step - loss: 0.0026
```

``````print(f'1={model.predict([1])[0]}, 2={model.predict([2])[0]}, 3={model.predict([3])[0]}')
``````
```text
1=[0.98057926], 2=[1.9300488], 3=[2.8795183]
```

``````x = np.linspace(0,4,100, dtype=np.float32)  # 介於[0,+4]之間的樣本
noise = np.random.normal(0, 0.1, x.shape).astype(np.float32)  # noise取樣本區間的5%, (4-0)*0.05=0.1
y = x
z = x + noise

fig = plt.figure(figsize=(12,8))
plt.scatter(x,z)
plt.plot(x,y,color='red')
plt.show()

train_data = tf.data.Dataset.from_tensor_slices((z,y))

ds_train = train_data.cache()
ds_train = ds_train.shuffle(SHUFFLE_SIZE)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)
``````

```text
Epoch 1/20
layer1: [-0.07210064  0.24890727  0.30479372 -0.30981055 -0.08024356 -0.16160044]
5/5 [==============================] - 0s 2ms/step - loss: 57269209748267820971597475348480.0000
Epoch 2/20
layer1: [ 6.2245629e+22  1.1131139e+23  1.2879328e+22 -1.5327043e+23
 -7.5734576e+22 -1.5577560e+23]
5/5 [==============================] - 0s 2ms/step - loss: nan
Epoch 3/20
layer1: [nan nan nan nan nan nan]
5/5 [==============================] - 0s 2ms/step - loss: nan
(略)
```

``````x = np.linspace(0,1,100, dtype=np.float32)  # 介於[0,1]之間的樣本
noise = np.random.normal(0, 0.05, x.shape).astype(np.float32)  # noise取樣本區間的5%, (1-0)*0.05=0.05
y = x
z = x + noise

fig = plt.figure(figsize=(12,8))
plt.scatter(x,z)
plt.plot(x,y,color='red')
plt.show()

train_data = tf.data.Dataset.from_tensor_slices((z,y))

ds_train = train_data.cache()
ds_train = ds_train.shuffle(SHUFFLE_SIZE)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)
``````

``````ACT='sigmoid' # you can also try ACT=None or ACT='relu'

model = tf.keras.Sequential()
model.compile(loss=tf.keras.losses.mean_squared_error,
optimizer=tf.keras.optimizers.SGD(learning_rate=0.1))

history = model.fit(
ds_train,
epochs=EPOCHS,
verbose=True,
callbacks=[PrintWeightsCallback()])
``````

```text
Epoch 1/20
layer1: [ 0.18758693 -0.0771921  -0.01569629  0.17212537  0.1411779   0.10927829]
5/5 [==============================] - 0s 3ms/step - loss: 0.5679
Epoch 2/20
layer1: [ 0.18786529 -0.07689717 -0.01544899  0.17244542  0.14150831  0.10956431]
5/5 [==============================] - 0s 2ms/step - loss: 0.1452
(略)
Epoch 20/20
layer1: [ 0.18772912 -0.07688259 -0.01586881  0.17269064  0.14184701  0.10949538]
5/5 [==============================] - 0s 3ms/step - loss: 0.0864
```

``````print(f'1={model.predict([1])[0]}, 2={model.predict([2])[0]}, 3={model.predict([3])[0]}')
``````
```text
1=[0.5729448], 2=[0.5735199], 3=[0.5740514]
```