iT邦幫忙

0

tensorflow的CNN 錯誤問題

import tensorflow as tf
from keras.datasets import mnist
import numpy as np

class MNISTLoader():
    """Load MNIST through ``tf.keras`` and hand out random training batches.

    After construction the instance exposes:

    * ``train_data`` / ``test_data``: float32 images divided by 255, with a
      trailing channel axis appended.
    * ``train_label`` / ``test_label``: labels cast to int32.
    * ``num_train_data`` / ``num_test_data``: sample counts of each split.
    """

    def __init__(self):
        dataset = tf.keras.datasets.mnist
        (train_x, train_y), (test_x, test_y) = dataset.load_data()

        # MNIST images are uint8 (0-255) by default. Scale them to floats in
        # 0-1 and append one last axis to act as the colour channel.
        self.train_data = (train_x.astype(np.float32) / 255.0)[..., np.newaxis]  # [60000, 28, 28, 1]
        self.test_data = (test_x.astype(np.float32) / 255.0)[..., np.newaxis]    # [10000, 28, 28, 1]

        self.train_label = train_y.astype(np.int32)  # [60000]
        self.test_label = test_y.astype(np.int32)    # [10000]

        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Return ``batch_size`` randomly picked training images and their labels.

        Indices come from ``np.random.randint``, i.e. they are drawn with
        replacement, so one sample may show up several times in a batch.
        """
        sample_count = self.train_data.shape[0]
        picked = np.random.randint(0, sample_count, batch_size)
        images = self.train_data[picked]
        labels = self.train_label[picked]
        return images, labels

class CNN(tf.keras.Model):
    """Small convolutional classifier: two conv/max-pool stages, then two dense layers.

    ``call`` returns the softmax of the final 10-unit dense layer. The
    reshape step hard-codes ``7 * 7 * 64`` features, which matches the
    28x28x1 inputs noted in the shape comments below.
    """

    def __init__(self):
        super().__init__()
        # Both convolution layers share everything except the filter count:
        # a 5x5 receptive field, 'same' padding (the alternative is 'valid')
        # and a ReLU activation.
        conv_kwargs = dict(kernel_size=(5, 5), padding='same', activation=tf.nn.relu)
        # Both pooling layers are 2x2 max-pools with stride 2.
        pool_kwargs = dict(pool_size=(2, 2), strides=2)

        self.conv1 = tf.keras.layers.Conv2D(filters=32, **conv_kwargs)  # 32 kernels
        self.pool1 = tf.keras.layers.MaxPool2D(**pool_kwargs)
        self.conv2 = tf.keras.layers.Conv2D(filters=64, **conv_kwargs)  # 64 kernels
        self.pool2 = tf.keras.layers.MaxPool2D(**pool_kwargs)
        self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
        self.dense1 = tf.keras.layers.Dense(units=1024, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        """Run the forward pass and return the softmax over the 10 output units.

        Shapes after each stage for 28x28x1 inputs:
        conv1 [batch_size, 28, 28, 32] -> pool1 [batch_size, 14, 14, 32]
        -> conv2 [batch_size, 14, 14, 64] -> pool2 [batch_size, 7, 7, 64]
        -> flatten [batch_size, 7 * 7 * 64] -> dense1 [batch_size, 1024]
        -> dense2 [batch_size, 10].
        """
        pipeline = (
            self.conv1,
            self.pool1,
            self.conv2,
            self.pool2,
            self.flatten,
            self.dense1,
            self.dense2,
        )
        features = inputs
        for layer in pipeline:
            features = layer(features)
        return tf.nn.softmax(features)
    
# ---- Hyper-parameters ----
num_epochs = 5
batch_size = 50
learning_rate = 0.001

model = CNN()  # the network that gets trained below
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# Total number of optimisation steps: (batches per epoch) * (number of epochs).
# `//` is integer floor division, so the result is already an int.
num_batches = data_loader.num_train_data // batch_size * num_epochs

# ---- Training loop ----
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)  # X: images, y: labels 0-9
    with tf.GradientTape() as tape:
        # Only the forward pass and the loss need to be recorded by the tape.
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
    print("batch %d: loss %f" % (batch_index, loss.numpy()))
    # TensorFlow computes the gradient of the loss with respect to the model
    # parameters. Only trainable variables are used, so a variable without a
    # gradient can never be passed to the optimizer.
    grads = tape.gradient(loss, model.trainable_variables)
    # TensorFlow updates the parameters from those gradients.
    optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))

# ---- Evaluation on the test split ----
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
# Samples beyond the last full batch (if any) are not evaluated.
num_batches = data_loader.num_test_data // batch_size
for batch_index in range(num_batches):
    start_index = batch_index * batch_size
    end_index = start_index + batch_size
    y_pred = model.predict(data_loader.test_data[start_index:end_index])
    # Fixed: the label slice previously used the undefined name `strat_index`,
    # which raised NameError on the first evaluation batch.
    sparse_categorical_accuracy.update_state(
        y_true=data_loader.test_label[start_index:end_index],
        y_pred=y_pred,
    )
print("test accuracy: %f" % sparse_categorical_accuracy.result())

**UnknownError:** Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. [Op:Conv2D]

請問會是 CUDA + cuDNN + TensorFlow 的版本不相容造成的嗎?
目前我安裝的 TensorFlow 是非 GPU 版:2.0.0
keras:2.2.4
cudnn:7.6.4
CUDA:10.0

1 個回答

0
huahualiu
iT邦新手 5 級 ‧ 2019-12-15 22:57:40
最佳解答

cuda
cuDNN
本身就是要使用GPU訓練情況下的需求元件
所以你既然有用到cuda與cuDNN
那tensorflow當然就要裝GPU版本的


不過也可能是其他問題,這很難講哈哈,當初安裝也花我一堆時間
stackoverflow會是你的好朋友XD

感謝 我也想說網路上都在寫tf-gpu
想說是差在哪哈哈 之前很急著開頭有些版本的問題沒有太去注意@_@
偶爾遇到問題會看到stackoverflow的板
感覺有點像國外的IT?

huahualiu iT邦新手 5 級 ‧ 2019-12-16 00:05:25 檢舉

'感覺有點像國外的IT?'
你可以這樣理解,而且他的資料量更廣更大
或者說IT其實根本是模仿stackoverflow的哈哈

huahualiu iT邦新手 5 級 ‧ 2019-12-16 00:17:34 檢舉

你如果還有遇到問題
或許可以參考這篇
https://ithelp.ithome.com.tw/questions/10195675
這是我之前遇到的問題,我有回覆解決方式,情況不一定相同,不過思路maybe可參考

要測試GPU可不可以run,你google tensorflow2 doc gpu test 就有語法可以看了

我要發表回答

立即登入回答