DAY 7
0
AI & Data

## 【7】Dataset 的三個API : Shuffle Batch Repeat 如果使用順序不同會產生的影響

Colab連結

Shuffle:

Batch:

Repeat:

```python
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
``````

```python
BATCH_SIZE=4
SHUFFLE_SIZE = 13

# Order under test: shuffle -> batch -> repeat.
# Batching happens before repeating, so every pass over the data is cut into
# batches on its own; a pass whose length is not a multiple of BATCH_SIZE ends
# with a short batch, and no batch mixes elements from two passes.
ds = dataset.shuffle(SHUFFLE_SIZE)
ds = ds.batch(BATCH_SIZE)
ds = ds.repeat()

# repeat() without a count never ends, so take() bounds the loop.
for example in ds.take(12):
    batch = example.numpy()
    print(batch)
``````

```text
[13 11  9  4]
[12 10  3  5]
[2 1 6 8]
[7]
[ 6  7  4 10]
[11  2  5 13]
[ 9  1 12  3]
[8]
[ 5  4  8 11]
[ 6 13  3 10]
[7 1 2 9]
[12]
``````

```python
BATCH_SIZE=4
SHUFFLE_SIZE = 13

# Order under test: repeat -> shuffle -> batch.
# The shuffle buffer is filled from an endless stream, so it can hold elements
# from more than one pass at once; a single batch may therefore contain the
# same value twice.
ds = dataset.repeat()
ds = ds.shuffle(SHUFFLE_SIZE)
ds = ds.batch(BATCH_SIZE)

# The repeated dataset never ends, so take() bounds the loop.
for example in ds.take(12):
    batch = example.numpy()
    print(batch)

``````

```text
[11  9  6  3]
[1 1 4 5]
[ 2 12  2  3]
[10 13  7  8]
[2 1 4 3]
[ 9  4 11 10]
[ 7  5 10 13]
[ 8  2 13  6]
[11  4 12  1]
[10  6 12  5]
[6 7 8 8]
[3 5 9 7]
``````

```python
BATCH_SIZE=4
SHUFFLE_SIZE = 13

# Order under test: shuffle -> repeat -> batch.
# Shuffling is applied before repeating, and batching is applied to the
# repeated stream, so every batch is full and a batch may straddle the
# boundary between two passes.
ds = dataset.shuffle(SHUFFLE_SIZE)
ds = ds.repeat()
ds = ds.batch(BATCH_SIZE)

# The repeated dataset never ends, so take() bounds the loop.
for example in ds.take(12):
    batch = example.numpy()
    print(batch)
``````

```text
[ 7 11  2  4]
[ 6  5  9 13]
[ 8  1 12 10]
[ 3  6  5 12]
[4 7 8 9]
[11 13  3  1]
[ 2 10  3  4]
[ 5 11  9  2]
[12  6  1  8]
[ 7 10 13 12]
[11 10  2  7]
[ 5  9 13  6]
``````

```python
x = np.array(range(100))
# Repeat every element 10 times in place of the original array
# (0, 0, ..., 1, 1, ...), keeping the values in ascending order.
x = np.repeat(x, 10)
print(f'length: {len(x)}')

# Wrap the array so that each element becomes one dataset example.
dataset = tf.data.Dataset.from_tensor_slices(x)
``````

```python
SHUFFLE_SIZE = 10

# shuffle() only draws from a buffer of SHUFFLE_SIZE elements at a time.
# When that buffer is much smaller than an ordered dataset, the output
# stays close to the original order.
ds = dataset.shuffle(SHUFFLE_SIZE)
ds = ds.repeat()
ds = ds.batch(BATCH_SIZE)

# The repeated dataset never ends, so take() bounds the loop.
for example in ds.take(100):
    batch = example.numpy()
    print(batch)
``````

```text
[0 0 0 0]
[0 1 0 1]
[1 1 0 0]
[2 1 0 0]
[2 1 2 1]
[1 2 3 2]
[3 1 3 1]
[2 3 3 2]
(略)
``````

```python
SHUFFLE_SIZE=100

# Same pipeline with a different SHUFFLE_SIZE: shuffle() still only draws from
# a buffer of SHUFFLE_SIZE elements, so on an ordered dataset larger than the
# buffer the early batches are still made of the earliest values.
ds = dataset.shuffle(SHUFFLE_SIZE)
ds = ds.repeat()
ds = ds.batch(BATCH_SIZE)

# The repeated dataset never ends, so take() bounds the loop.
for example in ds.take(100):
    batch = example.numpy()
    print(batch)
``````

```text
[3 4 5 0]
[4 1 4 9]
[2 6 3 7]
[10  8  4  3]
[2 0 3 7]
[5 0 4 0]
[ 8  0 11  7]
[11  6  0 12]
[11 10  6  2]
[13 11  9  6]
(略)
``````

```python
x = np.array(range(100))
# Repeat every element 10 times, then shuffle the whole array in place with
# NumPy so the data is already in random order before tf.data sees it.
x = np.repeat(x, 10)
np.random.shuffle(x)
print(f'length: {len(x)}')

# Wrap the array so that each element becomes one dataset example.
dataset = tf.data.Dataset.from_tensor_slices(x)
``````
```python
SHUFFLE_SIZE=100

# Same shuffle -> repeat -> batch pipeline as before; shuffle() draws from a
# buffer of SHUFFLE_SIZE elements, so how well mixed the batches are also
# depends on the order of the underlying dataset.
ds = dataset.shuffle(SHUFFLE_SIZE)
ds = ds.repeat()
ds = ds.batch(BATCH_SIZE)

# The repeated dataset never ends, so take() bounds the loop.
for example in ds.take(100):
    batch = example.numpy()
    print(batch)
``````

```text
[19 12 88 32]
[13 43 76 91]
[85 24 63 58]
[48 52 44 82]
[82 58 46 26]
[24 20 85 63]
``````