藝術風格轉換

2018 iT 邦幫忙鐵人賽

DAY 22

AI & Machine Learning

探索 Microsoft CNTK 機器學習工具系列第 22 篇

2018鐵人賽

HO-HSUN

2018-01-10 19:38:36

2213 瀏覽

分享至

Introduction

這裡不是創造一張圖像，而是將一張圖像風格轉換，渲染成另一種畫風。

Tasks

引用物件

from __future__ import print_function
import numpy as np
from scipy import optimize as opt
import cntk as C
from PIL import Image
import requests
import h5py
import os
%matplotlib inline
import matplotlib.pyplot as plt

# Helper module from CNTK's test package; presumably selects the compute
# device from the pytest environment when one is configured -- TODO confirm.
import cntk.tests.test_utils
cntk.tests.test_utils.set_device_from_pytest_env()
# Pin CNTK's random seed to 1 (presumably to make runs repeatable).
C.cntk_py.set_fixed_random_seed(1)

宣告函式：download，下載檔案。

def download(url, filename):
    """Stream the resource at ``url`` into the local file ``filename``.

    The body is fetched in 1 MiB chunks so that large files never have to be
    held in memory at once.

    Args:
        url: address of the resource to fetch.
        filename: path of the file to create (overwritten if it exists).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True) as response:
        # Fail before touching the disk: otherwise an HTTP error page would be
        # saved under `filename` and later be mistaken for the real file.
        response.raise_for_status()
        with open(filename, 'wb') as handle:
            for data in response.iter_content(chunk_size=2**20):
                if data:
                    handle.write(data)

宣告函式：load_vgg，讀取 VGG 模型。

def load_vgg(path):
    """Read the per-layer parameter arrays from an HDF5 weights file.

    The file is expected to carry an ``nb_layers`` attribute and one
    ``layer_<k>`` group per layer; each group carries an ``nb_params``
    attribute and ``param_<p>`` datasets.

    Args:
        path: location of the HDF5 weights file.

    Returns:
        A list with one entry per layer; each entry is the list of that
        layer's parameter arrays, fully read into memory.
    """
    # Open read-only explicitly (the default mode differs between h5py
    # versions) and let the context manager close the file even when a
    # group or attribute is missing.
    with h5py.File(path, 'r') as f:
        layers = []
        for k in range(f.attrs['nb_layers']):
            g = f['layer_{}'.format(k)]
            n = g.attrs['nb_params']
            # [:] copies the dataset out of the file so it survives the close.
            layers.append([g['param_{}'.format(p)][:] for p in range(n)])
    return layers

設定變數，CNTK 環境變數。

# Environment variable that, when set, names the directory holding CNTK's
# external test data.
envvar = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'


def is_test():
    """Return True when ``envvar`` is present in the process environment."""
    return os.environ.get(envvar) is not None

檢查本地資料夾是否已有 VGG 模型檔案，不然就從網路下載。

path = 'vgg16_weights.bin'
url = 'https://cntk.ai/jup/models/vgg16_weights.bin'

# Prefer a weights file that is already in the working directory.
if not os.path.exists(path):
    if not is_test():
        # Ordinary run without a local copy: fetch the file from the web.
        print('downloading VGG model (~0.5GB)')
        download(url, path)
    else:
        # Possibly running inside CNTK's test infrastructure: point at the
        # copy kept under the external test-data directory instead.
        path = os.path.join(os.environ[envvar], 'PreTrainedModels', 'Vgg16', 'v0', path)
layers = load_vgg(path)
print('loaded VGG model')

宣告函式：vggblock，設定 VGG 神經網路及卷積層。

def vggblock(x, arrays, layer_map, name):
    """Apply one convolution + bias + ReLU step with fixed parameters.

    Args:
        x: CNTK node feeding the block.
        arrays: sequence whose first item is the convolution kernel and whose
            second item is the bias vector.
        layer_map: dict that receives the block output under ``name``.
        name: key under which the output is recorded in ``layer_map``.

    Returns:
        The ReLU output node.
    """
    weights, bias = arrays[0], arrays[1]
    kernel = C.constant(value=weights)
    # One bias value per leading axis entry, broadcast over the last two axes.
    offset = C.constant(value=np.reshape(bias, (-1, 1, 1)))
    # NOTE(review): auto_padding presumably disables padding on the channel
    # axis and enables it on both spatial axes -- confirm against CNTK docs.
    convolved = C.convolution(kernel, x, auto_padding=[False, True, True])
    activation = C.relu(convolved + offset)
    layer_map[name] = activation
    return activation

宣告函式：vggpool，設定池化層(pooling layer)

def vggpool(x):
    """Return ``x`` passed through average pooling with (2, 2) window and strides."""
    window = (2, 2)
    strides = (2, 2)
    return C.pooling(x, C.AVG_POOLING, window, strides)

宣告函式：model，建立 VGG 神經網路。

def model(x, layers):
    """Stack the convolutional part of the network on top of ``x``.

    Only entries of ``layers`` that hold exactly two arrays, the first of
    them 4-dimensional, are treated as convolution parameters. They are
    consumed in order by five stages of 2, 2, 3, 3 and 3 blocks, each stage
    followed by a pooling step.

    Args:
        x: CNTK input node.
        layers: per-layer parameter lists as returned by ``load_vgg``.

    Returns:
        A pair ``(top, taps)``: the node after the last pooling step and a
        combined function of every block output, ordered by sorted layer name
        (``conv<stage>_<index>``).
    """
    conv_params = [p for p in layers if len(p) == 2 and len(p[0].shape) == 4]
    tapped = {}
    used = 0
    for stage, depth in ((1, 2), (2, 2), (3, 3), (4, 3), (5, 3)):
        for index in range(depth):
            label = 'conv%d_%d' % (stage, index + 1)
            x = vggblock(x, conv_params[used + index], tapped, label)
        used += depth
        x = vggpool(x)

    return x, C.combine([tapped[key] for key in sorted(tapped)])

總變異損失(total variation loss)是相鄰像素值差異平方的平均值（水平與垂直方向各算一次，再取兩者的平均）。

風格損失(style loss)是代表風格圖像的共變異矩陣和代表原始圖像的共變異矩陣之間的均方差，有許多種實現方式和計算方法。

宣告函式：定義損失函式，即成本函式。

def flatten(x):
    """Reshape ``x`` so its last two axes are merged into one.

    The result has shape ``(x.shape[-3], x.shape[-2] * x.shape[-1])``.
    ``x`` must have at least three axes.
    """
    assert len(x.shape) >= 3
    channels, rows, cols = x.shape[-3], x.shape[-2], x.shape[-1]
    return C.reshape(x, (channels, rows * cols))


def gram(x):
    """Return the CNTK node for the Gram matrix of mean-centred ``x``.

    ``x`` is flattened to two axes, the mean over all of ``x`` is subtracted,
    and the result is multiplied by its own transpose.
    """
    flat = flatten(x)
    overall_mean = C.reduce_mean(x)
    centered = C.minus(flat, overall_mean)
    return C.times_transpose(centered, centered)


def npgram(x):
    """NumPy counterpart of ``gram``: Gram matrix of mean-centred ``x``.

    The last two axes of ``x`` are merged, all leading axes become rows, the
    mean over all of ``x`` is subtracted, and the rows are multiplied with
    their own transpose.
    """
    pixels = x.shape[-2] * x.shape[-1]
    centered = np.reshape(x, (-1, pixels)) - np.mean(x)
    return np.dot(centered, centered.T)


def style_loss(a, b):
    """Squared error between the Gram matrices of ``a`` and ``b``, normalised.

    Args:
        a: CNTK node with a 3-axis shape ``(channels, side, side)``.
        b: NumPy array whose Gram matrix is the fixed target.

    Returns:
        ``squared_error(gram(a), npgram(b)) / (channels**2 * side**4)``.
    """
    channels, height, width = a.shape
    assert height == width
    normaliser = channels**2 * height**4
    return C.squared_error(gram(a), npgram(b)) / normaliser


def content_loss(a,b):
    """Squared error between ``a`` and ``b`` divided by the element count of ``a``.

    ``a`` must expose a 3-axis shape ``(channels, height, width)``.
    """
    channels, height, width = a.shape
    element_count = channels * height * width
    return C.squared_error(a, b) / element_count


def total_variation_loss(x):
    """Average squared difference between neighbouring values of ``x``.

    ``x`` gets a leading axis of length 1 and is convolved with a ``[-1, 1]``
    kernel laid out along the last axis and along the second-to-last axis.
    The result is the mean of the two mean-squared responses.
    """
    lifted = C.reshape(x, (1,) + x.shape)
    step = np.array([-1, 1], dtype=np.float32)
    last_axis_kernel = C.constant(value=np.reshape(step, (1, 1, 1, 1, 2)))
    prev_axis_kernel = C.constant(value=np.reshape(step, (1, 1, 1, 2, 1)))
    last_axis_diff = C.convolution(last_axis_kernel, lifted, auto_padding=[False])
    prev_axis_diff = C.convolution(prev_axis_kernel, lifted, auto_padding=[False])
    prev_axis_energy = C.reduce_mean(C.square(prev_axis_diff))
    last_axis_energy = C.reduce_mean(C.square(last_axis_diff))
    return 0.5 * (prev_axis_energy + last_axis_energy)

成本計算，資料集的每個像素樣本的紅綠藍三色通道(channel)都同時減去一個常數，也就是一個位移(shift)。

# Image files used as the style source and the content source.
style_path = 'style.jpg'
content_path = 'content.jpg'

# Initialisation choice and loss weighting.
start_from_random = False
content_weight = 5.0
style_weight = 1.0
decay = 0.5

# (outer, inner, SIZE): number of optimiser restarts, iterations per restart
# and image side length; much smaller when running as a test.
outer, inner, SIZE = (2, 2, 64) if is_test() else (10, 20, 300)

# Per-channel offset, shaped (3, 1, 1) so it broadcasts over an image stored
# channel-first; subtracted on load and added back on save/display.
SHIFT = np.array([103.939, 116.779, 123.68], dtype=np.float32).reshape(3, 1, 1)

def load_image(path):
    """Load an image file as a shifted, channel-first float32 array.

    The largest centred square of the picture is cropped, resized to
    ``SIZE`` x ``SIZE`` with bicubic resampling, transposed from
    (height, width, channel) to (channel, height, width), and ``SHIFT`` is
    subtracted from it.

    Args:
        path: location of an image file that PIL can open.

    Returns:
        A float32 array of shape ``(3, SIZE, SIZE)``.
    """
    with Image.open(path) as pic:
        # Force exactly three colour channels: grayscale, palette or RGBA
        # inputs would otherwise break the transpose or the SHIFT subtraction.
        rgb = pic.convert('RGB')
        width, height = rgb.size
        side = min(width, height)
        # Integer box corners keep the crop exactly square even when a
        # dimension is odd (half-pixel float corners could round unevenly).
        left = (width - side) // 2
        top = (height - side) // 2
        cropped = rgb.crop((left, top, left + side, top + side))
        array = np.array(cropped.resize((SIZE, SIZE), Image.BICUBIC), dtype=np.float32)
        return np.ascontiguousarray(np.transpose(array, (2, 0, 1))) - SHIFT

def save_image(img, path):
    """Write a shifted, channel-first image array to ``path``.

    ``SHIFT`` is added back, values are clamped to [0, 255], the array is
    transposed to (height, width, channel) and cast to uint8 before being
    saved through PIL.
    """
    unshifted = img + SHIFT
    clamped = np.maximum(0, np.minimum(255, unshifted))
    channels_last = np.transpose(clamped, (1, 2, 0))
    Image.fromarray(np.uint8(channels_last)).save(path)

def ordered_outputs(f, binding):
    """Evaluate ``f`` on ``binding`` and list its outputs in ``f.outputs`` order.

    Each output value has its length-1 axes removed with ``np.squeeze``.
    """
    _, values_by_output = f.forward(binding, f.outputs)
    squeezed = []
    for output in f.outputs:
        squeezed.append(np.squeeze(values_by_output[output]))
    return squeezed

# Download any example image that is not available locally yet.
for local_path in (content_path, style_path):
    if os.path.exists(local_path):
        continue
    download('https://cntk.ai/jup/%s' % local_path, local_path)

# Load both images as shifted, channel-first arrays.
style = load_image(style_path)
content = load_image(content_path)

# Display the content image, then the style image, each in its own figure;
# SHIFT is added back and the axes are moved to (height, width, channel).
for img in (content, style):
    plt.figure()
    plt.imshow(np.transpose(img + SHIFT, (1, 2, 0)).astype(np.uint8))

# Build the VGG graph on an input that receives gradients: y is the image
# being optimised, z is the node after the last pooling step, and
# intermediate_layers combines the outputs of every convolution block.
y = C.input_variable((3, SIZE, SIZE), needs_gradient=True)
z, intermediate_layers = model(y, layers)

# Evaluate the network once on the content image and once on the style image;
# the resulting arrays are the fixed targets of the loss terms below.
content_activations = ordered_outputs(intermediate_layers, {y: [[content]]})
style_activations = ordered_outputs(intermediate_layers, {y: [[style]]})
style_output = np.squeeze(z.eval({y: [[style]]}))
n = len(content_activations)

# total = 1 + decay + ... + decay**n (geometric series). Dividing by it makes
# the n+1 content coefficients and the n+1 style coefficients each sum to 1,
# so decay only redistributes weight and does not rescale content vs. style.
total = (1-decay**(n+1))/(1-decay)
# Terms with coefficient 1/total: content loss on the input image itself,
# style loss on the final output z, plus the unweighted total variation loss.
loss = (1.0/total * content_weight * content_loss(y, content) 
         + 1.0/total * style_weight * style_loss(z, style_output) 
         + total_variation_loss(y))

# Per-layer terms: for output i the content coefficient is decay**(i+1) and
# the style coefficient is decay**(n-i), i.e. they run in opposite directions.
for i in range(n):
    loss = (loss 
        + decay**(i+1)/total * content_weight * content_loss(intermediate_layers.outputs[i], content_activations[i])
        + decay**(n-i)/total * style_weight   *   style_loss(intermediate_layers.outputs[i], style_activations[i]))

擬牛頓演算法(Broyden–Fletcher–Goldfarb–Shanno, BFGS)是一種優化演算法，這裡使用 scipy 當中的函式庫，優化最後的圖像。

宣告函式：vec2img，將向量轉換成圖像。

def vec2img(x):
    """Reshape a flat vector into a float32 ``(3, d, d)`` image array.

    ``d`` is the rounded square root of ``x.size / 3``.
    """
    side = int(np.round(np.sqrt(x.size / 3)))
    return x.astype(np.float32).reshape(3, side, side)

宣告函式：img2vec，將圖像轉換成向量。

def img2vec(img):
    """Return the elements of ``img`` in row-major order as a 1-D float64 array."""
    return img.reshape(-1).astype(np.float64)

宣告函式：value_and_grads，計算梯度。

def value_and_grads(f, binding):
    """Run ``f`` forward and backward on ``binding``.

    Args:
        f: CNTK function with exactly one output.
        binding: dict mapping input variables of ``f`` to their values.

    Returns:
        A pair ``(value, grads)``: the forward value of the output and the
        result of ``f.backward`` for the variables that are keys of
        ``binding``, seeded with ones shaped like ``value``.

    Raises:
        ValueError: if ``f`` has more than one output (or none).
    """
    if len(f.outputs) != 1:
        raise ValueError('function must return a single tensor')
    target = f.output
    # Keep the forward state so that backward can be run from it.
    state, forward_values = f.forward(binding, [target], {target})
    value = list(forward_values.values())[0]
    root_gradient = {target: np.ones_like(value)}
    grads = f.backward(state, root_gradient, set(binding))
    return value, grads

宣告函式：objfun，計算目標函式(損失)的值與梯度；optimize，反覆呼叫 L-BFGS-B 來優化圖像。

def objfun(x, loss):
    """Objective callback: loss value and gradient at the flat vector ``x``.

    ``x`` is reshaped to an image and bound to the first argument of
    ``loss``.

    Returns:
        A pair ``(value, gradient)``: the loss value reshaped to ``(1,)`` and
        the gradient flattened to a float64 vector.
    """
    image = vec2img(x)
    raw_value, grad_map = value_and_grads(loss, {loss.arguments[0]: [[image]]})
    value = np.reshape(raw_value, (1,))
    gradient = img2vec(list(grad_map.values())[0])
    return value, gradient

def optimize(loss, x0, inner, outer):
    """Minimise ``loss`` over the image with repeated L-BFGS-B runs.

    Each of the ``outer`` rounds starts from the previous result, runs at
    most ``inner`` iterations, prints the objective and saves the current
    image as ``output_<round>.jpg``.

    Args:
        loss: CNTK function to minimise; passed through to ``objfun``.
        x0: starting image array.
        inner: ``maxiter`` for each L-BFGS-B run.
        outer: number of runs.

    Returns:
        The image array after the last run (``x0`` itself if ``outer`` is 0).
    """
    # One (low, high) pair per element: the tightest range that keeps every
    # channel inside [0, 255] once SHIFT is added back.
    bounds = [(-np.min(SHIFT), 255-np.max(SHIFT))]*x0.size
    for i in range(outer):
        s = opt.minimize(objfun, img2vec(x0), args=(loss,), method='L-BFGS-B',
                         bounds=bounds, options={'maxiter': inner}, jac=True)
        # Depending on the SciPy version s.fun is a length-1 array or a plain
        # scalar; np.ravel accepts both, whereas s.fun[0] fails on a scalar.
        print('objective : %s' % np.ravel(s.fun)[0])
        x0 = vec2img(s.x)
        path = 'output_%d.jpg' % i
        save_image(x0, path)
    return x0

np.random.seed(98052)
# Start either from Gaussian noise or from the content image itself.
x0 = np.random.randn(3, SIZE, SIZE).astype(np.float32) if start_from_random else content
xstar = optimize(loss, x0, inner, outer)
# Show the final image: add SHIFT back and move to (height, width, channel).
plt.imshow(np.transpose(xstar + SHIFT, (1, 2, 0)).astype(np.uint8))