iT邦幫忙

第 11 屆 iThome 鐵人賽

DAY 27
0
AI & Data

人工智慧(RL系列) 完爆遊戲30天系列 第 27 篇

Day27 認識Class Box

  • 這個章節我們介紹 Box 類別的實作。
    首先載入套件:
import numpy as np
from .space import Space

自定義類別參數

def __init__(self, low, high, shape=None, dtype=np.float32):
    """
    Build a box from per-coordinate lower and upper bounds.

    Two calling conventions are accepted:

    * ``shape`` omitted: ``low`` and ``high`` are arrays with identical
      ``.shape``; the box takes that shape.
    * ``shape`` given: ``low`` and ``high`` are scalars that are broadcast
      to every coordinate of a box with that shape.

    In both cases the stored ``low`` / ``high`` arrays are cast to ``dtype``.
    """
    assert dtype is not None, 'dtype must be explicitly provided. '
    self.dtype = np.dtype(dtype)

    if shape is not None:
        # Scalar bounds replicated over an explicitly requested shape.
        assert np.isscalar(low) and np.isscalar(high), 'box requires scalar bounds. '
        self.shape = tuple(shape)
        lower = np.full(self.shape, low)
        upper = np.full(self.shape, high)
    else:
        # Array bounds: the box shape is taken from the bounds themselves.
        assert low.shape == high.shape, 'box dimension mismatch. '
        self.shape = low.shape
        lower, upper = low, high

    self.low = lower.astype(self.dtype)
    self.high = upper.astype(self.dtype)

    # Per-coordinate flags: True where the corresponding bound is finite.
    self.bounded_below = self.low > -np.inf
    self.bounded_above = self.high < np.inf

    super(Box, self).__init__(self.shape, self.dtype)

定義邊界

def is_bounded(self, manner="both"):
    """
    Report whether every coordinate of the box is bounded.

    ``manner`` selects which side is checked: ``"below"``, ``"above"``,
    or ``"both"`` (the default).

    Raises:
        ValueError: if ``manner`` is not one of the three accepted values.
    """
    all_below = np.all(self.bounded_below)
    all_above = np.all(self.bounded_above)

    if manner == "below":
        return all_below
    if manner == "above":
        return all_above
    if manner == "both":
        return all_below and all_above
    raise ValueError("manner is not in {'below', 'above', 'both'}")

定義採樣過程

def sample(self):
    """
    Generate a single random sample inside of the Box.

    Each coordinate is sampled according to the form of its interval:

    * [a, b] : uniform distribution
    * [a, oo) : shifted exponential distribution
    * (-oo, b] : shifted negative exponential distribution
    * (-oo, oo) : normal distribution

    Returns:
        An array of shape ``self.shape`` cast to ``self.dtype``. For
        non-float dtypes the draw is floored before the cast so that every
        integer in ``[low, high]`` is equally likely.
    """
    is_float = self.dtype.kind == 'f'
    # For non-float dtypes the uniform draw is taken from [low, high + 1)
    # and floored below, which makes `high` itself attainable.
    high = self.high if is_float else self.high.astype('int64') + 1
    out = np.empty(self.shape)

    # Masking arrays which classify the coordinates according to interval
    # type
    unbounded = ~self.bounded_below & ~self.bounded_above
    upp_bounded = ~self.bounded_below & self.bounded_above
    low_bounded = self.bounded_below & ~self.bounded_above
    bounded = self.bounded_below & self.bounded_above

    # Vectorized sampling by interval type
    out[unbounded] = self.np_random.normal(
        size=unbounded[unbounded].shape)

    out[low_bounded] = self.low[low_bounded] + self.np_random.exponential(
        size=low_bounded[low_bounded].shape)

    # (-oo, b]: subtract a non-negative exponential draw from the upper
    # bound so the result never exceeds b. (Subtracting the bound itself
    # would centre the samples on -b instead of b.)
    out[upp_bounded] = self.high[upp_bounded] - self.np_random.exponential(
        size=upp_bounded[upp_bounded].shape)

    out[bounded] = self.np_random.uniform(low=self.low[bounded],
                                          high=high[bounded],
                                          size=bounded[bounded].shape)

    if not is_float:
        # astype() truncates toward zero, which would over-represent 0 and
        # make a negative `low` practically unreachable; floor first.
        out = np.floor(out)

    return out.astype(self.dtype)

補充參考

OpenAI gym 源碼:https://bre.is/2tD4gj5x


上一篇
Day26 自定義環境
下一篇
Day28 認識Class Discrete
系列文
人工智慧(RL系列) 完爆遊戲30天30
圖片
  直播研討會
圖片
{{ item.channelVendor }} {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言