iT邦幫忙

2021 iThome 鐵人賽

DAY 10
0
自我挑戰組

終極大數據地獄系列 第 10

#10 Pandas教學2

新增刪除操作

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # Build the DataFrame from a dict that maps column name -> column values.
    cars = {
        "brand": ["Toyota 86", "Subaru BRZ", "NISSAN GT-R", "MAZDA MX-5"],
        "auto": [False, True, True, False],
        "price": [1300000, 1320000, 6750000, 1340000],
    }
    df = pd.DataFrame(cars)
    print(df)
    # Output:
    #          brand   auto    price
    # 0    Toyota 86  False  1300000
    # 1   Subaru BRZ   True  1320000
    # 2  NISSAN GT-R   True  6750000
    # 3   MAZDA MX-5  False  1340000

    # Add a row: assigning to an index label that does not exist yet (4)
    # appends a new row carrying that label.
    new_row = ["Toyota Supra", True, 2480000]
    df.loc[4] = new_row
    print(df)
    # Output:
    #           brand   auto    price
    # 0     Toyota 86  False  1300000
    # 1    Subaru BRZ   True  1320000
    # 2   NISSAN GT-R   True  6750000
    # 3    MAZDA MX-5  False  1340000
    # 4  Toyota Supra   True  2480000

    # Add a column: one value per existing row, in row order.
    colors = ["黑", "銀", "黑", "紅", "藍"]
    df["color"] = colors
    print(df)
    # Output:
    #           brand   auto    price color
    # 0     Toyota 86  False  1300000     黑
    # 1    Subaru BRZ   True  1320000     銀
    # 2   NISSAN GT-R   True  6750000     黑
    # 3    MAZDA MX-5  False  1340000     紅
    # 4  Toyota Supra   True  2480000     藍

    # Delete a row by its index label (same as drop([2], axis=0)).
    # drop() returns a new DataFrame, so the result is rebound to df.
    df = df.drop(index=[2])
    print(df)
    # Output:
    #           brand   auto    price color
    # 0     Toyota 86  False  1300000     黑
    # 1    Subaru BRZ   True  1320000     銀
    # 3    MAZDA MX-5  False  1340000     紅
    # 4  Toyota Supra   True  2480000     藍

    # Delete a column by name (same as drop(["color"], axis=1)).
    df = df.drop(columns=["color"])
    print(df)
    # Output:
    #           brand   auto    price
    # 0     Toyota 86  False  1300000
    # 1    Subaru BRZ   True  1320000
    # 3    MAZDA MX-5  False  1340000
    # 4  Toyota Supra   True  2480000

自訂索引

這裡要注意:存取某一列時會使用 loc 屬性,loc 的中括號內必須放索引標籤(label)。索引不一定是數字,如果仍想用「由上往下數的位置」(從 0 開始)來存取資料,就必須改用 iloc 屬性。

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # Build the DataFrame from a dict, labelling the rows "A".."D"
    # instead of the default 0..3 integer index.
    specs = {
        "brand": ["Toyota 86", "Subaru BRZ", "NISSAN GT-R", "MAZDA MX-5"],
        "auto": [False, True, True, False],
        "price": [1300000, 1320000, 6750000, 1340000],
    }
    row_labels = ["A", "B", "C", "D"]
    df = pd.DataFrame(specs, index=row_labels)
    print(df)
    # Output:
    #          brand   auto    price
    # A    Toyota 86  False  1300000
    # B   Subaru BRZ   True  1320000
    # C  NISSAN GT-R   True  6750000
    # D   MAZDA MX-5  False  1340000

    # Label-based access: .loc takes the index label of the row.
    row_c = df.loc["C"]
    print(row_c)
    # Output:
    # brand    NISSAN GT-R
    # auto            True
    # price        6750000
    # Name: C, dtype: object

    # Position-based access: .iloc takes the 0-based position counted from
    # the top, so position 3 is the row labelled "D".
    row_at_3 = df.iloc[3]
    print(row_at_3)
    # Output:
    # brand    MAZDA MX-5
    # auto          False
    # price       1340000
    # Name: D, dtype: object

把某欄設為索引

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # Build the DataFrame from a dict, then promote the "brand" column to
    # be the row index. set_index() returns a new DataFrame.
    specs = {
        "brand": ["Toyota 86", "Subaru BRZ", "NISSAN GT-R", "MAZDA MX-5"],
        "auto": [False, True, True, False],
        "price": [1300000, 1320000, 6750000, 1340000],
    }
    df = pd.DataFrame(specs).set_index("brand")
    print(df)
    # Output:
    #               auto    price
    # brand
    # Toyota 86    False  1300000
    # Subaru BRZ    True  1320000
    # NISSAN GT-R   True  6750000
    # MAZDA MX-5   False  1340000

    # Label-based access: the brand name is now the row label.
    by_label = df.loc["Toyota 86"]
    print(by_label)
    # Output:
    # auto       False
    # price    1300000
    # Name: Toyota 86, dtype: object

    # Position-based access: 0-based position 2 counted from the top.
    by_position = df.iloc[2]
    print(by_position)
    # Output:
    # auto        True
    # price    6750000
    # Name: NISSAN GT-R, dtype: object

過濾資料

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # Build the DataFrame from a dict that maps column name -> column values.
    d = {
        "brand": ["Toyota 86", "Subaru BRZ", "NISSAN GT-R", "MAZDA MX-5"],
        "auto": [False, True, True, False],
        "price": [1300000, 1320000, 6750000, 1340000],
    }
    data = pd.DataFrame(d)
    print(data)
    # Output:
    #          brand   auto    price
    # 0    Toyota 86  False  1300000
    # 1   Subaru BRZ   True  1320000
    # 2  NISSAN GT-R   True  6750000
    # 3   MAZDA MX-5  False  1340000

    # Filter with a boolean mask: True marks the rows to keep.
    # "auto" is a bool column, so ~ negates it element-wise; this selects
    # the manual-transmission cars (auto is False).
    mask = ~data["auto"]
    print(mask)
    # Output:
    # 0     True
    # 1    False
    # 2    False
    # 3     True
    # Name: auto, dtype: bool

    # Apply the mask once and reuse the filtered rows.
    manual = data[mask]
    print(manual)
    # Output:
    #         brand   auto    price
    # 0   Toyota 86  False  1300000
    # 3  MAZDA MX-5  False  1340000

    # .loc[rows, column] picks rows and column in a single indexing step
    # instead of chaining data[mask]["brand"].
    print(data.loc[mask, "brand"])
    # Output:
    # 0     Toyota 86
    # 3    MAZDA MX-5
    # Name: brand, dtype: object

    # First row (by position) of the filtered result; it keeps its original
    # index label 0.
    print(manual.iloc[0])
    # Output:
    # brand    Toyota 86
    # auto         False
    # price      1300000
    # Name: 0, dtype: object

    # Average price of the manual-transmission cars.
    print(data.loc[mask, "price"].mean())  # 1320000.0

修改符合條件的資料

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # Build the DataFrame from a dict that maps column name -> column values.
    d = {
        "brand": ["Toyota 86", "Subaru BRZ", "NISSAN GT-R", "MAZDA MX-5"],
        "auto": [False, True, True, False],
        "price": [1300000, 1320000, 6750000, 1340000],
    }
    data = pd.DataFrame(d)
    print(data)
    # Output:
    #          brand   auto    price
    # 0    Toyota 86  False  1300000
    # 1   Subaru BRZ   True  1320000
    # 2  NISSAN GT-R   True  6750000
    # 3   MAZDA MX-5  False  1340000

    # Boolean mask selecting the manual-transmission cars (auto is False).
    # "auto" is a bool column, so ~ negates it element-wise.
    mask = ~data["auto"]

    # Set price to 0 for the masked rows.
    # Use a single .loc[rows, column] assignment: the chained form
    # data["price"][mask] = 0 may write to a temporary copy, which raises
    # SettingWithCopyWarning on older pandas and leaves `data` unchanged
    # when Copy-on-Write is enabled.
    data.loc[mask, "price"] = 0
    print(data)
    # Output:
    #          brand   auto    price
    # 0    Toyota 86  False        0
    # 1   Subaru BRZ   True  1320000
    # 2  NISSAN GT-R   True  6750000
    # 3   MAZDA MX-5  False        0

上一篇
#9 Pandas教學
下一篇
#11 Pandas教學3
系列文
終極大數據地獄24

尚未有邦友留言

立即登入留言