iT邦幫忙

2021 iThome 鐵人賽

DAY 11
0
自我挑戰組

終極大數據地獄系列 第 11

#11 Pandas教學3

Pandas匯入CSV檔

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # Build a DataFrame from the CSV file and display it.
    csv_path = "./car.csv"
    cars = pd.read_csv(csv_path)
    print(cars)
    # Expected output:
    """
             brand   auto    price
    0    Toyota 86  False  1300000
    1   Subaru BRZ   True  1320000
    2  NISSAN GT-R   True  6750000
    3   MAZDA MX-5  False  1340000
    """

而且Pandas很聰明,會自己辨識資料欄位應該要是什麼資料型態

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # Build a DataFrame from the CSV file and display it.
    csv_path = "./car.csv"
    cars = pd.read_csv(csv_path)
    print(cars)
    # Expected output:
    """
             brand   auto    price
    0    Toyota 86  False  1300000
    1   Subaru BRZ   True  1320000
    2  NISSAN GT-R   True  6750000
    3   MAZDA MX-5  False  1340000
    """

    # Show the dtype pandas inferred for each column, one per line:
    # brand -> object, auto -> bool, price -> int64
    for column in ("brand", "auto", "price"):
        print(cars[column].dtype)

指定某個欄位為索引

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # Build a DataFrame from the CSV file, using the "brand" column as the
    # row index instead of the default 0..n-1 integer index.
    csv_path = "./car.csv"
    index_columns = ["brand"]
    cars = pd.read_csv(csv_path, index_col=index_columns)
    print(cars)
    # Expected output:
    """
               auto    price
    brand
    Toyota 86    False  1300000
    Subaru BRZ    True  1320000
    NISSAN GT-R   True  6750000
    MAZDA MX-5   False  1340000
    """

指定某個欄位的資料型態

# 載入pandas
import pandas as pd
# 載入numpy
import numpy as np

if __name__ == "__main__":
    # Build a DataFrame from the CSV file, forcing the "price" column to be
    # parsed as float64 instead of the inferred integer type.
    csv_path = "./car.csv"
    column_types = {"price": np.float64}
    cars = pd.read_csv(csv_path, dtype=column_types)
    print(cars)
    # Expected output:
    """
         brand   auto      price
    0    Toyota 86  False  1300000.0
    1   Subaru BRZ   True  1320000.0
    2  NISSAN GT-R   True  6750000.0
    3   MAZDA MX-5  False  1340000.0
    """

    # Show the resulting dtype of each column, one per line:
    # brand -> object, auto -> bool, price -> float64
    for column in ("brand", "auto", "price"):
        print(cars[column].dtype)

自訂欄位名稱

# 載入pandas
import pandas as pd
# 載入numpy
import numpy as np

if __name__ == "__main__":
    # Build a DataFrame from the CSV file with custom column names.
    # No header argument is passed here, so (as the expected output below
    # shows) the file's own header line ends up as data row 0.
    csv_path = "./car.csv"
    column_names = ['A', 'AA', 'AAA']
    cars = pd.read_csv(csv_path, names=column_names)
    print(cars)
    # Expected output:
    """
             A     AA      AAA
    0        brand   auto    price
    1    Toyota 86  FALSE  1300000
    2   Subaru BRZ   TRUE  1320000
    3  NISSAN GT-R   TRUE  6750000
    4   MAZDA MX-5  FALSE  1340000
    """

    # Select a single column by its new name.
    last_column = cars["AAA"]
    print(last_column)
    # Expected output:
    """
    0      price
    1    1300000
    2    1320000
    3    6750000
    4    1340000
    """

這時候會出現一個問題:原本的欄位名稱變成第一筆資料了。解決方法是加上 header=0 參數,告訴 Pandas 檔案的第一列是原本的標題列,讓自訂的欄位名稱取代它

# 載入pandas
import pandas as pd
# 載入numpy
import numpy as np

if __name__ == "__main__":
    # Build a DataFrame from the CSV file with custom column names.
    # header=0 marks the first line of the file as the original header row,
    # so it is replaced by `names` instead of being read as a data row.
    data = pd.read_csv("./car.csv", names=['A', 'AA', 'AAA'], header=0)
    print(data)
    # Expected output:
    """
             A     AA      AAA
    0    Toyota 86  False  1300000
    1   Subaru BRZ   True  1320000
    2  NISSAN GT-R   True  6750000
    3   MAZDA MX-5  False  1340000
    """

    # The old header text ("price") is no longer part of the column, so only
    # the four numeric rows remain.
    print(data["AAA"])
    # Expected output:
    """
    0    1300000
    1    1320000
    2    6750000
    3    1340000
    Name: AAA, dtype: int64
    """

輸出為CSV檔

# 載入pandas
import pandas as pd

if __name__ == "__main__":
    # 以字典來建立DataFrame,把brand欄位設為索引
    d = {
        "brand":["Toyota 86", "Subaru BRZ", "NISSAN GT-R", "MAZDA MX-5"],
        "auto":[False, True, True, False],
        "price":[1300000, 1320000, 6750000, 1340000],
        }
    data = pd.DataFrame(d)
    print(data)
    """
             brand   auto    price
    0    Toyota 86  False  1300000
    1   Subaru BRZ   True  1320000
    2  NISSAN GT-R   True  6750000
    3   MAZDA MX-5  False  1340000
    """

    #輸出為CSV檔
    data.to_csv("./new_car.c

上一篇
#10 Pandas教學2
下一篇
#12 matplotlib教學
系列文
終極大數據地獄24

尚未有邦友留言

立即登入留言