DAY 10
0
AI & Data

## [Day 9]向前特徵選擇舉例

SFS(模型 : 如 LinearRegression(), SVM(),...
k_features : 最後選擇的特徵數,
forward : True --> 向前; False --> 向後
floating : 是否使用浮動搜尋 (SFFS / SBFS)；True 時，每次加入 (向前) 或移除 (向後) 一個特徵後，會再嘗試反向移除或加回先前的特徵，若分數變好就採用
verbose : 回報訓練狀態，0 : 不報，1 : 簡易報告，2 : 詳細報告
scoring : 模型表現評斷方法，使用 sklearn 的 scoring 字串，如 : 'r2', 'accuracy', 'f1',...
cv : 交叉驗證的折數 (預設為 5)；設為 0 表示不做交叉驗證)

``````python
# feature number
n = 100

# True coefficients: each one is uniform in [-1, 1] (rounded to 4 decimals),
# except that about 10% of them are forced to 0.0 so that some features carry
# no signal at all.
create_coef = np.array([
    np.round(2 * random.random() - 1, 4) if random.random() > 0.1 else 0.0
    for _ in range(n)
])

# Create the n feature columns x_0 ... x_{n-1}; every feature is drawn from
# its own normal distribution.
x_list = []
for _ in range(n):
    mu = 5 * random.random()  # mean in [0, 5)
    sigma = 0.3 * random.random()  # standard deviation in [0, 0.3)
    x_list.append(list(np.random.normal(mu, sigma, 1000)))  # 1000 samples

# x_list holds one row per feature, so transpose to (samples x features).
X_df = pd.DataFrame(x_list).T
X_df.columns = ['x_' + str(i) for i in range(n)]

# Create y: the linear combination of each sample with the true coefficients,
# scaled by a random factor in [0.95, 1.05) and rounded to 4 decimals.
y_list = [
    np.round(np.array(x).dot(create_coef) * (0.1 * random.random() + 0.95), 4)
    for x in zip(*x_list)
]
``````

``````python
lr = LinearRegression()

# Forward Feature Selection: start from no features and keep adding one at a
# time until 10 are selected, scoring each candidate set by R^2.
sfs_options = {
    'k_features': 10,
    'forward': True,
    'floating': False,
    'verbose': 2,  # detailed progress report
    'scoring': 'r2',
    'cv': 0,
}
forward = SFS(lr, **sfs_options)

# fit() hands back the fitted selector, which is kept under the same name.
forward = forward.fit(X_df, y_list)
``````

``````
forward.k_feature_idx_ = (16, 25, 32, 42, 53, 54, 69, 71, 81, 94)
forward.k_feature_names_ = ('x_16', 'x_25', 'x_32', 'x_42', 'x_53', 'x_54', 'x_69', 'x_71', 'x_81', 'x_94')
k_feature_rank = [28, 21, 30, 2, 12, 6, 0, 19, 4, 10]
``````

``````python
import numpy as np
from sklearn.linear_model import LinearRegression
import random
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
import pandas as pd

# feature number
n = 100

# True coefficients: each one is uniform in [-1, 1] (rounded to 4 decimals),
# except that about 10% of them are forced to 0.0 so that some features carry
# no signal at all.
create_coef = np.array([
    np.round(2 * random.random() - 1, 4) if random.random() > 0.1 else 0.0
    for _ in range(n)
])

# Create the n feature columns x_0 ... x_{n-1}; every feature is drawn from
# its own normal distribution.
x_list = []
for _ in range(n):
    mu = 5 * random.random()  # mean in [0, 5)
    sigma = 0.3 * random.random()  # standard deviation in [0, 0.3)
    x_list.append(list(np.random.normal(mu, sigma, 1000)))  # 1000 samples

# x_list holds one row per feature, so transpose to (samples x features).
X_df = pd.DataFrame(x_list).T
X_df.columns = ['x_' + str(i) for i in range(n)]

# Create y: the linear combination of each sample with the true coefficients,
# scaled by a random factor in [0.95, 1.05) and rounded to 4 decimals.
y_list = [
    np.round(np.array(x).dot(create_coef) * (0.1 * random.random() + 0.95), 4)
    for x in zip(*x_list)
]

lr = LinearRegression()

# Forward Feature Selection: start from no features and keep adding one at a
# time until 10 are selected, scoring each candidate set by R^2.
sfs_options = {
    'k_features': 10,
    'forward': True,
    'floating': False,
    'verbose': 1,  # brief progress report
    'scoring': 'r2',
    'cv': 0,
}
forward = SFS(lr, **sfs_options)

# fit() hands back the fitted selector, which is kept under the same name.
forward = forward.fit(X_df, y_list)

# Show which columns were selected, by position and by column name.
print('forward.k_feature_idx_ = ', forward.k_feature_idx_)
print('forward.k_feature_names_ = ', forward.k_feature_names_)

# Rank every feature by the magnitude of its true coefficient: after sorting,
# row 0 holds the feature with the largest |coef| and row n-1 the smallest.
# NOTE: stacking the integer indices together with the float coefficients
# makes the 'x_i' column float-valued.
rank_df = pd.DataFrame([np.arange(n), create_coef]).T
rank_df.columns = ['x_i', 'coef_i']
rank_df['abs_coef_i'] = abs(rank_df['coef_i'])
rank_df = rank_df.sort_values(by=['abs_coef_i'], ascending=False).reset_index(drop=True)

# Build the feature-index -> rank table once, rather than rebuilding a list
# and scanning it linearly for every selected feature.
rank_of_feature = {int(feature): rank for rank, feature in enumerate(rank_df['x_i'])}

# Rank (by true |coef|) of each feature the selector picked; small numbers
# mean the selector found a feature that matters a lot.
k_feature_rank = [rank_of_feature[idx] for idx in forward.k_feature_idx_]
print('k_feature_rank = ', k_feature_rank)
``````