DAY 29
0
AI & Data

## [Day 28]粗糙集特徵選擇簡介-6

``````
# equivalence relation
def eq_relation(f_list, data, item_name, subset=None):
    """Partition samples into the equivalence classes induced by ``f_list``.

    Two samples fall into the same class when they carry identical values in
    every column named in ``f_list`` (the indiscernibility relation used in
    rough set theory).

    Args:
        f_list: Feature subset, i.e. the column label(s) of ``data`` to
            group by.
        data: Observation table (a pandas ``DataFrame``).
        item_name: Name of the column in ``data`` that holds each sample's
            identifier.
        subset: Optional collection of sample identifiers. When given, only
            rows whose identifier is in it are partitioned; ``None`` means
            every row of ``data``.

    Returns:
        A list of equivalence classes, each one a list of sample
        identifiers. An empty ``f_list`` cannot tell any two samples apart,
        so it yields a single class holding every selected sample (or an
        empty list when no sample is selected).

    Note:
        ``DataFrame.groupby`` drops rows whose grouping key is missing by
        default, so samples with NaN in any ``f_list`` column do not appear
        in the result.
    """
    if subset is None:
        selected = data
    else:
        selected = data[data[item_name].isin(subset)]

    # groupby() raises on an empty key list; with no features all samples
    # are indiscernible, so they form exactly one class.
    if isinstance(f_list, (list, tuple)) and len(f_list) == 0:
        everyone = list(selected[item_name])
        return [everyone] if everyone else []

    grouped = selected.groupby(f_list)
    return list(grouped[item_name].apply(list))
``````

``````def pos_dep(f_list, q_list, data, item_name, subset = None):
if subset is None:
subset1 = data[item_name]

if len(f_list)*len(q_list)==0:
return 0

modP = eq_relation(f_list, data, item_name, subset = subset1)
modQ = eq_relation(q_list, data, item_name, subset = subset1)

pos_list = [[p for p in modP if len([p1 for p1 in p if p1 not in q])==0] for q in modQ]
union_pos = list(set().union(*[list(set().union(*p)) for p in pos_list]))

return len(union_pos)/len(data[item_name])
``````

``````def rough_feature_selection(q_list, data, item_name, feature_list, subset = None):
fs_list = []
temp_fs = []
best_performance = 0
temp_performance = -1

while temp_performance != best_performance:

temp_performance = best_performance

for f in [feat for feat in feature_list if feat not in fs_list]:
now_per = pos_dep(f_list = fs_list + [f],
q_list = q_list,
data = data,
item_name = item_name)
past_per = pos_dep(f_list = fs_list,
q_list = q_list,
data = data,
item_name = item_name)

if now_per > past_per and now_per > best_performance:
temp_fs = fs_list + [f]
best_performance = now_per

fs_list = temp_fs

return temp_fs, best_performance
``````