1. 程式人生 > >【機器學習】決策樹演算法(二)— 程式碼實現

【機器學習】決策樹演算法(二)— 程式碼實現

# -*- coding: utf-8 -*-
"""Decision tree algorithm demo: ID3-style tree (entropy criterion).

Created on 2018-11-04

@author: xiaofengyang
"""
import csv
from io import StringIO  # noqa: F401  (stdlib stand-in for the removed sklearn.externals.six)


def load_dataset(path):
    """Read the training CSV and split it into attributes and class labels.

    The first row is treated as the header.  In every following row the
    first column (the record id) is ignored, the last column is the class
    label, and the columns in between are the attributes.

    Args:
        path: Location of the CSV file.

    Returns:
        A ``(header, feature_list, label_list)`` tuple: the raw header row,
        one ``{attribute_name: value}`` dict per record, and the class label
        of each record, in file order.

    Raises:
        ValueError: If the file has no header row at all.
    """
    # newline="" is what the csv module asks for; the context manager makes
    # sure the handle is closed even if parsing fails.
    with open(path, newline="", encoding="utf-8") as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            raise ValueError(f"{path} is empty: expected a CSV header row")

        attribute_names = header[1:-1]
        feature_list = []
        label_list = []
        for row in reader:
            if not row:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            # The class label lives in the last column.
            label_list.append(row[-1])
            # Map each attribute name to this record's value.
            feature_list.append(dict(zip(attribute_names, row[1:-1])))
    return header, feature_list, label_list


def main(csv_path='AllElectronics.csv'):
    """Train a decision tree on the CSV data and predict one modified record.

    Prints the encoded feature matrix, the encoded column names, the encoded
    labels, the modified sample row and its predicted class.
    """
    # Imported here so load_dataset() stays usable without scikit-learn.
    from sklearn import preprocessing
    from sklearn import tree
    from sklearn.feature_extraction import DictVectorizer

    # 1-3. Read the CSV file, its header, and pre-process the records.
    _header, feature_list, label_list = load_dataset(csv_path)

    # 4. One-hot encode the categorical attributes.
    vectorizer = DictVectorizer()
    dummy_x = vectorizer.fit_transform(feature_list).toarray()
    print(str(dummy_x))

    # Show which attribute=value pair each encoded column stands for.
    # get_feature_names() was removed from scikit-learn; this is its successor.
    print(vectorizer.get_feature_names_out())

    # 5. Encode the class labels.
    label_binarizer = preprocessing.LabelBinarizer()
    dummy_y = label_binarizer.fit_transform(label_list)
    print(dummy_y)

    # 6. Build the decision tree, splitting on entropy.
    classifier = tree.DecisionTreeClassifier(criterion='entropy')
    classifier = classifier.fit(dummy_x, dummy_y)

    # 7. Predict.  Copy the row first: dummy_x[0, :] is a view, so editing it
    # in place would silently alter the training matrix as well.
    one_row = dummy_x[0, :].copy()
    # NOTE(review): with the default sorted DictVectorizer columns these
    # indices look like credit_rating=fair (4) and student=yes (9), which
    # would leave no credit_rating set and both student flags on - confirm
    # the intended sample before relying on this prediction.
    one_row[4] = 0
    one_row[9] = 1
    print(one_row)

    predicted = classifier.predict([one_row])
    print(str(predicted))


if __name__ == "__main__":
    main()

附件:資料集(AllElectronics.csv)

RID,age,income,student,credit_rating,class_buys_computer
1,youth,high,no,fair,no
2,youth,high,no,excellent,no
3,middle_aged,high,no,fair,yes
4,senior,medium,no,fair,yes
5,senior,low,yes,fair,yes
6,senior,low,yes,excellent,no
7,middle_aged,low,yes,excellent,yes
8,youth,medium,no,fair,no
9,youth,low,yes,fair,yes
10,senior,medium,yes,fair,yes
11,youth,medium,yes,excellent,yes
12,middle_aged,medium,no,excellent,yes
13,middle_aged,high,yes,fair,yes
14,senior,medium,no,excellent,no