1. 程式人生 > 其它 >K-Means聚類演算法k值選取——輪廓係數

K-Means聚類演算法k值選取——輪廓係數

# 1 匯入模組和包
from itertools import islice

import matplotlib.pyplot as plt  # plotting library used to draw the score chart
from matplotlib.font_manager import FontProperties
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import calinski_harabasz_score, silhouette_score

 8 font_set = FontProperties(fname=r"
c:\windows\fonts\simsun.ttc", size=15) #匯入漢字模板 9 with open('result.csv', 'r', encoding='GBK') as f: #開啟result.csv檔案(檔案可自己指定) 10 results = f.readlines()[1:100] #讀取表格資料的從第二行到101行(多少行自己指定) 11 # res = [] 12 # for it in results: 13 # tmp = it.split(',')[1:] 14 # tmp = list(map(lambda x: float(x), tmp))
15 # res.append(it) 16 X = [list(map(lambda x: float(x), it.split(',')[1:])) for it in results] 17 18 # # 1、模型訓練 19 # y_pre = KMeans(n_clusters=clu_num).fit_predict(X) 20 21 22 # 1、建立空列表 23 sc_list = [] 24 25 # 2、設定中心點個數,檢視SC的變化範圍 26 for clu_num in range(2, 10): 27 # 初始化迭代器一次的Kmeans
28 my_kmeans = KMeans(n_clusters=clu_num, max_iter=100, random_state=0) 29 # 模型訓練 30 my_kmeans.fit(X) 31 y_pre = my_kmeans.fit_predict(X) 32 33 # 將SC的每一次迭代結果新增到空列表內 34 sc_list.append(silhouette_score(X, y_pre)) 35 36 # 3、影象視覺化 37 plt.figure(figsize=(20, 8), dpi=100) 38 plt.scatter(range(2, 10), sc_list) 39 plt.plot(range(2, 10), sc_list) 40 x_ticks = range(1, 10, 1) 41 plt.xticks(x_ticks) 42 plt.xlabel('中心點個數', fontproperties=font_set) 43 plt.ylabel('輪廓係數', fontproperties=font_set) 44 plt.grid() 45 plt.show()