bobo老師機器學習筆記-第四課:KNN演算法
阿新 • • 發佈:2018-11-05
自己參考Bobo老師寫的程式碼:
主要分為四個檔案: knn.py中實現KNN演算法、model_selection.py封裝了樣本資料的一些工具方法,比如切分為訓練集和測試集;
metrics用來對模型進行評估、client用來呼叫演算法進行執行
# -*- encoding: utf-8 -*-
"""Implementation of the KNN classification algorithm."""
import numpy as np
from math import sqrt
from collections import Counter
from metrics import accuracy_score


class KnnClassifier(object):
    """K Nearest Neighbours (KNN) classifier.

    ``fit`` only stores the training samples. ``predict`` labels each query
    row by a majority vote among the ``k`` training samples closest to it in
    Euclidean distance.
    """

    def __init__(self, k):
        """Create a classifier.

        :param k: number of nearest neighbours consulted for each prediction
        :raises ValueError: if ``k`` is smaller than 1
        """
        # With k < 1 the vote in predict() would be empty and fail with an
        # opaque IndexError, so reject it up front.
        if k < 1:
            raise ValueError('k must be at least 1, got %r' % (k,))
        self.k = k

    def fit(self, X_train, y_train):
        """Store the training set used by later predictions.

        :param X_train: training samples, one row per sample
        :param y_train: label of each training sample
        :return: this classifier, so calls can be chained
        """
        # predict() relies on ndarray attributes (.shape, fancy indexing), so
        # convert once here; ndarrays pass through np.asarray unchanged.
        self.X_train = np.asarray(X_train)
        self.y_train = np.asarray(y_train)
        return self

    def predict(self, x_test):
        """Predict a label for every row of ``x_test``.

        :param x_test: 2-D collection of query samples with the same number
            of columns as the training samples
        :return: ``int32`` array holding one predicted label per query row
        """
        x_test = np.asarray(x_test)
        assert x_test.shape[1] == self.X_train.shape[1], u'預測樣本和訓練樣本的特徵值不相等'
        predictions = []
        for one_x in x_test:
            # Euclidean distance from this query row to every training row.
            distances = np.sqrt(np.sum((self.X_train - one_x) ** 2, axis=1))
            nearest = np.argsort(distances)[:self.k]
            # Majority vote among the k nearest labels; on a tie the label
            # that Counter.most_common ranks first is used.
            votes = Counter(self.y_train[nearest])
            predictions.append(votes.most_common(1)[0][0])
        # Labels are cast to int32, so they must be integer-convertible.
        return np.asarray(predictions, dtype='int32')

    def scores(self, y_pridect, y_test):
        """Return the accuracy of ``y_pridect`` measured against ``y_test``.

        :param y_pridect: predicted labels
        :param y_test: true labels
        :return: whatever ``accuracy_score`` returns for the two label sets
        """
        # accuracy_score is declared as (y_test, y_pridect); pass the
        # arguments in that order.
        return accuracy_score(y_test, y_pridect)

    def __repr__(self):
        return 'knn(k=%s)' % self.k
metrics.py檔案:
# -*- encoding: utf-8 -*-
"""Metrics, such as accuracy, used to judge how well an algorithm performs."""
import numpy as np


def accuracy_score(y_test, y_pridect):
    """Return the fraction of predictions that equal the true labels.

    :param y_test: true labels of the test samples (array-like)
    :param y_pridect: predicted labels (array-like) with as many entries
        as ``y_test``
    :return: number of matching positions divided by the number of predictions
    """
    # Accept plain lists/tuples as well as ndarrays.
    y_test = np.asarray(y_test)
    y_pridect = np.asarray(y_pridect)
    assert y_pridect.shape[0] == y_test.shape[0], u'測試集和預測集的資料個數不相等'
    cnt = np.sum(y_test == y_pridect)
    # Divide by a float so the ratio cannot be truncated by integer division.
    return cnt / float(len(y_pridect))
model_selection.py檔案:
# -*- encoding: utf-8 -*-
"""Metrics, such as accuracy, used to judge how well an algorithm performs."""
# NOTE(review): this listing is presented as model_selection.py, yet the
# client script imports `train_test_split` from model_selection and no such
# function is defined here. The content looks like a copy of the metrics
# module -- confirm against the original project before relying on it.
import numpy as np


def accuracy_score(y_test, y_pridect):
    """Return the fraction of predictions that equal the true labels.

    :param y_test: true labels of the test samples (array-like)
    :param y_pridect: predicted labels (array-like) with as many entries
        as ``y_test``
    :return: number of matching positions divided by the number of predictions
    """
    # Accept plain lists/tuples as well as ndarrays.
    y_test = np.asarray(y_test)
    y_pridect = np.asarray(y_pridect)
    assert y_pridect.shape[0] == y_test.shape[0], u'測試集和預測集的資料個數不相等'
    cnt = np.sum(y_test == y_pridect)
    # Divide by a float so the ratio cannot be truncated by integer division.
    return cnt / float(len(y_pridect))
client檔案進行測試:
from knn import KnnClassifier
from sklearn import datasets
from model_selection import train_test_split
from metrics import accuracy_score
import numpy as np
def main():
    """Fit a 3-nearest-neighbour classifier on the iris data and print its score."""
    knn = KnnClassifier(3)
    iris = datasets.load_iris()
    # NOTE(review): the unpacking order and the meaning of 0.7 depend on the
    # project's own train_test_split, which is not shown here -- confirm.
    x_train, y_train, x_test, y_test = train_test_split(iris.data, iris.target, 0.7)
    # fit() returns the classifier itself, so `classifier` is the same object as `knn`.
    classifier = knn.fit(x_train, y_train)
    y_pridect = classifier.predict(x_test)
    print(classifier.scores(y_pridect, y_test))


if __name__ == '__main__':
    main()