forked from justmarkham/DAT3
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path08_sklearn_knn_class.py
50 lines (42 loc) · 1.46 KB
/
08_sklearn_knn_class.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
CLASS: Introduction to scikit-learn with iris data
"""
import numpy as np
from sklearn.datasets import load_iris

# load the iris dataset that ships with scikit-learn
iris = load_iris()

# feature matrix goes in X (predictors), class labels go in y (response)
X = iris.data
y = iris.target

# inspect the dimensions of each; as bare expressions these only display
# a value when the lines are run in an interactive session
X.shape
y.shape
# predict y with KNN (K=1: each prediction is the label of the single nearest training point)
from sklearn.neighbors import KNeighborsClassifier  # import class
knn = KNeighborsClassifier(n_neighbors=1)  # instantiate the estimator
knn.fit(X, y)  # fit with data
# scikit-learn estimators expect 2D input of shape (n_samples, n_features);
# a single observation must therefore be wrapped as a list holding one row,
# otherwise predict() raises "Expected 2D array, got 1D array instead"
knn.predict([[3, 5, 4, 2]])  # predict for a new observation
iris.target_names[knn.predict([[3, 5, 4, 2]])]  # translate the predicted class index into its name
knn.predict([[3, 5, 2, 2]])
# predict for multiple observations at once (one row per observation)
X_new = [[3, 5, 4, 2], [3, 5, 2, 2]]
knn.predict(X_new)
# try a different value of K (K=5: prediction is decided by the 5 nearest training points)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
knn.predict(X_new)  # predictions
knn.predict_proba(X_new)  # predicted probabilities
# like predict(), kneighbors() requires 2D input, so the single query point
# is wrapped as one row; by default it returns (distances, indices)
knn.kneighbors([[3, 5, 4, 2]])  # distances to nearest neighbors (and identities)
# NOTE(review): 106 is presumably one of the neighbor indices returned above -- verify
np.sqrt(((X[106] - [3, 5, 4, 2])**2).sum())  # confirm Euclidean distance calculation
# compare the accuracy obtained with K=5 against K=1
# NOTE: score() is evaluated on the same X, y that were used for fitting,
# so both numbers are training accuracies
knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)  # fit() returns the estimator
knn.score(X, y)
knn = KNeighborsClassifier(n_neighbors=1).fit(X, y)
knn.score(X, y)
# swap in a different classifier: the same fit / predict / predict_proba /
# score calls used for KNN are repeated here with logistic regression
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression().fit(X, y)  # fit() returns the estimator
logreg.predict(X_new)  # predicted classes for the two new observations
logreg.predict_proba(X_new)  # predicted class probabilities
logreg.score(X, y)  # accuracy on the data used for fitting