K Nearest Neighbors
Prompt: Implement K Nearest Neighbours algorithm. Assume X_train contains a set of N d-dimensional data points. Assume y_train are integer classification labels
class KNN:
def __init__(self, k):
self.k = k
def fit(self, X_train, y_train):
self.X_train = X_train # (N, d)
self.y_train = y_train # (N, )
def predict(self, x):
"""
Args:
x : numpy array of shape (M, d)
Returns:
numpy array of shape (M,)
"""
M, _ = x.shape
a2 = np.square(self.X_train).sum(axis=1) # (N,)
b2 = np.square(x).sum(axis=1) # (M,)
ab = self.X_train @ x.T # (N, M)
dists = a2[:, None] + b2[None, :] - 2*ab # (N, M) where dists[i, j] is distance between training point i and query point j
topk = np.argsort(dists, axis=0)[:self.k] # (k, M)
preds = self.y_train[topk.flatten()].reshape(topk.shape) # (k, M)
# now just need to return the mode along axis 0 of preds, something like `np.mode(preds, axis=0)`
out = np.zeros(M)
for i, row in enumerate(preds.T): # row is shape (k,)
u, c = np.unique(row, return_counts=True)
out[i] = u[c.argmax()]
return out