K Nearest Neighbors

Prompt: Implement K Nearest Neighbours algorithm. Assume X_train contains a set of N d-dimensional data points. Assume y_train are integer classification labels


class KNN:
    def __init__(self, k):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = X_train # (N, d)
        self.y_train = y_train # (N, )

    def predict(self, x):
        """
        Args:
            x : numpy array of shape (M, d)

        Returns:
            numpy array of shape (M,)
        """

        M, _ = x.shape

        a2 = np.square(self.X_train).sum(axis=1) # (N,)
        b2 = np.square(x).sum(axis=1) # (M,)
        ab = self.X_train @ x.T # (N, M)

        dists = a2[:, None] + b2[None, :] - 2*ab # (N, M) where dists[i, j] is distance between training point i and query point j

        topk = np.argsort(dists, axis=0)[:self.k] # (k, M)

        preds = self.y_train[topk.flatten()].reshape(topk.shape) # (k, M)


        # now just need to return the mode along axis 0 of preds, something like `np.mode(preds, axis=0)`

        out = np.zeros(M)
        for i, row in enumerate(preds.T): # row is shape (k,)
            u, c = np.unique(row, return_counts=True)

            out[i] = u[c.argmax()]


        return out