import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.linalg import sqrtm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import confusion_matrix
We illustrate discriminant analysis using a digits dataset. I've created my own dataset with 40 samples from each class, for a total of 400 training samples and 400 test samples. Each data point is a 16x16 image, giving a feature vector of length 256. Specifically, there are 40 images for each of the 10 classes in both the training and the test set.
url = "https://liangfgithub.github.io/Data/digits.csv"
df = pd.read_csv(url)
# Split the data into training and testing sets
X = df.iloc[:400, 0:256].values
Y = df.iloc[:400, 256].values
X_test = df.iloc[400:, 0:256].values
Y_test = df.iloc[400:, 256].values
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(10, 3))
for i in range(10):
    image = X[i * 40].reshape((16, 16))
    axes[i // 5, i % 5].set_axis_off()
    axes[i // 5, i % 5].imshow(image, cmap=plt.cm.gray_r)
First, we run Linear Discriminant Analysis (LDA) and generate a confusion matrix on the test set. The test accuracy is approximately 74% (misclassification rate 26.25%).
dig_lda = LinearDiscriminantAnalysis()
dig_lda.fit(X, Y)
LinearDiscriminantAnalysis()
Y_pred = dig_lda.predict(X_test)
confusion_matrix(Y_pred, Y_test)
array([[29,  0,  0,  0,  1,  1,  2,  1,  1,  0],
       [ 0, 40,  2,  0,  3,  0,  1,  0,  1,  0],
       [ 3,  0, 22,  1,  2,  4,  0,  1,  4,  0],
       [ 2,  0,  5, 32,  1,  3,  1,  1,  4,  1],
       [ 0,  0,  1,  0, 21,  2,  1,  1,  1,  2],
       [ 3,  0,  0,  4,  0, 23,  0,  0,  1,  0],
       [ 0,  0,  4,  1,  1,  2, 33,  1,  0,  0],
       [ 1,  0,  1,  2,  2,  0,  1, 33,  2,  0],
       [ 2,  0,  4,  0,  3,  3,  1,  0, 25,  0],
       [ 0,  0,  1,  0,  6,  2,  0,  2,  1, 37]])
np.mean(Y_pred != Y_test)
0.2625
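As a quick sanity check, the "approximately 74%" accuracy can be read directly off the confusion matrix, since the correct predictions sit on its diagonal:
# Accuracy = sum of diagonal entries / total; error rate = 1 - accuracy.
cm = confusion_matrix(Y_pred, Y_test)
print(cm.trace() / cm.sum())      # about 0.74
print(1 - cm.trace() / cm.sum())  # matches the 0.2625 above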
Attempting Quadratic Discriminant Analysis (QDA) runs into trouble because the covariance matrix estimated for each group is rank-deficient (only 40 samples per class versus 256 features). We can circumvent this by regularizing the covariance estimates with reg_param.
dig_qda = QuadraticDiscriminantAnalysis(reg_param=0.1)
dig_qda.fit(X, Y)
/Users/feng_macpro/anaconda3/lib/python3.11/site-packages/sklearn/discriminant_analysis.py:935: UserWarning: Variables are collinear
  warnings.warn("Variables are collinear")
QuadraticDiscriminantAnalysis(reg_param=0.1)
Y_pred = dig_qda.predict(X_test)
confusion_matrix(Y_pred, Y_test)
array([[36,  0,  0,  0,  0,  0,  1,  0,  0,  0],
       [ 0, 39,  0,  0,  2,  0,  0,  0,  0,  0],
       [ 0,  0, 15,  2,  5,  1,  3,  1,  0,  0],
       [ 0,  0,  5, 33,  0,  3,  0,  0,  3,  2],
       [ 0,  0,  2,  0, 22,  2,  1,  0,  1,  3],
       [ 1,  0,  4,  1,  2, 26,  1,  0,  1,  0],
       [ 2,  0,  3,  0,  0,  2, 34,  0,  7,  0],
       [ 1,  1,  8,  3,  3,  4,  0, 36,  1,  3],
       [ 0,  0,  2,  1,  0,  2,  0,  0, 26,  0],
       [ 0,  0,  1,  0,  6,  0,  0,  3,  1, 32]])
np.mean(Y_pred != Y_test)
0.2525
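Roughly speaking, reg_param shrinks each per-class covariance estimate toward (a multiple of) the identity, which restores invertibility; the sketch below illustrates the idea only. The exact transformation scikit-learn applies internally is documented under QuadraticDiscriminantAnalysis and differs in the details.
# A minimal sketch of shrink-toward-identity regularization: with 40 samples
# per class and 256 features, each class covariance has rank at most 39;
# blending in a small multiple of the identity makes it full rank.
r = 0.1
Sigma_0 = np.cov(X[Y == 0], rowvar=False)  # class-0 covariance estimate
Sigma_0_reg = (1 - r) * Sigma_0 + r * np.eye(Sigma_0.shape[0])
print(np.linalg.matrix_rank(Sigma_0), np.linalg.matrix_rank(Sigma_0_reg))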
We now move on to Fisher's Discriminant Analysis (FDA). We project the data onto the FDA directions stored in the fitted LDA object (its scalings_ attribute); with 10 classes there are 10 - 1 = 9 such directions.
FDA_dir = dig_lda.scalings_
FDA_dir.shape
(256, 9)
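The number nine is not arbitrary: with K = 10 classes, the between-class covariance has rank at most K - 1, so there are at most nine meaningful discriminant directions. A quick check using the class means:
# The 10 class means span an affine subspace of dimension at most 9, so the
# covariance of the class means has rank at most 9 (here, exactly 9).
class_means = np.vstack([X[Y == k].mean(axis=0) for k in range(10)])
print(np.linalg.matrix_rank(np.cov(class_means, rowvar=False)))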
For illustration purposes, we display the projection of the data onto the first two FDA directions. On the left, you can see that the training data separates well into the ten classes using just these two directions. However, the test data on the right does not show the same clean separation. This discrepancy arises from overfitting to the training data.
Please be cautious: demonstrating effective class separation on the training data (similar to showing a low training error) does not guarantee model performance; it's essential to evaluate class separation on the test data to assess its practical applicability.
F = X @ FDA_dir
F_test = X_test @ FDA_dir
fig, axes = plt.subplots(1, 2)
axes[0].scatter(F[:, 0], F[:, 1], marker='None')
for i in range(F.shape[0]):
    axes[0].annotate(int(Y[i]), (F[i, 0], F[i, 1]),
                     c=cm.tab10(Y[i] / 10), ha="center", va="center")
axes[1].scatter(F_test[:, 0], F_test[:, 1], marker='None')
for i in range(F_test.shape[0]):
    axes[1].annotate(int(Y_test[i]), (F_test[i, 0], F_test[i, 1]),
                     c=cm.tab10(Y_test[i] / 10), ha="center", va="center")
fig.set_figwidth(10)
fig.tight_layout()
You might wonder if considering different combinations of the nine directions would yield better results. I encourage you to experiment with the code, but you'll find that the overfitting issue persists. While the training data separates effectively, the test data does not replicate the same pattern.
Xid = [0, 0, 0, 1]
Yid = [1, 2, 3, 2]
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
for i in range(4):
    axes[i // 2, i % 2].scatter(F_test[:, Xid[i]],
                                F_test[:, Yid[i]], marker='None')
    for j in range(F_test.shape[0]):
        axes[i // 2, i % 2].annotate(int(Y_test[j]), (F_test[j, Xid[i]], F_test[j, Yid[i]]),
                                     c=cm.tab10(Y_test[j] / 10), ha="center", va="center")
Let's check the FDA calculation by computing the directions ourselves.
Y_dummy = pd.get_dummies(Y).to_numpy()
# X_fitted: each row of X replaced by its group mean
X_fitted = Y_dummy @ np.linalg.solve(Y_dummy.T @ Y_dummy, Y_dummy.T) @ X
# residuals: each X_i minus its group mean
residuals = X - X_fitted
W = np.cov(residuals, rowvar=False)  # within-class covariance
B = np.cov(X_fitted, rowvar=False)   # between-class covariance
# square root of W^-1; one may also use an eigendecomposition to compute this
A = sqrtm(np.linalg.inv(W))
tmp_eigen = np.linalg.eig(A @ B @ A.T)
tmp = np.real(A @ tmp_eigen[1][:, 0:9])  # first 9 eigenvectors
plt.scatter(x=range(X.shape[1]), y=tmp_eigen[0], marker='.')
/Users/feng_macpro/anaconda3/lib/python3.11/site-packages/matplotlib/collections.py:192: ComplexWarning: Casting complex values to real discards the imaginary part
  offsets = np.asanyarray(offsets, float)
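As the comment in the code above notes, W^-1/2 can also be obtained from an eigendecomposition instead of sqrtm. Here is a minimal sketch of that route (assuming W is positive definite, which it should be here since we just inverted it):
# Eigendecomposition route: W = V diag(lam) V^T, so W^{-1/2} = V diag(1/sqrt(lam)) V^T.
# This should agree with sqrtm(np.linalg.inv(W)) up to numerical error.
lam, V = np.linalg.eigh(W)
A_eig = V @ np.diag(1.0 / np.sqrt(lam)) @ V.T
print(np.abs(A_eig - np.real(A)).max())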
The nine directions we computed, stored in tmp, are essentially the same as the ones returned by LDA, up to a change of scale.
plt.plot(FDA_dir[:, 3], tmp[:, 3])
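A quick numerical check of the "same up to scale" claim (assuming our eigenvectors come back in the same order as the LDA scalings): two vectors that differ only by a scale factor have correlation plus or minus one.
# Correlation between each of our directions and the corresponding LDA scaling;
# absolute values near 1 indicate agreement up to a scale change.
for k in range(9):
    r = np.corrcoef(FDA_dir[:, k], tmp[:, k])[0, 1]
    print(k, round(abs(r), 4))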
Obtain top 100 PCs:
Sigma = np.cov(X, rowvar=False)
dig_eigen = np.linalg.eig(Sigma)
# pick the top 100 PCA directions (this assumes np.linalg.eig returns the
# eigenvalues in decreasing order; sort them explicitly if that is not the case)
PCA_dir = dig_eigen[1][:, 0:100]
PCA_dir
array([[-4.75066347e-04, -4.99943037e-04,  4.29006257e-04, ...,
        -6.16121275e-03,  1.26913714e-02,  7.39568834e-03],
       [-7.34706763e-04, -3.48812158e-03,  1.46637077e-04, ...,
        -1.95420167e-02,  5.52468388e-02,  3.49975391e-02],
       [ 1.26607549e-03, -1.34420853e-02, -8.99609485e-03, ...,
        -2.27865570e-02,  1.24522001e-01,  5.23179532e-02],
       ...,
       [ 6.30263391e-03,  2.73453019e-03, -4.59943519e-03, ...,
         3.37220295e-02, -1.02098521e-01, -2.82966411e-02],
       [ 2.05354388e-03,  8.54348239e-04, -1.09852628e-03, ...,
         1.61529479e-02, -1.05474697e-02, -1.57337414e-03],
       [ 1.29227991e-04,  4.07300211e-05,  1.26725020e-03, ...,
        -2.40525176e-03,  1.23515611e-02, -6.97640165e-04]])
Note that directions from PCA are orthogonal, but directions from FDA are not. FDA directions are orthogonal with respect to the within-class covariance matrix W.
print(np.round(PCA_dir.T @ PCA_dir, 5))
print(np.round(FDA_dir.T @ W @ FDA_dir, 5))
[[ 1.  0. -0. ... -0.  0.  0.]
 [ 0.  1. -0. ... -0. -0. -0.]
 [-0. -0.  1. ... -0.  0.  0.]
 ...
 [-0. -0. -0. ...  1.  0.  0.]
 [ 0. -0.  0. ...  0.  1.  0.]
 [ 0. -0.  0. ...  0.  0.  1.]]
[[ 0.97744 -0.       0.       0.      -0.      -0.      -0.       0.       0.     ]
 [-0.       0.97744  0.       0.      -0.       0.      -0.       0.       0.     ]
 [ 0.       0.       0.97744 -0.       0.       0.      -0.      -0.       0.     ]
 [ 0.       0.      -0.       0.97744 -0.       0.       0.       0.      -0.     ]
 [-0.      -0.       0.      -0.       0.97744 -0.       0.      -0.      -0.     ]
 [-0.      -0.       0.       0.      -0.       0.97744  0.       0.       0.     ]
 [-0.      -0.      -0.       0.       0.       0.       0.97744  0.      -0.     ]
 [ 0.       0.      -0.       0.      -0.       0.       0.       0.97744  0.     ]
 [ 0.       0.       0.      -0.      -0.       0.      -0.       0.       0.97744]]
For this digits dataset, applying Principal Component Analysis (PCA) to reduce the dimensionality from the original 256 pixel values to 100, and then running Linear Discriminant Analysis (LDA) on the 100 PC scores, improves the classification accuracy: the number of misclassified test images drops from over 100 to 51.
newX = X @ PCA_dir
new_dig_lda = LinearDiscriminantAnalysis()
new_dig_lda.fit(newX, Y)
newX_test = X_test @ PCA_dir
Y_pred_new = new_dig_lda.predict(newX_test)
confusion_matrix(Y_pred_new, Y_test)
array([[39,  0,  1,  0,  0,  2,  0,  0,  0,  0],
       [ 0, 40,  0,  0,  3,  0,  0,  0,  0,  0],
       [ 0,  0, 33,  0,  2,  2,  2,  0,  0,  0],
       [ 1,  0,  2, 35,  0,  4,  0,  0,  2,  1],
       [ 0,  0,  1,  0, 31,  0,  1,  3,  1,  0],
       [ 0,  0,  2,  1,  0, 29,  0,  0,  2,  0],
       [ 0,  0,  0,  0,  1,  0, 35,  0,  0,  0],
       [ 0,  0,  0,  3,  0,  0,  0, 35,  0,  1],
       [ 0,  0,  1,  1,  0,  2,  2,  0, 34,  0],
       [ 0,  0,  0,  0,  3,  1,  0,  2,  1, 38]])
np.mean(Y_pred_new != Y_test)
0.1275
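For completeness, here is a sketch of the same PCA-then-LDA idea using scikit-learn's own PCA in a pipeline. Since PCA centers the data before projecting (the manual projection above does not) and component signs and ordering can differ, the numbers may not match exactly, but they should be very close.
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

# 100-component PCA followed by LDA, fit end-to-end on the training data.
pca_lda = make_pipeline(PCA(n_components=100), LinearDiscriminantAnalysis())
pca_lda.fit(X, Y)
print(np.mean(pca_lda.predict(X_test) != Y_test))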
However, it's worth noting that the class separation on the training data is no longer as pronounced as before. This reduction in separation is expected given the substantial dimension reduction from 256 to 100. Although the training data separates less cleanly, this approach produces more consistent results between the training and test sets, which helps mitigate overfitting.
F = newX @ new_dig_lda.scalings_
F_test = newX_test @ new_dig_lda.scalings_
fig, axes = plt.subplots(1, 2)
axes[0].scatter(F[:, 0], F[:, 1], marker='None')
for i in range(F.shape[0]):
    axes[0].annotate(int(Y[i]), (F[i, 0], F[i, 1]),
                     c=cm.tab10(Y[i] / 10), ha="center", va="center")
axes[1].scatter(F_test[:, 0], F_test[:, 1], marker='None')
for i in range(F_test.shape[0]):
    axes[1].annotate(int(Y_test[i]), (F_test[i, 0], F_test[i, 1]),
                     c=cm.tab10(Y_test[i] / 10), ha="center", va="center")
fig.set_figwidth(10)
fig.tight_layout()