We'll use the csaps package in Python to fit the smoothing spline. You may need to install it first: https://pypi.org/project/csaps/
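For example, it can be installed from PyPI with pip:
pip install csaps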
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import seaborn as sns
from scipy.interpolate import splev
from scipy.optimize import minimize_scalar
import matplotlib as mpl
import matplotlib.pyplot as plt
from csaps import csaps
This is a simulated example from ESL Chapter 5.5.2. The true function is
$$ f(x) = \frac{\sin (12(x + 0.2))}{x+0.2}, \quad x \in [0, 1]. $$
def f(x):
    return np.sin(12 * (x + 0.2)) / (x + 0.2)
np.random.seed(1234)
n = 30     # number of observations
err = 1    # noise standard deviation
x = np.sort(np.random.uniform(size=n))
y = f(x) + np.random.normal(0, err, n)
fx = np.arange(1, 51, 1)/50   # evaluation grid of 50 points on (0, 1]
fy = f(fx)
We plot the observed data points (red dots) along with the true function (black curve).
sns.set()
mpl.rcParams['figure.dpi'] = 300
plt.scatter(x, y, color="red", s=2)
plt.plot(fx, fy, color="black", linewidth=1)
plt.xlabel("x")
plt.ylabel("y")
Smoothing splines are implemented by the Python package csaps. However, it solves a slightly different problem than equation (5.9) in ESL:
\begin{align}
L(f, \lambda) &= \sum_{i=1}^N (y_i - f(x_i))^2 + \lambda \int f''(t)^2\,dt && \text{(ESL Formulation)} \\
L'(f, p) &= p\sum_{i=1}^N (y_i - f(x_i))^2 + (1-p) \int f''(t)^2\,dt && \text{(CSAPS Formulation)}
\end{align}
Given a $\lambda$ value in the ESL formulation, the equivalent $p$ value in the CSAPS formulation is $p = 1/(1 + \lambda)$.
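To see why, divide the CSAPS objective by $p$ (for $0 < p < 1$):
$$ \frac{1}{p} L'(f, p) = \sum_{i=1}^N (y_i - f(x_i))^2 + \frac{1-p}{p} \int f''(t)^2\,dt, $$
which matches the ESL objective with $\lambda = (1-p)/p$, i.e. $p = 1/(1+\lambda)$.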
The function csaps takes several arguments; in particular, the axis argument lets us fit models over different sets of training $y$-values at once. First, we will define a function that fits a model parameterized by $\lambda$ instead of $p$.
def smooth(x, y, fx, lam, axis=-1):
    # convert lambda to the csaps smoothing parameter p = 1/(1 + lambda)
    return csaps(x, y, fx, smooth=1/(lam + 1), axis=axis)
Given a $\lambda$ value, the degrees of freedom are given by $\text{df} = \text{trace}(\textbf{S}_{\lambda})$. Next, we will define a function that fits a model parameterized by $\text{df}$ instead of $\lambda$. To find the $\lambda$ corresponding to a given $\text{df}$, we simply minimize $\ell(\lambda) = (\text{trace}(\textbf{S}_\lambda) - \text{df})^2$ numerically.
The S_lam function uses a clever way to "reverse engineer" the smoother matrix $S_{\lambda}$ from a smoothing spline fit with the smooth function. By feeding in $\mathbf{y}$ vectors that contain a single "1" and are otherwise "0" (the columns of the identity matrix), we pull out each column of $S_{\lambda}$ as the spline's prediction. At the end, to ensure that the matrix is symmetric (as it should be by the nature of the smoother matrix), we average it with its transpose.
Note that this approach works because the smoother matrix is determined by $x$ (and $\lambda$) alone; it does not depend on $y$.
def S_lam(x, lam):
    n = len(x)
    Y = np.identity(n)                  # each column is a unit vector e_j
    A = smooth(x, Y, x, lam, axis=0)    # column j is S_lambda e_j
    return (A + A.T)/2                  # symmetrize to remove numerical asymmetry
def df_to_lam(x, df):
    def g(lam):
        return (S_lam(x, lam).trace() - df)**2
    # search for the lambda whose trace(S_lambda) matches the target df
    return minimize_scalar(g, bounds=(0, 10), method='bounded',
                           options={'xatol': 1e-10}).x
def smooth_df(x, y, fx, df, axis=-1):
    return smooth(x, y, fx, df_to_lam(x, df), axis)
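As a quick sanity check (not from ESL, just to confirm the construction), the reverse-engineered matrix should reproduce the spline's fitted values for any $y$, since the smoothing spline is a linear smoother:
lam_check = 0.001                        # arbitrary lambda, only for this check
fit_direct = smooth(x, y, x, lam_check)
fit_matrix = S_lam(x, lam_check) @ y     # multiply the recovered smoother matrix by y
print(np.allclose(fit_direct, fit_matrix))   # expected to print True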
We fit smoothing spline models with various dfs. A lower df corresponds to a higher $\lambda$, so roughness is penalized more heavily and the fitted function appears flatter. As df increases, the fitted spline becomes more flexible or "wiggly".
dfs = [5, 9, 15, 20]
fig = plt.figure()
for i in range(len(dfs)):
    plt.subplot(2, 2, i+1)
    plt.scatter(x, y, color="red", s=2)
    plt.plot(fx, fy, color="gray", linewidth=1, label="True Function")
    plt.plot(fx, smooth_df(x, y, fx, dfs[i]), color="blue",
             linestyle="dashed", linewidth=1, label="Fitted Line")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.title("df = " + str(dfs[i]))
    plt.legend(borderpad=0.2, prop={'size': 8})
fig.tight_layout()
The DR (Demmler & Reinsch) basis functions are derived from the eigenvectors of the smoother matrix $S_{\lambda}$.
We obtain two smoother matrices, S4 and S9, corresponding to df = 4 and df = 9, and then perform an eigen-decomposition on each of them.
The eigenvectors of the smoother matrix do not depend on the $\lambda$ value. Consequently, the eigenvectors derived from S4 and S9 are identical, up to possible sign differences. This gives us the visual representation of the DR (Demmler & Reinsch) basis functions.
These functions are ordered by increasing $d_i$ (equivalently, by decreasing eigenvalues of $S_\lambda$). The first two values, $d_1$ and $d_2$, are zero, so the corresponding basis functions are linear and are intentionally not penalized. As $d_i$ increases, the associated basis functions exhibit more oscillations or "wiggles."
Recall that the shrinkage factor is determined by $1/(1 + \lambda d_i)$. As $d_i$ grows, this factor diminishes, leading to increased shrinkage. This is intuitive because larger $d_i$ values signify more oscillatory basis functions. To mitigate the roughness penalty, it's logical to enforce more stringent shrinkage on coefficients tied to these oscillatory functions.
def S_df(x, df):
    return S_lam(x, df_to_lam(x, df))
S4 = S_df(fx, 4)
S9 = S_df(fx, 9)
# use eigh instead of eig for more accurate results
val4, vec4 = np.linalg.eigh(S4)
val9, vec9 = np.linalg.eigh(S9)
rev_ord = np.arange(len(fx) - 1, -1, -1)  # sort descending instead of ascending
val4, vec4 = val4[rev_ord], vec4[:, rev_ord]
val9, vec9 = val9[rev_ord], vec9[:, rev_ord]
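To connect these eigenvalues back to the shrinkage factor $1/(1 + \lambda d_i)$ discussed above, we can recover the first few $d_i$ numerically; a minimal sketch reusing val4 and df_to_lam:
lam4 = df_to_lam(fx, 4)          # lambda corresponding to df = 4
d4 = (1 / val4[:5] - 1) / lam4   # invert rho_i = 1/(1 + lambda * d_i)
print(np.round(d4, 2))           # the first two entries should be (numerically) zero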
fig = plt.figure(figsize=(18, 10))
for i in range(15):
    plt.subplot(3, 5, i+1)
    plt.plot(fx, vec4[:, i], color="red", label="df = 4")
    plt.plot(fx, vec9[:, i], color="blue", linestyle="dotted", label="df = 9")
    plt.xlabel("x")
    plt.ylabel("y")
    if i == 0:
        plt.legend()
fig.tight_layout()
Next, we plot the eigenvalues. Note that the first two eigenvalues are always exactly 1, since $d_1 = d_2 = 0$ (the linear components are never shrunk).
plt.scatter(np.arange(50), val4, s=10, marker='^', color="green",
            facecolors='none', label="df = 4")
plt.scatter(np.arange(50), val9, s=10, marker='d', color="blue",
            facecolors='none', label="df = 9")
plt.legend()
Check the effective degrees of freedom: the traces of S4 and S9 should match the requested df values.
print(S4.trace())
print(S9.trace())
3.999999999514415
8.9999999699797
We use leave-one-out cross-validation (LOO-CV) and generalized cross-validation (GCV) to select df. Just as with ridge regression, smoothing splines can have fractional dfs.
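For a linear smoother with smoother matrix $S_\lambda$, both criteria can be computed from a single fit using the standard leave-one-out identity for linear smoothers (ESL Chapter 5):
$$ \text{CV}(\hat{f}_\lambda) = \frac{1}{N}\sum_{i=1}^N \left( \frac{y_i - \hat{f}_\lambda(x_i)}{1 - S_\lambda(i,i)} \right)^2, \qquad \text{GCV}(\hat{f}_\lambda) = \frac{1}{N}\sum_{i=1}^N \left( \frac{y_i - \hat{f}_\lambda(x_i)}{1 - \text{trace}(S_\lambda)/N} \right)^2, $$
which is exactly what the lcv and gcv functions below implement.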
def gcv(x, y, df):
    fit = smooth_df(x, y, x, df)
    n = len(y)
    return (((y - fit) ** 2) / ((1 - df/n) ** 2)).mean()
def lcv(x, y, df):
    fit = smooth_df(x, y, x, df)
    lev = np.diag(S_df(x, df=df))   # leverages S_lambda(i, i)
    return (((y - fit) ** 2) / ((1 - lev) ** 2)).mean()
dfs = 2 + np.arange(1, 40)/2   # candidate dfs from 2.5 to 21.5
m = len(dfs)
my_lcv = np.zeros(m)
my_gcv = np.zeros(m)
for i in range(m):
    df = dfs[i]
    my_gcv[i] = gcv(x, y, df)
    my_lcv[i] = lcv(x, y, df)
plt.plot(dfs, my_lcv, color='red')
plt.scatter(dfs, my_lcv, color='red', marker='d', s=10, label='LOO CV')
plt.plot(dfs, my_gcv, color='blue')
plt.scatter(dfs, my_gcv, color='blue', marker='s', s=10, label='GCV')
plt.legend()
plt.xlabel("df")
plt.ylabel("CV Score")
opt_df = dfs[np.argmin(my_gcv)]
print(opt_df)
10.5
plt.scatter(x, y, color="red", s=2)
plt.plot(fx, fy, color="gray", linewidth=1, label="True Function")
gx = np.arange(1, 101)/100   # finer grid for plotting the fit
plt.plot(gx, smooth_df(x, y, gx, opt_df), color="blue",
         linestyle="dashed", linewidth=1, label="Fitted Line")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()