import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel
def k(z):
return (1.0 / np.sqrt(2 * np.pi)) * np.exp(-z**2 / 2)
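# Note: k is the standard normal pdf. The 1/sqrt(2*pi) constant is harmless but
# not strictly needed here, since f below rescales the weights to sum to one anyway.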
def f(t, x, y, b): # return f(t) using data (x, y)
assert x.size == y.size
    w = k((t-x)/b) # vector of unnormalized weights; rescaled to sum to one on next line
w /= np.sum(w)
assert np.isclose(np.sum(w), 1.0)
return np.sum(w * y)
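The function f above is the Nadaraya-Watson kernel regression estimate: each prediction is a weighted average of the $y_i$, with weights given by bell curves centered at the $x_i$,

$$\hat{f}(t) = \sum_{i=1}^{N} w_i\, y_i, \qquad w_i = \frac{k\big((t - x_i)/b\big)}{\sum_{j=1}^{N} k\big((t - x_j)/b\big)},$$

where the bandwidth $b$ controls how wide each bell curve is.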
Make fake training data with random $x \in [0, 3]$ and $y = e^x \sin(x) + \epsilon$, where $\epsilon \sim N(0, 1)$:
N = 20
low = 0
high = 3
rng = np.random.default_rng(seed=0)
x = np.sort(rng.uniform(low=low, high=high, size=N)) # sort for left-to-right line plot
noise = rng.normal(loc=0, scale=1, size=N)
y = np.exp(x) * np.sin(x) + noise # I made up a nonlinear function.
X = x.copy().reshape(-1, 1)
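As a quick sanity check, f can be evaluated at a single point (the bandwidth 0.35 here is just a reasonable guess; it reappears as the 'nice fit' value below):

print(f(1.5, x, y, b=0.35)) # kernel-weighted average of y near x = 1.5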
Run this code chunk three times, setting the three flags at its top as follows:

1. show_k_bell_curves = True, show_f_from_formulas = False, show_scikit_learn_model = False
2. show_k_bell_curves = True, show_f_from_formulas = True, show_scikit_learn_model = False
3. show_k_bell_curves = True, show_f_from_formulas = True, show_scikit_learn_model = True
show_k_bell_curves = True
show_f_from_formulas = True
show_scikit_learn_model = True
kernels = [ConstantKernel(constant_value=1.0) + WhiteKernel(noise_level=1.0), # underfit
ConstantKernel() * RBF(length_scale=1.0, length_scale_bounds=(1e-5, 1e5)), # overfit
ConstantKernel() * RBF() + WhiteKernel() # nice fit
]
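# For reference (standard scikit-learn kernel definitions), the third kernel is
#   k(x, x') = sigma^2 * exp(-(x - x')**2 / (2 * length_scale**2)) + noise_level * 1[x == x']
# i.e. a scaled RBF for the smooth signal plus WhiteKernel noise on the diagonal;
# fit() tunes sigma, length_scale, and noise_level by maximizing the log-marginal likelihood.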
titles = ['Underfit', 'Overfit', 'Good fit']
for kernel_i in range(len(kernels)):
plt.plot(x, y, '.', color='black', markersize=10, label='data') # plot data
plt.plot(x, y - noise, linestyle='dotted', color='gray', label='(unknown truth)') # plot unknown truth
if show_k_bell_curves:
# for understanding, add plots of k(z) bell curves to understand weights {w_i}
        b = [5, .05, .35] # I chose these by trial-and-error; a data-driven
        # alternative (leave-one-out CV) is sketched after this chunk.
        # - The first widens the bell curves so much that we get the underfit f(x) = np.mean(y).
        # - The second makes them so narrow that we get the overfit f(x) =~ 1NN(x) (kNN with k=1) for most x.
        # - The third gives a nice fit.
        x_plot_bells = np.linspace(start=low, stop=high, num=1000)
        y_scale_hack = 8 # make bell curves a nice height; it is irrelevant to the math
        for i in np.arange(N):
            y_plot_bells = y_scale_hack * k((x[i] - x_plot_bells) / b[kernel_i])
            label = 'k(x) curves' if i == 0 else '' # only label first of N curves for legend
            plt.plot(x_plot_bells, y_plot_bells, linestyle='dashdot', label=label)
if show_f_from_formulas:
# for understanding, add f(x) calculated from formulas
y_hat_formulas = np.zeros(shape=N*3) # Use N*3 to see more detail.
x_plot_formulas = np.linspace(low, high, N*3)
for i in np.arange(N*3):
y_hat_formulas[i] = f(x_plot_formulas[i], x, y, b[kernel_i])
plt.plot(x_plot_formulas, y_hat_formulas, linestyle='dashed',
                 color='lime', label='f(x) from formulas')
if show_scikit_learn_model:
model = GaussianProcessRegressor(kernel=kernels[kernel_i], n_restarts_optimizer=10, random_state=0)
model.fit(X, y)
print(f'model={model},\nmodel.kernel_={model.kernel_},\nmodel.score(X, y)={model.score(X, y):.3}')
y_hat = model.predict(X)
# plot model
# next line: for kernels[1], plot at x and some other points to see wild overfit
x_plot = np.sort(np.concatenate([x, np.linspace(start=low, stop=high, num=N)]))
y_hat_plot = model.predict(x_plot.reshape(-1, 1))
        plt.plot(x_plot, y_hat_plot, '-', color='black', label='model')
plt.plot(x, y_hat, '.', color='red', label=r'$\hat{y}$')
plt.ylim(-1, 8)
plt.title(titles[kernel_i])
plt.legend()
plt.show()
model=GaussianProcessRegressor(kernel=1**2 + WhiteKernel(noise_level=1), n_restarts_optimizer=10, random_state=0),
model.kernel_=4.29**2 + WhiteKernel(noise_level=8.68),
model.score(X, y)=-0.00121

model=GaussianProcessRegressor(kernel=1**2 * RBF(length_scale=1), n_restarts_optimizer=10, random_state=0),
model.kernel_=5.06**2 * RBF(length_scale=0.00618),
model.score(X, y)=1.0

model=GaussianProcessRegressor(kernel=1**2 * RBF(length_scale=1) + WhiteKernel(noise_level=1), n_restarts_optimizer=10, random_state=0),
model.kernel_=3.85**2 * RBF(length_scale=1.05) + WhiteKernel(noise_level=0.388),
model.score(X, y)=0.963
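(model.score is R² on the training data, so the overfit kernel's score of 1.0 just says it interpolates the training points; its tiny fitted length_scale of 0.00618 plays the same role as the too-narrow b = .05 bell curves above.)

The bandwidths in b were picked by eye. A data-driven alternative is leave-one-out cross-validation: drop each $(x_i, y_i)$ in turn, predict it from the remaining points, and keep the bandwidth with the smallest squared error. A minimal sketch (the function name loocv_sse and the candidate grid are my own choices):

def loocv_sse(b, x, y):
    # sum of squared leave-one-out prediction errors for bandwidth b
    idx = np.arange(x.size)
    return sum((y[i] - f(x[i], x[idx != i], y[idx != i], b)) ** 2 for i in idx)

candidates = np.linspace(0.05, 1.0, 25)
b_best = min(candidates, key=lambda b_: loocv_sse(b_, x, y))
print(f'b_best={b_best:.3}')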