Predicting one-dimensional cellular automata¶
Preamble: Run the cells below to import the necessary Python packages
- This notebook is modified from notebooks originally developed and used in Pankaj Mehta's ML for physics course at Boston University. Please check out those notebooks and associated textbooks for additional details and exercises.
## Preamble / required packages
import numpy as np
np.random.seed(0)
# Import local plotting functions and in-notebook display functions
import matplotlib.pyplot as plt
from IPython.display import Image, display
%matplotlib inline
import warnings
# Comment this out to activate warnings
warnings.filterwarnings('ignore')
## Set nicer default colors
plt.rcParams['image.cmap'] = 'PuBu'
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=[[1.0, .3882, .2784],
[0.0, 0.6, 0.5],
[1.0, .6471, 0.0],
[0.0, 0.4471, 0.7412],
[0.5, 0.5, 0.5],
[0.7490, 0, 0.7490],
[0.0, 0.0, 0.0]])
plt.rcParams['lines.markersize'] = 10
Supervised learning¶
Given an input $X$, construct a function that assigns it a label $\hat{y}$.
Regression: $\hat{y}$ is a continuous variable. For example, given a picture of a person, predict their age. In physics, a common example is to predict the energy of a particle given its momentum, or forecast the next step in a time series.
Classification: $\hat{y}$ is a discrete variable. For example, given a picture of an animal, predict whether it is a cat or a dog. In physics, a common example is to predict whether a phase is ordered or disordered, or to detect whether an observed event is background, signal, or anomaly.
The function $\hat{y} = f_\theta(X)$ is learned from many instances of labelled data comprising $X \in \mathbb{R}^{N_\text{data} \times N_\text{features}}$ and known $y \in \mathbb{R}^{N_\text{data}}$ pairs. The "weights" or parameters $\theta$ are adjusted during training.
Supervised learning as inferring a generator¶
- We can see supervised learning as the process of learning a generator for a dataset: given a set of points, can we approximate the underlying process that produced those points?
- Forecasting: given some past values, predict future values
- Regression: Given a known generator with unknown parameters (like a quadratic Hamiltonian with unknown amplitudes), infer those amplitudes
Classification (most famous example in ML): Given examples of labelled images, states, etc., predict the class of unlabelled data. We can think of the learned decision boundary as defining a generator of new examples belonging to that class (see conditional GANs, etc.)
Load experimental measurements of an unknown spin chain¶
Suppose we have a spin chain with $L$ spins evolving in time
Our data consists of $N_\text{data}$ measurements of binary microstates $\mathbf{s} \in \{0,1\}^L$ corresponding to spins.
microstates = np.load('../resources/spin_chain_microstates.npy', allow_pickle=True)[:500]
plt.figure(figsize=(8, 8))
plt.imshow(microstates.T)
plt.ylabel("Lattice Position")
plt.xlabel("Time")
plt.plot(microstates[0])
plt.plot(microstates[1])
Forecasting as a supervised learning problem¶
Looking at the data, we notice that the spins appear to exhibit temporal correlations: the state at time $t$ is correlated with the state at time $t-1$.
One hypothesis is that the spins are evolving according to Markovian dynamics, in which the state of the lattice at time $t$ depends only on the state at time $t-1$.
$$ \mathbf{s}_i = f(\mathbf{s}_{i-1}) $$
We don't know the form of $f$, but we can try to learn it from the data. In order to frame this as a supervised learning problem, we therefore think of each data point as a given spin configuration $\mathbf{s}_{i}$ and its label as the next spin configuration $\mathbf{s}_{i+1}$.
Note how this concept of supervised learning differs from image classification: here, the label is not a single number, like an integer or a class label, but an entire vector with the same shape as the input data.
We will start by inspecting the data $\mathbf{s}_i$ and the regression target $\mathbf{s}_{i+1}$.
plt.plot(microstates[10], label="Time 10")
plt.plot(microstates[11], label="Time 11")
plt.xlabel("Lattice Position")
plt.ylabel("Spin Value")
plt.legend()
Training and testing splits¶
In order to frame this problem as supervised learning, we need to split the data into training and testing sets. We will use the first 80% of the data for training and the last 20% for testing.
By convention, in supervised learning we refer to the input data as $X$ and the target as $y$. In this case, $X \in \mathbb{Z}^L$ will be the spins at time $t$ and $y \in \mathbb{Z}^L$ will be the spins at time $t+1$.
X_all, y_all = microstates.copy()[:-1], microstates.copy()[1:]
print("X_all shape: ", X_all.shape)
print("y_all shape: ", y_all.shape, "\n")
X_all shape:  (499, 160)
y_all shape:  (499, 160)
# Data matrix / design matrix always has shape (n_samples, n_features)
X_train, X_test = X_all[:400], X_all[400:]
y_train, y_test = y_all[:400], y_all[400:]
print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape, "\n")
print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)
X_train shape:  (400, 160)
y_train shape:  (400, 160)
X_test shape:  (99, 160)
y_test shape:  (99, 160)
A note on flattening¶
For our problem, the input data is a binary vector $\mathbf{s}(t)$ of length $L$, and the target is a binary vector $\mathbf{s}(t+1)$ of length $L$. Our training dataset $X \in \mathbb{R}^{N_\text{data} \times L}$ has the typical form of a supervised learning dataset.
However, if we had a multidimensional dataset, we would need to flatten it into 1D feature vectors. For example, if we were studying a two-dimensional lattice system, each snapshot of the system would be an $L \times L$ matrix, and we might have $N_\text{data}$ snapshots. Our training data would then have shape $N_\text{data} \times L \times L$, and we would flatten each snapshot to obtain a design matrix $X \in \mathbb{R}^{N_\text{data} \times L^2}$.
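As a minimal sketch of that flattening step, with hypothetical array shapes chosen purely for illustration:
import numpy as np
n_data, L = 100, 16  # hypothetical: 100 snapshots of a 16 x 16 lattice
snapshots = np.random.randint(0, 2, size=(n_data, L, L))
# Flatten each L x L snapshot into a length-L^2 feature vector
X = snapshots.reshape(n_data, -1)
print(X.shape)  # (100, 256)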
Fitting a linear model with least squares¶
What is the simplest function we can think of that maps a vector $\mathbf{s}_{i} \in \mathbb{R}^{L}$ to another vector $\mathbf{s}_{i+1} \in \mathbb{R}^{L}$? Recall that the linear model is defined as $$ \mathbf{s}_{i + 1} = A \cdot \mathbf{s}_{i} $$ where $A \in \mathbb{R}^{L \times L}$.
In our case, the features are the current microstates $\mathbf{s}_{i}$, and the targets are the subsequent microstates $\mathbf{s}_{i+1}$. We therefore view linear regression as a data-driven method for learning a Markovian dynamical system that evolves the spins in time.
Given a dataset $\mathbf{X} \in \mathbb{R}^{N_\text{data} \times L}$ and regression target $\mathbf{y} \in \mathbb{R}^{N_\text{data} \times L}$, we can use the least squares method to find the matrix $A$ that best fits the data. The least squares solution is given by $$ A = \left( \mathbf{X}^T \mathbf{X} \right)^{-1} \mathbf{X}^T \mathbf{y} $$ The matrix $\left( \mathbf{X}^T \mathbf{X} \right)^{-1} \mathbf{X}^T$ is called the Moore-Penrose pseudoinverse of $\mathbf{X}$: because $\mathbf{X}$ itself is not square, we can't invert it directly, but multiplying by the pseudoinverse yields the $A$ that minimizes the squared error between the predicted $\hat{\mathbf{s}}_{i+1}$ for each $\mathbf{s}_i$ and the true $\mathbf{s}_{i+1}$.
The best-fit predictions of the trained model are given by $$ \mathbf{\hat{s}}_{i + 1} = A \cdot \mathbf{s}_{i} $$ Usually, in supervised learning we generically use $\mathbf{\hat{y}}$ to denote the predictions of the model, and $\mathbf{y}$ to denote the true labels, and $\mathbf{X}$ to denote the input data. So we can also write the generic form of prediction as $\mathbf{\hat{y}} = A \cdot \mathbf{X}$.
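As a sanity check, we can write this solution in a few lines of plain numpy; a minimal sketch, using the X_train and y_train arrays defined above. Here np.linalg.lstsq solves the same least-squares minimization without explicitly forming the matrix inverse, which is numerically safer; up to the intercept term, this matches what a linear regression fit computes.
import numpy as np
# Each row of X_train is a microstate s_i; the matching row of y_train is s_{i+1}
A, residuals, rank, svals = np.linalg.lstsq(X_train, y_train, rcond=None)
print(A.shape)  # (L, L)
# Predicted next microstates for the training inputs
y_pred = X_train @ A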
Python syntax: the scikit-learn library¶
Rather than using numpy, we will use the Python machine learning library scikit-learn to perform the linear regression. scikit-learn uses a consistent API for both simple models, like linear regression, and more complex models, like neural networks. Machine learning models are objects that are first instantiated with all hyperparameters, then trained on data using the fit method to determine the values of all parameters ($A$ in our case), and finally used to make predictions (here denoted $\mathbf{\hat{s}}_{i+1}$ or $\mathbf{\hat{y}}$) using the predict method.
from sklearn.linear_model import LinearRegression#, RidgeCV, Lasso
# model = RidgeCV()
# model = Lasso(alpha=6e-2)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred_train = model.predict(X_train)
# plt.imshow(model.coef_)
plt.plot(X_train[0])
# plt.plot(y_train[0])
plt.plot(y_pred_train[0])
plt.figure()
plt.imshow(y_train[:100])
plt.xlabel("Time")
plt.ylabel("Lattice Position")
plt.title("True values")
plt.figure()
plt.imshow(y_pred_train[:100])
plt.title("Predictions")
plt.figure()
plt.imshow(np.round(y_pred_train[:100]))
plt.title("Rounded Predictions")
plt.figure()
plt.imshow(y_train[-60:] - np.round(y_pred_train[-60:]))
plt.title("Difference between true and predicted values")
# plt.colorbar()
Scoring a regression model¶
We will use the $R^2$ score to evaluate the performance of our model. The $R^2$ score is defined as $$ R^2 = 1 - \frac{\sum_i (\mathbf{y}_i - \hat{\mathbf{y}}_i)^2}{\sum_i (\mathbf{y}_i - \bar{\mathbf{y}})^2} $$ where $i$ indexes datapoints, $\mathbf{y}_i$ are the true regression targets, $\hat{\mathbf{y}}_i$ are the predicted values, and $\bar{\mathbf{y}}$ is the elementwise mean of the true values. The $R^2$ score is a measure of how well the model explains the variance in the data. A score of 1 indicates a perfect fit, while a score of 0 indicates that the model is no better than predicting the mean of the data.
def coefficient_of_determination(y_true, y_pred):
"""The R^2 score, or coefficient of determination, is a measure of how well
future samples are likely to be predicted by the model.
Args:
y_true (np.ndarray): True values
y_pred (np.ndarray): Predicted values
Returns:
float: R^2 score
"""
ss_res = np.sum((y_true - y_pred) ** 2) # Sum of squared residuals
ss_tot = np.sum((y_true - np.mean(y_true)) ** 2) # Total sum of squares
r2 = 1 - (ss_res / ss_tot)
return r2
print("R^2 score on training data: ", coefficient_of_determination(y_train, y_pred_train))
R^2 score on training data: 0.8526570213563216
What about experiments that the model hasn't seen before?¶
The test set corresponds to the pairs of $X$ and $y$ that the model hasn't seen before, which here correspond to pairs of successive spin configurations that the model hasn't seen before.
We use the test set to evaluate the model's performance on unseen data. If the model performs well on the test set, we can be more confident that it learned general features of the data, rather than just memorizing the training set.
y_pred_test = model.predict(X_test)
plt.figure()
plt.imshow(y_test[:100])
plt.title("True values")
plt.figure()
plt.imshow(y_pred_test[:100])
plt.title("Predictions")
plt.figure()
plt.imshow(np.round(y_pred_test[:100]))
plt.title("Rounded Predictions")
plt.figure()
plt.imshow(y_test[-60:] - y_pred_test[-60:])
plt.title("Difference between true and predicted values")
print("R^2 score on test data: ", coefficient_of_determination(y_test, y_pred_test))
R^2 score on test data: 0.5461927695908562
Overfitting¶
High train accuracy just tells us that our model class is capable of expressing patterns found in the training data
For any dataset, there exists a way to get 100% train accuracy as long as we have access to memory equal to the size of the training dataset (a 1-nearest-neighbor lookup table)
We therefore need to either regularize (so the training data can't be perfectly fit) or use a test dataset to see how good our model actually is
A reasonable heuristic when choosing model complexity is to find one that can just barely overfit the training data, which suggests sufficient expressive power
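To make the lookup-table point concrete, here is a minimal sketch using a 1-nearest-neighbor regressor on the spin data above: it memorizes the training set, scoring perfectly there while telling us nothing about generalization.
from sklearn.neighbors import KNeighborsRegressor
# A 1-NN model is effectively a lookup table over the training set
knn = KNeighborsRegressor(n_neighbors=1)
knn.fit(X_train, y_train)
# Train score is exactly 1.0 by construction (barring duplicate inputs with different targets)
print("Train R^2:", knn.score(X_train, y_train))
print("Test R^2: ", knn.score(X_test, y_test))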
But raw score doesn't tell the whole story¶
- We can get a great fit, but our model might have a lot of free parameters
- There might be multiple valid coupling matrices $J$ that explain the observed data
- Our model might be predictive but not interpretable, or physical
- We either need more data, better data (sample rarer states), or a better model
Let's look at the learned coupling matrix, or the weights of our fitted model¶
These store the "physics" that our model learned from the data
Recall that the number of trainable parameters is $L^2$, where $L$ is the number of spins in the chain. We had $L=160$ spins, so there are $160^2 = 25{,}600$ parameters to learn.
## The coefficient matrix of the fitted model stores the learned couplings between lattice sites
L = X_train.shape[1]
rules_estimated = np.array(model.coef_)
plt.figure(figsize=(6, 6))
plt.imshow(rules_estimated[:50, :50], cmap='RdBu_r')
plt.colorbar()
Let's try repeating the model fitting several times on different subsets of our experimental data¶
plt.figure(figsize=(9, 9))
## Plot 3 x 3 subplots
n_samples = 20
for i in range(9):
## Pick random training data set
selection_inds = np.random.choice(range(X_train.shape[0]), size=n_samples, replace=False)
X_train_subset, y_train_subset = X_all[selection_inds], y_all[selection_inds]
model = LinearRegression()
model.fit(X_train_subset, y_train_subset)
rules_estimated = np.array(model.coef_)
## Plot learned coupling matrix
plt.subplot(3, 3, i + 1)
plt.imshow(rules_estimated, cmap='RdBu_r')
plt.axis('off')
# spacing between subplots
plt.subplots_adjust(wspace=0.1, hspace=0.1)
We can see that there is variance in the fitted models: while they share some structure, the fitted parameters (weights) vary from one trained replicate to the next¶
- In machine learning, there are three ways to reduce the variance of a model: increase the amount of data (expensive), constrain the model class to have fewer (or effectively fewer) free parameters, or introduce a bias towards a particular type of solution.
Narrowing the model class with regularization¶
We constrain the model's allowed space of valid representations in order to select for more parsimonious models
Operationally, regularizers/constraints reduce the "effective" number of parameters, and thus complexity, of our model
Imposing preferred basis functions or symmetries can be forms of regularization
Ordinary linear regression solves an optimization problem¶
We can think of our least-squares problem as choosing the optimal $A$ that minimizes the following objective function, the mean squared error between the model predictions and the true targets $$ \mathcal{L}_\text{MSE}(\mathbf{y}, \hat{\mathbf{y}}) = \frac{1}{N_\text{data}} \sum_{i=1}^{N_\text{data}} \left( \mathbf{y}_i - \hat{\mathbf{y}}_i \right)^2 $$
The ordinary least squares formula above minimizes this loss function for a given input dataset $X$ and paired targets $y$, under the constraint that the model is linear
Regularization¶
Common regularizers add penalties to the loss function above, which can be thought of as limiting the space of allowed solutions. Instead of all $L^2$ parameters in the matrix $A$ being free to vary, the regularizer imposes constraints on the allowed values of the parameters.
Ridge regression (aka $L2$ regularization) discourages any particular weight in the coefficient matrix from becoming too large. Ridge imposes a degree of smoothness or regularity in how a model treats its various inputs. Models that take continuous data as inputs (such as time series or images) may benefit from the ridge term.
$$ \mathcal{L}_\text{Ridge}(\mathbf{y}, \hat{\mathbf{y}}) = \frac{1}{N_\text{data}} \sum_{i=1}^{N_\text{data}} \left( \mathbf{y}_i - \hat{\mathbf{y}}_i \right)^2 + \lambda \sum_{i,j} A_{ij}^2 $$
- Lasso is also known as $L1$ regularization, and it encourages sparsity in weight space: it incentivizes models where most coefficients go to zero, thereby reducing the model's dependence on features. Lasso is often used in feature selection, where we want to identify the most important features in a dataset.
$$ \mathcal{L}_\text{Lasso}(\mathbf{y}, \hat{\mathbf{y}}) = \frac{1}{N_\text{data}} \sum_{i=1}^{N_\text{data}} \left( \mathbf{y}_i - \hat{\mathbf{y}}_i \right)^2 + \lambda \sum_{i,j} |A_{ij}| $$
Both of these penalties introduce a hyperparameter, $\lambda$, that controls the strength of the regularization. When $\lambda = 0$, the regularized loss function reduces to the ordinary least squares loss function. As $\lambda$ increases, the regularizer becomes more important relative to the data loss term, and the model is more constrained.
Let's try re-fitting the model with these different regularizers. We will vary $\lambda$, the strength of the regularization, and see how the learned dynamical system changes
# Instantiate models
from sklearn import linear_model
model_ols = linear_model.LinearRegression()
model_l2 = linear_model.Ridge()
model_l1= linear_model.Lasso()
# Set regularization range
lambdas = np.logspace(-6, 4, 8)
# Load data
# define subset of samples
# n_samples = 100
X_train, X_test = X_all[:400], X_all[400:]
y_train, y_test = y_all[:400], y_all[400:]
# define error lists
train_error_ols, test_error_ols = list(), list()
train_error_l2, test_error_l2 = list(), list()
train_error_l1, test_error_l1 = list(), list()
#Initialize coefficients for ridge regression and Lasso
coeffs_ols, coeffs_ridge, coeffs_lasso = list(), list(), list()
for lam in lambdas:
### ordinary least squares
model_ols.fit(X_train, y_train) # fit model
coeffs_ols.append(model_ols.coef_) # store weights
# use the coefficient of determination R^2 as the performance of prediction.
train_error_ols.append(model_ols.score(X_train, y_train))
test_error_ols.append(model_ols.score(X_test, y_test))
### ridge regression
model_l2.set_params(alpha=lam) # set regularisation strength
model_l2.fit(X_train, y_train) # fit model
coeffs_ridge.append(model_l2.coef_) # store weights
train_error_l2.append(model_l2.score(X_train, y_train))
test_error_l2.append(model_l2.score(X_test, y_test))
### lasso
model_l1.set_params(alpha=lam) # set regularisation strength
model_l1.fit(X_train, y_train) # fit model
coeffs_lasso.append(model_l1.coef_) # store weights
train_error_l1.append(model_l1.score(X_train, y_train))
test_error_l1.append(model_l1.score(X_test, y_test))
### plot Ising interaction J
J_leastsq = np.array(model_ols.coef_).reshape((L, L))
J_ridge = np.array(model_l2.coef_).reshape((L, L))
J_lasso = np.array(model_l1.coef_).reshape((L, L))
fig, axarr = plt.subplots(nrows=1, ncols=3)
axarr[0].imshow(J_leastsq[:50, :50], cmap="RdBu_r", vmin=-1, vmax=1)
axarr[0].set_title(f"OLS \n Train={train_error_ols[-1]}, Test={test_error_ols[-1]}")
## 3 sig figs
# axarr[0].set_title('OLS \n Train$=%.3f$, Test$=%.3f$' %(train_error_ols[-1],test_error_ols[-1]))
axarr[1].set_title('OLS $\lambda=%.4f$\n Train$=%.3f$, Test$=%.3f$' %(lam, train_error_ols[-1],test_error_ols[-1]))
axarr[0].tick_params(labelsize=16)
axarr[1].imshow(J_ridge[:50, :50], cmap="RdBu_r", vmin=-1, vmax=1)
axarr[1].set_title('Ridge $\lambda=%.4f$\n Train$=%.3f$, Test$=%.3f$' %(lam, train_error_l2[-1],test_error_l2[-1]))
axarr[1].tick_params(labelsize=16)
im=axarr[2].imshow(J_lasso[:50, :50], cmap="RdBu_r", vmin=-1, vmax=1)
axarr[2].set_title('LASSO $\lambda=%.4f$\n Train$=%.3f$, Test$=%.3f$' %(lam, train_error_l1[-1],test_error_l1[-1]))
axarr[2].tick_params(labelsize=16)
# divider = make_axes_locatable(axarr[2])
# cax = divider.append_axes("right", size="5%", pad=0.05, add_to_figure=True)
# cbar=fig.colorbar(im, cax=cax)
# cbar.ax.set_yticklabels(np.arange(-1.0, 1.0+0.25, 0.25),fontsize=14)
# cbar.set_label('$J_{i,j}$',labelpad=15, y=0.5,fontsize=20,rotation=0)
fig.subplots_adjust(right=2.0)
plt.show()
# Plot our performance on both the training and test data
plt.figure(figsize=(6, 3))
plt.semilogx(lambdas, train_error_ols, label="Train (OLS)")
plt.semilogx(lambdas, test_error_ols, "--", label="Test (OLS)")
plt.legend(frameon=False)
plt.title("Ordinary Least Squares", fontsize=16)
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Accuracy $R^2$")
plt.figure(figsize=(6, 3))
plt.semilogx(lambdas, train_error_l2, label="Train (Ridge)", linewidth=1)
plt.semilogx(lambdas, test_error_l2, "--", label="Test (Ridge)", linewidth=1)
plt.legend(frameon=False)
plt.title("Ridge Regression", fontsize=16)
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Accuracy $R^2$")
plt.figure(figsize=(6, 3))
plt.semilogx(lambdas, train_error_l1, label="Train (LASSO)")
plt.semilogx(lambdas, test_error_l1, "--", label="Test (LASSO)")
plt.legend(frameon=False)
plt.title("LASSO", fontsize=16)
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Accuracy $R^2$")
It looks like the best test performance occurred at an intermediate value of $\lambda$. This is a common phenomenon in machine learning: at low $\lambda$ the model is so unconstrained that it overfits the training data, while at high $\lambda$ the model is so constrained that it underfits the data, missing important features.
lambda_optimal = lambdas[np.argmax(test_error_l1)]
print("Optimal lambda for Lasso: ", lambda_optimal)
Optimal lambda for Lasso: 0.019306977288832496
model_l1.set_params(alpha=lambda_optimal)
model_l1.fit(X_train, y_train)
y_pred_test = model_l1.predict(X_test)
plt.figure()
plt.imshow(y_test[:100])
plt.title("True values")
plt.figure()
plt.imshow(y_pred_test[:100])
plt.title("Predictions")
# plt.figure()
# plt.imshow(np.round(y_pred_test[:100]))
# plt.title("Rounded Predictions")
plt.figure()
plt.imshow(y_test[-60:] - y_pred_test[-60:])
plt.title("Difference between true and predicted values")
Let's take a look at the learned coupling matrix
plt.imshow(model_l1.coef_[:50, :50], cmap='RdBu_r')
What physics do we learn from the dynamics matrix?¶
When we perform linear regression, we learn a matrix $A$ that maps the dynamics of the spins from one time step to the next.
$$ \mathbf{s}_{i + 1} = A \cdot \mathbf{s}_{i} $$ where $\mathbf{s}_{i} \in \mathbb{R}^{L}$ is the state of the spins at time $i$, and $A \in \mathbb{R}^{L \times L}$ is the learned matrix.
Recall that when we discretized PDEs, a banded matrix indicated local spatial interactions, like the Laplacian acting on a lattice. The banded structure of the learned matrix $A$ therefore suggests that the spins interact with their nearest neighbors.
The accuracy of the linear fit also suggests that the dynamics are indeed Markovian, and that the state of the spins at time $i$ is sufficient to predict the state of the spins at time $i+1$. If we suspected that this wasn't the case, we would instead fit a higher-order model,
$$ \mathbf{s}_{i + 1} = A \cdot \mathbf{s}_{i} + B \cdot \mathbf{s}_{i-1} $$ where $B$ is another matrix of weights. This would allow the spins to depend on their state at the previous two time steps, and so on. We could alternatively change the shape of our training data, by concatenating the spins at multiple time steps into a single feature vector,
$$ \mathbf{s}_{i + 1} = A \cdot \text{vec}(\mathbf{s}_{i}, \mathbf{s}_{i-1}) $$
where $\text{vec}(\mathbf{s}_{i}, \mathbf{s}_{i-1}) \in \mathbb{R}^{2L}$ is the concatenation of the spins at times $i$ and $i-1$.
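As a rough sketch of this concatenation approach, using the microstates array loaded above: we stack two consecutive time steps side by side to form each feature vector.
import numpy as np
from sklearn.linear_model import LinearRegression
# Features: vec(s_i, s_{i-1}); targets: s_{i+1}
X_two_step = np.hstack([microstates[1:-1], microstates[:-2]])  # shape (N - 2, 2L)
y_two_step = microstates[2:]                                   # shape (N - 2, L)
model2 = LinearRegression().fit(X_two_step, y_two_step)
print("Train R^2 with two-step features:", model2.score(X_two_step, y_two_step))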
ca.powers
array([1, 2, 4])
ca.ruleset
array([0, 1, 1, 1, 0, 0, 0, 0])
One-dimensional cellular automata¶
It turns out our experimental data was generated from a noisy simulation of a one-dimensional cellular automaton
Recall that a one-dimensional cellular automaton is a discrete dynamical system that evolves a discrete lattice of cells in discrete time steps. Each cell can be in one of $N$ states, and the state of each cell at time $t$ is determined by the states of its neighbors at time $t-1$ according to a rule.
We can see that our cellular automaton had two states per cell and a neighborhood of size $3$, meaning that each cell updated based on the states of itself and its two nearest neighbors
The dynamical rule for a cellular automaton can be specified by a lookup table that gives the next state of a cell for each possible binary string denoting the configuration of its neighborhood. In our case, there are $2^3 = 8$ possible configurations of the cell and its two neighbors, so the lookup table has $8$ entries.
The lookup table is essentially a Boolean function, which takes in the binary string of the cell and its two neighbors and returns the state of the cell at the next time step. For our particular cellular automaton, the lookup table is
000 -> 0
001 -> 1
010 -> 1
011 -> 1
100 -> 0
101 -> 0
110 -> 0
111 -> 0
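A minimal sketch of applying this lookup table by hand: encode the neighborhood (left, center, right) as the integer $4 \cdot \text{left} + 2 \cdot \text{center} + \text{right}$ and index into the rule table. The encoding convention here is our assumption; it matches the ordering of the table above.
import numpy as np
rule_table = np.array([0, 1, 1, 1, 0, 0, 0, 0])  # entry k = output for neighborhood k
def update_cell(left, center, right):
    return rule_table[4 * left + 2 * center + right]
print(update_cell(0, 1, 1))  # neighborhood "011" -> 1
print(update_cell(1, 0, 0))  # neighborhood "100" -> 0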
What happens if we instead have data from a cellular automaton with a different lookup table? Here's a very similar cellular automaton with a different lookup table. This is a well-known cellular automaton called Rule 30:
000 -> 0
001 -> 1
010 -> 1
011 -> 1
100 -> 1
101 -> 0
110 -> 0
111 -> 0
microstates = np.load('../resources/rule30_microstates.npy', allow_pickle=True)[:500]
plt.figure(figsize=(10, 30))
plt.imshow(microstates.T[:, :100])
plt.ylabel("Lattice Position")
plt.xlabel("Time")
X_all, y_all = microstates.copy()[:-1], microstates.copy()[1:]
print("X_all shape: ", X_all.shape)
print("y_all shape: ", y_all.shape, "\n")
# Data matrix / design matrix always has shape (n_samples, n_features)
X_train, X_test = X_all[:400], X_all[400:]
y_train, y_test = y_all[:400], y_all[400:]
print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape, "\n")
print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)
X_all shape:  (499, 50)
y_all shape:  (499, 50)
X_train shape:  (400, 50)
y_train shape:  (400, 50)
X_test shape:  (99, 50)
y_test shape:  (99, 50)
from sklearn.linear_model import Lasso
model = Lasso()
model.fit(X_train, y_train)
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)
plt.figure()
plt.imshow(y_train[:100].T)
plt.title("True values")
plt.figure()
plt.imshow(y_pred_train[:100].T)
plt.title("Predictions")
print("R^2 score on training data: ", coefficient_of_determination(y_train, y_pred_train))
print("R^2 score on test data: ", coefficient_of_determination(y_test, y_pred_test))
R^2 score on training data:  0.001774999999999971
R^2 score on test data:  -0.0011755948469776012
What can't our model fit?¶
- We know that the generating process of our data is a one-dimensional cellular automaton, so it should be Markovian
$$ \mathbf{s}_{i + 1} = f(\mathbf{s}_{i}) $$
- But what if the linear function class isn't expressive enough to capture the dynamics of the cellular automaton?
$$ \mathbf{s}_{i + 1} \neq A \cdot \mathbf{s}_{i} $$
- How could we generalize our model to capture more complex dynamics?
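One way to see the limitation: Rule 30's update can be written as $s'_j = s_{j-1} \oplus (s_j \lor s_{j+1})$, and XOR is the textbook example of a function that no linear model can represent. A quick sketch on the bare XOR truth table:
import numpy as np
from sklearn.linear_model import LinearRegression
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([0, 1, 1, 0])  # XOR truth table
lin = LinearRegression().fit(X_xor, y_xor)
# ~0: the best linear fit of XOR just predicts the mean
print("R^2 on XOR:", lin.score(X_xor, y_xor))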
Can we add additional free parameters into our model?¶
Linear regression:
$$ \mathbf{s}_{i + 1} \neq A \cdot \mathbf{s}_{i} $$
- The weight matrix is constrained by our input size: $\mathbf{A} \in \mathbb{R}^{L \times L}$ because our inputs (microstates) live on a lattice of $L$ sites, and so we had $L^2$ free parameters
Idea:¶
$$ \mathbf{s}_{i + 1} = \mathbf{B} \cdot \mathbf{C} \cdot \mathbf{s}_{i} $$
where $\mathbf{B} \in \mathbb{R}^{L \times p}$ and $\mathbf{C} \in \mathbb{R}^{p \times L}$, with $p$ being a hyperparameter that controls the complexity of the model. This "hidden" or "latent" dimensionality allows us to have a more complex model.
However, the problem is that $\mathbf{B} \cdot \mathbf{C} \equiv \mathbf{A} \in \mathbb{R}^{L \times L}$ collapses back into a single linear map, so we don't gain any expressivity
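A quick numerical check of this collapse, with shapes chosen arbitrarily: the product of an $(L \times p)$ and a $(p \times L)$ matrix is a single linear map, and if anything it is more constrained than before, since its rank is at most $p$.
import numpy as np
L, p = 50, 10
B = np.random.randn(L, p)
C = np.random.randn(p, L)
A = B @ C
print(A.shape)                   # (50, 50): still one linear map
print(np.linalg.matrix_rank(A))  # 10: rank limited by the hidden dimension p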
Solution:¶
$$ \mathbf{s}_{i + 1} = \mathbf{B} \cdot \sigma(\mathbf{C} \cdot \mathbf{s}_{i}) $$
where $\sigma(.)$ is an elementwise nonlinear function, like $\tanh(.)$. Now the model doesn't collapse into a single linear map, so we have $2 \times L \times p$ free parameters, as well as the ability to capture nonlinear relationships among the spins
This is a one-layer neural network with a $p$-unit "hidden" layer. We can always go wider or deeper to further increase the model complexity.
from sklearn.neural_network import MLPRegressor
model = MLPRegressor(hidden_layer_sizes=(200,), activation='logistic',
                     max_iter=2000, learning_rate_init=0.01, random_state=0)
model.fit(X_train, y_train)
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)
plt.figure()
plt.imshow(y_train[:100].T)
plt.title("True values")
plt.figure()
plt.imshow(y_pred_train[:100].T)
plt.title("Predictions")
print("R^2 score on training data: ", coefficient_of_determination(y_train, y_pred_train))
R^2 score on training data: 0.8953384649747517
Hyperparameter tuning¶
We can imagine that an even more general model would have both regularizers, each with its own strength $$ \mathcal{L}_\text{total} = \mathcal{L}_\text{MSE} + \lambda_1 \mathcal{L}_\text{Lasso} + \lambda_2 \mathcal{L}_\text{Ridge} $$ This loss function is sometimes referred to as least squares with an ElasticNet penalty.
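For reference, scikit-learn implements this combined penalty as ElasticNet. A minimal sketch on the current training data; note that scikit-learn parameterizes the two strengths through a total strength alpha and a mixing fraction l1_ratio rather than through $\lambda_1$ and $\lambda_2$ directly, and the values below are arbitrary rather than tuned.
from sklearn.linear_model import ElasticNet
model_en = ElasticNet(alpha=1e-2, l1_ratio=0.5)  # arbitrary, untuned hyperparameters
model_en.fit(X_train, y_train)
print("Test R^2:", model_en.score(X_test, y_test))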
Why not always use regularizers?¶
- The issue: we have two arbitrary factors, $\lambda_1$ and $\lambda_2$, which determine how important the L1 and L2 penalties are relative to the primary fitting term. These change the available solution space and thus the model class
- These are not "fit" during training like ordinary parameters; rather, they are specified beforehand, perhaps with a bit of intuition or domain knowledge. They therefore represent hyperparameters of the model
- Generally speaking, any "choices" we make (amount of data, model type, regularization strengths, neural network depth, etc.) are all hyperparameters. How do we choose these in a principled manner?
- A major question in machine learning: How do we choose the best hyperparameters for a model?
Validation set¶
Hold out some data just for hyperparameter tuning, separate from the test set
We usually don't validate on the test set; doing so leads to data leakage and thus overfitting
from sklearn import linear_model
# define train, validation and test data sets
X_train, X_val, X_test = X_all[:400], X_all[400 : 450], X_all[450 :]
y_train, y_val, y_test = y_all[:400], y_all[400 : 450], y_all[450 :]
# the hyperparameter values to check
lambdas = np.logspace(-6, 4, 100)
all_validation_losses = list()
for lam in lambdas:
model_l1 = linear_model.Lasso(alpha=lam)
model_l1.fit(X_train, y_train)
validation_loss = model_l1.score(X_val, y_val)
all_validation_losses.append(validation_loss)
best_lambda = lambdas[np.argmax(all_validation_losses)]
plt.semilogx(lambdas, all_validation_losses, label="Validation")
plt.axvline(best_lambda, color='k', linestyle='--', label="Best lambda")
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Validation Performance $R^2$")
print("Best lambda on validation set", best_lambda)
Best lambda on validation set 0.022051307399030457
# fit best model
model_l1 = linear_model.Lasso(alpha=best_lambda)
model_l1.fit(X_train, y_train)
y_test_predict = model_l1.predict(X_test)
plt.figure()
plt.imshow(y_test[:100].T, vmin=0, vmax=1)
plt.figure()
plt.imshow(y_test_predict[:100].T)
# # plot Ising interaction J
# J_lasso = np.array(model_l1.coef_).reshape((L, L))
# plt.figure()
# plt.imshow(J_lasso, cmap="RdBu_r", vmin=-1, vmax=1)
# plt.gca().set_aspect(1)
Cross-validation¶
- We repeatedly split up the training set into multiple sub-splits for training and validation
- For example, if we have 100 training points, we can create five 80:20 "splits" and average the best hyperparameter across the splits
- If we perform $k$ subsplits, we refer to our procedure as $k$-fold cross-validation
- More elaborate splitting methods exist (random Monte Carlo subsampling, importance-weighted splits, etc.)
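scikit-learn also provides this splitting machinery built in. A minimal sketch using cross_val_score, which averages the validation score over the folds for each hyperparameter value; the cell below implements the same loop manually.
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score
lambdas = np.logspace(-6, 4, 9)
# Mean 4-fold validation score for each regularization strength
mean_scores = [cross_val_score(Lasso(alpha=lam), X_train, y_train, cv=4).mean() for lam in lambdas]
print("Best lambda:", lambdas[int(np.argmax(mean_scores))])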
## Cross validation
from sklearn import linear_model
# define train, validation and test data sets
X_train, X_test = X_all[:400], X_all[400 :]
y_train, y_test = y_all[:400], y_all[400 :]
# the hyperparameter values to check
lambdas = np.logspace(-6, 4, 9)
all_validation_losses = list()
## Outer loop over hyperparameter values
for lam in lambdas:
all_val_loss_lam = list()
## Inner loop over k-folds
for k in range(4):
# Create the training and validation subsets from the training data
X_train_k = np.concatenate([X_train[:k*100], X_train[(k+1)*100:]])
y_train_k = np.concatenate([y_train[:k*100], y_train[(k+1)*100:]])
X_val_k = X_train[k*100:(k+1)*100]
y_val_k = y_train[k*100:(k+1)*100]
model_l1 = linear_model.Lasso(alpha=lam)
model_l1.fit(X_train_k, y_train_k)
validation_loss = model_l1.score(X_val_k, y_val_k)
all_val_loss_lam.append(validation_loss)
all_validation_losses.append(np.mean(all_val_loss_lam))
best_lambda = lambdas[np.argmax(all_validation_losses)]
plt.figure()
plt.semilogx(lambdas, all_validation_losses, label="Validation")
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Validation Accuracy $R^2$")
print("Best lambda on validation set", best_lambda)
Best lambda on validation set 0.005623413251903491
# fit best model
model_l1 = linear_model.Lasso(alpha=best_lambda)
model_l1.fit(X_train, y_train)
y_test_predict = model_l1.predict(X_test)
plt.figure()
plt.imshow(y_test[:100].T, vmin=0, vmax=1)
plt.figure()
plt.imshow(y_test_predict[:100].T)
# Plot interaction matrix
plt.figure()
plt.imshow(model_l1.coef_[:50, :50], cmap='RdBu_r')
Appendix¶
Build a programmatic CA model, which takes an instruction set in binary form and implements the corresponding neighborhood-3 cellular automaton.
Run this model for a few time steps, and visualize and save the results
class CellularAutomaton:
"""
A base class for cellular automata. Subclasses must implement the step method.
Parameters
n (int): The number of cells in the system
n_states (int): The number of states in the system
mutation_rate (int): The number of spins that randomly flip at each time step
random_state (None or int): The seed for the random number generator. If None,
the random number generator is not seeded.
initial_state (None or array): The initial state of the system. If None, a
random initial state is used.
"""
def __init__(self, n, n_states, mutation_rate=0, random_state=None, initial_state=None):
self.n_states = n_states
self.mutation_rate = mutation_rate
self.n = n
self.random_state = random_state
np.random.seed(random_state)
## The universe is a 2D array of integers
if initial_state is None:
self.initial_state = np.random.choice(self.n_states, size=self.n)
else:
self.initial_state = initial_state
self.state = self.initial_state
self.history = [self.state]
def next_state(self):
"""
Output the next state of the entire board
"""
raise NotImplementedError
def simulate(self, n_steps):
"""
Iterate the dynamics for n_steps, and return the results as an array
"""
for i in range(n_steps):
self.state = self.next_state()
## Add thermal noise
flip_inds = np.random.choice(len(self.state), self.mutation_rate)
self.state[flip_inds] = 1 - self.state[flip_inds]
self.history.append(self.state.copy())
return self.state
from scipy.ndimage import convolve1d
class ProgrammaticCA(CellularAutomaton):
def __init__(self, n, ruleset, **kwargs):
k = np.unique(ruleset).size
super().__init__(n, k, **kwargs)
self.ruleset = ruleset
## A special convolutional kernel for converting a binary neighborhood
## to an integer
self.powers = 2 ** np.arange(3)
def next_state(self):
# Compute the next state
# convolve with periodic boundary conditions
rule_indices = convolve1d(self.state, self.powers, mode='wrap')
## look up the rule for each cell
next_state = self.ruleset[rule_indices.astype(int)]
return next_state
ruleset = np.array([0, 1, 1, 1, 0, 0, 0, 0]) # Rule 14 (the lookup table used for the spin-chain data)
ca = ProgrammaticCA(160, ruleset, mutation_rate=4, random_state=0)
ca.simulate(3001)
X_all = np.array(ca.history)
plt.figure(figsize=(8, 8))
plt.imshow(X_all.T[:, :501])
plt.xlabel("Position")
plt.ylabel("Time")
# X_all.dump('../resources/spin_chain_microstates.npy')
ruleset = np.array([0, 1, 1, 1, 1, 0, 0, 0]) # Rule 30
ca = ProgrammaticCA(50, ruleset, mutation_rate=0, random_state=0)
ca.simulate(1001)
X_all = np.array(ca.history)
plt.figure(figsize=(8, 8))
plt.imshow(X_all.T[:, :301])
plt.xlabel("Position")
plt.ylabel("Time")
# X_all.dump('../resources/rule30_microstates.npy')
One-dimensional cellular automata¶
A one-dimensional cellular automaton is a discrete dynamical system that evolves a discrete lattice of cells in discrete time steps. Each cell can be in one of $N$ states, and the state of each cell at time $t$ is determined by the states of its neighbors at time $t-1$ according to a rule.
For example, a neighborhood-3 cellular automaton has a rule that specifies the state of a cell based on the states of itself and its two neighbors. The rule can be specified by a lookup table that gives the state of the cell for each possible configuration of the neighborhood.
We've seen this code before; we implemented a "compiled" cellular automaton function in the genetic algorithms module
We are going to consider one of the simplest possible physical systems: one-dimensional cellular automata
The update rule for a given $3 \times 1$ input can be thought of as a Boolean truth table, which maps 3 Boolean values to 1 Boolean value. There are therefore $2^3 = 8$ possible inputs, and so a given 1D cellular automaton consists of a rule table of length $8$
How many possible unique 1D cellular automata are there? If each CA consists of $8$ rules, then the total number of cellular automata corresponds to all possible ways of assigning one of 2 possible output values to each of these $8$ rules. So there are $2^{2^3} = 256$ possible 1D binary cellular automata
On some level, we can think of there being $256$ possible physical laws in a purely discrete-time, discrete-space "universe," subject to the constraints that the field can take only two possible values and that the "speed of information" is at most one lattice site per time step.
In the 1980s, Stephen Wolfram proposed a classification scheme for all $256$ cellular automata. For a given cellular automaton ruleset, we write the set of possible $3 \times 1$ neighborhood configurations as binary strings. We sort these strings by their value when converted into base-10 integers:
111, 110, 101, 100, 011, 010, 001, 000
We then write out the outputs that each string maps onto under the particular cellular automaton ruleset
We next interpret this as a binary integer of length 8
We conclude by converting this binary integer to base 10. For example, the representation above corresponds to "Rule 30". Every 1D cellular automaton therefore corresponds to a unique rule number between 0 and 255
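A short sketch of this numbering convention, assuming (as in the appendix code) that ruleset[k] stores the output for the neighborhood whose base-10 encoding is k:
import numpy as np
ruleset = np.array([0, 1, 1, 1, 1, 0, 0, 0])  # the Rule 30 lookup table from above
# Read the outputs off as the bits of a base-2 integer: bit k is the output for neighborhood k
rule_number = sum(int(bit) << k for k, bit in enumerate(ruleset))
print(rule_number)  # 30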
Implementing one-dimensional binary cellular automata¶
- To make the dynamics slightly more interesting, we will introduce noise into the system. Since the universe of a binary CA consists only of binary values, noise corresponds to random bit flips in the dynamics
Learning a cellular automaton from data¶
We saw above how to generate synthetic cellular automaton data. We can see this as a vignette of the problems we've encountered so far in this course: we know the rules that govern a physical system, and we write code that implements those rules, or a close approximation.
Often, however, we are interested in the inverse problem: We have experimental data, and we want to learn the physical rules that generated it.
Suppose that we are purely given a time series of cellular automaton states. Can we learn the rule directly from it?
Our data consists of a time series of $N_\text{data}$ timepoints. Because we want to learn the rules that generated this data, we will split it up into two groups: a set of timepoints, and the set of timepoints that immediately follow them in the time series. We refer to these datasets as $X$ and $y$
We can think of the learning problem as inferring the unknown function $\mathbf{f}(.)$ such that $\mathbf{y} = \mathbf{f}(\mathbf{x})$