Inferring Hamiltonians from experimental data with supervised learning¶
Preamble: Run the cells below to import the necessary Python packages
- This notebook is modified from notebooks originally developed and used in Pankaj Mehta's ML for physics course at Boston University. Please check out those notebooks and associated textbooks for additional details and exercises.
## Preamble / required packages
import numpy as np
np.random.seed(0)
# Import local plotting functions and in-notebook display functions
import matplotlib.pyplot as plt
from IPython.display import Image, display
%matplotlib inline
import warnings
# Comment this out to activate warnings
warnings.filterwarnings('ignore')
Supervised learning¶
Given an input $X$, construct a function that assigns it a label $\hat{y}$.
Regression: $\hat{y}$ is a continuous variable. For example, given a picture of a person, predict their age. In physics, a common example is to predict the energy of a particle given its momentum, or forecast the next step in a time series.
Classification: $\hat{y}$ is a discrete variable. For example, given a picture of an animal, predict whether it is a cat or a dog. In physics, a common example is to predict whether a phase is ordered or disordered, or to determine whether a detected event is background, signal, or an anomaly.
The function $\hat{y} = f_\theta(X)$ is learned from many instances of labelled data comprising $X \in \mathbb{R}^{N_\text{data} \times N_\text{features}}$ and known $y \in \mathbb{R}^{N_\text{data}}$ pairs. The "weights" or parameters $\theta$ are adjusted during training.
Image from source
Supervised learning as inferring a generator¶
- We can see supervised learning as the process of learning a generator for a dataset: given a set of points, can we approximate the underlying process that produced those points?
- Forecasting: given some past values, predict future values
- Regression: Given a known generator with unknown parameters (like a quadratic Hamiltonian with unknown amplitudes), infer those amplitudes
- Classification (most famous example in ML): Given examples of labelled images, states, etc., predict the class of unlabelled data. We can think of the learned decision boundary as defining a generator of new examples belonging to that class (see conditional GANs, etc.)
Spin glasses¶
A spin glass has a Hamiltonian of the form
$$ H(\mathbf{s}^{(i)}) = - \sum_{jk} J_{jk} s_{j}^{(i)} \, s_{k}^{(i)}, $$ where $s_{j}^{(i)} \in \{-1, 1\}$ is the value of the $j^{th}$ spin in the $i^{th}$ experimental sample $\mathbf{s}^{(i)}$, and $J_{jk}$ is the interaction strength between the $j^{th}$ and $k^{th}$ spins. We assume that there are $L$ spins, and that the sum runs over all $L(L-1)/2$ pairs of spins.
In general, the coupling matrix is not symmetric ($J_{jk} \neq J_{kj}$), and the diagonal elements are zero ($J_{jj} = 0$, i.e. no self-interactions).
Depending on the coupling matrix $J_{jk}$, the system can be ferromagnetic, anti-ferromagnetic, or a spin glass. In a ferromagnet, all spins prefer to be aligned. In an anti-ferromagnet, neighboring spins prefer to be anti-aligned. In a spin glass, there is no global preference for alignment or anti-alignment. Instead, the system is frustrated, and the spins cannot simultaneously minimize all of their interaction energies.
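To make the energy function concrete, here is a minimal sketch of how the Hamiltonian above can be evaluated for a batch of microstates. The small system size and the random coupling matrix J_example are illustrative assumptions, not the experimental data loaded below.
import numpy as np
L_demo = 5 # a small illustrative system
rng = np.random.default_rng(0)
J_example = rng.normal(size=(L_demo, L_demo)) # hypothetical, generally non-symmetric couplings
np.fill_diagonal(J_example, 0.0) # no self-interactions
spins_demo = rng.choice([-1, 1], size=(3, L_demo)) # three example microstates
# H(s) = -sum_{jk} J_{jk} s_j s_k, evaluated for each sample with einsum
energies_demo = -np.einsum("ij,jk,ik->i", spins_demo, J_example, spins_demo)
print(energies_demo)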
Load experimental measurements of an unknown spin glass¶
Suppose we have a spin glass with $L$ spins.
Our data consists of $N_\text{data}$ measurements of microstates $\mathbf{s}^{(i)}$, and their respective energies $E^{(i)} = H(\mathbf{s}^{(i)})$.
microstates = np.load('../resources/spin_microstates.npy', allow_pickle=True)
energies = np.load('../resources/spin_energies.npy', allow_pickle=True)
print("Microstates shape: ", microstates.shape)
print("Energies shape: ", energies.shape)
Microstates shape: (10000, 40) Energies shape: (10000,)
plt.figure(figsize=(10, 5))
plt.imshow(
microstates[np.argsort(energies)].T,
interpolation='nearest', aspect='auto', cmap='binary'
)
plt.ylabel("Spin position")
plt.xlabel("Microstate")
plt.title("Spin configurations sorted by energy")
## aspect ratio
# plt.gca().set_aspect('auto')
plt.figure(figsize=(10, 5))
plt.hist(energies, bins=100)
plt.xlabel("Energy")
plt.ylabel("Number of microstates")
Text(0, 0.5, 'Number of microstates')
Can we infer the unknown $J_{jk}$, given only the samples $\mathbf{s}^{(i)}$ and their energies $E^{(i)}$?¶
Our model class is defined as our known energy function for each sample,
$$ H(\mathbf{s}^{(i)}) = - \sum_{jk} J_{jk} s_{j}^{(i)} \, s_{k}^{(i)}. $$
We can define the matrix $\mathbf{X}^{(i)}$ with components
$$ \mathbf{X}^{(i)}_{jk} = s_j^{(i)} s_k^{(i)}, $$
or, in outer-product notation,
$$ \mathbf{X}^{(i)} = \mathbf{s}^{(i)} \otimes \mathbf{s}^{(i)}. $$
Then the energy of each sample is
$$ E^{(i)} = - \sum_{jk} J_{jk} \mathbf{X}^{(i)}_{jk}. $$
Notice that we've exploited our prior knowledge of the physics of this problem in order to put the problem into a linear form. This represents a sort of feature engineering where we've used an inductive bias to make the problem easier to solve.
A note on flattening and indices¶
Our experimental data has three indices: $i$, $j$, and $k$. The index $i$ tells us which experiment we are looking at. But both $j$ and $k$ index features of a single experiment.
Notice that our Hamiltonian contains a double sum, which indexes into $J$ and the outer product matrix $\mathbf{X}^{(i)}$. Rather than keeping track of two indices, we can convert this into a single sum by flattening these matrices into vectors, allowing us to use a single index over all products of the indices $j$ and $k$.
Flattening features into a single multiindex is a common technique in machine learning, which we previously used in order to apply PCA to image data.
X_all = microstates[:, :, None] * microstates[:, None, :] # outer product creates neighbor matrix
print("X_all shape: ", X_all.shape)
# Data matrix / design matrix always has shape (n_samples, n_features)
X_all = np.reshape(X_all, (X_all.shape[0], -1))
print("X_all shape after flattening into data matrix: ", X_all.shape)
# Match our label shape
y_all = energies
print("y_all shape: ", y_all.shape)
X_all shape: (10000, 40, 40) X_all shape after flattening into data matrix: (10000, 1600) y_all shape: (10000,)
Training and testing data¶
Rather than fitting our Hamiltonian model to all of the data, we will split the data into a training set and a test set.
We will fit the model to the training set, and then evaluate the trained model on the test set.
This is a common technique in machine learning, and is used to avoid overfitting.
# define subset of samples
n_samples = 400
# define train and test data sets
X_train, X_test = X_all[:n_samples], X_all[n_samples : 3 * n_samples // 2]
y_train, y_test = y_all[:n_samples], y_all[n_samples : 3 * n_samples // 2]
print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape, "\n")
print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)
X_train shape: (400, 1600) y_train shape: (400,) X_test shape: (200, 1600) y_test shape: (200,)
#now we can plot the histogram of the test data
plt.figure(figsize=(10, 5))
plt.hist(y_test, bins=100)
plt.xlabel("Energy")
plt.ylabel("Number of microstates")
Text(0, 0.5, 'Number of microstates')
Fitting a linear model with least squares¶
Because our Hamiltonian is linear with respect to the outer product state matrices, we expect that linear regression is a good way to infer the coupling matrix $J_{jk}$.
Recall that the linear model is defined as
$$ \hat{y} = A \cdot \mathbf{x} $$ where $A$ is a matrix of weights, $\mathbf{x}$ is a vector of features, and $\hat{y}$ is the vector of predictions.
In our case, the features are the outer product state matrices $\mathbf{X}^{(i)}$, and the predictions are the energies $E^{(i)}$. We can directly solve for the matrix $A$ using the least squares method: $$ A = \left( \mathbf{X}^T \mathbf{X} \right)^{-1} \mathbf{X}^T \mathbf{y} $$
Recall that we need to use the Moore-Penrose pseudoinverse here because the matrix $\mathbf{X}^T \mathbf{X}$ may be singular (for example, when there are more features than samples, or when features are linearly dependent); the pseudoinverse is the closest thing to an inverse that we can get in that case.
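Before reaching for a library, note that this closed-form solution takes only a couple of lines of numpy. The sketch below uses the X_train and y_train defined above; it ignores the intercept term and differs in numerical details from the library routine used next.
# Closed-form least-squares fit via the Moore-Penrose pseudoinverse
J_flat_np = np.linalg.pinv(X_train) @ y_train # shape (1600,)
y_pred_train_np = X_train @ J_flat_np
print("Train MSE (pseudoinverse fit):", np.mean((y_pred_train_np - y_train) ** 2))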
Using the scikit-learn Python library¶
Rather than using numpy directly, we will use the Python machine learning library scikit-learn to perform the linear regression. scikit-learn uses a consistent API for both simple models, like linear regression, and more complex models, like neural networks. You might recognize the structure of scikit-learn from other objects in the course: we first instantiate a model object, and then we fit it to our data.
from sklearn.linear_model import LinearRegression
# from sklearn.ensemble import RandomForestRegressor
model = LinearRegression()
model.fit(X_train, y_train) # Fit the parameters of the model
y_pred_train = model.predict(X_train)
plt.figure(figsize=(6, 6))
plt.plot(y_train, y_pred_train, ".k")
plt.xlabel("True energy (Training Data)")
plt.ylabel("Predicted energy (Training Data)")
plt.title("Linear regression on training data")
plt.gca().set_aspect('auto')
What about experiments that the model hasn't seen before?
y_pred_test = model.predict(X_test)
plt.figure(figsize=(6, 6))
plt.plot(y_test, y_pred_test, ".k")
plt.xlabel("True energy (Test Data)")
plt.ylabel("Predicted energy (Test Data)")
plt.title("Linear regression on test data")
plt.gca().set_aspect('auto')
Overfitting¶
High train accuracy just tells us that our model class is capable of expressing patterns found in the training data
For any dataset, there is a way to get 100% training accuracy as long as we have memory equal to the size of the training dataset: a 1-nearest-neighbor lookup table (see the sketch after this list)
We therefore need to either regularize (so that the training data can't be fit perfectly) or use a held-out test dataset to see how good our model actually is
A reasonable heuristic when choosing model complexity is to find one that can just barely overfit the training data (this suggests sufficient expressive power)
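As an illustration of the lookup-table point above, here is a minimal sketch using scikit-learn's 1-nearest-neighbor regressor on the same train/test split (KNeighborsRegressor is not used elsewhere in this notebook):
from sklearn.neighbors import KNeighborsRegressor
# A 1-nearest-neighbor "lookup table" simply memorizes the training set
knn = KNeighborsRegressor(n_neighbors=1)
knn.fit(X_train, y_train)
print("1-NN train R^2:", knn.score(X_train, y_train)) # 1.0, barring duplicate microstates with different energies
print("1-NN test R^2:", knn.score(X_test, y_test)) # generalization is another story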
#so far we have been training on a sample of only 400 microstates, while our full dataset has 10000 microstates. Let us do a train/test split where we hold out 10% of the data for testing
# and train on varying sizes of the remaining data: 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90% of the data.
# We will then plot the training and test error as a function of the training set size.
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
X_train_all, X_test, y_train_all, y_test= train_test_split(X_all, y_all, train_size=0.9, random_state=0)
train_sizes = np.linspace(1000, 9000,9)
#prepend to train_sizes the size of the training data we have already used (400)
train_sizes = np.concatenate(([400], train_sizes))
train_errors = []
test_errors = []
for train_size in train_sizes:
X_train, y_train= X_train_all[:int(train_size)], y_train_all[:int(train_size)]
model.fit(X_train, y_train)
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)
train_errors.append(mean_squared_error(y_train, y_pred_train))
test_errors.append(mean_squared_error(y_test, y_pred_test))
plt.figure(figsize=(6, 6))
plt.plot(train_sizes, train_errors,'o-', label="Training error")
plt.plot(train_sizes, test_errors,'o-',label="Test error")
#add a vertical line at 400 to show where we started training
plt.axvline(x=400, color='r', linestyle='--', label="Training size = 400")
plt.xlabel("Train size")
plt.ylabel("Mean squared error")
plt.legend()
plt.title("Learning curve for linear regression for different training sizes")
plt.show()
Scoring a trained regression model¶
We can summarize the performance of a regression model by computing the coefficient of determination, $R^2$. This is a measure of how much of the variance in the data is explained by the model. It is defined as $$ R^2 = 1 - \frac{\sum_i (y_i - \hat{y}_i)^2}{\sum_i (y_i - \langle y \rangle)^2} $$ where $y_i$ is the true value of the $i^{th}$ sample, $\hat{y}_i$ is the predicted value of the $i^{th}$ sample, and $\langle y \rangle$ is the mean of the true values. An $R^2$ of 1 indicates that the model perfectly predicts the data, while an $R^2$ of 0 indicates that the model is no better than predicting the mean of the data.
print("Train $R^2$ was:", model.score(X_train, y_train)) # A measure of model expressivity
print("Test $R^2$ was:", model.score(X_test, y_test)) # A measure of model generalization
Train $R^2$ was: 1.0 Test $R^2$ was: 0.48928332309816613
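The same test score can be recovered directly from the definition of $R^2$ above; a quick sketch using the fitted model and the current test split:
# Compute the test R^2 "by hand" and compare with model.score
y_pred_test = model.predict(X_test)
ss_res = np.sum((y_test - y_pred_test) ** 2)
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
print("Manual test R^2:", 1 - ss_res / ss_tot)
print("model.score:", model.score(X_test, y_test))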
But raw score doesn't tell the whole story¶
- We can get a great fit, but our model might have a lot of free parameters
- There might be multiple valid coupling matrices $J$ that explain the observed data
- Our model might be predictive but not interpretable, or physical
- We either need more data, better data (sample rarer states), or a better model
Let's look at the learned coupling matrix, which corresponds to the weights of our fitted model¶
## Because we flattened the data, we need to reshape the coefficients to get the couplings
L = microstates.shape[1]
couplings_estimated = np.array(model.coef_).reshape((L, L))
plt.figure(figsize=(6, 6))
plt.imshow(couplings_estimated, cmap='RdBu_r')
<matplotlib.image.AxesImage at 0x16d05d250>
Let's try repeating the model fitting on different subsets of our experimental data¶
- To see how robust our model is, we repeat our fitting procedure on different subsets of the training data
plt.figure(figsize=(9, 9))
## Plot 3 x 3 subplots
n_samples = 200
for i in range(9):
## Pick random training data set
selection_inds = np.random.choice(range(X_all.shape[0]), size=n_samples, replace=False)
X_train, y_train = X_all[selection_inds], y_all[selection_inds]
model = LinearRegression()
model.fit(X_all[selection_inds], y_all[selection_inds])
couplings_estimated = np.array(model.coef_).reshape((L, L))
## Plot learned coupling matrix
plt.subplot(3, 3, i + 1)
plt.imshow(couplings_estimated, cmap='RdBu_r')
plt.axis('off')
# spacing between subplots
plt.subplots_adjust(wspace=0.1, hspace=0.1)
We can see that there is variance in the fitted models. While they have some similarities, the fitted parameters (weights) vary from replicate to replicate¶
- In machine learning, there are a few standard ways to reduce the variance of a model: increase the amount of training data, constrain the model class to have fewer (or effectively fewer) free parameters, or introduce a bias towards particular solutions (regularization).
Narrowing the model class with regularization¶
We constrain the model's allowed space of valid representations in order to select for more parsimonious models
Operationally, regularizers/constraints reduce the "effective" number of parameters, and thus complexity, of our model
Imposing preferred basis functions or symmetries can be forms of regularization
The loss function of least-squares fitting¶
The least-squares problem is equivalent to finding the optimal $J$ that minimizes the following objective function, the mean squared error between the model energies and true energies $$ \mathcal{L} = \sum_{i=1}^{N_{data}} (\mathbf{X}^{(i)} \cdot \mathbf{J}- E^{(i)})^2 $$
where $i$ indicates different training examples, which have predicted energies given by $\mathbf{X}^{(i)} \cdot \mathbf{J}$ and observed energies $E^{(i)}$ (the overall sign convention is absorbed into $\mathbf{J}$).
Common regularizers associate an additional loss with the trainable parameters of the model.
Ridge regression is also known as $L2$ regularization, and it discourages any particular weight in the coefficient matrix from becoming too large. Ridge imposes a degree of smoothness or regularity across how a model treats its various inputs. Models that take continuous data as inputs (such as time series, or images), may benefit from the ridge term.
If $\mathbf{J}$ is our trainable linear regression weight matrix (and, in this context, our best estimate for the spin-spin interaction matrix), then we can modify the loss as follows: $$ \mathcal{L}_{Ridge} = \sum_{i=1}^{N_{data}} (\mathbf{X}^{(i)} \cdot \mathbf{J}- E^{(i)})^2 + \lambda \sum_{jk} J_{jk}^2 $$
Lasso, also known as $L1$ or sparse regularization, encourages sparsity in weight space: it incentivizes models where most coefficients go to zero, thereby reducing the model's dependence on features. Lasso is often used for feature selection, where we want to identify the most important features in a dataset.
$$ \mathcal{L}_{Lasso} = \sum_{i=1}^{N_{data}} (\mathbf{X}^{(i)} \cdot \mathbf{J}- E^{(i)})^2 + \lambda \sum_{jk} | J_{jk} | $$ where the hyperparameter $\lambda$ determines the "strength" of the penalty terms.
Let's try re-fitting the model with these different regularizers. We will vary $\lambda$, the strength of the regularization, and see how the learned coupling matrix changes.
# Instantiate OLS, Ridge, and Lasso models
from sklearn import linear_model
model_ols = linear_model.LinearRegression()
model_l2 = linear_model.Ridge()
model_l1= linear_model.Lasso()
# Set range of values for the regularization
lambdas = np.logspace(-4, 5, 10)
# Load data on subset of samples
n_samples = 400
X_train, X_test = X_all[:n_samples], X_all[n_samples : 3 * n_samples // 2]
y_train, y_test = y_all[:n_samples], y_all[n_samples : 3 * n_samples // 2]
# define lists that will store the error terms
train_error_ols, test_error_ols = list(), list()
train_error_l2, test_error_l2 = list(), list()
train_error_l1, test_error_l1 = list(), list()
#Initialize coefficients for ridge regression and Lasso
coeffs_ols, coeffs_ridge, coeffs_lasso = list(), list(), list()
for lam in lambdas:
### ordinary least squares
model_ols.fit(X_train, y_train) # fit model
coeffs_ols.append(model_ols.coef_) # store weights
# use the coefficient of determination R^2 as the performance of prediction.
train_error_ols.append(model_ols.score(X_train, y_train))
test_error_ols.append(model_ols.score(X_test, y_test))
### ridge regression
model_l2.set_params(alpha=lam) # set regularisation strength
model_l2.fit(X_train, y_train) # fit model
coeffs_ridge.append(model_l2.coef_) # store weights
train_error_l2.append(model_l2.score(X_train, y_train))
test_error_l2.append(model_l2.score(X_test, y_test))
### lasso
model_l1.set_params(alpha=lam) # set regularisation strength
model_l1.fit(X_train, y_train) # fit model
coeffs_lasso.append(model_l1.coef_) # store weights
train_error_l1.append(model_l1.score(X_train, y_train))
test_error_l1.append(model_l1.score(X_test, y_test))
### plot Ising interaction J
J_leastsq = np.array(model_ols.coef_).reshape((L, L))
J_ridge = np.array(model_l2.coef_).reshape((L, L))
J_lasso = np.array(model_l1.coef_).reshape((L, L))
fig, axarr = plt.subplots(nrows=1, ncols=3)
axarr[0].imshow(J_leastsq, cmap="RdBu_r", vmin=-1, vmax=1)
axarr[0].set_title(f"OLS \n Train={train_error_ols[-1]}, Test={test_error_ols[-1]}")
## 3 sig figs
# axarr[0].set_title('OLS \n Train$=%.3f$, Test$=%.3f$' %(train_error_ols[-1],test_error_ols[-1]))
axarr[1].set_title('OLS $\lambda=%.4f$\n Train$=%.3f$, Test$=%.3f$' %(lam, train_error_ols[-1],test_error_ols[-1]))
axarr[0].tick_params(labelsize=16)
axarr[1].imshow(J_ridge, cmap="RdBu_r", vmin=-1, vmax=1)
axarr[1].set_title('Ridge $\lambda=%.4f$\n Train$=%.3f$, Test$=%.3f$' %(lam, train_error_l2[-1],test_error_l2[-1]))
axarr[1].tick_params(labelsize=16)
im=axarr[2].imshow(J_lasso, cmap="RdBu_r", vmin=-1, vmax=1)
axarr[2].set_title('LASSO $\lambda=%.4f$\n Train$=%.3f$, Test$=%.3f$' %(lam, train_error_l1[-1],test_error_l1[-1]))
axarr[2].tick_params(labelsize=16)
# divider = make_axes_locatable(axarr[2])
# cax = divider.append_axes("right", size="5%", pad=0.05, add_to_figure=True)
# cbar=fig.colorbar(im, cax=cax)
# cbar.ax.set_yticklabels(np.arange(-1.0, 1.0+0.25, 0.25),fontsize=14)
# cbar.set_label('$J_{i,j}$',labelpad=15, y=0.5,fontsize=20,rotation=0)
fig.subplots_adjust(right=2.0)
plt.show()
# Plot our performance on both the training and test data
plt.figure(figsize=(6, 3))
plt.semilogx(lambdas, train_error_ols, "b", label="Train (OLS)")
plt.semilogx(lambdas, test_error_ols, "--b", label="Test (OLS)")
plt.title("Ordinary Least Squares", fontsize=16)
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Performance $R^2$")
plt.figure(figsize=(6, 3))
plt.semilogx(lambdas, train_error_l2, "r", label="Train (Ridge)", linewidth=1)
plt.semilogx(lambdas, test_error_l2, "--r", label="Test (Ridge)", linewidth=1)
plt.title("Ridge Regression", fontsize=16)
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Performance $R^2$")
plt.figure(figsize=(6, 3))
plt.semilogx(lambdas, train_error_l1, "g", label="Train (LASSO)")
plt.semilogx(lambdas, test_error_l1, "--g", label="Test (LASSO)")
plt.title("LASSO", fontsize=16)
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Performance $R^2$")
Text(0, 0.5, 'Performance $R^2$')
Understanding our result¶
It looks like our coupling matrix is pretty sparse, and that the non-zero entries are concentrated along the off diagonal. This means that our Hamiltonian likely corresponds to the nearest-neighbor Ising model
$$ \mathcal{H}(\mathbf{s}^{(i)})=-J\sum_{j=1}^L s_{j}^{(i)}\, s_{j+1}^{(i)} $$
When $\lambda\to 0$, Ridge and LASSO reduce to ordinary least squares, and all three models overfit the data: the training curves stay at unity while the test curves fall well below it. When $\lambda\to\infty$, Ridge and LASSO instead underfit, and both their training and test performance collapse.
While the ordinary least-squares and Ridge regression test curves are monotonic, the LASSO test curve is not -- suggesting the optimal LASSO regularization parameter is $\lambda\approx 10^{-2}$. At this sweet spot, the Ising interaction weights ${\bf J}$ contain only nearest-neighbor terms (as did the model the data was generated from).
Notice how Lasso was able to correctly identify that the coupling matrix is non-symmetric, while OLS and Ridge both favored finding symmetric matrices.
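To make this concrete, we can refit LASSO near the sweet spot and read off the two off-diagonals of the learned coupling matrix. This is a quick sketch: the value $\lambda = 10^{-2}$ is simply read off the curves above rather than tuned systematically.
# Refit LASSO at the sweet spot and inspect the nearest-neighbor couplings
model_sweet = linear_model.Lasso(alpha=1e-2)
model_sweet.fit(X_train, y_train)
J_sweet = model_sweet.coef_.reshape((L, L))
print("Upper off-diagonal J_{j,j+1}:", np.round(np.diag(J_sweet, k=1)[:10], 2))
print("Lower off-diagonal J_{j+1,j}:", np.round(np.diag(J_sweet, k=-1)[:10], 2))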
Hyperparameter tuning¶
We can imagine that an even more general model would have both regularizers, each with different strengths $$ \mathcal{L}_{total} = \mathcal{L}_{least-squares} + \lambda_1 \mathcal{L}_{lasso} + \lambda_2 \mathcal{L}_{ridge} $$ This loss function is sometimes referred to as least-squares with an ElasticNet penalty.
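scikit-learn exposes this combined penalty as ElasticNet. Note that its parameterization uses an overall strength alpha and a mixing ratio l1_ratio rather than two separate $\lambda$'s; the values below are illustrative, not tuned.
from sklearn.linear_model import ElasticNet
# alpha sets the overall penalty strength; l1_ratio interpolates between Ridge (0) and LASSO (1)
model_en = ElasticNet(alpha=1e-2, l1_ratio=0.5)
model_en.fit(X_train, y_train)
print("ElasticNet test R^2:", model_en.score(X_test, y_test))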
Why not always use regularizers?¶
- The issue: we have two arbitrary factors, $\lambda_1$ and $\lambda_2$, which determine how important the L1 and L2 penalties are relative to the primary fitting. These change the available solution space and thus model class
- These are not "fit" during training like ordinary parameters; rather they are specified beforehand, perhaps with a bit of intuition or domain knowledge, These therefore represent hyperparameters of the model
- Generally speaking, any "choices" we make---amount of data, model type, model parameters, neural network depth, etc are all hyperparameters. How do we choose these in a principled manner?
- A major question in machine learning: How do we choose the best hyperparameters for a model?
Validation set¶
- Hold out some data just for hyperparameter tuning, separate from the test set
- Don't tune on the test set: that leads to data leakage and thus overfitting
from sklearn import linear_model
# define train, validation and test data sets
X_train, X_val, X_test = X_all[:400], X_all[400 : 600], X_all[600 : 800]
y_train, y_val, y_test = y_all[:400], y_all[400 : 600], y_all[600 : 800]
# the hyperparameter values to check
lambdas = np.logspace(-6, 4, 100)
all_validation_losses = list()
for lam in lambdas:
model_l1 = linear_model.Lasso(alpha=lam)
model_l1.fit(X_train, y_train)
validation_loss = model_l1.score(X_val, y_val)
all_validation_losses.append(validation_loss)
best_lambda = lambdas[np.argmax(all_validation_losses)]
plt.semilogx(lambdas, all_validation_losses, label="Validation")
plt.axvline(best_lambda, color='k', linestyle='--', label="Best lambda")
plt.xlabel("Regularization strength $\lambda$")
plt.ylabel("Validation Performance $R^2$")
print("Best lambda on validation set", best_lambda)
Best lambda on validation set 0.0005336699231206307
Now we use the best hyperparameters to train on the full training set, and then evaluate on the test set. This is the final evaluation of our model, and we should not go back and make any changes to hyperparameters or model structure based on the test set results
# fit best model
model_l1 = linear_model.Lasso(alpha=best_lambda)
model_l1.fit(X_train, y_train)
y_test_predict = model_l1.predict(X_test)
plt.figure()
plt.plot(y_test, y_test_predict, ".k")
plt.xlabel("True value")
plt.ylabel("Predicted value")
plt.gca().set_aspect(1)
# plot Ising interaction J
J_lasso = np.array(model_l1.coef_).reshape((L, L))
plt.figure()
plt.imshow(J_lasso, cmap="RdBu_r", vmin=-1, vmax=1)
plt.gca().set_aspect(1)
Cross-validation¶
- We repeatedly split-up the train into multiple sub-splits for training and validation
- For example, if we have 100 train points, we can create five 80:20 "splits", and average the best hyperparameter across the splits
- If we perform $k$ subsplits, we refer to our procedure as k-fold cross-validation
- More elaborate splitting schemes also exist (random Monte Carlo subsampling, importance-weighted splits, etc.)
Image from source
## Cross validation
from sklearn import linear_model
# define train, validation and test data sets
X_train, X_test = X_all[:600], X_all[600 : 800]
y_train, y_test = y_all[:600], y_all[600 : 800]
# the hyperparameter values to check
lambdas = np.logspace(-6, 4, 9)
all_validation_losses = list()
all_validation_stderrs = list()
for lam in lambdas:
all_val_loss_lam = list()
all_val_stderrs_lam = list()
for k in range(5):
# Create the training and validation subsets from the training data
X_train_k = np.concatenate([X_train[:k*100], X_train[(k+1)*100:]])
y_train_k = np.concatenate([y_train[:k*100], y_train[(k+1)*100:]])
X_val_k = X_train[k*100:(k+1)*100]
y_val_k = y_train[k*100:(k+1)*100]
model_l1 = linear_model.Lasso(alpha=lam)
model_l1.fit(X_train_k, y_train_k)
validation_loss = model_l1.score(X_val_k, y_val_k)
all_val_loss_lam.append(validation_loss)
all_val_stderrs_lam.append(np.std(model_l1.predict(X_val_k) - y_val_k)) # spread of the validation residuals for this fold
all_validation_losses.append(np.mean(all_val_loss_lam))
all_validation_stderrs.append(np.mean(all_val_stderrs_lam))
best_lambda = lambdas[np.argmax(all_validation_losses)]
plt.figure()
plt.semilogx(lambdas, all_validation_losses, label="Validation")
print("Best lambda on validation set", best_lambda)
Best lambda on validation set 0.005623413251903491
plt.figure()
plt.semilogx(lambdas, all_validation_stderrs)
plt.xlabel("Lasso regularization strength $\lambda$")
plt.ylabel("Standard error of the val acc")
Text(0, 0.5, 'Standard error of the val acc')
# fit best model
model_l1 = linear_model.Lasso(alpha=best_lambda)
model_l1.fit(X_train, y_train)
y_test_predict = model_l1.predict(X_test)
plt.figure()
plt.plot(y_test, y_test_predict, ".k")
plt.xlabel("True value")
plt.ylabel("Predicted value")
plt.gca().set_aspect(1)
# plot Ising interaction J
J_lasso = np.array(model_l1.coef_).reshape((L, L))
plt.figure()
plt.imshow(J_lasso, cmap="RdBu_r", vmin=-1, vmax=1)
plt.gca().set_aspect(1)
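For reference, scikit-learn can automate the manual loop above. A minimal sketch with GridSearchCV, using the same X_train, y_train, and lambdas as in the preceding cells, looks like this; the selected $\lambda$ may differ slightly from the hand-rolled version because the folds are constructed differently.
from sklearn.model_selection import GridSearchCV
# 5-fold cross-validation over the same grid of regularization strengths
search = GridSearchCV(linear_model.Lasso(), param_grid={"alpha": lambdas}, cv=5)
search.fit(X_train, y_train)
print("Best lambda (GridSearchCV):", search.best_params_["alpha"])
print("Test R^2 of the refit model:", search.score(X_test, y_test))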
How many free parameters are in our model?¶
Linear regression is special in that the number of free parameters is set directly by the shape of our dataset. In our case, we have $L^2$ free parameters, where $L$ is the number of spins in our system.
$$ y_i = \mathbf{A} \mathbf{X}_i $$
- In our Ising example, the fitting parameters $\mathbf{A}$ corresponded to our coupling matrix $\mathbf{J}$
- $\mathbf{X}_i$ were our spin product microstates, and $y_i$ was their energies
- $\mathbf{J} \in \mathbb{R}^{40 \times 40}$ because our inputs (microstates) are on an $L = 40$ lattice, and so we had $1600$ free parameters
What if we wanted to increase our model complexity?¶
Above, we saw that our model had high variance, and we added bias in the form of regularization to constrain the solutions it found.
What if we instead were getting low training accuracy, indicating that our model class isn't broad enough to describe our data? In that case, we would want a model with more trainable parameters.
Idea: what if we add another weight matrix?¶
$$ y_i = \mathbf{B} \mathbf{C} \mathbf{X}_i $$
where $\mathbf{B} \in \mathbb{R}^{40 \times p}$ and $\mathbf{C} \in \mathbb{R}^{p \times 40}$, with $p$ being a hyperparameter that controls the complexity of the model. This "hidden" or "latent" dimensionality allows us to have a more complex model.
However, the problem is that $\mathbf{B} \mathbf{C} \equiv \mathbf{A}\in \mathbb{R}^{40 \times 40}$, so we don't gain any expressivity
Solution:¶
$$ y_i = \mathbf{B} \sigma(\mathbf{C} \mathbf{X}_i) $$
where $\sigma(.)$ is an elementwise nonlinear function, like $\tanh(.)$. Now the model doesn't collapse, so we have $2 \times 40 \times p$ free parameters.
This is a neural network with a single "hidden" layer of $p$ units. We can always go wider or deeper to further increase the model complexity.
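A minimal sketch of such a model using scikit-learn's MLPRegressor; the hidden size p = 100, the tanh activation, and the iteration budget are illustrative choices rather than values used elsewhere in this notebook.
from sklearn.neural_network import MLPRegressor
# One hidden layer of p units with a tanh nonlinearity: y = B tanh(C x + c) + b
p = 100
mlp = MLPRegressor(hidden_layer_sizes=(p,), activation="tanh", max_iter=2000, random_state=0)
mlp.fit(X_train, y_train)
print("MLP test R^2:", mlp.score(X_test, y_test))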
Appendix¶
Simulate the Ising model with nearest-neighbor interactions and periodic boundary conditions. The energy of a spin configuration $\boldsymbol{\sigma} = \{\sigma_1,\dots,\sigma_L\}$ is given by
$$ E[\boldsymbol{\sigma}] = -J\sum_{\langle i,j\rangle} \sigma_i \sigma_j $$
where the sum is over all pairs of nearest neighbors on a one-dimensional chain of $L$ spins with periodic boundary conditions, and $J$ is the coupling constant. The magnetization is given by
$$ M[\boldsymbol{\sigma}] = \sum_{i=1}^L \sigma_i $$
class IsingModel:
"""
The Ising model with ferromagnetic interactions that encourage nearest neighbors
to align
"""
def __init__(self, L, random_state=None):
self.L = L
self.random_state = random_state
self.J = np.diag(-np.ones(L - 1), 1) # nearest-neighbor couplings on the superdiagonal
self.J[-1, 0] = -1.0 # periodic boundary conditions
def sample(self, n_samples=1):
if self.random_state is not None:
np.random.seed(self.random_state)
return np.random.choice([-1, 1], size=(n_samples, self.L))
def energy(self, state):
return np.einsum("...i,ij,...j->...", state, self.J, state)
model_experiment = IsingModel(40, random_state=0)
# create 10000 random Ising states
states = model_experiment.sample(n_samples=10000)
# calculate Ising energies
energies = model_experiment.energy(states)
print("Input data has shape: ", states.shape)
print("Labels have shape: ", energies.shape)
## Save data
# states.dump("../resources/spin_microstates.npy")
# energies.dump("../resources/spin_energies.npy")