Training neural networks to predict turbulence¶
Preamble: Run the cells below to import the necessary Python packages
This notebook was created by William Gilpin. Consult the course website for all content and the GitHub repository for raw files and runnable online code.
## Preamble / required packages
import numpy as np
np.random.seed(0)
## Import local plotting functions and in-notebook display functions
import matplotlib.pyplot as plt
from IPython.display import Image, display
%matplotlib inline
import warnings
## Comment this out to activate warnings
warnings.filterwarnings('ignore')
# plt.style.use("dark_background")
Training a model¶
Given a model $f(\boldsymbol{x}; \boldsymbol{\theta})$ and a dataset $\{(\boldsymbol{x}_i, y_i)\}$, we want to find the parameters $\boldsymbol{\theta}$ that best describe the data.
Linear regression has an analytic expression for the weight matrix $\boldsymbol{\theta}$, which turns out to minimize the least-squares cost function $J(\boldsymbol{\theta}) = \frac{1}{2} \sum_i (y_i - f(\boldsymbol{x}_i; \boldsymbol{\theta}))^2$.
Usually we aren't so lucky as to have an exactly solvable convex optimization problem
The next best thing is to have an optimization problem (possibly non-convex) where the loss is a smooth, differentiable function of the weights. In this case, we can use gradient descent and its variants
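As a quick illustration, here is a minimal sketch of vanilla gradient descent on a toy one-parameter quadratic loss; the loss function and learning rate below are arbitrary choices for demonstration, not part of the turbulence problem.
## Minimal sketch: gradient descent on a smooth, differentiable toy loss
def toy_loss(theta):
    return (theta - 3.0) ** 2
def toy_loss_grad(theta):
    return 2.0 * (theta - 3.0)
theta, eta = 0.0, 0.1   # initial guess and learning rate
for _ in range(100):
    theta -= eta * toy_loss_grad(theta)
print(theta)  # approaches the minimizer at theta = 3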
An aside: What if our problem is non-differentiable?¶
Brute force: try random parameters or a grid search and keep whatever gives the best result (see the sketch after this list).
Genetic algorithms, reinforcement learning
Can attempt to use gradient descent, if we can construct a differentiable surrogate function for the true loss
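For instance, the brute-force option might look like the following sketch, where the objective is a stand-in non-differentiable function chosen only for illustration.
## Brute-force random search: no gradients required
def nondiff_objective(theta):
    # a step-like, non-differentiable function of the parameter
    return np.abs(np.round(theta) - 2) + 0.1 * np.abs(theta)
rng = np.random.default_rng(0)
candidates = rng.uniform(-10, 10, size=1000)  # try many random parameter values
best = candidates[np.argmin([nondiff_objective(c) for c in candidates])]
print(best, nondiff_objective(best))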
Revisiting predicting the Reynolds number of a turbulent flow¶
Our dataset consists of snapshots of the velocity field of a turbulent flow, each labelled with the corresponding Reynolds number.
Recall that the Reynolds number is a dimensionless number that characterizes the flow regime of a fluid. The higher the Reynolds number, the more turbulent the flow.
Because we are going to use fully-connected nets, we need to downsample the dataset substantially to have a reasonable training time
We will implement a data loader that will fetch the dataset, and perform standardization and downsampling.
We will also perform a train/test split in the data loader
## Load the Reynold number regression dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
class ReynoldsDataset:
"""
Class to load the Reynolds number classification dataset
Parameters:
downsample (int): Factor by which to downsample the dataset
split (float): Fraction of data to use for testing
random_state (int): Random seed for reproducibility
"""
def __init__(self, downsample=1, split=0.2, random_state=None):
self.random_state = random_state
all_vorticity_fields = list()
all_reynolds_numbers = list()
# Load simulations for different Reynolds numbers
re_vals = [300, 600, 900, 1200]
for re_val in re_vals:
# Load the two-dimensional velocity field data. Data is stored in a 4D numpy array,
# where the first dimension is the time index, the second and third dimensions are the
# x and y coordinates, and the fourth dimension is the velocity components (ux or uy).
vfield = np.load(
f"../resources/von_karman_street/vortex_street_velocities_Re_{re_val}_largefile.npz",
allow_pickle=True
)
# Calculate the vorticity, which is the curl of the velocity field
vort_field = np.diff(vfield, axis=1)[..., :-1, 1] + np.diff(vfield, axis=2)[:, :-1, :, 0]
# Crop to wake field only
vort_field = vort_field[::5, -127:, :][:, 32:96, 32:96]
# downsample the dataset
vort_field = vort_field[:, ::downsample, ::downsample]
all_vorticity_fields.append(vort_field)
all_reynolds_numbers.extend(re_val * np.ones(vort_field.shape[0]))
self.data_shape = vort_field[0].shape
all_vorticity_fields = np.vstack(all_vorticity_fields)
all_reynolds_numbers = np.array(all_reynolds_numbers)
X = np.reshape(all_vorticity_fields, (all_vorticity_fields.shape[0], -1))
y = all_reynolds_numbers
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=split,
random_state=self.random_state
)
self.X_train = X_train
self.X_test = X_test
self.y_train = y_train
self.y_test = y_test
self.X = np.vstack([X_train, X_test])
self.y = np.hstack([y_train, y_test])
def reshape(self, X):
if len(X.shape) == 1:
return X.reshape(self.data_shape)
elif len(X.shape) == 2:
return np.reshape(X, (X.shape[0], *self.data_shape))
else:
raise ValueError("X must be a 1D or 2D array")
def __len__(self):
return len(self.y)
def __getitem__(self, idx):
return self.X[idx], self.y[idx]
dataset = ReynoldsDataset()
print(f"Dataset size: X_train: {dataset.X_train.shape}, y_train: {dataset.y_train.shape}")
print(f"Dataset size: X_test: {dataset.X_test.shape}, y_test: {dataset.y_test.shape}")
## plot examples of the dataset
fig, ax = plt.subplots(1, 4, figsize=(12, 3))
for i, re in enumerate([300, 600, 900, 1200]):
ax[i].imshow(dataset.reshape(dataset.X[dataset.y == re][0]), cmap='coolwarm', vmin=-.005, vmax=.005)
ax[i].set_title(f"Re = {re}")
ax[i].axis("off")
plt.show()
Dataset size: X_train: (1920, 4096), y_train: (1920,) Dataset size: X_test: (480, 4096), y_test: (480,)
Before fitting a model, check the data¶
We first perform an unsupervised embedding, and color the points by the true Reynolds number.
- The Reynolds-number classes overlap substantially in this two-dimensional embedding, so this is likely a non-trivial learning problem
from sklearn.decomposition import PCA
## Perform PCA on the dataset
pca = PCA(n_components=2)
X_pca = pca.fit_transform(dataset.X_train)
## Plot the PCA components
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(X_pca[:, 0], X_pca[:, 1], c=dataset.y_train, cmap='rainbow')
ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
Text(0, 0.5, 'PCA Component 2')
Are the classes balanced?¶
### Are the classes balanced?
class_ids = np.unique(dataset.y_train)
counts = list()
for class_id in class_ids:
    print(f"Fraction of class {class_id}: {np.mean(dataset.y_train == class_id):.3f}")
    counts.append(np.sum(dataset.y_train == class_id))
plt.figure(figsize=(6, 4))
plt.bar(class_ids, counts, width=100)
plt.xticks(class_ids)
plt.xlabel("Reynolds Number")
plt.ylabel("Number of Samples in Training Set")
Fraction of class 300: 0.261 Fraction of class 600: 0.244 Fraction of class 900: 0.251 Fraction of class 1200: 0.244
Text(0, 0.5, 'Number of Samples in Training Set')
Generate baseline predictions¶
Since this is a regression problem, we will generate baseline predictions using a linear regression model
We will use the sklearn implementation of ridge regression. Instead of manually tuning the regularization parameter, we will use cross-validation to find the best value; sklearn has a built-in model, RidgeCV, that performs this automatically.
from sklearn.linear_model import RidgeCV
## Train a linear model to predict the Reynolds number
model = RidgeCV()
model.fit(dataset.X_train, dataset.y_train)
y_pred = model.predict(dataset.X_test)
## Evaluate the model on the test set
print(f"Test set R^2: {model.score(dataset.X_test, dataset.y_test):.3f}")
Test set R^2: 0.205
Let's try a neural network¶
Recall that a two-layer multilayer perceptron has the form
$$ \hat{\mathbf{y}} = \sigma(\boldsymbol{\theta}_2 \sigma(\boldsymbol{\theta}_1 \mathbf{x})) $$
where $\sigma$ is an elementwise function (such as the logistic function), and $\boldsymbol{\theta}_1$ and $\boldsymbol{\theta}_2$ are trainable weight matrices.
For our problem, $\mathbf{x}$ is the flattened vorticity field, and $\hat{\mathbf{y}}$ is the predicted Reynolds number. While the output is a scalar here, this isn't a requirement of the model.
We will use scikit-learn's own implementation of a multi-layer perceptron (MLP). This implements a few architectural features and hyperparameters that would be difficult for us to implement ourselves:
- The "Adam" optimizer is a variant of gradient descent that uses adaptive learning rates and momentum
- The "relu" activation function is a non-linear function that is more robust to vanishing gradients than the logistic function
- The "batch size" is the number of samples used to compute the gradient at each step. This is a compromise between using the full dataset (which is slow) and using a single sample (which is noisy).
- The "epoch" is the number of times the entire dataset is used to compute the gradient.
from sklearn.neural_network import MLPRegressor
mlp = MLPRegressor(
hidden_layer_sizes=(10, 10),
activation='relu',
solver='adam',
learning_rate='constant',
learning_rate_init=0.001,
max_iter=1000,
random_state=0,
batch_size=32
)
# Fit the model to the data.
mlp.fit(dataset.X_train, dataset.y_train)
# Generate predictions on the test set.
y_test_pred = mlp.predict(dataset.X_test)
## Score using R^2
print("R^2 score: %f" % mlp.score(dataset.X_test, dataset.y_test))
plt.figure(figsize=(6, 6))
plt.plot(dataset.y_test, y_test_pred, '.', alpha=0.1)
plt.xlabel("True Reynolds number")
plt.ylabel("Predicted Reynolds number")
R^2 score: 0.982845
Text(0, 0.5, 'Predicted Reynolds number')
How do we train a multilayer perceptron? Iterative optimization¶
Given a model $$ \hat{\mathbf{y}} = \sigma(\boldsymbol{\theta}_2 \sigma(\boldsymbol{\theta}_1 \mathbf{x})) $$
with known true labels $y$ for every $\mathbf{x}$, we can update the weights with gradient descent:
$$\theta_{i+1} = \theta_i - \eta \frac{\partial \mathcal{L}}{\partial \theta_i}$$
where $\eta$ is the learning rate. Given a training set with $N$ examples, we usually take the average of the gradient computed over the $N$ examples.
Linear model¶
We saw that linear regression and ridge regression have an analytic solution for the optimal weights. This is not the case for neural networks. Instead, we need to use gradient descent. It turns out that we can also frame linear regression as a neural network, and use the same gradient descent algorithm to train it. This is useful for large datasets, where pseudo-inverse methods are too slow.
A linear model is given by
$$\hat{y} = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \cdots + \theta_M x_M$$
For linear regression, we typically use the mean-squared loss function,
$$\mathcal{L} = \frac{1}{2} \sum_{i=1}^N (\hat{y}_i - y_i)^2$$
$$ \frac{\partial \mathcal{L}}{\partial \theta_j} = \sum_{i=1}^N (\hat{y}_i - y_i) x_{ij} $$
where $i$ indexes the data points and $j$ indexes the features.
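As a sketch of how this gradient is used in practice, the cell below trains a linear model (without the bias term, for brevity) by gradient descent on synthetic data; the data and learning rate are made up for illustration.
## Gradient descent for a linear model, using the gradient derived above
rng = np.random.default_rng(0)
N, M = 200, 3
X = rng.normal(size=(N, M))
theta_true = np.array([1.5, -2.0, 0.5])
y = X @ theta_true + 0.01 * rng.normal(size=N)
theta = np.zeros(M)
eta = 0.01
for _ in range(500):
    y_hat = X @ theta
    grad = X.T @ (y_hat - y)   # sum_i (y_hat_i - y_i) x_ij
    theta -= eta * grad / N    # average the gradient over the N examples
print(theta)  # approaches theta_true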
Logistic model:¶
The model is
$$\hat{y} = \frac{1}{1 + e^{-\theta_0 - \theta_1 x_1 - \theta_2 x_2 - \cdots - \theta_M x_M}}$$
Instead of mean-squared error, logistic models are often trained using cross-entropy loss:
$$\mathcal{L} = - \sum_{i=1}^N \left[ y_i \log \hat{y}_i + (1 - y_i) \log (1 - \hat{y}_i) \right]$$
$$ \frac{\partial \mathcal{L}}{\partial \theta_j} = \sum_{i=1}^N (\hat{y}_i - y_i) x_{ij} $$
where $i$ indexes the data points and $j = 1, 2, ..., M$ indexes the features. Collecting these components gives the full gradient $\nabla_{\boldsymbol{\theta}} \mathcal{L} \in \mathbb{R}^M$ of the loss with respect to the weights. Notice that, for the sigmoid paired with the cross-entropy loss, the gradient takes the same form as for linear regression with the mean-squared loss.
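A quick numerical sanity check of this simplified gradient, comparing it against a central finite-difference estimate; the data and parameters here are made up purely for the check.
## Check the cross-entropy gradient against finite differences
rng = np.random.default_rng(0)
N, M = 50, 4
X = rng.normal(size=(N, M))
y = rng.integers(0, 2, size=N).astype(float)
theta = rng.normal(size=M)
def predict(theta):
    return 1.0 / (1.0 + np.exp(-X @ theta))
def cross_entropy(theta):
    y_hat = predict(theta)
    return -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
analytic = X.T @ (predict(theta) - y)
eps = 1e-6
numeric = np.array([
    (cross_entropy(theta + eps * np.eye(M)[j]) - cross_entropy(theta - eps * np.eye(M)[j])) / (2 * eps)
    for j in range(M)
])
print(np.allclose(analytic, numeric, atol=1e-4))  # True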
Multivariate Linear model¶
The model is
$$\hat{\mathbf{y}} = \boldsymbol{\theta} \mathbf{x}$$
where $\mathbf{x} \in \mathbb{R}^{D}$ is a single data point with $D$ features, the weight matrix $\boldsymbol{\theta} \in \mathbb{R}^{M \times D}$, and the prediction vector $\hat{\mathbf{y}} \in \mathbb{R}^M$. Usually, we work with the data matrix $X = \{ \mathbf{x}_i \} \in \mathbb{R}^{N \times D}$ containing $N$ data points.
The mean-squared loss function is
$$\mathcal{L} = \frac{1}{2} \sum_{i=1}^N (\hat{\mathbf{y}}_i - \mathbf{y}_i)^T(\hat{\mathbf{y}}_i - \mathbf{y}_i)$$
The gradient is
$$ \frac{\partial \mathcal{L}}{\partial \boldsymbol{\theta}} = \sum_{i=1}^N (\hat{\mathbf{y}}_i - \mathbf{y}_i) \mathbf{x}_i^T $$
Notice that the gradient is a matrix $\frac{\partial \mathcal{L}}{\partial \boldsymbol{\theta}} \in \mathbb{R}^{M \times D}$, where each row is the gradient of the loss with respect to the weights for a single output. This means that we update each element of the weight matrix separately. Also, note that the right-hand side is an outer product, while the forward pass is an inner product.
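A short shape check of this statement, using small made-up dimensions: the forward pass is an inner product for each data point, while the gradient accumulates outer products.
## Shape check: inner product forward, outer product gradient
rng = np.random.default_rng(0)
D, M, N = 5, 3, 10
theta = rng.normal(size=(M, D))
X = rng.normal(size=(N, D))
Y = rng.normal(size=(N, M))
Y_hat = X @ theta.T                 # forward pass: shape (N, M)
grad = (Y_hat - Y).T @ X            # sum of outer products: shape (M, D)
grad_loop = sum(np.outer(Y_hat[i] - Y[i], X[i]) for i in range(N))
print(Y_hat.shape, grad.shape, np.allclose(grad, grad_loop))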
Gradient descent requires us to take gradients of each trainable parameter with respect to the loss¶
Let's start by thinking about how we would do this in low dimensions
Suppose I wanted to take the gradient of the double well potential
$$ V(x) = x^4 - 2 x^2 $$
I could find the gradient analytically by taking the derivative
The gradient of the potential is $$ \frac{\partial V}{\partial x} = 4 x^3 - 4 x $$
def double_well(x):
"""Double well potential function"""
return x**4 - 2*x**2
def double_well_grad(x):
"""Derivative of double well potential function"""
return 4*x**3 - 4*x
print(double_well(0.12132987))
print(double_well_grad(0.12132987))
But we implicitly performed several subroutines to do this.¶
Let's write out these functions in terms of basic computational steps¶
Given an $x$, computing $V(x)$ (the forward evaluation, or forward pass) can be broken up into
$$ h_1 = x^4 $$ $$ h_2 = 2 x^2 $$ $$ V = h_1 - h_2 $$
Now, given $V$, we want to compute $\frac{\partial V}{\partial x}$ (the backward evaluation, or backward pass). We can break this up into
$$ g_1 = 4 x $$ $$ g_2 = 4 x^3 $$ $$ g_3 = -1 $$ $$ \frac{\partial V}{\partial x} = g_2 + g_3 g_1 $$
def double_well_primitive(x):
"""Decompose the double well calculation into primitive operations"""
h1a = x**4
h1b = 2*x**2
h2 = h1a - h1b
return h2
def double_well_primitive_grad(x):
"""Decompose the double well gradient calculation into primitive operations"""
dh2dh1a = 1
dh2dh1b = -1
dh1adx = 4*x**3
dh1bdx = 4*x
dh2dx = dh2dh1a * dh1adx + dh2dh1b * dh1bdx
return dh2dx
print(double_well_primitive(0.12132987))
print(double_well_primitive_grad(0.12132987))
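We can also sanity-check the hand-decomposed gradient against a central finite-difference estimate.
## Compare the decomposed gradient with a finite-difference estimate
x0, eps = 0.12132987, 1e-6
fd_grad = (double_well(x0 + eps) - double_well(x0 - eps)) / (2 * eps)
print(fd_grad)
print(double_well_primitive_grad(x0))  # should agree to several decimal places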
Backpropagation is the chain rule on a computation graph¶
Diagrams from UW CSE599W lecture notes
Vector-valued functions¶
We can see that we were able to simplify the calculation by breaking the forward pass into discrete steps that got combined, and that the backward pass was able to reuse some of the intermediate calculations.
What if we go to higher dimensions? Recall that our neural network has a forward pass that looks like
$$ \hat{\mathbf{y}} = \sigma(\boldsymbol{\theta}_2 \sigma(\boldsymbol{\theta}_1 \mathbf{x})) $$
where $\sigma$ is the logistic function. We combine a given prediction on a single datapoint $\hat{\mathbf{y}}$ with the true label $\mathbf{y}$ to compute the mean-squared error loss
$$ \mathcal{L} = \frac{1}{2} (\hat{\mathbf{y}} - \mathbf{y})^T (\hat{\mathbf{y}} - \mathbf{y}) $$
In order to perform gradient descent, we need to compute the gradient of the loss with respect to the weights $\boldsymbol{\theta}_1$ and $\boldsymbol{\theta}_2$. While the loss is a scalar, its gradient with respect to each weight matrix has the same shape as that matrix.
Backpropagation through a multilayer perceptron¶
Forward pass¶
When a trained neural network is given an input $\mathbf{x}$, it computes an output $\hat{\mathbf{y}}$ by passing the input through a series of layers. Each layer is a linear transformation followed by a non-linear function. For example, a two-layer network is given by
$$ \hat{\mathbf{y}} = \sigma(\boldsymbol{\theta}_2 \sigma(\boldsymbol{\theta}_1 \mathbf{x})) $$ for concreteness, we will use the sigmoid nonlinearity $\sigma(x) = \frac{1}{1 + e^{-x}}$.
We start by breaking this into single steps
$$ \mathbf{h}_1^{u} = \boldsymbol{\theta}_1 \mathbf{x} $$
$$ \mathbf{h}_1 = \sigma(\mathbf{h}_1^{u}) $$
$$ \mathbf{h}_2^{u} = \boldsymbol{\theta}_2 \mathbf{h}_1 $$
$$ \hat{\mathbf{y}} = \sigma(\mathbf{h}_2^{u}) $$
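Here is a minimal NumPy sketch of these four forward-pass steps, caching the intermediate values for use in the backward pass; the layer sizes and random weights are arbitrary choices for illustration.
## Forward pass written as the four primitive steps above
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))
rng = np.random.default_rng(0)
D, H, M = 4, 3, 2                  # input, hidden, and output sizes
theta1 = rng.normal(size=(H, D))
theta2 = rng.normal(size=(M, H))
x = rng.normal(size=D)
h1u = theta1 @ x                   # pre-activation of the first layer
h1 = sigmoid(h1u)                  # activation of the first layer
h2u = theta2 @ h1                  # pre-activation of the second layer
y_hat = sigmoid(h2u)               # prediction
print(y_hat)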
Backward pass¶
Suppose we pass an input $\mathbf{x} \in \mathbb{R}^D$ and get an output $\hat{\mathbf{y}} \in \mathbb{R}^M$, and we know the true output is $\mathbf{y}\in \mathbb{R}^M$.
If we are using the mean-squared error loss function, then for a single training example $\mathbf{y}$, the loss $\mathcal{L} \in \mathbb{R}$ is
$$ \mathcal{L} = \frac{1}{2} (\hat{\mathbf{y}} - \mathbf{y})^\top(\hat{\mathbf{y}} - \mathbf{y}) $$
We want to update all the parameters $\boldsymbol{\theta}_1$ and $\boldsymbol{\theta}_2$ to minimize this loss.
To calculate $\frac{\partial \mathcal{L}}{\partial \boldsymbol{\theta}_2} \in \mathbb{R}^{M \times H_2}$, we need to use the chain rule,
$$ \frac{\partial \mathcal{L}}{\partial \boldsymbol{\theta}_2} = \frac{\partial \mathcal{L}}{\partial \hat{\mathbf{y}}} \frac{\partial \hat{\mathbf{y}}}{\partial \mathbf{h}_2^{u}} \frac{\partial \mathbf{h}_2^{u}}{\partial \boldsymbol{\theta}_2} $$
In a reverse-mode chain rule evaluation, we solve this sequence from left to right,
$$ \frac{\partial \mathcal{L}}{\partial \hat{\mathbf{y}}} = \hat{\mathbf{y}} - \mathbf{y} $$
$$ \frac{\partial \hat{\mathbf{y}}}{\partial \mathbf{h}_2^{u}} = \hat{\mathbf{y}} (1 - \hat{\mathbf{y}}) $$
$$ \frac{\partial \mathbf{h}_2^{u}}{\partial \boldsymbol{\theta}_2} = \mathbf{h}_1^\top $$
Putting this all together,
$$ \frac{\partial \mathcal{L}}{\partial \boldsymbol{\theta}_2} = \left[ (\hat{\mathbf{y}} - \mathbf{y}) \odot \hat{\mathbf{y}} (1 - \hat{\mathbf{y}}) \right] \mathbf{h}_1^\top $$
where $\odot$ is the element-wise product. Notice how we actually traversed the network backwards, starting with the gradient of the last evaluation we performed during the forward pass.
We can now compute the gradient for the first layer.
$$ \frac{\partial \mathcal{L}}{\partial \boldsymbol{\theta}_1} = \frac{\partial \mathcal{L}}{\partial \hat{\mathbf{y}}} \frac{\partial \hat{\mathbf{y}}}{\partial \mathbf{h}_2^{u}} \frac{\partial \mathbf{h}_2^{u}}{\partial \mathbf{h}_1} \frac{\partial \mathbf{h}_1}{\partial \mathbf{h}_1^{u}} \frac{\partial \mathbf{h}_1^{u}}{\partial \boldsymbol{\theta}_1} $$
We can combine the first two terms, which we already calculated above, and then solve from left to right.
$$ \frac{\partial \mathcal{L}}{\partial \mathbf{h}_2^{u}} = (\hat{\mathbf{y}} - \mathbf{y}) \odot \hat{\mathbf{y}} (1 - \hat{\mathbf{y}}) $$
$$ \frac{\partial \mathbf{h}_2^{u}}{\partial \mathbf{h}_1} = \boldsymbol{\theta}_2 $$
$$ \frac{\partial \mathbf{h}_1}{\partial \mathbf{h}_1^{u}} = \mathbf{h}_1 (1 - \mathbf{h}_1) $$
$$ \frac{\partial \mathbf{h}_1^{u}}{\partial \boldsymbol{\theta}_1} = \mathbf{x}^\top $$
Putting this all together,
$$ \frac{\partial \mathcal{L}}{\partial \boldsymbol{\theta}_1} = \left\{ \left[ \boldsymbol{\theta}_2^\top \left( (\hat{\mathbf{y}} - \mathbf{y}) \odot \hat{\mathbf{y}} (1 - \hat{\mathbf{y}}) \right) \right] \odot \mathbf{h}_1 (1 - \mathbf{h}_1) \right\} \mathbf{x}^\top $$
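Continuing the forward-pass sketch above, the cell below implements these two gradient expressions and spot-checks one entry against a finite difference; the "true" label y is made up for illustration.
## Backward pass for the two-layer sigmoid network above
y = np.array([0.3, -0.1])                      # made-up target for illustration
delta2 = (y_hat - y) * y_hat * (1 - y_hat)     # dL/dh2u, shape (M,)
grad_theta2 = np.outer(delta2, h1)             # dL/dtheta2, shape (M, H)
delta1 = (theta2.T @ delta2) * h1 * (1 - h1)   # dL/dh1u, shape (H,)
grad_theta1 = np.outer(delta1, x)              # dL/dtheta1, shape (H, D)
## Finite-difference spot check of one entry of dL/dtheta1
def loss_fn(t1):
    yh = sigmoid(theta2 @ sigmoid(t1 @ x))
    return 0.5 * np.sum((yh - y) ** 2)
eps = 1e-6
t1p, t1m = theta1.copy(), theta1.copy()
t1p[0, 0] += eps
t1m[0, 0] -= eps
print(grad_theta1[0, 0], (loss_fn(t1p) - loss_fn(t1m)) / (2 * eps))  # should agree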
Some things to note¶
Several of the "intermediate" values we computed during the forward pass re-appear when we perform the backwards pass. If we cached these values, then we don't have to recompute them during the backward pass.
Notice that when we compute the "forward" pass we start with the innermost portion of the function composition, and then work our way out. For the backwards pass, we start with the outermost portion of the function composition, and work our way in.
Notice that the sigmoid function's derivative is a function of its output. This is a common pattern in autodiff.
In order to keep track of indices and transposes, it helps to think about the dimensions of the input/output of each primitive operation.
It's useful to remember the reverse-mode rules for matrix multiplication: if $\mathbf{C} = \mathbf{A} \mathbf{B}$ and the upstream gradient is $\mathbf{G} = \frac{\partial \mathcal{L}}{\partial \mathbf{C}}$, then
$$ \frac{\partial \mathcal{L}}{\partial \mathbf{A}} = \mathbf{G} \mathbf{B}^\top, \qquad \frac{\partial \mathcal{L}}{\partial \mathbf{B}} = \mathbf{A}^\top \mathbf{G} $$
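A quick numerical check of these rules with made-up matrices, treating a fixed matrix G as the upstream gradient:
## Check the reverse-mode matrix-multiplication rules numerically
rng = np.random.default_rng(0)
A = rng.normal(size=(3, 4))
B = rng.normal(size=(4, 2))
G = rng.normal(size=(3, 2))            # stands in for the upstream gradient dL/dC
def scalar_loss(A, B):
    return np.sum(G * (A @ B))         # a scalar loss whose dL/dC is exactly G
grad_A, grad_B = G @ B.T, A.T @ G
eps = 1e-6
Ap, Am = A.copy(), A.copy()
Ap[1, 2] += eps
Am[1, 2] -= eps
print(grad_A[1, 2], (scalar_loss(Ap, B) - scalar_loss(Am, B)) / (2 * eps))  # should agree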
A reminder regarding indices...¶
For a single datapoint (vector) $\mathbf{x} \in \mathbb{R}^{d}$, the forward pass of a multilayer perceptron is $$ \hat{\mathbf{y}} = \sigma(\boldsymbol{\theta}_2 \sigma(\boldsymbol{\theta}_1 \mathbf{x})) $$
We normally aggregate many datapoints into a matrix $\mathbf{X} \in \mathbb{R}^{N \times d}$, where $N$ is the number of datapoints. In this case, the forward pass returns $\hat{\mathbf{Y}} \in \mathbb{R}^{N \times 1}$, where each row is the prediction for a single datapoint. For some regression problems (such as the fluid forecasting problem on the homework), $\hat{\mathbf{Y}} \in \mathbb{R}^{N \times M}$
The first index, the data or "batch" index, is usually vectorized: it passes right through the model, since every data point has an output. The first index usually receives special treatment in high-level packages like PyTorch and TensorFlow, since it is often used to aggregate gradients from multiple datapoints.
The loss function is usually a function of the entire dataset, including the batch index. For example, the mean squared loss is $$ \mathcal{L}(X, \mathbf{y}, \boldsymbol{\theta}) = \frac{1}{N} \sum_{i=1}^N \frac{1}{2} (\hat{y}_i - y_i)^2 $$
For a multivariate prediction problem, the loss function is
$$ \mathcal{L}(X, Y, \boldsymbol{\theta}) = \frac{1}{N} \sum_{i=1}^N \frac{1}{2} \|\hat{\mathbf{y}}_i - \mathbf{y}_i\|^2 $$
where $\hat{\mathbf{y}}_i \in \mathbb{R}^{M}$ and $\mathbf{y}_i \in \mathbb{R}^{M}$.
Importantly, when we compute gradients of the loss, the dataset $X$ and the true labels $Y$ enter as fixed constants, and the cached predictions $\hat{Y}$ from the forward pass enter the gradient expressions as fixed numerical values; only the trainable parameters $\boldsymbol{\theta}$ are varied.
What does the code actually look like?¶
We will put all of these ideas together into a multilayer perceptron class.
We will use the ReLU activation function and the mean-squared error loss function.
Note: the code below doesn't work very well on this problem
import numpy as np
def relu(x):
return np.maximum(0, x)
def relu_derivative(x):
return np.where(x > 0, 1.0, 0.0)
class MultiLayerPerceptron:
"""
An MLP for regression with mean squared loss
Attributes:
hidden_layer_sizes (tuple): Number of neurons in each hidden layer
activation (str): Activation function to use. Options are "relu", "tanh", and "identity"
learning_rate (float): Learning rate for gradient descent
max_iter (int): Maximum number of iterations to train for
batch_size (int): Size of the mini-batches for gradient descent
random_state (int): Random seed for reproducibility
store_history (bool): Whether to store the training history
"""
def __init__(self, hidden_layer_sizes=(100,), activation='relu', learning_rate=0.001,
max_iter=1000, batch_size=32, store_history=False, random_state=0):
self.hidden_layer_sizes = hidden_layer_sizes
self.activation = activation
self.learning_rate = learning_rate
self.max_iter = max_iter
self.batch_size = batch_size
self.random_state = random_state
np.random.seed(self.random_state)
self.weights = []
self.biases = []
self.store_history = store_history
if self.store_history:
self.history = []
def loss(self, y_true, y_pred):
return np.mean((y_true - y_pred)**2)
def initialize_parameters(self, input_size, output_size):
"""Initialize all model parameters using random numbers"""
layer_sizes = [input_size] + list(self.hidden_layer_sizes) + [output_size]
self.weights = [np.random.randn(layer_sizes[i], layer_sizes[i+1]) * 0.01
for i in range(len(layer_sizes) - 1)]
self.biases = [np.zeros((1, size)) for size in layer_sizes[1:]]
def forward_pass(self, X):
"""Given an input, compute the activations of all layers. The activations of the last layer
are the predictions of the model"""
activations = [X]
for i, (w, b) in enumerate(zip(self.weights, self.biases)):
    z = np.dot(activations[-1], w) + b
    # Apply the nonlinearity to hidden layers only; the output layer stays linear,
    # consistent with the mean-squared loss and the backward pass below
    a = relu(z) if i < len(self.weights) - 1 else z
    activations.append(a)
return activations
def backward_pass(self, activations, y):
"""
Given the activations of all layers and the true labels, compute the gradients of the
loss with respect to the weights and biases of all layers
"""
deltas = [(activations[-1] - y)]
for i in reversed(range(len(self.weights)-1)):
delta = np.dot(deltas[0], self.weights[i+1].T) * relu_derivative(activations[i+1])
deltas.insert(0, delta)
grad_w = [np.dot(activations[i].T, deltas[i]) for i in range(len(self.weights))]
grad_b = [np.sum(deltas[i], axis=0, keepdims=True) for i in range(len(self.biases))]
return grad_w, grad_b
def update_parameters(self, grad_w, grad_b):
"""Given the gradients of the loss with respect to the weights and biases, update the
weights and biases using gradient descent"""
for i in range(len(self.weights)):
self.weights[i] -= self.learning_rate * grad_w[i]
self.biases[i] -= self.learning_rate * grad_b[i]
def fit(self, X, y):
# Promote y to a 2D array if it is 1D
if y.ndim == 1:
y = y.reshape(-1, 1)
self.initialize_parameters(X.shape[1], y.shape[1])
for i in range(self.max_iter):
for batch in range(0, X.shape[0], self.batch_size):
# Take a mini-batch from the training set (sequential slices here; shuffling
# the data each epoch would make this true stochastic gradient descent)
X_batch = X[batch:batch+self.batch_size]
y_batch = y[batch:batch+self.batch_size]
# We cache all activations, not just the predictions
activations = self.forward_pass(X_batch)
grad_w, grad_b = self.backward_pass(activations, y_batch)
self.update_parameters(grad_w, grad_b)
if self.store_history:
mse = self.loss(y, self.predict(X))
self.history.append(mse)
def predict(self, X):
activations = self.forward_pass(X)
return activations[-1]
mlp = MultiLayerPerceptron(hidden_layer_sizes=(10, 10), learning_rate=0.001, store_history=True)
mlp.fit(dataset.X_train, dataset.y_train)
predictions = mlp.predict(dataset.X_test)
## Score using R^2
from sklearn.metrics import r2_score
print("R^2 score: %f" % r2_score(dataset.y_test, predictions))
plt.plot(mlp.history)
R^2 score: nan
Harder cases: branching paths, loops, and shared weights¶
For more sophisticated architectures, we might have multiple paths through the network. For example, we might have a residual connection, where the output of the first layer is added to the output of the second layer.
Backpropagation is more complicated, but as long as every operation is deterministic and differentiable, we can still compute the gradient at every node.
Diagram from Szegedy et al. (2014)
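As a small illustration of the branching case, the sketch below uses jax (introduced later in this notebook) to confirm that the gradient through a residual connection is the sum of the contributions from the skip path and the transformed path; the function and weight are made up for illustration.
## Gradient through a residual (branching) connection: contributions add
import jax
import jax.numpy as jnp
w = 0.7
def residual_block(x):
    return x + jnp.tanh(w * x)      # skip path plus transformed path
x0 = 0.3
analytic = 1.0 + w * (1.0 - jnp.tanh(w * x0) ** 2)   # the 1 comes from the skip path
print(analytic, jax.grad(residual_block)(x0))        # should agree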
Math is hard: Automatic Differentiation¶
Computing symbolic gradients of vector functions is exact but laborious
Finite-difference gradients are approximate, easy, but expensive in high dimensions:
- If our network has $D$ trainable parameters (or takes a $D$-dimensional input), a central-difference estimate of the gradient requires roughly $2D$ function evaluations. For a 1-megapixel input image, that is on the order of a million extra forward passes per gradient descent step
Automatic differentiation is a technique that can compute the exact gradient of a function in a computationally-efficient manner.
Backpropagation is just reverse-mode autodiff applied to train deep neural networks.
Let the computer do the work¶
Most existing deep learning frameworks are built on top of autodiff
Tensorflow, PyTorch, JAX, and many others allow you to specify the neural network as a function composition graph, and can compute the gradient automatically as long as everything is differentiable
Implictly, these networks build a computation "graph" that is later traversed backwards to compute the gradient
Caching forward pass values can speed up the backwards pass, since many derivatives depend on forward pass values
import jax
def double_well(x):
"""Double well potential function"""
return x**4 - 2*x**2
print("Forward pass value:", double_well(0.12132987))
print("Analytic calculation backwards pass", double_well_grad(0.12132987))
print("Jax autodiff backwards pass", jax.grad(double_well)(0.12132987))
Forward pass value: -0.029225168711847025 Analytic calculation backwards pass -0.4781751223381389 Jax autodiff backwards pass -0.4781751
import jax.numpy as jnp
a = jnp.array(np.random.random((5, )))
x = jnp.array(np.random.random((5, )))
def forward_pass(x, a):
"""Forward pass of a simple neural network"""
return jnp.tanh(a.T @ x)
def backward_pass(x, a):
"""Backward pass of a simple neural network"""
return (1 - np.tanh(a.T @ x)**2) * x.T
print("Forward pass value:\n", forward_pass(x, a))
print("\nAnalytic calculation backwards pass:\n", backward_pass(x, a))
print("\nJax autodiff backwards pass:\n", jax.grad(forward_pass, argnums=1)(x, a))
Forward pass value: 0.5584225 Analytic calculation backwards pass: [0.19981267 0.21903262 0.0847854 0.421329 0.42325357] Jax autodiff backwards pass: [0.19981267 0.21903262 0.0847854 0.421329 0.42325357]
Some optimization tricks¶
Instead of "full batch" gradient descent, where we compute the average of the gradient $\nabla_\theta \mathcal{L}(\theta)$ over all training examples $x_i$, we can use stochastic gradient descent, where we randomly sample a subset of the training data during each gradient descent epoch. Smaller batch sizes are more noisy, while larger batch sizes are more accurate to the global geometry of the loss function.
Instead of just gradient descent, we can use momentum and many other tricks we saw in optimization:
- Stochastic
- Momentum
- Second-order methods (e.g. Newton's method)
- Adaptive learning rates
- Constrained Optimization (e.g. L1/L2 regularization)
Currently, many practitioners use the Adam optimizer, which combines momentum with per-parameter adaptive learning rates. Second-order methods are less commonly used for large problems, due to the expense of computing the full Hessian, but approximate methods based on its eigenvalues are viable.
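As a rough sketch of what Adam does (using the standard default hyperparameters beta1 = 0.9, beta2 = 0.999, eps = 1e-8, and a made-up quadratic loss), each parameter gets its own effective step size built from running estimates of the gradient's first and second moments:
## A minimal sketch of the Adam update rule on a toy quadratic loss
def adam_step(theta, grad, m, v, t, eta=0.1, beta1=0.9, beta2=0.999, eps=1e-8):
    m = beta1 * m + (1 - beta1) * grad        # momentum (first moment estimate)
    v = beta2 * v + (1 - beta2) * grad**2     # adaptive scale (second moment estimate)
    m_hat = m / (1 - beta1**t)                # bias corrections
    v_hat = v / (1 - beta2**t)
    return theta - eta * m_hat / (np.sqrt(v_hat) + eps), m, v
theta = np.array([5.0, -3.0])
target = np.array([1.0, 2.0])
m, v = np.zeros_like(theta), np.zeros_like(theta)
for t in range(1, 1001):
    grad = 2 * (theta - target)               # gradient of ||theta - target||^2
    theta, m, v = adam_step(theta, grad, m, v, t)
print(theta)  # moves toward [1, 2]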
How do different hyperparameters affect training?¶
Two common hyperparameters that we'll encounter when training large networks are the learning rate $\eta$ and the batch size $B$.
Batch size $B$: The number of training examples used to compute the gradient. Larger batch sizes are better estimators of the overall gradient of the loss landscape, but more expensive to compute. Smaller batch sizes are more noisy, but can be faster to compute. The stochasticity of small batch sizes can also help the network avoid local minima.
Learning rate: The size of the step taken in the direction of the gradient. If the learning rate is too small, then the network will take a long time to converge. If the learning rate is too large, then the network will oscillate around the minimum, or even diverge.
We'll demonstrate these hyperparameters using scikit-learn's own MLPRegressor class, which is a multilayer perceptron for regression problems. We'll use the adam iterative optimization algorithm, which is a combination of momentum and adaptive learning rates.
from sklearn.neural_network import MLPRegressor
mlp = MLPRegressor(hidden_layer_sizes=(100, 100), activation='relu', solver='adam', learning_rate='constant',
learning_rate_init=0.001, max_iter=1000, random_state=0, batch_size=32)
X_train, X_test, y_train, y_test = dataset.X_train, dataset.X_test, dataset.y_train, dataset.y_test
mlp.fit(X_train, y_train)
## Score
print("R^2 score: %f" % mlp.score(X_test, y_test))
plt.semilogy(mlp.loss_curve_)
plt.xlabel('Epoch')
plt.ylabel('Loss')
R^2 score: 0.999712
Text(0, 0.5, 'Loss')
for batch_size_value in [2, 32, 512]:
mlp = MLPRegressor(hidden_layer_sizes=(100, 100), activation='relu', solver='adam', learning_rate='constant',
learning_rate_init=0.001, max_iter=1000, random_state=0, batch_size=batch_size_value)
mlp.fit(X_train, y_train)
plt.semilogy(mlp.loss_curve_, label=f"batch_size={batch_size_value}")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
<matplotlib.legend.Legend at 0x2cdfc2490>
for lr in [1e0, 1e-1, 1e-3, 1e-5]:
mlp = MLPRegressor(hidden_layer_sizes=(100, 100),
activation='relu',
solver='adam',
learning_rate='constant',
learning_rate_init=lr,
max_iter=1000,
random_state=0,
batch_size=32)
mlp.fit(X_train, y_train)
plt.semilogy(mlp.loss_curve_, label=f"learning_rate={lr}")
plt.xlabel('Epoch')
plt.ylabel('Loss')
Text(0, 0.5, 'Loss')