LU Decomposition and the PageRank Algorithm¶
Preamble: Run the cells below to import the necessary Python packages
This notebook created by William Gilpin. Consult the course website for all content and GitHub repository for raw files and runnable online code.
import numpy as np
# Wipe all outputs from this notebook
from IPython.display import Image, clear_output, display
clear_output(True)
# Import local plotting functions and in-notebook display functions
import matplotlib.pyplot as plt
%matplotlib inline
Revisiting the coauthorship graph¶
- Coauthorship among physicists, based on arXiv postings in astro-ph
- The graph contains $N = 18772$ nodes, which correspond to unique authors observed over the period 1993 -- 2003
- If Author i and Author j coauthored a paper during that period, the nodes are connected
- In order to analyze this large graph, we will downsample it to a smaller graph with $N = 4000$ nodes representing the most highly-connected authors
- This dataset is from the Stanford SNAP database
import networkx as nx
## Load the full coauthorship network
fpath = "../resources/ca-AstroPh.txt.gz"
# fpath = "../resources/ca-CondMat.txt.gz"
g = nx.read_edgelist(fpath)
## Create a subgraph of the 4000 most well-connected authors
subgraph = sorted(g.degree, key=lambda x: x[1], reverse=True)[:4000]
subgraph = [x[0] for x in subgraph]
g2 = g.subgraph(subgraph)
# rename nodes to sequential integers as they would appear in an adjacency matrix
g2 = nx.convert_node_labels_to_integers(g2, first_label=0)
pos = nx.spring_layout(g2)
# pos = nx.kamada_kawai_layout(g2)
# nx.draw_spring(g2, pos=pos, node_size=10, node_color='black', edge_color='gray', width=0.5)
nx.draw(g2, pos=pos, node_size=5, node_color='black', edge_color='gray', width=0.5, alpha=0.5)
plt.show()
Estimating degree distributions, centrality, and the PageRank algorithm¶
We previously saw that the degree distribution determines how often a random walk on the graph will visit a given node. This can be seen as an indicator of how "important" a given node is to the structure of the graph. If we wanted to rank the authors purely based on their degree, we could write this ranking as $$ \mathbf{R} = A \mathbb{1} $$ where $\mathbb{1} \in \mathbb{R}^N$ is a vector of ones, $A \in \mathbb{R}^{N\times N}$ is our adjacency matrix, and $\mathbf{R} \in \mathbb{R}^N$ is our desired ranking. Notice how multiplying by the ones vector computes the row-wise sums of $A$. This is consistent with the expected cost: summing each of the $N$ rows takes $\sim N^2$ operations, and a matrix-vector product also costs $\sim N^2$ operations.
A = nx.adjacency_matrix(g2).todense() # get the binary adjacency matrix of the graph
rank_degree = A @ np.ones(A.shape[0])
plt.figure(figsize=(7, 7))
nx.draw(g2, pos=pos, node_size=200, node_color=np.log10(rank_degree), edge_color='gray', width=0.01, cmap='viridis', alpha=0.2)
plt.title(f"Authors ranked by degree")
plt.show()
plt.figure(figsize=(7, 2))
plt.hist(rank_degree)
plt.ylabel("Number of Authors")
plt.xlabel("Degree")
How else might we group authors?¶
The degree ranking simply tells us who the most well-connected authors are: the individuals who directly co-authored papers with the largest number of other authors at some point during the period over which the arXiv dataset was collected.
Recall that we can think of the degree distribution as the long-time limit of a random walker that "hops" among the neighboring nodes in a graph.
What if we add an extra source of stochasticity, and allow the random walker to sometimes jump to any other node, even one not connected to the current node? This will cause the walker to explore the graph more broadly, and to discover authors who are less connected to the dominant central cluster.
We will draw a loose analogy with the Ornstein-Uhlenbeck process. The spring constant pulls a random walker back to the origin, but the random noise allows the walker to explore the space more broadly. Likewise, the degree distribution pulls the random walker to the most connected nodes, but random jumps allow the walker to explore the graph more broadly.
This is the idea behind the PageRank algorithm, which is how Google originally ranked web pages. In Google's case, the entire internet is too large to know a priori the adjacency matrix of all web pages. Instead, they use a random walk algorithm to estimate the degree distribution of the web pages. Random hops allow the algorithm to more easily find smaller subnetworks, so that it doesn't keep revisiting dominant nodes like Wikipedia or Facebook.
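To make the random-surfer picture concrete before formalizing it, below is a minimal sketch on a small stand-in graph (networkx's built-in karate club network, chosen purely for illustration; the value of $\alpha$ and the step count are arbitrary). The walker follows a random edge with probability $\alpha$ and teleports to a uniformly random node otherwise, and we tally how often each node is visited.
import numpy as np
import networkx as nx

# Sketch: a "random surfer" on a small illustrative graph (not the coauthorship data)
rng = np.random.default_rng(0)
g_toy = nx.karate_club_graph()
nodes = list(g_toy.nodes())
alpha, n_steps = 0.85, 100_000

visits = np.zeros(len(nodes))
current = rng.choice(nodes)
for _ in range(n_steps):
    neighbors = list(g_toy.neighbors(current))
    if rng.random() < alpha and neighbors:
        current = rng.choice(neighbors)  # hop to a random neighbor
    else:
        current = rng.choice(nodes)      # random jump anywhere in the graph
    visits[current] += 1

print(np.argsort(visits)[::-1][:5])      # indices of the five most-visited nodes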
Implementing the PageRank algorithm¶
We are now going to implement the PageRank algorithm on the coauthorship graph. The PageRank algorithm is a variant of the random walk algorithm that allows the random walker to sometimes jump to a new node with probability $1 - \alpha$.
If we interpret our desired ranking $\mathbf{R}$ as a probability distribution, then we can write a master equation describing the dynamics of the probability distribution of many random walkers. $$ \mathbf{R}_{t+1} = \alpha A \mathbf{R}_t + (1 - \alpha) \mathbb{1} $$ where $\mathbb{1} \in \mathbb{R}^N$ is a vector of ones, $A \in \mathbb{R}^{N \times N}$ is the adjacency matrix of the graph, and $\alpha$ is a parameter that controls the probability of the random surfer jumping to a random node. The first term on the right-hand side represents the process of the walker moving to a neighboring node, while the second term represents the process of jumping to a new node.
Solving this equation for the steady state distribution $\mathbf{R}_t = \mathbf{R}_{t+1}$ results in the PageRank equation $$ \mathbf{R} = \alpha A \mathbf{R} + (1 - \alpha) \mathbb{1} $$ This equation can be rewritten as an equation for $\mathbf{R}$, $$ \mathbf{R} = (1 - \alpha) (I - \alpha A)^{-1} \mathbb{1} $$
where $I$ is the identity matrix. We can write this as a single matrix equation
$$ \mathbf{R} \propto \mathbf{M}^{-1} \mathbb{1} $$
where $\mathbf{M} \equiv (1 - \alpha) (I - \alpha A)$. Note that $\mathbf{M}^{-1} \mathbb{1} = (1 - \alpha)^{-2} \, \mathbf{R}$, so it differs from $\mathbf{R}$ only by an overall positive constant, which does not affect the ranking. If we instead set $\alpha = 1$ (no random jumps), $\mathbf{M}$ is no longer invertible, and the steady-state condition reduces to the eigenvalue equation
$$ \mathbf{R} = A \mathbf{R} $$
so the ranking is given by an eigenvector of the adjacency matrix (eigenvector centrality).
alpha = 0.85 # a typical damping factor used in PageRank
M = (1 - alpha) * (np.eye(A.shape[0]) - alpha * A)
print(M.shape)
plt.imshow(M[:100, :100])
(4000, 4000)
Inverting a matrix¶
We have taken for granted, so far, that we can invert a large matrix $\mathbf{A}$. Given a linear problem $$ A \mathbf{x} = \mathbf{b} $$ with $A \in \mathbb{R}^{N \times N}$, $\mathbf{x} \in \mathbb{R}^N$, and $\mathbf{b} \in \mathbb{R}^N$, we can solve for $\mathbf{x}$ by inverting $A$: $$ \mathbf{x} = A^{-1} \mathbf{b} $$
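For concreteness, here is a tiny made-up $2 \times 2$ system solved two ways: by forming $A^{-1}$ explicitly, and with np.linalg.solve, which avoids computing the inverse.
# A small made-up 2x2 system, solved with and without forming the explicit inverse
import numpy as np

A_small = np.array([[3.0, 1.0], [1.0, 2.0]])
b_small = np.array([9.0, 8.0])

print(np.linalg.inv(A_small) @ b_small)   # explicit inverse: [2. 3.]
print(np.linalg.solve(A_small, b_small))  # solves the system directly: [2. 3.]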
How would we do this by hand?¶
Gaussian elimination: perform a series of row operations on both the left and the right hand sides of our equation (changing both $A$ and $\mathbf{b}$), until the left side of the equation is triangular in form. Because Gaussian elimination consists of a series of row-wise scaling, addition, and subtraction operations, each new row is a linear combination of the original rows, so the whole process can be expressed as multiplication by a matrix $M$
$$ (M A) \mathbf{x} = M \mathbf{b} $$ $$ U \mathbf{x} = M \mathbf{b} $$ where we have defined the upper-triangular matrix $U \equiv MA$. This is consistent with our intuition that the time complexity of Gaussian elimination is $\sim\mathcal{O}(N^3)$, since that is also the time complexity of multiplying two $N \times N$ matrices.
Why do we want triangular matrices?¶
If we can reduce our matrix $A$ to triangular form, then we can quickly solve for $\mathbf{x}$ by substitution: back substitution for an upper-triangular matrix, or forward substitution for a lower-triangular matrix (as in the code below). Either way, this takes $\sim\mathcal{O}(N^2)$ operations if $A \in \mathbb{R}^{N \times N}$
def solve_tril(a, b):
"""
    Given a lower triangular matrix, solve using forward substitution
Args:
a (np.ndarray): A lower triangular matrix
b (np.ndarray): A vector
Returns:
x (np.ndarray): The solution to the system
"""
#a = a.T # make it lower triangular for cleaner notation
n = a.shape[0]
x = np.zeros(n)
for i in range(n):
x[i] = (b[i] - np.dot(a[i, :i], x[:i])) / a[i, i]
return x
# A random lower triangular matrix
a = np.tril(np.random.random((10, 10)))
b = np.random.random(10)
print(np.linalg.solve(a, b))
print(solve_tril(a, b))
# Check that the numpy and our implementation give the same result
print(np.allclose(np.linalg.solve(a, b), solve_tril(a, b)))
# print(np.all(np.linalg.solve(a, b) == solve_tril(a, b)))
# np.sum(np.abs(np.linalg.solve(a, b) - solve_tril(a, b))) < 1e-14
[ 2.79707624 -2.18975392 53.23153183 -45.2687191 30.5757885 -655.20868552 537.77417227 188.26440703 -566.85239614 -94.10449712]
[ 2.79707624 -2.18975392 53.23153183 -45.2687191 30.5757885 -655.20868552 537.77417227 188.26440703 -566.85239614 -94.10449712]
True
import timeit
all_times1, all_times2 = list(), list()
nvals = np.arange(10, 500)
for n in nvals:
    ## Lower triangular solve
a = np.tril(np.random.random((n, n)))
b = np.random.random(n)
all_reps = [timeit.timeit("solve_tril(a, b)", globals=globals(), number=10) for _ in range(10)]
all_times1.append(np.mean(all_reps))
## Full solve
a = np.random.random((n, n))
b = np.random.random(n)
all_reps = [timeit.timeit("np.linalg.solve(a, b)", globals=globals(), number=10) for _ in range(10)]
all_times2.append(np.mean(all_reps))
plt.loglog(nvals, all_times1, label="Triangular solve")
plt.loglog(nvals, all_times2, label="Full solve")
plt.xlabel("Matrix size")
plt.ylabel("Time (s)")
plt.legend()
plt.show()
To invert a matrix, we just need to reach triangular form¶
We know that we want to modify our full matrix equation $$ A \mathbf{x} = \mathbf{b} $$ using a Gaussian elimination procedure, which we saw has the form of a matrix multiplication $$ M A \mathbf{x} = M \mathbf{b} $$ Defining $U \equiv M A$, which we want to be upper-triangular, gives $$ U \mathbf{x} = M \mathbf{b} $$ But what is the form of $M$?
Our key insight is that the matrix $M$ turns out to be a lower triangular matrix.¶
Because the inverse of a triangular matrix is also triangular (this can be proven by writing out the terms in the matrix product $U^{-1} U = I$), we can write a transformed version of our problem
$$ U \mathbf{x} = M \mathbf{b} $$ Defining $M \equiv L^{-1}$, where $L$ is a lower triangular matrix, we arrive at the celebrated LU matrix factorization $$ L U \mathbf{x} = \mathbf{b} $$ where $L$ is lower triangular and $U$ is upper triangular.
We can therefore solve for $\mathbf{x}$ in two stages, by first solving for an intermediate variable $\mathbf{h} \equiv U \mathbf{x}$: $$ LU \mathbf{x} = \mathbf{b} $$ $$ L \mathbf{h} = \mathbf{b} $$
If we can find the LU factorization, solving the linear problem exactly consists of the following steps:
- Solve the equation $L \mathbf{h} = \mathbf{b}$ for $\mathbf{h}$. Since $L$ is lower triangular, this takes $\sim \mathcal{O}(N^2)$ operations using forward substitution
- Solve the equation $U \mathbf{x} = \mathbf{h}$ for $\mathbf{x}$. Since $U$ is upper triangular, this is equally fast using back substitution (see the SciPy sketch below)
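As a sanity check on these two stages, here is a sketch using SciPy's library routines (scipy.linalg.lu and scipy.linalg.solve_triangular) on a random test matrix. Note that SciPy's lu also performs row pivoting, so it returns a permutation matrix $P$ with $A = P L U$, which we fold into the right-hand side.
# Sketch: two-stage triangular solve using SciPy's LU factorization
import numpy as np
from scipy.linalg import lu, solve_triangular

A_test = np.random.rand(6, 6)
b_test = np.random.rand(6)

P, L, U = lu(A_test)                               # A_test = P @ L @ U
h = solve_triangular(L, P.T @ b_test, lower=True)  # forward substitution: L h = P^T b
x = solve_triangular(U, h, lower=False)            # back substitution: U x = h

print(np.allclose(A_test @ x, b_test))             # should print True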
About LU factorization¶
Our iterative implementation performs Gaussian elimination in a fixed, principled order: we first eliminate the entries below the diagonal in the first column of the matrix, then in the second column, and so on.
A more sophisticated algorithm, Crout's method with pivoting, instead chooses the order of elimination based on the specific values present in a given row or column, which improves numerical stability
Decomposition into L and U is $\sim \mathcal{O}(N^3)$ for an $N \times N$ matrix
Given a matrix $A$ and target $\mathbf{b}$, what is the overall runtime to find $\mathbf{x}$?
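One way to think about this: the $\sim \mathcal{O}(N^3)$ factorization only has to be performed once, after which each additional target $\mathbf{b}$ costs only the $\sim \mathcal{O}(N^2)$ substitution steps. Below is a rough sketch of this reuse with SciPy's lu_factor and lu_solve (the matrix size and number of right-hand sides are arbitrary choices for illustration).
# Sketch: factor once, then reuse the LU factors for several right-hand sides
import numpy as np
from scipy.linalg import lu_factor, lu_solve

n = 500
A_demo = np.random.rand(n, n)
lu_and_piv = lu_factor(A_demo)        # O(N^3) factorization, done once

for _ in range(5):                    # each additional solve costs only O(N^2)
    b_demo = np.random.rand(n)
    x_demo = lu_solve(lu_and_piv, b_demo)
    assert np.allclose(A_demo @ x_demo, b_demo)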
The LU decomposition algorithm¶
Set $L = I$ and $U = A$ (the input matrix)
For each column $j$ in $U$:
- Choose the pivot element $U_{jj}$ in the $j$th column
- For each row $i$ below the pivot:
- Find the multiplier $L_{ij} = U_{ij} / U_{jj}$
- Subtract $L_{ij} \times$ the $j$th row from the $i$th row of $U$
- Store $L_{ij}$ in the $(i, j)$ entry of $L$
In the context of the LU decomposition algorithm, the 'pivot' refers to the diagonal element in the current column, denoted as $U_{jj}$, that we are processing in the outer loop. The pivot serves as a divisor to find the multipliers $L_{ij}$ which are then used to eliminate the elements below the pivot, making them zero to carve out an upper triangular matrix $U$.
Simultaneously, these multipliers are stored in the lower triangular matrix $L$. Essentially, we are utilizing the pivot to find scalar values that can help perform row operations to systematically zero out elements below the diagonal in $U$, while building up the $L$ matrix.
We can see that there are three nested loops, which means that the time complexity of the LU decomposition algorithm is $\sim \mathcal{O}(N^3)$.
class LinearSolver:
"""
Solve a linear matrix equation via LU decomposition (naive algorithm)
"""
def __init__(self):
# Run a small test upon construction
self.test_lu()
def lu(self, a):
"""Perform LU factorization of a matrix"""
n = a.shape[0]
L, U = np.identity(n), np.copy(a)
for i in range(n):
factor = U[i+1:, i] / U[i, i]
L[i + 1:, i] = factor
U[i + 1:] -= factor[:, None] * U[i]
return L, U
# A unit test of a single class method
def test_lu(self):
"""A small test method that the factorization is correct"""
X = np.random.random((10, 10))
L, U = self.lu(X)
assert np.allclose(X, L @ U), "LU decomposition failed"
def forward_substitution(self, L, b):
"""Solve a lower triangular matrix equality of the form Lx = b for x"""
n = L.shape[0]
y = np.zeros(n)
y[0] = b[0] / L[0, 0]
for i in range(1, n):
y[i] = (b[i] - np.dot(L[i,:i], y[:i])) / L[i,i]
return y
def backward_substitution(self, U, b):
"""Solve an upper triangular matrix equality of teh form Ux = b for x"""
n = U.shape[0]
y = np.zeros(n)
y[-1] = b[-1] / U[-1, -1]
for i in range(n-2, -1, -1):
y[i] = (b[i] - np.dot(U[i,i+1:], y[i+1:])) / U[i,i]
return y
def solve(self, X, b):
L, U = self.lu(X)
self.L, self.U = L, U
# Intermediate variable
h = self.forward_substitution(L, b)
return self.backward_substitution(U, h)
A = np.random.rand(4, 4)
b = np.random.rand(4)
model = LinearSolver()
print(model.solve(A, b))
# Using the numpy built-in solver
print(np.linalg.solve(A, b))
print(np.allclose(model.solve(A, b), np.linalg.solve(A, b) ))
[ 1.13327806 -1.41619997  0.5899517   0.65301225]
[ 1.13327806 -1.41619997  0.5899517   0.65301225]
True
Applying LU decomposition to orthogonal basis sets¶
A Hadamard matrix is an $N \times N$ matrix whose rows form a complete set of mutually orthogonal vectors, with every entry equal to either $+1$ or $-1$
One way to build a Hadamard matrix is the recursive Sylvester construction: starting from the $1 \times 1$ matrix $[1]$, each larger matrix is obtained by tiling the previous one into the block pattern $\begin{pmatrix} H & H \\ H & -H \end{pmatrix}$, as implemented below
def hadamard(n):
"""
Create a Hadamard matrix of size n
"""
if n == 1:
return np.array([[1]])
else:
H = hadamard(n // 2)
return np.block([[H, H], [H, -H]])
plt.figure(figsize=(10, 10))
a = hadamard(2**8).astype(float)
plt.imshow(a, cmap='gray')
plt.axis('off')
ll, uu = model.lu(a)
plt.figure(figsize=(9, 4.5))
plt.subplot(121)
plt.imshow(ll, cmap='gray')
plt.axis('off')
plt.subplot(122)
plt.imshow(uu.astype(bool), cmap='gray')
plt.axis('off')
## show plots closer together
plt.subplots_adjust(wspace=0.1)
Applying PageRank to the coauthorship graph¶
We can now apply the PageRank algorithm to the coauthorship graph.
A = nx.adjacency_matrix(g2).todense() # get the binary adjacency matrix of the graph
def find_pagerank(A, alpha=0.85, verbose=False):
"""
Find the PageRank of a graph using matrix inversion
Args:
A (np.ndarray): The adjacency matrix of the graph
alpha (float): The damping factor. The default value is 0.85
Returns:
page_rank (np.ndarray): The PageRank of each node
"""
M = (1 - alpha) * (np.eye(A.shape[0]) - alpha * A)
if verbose:
print(f"Condition number of M: {np.linalg.cond(M)}", flush=True)
## use our LU solver to solve for the PageRank
# page_rank = np.linalg.inv(M) @ np.ones(M.shape[0])
    page_rank = LinearSolver().solve(M, np.ones(M.shape[0]))
return page_rank
plt.figure(figsize=(8, 8))
nx.draw(g2, pos=pos, node_size=200, node_color=np.log10(rank_degree),
edge_color='gray', width=0.02, cmap='viridis', alpha=0.2)
plt.title(f"Degree Ranking")
plt.show()
page_rank1 = find_pagerank(A, alpha=0.05, verbose=True)
plt.figure(figsize=(8, 8))
nx.draw(g2, pos=pos, node_size=200, node_color=np.log10(page_rank1),
edge_color='gray', width=0.02, cmap='viridis', alpha=0.2)
plt.title(f"PageRank centrality with alpha=0.05")
plt.show()
page_rank2 = find_pagerank(A, alpha=0.99, verbose=True)
plt.figure(figsize=(8, 8))
nx.draw(g2, pos=pos, node_size=200, node_color=np.log10(page_rank2),
edge_color='gray', width=0.02, cmap='viridis', alpha=0.2)
plt.title(f"PageRank centrality with alpha=0.95")
plt.show()
Condition number of M: 3808.383285542044
Condition number of M: 112286.22458188192
We can see that high values of $\alpha$ cause the PageRank algorithm to look more like the degree distribution. This emphasizes larger subnetworks and more highly-connected nodes. Conversely, low values of $\alpha$ cause the PageRank algorithm to converge to something closer to a uniform distribution.
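As an optional cross-check, networkx ships its own PageRank routine, nx.pagerank, which uses the degree-normalized (stochastic) transition matrix rather than the raw adjacency matrix used above. The numerical values therefore differ from ours, but comparing the two orderings is a quick sanity check. A sketch, assuming SciPy is available for the rank correlation:
## Sketch: compare our ranking against networkx's built-in PageRank
from scipy.stats import spearmanr

pr_dict = nx.pagerank(g2, alpha=0.85)                        # built-in implementation
pr_nx = np.array([pr_dict[i] for i in range(len(pr_dict))])  # nodes are integers 0..N-1

pr_ours = find_pagerank(A, alpha=0.85)

rho, _ = spearmanr(pr_nx, pr_ours)   # rank correlation between the two orderings
print(rho)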