Source code for cmdtools.estimation.galerkin

from .. import utils
import numpy as np
from scipy.spatial import distance


[docs]class TransferOperatorGalerkin:
    """ Class resposible for computing the Monte-Carlo estimates of
    the Perron-Frobenius and Koopman operators wrt to a given Basis """

    def __init__(self, basis):
        self.basis = basis

    # def fit(self, trajectory):
    #     self.coords = self.basis.evaluate(trajectory)

[docs]    def koopman(self):
        """ K = M^-1 S """
        S, Minv = self.stiffness(), np.inv(self.mass())
        return np.dot(Minv, S)

[docs]    def perronfrobenius(self):
        """ P = S^T M^-1 = K^T """
        return self.koopman().T

[docs]    def mass(self):
        """ M_ij = 1/n sum_k=(0,...,n) X_ki X_kj """
        X = self.coords  # TODO: use only first n-1 samples for rowstoch?
        nsamples = np.size(X, 0)
        return np.dot(X.T, X) / nsamples

[docs]    def stiffness(self):
        """ S_ij = 1/n sum_k=(0,...,n-1) X_(k+1,i) X_kj """
        X = self.coords[:-1, :]
        Y = self.coords[1:, :]
        nsamples = np.size(X, 0)
        return np.dot(X.T, Y) / nsamples


[docs]class Basis:
    pass


[docs]class Gaussian(Basis):
    def __init__(self, timeseries,
                 centers=None, sqd=None, sigma=None, percentile=50):
        self.timeseries = timeseries
        self.centers = timeseries \
            if centers is None else centers
        self.sqd = sqdist(self.timeseries, self.centers) \
            if sqd is None else sqd
        self.sigma = find_bandwidth(self.sqd, percentile) \
            if sigma is None else sigma

        self.membership = membership(self.sqd, self.sigma)
        self.mass = massmatrix(self.membership)

    @property
    def propagator(self):
        return propagator(self.membership)


[docs]def massmatrix(membership):
    """ Compute the row-stochastic empirical mass function
    S_ij = St_ij / sum_j St_ij
    St_ij = 1/n sum_x phi_i(x) phi_j(x), x in traj
    cf. ([TGDW2019] eqn 12, [W2006] eqn 35)

    Args:
        membership (matrix): row-stochastic membership matrix
            of shape (#samples x #basis)

    Returns:
        matrix: row-stochastic mass matrix S
    """

    St = membership.T.dot(membership)
    return utils.rowstochastic(St)


# TODO: isn't this actually the koopman operator?
[docs]def propagator(membership, lag=1):
    """Compute the row-stochastic empirical propagator
    K_ij = Kt_ij / sum_j Kt_ij
    Kt_ij = 1/n sum_x phi_i(x) phi_j(y), (x,y) in traj
    cf. ([TGDW2019] eqn 12, [W2006] eqn 35)

    Args:
        membership (matrix): row-stochastic membership matrix
            of shape (#samples x #basis)

    Returns:
        matrix: row-stochastic propagator P
    """

    counts = membership[0:-lag, :].T.dot(membership[lag:, :])
    p = utils.rowstochastic(counts)
    return p


[docs]def membership(sqd, sigma):
    """
    Given square distances and standard deviation,
    return the row-stochastic Gaussian membership matrix
    """
    m = np.exp(-sqd / (2 * sigma**2))
    return utils.rowstochastic(m)


[docs]def sqdist(timeseries, centers, metric='sqeuclidean'):
    d = distance.cdist(timeseries, centers, metric)
    sqd = d if metric == 'sqeuclidean' else d**2
    return sqd


[docs]def find_bandwidth(sqd, percentile=50):
    """

    Given the square-distance matrix d_ij = (x_i - x_j)^2
    compute the standard deviation s for the Gaussian kernel
    k(x_i,x_j) = exp(-1/(2s^2) d_ij).
    The heuristic is based on the assumption that we want
    sum_j k(x_i, x_j) ≈ n exp(-1/(2s^2) med^2) = 1 and hence
    (2s^2) = med^2 / log n  where n is the number of points and
    med the pairwise median distance.

    percentile allows to shift from the median to an arbitrary percentage and
    thus influences how many points have an influence on the bandwith.

    Heuristic taken from "Stein Variational Gradient Descent [...],
    Qiang Liu and Dilin Wang (2016)".

    Input:
        sqd: matrix, pairwise squared-distances of the samples
        percentile: float [0,100], percentile to use instead of median

     Output:
         sigma: float, the standard deviation of the Gaussian
    """

    n = np.size(sqd, 1)
    h = np.percentile(sqd, percentile) / np.log(n)
    sigma = np.sqrt(h/2)
    return sigma