Source code for ensemble_md.analysis.synthesize_data

####################################################################
#                                                                  #
#    ensemble_md,                                                  #
#    a python package for running GROMACS simulation ensembles     #
#                                                                  #
#    Written by Wei-Tse Hsu <wehs7661@colorado.edu>                #
#    Copyright (c) 2022 University of Colorado Boulder             #
#                                                                  #
####################################################################
"""
The :obj:`.synthesize_data` module provides methods for synthesizing REXEE data,
specifically trajectories and transition matrices. This is mainly useful
for carrying out the bootstrap analysis for some analysis, such as the spectral gap calculation in
:obj:`.analyze_matrix`.
"""
import numpy as np
from ensemble_md.analysis import analyze_traj
from ensemble_md.analysis import analyze_matrix


[docs]def synthesize_traj(trans_mtx, n_frames=100000, method='transmtx', start=0, seed=None):
    """
    Synthesizes a trajectory based on the input transition matrix.

    Parameters
    ----------
    trans_mtx: numpy.ndarray
        The input transition matrix.
    n_frames: int, Optional
        The number of frames to be generated. The default value is 100000.
    method: str, Optional
        The method to be used for trajectory synthesis. It can be either :code:`'transmtx'` or :code:`'equil_prob'`.
        The former refers to generating the trajectory by simulating the moves between states based on the
        input transition matrix, with the trajectory starting from the state specified by the :code:`start` parameter.
        If the method is :code:`equil_prob`, the trajectory will be generated by simply sampling from the equilibrium
        probability distribution calculated from the input transition matrix. The method :code:`'transmtx'` should
        generate a trajectory characterized by a transition matrix similar to the input one, while the method
        :code:`'equil_prob'` may generate a trajectory that has a significantly different transition matrix. Still,
        a trajectory generated by either method should have a similar underlying equilibrium probability distribution
        (hence the spectral gap as well) as the input transition matrix. The default value is :code:`'transmtx'`.
    start: int, Optional
        The starting state of the synthesized trajectory if the method is :code:`transmtx`. The default value is 0,
        i.e., the first state. This parameter is ignored if the method is :code:`equil_prob`.
    seed: int, Optional
        The seed for the random number generator. The default value is :code:`None`, i.e., the seed will not be set.

    Returns
    -------
    syn_traj: numpy.ndarray
        The synthesized trajectory.
    """
    if seed is not None:
        np.random.seed(seed)
    N = len(trans_mtx)  # Can be the number of states or replicas depending on the type of the input mtraix

    if start >= N:
        raise ValueError(f'The starting state {start} is out of the range of the input transition matrix.')

    if method == 'equil_prob':
        equil_prob = analyze_matrix.calc_equil_prob(trans_mtx)
        syn_traj = np.random.choice(N, size=n_frames, p=equil_prob.reshape(N))
    elif method == 'transmtx':
        check_row = sum([np.isclose(np.sum(trans_mtx[i]), 1, atol=1e-8) for i in range(len(trans_mtx))])
        check_col = sum([np.isclose(np.sum(trans_mtx[:, i]), 1, atol=1e-8) for i in range(len(trans_mtx))])
        if check_row == N:
            mtx = trans_mtx
        elif check_col == N:
            mtx = trans_mtx.T
        else:
            raise ValueError('The input matrix is not normalized.')

        syn_traj = np.zeros(n_frames, dtype=int)
        syn_traj[0] = start
        for i in range(1, n_frames):
            syn_traj[i] = np.random.choice(N, p=mtx[syn_traj[i-1]])
    else:
        raise ValueError(f'Invalid method: {method}. The method must be either "transmtx" or "equil_prob".')

    return syn_traj


[docs]def synthesize_transmtx(trans_mtx, n_frames=100000, seed=None):
    """
    Synthesizes a normalized transition matrix similar to the input transition matrix by first
    generating a trajectory using :obj:`synthesize_traj` with :code:`method='transmtx'` and then
    calculating the transition matrix from the synthesized trajectory. This function can be
    useful in performing boostrapping analysis when estimating the uncertainty of the spectral gap
    associated with the input transition matrix.

    Parameters
    ----------
    trans_mtx: numpy.ndarray
        The input transition matrix.
    n_frames: int, Optional
        The number of frames of the synthesized trajectory from which the mock transition matrix is calculated.
        The default value is 100000.
    seed: int, Optional
        The seed for the random number generator. The default value is :code:`None`, i.e., the seed will not be set.

    Returns
    -------
    syn_mtx: numpy.ndarray
        The synthesized transition matrix.
    syn_traj: numpy.ndarray
        The synthesized trajectory from which the transition matrix is calculated.
    diff_mtx: numpy.ndarray
        The input transition matrix subtracted by the synthesized transition matrix.
    """
    N = len(trans_mtx)  # can be the number of states or number of replicas depending on mtx_type

    # Note that here we just use the default values (method='transmtx' and start=0) for synthesize_traj, so that
    # the synthesized matrix will be similar to the input one. (If equil_prob is used, the resulting matrix may
    # be very different from the input one, though the equilibrium probabilities and spectral gap should be similar.)
    # Note that for transition matrix synthesis, the starting state of the synthesized trajectory
    # should not matter given that the number of frames is large.
    syn_traj = synthesize_traj(trans_mtx, n_frames, seed=seed)
    syn_mtx = analyze_traj.traj2transmtx(syn_traj, N)
    diff_mtx = trans_mtx - syn_mtx

    return syn_mtx, syn_traj, diff_mtx