Source code for ensemble_md.analysis.synthesize_data

####################################################################
#                                                                  #
#    ensemble_md,                                                  #
#    a python package for running GROMACS simulation ensembles     #
#                                                                  #
#    Written by Wei-Tse Hsu <wehs7661@colorado.edu>                #
#    Copyright (c) 2022 University of Colorado Boulder             #
#                                                                  #
####################################################################
"""
The :obj:`.synthesize_data` module provides methods for synthesizing REXEE data,
specifically trajectories and transition matrices. This is mainly useful
for carrying out the bootstrap analysis for some analysis, such as the spectral gap calculation in
:obj:`.analyze_matrix`.
"""
import numpy as np
from ensemble_md.analysis import analyze_traj
from ensemble_md.analysis import analyze_matrix


[docs]def synthesize_traj(trans_mtx, n_frames=100000, method='transmtx', start=0, seed=None): """ Synthesizes a trajectory based on the input transition matrix. Parameters ---------- trans_mtx: numpy.ndarray The input transition matrix. n_frames: int, Optional The number of frames to be generated. The default value is 100000. method: str, Optional The method to be used for trajectory synthesis. It can be either :code:`'transmtx'` or :code:`'equil_prob'`. The former refers to generating the trajectory by simulating the moves between states based on the input transition matrix, with the trajectory starting from the state specified by the :code:`start` parameter. If the method is :code:`equil_prob`, the trajectory will be generated by simply sampling from the equilibrium probability distribution calculated from the input transition matrix. The method :code:`'transmtx'` should generate a trajectory characterized by a transition matrix similar to the input one, while the method :code:`'equil_prob'` may generate a trajectory that has a significantly different transition matrix. Still, a trajectory generated by either method should have a similar underlying equilibrium probability distribution (hence the spectral gap as well) as the input transition matrix. The default value is :code:`'transmtx'`. start: int, Optional The starting state of the synthesized trajectory if the method is :code:`transmtx`. The default value is 0, i.e., the first state. This parameter is ignored if the method is :code:`equil_prob`. seed: int, Optional The seed for the random number generator. The default value is :code:`None`, i.e., the seed will not be set. Returns ------- syn_traj: numpy.ndarray The synthesized trajectory. """ if seed is not None: np.random.seed(seed) N = len(trans_mtx) # Can be the number of states or replicas depending on the type of the input mtraix if start >= N: raise ValueError(f'The starting state {start} is out of the range of the input transition matrix.') if method == 'equil_prob': equil_prob = analyze_matrix.calc_equil_prob(trans_mtx) syn_traj = np.random.choice(N, size=n_frames, p=equil_prob.reshape(N)) elif method == 'transmtx': check_row = sum([np.isclose(np.sum(trans_mtx[i]), 1, atol=1e-8) for i in range(len(trans_mtx))]) check_col = sum([np.isclose(np.sum(trans_mtx[:, i]), 1, atol=1e-8) for i in range(len(trans_mtx))]) if check_row == N: mtx = trans_mtx elif check_col == N: mtx = trans_mtx.T else: raise ValueError('The input matrix is not normalized.') syn_traj = np.zeros(n_frames, dtype=int) syn_traj[0] = start for i in range(1, n_frames): syn_traj[i] = np.random.choice(N, p=mtx[syn_traj[i-1]]) else: raise ValueError(f'Invalid method: {method}. The method must be either "transmtx" or "equil_prob".') return syn_traj
[docs]def synthesize_transmtx(trans_mtx, n_frames=100000, seed=None): """ Synthesizes a normalized transition matrix similar to the input transition matrix by first generating a trajectory using :obj:`synthesize_traj` with :code:`method='transmtx'` and then calculating the transition matrix from the synthesized trajectory. This function can be useful in performing boostrapping analysis when estimating the uncertainty of the spectral gap associated with the input transition matrix. Parameters ---------- trans_mtx: numpy.ndarray The input transition matrix. n_frames: int, Optional The number of frames of the synthesized trajectory from which the mock transition matrix is calculated. The default value is 100000. seed: int, Optional The seed for the random number generator. The default value is :code:`None`, i.e., the seed will not be set. Returns ------- syn_mtx: numpy.ndarray The synthesized transition matrix. syn_traj: numpy.ndarray The synthesized trajectory from which the transition matrix is calculated. diff_mtx: numpy.ndarray The input transition matrix subtracted by the synthesized transition matrix. """ N = len(trans_mtx) # can be the number of states or number of replicas depending on mtx_type # Note that here we just use the default values (method='transmtx' and start=0) for synthesize_traj, so that # the synthesized matrix will be similar to the input one. (If equil_prob is used, the resulting matrix may # be very different from the input one, though the equilibrium probabilities and spectral gap should be similar.) # Note that for transition matrix synthesis, the starting state of the synthesized trajectory # should not matter given that the number of frames is large. syn_traj = synthesize_traj(trans_mtx, n_frames, seed=seed) syn_mtx = analyze_traj.traj2transmtx(syn_traj, N) diff_mtx = trans_mtx - syn_mtx return syn_mtx, syn_traj, diff_mtx