See also
This page was generated from examples/vbhmm.ipynb.
Download the Jupyter Notebook for this section: vbhmm.ipynb
. View in nbviewer.
Variational Bayes hidden Markov modelΒΆ
Here is an example for variational Bayes hidden Markov model learning in scopyon.analysis. This module requires hmmlearn
(https://hmmlearn.readthedocs.io/).
[1]:
import numpy
rng = numpy.random.RandomState(123)
[2]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
Preparing test model parameters, first. feature_matrix
represents diffusion constant, intensity mean and variance for each state.
[3]:
feature_matrix = numpy.array([
[0.222, 1.0, 0.5],
[0.032, 1.0, 0.5],
[0.008, 1.0, 0.5],
[0.222, 2.0, 0.5 * numpy.sqrt(2)],
[0.032, 2.0, 0.5 * numpy.sqrt(2)],
[0.008, 2.0, 0.5 * numpy.sqrt(2)],
[0.222, 3.0, 0.5 * numpy.sqrt(3)],
[0.032, 3.0, 0.5 * numpy.sqrt(3)],
[0.008, 3.0, 0.5 * numpy.sqrt(3)],
[0.222, 4.0, 0.5 * 2],
[0.032, 4.0, 0.5 * 2],
[0.008, 4.0, 0.5 * 2],
])
state_transition_matrix = numpy.array([
[0.0, 0.5, 1.0, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.0, 0.0],
[0.5, 0.0, 1.0, 0.0, 0.2, 0.0, 0.0, 0.5, 0.0, 0.0, 0.1, 0.0],
[0.2, 0.5, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.0, 0.0, 0.2],
[0.1, 0.0, 0.0, 0.0, 0.5, 1.0, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0],
[0.0, 0.5, 0.0, 0.5, 0.0, 1.0, 0.0, 0.2, 0.0, 0.0, 0.6, 0.0],
[0.0, 0.0, 0.1, 0.2, 0.5, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.2],
[0.5, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.5, 1.0, 0.3, 0.0, 0.0],
[0.0, 0.3, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 1.0, 0.0, 0.1, 0.0],
[0.0, 0.0, 0.4, 0.0, 0.0, 0.1, 0.2, 0.5, 0.0, 0.0, 0.0, 0.5],
[0.5, 0.0, 0.0, 0.5, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.5, 1.0],
[0.0, 0.1, 0.0, 0.0, 0.3, 0.0, 0.0, 0.5, 0.0, 0.5, 0.0, 1.0],
[0.0, 0.0, 0.3, 0.0, 0.0, 0.4, 0.0, 0.0, 0.1, 0.2, 0.5, 0.0]
])
Converting state transion rates to probabilities after a step, dt
.
[4]:
dt = 33e-3
n, m = state_transition_matrix.shape
assert n == m
assert (state_transition_matrix.diagonal() == 0).all()
P = 1 - numpy.exp(-state_transition_matrix * dt)
assert (P.sum(axis=1) <= 1.0).all()
P.ravel()[::n+1] = 1.0 - P.sum(axis=1)
Pacc = P.cumsum(axis=1)
The initial state is assumed to be at equilibrium.
[5]:
init_state_vector = numpy.linalg.matrix_power(P.T, 1000)[:, 0]
init_state_vector
[5]:
array([0.05303013, 0.07400135, 0.18132384, 0.04356367, 0.05342888,
0.12401652, 0.0326116 , 0.06682534, 0.13502689, 0.03386681,
0.05677142, 0.14553354])
Generating 1000 trajectories containing 100 steps.
[6]:
def generate_state_vector(Pacc, n=100, init=None, random_state=None):
random_state = random_state or numpy.random
init_state = random_state.choice(len(init), p=init) if init is not None else 0
data = [init_state]
for i in range(n):
rnd = random_state.uniform(0, 1)
state_next = numpy.searchsorted(Pacc[data[i]], rnd, side='leff')
data.append(state_next)
return numpy.array(data)
[7]:
def generate_sample_from_state(state, random_state=None):
random_state = random_state or numpy.random
D, loc, scale = feature_matrix[state]
intensity = random_state.normal(loc, scale)
displacement = numpy.sqrt(numpy.power(random_state.normal(scale=numpy.sqrt(2 * D * dt), size=2), 2).sum())
return numpy.array([displacement, intensity])
[8]:
state_vec = []
observation_vec = []
lengths = []
for i in range(1000):
state_vector = generate_state_vector(Pacc, n=100, init=init_state_vector, random_state=rng)
state_vec.extend(state_vector)
sample = numpy.array([generate_sample_from_state(state, random_state=rng) for state in state_vector])
observation_vec.extend(sample)
lengths.append(len(sample))
observation_vec = numpy.array(observation_vec)
Estimating model parameters.
[9]:
from scopyon.analysis import PTHMM
[10]:
model = PTHMM(n_diffusivities=3, n_oligomers=4, n_iter=300, random_state=rng)
model.fit(observation_vec, lengths)
[10]:
PTHMM(algorithm='viterbi', init_params='stdmv', min_var=1e-05,
n_diffusivities=3, n_iter=300, n_oligomers=4, params='stdmv',
random_state=RandomState(MT19937) at 0x14954292C050, startprob_prior=1.0,
tol=0.01, transmat_prior=1.0, verbose=False)
Diffusion constants, intensity mean and variances are estimated as below:
[11]:
print("diffusivities=\n", model.diffusivities_ / dt)
diffusivities=
[[0.00803652]
[0.03173145]
[0.22392446]]
[12]:
print("intensity_means=", model.intensity_means_)
print("intensity_vars=", model.intensity_vars_)
intensity_means= [[0.99918217]]
intensity_vars= [[0.24898571]]
Start probabilities are estimated as below:
[13]:
print("startprob=\n", model.startprob_)
startprob=
[0.17533235 0.10963236 0.12994636 0.1521473 0.07763026 0.04588303
0.0644636 0.06009023 0.07814598 0.04067981 0.02467016 0.04137855]
State transition rates are estimated from transition matrix as below:
[14]:
print("transmat=\n", model.transmat_)
transmat=
[[9.59181585e-01 3.59226333e-03 1.00420205e-02 5.82366889e-03
1.51376895e-02 9.13151079e-06 4.88452875e-08 1.08885321e-07
6.21331596e-03 1.67811761e-07 1.04830441e-29 1.52623388e-28]
[3.25102470e-03 9.48538368e-01 1.47813508e-02 6.30496637e-03
1.77677786e-04 1.74090786e-02 4.38503192e-04 5.79116955e-11
2.06077509e-07 9.09882489e-03 9.10726874e-12 5.60400368e-21]
[1.04750148e-02 3.34802762e-03 9.44390051e-01 1.83009483e-02
5.64057956e-04 3.89369389e-04 1.50600378e-02 2.82096594e-04
7.04902204e-14 9.66545836e-16 6.93235364e-03 2.58043190e-04]
[9.98469331e-03 1.36491180e-02 4.47802593e-03 9.50151119e-01
6.99977578e-09 3.23219661e-05 3.59745085e-05 1.59732039e-02
2.34745888e-33 1.39747790e-25 2.27126972e-14 5.69553602e-03]
[3.41887032e-02 7.58607895e-10 3.11155127e-04 3.03516739e-05
9.28084546e-01 6.58735412e-03 1.61707712e-02 2.63240446e-03
1.19341426e-02 4.68776689e-05 2.60869834e-08 1.36666969e-05]
[7.27551144e-05 2.94740646e-02 1.92961558e-04 5.65370690e-04
1.16442999e-02 9.14343712e-01 7.51606376e-03 2.31914749e-02
3.33809093e-04 1.26600808e-02 5.40784563e-06 6.98602118e-12]
[1.07857301e-08 6.63081912e-04 3.24073318e-02 5.17119690e-04
9.19585332e-03 1.49743784e-02 9.23651079e-01 1.69150812e-03
5.34926184e-10 6.01174605e-09 1.68989549e-02 6.75125995e-07]
[4.59463442e-06 2.50948000e-04 2.80107793e-05 2.75667484e-02
3.09214353e-03 9.73054045e-03 1.78043172e-02 9.21114413e-01
9.66765774e-22 4.87900550e-04 8.27944768e-04 1.90924390e-02]
[3.20978289e-02 6.50268809e-23 8.89595335e-32 5.45410742e-34
1.59053851e-02 8.28730364e-12 1.41919548e-22 1.08737408e-09
9.28883376e-01 8.36698529e-03 2.24758391e-03 1.24988396e-02]
[1.37037068e-04 2.91551746e-02 4.78420218e-11 3.27069862e-25
3.48870069e-12 1.62495129e-02 3.77997986e-10 2.24203410e-09
3.90265469e-03 9.41462157e-01 5.76171579e-03 3.33174504e-03]
[1.67742729e-04 2.67483964e-04 3.87312242e-02 3.93516159e-04
3.20297600e-08 6.11045852e-04 1.57737698e-02 8.72651469e-12
1.47313900e-02 1.01622788e-03 9.19499692e-01 8.80787498e-03]
[1.27606120e-13 8.67135661e-16 5.70624665e-10 3.42768137e-02
5.67318578e-13 1.67222480e-10 3.58743010e-06 1.53322668e-02
1.58198359e-02 1.52859050e-02 4.32652201e-03 9.14955068e-01]]
[15]:
P = model.transmat_
k = -numpy.log(1 - P) / dt
k.ravel()[:: k.shape[0] + 1] = 0.0
print("state_transition_matrix=\n", k)
state_transition_matrix=
[[ 0.00000000e+00 1.09052455e-01 3.05841871e-01 1.76990684e-01
4.62225270e-01 2.76713712e-04 1.48016026e-06 3.29955537e-06
1.88869665e-01 5.08520530e-06 -0.00000000e+00 -0.00000000e+00]
[ 9.86763867e-02 0.00000000e+00 4.51263138e-01 1.91664443e-01
5.38465370e-03 5.32193897e-01 1.32908898e-02 1.75489926e-09
6.24477364e-06 2.76984000e-01 2.75977894e-10 -0.00000000e+00]
[ 3.19098906e-01 1.01625600e-01 0.00000000e+00 5.59711584e-01
1.70974878e-02 1.18013701e-02 4.59836113e-01 8.54958758e-03
2.13633824e-12 3.02788098e-14 2.10802850e-01 7.82049966e-03]
[ 3.04087111e-01 4.16458289e-01 1.36002495e-01 0.00000000e+00
2.12114418e-07 9.79469349e-04 1.09015623e-03 4.87943956e-01
-0.00000000e+00 -0.00000000e+00 6.89684000e-13 1.73085377e-01]
[ 1.05414572e+00 2.29881165e-08 9.43041047e-03 9.19761652e-04
0.00000000e+00 2.00277166e-01 4.94028634e-01 7.98750099e-02
3.63815945e-01 1.42056872e-03 7.90514657e-07 4.14145160e-04]
[ 2.20478064e-03 9.06580394e-01 5.84788418e-03 1.71372901e-02
3.54928050e-01 0.00000000e+00 2.28619749e-01 7.11049354e-01
1.01171157e-02 3.86087954e-01 1.63874553e-04 2.11695981e-10]
[ 3.26840307e-07 2.01000560e-02 9.98305381e-01 1.56743468e-02
2.79951400e-01 4.57200798e-01 0.00000000e+00 5.13012222e-02
1.62098853e-08 1.82174125e-07 5.16465804e-01 2.04583704e-05]
[ 1.39231666e-04 7.60543916e-03 8.48823382e-04 8.47086114e-01
9.38464875e-02 2.96308834e-01 5.44385479e-01 0.00000000e+00
-0.00000000e+00 1.47884731e-02 2.50996274e-02 5.84153129e-01]
[ 9.88613932e-01 -0.00000000e+00 -0.00000000e+00 -0.00000000e+00
4.85855552e-01 2.51129084e-10 -0.00000000e+00 3.29507297e-08
0.00000000e+00 2.54611666e-01 6.81852582e-02 3.81139610e-01]
[ 4.15292299e-03 8.96625229e-01 1.44975950e-09 -0.00000000e+00
1.05716782e-10 4.96454063e-01 1.14544838e-08 6.79404278e-08
1.18493634e-01 0.00000000e+00 1.75102379e-01 1.01130535e-01]
[ 5.08353937e-03 8.10665892e-03 1.19700684e+00 1.19270790e-02
9.70598802e-07 1.85222005e-02 4.81803020e-01 2.64438303e-10
4.49726500e-01 3.08104423e-02 0.00000000e+00 2.68087685e-01]
[ 3.86559471e-12 2.69144976e-14 1.72916563e-08 1.05691038e+00
1.71916353e-11 5.06734722e-09 1.08710198e-04 4.68212770e-01
4.83221366e-01 4.66786026e-01 1.31391166e-01 0.00000000e+00]]
Generating random samples from the model.
[16]:
expected_vec = numpy.zeros((sum(lengths), 2), dtype=observation_vec.dtype)
for i in range(len(lengths)):
X_, Z_ = model.sample(lengths[i])
expected_vec[sum(lengths[: i]): sum(lengths[: i + 1])] = X_
[17]:
fig = make_subplots(rows=1, cols=2, subplot_titles=['Square Displacement', 'Intensity'])
fig.add_trace(go.Histogram(x=observation_vec[:, 0], nbinsx=100), row=1, col=1)
fig.add_trace(go.Histogram(x=expected_vec[:, 0], nbinsx=100), row=1, col=1)
fig.add_trace(go.Histogram(x=observation_vec[:, 1], nbinsx=100), row=1, col=2)
fig.add_trace(go.Histogram(x=expected_vec[:, 1], nbinsx=100), row=1, col=2)
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75, showlegend=False)
fig.show()
Finding most likely state sequence corresponding to the trajectories.
[18]:
fig = make_subplots(rows=2, cols=2)
for i in range(4):
X_ = observation_vec[sum(lengths[: i]): sum(lengths[: i + 1])]
Y_ = state_vec[sum(lengths[: i]): sum(lengths[: i + 1])]
Z_ = model.predict(X_)
t = dt * numpy.arange(len(X_))
fig.add_trace(go.Scatter(x=t, y=X_[:, 1], line=dict(color='black')), row=i % 2 + 1, col=i // 2 + 1)
fig.add_trace(go.Scatter(x=t, y=[feature_matrix[state, 1] for state in Y_], line_shape='hv', line=dict(color='green')), row=i % 2 + 1, col=i // 2 + 1)
fig.add_trace(go.Scatter(x=t, y=[model.intensity_means_[0, 0] * (1 + state % model.n_oligomers) for state in Z_], line_shape='hv', line=dict(color='red', dash='dash')), row=i % 2 + 1, col=i // 2 + 1)
fig.update_xaxes(title_text="Time", row=i % 2 + 1, col=i // 2 + 1)
fig.update_yaxes(title_text="Intensity", row=i % 2 + 1, col=i // 2 + 1)
fig.update_traces(showlegend=False)
fig.show()