o
    RŀgZ                     @   sV  d Z ddlZddlmZ ede zddlZW n ey*   ddlmZ eddw ej	Z	dd Z
ej  d	ZeeZG d
d dZdd Zdd Zdd Z				d5ddZdZ							d6ddZdd Zdd Zdd Z			d7ddZdd  Zd8d!d"Zd#d$ Zd%d& Zd'd( Zd)d* Z d+d, Z!d-d. Z"d/d0 Z#d1d2 Z$d3d4 Z%dS )9a  A state-emitting MarkovModel.

Note terminology similar to Manning and Schutze is used.


Functions:
train_bw        Train a markov model using the Baum-Welch algorithm.
train_visible   Train a visible markov model using MLE.
find_states     Find the a state sequence that explains some observations.

load            Load a MarkovModel.
save            Save a MarkovModel.

Classes:
MarkovModel     Holds the description of a markov model
    N)BiopythonDeprecationWarningzThe 'Bio.MarkovModel' module is deprecated and will be removed in a future release of Biopython. Consider using the hmmlearn package instead.)MissingPythonDependencyErrorzRPlease install NumPy if you want to use Bio.MarkovModel. See http://www.numpy.org/c                 C   s@   i }t | ddd }t| d }|D ]
\}}|| ||< q|S )zAReturn a dictionary of values with their sequence offset as keys.N   )	enumeratelen)valuesdentriesnindexkey r   C/var/www/html/myenv/lib/python3.10/site-packages/Bio/MarkovModel.py	itemindex1   s   r   gYnc                   @   s$   e Zd ZdZ	dddZdd ZdS )MarkovModelz+Create a state-emitting MarkovModel object.Nc                 C   s"   || _ || _|| _|| _|| _dS )zInitialize the class.N)statesalphabet	p_initialp_transition
p_emission)selfr   r   r   r   r   r   r   r   __init__D   s
   
zMarkovModel.__init__c                 C   s.   ddl m} | }t| | |d | S )z9Create a string representation of the MarkovModel object.r   )StringIO)ior   saveseekread)r   r   handler   r   r   __str__N   s
   

zMarkovModel.__str__NNN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   A   s
    

r   c                 C   s*   |   }||std|d||S )zNRead the first line and evaluate that begisn with the correct start (PRIVATE).zI expected z	 but got )readline
startswith
ValueError)r   startliner   r   r   _readline_and_check_startX   s   
r*   c                 C   s|  t | d}| dd }t | d}| dd }t||}t|t|}}t||_t | d}tt|D ]}t | d||  d}t| d |j|< q9t||f|_	t | d	}tt|D ]!}t | d||  d}d
d | dd D |j	|ddf< qet||f|_
t | d}tt|D ]!}t | d||  d}dd | dd D |j
|ddf< q|S )z.Parse a file handle into a MarkovModel object.zSTATES:r   Nz	ALPHABET:zINITIAL:  :r   zTRANSITION:c                 S      g | ]}t |qS r   float.0vr   r   r   
<listcomp>y       zload.<locals>.<listcomp>z	EMISSION:c                 S   r-   r   r.   r0   r   r   r   r3      r4   )r*   splitr   r   npzerosr   ranger/   r   r   )r   r)   r   r   mmNMir   r   r   load`   s,   




*
*r=   c              
   C   s  |j }|dd| j d |dd| j d |d tt| jD ]}|d| j|  d| j| dd q&|d	 tt| jD ]}|d| j|  ddd
d | j| D  d qF|d tt| jD ]}|d| j|  dddd | j| D  d qmdS )z$Save MarkovModel object into handle.zSTATES:  
z
ALPHABET: z	INITIAL:
r+   z: gzTRANSITION:
c                 s       | ]}t |V  qd S Nstrr1   xr   r   r   	<genexpr>       zsave.<locals>.<genexpr>z
EMISSION:
c                 s   rA   rB   rC   rE   r   r   r   rG      rH   N)	writejoinr   r   r8   r   r   r   r   )r9   r   wr<   r   r   r   r      s   &44r   c              	      s  t | t |}}|std|dur"t|}|j|fkr"td|dur6t|}|j||fkr6td|durJt|}|j||fkrJtdg }	t| |D ]}
|	 fdd|
D  qRdd |	D }t|d	krrtd
t|||	||||d}|\}}}t	| ||||S )a  Train a MarkovModel using the Baum-Welch algorithm.

    Train a MarkovModel using the Baum-Welch algorithm.  states is a list
    of strings that describe the names of each state.  alphabet is a
    list of objects that indicate the allowed outputs.  training_data
    is a list of observations.  Each observation is a list of objects
    from the alphabet.

    pseudo_initial, pseudo_transition, and pseudo_emission are
    optional parameters that you can use to assign pseudo-counts to
    different matrices.  They should be matrices of the appropriate
    size that contain numbers to add to each parameter matrix, before
    normalization.

    update_fn is an optional callback that takes parameters
    (iteration, log_likelihood).  It is called once per iteration.
    zNo training data given.N$pseudo_initial not shape len(states)5pseudo_transition not shape len(states) X len(states)5pseudo_emission not shape len(states) X len(alphabet)c                       g | ]} | qS r   r   rE   indexesr   r   r3      r4   ztrain_bw.<locals>.<listcomp>c                 S   r-   r   )r   rE   r   r   r   r3      r4   r   z,I got training data with outputs of length 0)pseudo_initialpseudo_transitionpseudo_emission	update_fn)
r   r'   r6   asarrayshaper   appendmin_baum_welchr   )r   r   training_datarR   rS   rT   rU   r:   r;   training_outputsoutputslengthsrF   r   r   r   r   rP   r   train_bw   sB   



	r_   i  c
                 C   s\  |du r	t | }nt|| f}|du rt | | f}nt|| | f}|du r,t | |f}nt|| |f}t|}
t|}t|}|durLt|}nd}|durXt|}nd}|durdt|}nd}d}ttD ]1}t}|D ]}|t| |||
|||||	7 }qr|	dur|	|| |durt|| dk r n	|}qlt	dt dd |
||fD S )zfImplement the Baum-Welch algorithm to evaluate unknown parameters in the MarkovModel object (PRIVATE).Ng?z%HMM did not converge in %d iterationsc                 S   s   g | ]}t |qS r   )r6   exp)r1   _r   r   r   r3   &      z_baum_welch.<locals>.<listcomp>)
_random_norm_copy_and_checkr6   logr8   MAX_ITERATIONSLOG0_baum_welch_onefabsRuntimeError)r:   r;   r\   r   r   r   rR   rS   rT   rU   
lp_initiallp_transitionlp_emissionlpseudo_initiallpseudo_transitionlpseudo_emission	prev_llikr<   llikr]   r   r   r   rZ      sV   




rZ   c	              	   C   s  t |}	t| |	||||}
t| |	|||}t| | |	f}t|	D ]J}|| }t| | f}t| D ])}t| D ]"}|
| | || |  || |  || |d   }||| |< q8q2|t| |dddd|f< q!t| |	f}t|	D ]}t| D ]}t||dd|f || |< q}qwt| }t| D ]}t||ddf ||< q|dddf }|durt||}|t| }t| D ]5}t| D ]}t|||ddf ||  || |< q|durt|| |||< || t||  ||< qt| D ]K}t|t }t|	D ]}|| }t| D ]}t	|| ||||f ||< qq
|t| }|dur@t||| }|t| }|||ddf< qt|
dd|	f S )zExecute one step for Baum-Welch algorithm (PRIVATE).

    Do one iteration of Baum-Welch based on a sequence of output.
    Changes the value for lp_initial, lp_transition and lp_emission in place.
    r   Nr   )
r   _forward	_backwardr6   r7   r8   _logsum
_logvecaddrg   	logaddexp)r:   r;   r]   rk   rl   rm   rn   ro   rp   Tfmatbmatlp_arctklp_traverser<   jlplp_arcout_t	lp_arcoutksumr   r   r   rh   )  sh   


  

( 
	rh   c                 C   s   t | |d f}||dddf< td|d D ]7}||d  }t| D ]*}	t}
t| D ]}|| |d  || |	  || |  }t|
|}
q,|
||	 |< q$q|S )zImplement forward algorithm (PRIVATE).

    Calculate a Nx(T+1) matrix, where the last column is the total
    probability of the output.
    r   Nr   r6   r7   r8   rg   rw   )r:   rx   rk   rl   rm   r]   matrixr|   r}   r   lprobr<   r   r   r   r   rs     s   (rs   c                 C   s   t | |d f}t|d ddD ]5}|| }t| D ]*}t}	t| D ]}
||
 |d  || |
  || |  }t|	|}	q#|	|| |< qq|S )z'Implement backward algorithm (PRIVATE).r   r   r   )r:   rx   rl   rm   r]   r   r|   r}   r<   r   r   r   r   r   r   rt     s   (rt   c                    s&  t | t |}}|durt|}|j|fkrtd|dur0t|}|j||fkr0td|durDt|}|j||fkrDtdg g }}	t| t| |D ](\}
}t |t |
krctd|fdd|D  |	 fdd|
D  qSt|||	||||}|\}}}t| ||||S )	a  Train a visible MarkovModel using maximum likelihoood estimates for each of the parameters.

    Train a visible MarkovModel using maximum likelihoood estimates
    for each of the parameters.  states is a list of strings that
    describe the names of each state.  alphabet is a list of objects
    that indicate the allowed outputs.  training_data is a list of
    (outputs, observed states) where outputs is a list of the emission
    from the alphabet, and observed states is a list of states from
    states.

    pseudo_initial, pseudo_transition, and pseudo_emission are
    optional parameters that you can use to assign pseudo-counts to
    different matrices.  They should be matrices of the appropriate
    size that contain numbers to add to each parameter matrix.
    NrL   rM   rN   zstates and outputs not alignedc                    rO   r   r   rE   )states_indexesr   r   r3     r4   z!train_visible.<locals>.<listcomp>c                    rO   r   r   rE   )outputs_indexesr   r   r3     r4   )	r   r6   rV   rW   r'   r   rX   _mler   )r   r   r[   rR   rS   rT   r:   r;   training_statesr\   toutputststatesrF   r   r   r   r   )r   r   r   train_visible  s@   




	r   c                 C   s  t | }|r|| }|D ]}||d   d7  < qt|}t | | f}	|r+|	| }	|D ]"}tt|d D ]}
||
 ||
d  }}|	||f  d7  < q7q-tt|	D ]}|	|ddf t|	|ddf  |	|ddf< qVt | |f}|r~|| }t | |f}t||D ]\}}t||D ]\}}|||f  d7  < qqtt|D ]}||ddf t||ddf  ||ddf< q||	|fS )z<Implement Maximum likelihood estimation algorithm (PRIVATE).r   r   N)r6   r7   
_normalizer8   r   sumoneszip)r:   r;   r\   r   rR   rS   rT   r   r   r   r   r<   r   r   r]   osr   r   r   r     s6   
22
r   c                 C   s   t | gS )z?Return indices of the maximum values aong the vector (PRIVATE).)r6   argmax)vector	allowancer   r   r   	_argmaxes  s   r   c           
         s   | t j}tjt }tjt }tjt }tj	  fdd|D }t
|||||}tt |D ]}|| \}}	fdd|D t|	f||< q;|S )zaFind states in the given Markov model output.

    Returns a list of (states, score) tuples.
    c                    rO   r   r   rE   rP   r   r   r3   /  r4   zfind_states.<locals>.<listcomp>c                    s   g | ]} j | qS r   )r   rE   )r9   r   r   r3   6  rb   )r   r   r6   re   r   VERY_SMALL_NUMBERr   r   r   r   _viterbir8   r`   )
markov_modeloutputr:   rk   rl   rm   resultsr<   r   scorer   )rQ   r9   r   find_states   s   

"r   c                 C   s  t |}g }t| D ]
}|dg|  q
t| |f}||dd|d f  |dddf< td|D ]9}	||	 }
t| D ].}|dd|	d f |dd|f  |||
f  }t|}||d  |||	f< ||| |	< q=q3g }g }t|dd|d f }|D ]}||d |g|| |d  f q|r| \}	}}|	dkr|||f n||d  |	 }|D ]}||	d |g| |f q|s|S )zSImplement Viterbi algorithm to find most likely states for a given input (PRIVATE).Nr   r   )r   r8   rX   r6   r7   r   pop)r:   rk   rl   rm   r   rx   	backtracer<   scoresr|   r}   r   i_scoresrQ   
in_processr   r   r   r   r   r   r   :  s8   $0$r   c                 C   sz   t | jdkr| t|  } | S t | jdkr9tt | D ]}| |ddf t| |ddf  | |ddf< q| S td)z"Normalize matrix object (PRIVATE).r      Nz&I cannot handle matrixes of that shape)r   rW   r   r8   r'   )r   r<   r   r   r   r   c  s   2r   c                 C   s   t | }t|S )z%Normalize a uniform matrix (PRIVATE).)r6   r   r   rW   r   r   r   r   _uniform_normp  s   
r   c                 C   s   t j| }t|S )z$Normalize a random matrix (PRIVATE).)r6   randomr   r   r   r   r   rc   v  s   rc   c                 C   s   t j| dd} | j|krtdt| jdkr(t t| d dkr&td| S t| jdkrMtt| D ]}t t| | d dkrJtd| q5| S td	)
zFCopy a matrix and check its dimension. Normalize at the end (PRIVATE).r   )copyzIncorrect dimensiong      ?g{Gz?zmatrix not normalized to 1.0r   zmatrix %d not normalized to 1.0z&I don't handle matrices > 2 dimensions)r6   arrayrW   r'   r   ri   r   r8   )r   desired_shaper<   r   r   r   rd   |  s   
rd   c                 C   sF   t | jdkrt| t| jf}n| }t}|D ]}t||}q|S )z/Implement logsum for a matrix object (PRIVATE).r   )r   rW   r6   reshapeprodrg   rw   )r   vecr   numr   r   r   ru     s   ru   c                 C   sR   t | t |ksJ dtt | }tt | D ]}t| | || ||< q|S )z5Implement a log sum for two vector objects (PRIVATE).zvectors aren't the same length)r   r6   r7   r8   rw   )logvec1logvec2sumvecr<   r   r   r   rv     s
   rv   c                 C   s   t | }t|S )z-Return the exponential of a logsum (PRIVATE).)ru   r6   r`   )numbersr   r   r   r   _exp_logsum  s   
r   )NNNN)NNNNNNNr    rB   )&r$   warningsBior   warnnumpyr6   ImportErrorr   rw   r   r   seedr   re   rg   r   r*   r=   r   r_   rf   rZ   rh   rs   rt   r   r   r   r   r   r   r   rc   rd   ru   rv   r   r   r   r   r   <module>   st   


%
E
J^
?
.)	