o
    Rŀg>                     @   s  d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 G dd deZd	d
 Zd3ddZdd Zd4ddZdd Zdd Zdd Zdd Zdd Zdd Zd5d!d"Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zed1krdd2lm Z  e   dS dS )6zCode for dealing with coding sequence.

CodonSeq class is inherited from Seq class. This is the core class to
deal with sequences in CodonAlignment in biopython.

    )permutationslog)
CodonTable)Seq)	SeqRecordc                   @   sh   e Zd ZdZdddZdd Zd	d
 Z	dddZdd Zdd Z	dddZ
dddZedddZdS )CodonSeqaK  CodonSeq is designed to be within the SeqRecords of a CodonAlignment class.

    CodonSeq is useful as it allows the user to specify
    reading frame when translate CodonSeq

    CodonSeq also accepts codon style slice by calling
    get_codon() method.

    **Important:** Ungapped CodonSeq can be any length if you
    specify the rf_table. Gapped CodonSeq should be a
    multiple of three.

    >>> codonseq = CodonSeq("AAATTTGGGCCAAATTT", rf_table=(0,3,6,8,11,14))
    >>> print(codonseq.translate())
    KFGAKF

    test get_full_rf_table method

    >>> p = CodonSeq('AAATTTCCCGG-TGGGTTTAA', rf_table=(0, 3, 6, 9, 11, 14, 17))
    >>> full_rf_table = p.get_full_rf_table()
    >>> print(full_rf_table)
    [0, 3, 6, 9, 12, 15, 18]
    >>> print(p.translate(rf_table=full_rf_table, ungap_seq=False))
    KFPPWV*
    >>> p = CodonSeq('AAATTTCCCGGGAA-TTTTAA', rf_table=(0, 3, 6, 9, 14, 17))
    >>> print(p.get_full_rf_table())
    [0, 3, 6, 9, 12.0, 15, 18]
    >>> p = CodonSeq('AAA------------TAA', rf_table=(0, 3))
    >>> print(p.get_full_rf_table())
    [0, 3.0, 6.0, 9.0, 12.0, 15]

     -Nc                 C   s   t | |  || _|du r-t| }|d dkrtdttd|| | d| _	dS t
|ttfs8tdtdd |D sEtd|| _	dS )	zInitialize the class.N   r   zJSequence length is not a multiple of three (i.e. a whole number of codons)z)rf_table should be a tuple or list objectc                 s   s    | ]}t |tV  qd S N)
isinstanceint.0i r   K/var/www/html/myenv/lib/python3.10/site-packages/Bio/codonalign/codonseq.py	<genexpr>X   s    z$CodonSeq.__init__.<locals>.<genexpr>zSElements in rf_table should be int that specify the codon positions of the sequence)r   __init__uppergap_charlen
ValueErrorlistrangecountrf_tabler   tuple	TypeErrorall)selfdatar   r   lengthr   r   r   r   8   s     
zCodonSeq.__init__c                    s   t dd jD dkrtdt|tr1|dkr't|d |d d  S t|d d S tt d   fdd	}||}t|S )
z&Get the index codon from the sequence.c                 S   s   h | ]}|d  qS r   r   r   r   r   r   	<setcomp>b   s    z%CodonSeq.get_codon.<locals>.<setcomp>   z}frameshift detected. CodonSeq object is not able to deal with codon sequence with frameshift. Please use normal slice option.r   Nc                    s:    |  }d}|D ]}||d |d d  7 }qt |S )Nr	   r   str)paa_slicecodon_slicer   aa_indexr!   r   r   csliceu   s
   z"CodonSeq.get_codon.<locals>.cslice)r   r   RuntimeErrorr   r   r)   r   r   )r!   indexr/   r,   r   r-   r   	get_codon`   s   
zCodonSeq.get_codonc                 C   s
   t | jS )z,Return the number of codons in the CodonSeq.)r   r   r!   r   r   r   get_codon_num   s   
zCodonSeq.get_codon_num*Tc           
   	   C   s>  |du r	t jd }g }|rt| | jd}nt| }|du r"| j}d}|D ]s}t|tr3|d q&d|||d  v rk|dksG|| dkrZ|}|||d  dddd }	n|| dkrj|||d  }	|}n
|||d  }	|}|	|j	v r|| q&z
||j
|	  W q& ty   td|	 d	w d|S )
a1  Translate the CodonSeq based on the reading frame in rf_table.

        It is possible for the user to specify
        a rf_table at this point. If you want to include
        gaps in the translated sequence, this is the only
        way. ungap_seq should be set to true for this
        purpose.
        Nr&   r	   r'   r
   r      zUnknown codon detected (z4). Did you forget to specify the ungap_seq argument?)r   generic_by_idr)   replacer   r   r   floatappendstop_codonsforward_tableKeyErrorr0   join)
r!   codon_tablestop_symbolr   	ungap_seqamino_acidstr_seqr*   r   codonr   r   r   	translate   sD   


"



zCodonSeq.translatec                 C   s   t t| S )zConvert DNA to seq object.)r   r)   r3   r   r   r   toSeq   s   zCodonSeq.toSeqc              	   C   s  t | dd}| jd g}tdt| jdd d D ]}|| j| | j|d    qg }d}tdt| dD ]}| ||d  | jd krQ||d  n`|| dkra|| |d7 }nP|| dv rd| d|d | }|dkr||||   n|d	kr||d ||   n|dkr||d	 ||   |d7 }n|| dkr||d  zd| d||d  }||  |8  < W q: ty   Y q:w |S )
zReturn full rf_table of the CodonSeq records.

        A full rf_table is different from a normal rf_table in that
        it translate gaps in CodonSeq. It is helpful to construct
        alignment containing frameshift.
        r
   r	   r   r&   Nr   g        )r'      )	r)   r8   r   r   r   r:   r   r   	Exception)r!   rA   relative_posr   full_rf_table	codon_numgap_statthis_lenr   r   r   get_full_rf_table   s<     


zCodonSeq.get_full_rf_tablec                 C   s,   |du r	t jd }|  }| j|||ddS )z,Apply full translation with gaps considered.Nr&   F)r?   r@   r   rA   )r   r7   rO   rE   )r!   r?   r@   rK   r   r   r   full_translate   s   
zCodonSeq.full_translatec                 C   s>   t |dkst|tstd|tt| |d| jdS )z;Return a copy of the sequence without the gap character(s).r&   zUnexpected gap character, r	   r   )r   r   r)   r   r   r8   r   )r!   gapr   r   r   ungap   s   zCodonSeq.ungapc                 C   s$   |du r
| t |S | t ||dS )z&Get codon sequence from sequence data.NrQ   r(   )clsseqr   r   r   r   from_seq   s   zCodonSeq.from_seq)r	   r
   N)Nr5   NT)Nr5   )r
   r   )__name__
__module____qualname____doc__r   r2   r4   rE   rF   rO   rP   rS   classmethodrV   r   r   r   r   r      s    
!(
4
'
r   c              	   C   s   |   }g }t|D ]f\}}t|trK|}z
t||d  }W n ty,   |d }Y nw t| || }t|dkrA|| q
|t|  q
t| t|t|d  dkra|d q
|| t|t|d   q
|S )zAList of codons according to full_rf_table for counting (PRIVATE).r&   r   ---)	rO   	enumerater   r   
IndexErrorr)   r   r:   rS   )codonseqrK   	codon_lstr   kstartend
this_codonr   r   r   _get_codon_list   s$   
  re   NG86Nr&   c                 C   sf  t | trt |trnt | trt |tr| j} |j}ntdt|  t| kr?tdt|   dt|  d|du rFd}n|durR|dkrRtd|d	vred
dl}|	d| d d}|du rnt
jd }t| }t|}g }	g }
t||D ]\}}d|vrd|vr|	| |
| qttttd}|dkr|| |	|
||S || |	|
||S )a  Calculate dN and dS of the given two sequences.

    Available methods:
        - NG86  - `Nei and Gojobori (1986)`_ (PMID 3444411).
        - LWL85 - `Li et al. (1985)`_ (PMID 3916709).
        - ML    - `Goldman and Yang (1994)`_ (PMID 7968486).
        - YN00  - `Yang and Nielsen (2000)`_ (PMID 10666704).

    .. _`Nei and Gojobori (1986)`: http://www.ncbi.nlm.nih.gov/pubmed/3444411
    .. _`Li et al. (1985)`: http://www.ncbi.nlm.nih.gov/pubmed/3916709
    .. _`Goldman and Yang (1994)`: http://mbe.oxfordjournals.org/content/11/5/725
    .. _`Yang and Nielsen (2000)`: https://doi.org/10.1093/oxfordjournals.molbev.a026236

    Arguments:
     - codon_seq1 - CodonSeq or or SeqRecord that contains a CodonSeq
     - codon_seq2 - CodonSeq or or SeqRecord that contains a CodonSeq
     - w  - transition/transversion ratio
     - cfreq - Current codon frequency vector can only be specified
       when you are using ML method. Possible ways of
       getting cfreq are: F1x4, F3x4 and F61.

    zVcal_dn_ds accepts two CodonSeq objects or SeqRecord that contains CodonSeq as its seq!zfull_rf_table length of seq1 (z) and seq2 (z) are not the sameNF3x4MLz8cfreq can only be specified when you are using ML method)F1x4rg   F61r   zUnknown cfreq (zF). Only F1x4, F3x4 and F61 are acceptable. Used F3x4 in the following.r&   r
   )rh   rf   LWL85YN00)r   r   r   rU   r   r   rO   r0   warningswarnr   r7   re   zipr:   _ml_ng86_lwl85_yn00)
codon_seq1
codon_seq2methodr?   ra   cfreqrm   seq1_codon_lstseq2_codon_lstseq1seq2r   j	dnds_funcr   r   r   	cal_dn_ds  sN   




r~   c              	   C   s   t | ||d\}}t |||d\}}|| d }|| d }	ddg}
t| |D ]\}}dd t|
t|||dD }
q'|
d | }|
d |	 }|dk rXtd	tdd
|   }nd}|dk rntd	tdd
|   }||fS d}||fS )z$NG86 method main function (PRIVATE).)r?   ra          @r   c                 S      g | ]\}}|| qS r   r   r   mnr   r   r   
<listcomp>j      z_ng86.<locals>.<listcomp>r?   r&   g      ?      UUUUUU?r'   )_count_site_NG86ro   _count_diff_NG86absr   )rz   r{   ra   r?   S_sites1N_sites1S_sites2N_sites2S_sitesN_sitesSNr   r|   pspndSdNr   r   r   rq   b  s&   rq   c                 C   s  d}d}d}d}d}| D ]}g g d}	| dd}|dkrqt|D ]\\}
}|D ]U}||kr/q(||v rL||v rLt|}|||
< d	|}|	d
 | q(||v ri||v rit|}|||
< d	|}|	d
 | q(t|}|||
< d	|}|	d | q(q"|j| }d }}|	d
 D ]}||jv r||7 }q|j| |kr||7 }q||7 }q|	d D ]}||jv r|d7 }q|j| |kr|d7 }q|d7 }q|| d }||| 7 }||| 7 }q||fS )a  Count synonymous and non-synonymous sites of a list of codons (PRIVATE).

    Arguments:
     - codon_lst - A three letter codon list from a CodonSeq object.
       This can be returned from _get_codon_list method.
     - k - transition/transversion rate ratio.

    r   AGTCr   r   r   r   )
transitiontransversionUr   r\   r	   r   r   r&   r   )r8   r]   r   r>   r:   r<   r;   )r`   r?   ra   S_siteN_sitepurine
pyrimidine
base_tuplerD   neighbor_codonr   r   r|   codon_charsrd   aathis_codon_N_sitethis_codon_S_siteneighbor
norm_constr   r   r   r   {  s^   	












r   c              
      s  t | tr
t |tstdt|  dt| dt| dks%t|dkr4tdt|  dt| dddg}| dks@|dkrB|S d t fd	d
| D sWtd|  dt fdd
|D sjtd| d| |krp|S g }tt| |D ]\}}|d |d kr|	| qyd!dd}t|dkrdd t||| ||dD }|S t|dkr|D ]4}| d| ||  | |d d  }dd t||| ||ddD }dd t|||||ddD }q|S t|dkrdt
tg dd}	g }
|	D ]i}| d|d  ||d   | |d d d  }|d|d  ||d   ||d d d  }|
	||f dd t||| ||ddD }dd t|||||ddD }d d t|||||ddD }q|S )"zCount differences between two codons, three-letter string (PRIVATE).

    The function will take multiple pathways from codon1 to codon2
    into account.
    z;_count_diff_NG86 accepts string object to represent codon (, 
 detected)r   %codon should be three letter string (r   r\   r   r   r   r   c                 3       | ]}| v V  qd S r   r   r   r   r   r   r         z#_count_diff_NG86.<locals>.<genexpr>*Unrecognized character detected in codon1 ! (Codons consist of A, T, C or G)c                 3   r   r   r   r   r   r   r   r     r   *Unrecognized character detected in codon2 r&   c                 S   sF   d }}t tt|jj| |gdkr||7 }||fS ||7 }||fS )z4Compare two codon accounting for different pathways.r   r&   )r   setmapr<   get)codon1codon2r?   weightsdndr   r   r   compare_codon  s   z'_count_diff_NG86.<locals>.compare_codonc                 S   r   r   r   r   r   r|   r   r   r   r         z$_count_diff_NG86.<locals>.<listcomp>r   rH   Nc                 S   r   r   r   r   r   r   r   r     r         ?)r?   r   c                 S   r   r   r   r   r   r   r   r     r   r   r&   rH   c                 S   r   r   r   r   r   r   r   r     r   gUUUUUU?r   c                 S   r   r   r   r   r   r   r   r     r   c                 S   r   r   r   r   r   r   r   r     r   r&   )r   r)   r   typer   r0   r    r]   ro   r:   r   r   )r   r   r?   r   diff_posr   ra   r   
temp_codonpaths	tmp_codonr*   tmp1tmp2r   r   r   r     s   



	4$	"00r   c              	   C   s  t |}ddg}ddg}ddg}| | D ]1}|| }	|	D ](}
|
dkr+|d  d7  < q|
dkr8|d  d7  < q|
dkrD|d  d7  < qqt|d t|d t|d g}dgd }t| |D ] \}}|dksq|dksq||krrqad	d
 t|t|||dD }qadd
 t||d D }|dd }|dd }dd
 t||D }dd
 |D }d|d |d  |d |d |d     |d d|d    }d|d |d  |d |d |d     d|d  d|d    }||fS )zlLWL85 method main function (PRIVATE).

    Nomenclature is according to Li et al. (1985), PMID 3916709.
    r   0r&   24r   r6   r\   c                 S   r   r   r   r   r   r   r   r   ?  r   z_lwl85.<locals>.<listcomp>)	fold_dictc                 S   s   g | ]\}}|| qS r   r   r   r   r   r   r   E  s    rH   Nr   c              	   S   sD   g | ]\}}d t ddd|  |   dt ddd|     qS )r         ?r&   rH   g      ?r   r   r   r   r   r   H  s    2c                 S   s$   g | ]}d t ddd|    qS )r   r   r&   rH   r   r   r   r   r   r   L  s   $ )_get_codon_foldsumro   _diff_codon)rz   r{   ra   r?   codon_fold_dictfold0fold2fold4rD   fold_numfLPQr   r   PQr   Br   r   r   r   r   rr   %  sF   "
@Drr   c                 C   s<   dd }i }| j D ]}d|vr||| j ||< q	d|d< |S )zFClassify different position in a codon into different folds (PRIVATE).c           
   
   S   s   h d}d}t | }t|D ]a\}}|t| }g }|D ] }	|	||< z||d|  W q ty<   |d Y qw |||  dkrK|d7 }n |||  dv rY|d7 }n|||  dkrg|d	7 }ntd
|||< q|S )N>   r   r   r   r   r	   stopr   r   )r&   rH   r   r   r   z3Unknown Error, cannot assign the position to a fold)r   r]   r   r:   r>   r=   r   r0   )
rD   r<   basefoldcodon_base_lstr   b
other_baser   r|   r   r   r   find_fold_classU  s0   



z(_get_codon_fold.<locals>.find_fold_classr   r\   )r<   )r?   r   
fold_tablerD   r   r   r   r   R  s   
r   c                 C   s  d } } } } }}||  }	d}
d}t t| |D ]\}\}}||krV||
v rV||
v rV|	| dkr8|d7 }n|	| dkrC|d7 }n|	| dkrN|d7 }ntd|	|  ||kr||v r||v r|	| dkrm|d7 }n|	| dkrx|d7 }n|	| dkr|d7 }ntd|	|  ||kr||
v r||v s||v r||
v r|	| dkr|d7 }q|	| dkr|d7 }q|	| dkr|d7 }qtd|	|  q||||||fS )	zCount number of different substitution types between two codons (PRIVATE).

    returns tuple (P0, P2, P4, Q0, Q2, Q4)

    Nomenclature is according to Li et al. (1958), PMID 3916709.
    r   r   r   r   r&   r   r   zUnexpected fold_num %d)r]   ro   r0   )r   r   r   P0P2P4Q0Q2Q4r   r   r   r   r   r|   r   r   r   r   w  s@   





 


r   c           +   	      s*  ddl m} ddlm} dddddddddddddddg}t|}|t}|t}	| | D ]T}
|
dkrZ|d |
d   d7  < |d |
d   d7  < |d |
d   d7  < ||
 }t|D ]!\}|dkru||
   d7  < qb|d	kr|	|
   d7  < qbq0t| }t|	 }t	||	D ]\}| | |< |	 | |	< qt
| ||d
}t||t|	|f}||d  ||d   ||  }tdD ]t|  fdd|  D |< q|t}t|j |j D ]dvrd|< q| | D ]|  d7  < qt| ||||d\}}}t|| |||d\}}}|| d }|| d }ddddddddddg}tdD ]dD ]}| | | |  d | |< qNqJddg}t	| |D ]\}dd t	|t||d
D }qo|d | }|d | } t|||  }!tdd|   tdd|   }"dtdd|!   }#dddg}$tdD ]}%dd t|j |j D }&t|||"|&|}'||'|# }(g d}i  t	| |D ]!\}dkr|dkr |fd  |f  d7  < q D ]td d |(|&|}) fddt	||)D }q
|d | |d | f|d | |d | ff}g }*t	||D ]\}})|*t||)dd qF|*d d | ||  |*d d | ||   }#|*d |*d  }"tfddt	|*|$D r|*d |*d f  S |*}$qdS )zsYN00 method main function (PRIVATE).

    Nomenclature is according to Yang and Nielsen (2000), PMID 10666704.
    r   )defaultdictexpmr   r   r   r   r\   r&   rH   r   r   r   r   c                       i | ]	\}}||  qS r   r   r   r|   ra   totr   r   
<dictcomp>      z_yn00.<locals>.<dictcomp>r   )ra   r?   r   c                 S   r   r   r   r   r   r   r   r     r   z_yn00.<locals>.<listcomp>r   r   h㈵>   c                 S      g | ]}d |vr|qS r   r   r   r   r   r   r     
    r   r   r   r   c                    s    g | ]\}}||    qS r   r   r   )codon_npathr   r   r   r     s     T)tc                 3   s$    | ]\}}t ||  k V  qd S r   )r   r   )	tolerancer   r   r     s   " z_yn00.<locals>.<genexpr>N)collectionsr   scipy.linalgr   r   r   r]   r   valuesro   _get_TV_get_kappa_tr   itemsr   r<   keysr;   _count_site_YN00r   r   _get_Q
setdefault_count_diff_YN00r:   r    )+rz   r{   ra   r?   r   r   fcodonr   	fold0_cnt	fold4_cntrD   r   r   f0_totalf4_totalr|   TVk04kappapir   r   bfreqSN1r   r   bfreqSN2r   r   bfreqSNr   r   r   r   r*   wr   dSdN_pretempr`   r   r   tvdSdNr   )r   r   r   r   r   rs     s     


(
 0$rs   c                 C   s   d}d}ddg}d}t | |D ]G\}}d||fvrVt ||D ]7\}	}
|	|
kr'n*|	|v r8|
|v r8|d  d7  < n|	|v rI|
|v rI|d  d7  < n|d  d7  < |d7 }qq|d | |d | fS )zGet TV (PRIVATE).

    Arguments:
     - T - proportions of transitional differences
     - V - proportions of transversional differences

    r   )r   r   r   r\   r&   )ro   )
codon_lst1
codon_lst2r?   r   r   r  sitesr   r   r   r|   r   r   r   r    s"   
r  Fc           	      C   s  | d | d  | d< | d | d  | d< d| d | d  | d | d    d| d | d  | d  | d  | d | d  | d  | d    d|d d| d  | d      |d	  d| d | d  | d  | d | d  | d     }d|d d| d  | d    }d
t | }d
t | }|| d }|du rd| d | d  | d  | d | d  | d   | | d | d  | d | d     }|S d| d  | d  d|| d    d| d  | d  d|| d     d| d  | d   | }|S )zmCalculate kappa (PRIVATE).

    The following formula and variable names are according to PMID: 10666704
    r   r   Yr   r   RrH   r&   r   g      F   r   )	r  r  r   r   r   ar   kappaF84
kappaHKY85r   r   r   r  2  sF   "2 
.""r  c                 C   s2  t | t |krtdt | t |f t | }d}d}d}|j}	|j}
i }t| |D ]\}}|dkrI|dkrI|||fd |||f  d7  < q+d }}ddddddddddg}| D ]\}}|d }d }}tdD ]m}|D ]h}|| |kr}qt|d	| | ||d d	  }||
v rqt|| }|| |v r||v r||9 }n|| |v r||v r||9 }|	| |	| kr||7 }|d |  || 7  < qt||7 }|d |  || 7  < qtqp||| 7 }||| 7 }q`d| ||  }||9 }||9 }|D ]}t|	 }|D ]}||  |  < qq|||fS )
a  Site counting method from Ina / Yang and Nielsen (PRIVATE).

    Method from `Ina (1995)`_ as modified by `Yang and Nielsen (2000)`_.
    This will return the total number of synonymous and nonsynonymous sites
    and base frequencies in each category. The function is equivalent to
    the ``CountSites()`` function in ``yn00.c`` of PAML.

    .. _`Ina (1995)`: https://doi.org/10.1007/BF00167113
    .. _`Yang and Nielsen (2000)`: https://doi.org/10.1093/oxfordjournals.molbev.a026236

    z?Length of two codon_lst should be the same (%d and %d detected)r   r   r   r\   r   r&   r   N)
r   r0   r<   r;   ro   r
  r  r   r   r  )r  r  r  ra   r?   r#   r   r   r   
codon_dictr   r   r   r|   r   r   freqSN
codon_pairnpathrD   SNposr   r   r   r   r   r   r   r   r  U  sl    

r  c                    sF  t tr
t tstdt dt dtdks%tdkr4tdt dt dg d}dks@dkrB|S d t fd	d
D sWtd dt fdd
D sjtd dkrp|S g }ttD ]\}}|d |d kr|	| qyd!dd}	t|dkrdd t||	|d |D }|S t|dkr;fdd|D }
g |
D ],}t
t|j|g}||d |d f ||d |d f f}	|d |d   qfddD t|D ]@\}}d| |  |d d  }dd t||	|||| d dD }dd t||	|||| d dD }q|S t|dkr!t
tg dd}g g }
|D ]r}d|d  |d   |d d d  }|d|d  |d   ||d d d  }|
	||f t
t|j||g}||d |d f ||d |d f ||d |d f f}	|d |d  |d   qQfddD t|
|D ]M\}}}dd t||	|d |d ||d dD }dd t||	|d |d |d ||d dD }d d t||	|d |d ||d dD }q|S )"a&  Count differences between two codons (three-letter string; PRIVATE).

    The function will weighted multiple pathways from codon1 to codon2
    according to P matrix of codon substitution. The proportion
    of transition and transversion (TV) will also be calculated in
    the function.
    z;_count_diff_YN00 accepts string object to represent codon (r   r   r   r   r   r\   r   c                 3   r   r   r   r   r   r   r   r     r   z#_count_diff_YN00.<locals>.<genexpr>r   r   c                 3   r   r   r   r   r   r   r   r     r   r   r   r&   c           	      S   s0  d}d}|j }|j}| |v s||v r<| | |v r$|| |v r$dd|dgS | | |v r6|| |v r6dd|dgS ddd|gS ||  || krn| | |v rV|| |v rV|dddgS | | |v rh|| |v rh|dddgS d|ddgS | | |v r|| |v rdd|dgS | | |v r|| |v rdd|dgS ddd|gS )Nr   r   r   )r<   r;   )	r   r   diffr?   r   r   r   dicr   r   r   r   count_TV  s*   z"_count_diff_YN00.<locals>.count_TVc                 S   r   r   r   r   r*   qr   r   r   r     r   z$_count_diff_YN00.<locals>.<listcomp>rH   c                    s0   g | ]} d | |   |d d   qS )Nr&   r   r   )r   r   r   r   r     s   0 c                       g | ]
}d | t   qS )rH   r   r   	path_probr   r   r         Nc                 S   r   r   r   r0  r   r   r   r     r   r   c                 S   r   r   r   r0  r   r   r   r     r   r   c                    r2  r$   r3  r   r4  r   r   r     r6  c                 S   r   r   r   r0  r   r   r   r     r   c                 S   r   r   r   r0  r   r   r   r     r   c                 S   r   r   r   r0  r   r   r   r     r   r   )r   r)   r   r   r   r0   r    r]   ro   r:   r   r   r1   r   )r   r   r   r`   r?   r  r   r   ra   r/  r   	codon_idxprobr   r   r   r*   r   r   r|   r   )r   r   r   r5  r   r    s   



C($	,00""
r  c              
   C   sl  ddl m} ddlm} | }t| |||d}t| |D ]\}}	d||	fvr0|||	f  d7  < qdd t|j |j	 D }
|||
|fd	d
}||g ddddd}|j
\}}}t||||
|}d }}t|
D ]@\}}t|
D ]7\}	}||	krz%|j| |j| kr||| |||	f  7 }n||| |||	f  7 }W qr ty   Y qrw qrqj||9 }||9 }|||
|fdd}||ddgdddd}|j
\}}d}t||||
|}d }}t|
D ]C\}}t|
D ]:\}	}||	kr z&|j| |j| kr||| |||	f  7 }n||| |||	f  7 }W q ty   Y qw qq|d9 }|d9 }|| }|| }||fS )z"ML method main function (PRIVATE).r   )Counter)minimizer   r\   r&   c                 S   r   r   r   r   r   r   r   r   9  r   z_ml.<locals>.<listcomp>c              	   S   s$   t | d | d | d ||||d S )z'Temporary function, params = [t, k, w].r   r&   rH   r`   r?   _likelihood_funcparamsr  	codon_cntr`   r?   r   r   r   func@  s   z_ml.<locals>.func)r&   皙?rH   zL-BFGS-B)绽|=r   rC  )rD  
   r   )rv   boundstolc              	   S   s    t | d | d d||||d S )z5Temporary function, params = [t, k]. w is fixed to 1.r   r&   r   r;  r<  r>  r   r   r   func_w1j  s   z_ml.<locals>.func_w1rB  )rC  rC  r   r   )r  r9  scipy.optimizer:  _get_piro   r   r<   r  r;   xr	  r]   r=   )rz   r{   cmethodr?   r9  r:  r@  r  r   r|   r`   rA  opt_resr   ra   r  r   SdNdc1c2rH  rhoSrhoNr   r   r   r   r   rp   -  s   



rp   c                    s\  i }|dkr[ddddd}| | D ]}|dkr$|D ]
}||  d7  < qqt |   fdd| D }|j |j D ]}d|vrX||d  ||d   ||d	   ||< q>|S |d
krdddddddddddddddg}| | D ]*}|dkr|d |d   d7  < |d |d   d7  < |d	 |d	   d7  < qwtdD ]}t ||    fdd||  D ||< qt|j |j D ] }d|vr|d |d  |d |d   |d	 |d	   ||< q|S |dkr,|j |j D ]}d|vrd||< q| | D ]}|dkr||  d7  < q
t |   fdd| D }|S )zObtain codon frequency dict (pi) from two codon list (PRIVATE).

    This function is designed for ML method. Available counting methods
    (cfreq) are F1x4, F3x4 and F64.
    ri   r   r   r\   r&   c                    r   r   r   r   r   r   r   r     r   z_get_pi.<locals>.<dictcomp>r   rH   rg   r   c                    r   r   r   r   r   r   r   r     r   rj   rB  c                    r   r   r   r   r   r   r   r     r   )r   r  r  r<   r  r;   r   r   )rz   r{   rL  r?   r  r  r   cr   r   r   rJ    s\   	( 4



rJ  c                 C   sh  | |krdS | |j v s||j v rdS | |vs||vrdS d}d}g }tt| |D ]\}	\}
}|
|kr;||	|
|f q)t|dkrDdS |j|  |j| kr~|d d |v rd|d d |v rd|||  S |d d |v rz|d d |v rz|||  S || S |d d |v r|d d |v r|| ||  S |d d |v r|d d |v r|| ||  S |||  S )a   Q matrix for codon substitution (PRIVATE).

    Arguments:
     - i, j  : three letter codon string
     - pi    : expected codon frequency
     - k     : transition/transversion ratio
     - w     : nonsynonymous/synonymous rate ratio
     - codon_table: Bio.Data.CodonTable object

    r   r   r   rH   r&   )r;   r]   ro   r:   r   r<   )r   r|   r  ra   r  r?   r   r   r-  r   rP  rQ  r   r   r   _q  s4       rU  c              
   C   s   ddl }t|}|||f}t|D ]}t|D ]}	||	kr1t|| ||	 | |||d|||	f< qqd}
t|D ]+}t||ddf  |||f< z|
| ||  |||f   7 }
W q9 tyd   Y q9w ||
 }|S )z*Q matrix for codon substitution (PRIVATE).r   Nr   )numpyr   zerosr   rU  r   r=   )r  ra   r  r`   r?   nprL   r   r   r|   nucl_substitutionsr   r   r   r	    s*   "r	  c              	   C   s   ddl m} t|||||}|||  }	d}
t|D ]>\}}t|D ]5\}}||f|v rW|	||f ||  dkrC|
|||f d 7 }
q"|
|||f t|| |	||f   7 }
q"q|
S )z,Likelihood function for ML method (PRIVATE).r   r   )r  r   r	  r]   r   )r   ra   r  r  r@  r`   r?   r   r   r   
likelihoodr   rP  r|   rQ  r   r   r   r=    s   (r=  __main__)run_doctest)rf   Nr&   Nr   )F)!rZ   	itertoolsr   mathr   Bio.Datar   Bio.Seqr   Bio.SeqRecordr   r   re   r~   rq   r   r   rr   r   r   rs   r  r  r  r  rp   rJ  rU  r	  r=  rW   
Bio._utilsr\  r   r   r   r   <module>   s>    h
I
>l-%1n
#C j32
