o
    RŀgJG                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddl	mZ dd	l	mZ dd
lmZ ddlmZ G dd deZdddZdd Zdd Zdd Zdd Zdd Zdd ZedkrpddlmZ e  dS dS ) zCode for dealing with Codon Alignment.

CodonAlignment class is inherited from MultipleSeqAlignment class. This is
the core class to deal with codon alignment in biopython.
    N)erfc)sqrt)BiopythonWarning)MultipleSeqAlignment)_get_codon_list)	cal_dn_ds)CodonSeq)
CodonTable)	SeqRecordc                   @   sd   e Zd ZdZdddZdd Zdd	 Zd
d Zdd Zdd Z	dddZ
	dddZedd ZdS )CodonAlignmenta  Codon Alignment class that inherits from MultipleSeqAlignment.

    >>> from Bio.SeqRecord import SeqRecord
    >>> a = SeqRecord(CodonSeq("AAAACGTCG"), id="Alpha")
    >>> b = SeqRecord(CodonSeq("AAA---TCG"), id="Beta")
    >>> c = SeqRecord(CodonSeq("AAAAGGTGG"), id="Gamma")
    >>> print(CodonAlignment([a, b, c]))
    CodonAlignment with 3 rows and 9 columns (3 codons)
    AAAACGTCG Alpha
    AAA---TCG Beta
    AAAAGGTGG Gamma

     Nc                 C   sF   t | | | D ]}t|jtstdq|  d dkr!tddS )zInitialize the class.zACodonSeq objects are expected in each SeqRecord in CodonAlignment   r   zKAlignment length is not a multiple of three (i.e. a whole number of codons)N)r   __init__
isinstanceseqr   	TypeErrorget_alignment_length
ValueError)selfrecordsnamerec r   Q/var/www/html/myenv/lib/python3.10/site-packages/Bio/codonalign/codonalignment.pyr   )   s   zCodonAlignment.__init__c                    s   t  j}d|    f g}|dkr#| fdd jD  n#| fdd jdd D  |d | j jd	 dd
 d|S )aD  Return a multi-line string summary of the alignment.

        This output is indicated to be readable, but large alignment
        is shown truncated. A maximum of 20 rows (sequences) and
        60 columns (20 codons) are shown, with the record identifiers.
        This should fit nicely on a single screen. e.g.

        z6CodonAlignment with %i rows and %i columns (%i codons)<   c                       g | ]	} j |d dqS r   length	_str_line.0r   r   r   r   
<listcomp>J       z*CodonAlignment.__str__.<locals>.<listcomp>c                    r   r   r   r!   r#   r   r   r$   L   r%   N   z...r   
)len_recordsr   get_aln_lengthextendappendr    join)r   rowslinesr   r#   r   __str__:   s   
	"

zCodonAlignment.__str__c                    s   t |tr
| j| S t |trt| j| S t|dkr td|\} t |tr0| j|   S t  trDd fdd| j| D S t fdd| j| D S )z3Return a CodonAlignment object for single indexing.   zInvalid index type.r   c                 3   s    | ]	}t |  V  qd S N)strr!   	col_indexr   r   	<genexpr>^       z-CodonAlignment.__getitem__.<locals>.<genexpr>c                 3   s    | ]}|  V  qd S r3   r   r!   r5   r   r   r7   `   s    
)	r   intr*   slicer   r)   r   r.   r   )r   index	row_indexr   r5   r   __getitem__Q   s   




zCodonAlignment.__getitem__c                 C   s   t |tr%t| t|krtdtdt dd t| |D }t|S t |tr<t| t|kr6td| 	 | S t
dt| d)ah  Combine two codonalignments with the same number of rows by adding them.

        The method also allows to combine a CodonAlignment object with a
        MultipleSeqAlignment object. The following rules apply:

            * CodonAlignment + CodonAlignment -> CodonAlignment
            * CodonAlignment + MultipleSeqAlignment -> MultipleSeqAlignment
        zTWhen adding two alignments they must have the same length (i.e. same number or rows)zsPlease make sure the two CodonAlignment objects are sharing the same codon table. This is not checked by Biopython.c                 s   s*    | ]\}}t t|j|j d V  qdS ))r   N)r
   r   r   )r"   leftrightr   r   r   r7   w   s
    
z)CodonAlignment.__add__.<locals>.<genexpr>z^Only CodonAlignment or MultipleSeqAlignment object can be added with a CodonAlignment object. z
 detected.)r   r   r)   r   warningswarnr   zipr   toMultipleSeqAlignmentr   object)r   othermergedr   r   r   __add__d   s0   
	
zCodonAlignment.__add__c                 C   s   |   d S )zGet alignment length.r   r   r#   r   r   r   r+      s   zCodonAlignment.get_aln_lengthc                 C   s   dd | j D }t|S )zConvert the CodonAlignment to a MultipleSeqAlignment.

        Return a MultipleSeqAlignment containing all the
        SeqRecord in the CodonAlignment using Seq to store
        sequences
        c                 S   s    g | ]}t |j |jd qS id)r
   r   toSeqrK   r!   r   r   r   r$      s     z9CodonAlignment.toMultipleSeqAlignment.<locals>.<listcomp>)r*   r   )r   
alignmentsr   r   r   rC      s   z%CodonAlignment.toMultipleSeqAlignmentNG86c                 C   s   ddl m} |du rtjd }dd | jD }t| j}g }g }t|D ]F}|g  |g  t|d D ]3}	||	kr[t| j| | j|	 ||d\}
}|| |
 || | q6|| d || d q6q$|||d	}|||d	}||fS )
zAvailable methods include NG86, LWL85, YN00 and ML.

        Argument:
         - method       - Available methods include NG86, LWL85, YN00 and ML.
         - codon_table  - Codon table to use for forward translation.

        r   )DistanceMatrixN   c                 S   s   g | ]}|j qS r   rJ   r"   ir   r   r   r$      s    z3CodonAlignment.get_dn_ds_matrix.<locals>.<listcomp>methodcodon_tableg        )matrix)	Bio.Phylo.TreeConstructionrO   r	   generic_by_idr*   r)   ranger-   r   )r   rT   rU   DMnamessize	dn_matrix	ds_matrixrR   jdndsdn_dmds_dmr   r   r   get_dn_ds_matrix   s4   




zCodonAlignment.get_dn_ds_matrixUPGMAc                 C   s   ddl m} |du rtjd }| j||d\}}| }| }|dkr0||}	||}
|	|
fS |dkrB||}	||}
|	|
fS td| d	)
zConstruct dn tree and ds tree.

        Argument:
         - dn_ds_method - Available methods include NG86, LWL85, YN00 and ML.
         - tree_method  - Available methods include UPGMA and NJ.

        r   )DistanceTreeConstructorNrP   rS   re   NJzUnknown tree method (z"). Only NJ and UPGMA are accepted.)rW   rf   r	   rX   rd   upgmanjRuntimeError)r   dn_ds_methodtree_methodrU   rf   rb   rc   dn_constructords_constructordn_treeds_treer   r   r   get_dn_ds_tree   s&   




	


zCodonAlignment.get_dn_ds_treec                 C   s   dd |j D }| |S )zConvert a MultipleSeqAlignment to CodonAlignment.

        Function to convert a MultipleSeqAlignment to CodonAlignment.
        It is the user's responsibility to ensure all the requirement
        needed by CodonAlignment is met.
        c                 S   s$   g | ]}t tt|j|jd qS rI   )r
   r   r4   r   rK   rQ   r   r   r   r$      s   $ z+CodonAlignment.from_msa.<locals>.<listcomp>)r*   )clsalignr   r   r   r   from_msa   s   zCodonAlignment.from_msa)r   N)rN   N)rN   re   N)__name__
__module____qualname____doc__r   r1   r=   rG   r+   rC   rd   rq   classmethodrt   r   r   r   r   r      s    
%

%
 r   皙?c                    s  ddl }|du rtjd }tdd | D stddd | D }tt|dkr-td	|d d
 }||j	}|j
D ]}d||< q<g }| D ]}	|g  |	D ] |d t j qPqGg }
t|D ] g }|D ]} fdd|D }|| qj|
| qdd\}}}}t|d\}}|
D ]`  d j dd  }d|v st|dkrqtdd  D }|rt||}t||}t||}t||| }||7 }||7 }qt||}t||}t||}t||| }||7 }||7 }qt||||gS )a  McDonald-Kreitman test for neutrality.

    Implement the McDonald-Kreitman test for neutrality (PMID: 1904993)
    This method counts changes rather than sites
    (http://mkt.uab.es/mkt/help_mkt.asp).

    Arguments:
     - codon_alns  - list of CodonAlignment to compare (each
       CodonAlignment object corresponds to gene sampled from a species)

    Return the p-value of test result.
    r   NrP   c                 s   s    | ]}t |tV  qd S r3   )r   r   rQ   r   r   r   r7      s    zmktest.<locals>.<genexpr>z#mktest accepts CodonAlignment list.c                 S   s   g | ]}|  qS r   rH   rQ   r   r   r   r$          zmktest.<locals>.<listcomp>z;CodonAlignment object for mktest should be of equal length.r   stopr'   c                    s   h | ]}|  qS r   r   r"   krR   r   r   	<setcomp>  r{   zmktest.<locals>.<setcomp>)r   r   r   r   )rU   -c                 s   s    | ]	}t |d kV  qdS )rP   N)r)   r}   r   r   r   r7     r8   )copyr	   rX   allr   r)   setrj   deepcopyforward_tablestop_codonsr-   r   r   rY   _get_codon2codon_matrixunion_get_subgraph_count_replacement_G_test)
codon_alnsrU   alphar   codon_aln_len	codon_num
codon_dictr|   	codon_lst	codon_aln	codon_setuniq_codonsr_   
uniq_codonsyn_fix
nonsyn_fixsyn_polynonsyn_polyGnonsyn_G	all_codon
fix_or_notnonsyn_subgraphsubgraphthis_nonthis_synr   r   r   mktest   s`   











r   c                 C   s  ddl }d}dd t| j | j D }|| j}| jD ]}d||< qt|}i }i }i }	i }
t|D ]O\}}i |	|< i |
|< t|D ]>\}}|D ]7}|d| | ||d d  }|| || krrd|
| |< d|	| |< qK||krd|
| |< d|	| |< qKqEq5|D ]4}i ||< i ||< |D ]'}||krd|| |< d|| |< qt|
|||| |< t|	|||| |< qq||fS )	zGet codon codon substitution matrix (PRIVATE).

    Elements in the matrix are number of synonymous and nonsynonymous
    substitutions required for the substitution.
    r   N)ATCr   c                 S   s   g | ]}d |vr|qS )Ur   rQ   r   r   r   r$   4  s
    z+_get_codon2codon_matrix.<locals>.<listcomp>r|   rP   g?)	r   listr   keysr   r   r)   	enumerate	_dijkstra)rU   r   
base_tuplecodonsr   r|   numr   r   graphgraph_nonsynrR   codonpbr_   	tmp_codoncodon1codon2r   r   r   r   +  sN   

 
r   c                 C   sl  i }i }|   D ]
}d||< d||< qd||< t|   }t|dkrtd}d}|D ]}|du r6|| }|}q)|| |k rB|| }|}q)|| | |  D ]\}	}
||	 || |
 krh|| |
 ||	< |||	< qN||krnnt|dks#g }|}d}||kr||dkr|d| || }nn||ks~|d| tt|d D ]}|| ||  ||d   7 }q|S )a  Dijkstra's algorithm Python implementation (PRIVATE).

    Algorithm adapted from
    http://thomas.pelletier.im/2010/02/dijkstras-algorithm-python-implementation/.
    However, an obvious bug in::

        if D[child_node] >(<) D[node] + child_value:

    is fixed.
    This function will return the distance between start and end.

    Arguments:
     - graph: Dictionary of dictionary (keys are vertices).
     - start: Start vertex.
     - end: End vertex.

    Output:
       List of vertices from the beginning to the end.

    d   r   r   NrP   )r   r   r)   removeitemscountinsertrY   )r   startendDPnodeunseen_nodesshortest	temp_node
child_nodechild_valuepathdistancerR   r   r   r   r   ]  sP   


r   c                 C   sX   ddl m} t| dkrdS t| dkr$t| }|||d  |d  S t| }t|S )z9Count replacement needed for a given codon_set (PRIVATE).r   floorrP   )r   r   r2   )mathr   r)   r   _prim)r   r   r   r   r   r   r   r     s   r   c                 C   s  ddl m} ddlm} ddlm} ddlm} ddlm} g }g }|  D ]0}|	| | | D ]$}	||	| | |	 f|vrU|	|| | |	 f|vrU|	||	| | |	 f q1q&|t
}
|D ]\}}}|
| 	|||f |
| 	|||f q]g }t|d }|
|d  dd }|| |r||\}}}||vr|| |	|||f |
| D ]}|d |vr||| q|sd}|D ]
}|||d 7 }q|S )	zPrim's algorithm to find minimum spanning tree (PRIVATE).

    Code is adapted from
    http://programmingpraxis.com/2010/04/09/minimum-spanning-tree-prims-algorithm/
    r   )defaultdict)heapify)heappop)heappushr   Nr2   )collectionsr   heapqr   r   r   r   r   r   r-   r   r   add)r   r   r   r   r   r   nodesedgesrR   r_   connn1n2cmstusedusable_edgescoster   r   r   r   r   r     sH   
,

r   c                 C   s@   i }| D ]}i ||< | D ]}||kr|| | || |< qq|S )z<Get the subgraph that contains all codons in list (PRIVATE).r   )r   r   r   rR   r_   r   r   r   r     s   r   c                 C   s   ddl m} d}t| }| d | d  }| d | d  }t| dd }t| dd }|| | || | || | || | g}t| |D ]\}	}
||	||	|
  7 }qGtt|S )zG test for 2x2 contingency table (PRIVATE).

    Arguments:
     - site_counts - [syn_fix, nonsyn_fix, syn_poly, nonsyn_poly]

    >>> print("%0.6f" % _G_test([17, 7, 42, 2]))
    0.004924
    r   )logr2   rP   r   N)r   r   sumrB   r   r   )site_countsr   r   tottot_syntot_nontot_fixtot_polyexpobsexr   r   r   r     s   



r   __main__)run_doctest)Nrz   )rx   r@   r   r   r   Bior   	Bio.Alignr   Bio.codonalign.codonseqr   r   r   Bio.Datar	   Bio.SeqRecordr
   r   r   r   r   r   r   r   r   ru   
Bio._utilsr   r   r   r   r   <module>   s0    
ND2D) 
