o
    RŀgJ                     @   s   d Z ddlZddlZddlZddlmZ ddlmZmZ ddlm	Z	 G dd dZ
G dd	 d	e
ZeZG d
d dZG dd dZG dd deZG dd dZG dd dZG dd deZG dd deZG dd deZedkr~ddlmZ e  dS dS )z*Classes and methods for tree construction.    N)BaseTree)	AlignmentMultipleSeqAlignment)substitution_matricesc                   @   sT   e Zd ZdZdddZdd Zdd Zd	d
 ZdddZdd Z	dd Z
dd ZdS )_Matrixa  Base class for distance matrix or scoring matrix.

    Accepts a list of names and a lower triangular matrix.::

        matrix = [[0],
                  [1, 0],
                  [2, 3, 0],
                  [4, 5, 6, 0]]
        represents the symmetric matrix of
        [0,1,2,4]
        [1,0,3,5]
        [2,3,0,6]
        [4,5,6,0]

    :Parameters:
        names : list
            names of elements, used for indexing
        matrix : list
            nested list of numerical lists in lower triangular format

    Examples
    --------
    >>> from Bio.Phylo.TreeConstruction import _Matrix
    >>> names = ['Alpha', 'Beta', 'Gamma', 'Delta']
    >>> matrix = [[0], [1, 0], [2, 3, 0], [4, 5, 6, 0]]
    >>> m = _Matrix(names, matrix)
    >>> m
    _Matrix(names=['Alpha', 'Beta', 'Gamma', 'Delta'], matrix=[[0], [1, 0], [2, 3, 0], [4, 5, 6, 0]])

    You can use two indices to get or assign an element in the matrix.

    >>> m[1,2]
    3
    >>> m['Beta','Gamma']
    3
    >>> m['Beta','Gamma'] = 4
    >>> m['Beta','Gamma']
    4

    Further more, you can use one index to get or assign a list of elements related to that index.

    >>> m[0]
    [0, 1, 2, 4]
    >>> m['Alpha']
    [0, 1, 2, 4]
    >>> m['Alpha'] = [0, 7, 8, 9]
    >>> m[0]
    [0, 7, 8, 9]
    >>> m[0,1]
    7

    Also you can delete or insert a column&row of elements by index.

    >>> m
    _Matrix(names=['Alpha', 'Beta', 'Gamma', 'Delta'], matrix=[[0], [7, 0], [8, 4, 0], [9, 5, 6, 0]])
    >>> del m['Alpha']
    >>> m
    _Matrix(names=['Beta', 'Gamma', 'Delta'], matrix=[[0], [4, 0], [5, 6, 0]])
    >>> m.insert('Alpha', [0, 7, 8, 9] , 0)
    >>> m
    _Matrix(names=['Alpha', 'Beta', 'Gamma', 'Delta'], matrix=[[0], [7, 0], [8, 4, 0], [9, 5, 6, 0]])

    Nc                 C   s   t |tr tdd |D r tt|t|kr|| _ntdtd|du r;dd tdt| d D }|| _	dS t |trytd	d |D rytd
d |D ryt|t|krudd |D ttdt| d krq|| _	dS tdtdtd)zInitialize matrix.

        Arguments are a list of names, and optionally a list of lower
        triangular matrix data (zero matrix used by default).
        c                 s       | ]}t |tV  qd S N
isinstancestr.0s r   N/var/www/html/myenv/lib/python3.10/site-packages/Bio/Phylo/TreeConstruction.py	<genexpr>]       z#_Matrix.__init__.<locals>.<genexpr>zDuplicate names foundz#'names' should be a list of stringsNc                 S   s   g | ]}d g| qS )r   r   r   ir   r   r   
<listcomp>h       z$_Matrix.__init__.<locals>.<listcomp>   c                 s   r   r   )r
   listr   rowr   r   r   r   n   r   c                 s   s&    | ]}|D ]	}t |tjV  qqd S r   r
   numbersNumber)r   r   itemr   r   r   r   o   s    c                 S   s   g | ]}t |qS r   )lenr   r   r   r   r   v   s    z+'matrix' should be in lower triangle formatz,'names' and 'matrix' should be the same sizez,'matrix' should be a list of numerical lists)
r
   r   allr   setnames
ValueError	TypeErrorrangematrixselfr"   r&   r   r   r   __init__V   s,   
$
z_Matrix.__init__c                    s  t |ttfrRd t |tr| nt |tr&|jv r"j| ntdtd td kr6td fddt	d D  fd	dt	 tD  S t|d
krd}d}t
dd |D rj|\}}n,t
dd |D r|\}}|jv r|jv rj|}j|}ntdtd|td ks|td krtd||krj| | S j| | S td)am  Access value(s) by the index(s) or name(s).

        For a _Matrix object 'dm'::

            dm[i]                   get a value list from the given 'i' to others;
            dm[i, j]                get the value between 'i' and 'j';
            dm['name']              map name to index first
            dm['name1', 'name2']    map name to index first

        NItem not found.Invalid index type.r   Index out of range.c                    s   g | ]	}j   | qS r   r&   r   indexr(   r   r   r      s    z'_Matrix.__getitem__.<locals>.<listcomp>r   c                    s   g | ]	}j |   qS r   r-   r   r.   r   r   r      s       c                 s   r   r   r
   intr   r   r   r   r      r   z&_Matrix.__getitem__.<locals>.<genexpr>c                 s   r   r   r	   r   r   r   r   r      r   )r
   r2   r   r"   r/   r#   r$   r   
IndexErrorr%   r    r&   )r(   r   	row_index	col_indexrow_namecol_namer   r.   r   __getitem__   s@   


$
 z_Matrix.__getitem__c           	      C   s  t |ttfrzd}t |tr|}nt |tr&|| jv r"| j|}ntdtd|t| d kr6tdt |t	rvt
dd |D rvt|t| krrtd|D ]}|| | j| |< qQt|t| D ]}|| | j| |< qddS td	td
t|dkrd}d}t
dd |D r|\}}n,t
dd |D r|\}}|| jv r|| jv r| j|}| j|}ntdtd|t| d ks|t| d krtdt |tjr||kr|| j| |< dS || j| |< dS td
td)zSet value by the index(s) or name(s).

        Similar to __getitem__::

            dm[1] = [1, 0, 3, 4]    set values from '1' to others;
            dm[i, j] = 2            set the value from 'i' to 'j'

        Nr*   r+   r   r,   c                 s   s    | ]	}t |tjV  qd S r   r   r   nr   r   r   r      s    
z&_Matrix.__setitem__.<locals>.<genexpr>r   zValue not the same size.zInvalid value type.r0   c                 s   r   r   r1   r   r   r   r   r      r   c                 s   r   r   r	   r   r   r   r   r      r   )r
   r2   r   r"   r/   r#   r$   r   r3   r   r    r%   r&   r   r   )	r(   r   valuer/   r   r4   r5   r6   r7   r   r   r   __setitem__   sT   




 z_Matrix.__setitem__c                 C   sl   d}t |tr
|}nt |tr| j|}ntdt|d t| D ]}| j| |= q#| j|= | j|= dS )z.Delete related distances by the index or name.Nr+   r   )	r
   r2   r   r"   r/   r$   r%   r   r&   )r(   r   r/   r   r   r   r   __delitem__   s   

z_Matrix.__delitem__c                 C   s   t |tr@|du rt| }t |tstd| j|| | j|dg|  t|t| D ]}| j| |d q.|| |< dS td)zInsert distances given the name and value.

        :Parameters:
            name : str
                name of a row/col to be inserted
            value : list
                a row/col of values to be inserted

        Nr+   r   zInvalid name type.)	r
   r   r   r2   r$   r"   insertr&   r%   )r(   namer;   r/   r   r   r   r   r>     s   


z_Matrix.insertc                 C   s
   t | jS )zMatrix length.)r   r"   r(   r   r   r   __len__"  s   
z_Matrix.__len__c                 C   s"   | j jdttt| j| jf  S )zReturn Matrix as a string.z(names=%s, matrix=%s))	__class____name__tuplemapreprr"   r&   r@   r   r   r   __repr__&  s   
z_Matrix.__repr__c                    sB   d  fddtdt D }|d d  j }|jddS )	z%Get a lower triangular matrix string.
c                    s4   g | ]} j | d  d dd  j| D  qS )	c                 S   s   g | ]}t |d qS )f)formatr9   r   r   r   r   2  r   z._Matrix.__str__.<locals>.<listcomp>.<listcomp>)r"   joinr&   r   r@   r   r   r   /  s    z#_Matrix.__str__.<locals>.<listcomp>r   z
	rI      )tabsize)rL   r%   r   r"   
expandtabs)r(   matrix_stringr   r@   r   __str__,  s   
z_Matrix.__str__r   )rC   
__module____qualname____doc__r)   r8   r<   r=   r>   rA   rG   rQ   r   r   r   r   r      s    
@)6B
r   c                   @   s2   e Zd ZdZdddZdd Zdd Zd	d
 ZdS )DistanceMatrixzDistance matrix class that can be used for distance based tree algorithms.

    All diagonal elements will be zero no matter what the users provide.
    Nc                 C      t | || |   dS Initialize the class.N)r   r)   _set_zero_diagonalr'   r   r   r   r)   @     zDistanceMatrix.__init__c                 C   rV   )zSet Matrix's items to values.N)r   r<   rY   )r(   r   r;   r   r   r   r<   E  rZ   zDistanceMatrix.__setitem__c                 C   s&   t dt| D ]	}d| j| |< qdS )z,Set all diagonal elements to zero (PRIVATE).r   N)r%   r   r&   )r(   r   r   r   r   rY   J  s   z!DistanceMatrix._set_zero_diagonalc           	         s   | dtj d tdtttjd }dd tdtjd D }dt| d d	| d }t	t
jjD ](\ \}} fd
dt d tjD }t|g||}| |j|  q@dS )a  Write data in Phylip format to a given file-like object or handle.

        The output stream is the input distance matrix format used with Phylip
        programs (e.g. 'neighbor'). See:
        http://evolution.genetics.washington.edu/phylip/doc/neighbor.html

        :Parameters:
            handle : file or file-like object
                A writeable text mode file handle or other object supporting
                the 'write' method, such as StringIO or sys.stdout.

        z    rH      r   c                 s   s     | ]}d t | d V  qdS ){z:.4f}N)r   )r   xr   r   r   r   _      z/DistanceMatrix.format_phylip.<locals>.<genexpr>z{0:zs}z  c                 3   s    | ]
}j |   V  qd S r   r-   )r   jr   r(   r   r   r   c  s    N)writer   r"   maxrE   r%   r&   r   rL   	enumeratezip	itertoolschainrK   )	r(   handle
name_width
value_fmtsrow_fmtr?   valuesmirror_valuesfieldsr   r`   r   format_phylipO  s   $zDistanceMatrix.format_phylipr   )rC   rR   rS   rT   r)   r<   rY   rn   r   r   r   r   rU   :  s    
rU   c                   @   s   e Zd ZdZedZg Zg Ze	 Z
e
D ]%Ze	eZedkr"dZne Zeeejr4ee qee q[[[
[dge e ZdddZd	d
 Zdd ZdS )DistanceCalculatora  Calculates the distance matrix from a DNA or protein sequence alignment.

    This class calculates the distance matrix from a multiple sequence alignment
    of DNA or protein sequences, and the given name of the substitution model.

    Currently only scoring matrices are used.

    :Parameters:
        model : str
            Name of the model matrix to be used to calculate distance.
            The attribute ``dna_models`` contains the available model
            names for DNA sequences and ``protein_models`` for protein
            sequences.

    Examples
    --------
    Loading a small PHYLIP alignment from which to compute distances::

      >>> from Bio.Phylo.TreeConstruction import DistanceCalculator
      >>> from Bio import AlignIO
      >>> aln = AlignIO.read(open('TreeConstruction/msa.phy'), 'phylip')
      >>> print(aln)  # doctest:+NORMALIZE_WHITESPACE
      Alignment with 5 rows and 13 columns
      AACGTGGCCACAT Alpha
      AAGGTCGCCACAC Beta
      CAGTTCGCCACAA Gamma
      GAGATTTCCGCCT Delta
      GAGATCTCCGCCC Epsilon

    DNA calculator with 'identity' model::

      >>> calculator = DistanceCalculator('identity')
      >>> dm = calculator.get_distance(aln)
      >>> print(dm)  # doctest:+NORMALIZE_WHITESPACE
        Alpha   0.000000
        Beta    0.230769    0.000000
        Gamma   0.384615    0.230769    0.000000
        Delta   0.538462    0.538462    0.538462    0.000000
        Epsilon 0.615385    0.384615    0.461538    0.153846    0.000000
            Alpha   Beta    Gamma   Delta   Epsilon

    Protein calculator with 'blosum62' model::

      >>> calculator = DistanceCalculator('blosum62')
      >>> dm = calculator.get_distance(aln)
      >>> print(dm)  # doctest:+NORMALIZE_WHITESPACE
      Alpha   0.000000
      Beta    0.369048    0.000000
      Gamma   0.493976    0.250000    0.000000
      Delta   0.585366    0.547619    0.566265    0.000000
      Epsilon 0.700000    0.355556    0.488889    0.222222    0.000000
          Alpha   Beta    Gamma   Delta   Epsilon

    Same calculation, using the new Alignment object::

      >>> from Bio.Phylo.TreeConstruction import DistanceCalculator
      >>> from Bio import Align
      >>> aln = Align.read('TreeConstruction/msa.phy', 'phylip')
      >>> print(aln)  # doctest:+NORMALIZE_WHITESPACE
      Alpha             0 AACGTGGCCACAT 13
      Beta              0 AAGGTCGCCACAC 13
      Gamma             0 CAGTTCGCCACAA 13
      Delta             0 GAGATTTCCGCCT 13
      Epsilon           0 GAGATCTCCGCCC 13
      <BLANKLINE>

    DNA calculator with 'identity' model::

      >>> calculator = DistanceCalculator('identity')
      >>> dm = calculator.get_distance(aln)
      >>> print(dm)  # doctest:+NORMALIZE_WHITESPACE
      Alpha   0.000000
      Beta    0.230769    0.000000
      Gamma   0.384615    0.230769    0.000000
      Delta   0.538462    0.538462    0.538462    0.000000
      Epsilon 0.615385    0.384615    0.461538    0.153846    0.000000
          Alpha   Beta    Gamma   Delta   Epsilon

    Protein calculator with 'blosum62' model::

      >>> calculator = DistanceCalculator('blosum62')
      >>> dm = calculator.get_distance(aln)
      >>> print(dm)  # doctest:+NORMALIZE_WHITESPACE
      Alpha   0.000000
      Beta    0.369048    0.000000
      Gamma   0.493976    0.250000    0.000000
      Delta   0.585366    0.547619    0.566265    0.000000
      Epsilon 0.700000    0.355556    0.488889    0.222222    0.000000
          Alpha   Beta    Gamma   Delta   Epsilon

    ABCDEFGHIKLMNPQRSTVWXYZNUC.4.4blastnidentityNc                 C   sx   |r|| _ n|dkrd| _ nd| _ |dkrd| _dS || jv r2|dkr&d}n| }t|| _dS tdd| j )	z!Initialize with a distance model.rs   r   )-*Nrr   rq   z'Model not supported. Available models: , )skip_lettersscoring_matrixmodelsupperr   loadr#   rL   )r(   modelrw   r?   r   r   r   r)     s   

zDistanceCalculator.__init__c           
         s:  d}d} j du rt fddt||D }t|}nud}d}tdt|D ]d}|| }|| }	| jv s;|	 jv r<q'z| j ||f 7 }W n ty^   td| d|j d| ddw z| j |	|	f 7 }W n ty   td|	 d|j d| ddw | j ||	f 7 }q't	||}|dkrd	S d	||  S )
zCalculate pairwise distance from two sequences (PRIVATE).

        Returns a value between 0 (identical sequences) and 1 (completely
        different, or seq1 is an empty string.)
        r   Nc                 3   s0    | ]\}}| j vr| j vr||kV  qd S r   )rw   )r   l1l2r@   r   r   r     s    z/DistanceCalculator._pairwise.<locals>.<genexpr>zBad letter 'z' in sequence 'z' at position ''r   )
rx   sumrd   r   r%   rw   r3   r#   idrb   )
r(   seq1seq2score	max_score
max_score1
max_score2r   r}   r~   r   r@   r   	_pairwise  sJ   


zDistanceCalculator._pairwisec           	      C   s   t |tr7dd |jD }t|}t|}t|D ]}t|D ]}| || || ||| || f< qq|S t |tr`dd |D }t|}t	|dD ]\}}| ||||j
|j
f< qM|S td)zReturn a DistanceMatrix for an Alignment or MultipleSeqAlignment object.

        :Parameters:
            msa : Alignment or MultipleSeqAlignment object representing a
                DNA or protein multiple sequence alignment.

        c                 S      g | ]}|j qS r   r   r   r   r   r   r   .      z3DistanceCalculator.get_distance.<locals>.<listcomp>c                 S   r   r   r   r   r   r   r   r   5  r   r0   zBMust provide an Alignment object or a MultipleSeqAlignment object.)r
   r   	sequencesrU   r   r%   r   r   re   combinationsr   r$   )	r(   msar"   dmr:   i1i2r   r   r   r   r   get_distance%  s$   
&
zDistanceCalculator.get_distance)rs   N)rC   rR   rS   rT   r!   protein_alphabet
dna_modelsprotein_modelsr   r{   r"   r?   r&   lowerissubsetalphabetappendry   r)   r   r   r   r   r   r   ro   l  s,    \

+ro   c                   @      e Zd ZdZdd ZdS )TreeConstructorz$Base class for all tree constructor.c                 C      t d)zCaller to build the tree from an Alignment or MultipleSeqAlignment object.

        This should be implemented in subclass.
        Method not implemented!NotImplementedError)r(   r   r   r   r   
build_treeD     zTreeConstructor.build_treeN)rC   rR   rS   rT   r   r   r   r   r   r   A      r   c                   @   sB   e Zd ZdZddgZdddZdd Zd	d
 Zdd Zdd Z	dS )DistanceTreeConstructora  Distance based tree constructor.

    :Parameters:
        method : str
            Distance tree construction method, 'nj'(default) or 'upgma'.
        distance_calculator : DistanceCalculator
            The distance matrix calculator for multiple sequence alignment.
            It must be provided if ``build_tree`` will be called.

    Examples
    --------
    Loading a small PHYLIP alignment from which to compute distances, and then
    build a upgma Tree::

      >>> from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
      >>> from Bio.Phylo.TreeConstruction import DistanceCalculator
      >>> from Bio import AlignIO
      >>> aln = AlignIO.read(open('TreeConstruction/msa.phy'), 'phylip')
      >>> constructor = DistanceTreeConstructor()
      >>> calculator = DistanceCalculator('identity')
      >>> dm = calculator.get_distance(aln)
      >>> upgmatree = constructor.upgma(dm)
      >>> print(upgmatree)
      Tree(rooted=True)
          Clade(branch_length=0, name='Inner4')
              Clade(branch_length=0.18749999999999994, name='Inner1')
                  Clade(branch_length=0.07692307692307693, name='Epsilon')
                  Clade(branch_length=0.07692307692307693, name='Delta')
              Clade(branch_length=0.11057692307692304, name='Inner3')
                  Clade(branch_length=0.038461538461538464, name='Inner2')
                      Clade(branch_length=0.11538461538461536, name='Gamma')
                      Clade(branch_length=0.11538461538461536, name='Beta')
                  Clade(branch_length=0.15384615384615383, name='Alpha')

    Build a NJ Tree::

      >>> njtree = constructor.nj(dm)
      >>> print(njtree)
      Tree(rooted=False)
          Clade(branch_length=0, name='Inner3')
              Clade(branch_length=0.18269230769230765, name='Alpha')
              Clade(branch_length=0.04807692307692307, name='Beta')
              Clade(branch_length=0.04807692307692307, name='Inner2')
                  Clade(branch_length=0.27884615384615385, name='Inner1')
                      Clade(branch_length=0.051282051282051266, name='Epsilon')
                      Clade(branch_length=0.10256410256410259, name='Delta')
                  Clade(branch_length=0.14423076923076922, name='Gamma')

    Same example, using the new Alignment class::

      >>> from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
      >>> from Bio.Phylo.TreeConstruction import DistanceCalculator
      >>> from Bio import Align
      >>> aln = Align.read(open('TreeConstruction/msa.phy'), 'phylip')
      >>> constructor = DistanceTreeConstructor()
      >>> calculator = DistanceCalculator('identity')
      >>> dm = calculator.get_distance(aln)
      >>> upgmatree = constructor.upgma(dm)
      >>> print(upgmatree)
      Tree(rooted=True)
          Clade(branch_length=0, name='Inner4')
              Clade(branch_length=0.18749999999999994, name='Inner1')
                  Clade(branch_length=0.07692307692307693, name='Epsilon')
                  Clade(branch_length=0.07692307692307693, name='Delta')
              Clade(branch_length=0.11057692307692304, name='Inner3')
                  Clade(branch_length=0.038461538461538464, name='Inner2')
                      Clade(branch_length=0.11538461538461536, name='Gamma')
                      Clade(branch_length=0.11538461538461536, name='Beta')
                  Clade(branch_length=0.15384615384615383, name='Alpha')

    Build a NJ Tree::

      >>> njtree = constructor.nj(dm)
      >>> print(njtree)
      Tree(rooted=False)
          Clade(branch_length=0, name='Inner3')
              Clade(branch_length=0.18269230769230765, name='Alpha')
              Clade(branch_length=0.04807692307692307, name='Beta')
              Clade(branch_length=0.04807692307692307, name='Inner2')
                  Clade(branch_length=0.27884615384615385, name='Inner1')
                      Clade(branch_length=0.051282051282051266, name='Epsilon')
                      Clade(branch_length=0.10256410256410259, name='Delta')
                  Clade(branch_length=0.14423076923076922, name='Gamma')

    njupgmaNc                 C   sR   |du s	t |tr|| _ntd|| jv r|| _dS td| d d| j )rX   N)Must provide a DistanceCalculator object.zBad method: z. Available methods: rv   )r
   ro   distance_calculatorr$   methodsmethodrL   )r(   r   r   r   r   r   r)     s    



z DistanceTreeConstructor.__init__c                 C   sD   | j r| j |}d}| jdkr| |}|S | |}|S td)z7Construct and return a Tree, Neighbor Joining or UPGMA.Nr   r   )r   r   r   r   r   r$   )r(   r   r   treer   r   r   r     s   


z"DistanceTreeConstructor.build_treec                 C   s  t |ts	tdt|}dd |jD }d}d}d}t|dkr|d }tdt|D ]}td|D ]}	||||	f krH|||	f }|}|	}q4q-|| }
|| }|d7 }t	ddt
| }|j|
 |j| |
 rv|d	 |
_n
|d	 | |
 |
_| r|d	 |_n
|d	 | | |_|||< ||= tdt|D ]}||kr||kr|||f |||f  d	 |||f< qdt
| |j|< ||= t|dks"d|_t|S )
a  Construct and return an UPGMA tree.

        Constructs and returns an Unweighted Pair Group Method
        with Arithmetic mean (UPGMA) tree.

        :Parameters:
            distance_matrix : DistanceMatrix
                The distance matrix for tree construction.

        %Must provide a DistanceMatrix object.c                 S      g | ]}t d |qS r   r   Clader   r?   r   r   r   r         z1DistanceTreeConstructor.upgma.<locals>.<listcomp>r   r   r   r   NInnerr0   )r
   rU   r$   copydeepcopyr"   r   r%   r   r   r   cladesr   is_terminalbranch_length
_height_ofTree)r(   distance_matrixr   r   min_imin_jinner_countmin_distr   r_   clade1clade2inner_cladekr   r   r   r     sP   

$)
zDistanceTreeConstructor.upgmac                 C   sf  t |ts	tdt|}dd |jD }dgt| }d}d}d}t|dkr4|d }tj|ddS t|dkrzd}d}|| }	|| }
|||f d	 |	_	|||f |	j	 |
_	t
d
d}|j|	 |j|
 ||d< |d }tj|ddS t|dkrntdt|D ](}d||< tdt|D ]}||  |||f 7  < q|| t|d  ||< q|d |d  |d  }d}d}tdt|D ]"}td|D ]}|||f ||  ||  }||kr|}|}|}qq|| }	|| }
|d7 }t
d
dt| }|j|	 |j|
 |||f ||  ||  d	 |	_	|||f |	j	 |
_	|||< ||= tdt|D ]%}||krZ||krZ|||f |||f  |||f  d	 |||f< q6dt| |j|< ||= t|dksd
}|d |krd|d _	|d |d _	|d j|d  |d }n|d |d _	d|d _	|d j|d  |d }tj|ddS )zConstruct and return a Neighbor Joining tree.

        :Parameters:
            distance_matrix : DistanceMatrix
                The distance matrix for tree construction.

        r   c                 S   r   r   r   r   r   r   r   r     r   z.DistanceTreeConstructor.nj.<locals>.<listcomp>r   r   F)rootedr0   g       @Nr   r   )r
   rU   r$   r   r   r"   r   r   r   r   r   r   r   r%   r   )r(   r   r   r   	node_distr   r   r   rootr   r   r   r   r_   r   tempr   r   r   r   r     s   

"0


zDistanceTreeConstructor.njc                    s6   d}|  r|j}|S |t fdd|jD  }|S )zECalculate clade height -- the longest path to any terminal (PRIVATE).r   c                 3   s    | ]}  |V  qd S r   )r   r   cr@   r   r   r   s  r   z5DistanceTreeConstructor._height_of.<locals>.<genexpr>)r   r   rb   r   )r(   cladeheightr   r@   r   r   m  s   z"DistanceTreeConstructor._height_of)Nr   )
rC   rR   rS   rT   r   r)   r   r   r   r   r   r   r   r   r   L  s    V
Bgr   c                   @   r   )Scorerz(Base class for all tree scoring methods.c                 C   r   )ztCaller to get the score of a tree for the given alignment.

        This should be implemented in subclass.
        r   r   )r(   r   	alignmentr   r   r   	get_score}  r   zScorer.get_scoreN)rC   rR   rS   rT   r   r   r   r   r   r   z  r   r   c                   @   r   )TreeSearcherz*Base class for all tree searching methods.c                 C   r   )znCaller to search the best tree with a starting tree.

        This should be implemented in subclass.
        r   r   r(   starting_treer   r   r   r   search  r   zTreeSearcher.searchN)rC   rR   rS   rT   r   r   r   r   r   r     r   r   c                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )NNITreeSearcherzTree searching with Nearest Neighbor Interchanges (NNI) algorithm.

    :Parameters:
        scorer : ParsimonyScorer
            parsimony scorer to calculate the parsimony score of
            different trees during NNI algorithm.

    c                 C   s   t |tr
|| _dS td)rX   zMust provide a Scorer object.N)r
   r   scorerr$   )r(   r   r   r   r   r)     s   

zNNITreeSearcher.__init__c                 C   s   |  ||S )aI  Implement the TreeSearcher.search method.

        :Parameters:
           starting_tree : Tree
               starting tree of NNI method.
           alignment : Alignment or MultipleSeqAlignment object
               multiple sequence alignment used to calculate parsimony
               score of different NNI trees.

        )_nnir   r   r   r   r     s   zNNITreeSearcher.searchc                 C   sV   |}	 | j ||}|}| |D ]}| j ||}||k r"|}|}q||kr*	 |S q)zESearch for the best parsimony tree using the NNI algorithm (PRIVATE).)r   r   _get_neighbors)r(   r   r   	best_tree
best_scorer   tr   r   r   r   r     s   zNNITreeSearcher._nnic                 C   s  i }|  D ]}||jkr$||}t|dkr|j||< q|d ||< qg }g }|jddD ]O}||jkr|jd }|jd }|| || | s| s|jd }	|jd }
|jd }|jd= |jd= |j| |j|	 t	|}|| |jd= |jd= |j|
 |j| t	|}|| |jd= |jd= |j|	 |j
d|
 q/||v rq/|jd }|jd }|| }||jd kr&|jd }|jd= |jd= |j| |j| t	|}|| |jd= |jd= |j| |j| t	|}|| |jd= |jd= |j| |j
d| q/|jd }|jd= |jd= |j
d| |j| t	|}|| |jd= |jd= |j
d| |j| t	|}|| |jd= |jd= |j
d| |j
d| q/|S )zmGet all neighbor trees of the given tree (PRIVATE).

        Currently only for binary rooted trees.
        r   levelorderr   )find_cladesr   get_pathr   get_nonterminalsr   r   r   r   r   r>   )r(   r   parentsr   	node_path	neighborsroot_childsleftright
left_right
right_leftright_right	temp_treeparentsisterr   r   r   r     s   

























zNNITreeSearcher._get_neighborsN)rC   rR   rS   rT   r)   r   r   r   r   r   r   r   r     s    	r   c                   @   "   e Zd ZdZdddZdd ZdS )ParsimonyScorera	  Parsimony scorer with a scoring matrix.

    This is a combination of Fitch algorithm and Sankoff algorithm.
    See ParsimonyTreeConstructor for usage.

    :Parameters:
        matrix : _Matrix
            scoring matrix used in parsimony score calculation.

    Nc                 C   s    |rt |tr|| _dS td)rX   zMust provide a _Matrix object.N)r
   r   r&   r$   )r(   r&   r   r   r   r)   3  s   
zParsimonyScorer.__init__c                 C   sx  |  std|js|  | }|jdd d |  t|tr5tdd t	||D s4tdntdd t	||j
D sFtdd	}tt|d	 D ]}d	}|d
d
|f }|t||d	  krgqP| jstt	|dd |D }|jddD ]#}	|	j}
||
d	  }||
d  }||@ }|s||B }|d7 }|||	< q|ntd}| jj}t|}i }tt|D ]}|g| }||| }d	||< |||| < q|jddD ]\}	|	j}
||
d	  }||
d  }g }t|D ]@}|}|}t|D ].}| j|| || f ||  }| j|| || f ||  }||kr|}||kr#|}q|||  q|||	< qt|}||7 }qP|S )zCalculate parsimony score using the Fitch algorithm.

        Calculate and return the parsimony score given a tree and the
        MSA using either the Fitch algorithm (without a penalty matrix)
        or the Sankoff algorithm (with a matrix).
        z(The tree provided should be bifurcating.c                 S   s   | j S r   )r?   )termr   r   r   <lambda>H  s    z+ParsimonyScorer.get_score.<locals>.<lambda>)keyc                 s        | ]\}}|j |jkV  qd S r   r?   r   )r   r   ar   r   r   r   K  r^   z,ParsimonyScorer.get_score.<locals>.<genexpr>zDTaxon names of the input tree should be the same with the alignment.c                 s   r   r   r   )r   r   r   r   r   r   r   P  r^   r   Nc                 S   s   g | ]}|hqS r   r   r   r   r   r   r   d  r   z-ParsimonyScorer.get_score.<locals>.<listcomp>	postorderr   r   inf)is_bifurcatingr#   r   root_at_midpointget_terminalssortr
   r   r    rd   r   r%   r   r&   dictr   r   floatr"   r/   r   min)r(   r   r   termsr   r   score_icolumn_iclade_statesr   clade_childs
left_stateright_statestater   r   lengthclade_scoresr_   arrayr/   
left_scoreright_scoremmin_lmin_rr:   slsrr   r   r   r   :  s   






zParsimonyScorer.get_scorer   )rC   rR   rS   rT   r)   r   r   r   r   r   r   '  s    
r   c                   @   r   )ParsimonyTreeConstructora  Parsimony tree constructor.

    :Parameters:
        searcher : TreeSearcher
            tree searcher to search the best parsimony tree.
        starting_tree : Tree
            starting tree provided to the searcher.

    Examples
    --------
    We will load an alignment, and then load various trees which have already been computed from it::

      >>> from Bio import AlignIO, Phylo
      >>> aln = AlignIO.read(open('TreeConstruction/msa.phy'), 'phylip')
      >>> print(aln)
      Alignment with 5 rows and 13 columns
      AACGTGGCCACAT Alpha
      AAGGTCGCCACAC Beta
      CAGTTCGCCACAA Gamma
      GAGATTTCCGCCT Delta
      GAGATCTCCGCCC Epsilon

    Load a starting tree::

      >>> starting_tree = Phylo.read('TreeConstruction/nj.tre', 'newick')
      >>> print(starting_tree)
      Tree(rooted=False, weight=1.0)
          Clade(branch_length=0.0, name='Inner3')
              Clade(branch_length=0.01421, name='Inner2')
                  Clade(branch_length=0.23927, name='Inner1')
                      Clade(branch_length=0.08531, name='Epsilon')
                      Clade(branch_length=0.13691, name='Delta')
                  Clade(branch_length=0.2923, name='Alpha')
              Clade(branch_length=0.07477, name='Beta')
              Clade(branch_length=0.17523, name='Gamma')

    Build the Parsimony tree from the starting tree::

      >>> scorer = Phylo.TreeConstruction.ParsimonyScorer()
      >>> searcher = Phylo.TreeConstruction.NNITreeSearcher(scorer)
      >>> constructor = Phylo.TreeConstruction.ParsimonyTreeConstructor(searcher, starting_tree)
      >>> pars_tree = constructor.build_tree(aln)
      >>> print(pars_tree)
      Tree(rooted=True, weight=1.0)
          Clade(branch_length=0.0)
              Clade(branch_length=0.19732999999999998, name='Inner1')
                  Clade(branch_length=0.13691, name='Delta')
                  Clade(branch_length=0.08531, name='Epsilon')
              Clade(branch_length=0.04194000000000003, name='Inner2')
                  Clade(branch_length=0.01421, name='Inner3')
                      Clade(branch_length=0.17523, name='Gamma')
                      Clade(branch_length=0.07477, name='Beta')
                  Clade(branch_length=0.2923, name='Alpha')

    Same example, using the new Alignment class::

      >>> from Bio import Align, Phylo
      >>> alignment = Align.read(open('TreeConstruction/msa.phy'), 'phylip')
      >>> print(alignment)
      Alpha             0 AACGTGGCCACAT 13
      Beta              0 AAGGTCGCCACAC 13
      Gamma             0 CAGTTCGCCACAA 13
      Delta             0 GAGATTTCCGCCT 13
      Epsilon           0 GAGATCTCCGCCC 13
      <BLANKLINE>

    Load a starting tree::

      >>> starting_tree = Phylo.read('TreeConstruction/nj.tre', 'newick')
      >>> print(starting_tree)
      Tree(rooted=False, weight=1.0)
          Clade(branch_length=0.0, name='Inner3')
              Clade(branch_length=0.01421, name='Inner2')
                  Clade(branch_length=0.23927, name='Inner1')
                      Clade(branch_length=0.08531, name='Epsilon')
                      Clade(branch_length=0.13691, name='Delta')
                  Clade(branch_length=0.2923, name='Alpha')
              Clade(branch_length=0.07477, name='Beta')
              Clade(branch_length=0.17523, name='Gamma')

    Build the Parsimony tree from the starting tree::

      >>> scorer = Phylo.TreeConstruction.ParsimonyScorer()
      >>> searcher = Phylo.TreeConstruction.NNITreeSearcher(scorer)
      >>> constructor = Phylo.TreeConstruction.ParsimonyTreeConstructor(searcher, starting_tree)
      >>> pars_tree = constructor.build_tree(alignment)
      >>> print(pars_tree)
      Tree(rooted=True, weight=1.0)
          Clade(branch_length=0.0)
              Clade(branch_length=0.19732999999999998, name='Inner1')
                  Clade(branch_length=0.13691, name='Delta')
                  Clade(branch_length=0.08531, name='Epsilon')
              Clade(branch_length=0.04194000000000003, name='Inner2')
                  Clade(branch_length=0.01421, name='Inner3')
                      Clade(branch_length=0.17523, name='Gamma')
                      Clade(branch_length=0.07477, name='Beta')
                  Clade(branch_length=0.2923, name='Alpha')

    Nc                 C   s   || _ || _dS rW   )searcherr   )r(   r  r   r   r   r   r)     s   
z!ParsimonyTreeConstructor.__init__c                 C   s4   | j du rttdd}||| _ | j| j |S )zBuild the tree.

        :Parameters:
            alignment : MultipleSeqAlignment
                multiple sequence alignment to calculate parsimony tree.

        Nrs   r   )r   r   ro   r   r  r   )r(   r   dtcr   r   r   r     s   

z#ParsimonyTreeConstructor.build_treer   )rC   rR   rS   rT   r)   r   r   r   r   r   r    s    
dr  __main__)run_doctest)rT   re   r   r   	Bio.Phylor   	Bio.Alignr   r   r   r   rU   _DistanceMatrixro   r   r   r   r   r   r   r  rC   
Bio._utilsr  r   r   r   r   <module>   s8     '/ V  0 lz
