o
    Rŀg^                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlmZ G dd de	Z
dd	 Zd$d
dZdd Zdd Zdd Zdd Zd%ddZdd Zdd Zdd Zdd Zdd Zd d! Zd"d# ZdS )&zClasses and methods for finding consensus trees.

This module contains a ``_BitString`` class to assist the consensus tree
searching and some common consensus algorithms such as strict, majority rule and
adam consensus.
    N)literal_eval)MultipleSeqAlignment)BaseTreec                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zedd ZdS )
_BitStringaF  Helper class for binary string data (PRIVATE).

    Assistant class of binary string data used for storing and
    counting compatible clades in consensus tree searching. It includes
    some binary manipulation(&|^~) methods.

    _BitString is a sub-class of ``str`` object that only accepts two
    characters('0' and '1'), with additional functions for binary-like
    manipulation(&|^~). It is used to count and store the clades in
    multiple trees in consensus tree searching. During counting, the
    clades will be considered the same if their terminals(in terms of
    ``name`` attribute) are the same.

    For example, let's say two trees are provided as below to search
    their strict consensus tree::

        tree1: (((A, B), C),(D, E))
        tree2: ((A, (B, C)),(D, E))

    For both trees, a _BitString object '11111' will represent their
    root clade. Each '1' stands for the terminal clade in the list
    [A, B, C, D, E](the order might not be the same, it's determined
    by the ``get_terminal`` method of the first tree provided). For
    the clade ((A, B), C) in tree1 and (A, (B, C)) in tree2, they both
    can be represented by '11100'. Similarly, '11000' represents clade
    (A, B) in tree1, '01100' represents clade (B, C) in tree2, and '00011'
    represents clade (D, E) in both trees.

    So, with the ``_count_clades`` function in this module, finally we
    can get the clade counts and their _BitString representation as follows
    (the root and terminals are omitted)::

        clade   _BitString   count
        ABC     '11100'     2
        DE      '00011'     2
        AB      '11000'     1
        BC      '01100'     1

    To get the _BitString representation of a clade, we can use the following
    code snippet::

        # suppose we are provided with a tree list, the first thing to do is
        # to get all the terminal names in the first tree
        term_names = [term.name for term in trees[0].get_terminals()]
        # for a specific clade in any of the tree, also get its terminal names
        clade_term_names = [term.name for term in clade.get_terminals()]
        # then create a boolean list
        boolvals = [name in clade_term_names for name in term_names]
        # create the string version and pass it to _BitString
        bitstr = _BitString(''.join(map(str, map(int, boolvals))))
        # or, equivalently:
        bitstr = _BitString.from_bool(boolvals)

    To convert back::

        # get all the terminal clades of the first tree
        terms = [term for term in trees[0].get_terminals()]
        # get the index of terminal clades in bitstr
        index_list = bitstr.index_one()
        # get all terminal clades by index
        clade_terms = [terms[i] for i in index_list]
        # create a new calde and append all the terminal clades
        new_clade = BaseTree.Clade()
        new_clade.clades.extend(clade_terms)

    Examples
    --------
    >>> from Bio.Phylo.Consensus import _BitString
    >>> bitstr1 = _BitString('11111')
    >>> bitstr2 = _BitString('11100')
    >>> bitstr3 = _BitString('01101')
    >>> bitstr1
    _BitString('11111')
    >>> bitstr2 & bitstr3
    _BitString('01100')
    >>> bitstr2 | bitstr3
    _BitString('11101')
    >>> bitstr2 ^ bitstr3
    _BitString('10001')
    >>> bitstr2.index_one()
    [0, 1, 2]
    >>> bitstr3.index_one()
    [1, 2, 4]
    >>> bitstr3.index_zero()
    [0, 3]
    >>> bitstr1.contains(bitstr2)
    True
    >>> bitstr2.contains(bitstr3)
    False
    >>> bitstr2.independent(bitstr3)
    False
    >>> bitstr1.iscompatible(bitstr2)
    True
    >>> bitstr2.iscompatible(bitstr3)
    False

    c                 C   s:   t |trt||d|d krt| |S td)zInit from a binary string data.01z;The input should be a binary string composed of '0' and '1')
isinstancestrlencount__new__	TypeError)clsstrdata r   G/var/www/html/myenv/lib/python3.10/site-packages/Bio/Phylo/Consensus.pyr   z   s   z_BitString.__new__c                 C   s>   t d|  }t d| }||@ }tt|dd  t| S N0b   r   r   binzfillr
   selfotherselfintotherint	resultintr   r   r   __and__      z_BitString.__and__c                 C   s>   t d|  }t d| }||B }tt|dd  t| S r   r   r   r   r   r   __or__   r   z_BitString.__or__c                 C   s>   t d|  }t d| }||A }tt|dd  t| S r   r   r   r   r   r   __xor__   r   z_BitString.__xor__c                 C   s>   t d|  }t d| }||@ }tt|dd  t| S r   r   r   r   r   r   __rand__   r   z_BitString.__rand__c                 C   s>   t d|  }t d| }||B }tt|dd  t| S r   r   r   r   r   r   __ror__   r   z_BitString.__ror__c                 C   s>   t d|  }t d| }||A }tt|dd  t| S r   r   r   r   r   r   __rxor__   r   z_BitString.__rxor__c                 C   s   dt |  d S )Nz_BitString())r	   __repr__r   r   r   r   r&      s   z_BitString.__repr__c                 C      dd t | D S )z4Return a list of positions where the element is '1'.c                 S      g | ]
\}}|d kr|qS )r   r   .0inr   r   r   
<listcomp>       z(_BitString.index_one.<locals>.<listcomp>	enumerater'   r   r   r   	index_one      z_BitString.index_onec                 C   r(   )z4Return a list of positions where the element is '0'.c                 S   r)   )r   r   r*   r   r   r   r.      r/   z)_BitString.index_zero.<locals>.<listcomp>r0   r'   r   r   r   
index_zero   r3   z_BitString.index_zeroc                 C   s&   | |A }| d|  d| d kS )az  Check if current bitstr1 contains another one bitstr2.

        That is to say, the bitstr2.index_one() is a subset of
        bitstr1.index_one().

        Examples:
            "011011" contains "011000", "011001", "000011"

        Be careful, "011011" also contains "000000". Actually, all _BitString
        objects contain all-zero _BitString of the same length.

        r   r   r   r   xorbitr   r   r   contains   s   z_BitString.containsc                 C   s&   | |A }| d|  d| d kS )a  Check if current bitstr1 is independent of another one bitstr2.

        That is to say the bitstr1.index_one() and bitstr2.index_one() have
        no intersection.

        Be careful, all _BitString objects are independent of all-zero _BitString
        of the same length.
        r   r5   r6   r   r   r   independent   s   	z_BitString.independentc                 C   s   |  |p| | p| |S )zCheck if current bitstr1 is compatible with another bitstr2.

        Two conditions are considered as compatible:
         1. bitstr1.contain(bitstr2) or vice versa;
         2. bitstr1.independent(bitstr2).

        )r8   r9   )r   r   r   r   r   iscompatible   s   z_BitString.iscompatiblec                 C   s   | d tttt|S )N )joinmapr	   int)r   boolsr   r   r   	from_bool   s   z_BitString.from_boolN)__name__
__module____qualname____doc__r   r   r    r!   r"   r#   r$   r&   r2   r4   r8   r9   r:   classmethodr@   r   r   r   r   r      s"    b
r   c                    s>  t | }t|}| tt|g|\}fdd| D }|jdd dd t	 }|d 
dtkrA|j ntd	|d |i}|d
d D ]G}fdd| D  t	 }|j  | D ]'\}	}
|	|r||	=  fdd|
jD |
_|
j| |	|A }	|
||	<  nql|||< qQtj|dS )zSearch strict consensus tree from multiple trees.

    :Parameters:
        trees : iterable
            iterable of trees to produce consensus tree.

    c                    s    g | ]\}}|d   kr|qS r   r   )r+   bitstrt)
tree_countr   r   r.      s    z$strict_consensus.<locals>.<listcomp>c                 S   
   |  dS Nr   r5   rG   r   r   r   <lambda>      
 z"strict_consensus.<locals>.<lambda>Tkeyreverser   r   -Taxons in provided trees should be consistent   Nc                       g | ]} | qS r   r   r+   r,   termsr   r   r.          c                       g | ]}| vr|qS r   r   )r+   childclade_termsr   r   r.         root)iternextget_terminals_count_clades	itertoolschainitemssortr   Clader   r
   cladesextend
ValueErrorr2   r8   appendTree)trees
trees_iter
first_treebitstr_countsstrict_bitstrsr_   bitstr_cladesrG   cladebscr   )r\   rW   rI   r   strict_consensus   s8   


rw   c                    sr  t | }t|}| tt|g|\ }t   fdddd}t	 }|d 
dtkr:|j ntd|d |i}|dd	 D ]} | \}	}
d
|	 | }||d
 k r` nӇfdd| D t	 }|j ||_|
|	 |_t|dd dd}d}d	}g }|D ]+|sd} n!|r}|r|krtfdd|D r| q|sqJ|r||}fdd|jD |_|j| |||< |rg }|D ]}||  || }|j| |j| qfdd|D fdd|jD |_|||< t|td ks0t|td kr2t|jdkr2 nqJtj|dS )a7  Search majority rule consensus tree from multiple trees.

    This is a extend majority rule method, which means the you can set any
    cutoff between 0 ~ 1 instead of 0.5. The default value of cutoff is 0 to
    create a relaxed binary consensus tree in any condition (as long as one of
    the provided trees is a binary tree). The branch length of each consensus
    clade in the result consensus tree is the average length of all counts for
    that clade.

    :Parameters:
        trees : iterable
            iterable of trees to produce consensus tree.

    c                    s    |  d |  dt| fS )Nr   r   )r   r	   rL   )rq   r   r   rM   *  r/   z$majority_consensus.<locals>.<lambda>TrO   r   r   rR   rS   N      Y@c                    rT   r   r   rU   rV   r   r   r.   ;  rX   z&majority_consensus.<locals>.<listcomp>c                 S   rJ   rK   r5   ru   r   r   r   rM   @  rN   Fc                 3   s    | ]}|  V  qd S N)r9   r+   rv   ry   r   r   	<genexpr>T  s    z%majority_consensus.<locals>.<genexpr>c                    rY   r   r   r{   r[   r   r   r.   ^  s    c                    rT   r   r   rU   rV   r   r   r.   o  rX   c                    rY   r   r   r{   )remove_termsr   r   r.   p  r]   r      r^   )r`   ra   rb   rc   rd   re   sortedkeysr   rh   r   r
   ri   rj   rk   r2   
confidencebranch_lengthr:   r8   allrl   popremoverm   )rn   cutoff	tree_iterrp   rI   bitstrsr_   rs   rG   count_in_treesbranch_length_sumr   rt   bsckeys
compatibleparent_bitstrchild_bitstrsparent_claderemove_listrv   child_clader   )rq   ru   r\   r}   rW   r   majority_consensus  s   






&r   c                 C   s    dd | D }t jt|ddS )zSearch Adam Consensus tree from multiple trees.

    :Parameters:
        trees : list
            list of trees to produce consensus tree.

    c                 S      g | ]}|j qS r   r^   )r+   treer   r   r   r.         z"adam_consensus.<locals>.<listcomp>T)r_   rooted)r   rm   _part)rn   ri   r   r   r   adam_consensusz  s   r   c                    sP  d}| d   }dd |D  t|dkst|dkr!| d }|S tdt| h}| D ]}|jD ]}t| }t }t }|D ]N}	|	|krGq@|	|r^|| ||	|A  ||	 q@||	rk||	|A  q@|	|s||	|@  ||	|@ |A  ||	|@ |	A  ||	 q@||N }|rt	|dd	 d
D ]}
d}|D ]}	|
|	sd} nq|r||
 qq1q,t
 }t	|D ]d}| }t|dkr|j||d   qt|dkrt
 }|j||d   |j||d   |j| qt|dkr% fdd|D }g }| D ]}|t|| q|jt| q|S )z:Recursive function for Adam Consensus algorithm (PRIVATE).Nr   c                 S   r   r   namer+   termr   r   r   r.     r   z_part.<locals>.<listcomp>rS   r   r   c                 S   rJ   rK   r5   ry   r   r   r   rM     rN   z_part.<locals>.<lambda>)rP   TFc                    rT   r   r   rU   
term_namesr   r   r.     rX   )rb   r
   r   ri   _clade_to_bitstrsetr8   addr9   r   r   rh   r2   rl   
_sub_clader   )ri   	new_claderW   r   rt   rZ   rG   	to_removeto_addru   tar9   indicesbifur_clade
part_namesnext_cladesr   r   r   r     st   2








r   c                    s    fdd|D }  |}t|| krqt }|j| |jdddD ]G}||jkr/q't	|jddt	|@ }|rn|jdddD ])}t	|j}|| }	|
|rm|	rmt|	|_t }
|
jt| |j|
 qDq'|}|S )zTExtract a compatible subclade that only contains the given terminal names (PRIVATE).c                    s   g | ]}  |qS r   )find_anyr+   r   rt   r   r   r.     s    z_sub_clade.<locals>.<listcomp>Fpreorder)terminalorderTr   )common_ancestorr
   count_terminalsr   rh   ri   rj   find_cladesr_   r   issubsetlistrl   )rt   r   term_clades	sub_clade
temp_claderv   childrentctc_childrentc_new_cladesr   r   r   r   r     s*   



r   c           	      C   s   i }d}| D ]<}|d7 }t |}|jddD ]+}|| }||v r8|| \}}|d7 }||jp/d7 }||f||< qd|jp=df||< qq||fS )aw  Count distinct clades (different sets of terminal names) in the trees (PRIVATE).

    Return a tuple first a dict of bitstring (representing clade) and a tuple of its count of
    occurrences and sum of branch length for that clade, second the number of trees processed.

    :Parameters:
        trees : iterable
            An iterable that returns the trees to count

    r   rS   Fr   )_tree_to_bitstrsr   r   )	rn   r   rI   r   clade_bitstrsrt   rG   r   sum_blr   r   r   rc     s   	rc   c                 C   s   t dd | jddD }i }|}|du r(zt|}W n ty'   tddw | jddD ]}t||}|df||< q.|D ]+}|jddD ]"}t||}||v rh|| \}	}
|
d	 d
 | |	_|	|
d	 f||< qFq>| S )a  Calculate branch support for a target tree given bootstrap replicate trees.

    :Parameters:
        target_tree : Tree
            tree to calculate branch support for.
        trees : iterable
            iterable of trees used to calculate branch support.
        len_trees : int
            optional count of replicates in trees. len_trees must be provided
            when len(trees) is not a valid operation.

    c                 s   s    | ]}|j V  qd S rz   r   r   r   r   r   r|     s    zget_support.<locals>.<genexpr>Tr   NzzTrees does not support len(trees), you must provide the number of replicates in trees as the optional parameter len_trees.Fr   rS   rx   )r   r   r
   r   r   r   )target_treern   	len_treesr   r   sizert   rG   r   rv   rH   r   r   r   get_support  s4   

r   c                 c   s    t | d }d}||k rH|d7 }d}t|D ]'}td|d }|s0| dd||d f }q|| dd||d f 7 }q|V  ||k sdS dS )a  Generate bootstrap replicates from a multiple sequence alignment (OBSOLETE).

    :Parameters:
        msa : MultipleSeqAlignment
            multiple sequence alignment to generate replicates.
        times : int
            number of bootstrap times.

    r   rS   N)r
   rangerandomrandint)msatimeslengthr,   itemjcolr   r   r   	bootstrap  s   
r   c                 #   s    t | trMt| d }t|D ]:}d}t|D ])}td|d }|du r3| dd||d f }q|| dd||d f 7 }q||}|V  qdS | j\}	 t|D ]} fddt D }
|| dd|
f }|V  qVdS )ay  Generate bootstrap replicate trees from a multiple sequence alignment.

    :Parameters:
        alignment : Alignment or MultipleSeqAlignment object
            multiple sequence alignment to generate replicates.
        times : int
            number of bootstrap times.
        tree_constructor : TreeConstructor
            tree constructor to be used to build trees.

    r   NrS   c                    s   g | ]
}t d  d qS )r   rS   )r   r   )r+   r   mr   r   r.   P  r/   z#bootstrap_trees.<locals>.<listcomp>)r   r   r
   r   r   r   
build_treeshape)	alignmentr   tree_constructorr   r,   bootstrapped_alignmentr   r   r   r-   colsr   r   r   bootstrap_trees5  s&   


r   c                 C   s   t | ||}||}|S )a  Consensus tree of a series of bootstrap trees for a multiple sequence alignment.

    :Parameters:
        alignment : Alignment or MultipleSeqAlignment object
            Multiple sequence alignment to generate replicates.
        times : int
            Number of bootstrap times.
        tree_constructor : TreeConstructor
            Tree constructor to be used to build trees.
        consensus : function
            Consensus method in this module: ``strict_consensus``,
            ``majority_consensus``, ``adam_consensus``.

    )r   )r   r   r   	consensusrn   r   r   r   r   bootstrap_consensusU  s   r   c                    s.   dd | j ddD  t fdd|D S )zRCreate a BitString representing a clade, given ordered tree taxon names (PRIVATE).c                 S      h | ]}|j qS r   r   r   r   r   r   	<setcomp>k  r   z#_clade_to_bitstr.<locals>.<setcomp>Tr   c                 3   s    | ]}| v V  qd S rz   r   r   clade_term_namesr   r   r|   l  s    z#_clade_to_bitstr.<locals>.<genexpr>)r   r   r@   )rt   tree_term_namesr   r   r   r   i  s   r   c                 C   sB   i }dd | j ddD }| j ddD ]}t||}|||< q|S )zGCreate a dict of a tree's clades to corresponding BitStrings (PRIVATE).c                 S   r   r   r   r   r   r   r   r.   r  r   z$_tree_to_bitstrs.<locals>.<listcomp>Tr   F)r   r   )r   clades_bitstrsr   rt   rG   r   r   r   r   o  s   

r   c                 C   s2   i }t |  D ]\}}t|jpdd||< q|S )zGenerate a branch length dict for a tree, keyed by BitStrings (PRIVATE).

    Create a dict of all clades' BitStrings to the corresponding branch
    lengths (rounded to 5 decimal places).
    g           )r   rf   roundr   )r   r   rt   rG   r   r   r   _bitstring_topologyy  s   r   c                 C   sD   dd | j ddD }dd |j ddD }||ko!t| t|kS )zAre two trees are equal in terms of topology and branch lengths (PRIVATE).

    (Branch lengths checked to 5 decimal places.)
    c                 S   r   r   r   r   r   r   r   r     r   z"_equal_topology.<locals>.<setcomp>Tr   c                 S   r   r   r   r   r   r   r   r     r   )r   r   )tree1tree2term_names1term_names2r   r   r   _equal_topology  s
   r   rF   rz   )rD   rd   r   astr   	Bio.Alignr   	Bio.Phylor   r	   r   rw   r   r   r   r   rc   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s,    I
3h;
( 
