o
    RŀgK                 
   @   s  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddlm
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ dZg dZg dZdZdZdZdgZdZdZdZdZG dd deZ G dd dZ!G dd dZ"dEddZ#dd  Z$dFd"d#Z%d$d% Z&d&d' Z'd(d) Z(d*d+ Z)d,d- Z*d.d/ Z+d0d1 Z,d2d3 Z-d4d5 Z.G d6d7 d7Z/G d8d9 d9Z0G d:d; d;Z1zd<d=l2m3Z3 W n  e4y Z5 ze6d>e5 d?e
 d@dA Z7W Y dZ5[5n	dZ5[5ww dBdA Z7e8dCkrddDl9m:Z: e:  dS dS )GzNexus class. Parse the contents of a NEXUS file.

Based upon 'NEXUS: An extensible file format for systematic information'
Maddison, Swofford, Maddison. 1997. Syst. Biol. 46(4):590-621
    N)reduce)BiopythonDeprecationWarning)BiopythonWarning)File)	IUPACData)StandardData)TreeSeqF   )charstatelabels
charlabels	taxlabelstaxsetcharsetcharpartitiontaxpartitionmatrixtreeutree	translatecodonpossettitle)treesdata
characterstaxasetscodonsz()[]{}\,;:=*\'"`+-<>?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_z 	
&charsr   codonpositionszH#NEXUS
begin data; dimensions ntax=0 nchar=0; format datatype=dna; end; c                   @   s   e Zd ZdZdS )
NexusErrorz1Provision for the management of Nexus exceptions.N)__name__
__module____qualname____doc__ r(   r(   C/var/www/html/myenv/lib/python3.10/site-packages/Bio/Nexus/Nexus.pyr#   >   s    r#   c                   @   s`   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd ZdS )
CharBufferzHelps reading NEXUS-words and characters from a buffer (semi-PRIVATE).

    This class is not intended for public use (any more).
    c                 C   s   |r	t || _dS g | _dS Initialize the class.N)listbuffer)selfstringr(   r(   r)   __init__H   s   
zCharBuffer.__init__c                 C   s   | j r| j d S dS )z+Return the first character from the buffer.r   N)r.   r/   r(   r(   r)   peekO   s   
zCharBuffer.peekc                 C   s    d | j }|r|d S dS )zBReturn the first character from the buffer, do not include spaces. r   N)joinr.   strip)r/   br(   r(   r)   peek_nonwhitespaceV   s   zCharBuffer.peek_nonwhitespacec                 C   s   | j r	| j dS dS )z*Iterate over NEXUS characters in the file.r   N)r.   popr2   r(   r(   r)   __next__^   s   zCharBuffer.__next__c                 C   s&   	 t | }|du r	 dS |tvr|S q)z6Check for next non whitespace character in NEXUS file.TN)next
WHITESPACE)r/   pr(   r(   r)   next_nonwhitespacee   s   zCharBuffer.next_nonwhitespacec                 C   s4   | j d tv r| j dd | _ | j d tv sdS dS )z)Skip whitespace characters in NEXUS file.r      N)r.   r<   r2   r(   r(   r)   skip_whitespaceo   s   zCharBuffer.skip_whitespacec              	   C   s\   |D ])}z| j |}W n	 ty   Y qw d| j d| }| j |d | _ |  S dS )z@Iterate over the NEXUS file until a target character is reached.r4   N)r.   index
ValueErrorr5   )r/   targettposfoundr(   r(   r)   
next_untilt   s   zCharBuffer.next_untilc                 C   s   d | jdt| |kS )z#Return a word stored in the buffer.r4   N)r5   r.   len)r/   wordr(   r(   r)   	peek_word   s   zCharBuffer.peek_wordc                 C   s   g }d}|   }|sdS || |dkrd}n|dkrd}n|tv r%|S 	 |  }||krD|t|  |  |kr@t|  n |rCnn|rN|t|  n|rX|tv sX|tv rYn|t|  q&d|S )zReturn the next NEXUS word from a string.

        This deals with single and double quotes, whitespace and punctuation.
        FN'"Tr4   )r>   appendPUNCTUATIONr3   r;   r<   r5   )r/   rI   quotedfirstcr(   r(   r)   	next_word   s8   


zCharBuffer.next_wordc                 C   s   d | jS )z.Return the rest of the string without parsing.r4   )r5   r.   r2   r(   r(   r)   rest      zCharBuffer.restN)r$   r%   r&   r'   r1   r3   r8   r:   r>   r@   rG   rJ   rR   rS   r(   r(   r(   r)   r*   B   s    
&r*   c                   @   sJ   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dddZ
dS )
StepMatrixzCalculate a stepmatrix for weighted parsimony.

    See :
    COMBINATORIAL WEIGHTS IN PHYLOGENETIC ANALYSIS - A STATISTICAL PARSIMONY PROCEDURE
    Wheeler (1990), Cladistics 6:269-275.
    c                    sX   i | _ t|| _|r| j| | jD ]  fdd| jD D ]	}|  |d qqdS )r,   c                    s   g | ]}| kr|qS r(   r(   .0sxr(   r)   
<listcomp>       z'StepMatrix.__init__.<locals>.<listcomp>r   N)r   sortedsymbolsrM   set)r/   r^   gapyr(   rY   r)   r1      s   

zStepMatrix.__init__c                 C   s$   ||kr	||}}|| j || < dS )z+Set a given value in the matrix's position.Nr   r/   rZ   ra   valuer(   r(   r)   r_      s   
zStepMatrix.setc                 C   s,   ||kr	||}}| j ||   |7  < dS )z6Add the given value to existing, in matrix's position.Nrb   rc   r(   r(   r)   add   s   
zStepMatrix.addc                 C   s   t dd | j S )z9Calculate the associations, makes matrix of associations.c                 S   s   | | S Nr(   )rZ   ra   r(   r(   r)   <lambda>       z StepMatrix.sum.<locals>.<lambda>)r   r   valuesr2   r(   r(   r)   sum   s   zStepMatrix.sumc                 C   s2   |   }|dkr| jD ]}| j|  |  < q| S )zlCalculate the transformation matrix.

        Normalizes the columns of the matrix of associations.
        r   )rj   r   )r/   totalkr(   r(   r)   transformation   s
   
zStepMatrix.transformationc                 C   s6   | j D ]}| j | dkrt| j |  | j |< q| S )zCalculate the Phylogenetic weight matrix.

        Constructed from the logarithmic transformation of the
        transformation matrix.
        r   )r   mathlog)r/   rl   r(   r(   r)   	weighting   s
   
zStepMatrix.weightingyour_name_herec                 C   s   d|t | jf }|dd| j d7 }| jD ]I}|dd| 7 }| jD ]6}||kr1|d7 }q&||kr;||}}n||}}| j||  dkrN|d7 }q&|d	d
| j||   7 }q&|d7 }q|d7 }|S )zPrint a stepmatrix.zusertype %s stepmatrix=%d
z        
z[%s]   z	 .       r   z	inf.     z%2.2f
   ;
)rH   r^   r5   ljustr   )r/   namer   rZ   ra   x1y1r(   r(   r)   smprint   s    





zStepMatrix.smprintN)rq   )r$   r%   r&   r'   r1   r_   re   rj   rm   rp   rz   r(   r(   r(   r)   rU      s    
rU   Fc                 C   sZ   |r|  dd}ddd |D }|S |  dd}t|ttt r+d| d }|S )a  Return a taxon identifier according to NEXUS standard.

    Wrap quotes around names with punctuation or whitespace, and double
    single quotes.

    mrbayes=True: write names without quotes, whitespace or punctuation
    for the mrbayes software package.
     _r4   c                 s   s    | ]	}|t v r|V  qd S rf   )MRBAYESSAFErW   rQ   r(   r(   r)   	<genexpr>      zsafename.<locals>.<genexpr>rK   z'')replacer5   r_   intersectionr<   rN   )rw   mrbayessafer(   r(   r)   safename   s   	r   c                 C   sh   | sdS |  dr| ds|  dr2| dr2| dd } |  dr(| ds|  dr2| ds| S )z6Remove quotes and/or double quotes around identifiers.NrK   rL   r?   )
startswithendswith)rI   r(   r(   r)   
quotestrip  s   r   -?c                 C   s   t | }|dkr
dS |d }|dkr&| | |v r&|d8 }|dkr&| | |v sd}||k r@| | |v r@|d7 }||k r@| | |v s2||krJ|dkrJdS ||fS )zoReturn position of first and last character which is not in skiplist.

    Skiplist defaults to ['-','?'].
    r   NNr?   r   )r   r   rH   )sequenceskiplistlengthendstartr(   r(   r)   get_start_end  s   r   c                    s"   t  fdd D  fdddS )zBReturn a sorted list of keys of p sorted by values of p (PRIVATE).c                 3   s    | ]	} | r|V  qd S rf   r(   )rW   pnr=   r(   r)   r   4  r   z'_sort_keys_by_values.<locals>.<genexpr>c                    s    |  S rf   r(   )r   r   r(   r)   rg   4  rh   z&_sort_keys_by_values.<locals>.<lambda>)key)r]   r   r(   r   r)   _sort_keys_by_values2  s   "r   c                 C   s   t t| S )zRCheck all values in list are unique and return a pruned and sorted list (PRIVATE).)r]   r_   )ri   r(   r(   r)   _make_unique7  rT   r   c                 C   s~   || v r=| d}|d dr5d}|d dkr$t|d dd d }d|dd  d| }|}n|d7 }|| v s|S )zFReturn a unique name if label is already in previous_labels (PRIVATE)..r   copyr?      Nz.copy)splitr   intr5   )previous_labelslabellabel_splitcopy_num	new_labelr(   r(   r)   _unique_label<  s   

r   c                    s    fdd D S )zHConvert a Seq-object matrix to a plain sequence-string matrix (PRIVATE).c                    s   i | ]	}|t  | qS r(   strrW   rD   r   r(   r)   
<dictcomp>M      z(_seqmatrix2strmatrix.<locals>.<dictcomp>r(   r   r(   r   r)   _seqmatrix2strmatrixK  s   r   c                 C   sN  | sdS t t| } g }| dd }||d d  t|dkrd}t|D ]t\}}||d ||  kr6q'|dkrUt|dkrU||d  | ||d  krU||d  }q'|d| }t|dkrm|t|d d  n(|dkr|d|d d |d d f  n|d	|d d |d d |f  ||d } t|dks!d
|S )aH  Compact lists for Nexus output (PRIVATE).

    Example
    -------
    >>> _compact4nexus([1, 2, 3, 5, 6, 7, 8, 12, 15, 18, 20])
    '2-4 6-9 13-19\\3 21'

    Transform [1 2 3 5 6 7 8 12 15 18 20] (baseindex 0, used in the Nexus class)
    into '2-4 6-9 13-19\\3 21' (baseindex 1, used in programs like Paup or MrBayes.).

    r4   Nr   g      ?r?   r      z%d-%dz%d-%d\%dr{   )r]   r_   rM   rH   	enumerater   r5   )	orig_list	shortlistcliststepirZ   subr(   r(   r)   _compact4nexusP  s.   0$$
r   c              
      s  | sdS | d d }t | d d tdd | D dk}|r#d_d_d_d_d_j	 D ]\}}|j| d| < j|= q4j
	 D ]\}}|j
| d| < j
|= qLd	|ttjii_| dd D ]\fd
djD   fddjD } fddjD } D ]}	j|	  ttj|	 jjjj7  < q|D ]}	j|	  tjj 7  < q|D ]"}	tjj ttj|	 jjjj j|	< qʈj| j	 D ]\}}fdd|D j d| < qj
r)j
si _
j
fddj
	 D  ttjjj jd	 < jrUjsEi _jfddj	 D   jj7  _ jt|7  _qqjd	 D ]}
jd	 |
 j|
< qlS )a  Combine matrices in [(name,nexus-instance),...] and return new nexus instance.

    combined_matrix=combine([(name1,nexus_instance1),(name2,nexus_instance2),...]
    Character sets, character partitions and taxon sets are prefixed, readjusted
    and present in the combined matrix.
    Nr   r?   c                 S   s   h | ]}|d  j qS )r?   )datatyperW   nr(   r(   r)   	<setcomp>      zcombine.<locals>.<setcomp>NoneFr   combinedc                    s   g | ]	}| j v r|qS r(   )r   r   )mr(   r)   r[     r   zcombine.<locals>.<listcomp>c                       g | ]}| vr|qS r(   r(   r   bothr(   r)   r[     r\   c                    r   r(   r(   r   r   r(   r)   r[     r\   c                    s   g | ]}| j  qS r(   ncharrW   rZ   r   r(   r)   r[     r   c                    s    i | ]\}}  d | |qS )r   r(   )rW   tnts)r   r(   r)   r          zcombine.<locals>.<dictcomp>c                    s   i | ]
\}} j | |qS r(   r   )rW   r   r   r   r(   r)   r         )r   deepcopyrH   r   r   statelabels
interleaver   charsetsitemstaxsetsr-   ranger   charpartitionsr   r   r
   r   r   r`   missingextendupdatentax)matricesrw   mixed_datatypescncsr   r   combined_onlym_onlyrD   rQ   r(   )r   r   r   r   r)   combiney  sr   


 $ r   c           	      C   sV  | sdS t | }g }g }d}d}d}t|}	 |}zt|}W n ty*   d}Y nw |du r0ng||kr;|s;|s;d}nA|sL|sL|sL|dksI|dkrL|}n0|sd|dkrd|tv r_|dkr_|s_d}n|d	7 }n|s||d
kr||rod}n|d	8 }|dk r{tdq|dkr|dkr|s|d| g }n|| q|r|d| |dkrtd|S )a  Delete []-delimited comments out of a file and break into lines separated by ';' (PRIVATE).

    stripped_text=_kill_comments_and_break_lines(text):
    Nested and multiline comments are allowed. [ and ] symbols within single
    or double quotes are ignored, newline ends a quote, all symbols with quotes are
    treated the same (thus not quoting inside comments like [this character ']' ends a comment])
    Special [&...] and [\...] comments remain untouched, if not inside standard comment.
    Quotes inside special [& and [\ are treated as normal characters,
    but no nesting inside these special comments allowed (like [&   [\   ]]).
    ';' is deleted from end of line.

    NOTE: this function is very slow for large files, and obsolete when using C extension cnexus
    r4   Fr   TNrL   rK   [r?   ]z#Nexus formatting error: unmatched ];rr   z#Nexus formatting error: unmatched [)iterr;   StopIterationSPECIALCOMMENTSr#   rM   r5   )	textcontentsnewtextnewline
quotelevelspeciallevel	commlevelt2rD   r(   r(   r)   _kill_comments_and_break_lines  sd   

)r   c                 C   s^   g }| D ](}| dd dd }| dr|| q| dd}|r,|| q|S )zAdjust linebreaks to match ';', strip leading/trailing whitespace (PRIVATE).

    list_of_commandlines=_adjust_lines(input_text)
    Lines are adjusted so that no linebreaks occur within a commandline
    (except matrix command line)
    z
rr   r   r{   )r   r6   lowerr   rM   )linesformatted_linesliner(   r(   r)   _adjust_lines  s   
r   c                 C   s   |  d}|dkrX|  d}|dk rtd|  ||k r"td|  dt| |d | }||  }|| kr?| }| d	| | | |d d	  } |  d}|dks	| S )
zGReplace ambigs in xxx(ACG)xxx format by IUPAC ambiguity code (PRIVATE).(r   )r   z Missing closing parenthesis in: z Missing opening parenthesis in: r4   r?   N)findr#   r5   r]   upperr   )seqrev_ambig_valuesopeningclosingambig
ambig_coder(   r(   r)   _replace_parenthesized_ambigs'  s   

 
r   c                   @   s   e Zd ZdZdd ZdS )Commandlinez/Represent a commandline as command and options.c                    s`  i | _ g d| _z| dd\| _W n ty0   | d | _d| dd Y nw | j  | _| jtv rE | _ dS tdkrzS	dd fdd	t
tD }g  |D ]} | qe fd
d	t
tD }|D ]}|d  | j |d   < q||D ]}d| j |  < qW dS  ty   td| dw dS )r,   Nrr   r?   r   r{   = = c                    s>   g | ]} | d kr|dkr|t  kr|d ||d fqS )r   r   r?   r   r   optionsr(   r)   r[   P  s
     z(Commandline.__init__.<locals>.<listcomp>c                    r   r(   r(   r   )indicesr(   r)   r[   X  r\      zIncorrect formatting in line: )r   commandr6   r   rB   r5   r   SPECIAL_COMMANDSrH   r   r   r   r#   )r/   r   r   valued_indicessltoken_indicesopttokenr(   )r   r   r)   r1   <  s>   


 zCommandline.__init__Nr$   r%   r&   r'   r1   r(   r(   r(   r)   r   9  s    r   c                   @   s   e Zd ZdZdddZdS )BlockzARepresent a NEXUS block with block name and list of commandlines.Nc                 C   s   || _ g | _dS r+   )r   commandlines)r/   r   r(   r(   r)   r1   e  s   
zBlock.__init__rf   r	  r(   r(   r(   r)   r
  b  s    r
  c                   @   s&  e Zd ZdZdwddZdd Zdd ZeeeZd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Z d9d: Z!d;d< Z"d=d> Z#e$d?fd@dAZ%dxdBdCZ&dDdE Z'dwdFdGZ(dHdI Z)dJdK Z*dLdM Z+dNdO Z,				P	Q	Q		R	PdydSdTZ-			Q	Q		P	P		P	U	P	UdzdVdWZ.	Q	Q	P	U	Pd{dXdYZ/d|d[d\Z0dwd]d^Z1d}d_d`Z2d~dadbZ3ddddeZ4d}dfdgZ5d}dhdiZ6djdk Z7ddmdnZ8ddodpZ9dqdr Z:ddsdtZ;ddudvZ<dS )NexuszECreate the Nexus class, main class for the management of Nexus files.Nc                 C   s   d| _ d| _g | _g | _d| _d| _d| _d| _d| _d| _	d| _
d| _d| _d| _d| _d| _d| _d| _d| _g | _i | _i | _i | _i | _g | _d| _g | _i | _i | _d| _d| jd< |rh| | dS | t dS )	r,   r   NdnaFr   r   r   gapmode) r   r   unaltered_taxlabelsr   r   r   r   respectcaser   r`   r^   equate	matchcharlabels	transposer   tokens	eliminater   unknown_blocksr   r   r   taxpartitionsr   r   
structuredr_   r   r   readDEFAULTNEXUS)r/   inputr(   r(   r)   r1   n  sH   
zNexus.__init__c                 C   s   t dt | jS )2Included for backwards compatibility (DEPRECATED).zThe get_original_taxon_order method has been deprecated and will likely be removed from Biopython in the near future. Please use the taxlabels attribute instead.warningswarnr   r   r2   r(   r(   r)   get_original_taxon_order  s
   zNexus.get_original_taxon_orderc                 C   s   t dt || _dS )r  zThe set_original_taxon_order method has been deprecated and will likely be removed from Biopython in the near future. Please use the taxlabels attribute instead.Nr  )r/   rd   r(   r(   r)   set_original_taxon_order  s
   
zNexus.set_original_taxon_orderc           
   
   C   sZ  z"t |}| }t|dd| _W d   n1 sw   Y  W n$ tttfyF   t|t	r7|}d| _nt
d|dd  ddY nw | }|drV|d	d }t|}t|D ]#\}}z|dd	  dkrw|d	d  ||< W q^ ty   Y q^w | |}	 zt|\}}	W n
 ty   Y dS w |tv r| ||	 n| ||	 q)
z@Read and parse NEXUS input (a filename, file-handle, or string).rw   Unknown_nexus_fileNinput_stringzUnrecognized input: d   z ...z#NEXUS   )r   	as_handler  getattrfilename	TypeErrorOSErrorAttributeError
isinstancer   r#   r6   r   _get_command_linesr   r   
IndexError_get_nexus_blockr;   r   KNOWN_NEXUS_BLOCKS_parse_nexus_block_unknown_nexus_block)
r/   r  fpfile_contentsr  r   clnexus_block_genr   r   r(   r(   r)   r    sH   


z
Nexus.readc                 c   s    d}g }|rI| d}| dr'|s d}| d  }n%td| | dr>|r:d}||fV  g }ntd|rE|| |sd	S d	S )
z>Return a generator for looping through Nexus blocks (PRIVATE).Fr   beginTr?   zIllegal block nesting in block r   zUnmatched 'end'.N)r9   r   r   r   r#   rM   )r/   r5  inblock
blocklinesr6  r   r(   r(   r)   r0    s&   


zNexus._get_nexus_blockc                 C   s(   t  }|j| ||_| j| d S rf   )r
  r  rM   r   r  )r/   r   r   blockr(   r(   r)   r3    s   zNexus._unknown_nexus_blockc              	   C   sb   |  || | jd }|jD ] }zt| d|j |j W q ty.   td|j ddw dS )z$Parse a known Nexus Block (PRIVATE).r   r|   zUnknown command: r{   N)_apply_block_structurer  r  r(  r  r   r,  r#   )r/   r   r   r;  r   r(   r(   r)   r2    s   

zNexus._parse_nexus_blockc                 C      d S rf   r(   r/   r   r(   r(   r)   _title     zNexus._titlec                 C   r=  rf   r(   r>  r(   r(   r)   _link  r@  zNexus._linkc                 C   s4   d|v rt |d | _d|v rt |d | _d S d S )Nr   r   )evalr   r   r>  r(   r(   r)   _dimensions  s
   zNexus._dimensionsc                 C   s.  d|v rd| _ d|v r@d|d  | _| jdr!| jds-| jdr5| jdr5| jdd | _| j s@t| j | _d	|v r|d	  | _	| j	d
ksU| j	dkr`t
j | _t
j| _nO| j	dkrpt
j | _t
j| _n?| j	dkrddt
jd| _t
jd | _n+| j	dkri | _| jstdd| j| _| j s|  j| j 7  _ntd| j	 d| j| j | _| j s| j | j  | _dd | j D }i | _| D ]\}}tdd |D }|| jd|< q| j	dv rt
j| _| j| jvr	| j| j | j| j< | j| j| j< n"| j	dkr3t
j| _| j| jvr,| j| j | j| j< | j| j| j< d|v r?|d d | _d|v rK|d d | _d|v rU|d | _d|v ra|d d | _d|v rk|d | _d |v r|d  d u s|d   d!krd| _d"|v rd| _ d#|v rd$| _ d S d S )%Nr  Tr^   r4   rL   rK   r?   r   r   r  
nucleotidernaproteinDNEQ)BZX*standardzSymbols must be defined when using standard datatype. Please remove any whitespace (spaces, tabs, etc.) between values for symbols as this confuses the Nexus parser.zUnsupported datatype: c                 S   s   i | ]\}}|d kr||qS )rK  r(   )rW   rl   vr(   r(   r)   r   L  s    z!Nexus._format.<locals>.<dictcomp>c                 s   s    | ]}|V  qd S rf   r(   r~   r(   r(   r)   r   O  s    z Nexus._format.<locals>.<genexpr>)r  rD  r   r   r`   r  r  r  r   yesr  notokensF)!r  r5   r   r^   r   r   r-   r   r   r   r   ambiguous_dna_valuesr   ambiguous_valuesunambiguous_dna_lettersunambiguous_lettersambiguous_rna_valuesunambiguous_rna_lettersprotein_lettersr#   valid_charactersr   rev_ambiguous_valuesr]   ambiguous_dna_lettersr   r`   ambiguous_rna_lettersr  r  r  r   r  )r/   r   revrl   rN  r   r(   r(   r)   _format  s   


















 


zNexus._formatc                 C   
   || _ d S rf   )r_   r>  r(   r(   r)   _setu     
z
Nexus._setc                 C   r^  rf   r   r>  r(   r(   r)   _optionsx  r`  zNexus._optionsc                 C   r^  rf   )r  r>  r(   r(   r)   
_eliminate{  r`  zNexus._eliminatec                 C   s   dS )a&  Get taxon labels (PRIVATE).

        As the taxon names are already in the matrix, this is superfluous
        except for transpose matrices, which are currently unsupported anyway.
        Thus, we ignore the taxlabels command to make handling of duplicate
        taxon names easier.
        Nr(   r>  r(   r(   r)   
_taxlabels~  s    zNexus._taxlabelsc                 C   s&   dd | j D }|dd}||S )z8Check for presence of taxon in self.taxlabels (PRIVATE).c                 S   s   i | ]	}| d d|qS )r{   r|   )r   r   r(   r(   r)   r     r   z*Nexus._check_taxlabels.<locals>.<dictcomp>r{   r|   )r   r   get)r/   taxonnextaxanexidr(   r(   r)   _check_taxlabels  s   
zNexus._check_taxlabelsc                 C   sv   i | _ t|}	 | }|du rdS | j|td}t| }|| j |< | }|du r.dS |dkr:td| dq)z$Get labels for characters (PRIVATE).TNset_type,Missing ',' in line r   )r   r*   rR   _resolveCHARSETr   r>   r#   )r/   r   optsw
identifierstaterQ   r(   r(   r)   _charlabels  s   
zNexus._charlabelsc                 C   s  i | _ i | _t|}| jstd	 | }|d u rd S | j|td}t| }|| j |< g | j|< |	 }|d u r=d S |dkr|dkrMtd| dt| }|d u r_td| d	 t
| j| t
| jkrrtd	| | j| | t| }|d u rd S |dkrnq`q)
Nz3Symbols must be defined when using character statesTri  rk  /rl  r   z Missing character state in line z?Character states exceed number of available symbols in line %s.)r   r   r*   r^   r#   rR   rm  rn  r   r>   rH   rM   )r/   r   ro  rp  rq  	characterrQ   rr  r(   r(   r)   _charstatelabels  sJ   

zNexus._charstatelabelsc                 C   r=  rf   r(   r>  r(   r(   r)   _statelabels  s   zNexus._statelabelsc                 C   sZ  | j r| js
tdi | _d}d}dd |dD }t|}	 zt|}W n tyD   || j k r7tdd|| j krAtd	dY n/w |d
7 }|| j krY| jsUtdd
}d}t	|}t
| }|  }d}	| jr|rxd| }	n,dt| }	n"d| }	t|	| jk rt|}|	d| 7 }	t|	| jk s| jdkrtt|	| j}
|d
kr|
}n$| jr	 |
| j}|dkrnt|
d| ||  |
|d
 d  }
qt|
D ]\}}|| jvr|| jkr|| jkrtd|||
f qn1t|	}
t|
D ](\}}|d D ]}|| jvr+|| jkr+|| jkr+td|||
f qq|rN| j| tt| j |}|
| j|< | j | n$t| j d|d
  |}| !|}|rl| j|  |
7  < ntd| q | jD ]}t| j| | jkrtd| jt| j| |f qvt"| j}t"| j dd }||krt#ddS )z+Create a matrix for NEXUS object (PRIVATE).z+Dimensions must be specified before matrix!r   Tc                 S   s    g | ]}|  d kr|  qS r4   )r6   rW   r|   r(   r(   r)   r[     r   z!Nexus._matrix.<locals>.<listcomp>rr   zNot enough taxa in matrix.NzToo many taxa in matrix.r?   z7Too many taxa in matrix - should matrix be interleaved?Fr4   rM  r   zMTaxon %s: Illegal character %s in sequence %s (check dimensions/interleaving)dzZTaxon %s not in first block of interleaved matrix. Check matrix dimensions and interleave.z<Matrix Nchar %d does not match data length (%d) for taxon %szcERROR: TAXLABELS must be identical with MATRIX. Please Report this as a bug, and send in data file.)$r   r   r#   r   r   r   r;   r   r   r*   r   rR   rS   r6   r5   rH   r   r
   r   rY  r  r   r   rX  r`   r   r   r  rM   r   r-   keysr   rh  r]   rB   )r/   r   taxcountfirst_matrix_blockr   lineiterr   	linecharsidr!   	iupac_seqrefseqr=   r   rQ   codingtaxon_presentre  
matrixkeystaxlabelssortr(   r(   r)   _matrix  s   






"	






d

zNexus._matrixc                 C   s   i | _ t|}	 z*t| }t| }|| j |< | }|du r%W dS |dkr1td| dW n ty:     tyI   td| ddw q)z!Translate a Nexus file (PRIVATE).TNrk  rl  r   zFormat error in line )r   r*   r   rR   r   r>   r#   	Exception)r/   r   ro  rq  r   rQ   r(   r(   r)   
_translatek  s&   
zNexus._translatec                 C   s   |  | dS )z@Use 'utree' to denote an unrooted tree (ex: clustalx) (PRIVATE).N)_treer>  r(   r(   r)   _utree  s   zNexus._utreec              
   C   sl  t |}| dkr| }| }| dkr#td|d d  d}d}| dkrn|  t|}|dkrEtd	||d d f t|}|d
}	t| |dkrYd}n|dkr`d}n|dkrht|	}| dks-t||||	 
 d}
| jr|
 D ],}zt| jt|
|jj |
|j_W q ttfy   td|
|jj d w | j|
 d S )NrL  r   z"Syntax error in tree description: 2   Fg      ?r   r    z7Illegal special comment [%s...] in tree description: %sr   RTUW)rw   weightrootedr   z/Unable to substitute %s using 'translate' data.)r*   r8   r>   rR   r#   r;   rG   floatr   rS   r6   r   get_terminalsr   r   noder   re  rB   KeyErrorr   rM   )r/   r   ro  dummyrw   r  r  symbolspecialrd   r   r   r(   r(   r)   r    sX   
zNexus._treec                 C   s:   t d}||_|D ]}|jt|| q	| j| dS )z2Apply Block structure to the NEXUS file (PRIVATE).r4   N)r
  r   r  rM   r   r  )r/   r   r   r;  r   r(   r(   r)   r<    s
   zNexus._apply_block_structurec                 C   $   | j |td\}}t|| j|< dS )zCreate unique taxset (PRIVATE).ri  N)_get_indicesTAXSETr   r   )r/   r   rw   r   r(   r(   r)   _taxset     zNexus._taxsetc                 C   r  )z&Create unique character set (PRIVATE).ri  N)r  rn  r   r   )r/   r   rw   sitesr(   r(   r)   _charset  r  zNexus._charsetc           
      C      i }d}t |}| |}|std| dd}	 t|}|du s(|dkr@|s@| j|tdd	\}}	t|	||< d}|du r?nn|d
krG| }||7 }q|| j|< dS )z1Collect taxpartition from a NEXUS file (PRIVATE).Fz"Formatting error in taxpartition: r{   r4   TNrk  :rj  	separatorrK   )r*   _name_n_vectorr#   r;   r  r  r   r  )
r/   r   r   r   ro  rw   r   rp  subname
subindicesr(   r(   r)   _taxpartition  .   

zNexus._taxpartitionc                    s^   t | j  | |  fdd| jD }|g ks t|dkr(td| d|d | _dS )zRead codon positions from a codons block as written from McClade (PRIVATE).

        Here codonposset is just a fancy name for a character partition with
        the name CodonPositions and the partitions N,1,2,3
        c                    r   r(   r(   r   prev_partitionsr(   r)   r[     r\   z&Nexus._codonposset.<locals>.<listcomp>r?   z!Formatting Error in codonposset: r{   r   N)r-   r   r{  _charpartitionrH   r#   r   )r/   r   	codonnamer(   r  r)   _codonposset  s   
zNexus._codonpossetc                 C   r=  rf   r(   r>  r(   r(   r)   _codeset  r@  zNexus._codesetc           
      C   r  )z6Collect character partition from NEXUS file (PRIVATE).Fz#Formatting error in charpartition: r{   r4   TNrk  r  r  rK   )r*   r  r#   r;   r  rn  r   r   )
r/   r   r   r   ro  rw   r   rp  r  r  r(   r(   r)   r    r  zNexus._charpartitionr   c                 C   sD   t |}| j||d}| j||d}|du rtd| d||fS )zParse the taxset/charset specification (PRIVATE).

        e.g. '1 2   3 - 5 dog cat   10 - 20 \\ 3'
        --> [0,1,2,3,4,'dog','cat',9,12,15,18]
        )r  ri  NFormatting error in line: r{   )r*   r  _parse_listr#   )r/   r   rj  r  ro  rw   r   r(   r(   r)   r  
  s   zNexus._get_indicesc                 C   s   |  }| }|dkr| }|std| dt|}|jdkrL| }| }| }| dkr<td| | dkrLtd| d	| | |krZtd| d|S )
z@Extract name and check that it's not in vector format (PRIVATE).rL  r  r{   r   vectorz"Unsupported VECTOR format in line rM  zUnknown qualifier z	 in line )rS   rR   r#   r   r8   r>   r   )r/   ro  r  rS   rw   open	qualifiercloser(   r(   r)   r    s$   
zNexus._name_n_vectorc                 C   s<  g }|  rz	 | }|sW |S | j||d}|  dkrz|}d}| }| j| |d}|tkrO|  dkrC| }	t| }|t||d | n;t|t	sYt|t	r_t
d| | j|}| j|}| j||d  }
||
 nt|t	r|| n|| q t
y     ty   Y dS w |S )zParse a NEXUS list (PRIVATE).

        e.g. [1, 2, 4-8\\2, dog, cat] --> [1,2,4,6,8,17,21],
        (assuming dog is taxon no. 17 and cat is taxon no. 21).
        Tri  r   r?   \z:Name if character sets not allowed in range definition: %sN)r8   rR   rm  r>   rn  r   r   r   r-  r-   r#   r   rA   rM   r  )r/   options_bufferrj  
plain_listrq  r   r   r   hyphen	backslashtaxranger(   r(   r)   r  -  sV   (

'zNexus._parse_listc                 C   sv  t |}|s
td|tkrfzt|}W n? tyS   | jr9|| j v r9| jD ]}| j| |kr7|   Y S q(n| jrH|| jv rH| j|  Y S td| dY dS w || jkr]|d S td|| jf |t	krzt|}W n) ty   | 
|}|r| Y S | jr|| jv r| j|  Y S td| dw |dkr|| jkr| j|d  S td|| jf td	| d
)a+  Translate identifier in list into character/taxon index (PRIVATE).

        Characters (which are referred to by their index in Nexus.py):
            Plain numbers are returned minus 1 (Nexus indices to python indices)
            Text identifiers are translated into their indices (if plain character identifiers),
            the first hit in charlabels is returned (charlabels don't need to be unique)
            or the range of indices is returned (if names of character sets).
        Taxa (which are referred to by their unique name in Nexus.py):
            Plain numbers are translated in their taxon name, underscores and spaces are considered equal.
            Names are returned unchanged (if plain taxon identifiers), or the names in
            the corresponding taxon set is returned.

        z0INTERNAL ERROR: Need type to resolve identifier.zUnknown character identifier: Nr?   z-Illegal character identifier: %d>nchar (=%d).zUnknown taxon identifier: r   z(Illegal taxon identifier: %d>ntax (=%d).zUnknown set specification: r   )r   r#   rn  r   rB   r   ri   r   r   r  rh  r   r   r   )r/   rq  rj  r   rl   taxlabels_idr(   r(   r)   rm  e  sh   


zNexus._resolvec                 C   r=  rf   r(   r>  r(   r(   r)   	_stateset     zNexus._statesetc                 C   r=  rf   r(   r>  r(   r(   r)   
_changeset  r  zNexus._changesetc                 C   r=  rf   r(   r>  r(   r(   r)   _treeset  r  zNexus._treesetc                 C   r=  rf   r(   r>  r(   r(   r)   _treepartition  r  zNexus._treepartitionFr(   r4   c
                    s   |s| j }|s	dS |s| j} rgi }
 D ]Pt|}| fddt| jD  t|}|d  d }|d}|dkrL|d| d  d	 }n|d  }||
< | j|||||||d
|	d	 q|
S | jd	 }| j|||||||d
|	d	 |S )zWrite a nexus file for each partition in charpartition.

        Only non-excluded characters and non-deleted taxa are included,
        just the data block is written.
        Nc                 3   s     | ]}|  vr|V  qd S rf   r(   r~   r   r=   r(   r)   r     s    z4Nexus.write_nexus_data_partitions.<locals>.<genexpr>z
Partition: rr   r   r   r|   z.dataF)	r)  r   	blocksizer   excludedeletecommentappend_setsr   )	r   r)  r-   r   r   r   r   rfindwrite_nexus_data)r/   r   r)  r  r   r  r  r   r  r   
pfilenamestotal_excludepcommentdot	pfilenamefnr(   r  r)   write_nexus_data_partitions  sZ   

z!Nexus.write_nexus_data_partitionsTc                    s  sj s	dS |sj}fdd D r(tddt tj |rS|jvr6td|j| }t|}i }|D ]}fdd|| D ||< qC fddjD }t	j
 d	}t|}t||d
  }|r~|r|d
 dkrdS tj|dd}|	s|d |r|d| d  |d |d||f  |dj  jr|d jr|dj  jr|dj  jr|dj  jr|dj  jr|dj  |s|r|d |d jrjdt}|ddfdd|D  d  |d  |s0|r-d!}nj}tfd"d|D }|rd
}|D ]R}|d| d#| d t|| d
kr|D ]$}|t|d$|d%  ||| ||t||   d&  q[|d& n|d' |t|| 7 }qBnw|rtd
||D ]+}|D ] }|t|d$|d%  ||| |||  d&  q|d& qnA|D ]>}||k r|t|d$d&  n|t|d$|d%  || }td
||D ]}|||||  d&  q~q|d( |
rD|r1|j d)d* |j d+d, n|j d- W d   |S W d   |S W d   |S 1 sXw   Y  |S ).a   Write a nexus file with data and sets block to a file or handle.

        Character sets and partitions are appended by default, and are
        adjusted according to excluded characters (i.e. character sets
        still point to the same sites (not necessarily same positions),
        without including the deleted characters.

        - filename - Either a filename as a string (which will be opened,
          written to and closed), or a handle object (which will
          be written to but NOT closed).
        - interleave_by_partition - Optional name of partition (string)
        - omit_NEXUS - Boolean.  If true, the '#NEXUS' line normally at the
          start of the file is omitted.

        Returns the filename/handle used to write the data.
        Nc                       g | ]	}  |s|qS r(   rh  r   r2   r(   r)   r[     r   z*Nexus.write_nexus_data.<locals>.<listcomp>zUnknown taxa: %s, zUnknown partition: c                    r   r(   r(   r~   r  r(   r)   r[   )  r\   c                        g | ]}|v r| vr|qS r(   r(   )rW   re  r  r   r(   r)   r[   +  s    )r  r  r   r4   rp  )modez#NEXUS
r   z]
zbegin data;
zdimensions ntax=%d nchar=%d;
zformat datatype=z respectcasez	 missing=z gap=z matchchar=z labels=z equate=z interleaveru   r  zcharlabels c                 3   s*    | ]}|d   dt  |  V  qdS )r?   r{   Nr   rW   rl   )newcharlabelsr(   r)   r   T  s   ( z)Nexus.write_nexus_data.<locals>.<genexpr>zmatrix
r   c                 3   s     | ]}t t| d V  qdS )r   N)rH   r   r   r  r(   r)   r   ^  s    : r  r?   rr   z	[empty]

z;
end;
F)r  r  r   include_codonsT)r  r  r   codons_only)r  r  r   )r   r)  r#   r5   r_   
differencer   r   r   r   crop_matrixrH   r   r'  writer   r  r   r`   r  r  r  r   _adjust_charlabelsr]   r   maxr   rv   r   r  )r/   r)  r   r  r  r  r   interleave_by_partitionr  
omit_NEXUSr  r   codons_block	partitionnamesnewpartitionr=   undeletecropped_matrixntax_adjustednchar_adjustedfhclkeys
namelengthseekre  	taxon_seqr(   )r  r  r   r   r  r/   r)   r    s  








"

	
g
g
ggzNexus.write_nexus_datac                    sV  | j s| js| jsdS |rdg}ndg}d}g t| jD ]}|v r-|d7 }d q||  q|s| j  D ]\}	}
fdd|
D }|r[|d	t|	 d
t|  q<| j D ] \}	} fdd|D }|r|dt|	 d
d	|  qa| j D ]R\}	}|s|	t
krq|r|	t
krqt|}i |D ]}fdd|| D }|r||< qr|r|	t
krd}nd}|d|t|	d	fdd|D f  q| j D ]8\}	}t|}i |D ]} fdd|| D }|r||< qr|dt|	d	fdd|D f  q|d t|dkr&dS d	|S )zReturn a sets block.r4   z
begin codonsz
begin setsr   r?   r   c                       g | ]
}| vr| qS r(   r(   r~   r  offlistr(   r)   r[     r   z%Nexus.append_sets.<locals>.<listcomp>zcharset r   c                    s    g | ]}| vrt |d qS )r  r  r   )r  r   r(   r)   r[     r   ztaxset r{   c                    r  r(   r(   r~   r  r(   r)   r[     r   r   r   z
%s %s = %sr  c                 3   s.    | ]}| v r| d t  |  V  qdS )r  N)r   rW   snr  r(   r)   r     s    z$Nexus.append_sets.<locals>.<genexpr>c                    r   r(   r(   r   r  r(   r)   r[     r\   ztaxpartition %s = %sc                 3   s<    | ]}| v rd t |ddd  | D f V  qdS )z%s: %sr{   c                 s   s    | ]}t |V  qd S rf   r  r   r(   r(   r)   r         z.Nexus.append_sets.<locals>.<genexpr>.<genexpr>N)r   r5   r  r  r(   r)   r     s    zend;
r  ru   )r   r   r   r   r   rM   r   r   r   r5   CODONPOSITIONSr   r  rH   )r/   r  r  r   r  r  setsboffsetrQ   r   nscsetrX   tsetr=   r  r  nspr  r(   )r  r  r   r  r  r)   r    s   	 

zNexus.append_setsr   c              	   C   s   |s(d| j v r#| j dd  dv r#d| j ddd d }n| j d }t|d<}| jD ]/}|dt| d  td	t	t
| j| |D ]}|t
| j| |||  d  qKq1W d   |S 1 slw   Y  |S )
zWrite matrix into a fasta file.r   r   paupnexusnexdatNz.fasrp  >rr   r   )r)  r   r   r5   r  r   r  r   r   rH   r   r   )r/   r)  widthr  re  r   r(   r(   r)   export_fasta  s   " 

&
zNexus.export_fastac                 C   s   |s(d| j v r#| j dd  dv r#d| j ddd d }n| j d }t|d*}|d| j| jf  | jD ]}|t	| d| j
| d	 q<W d   |S 1 sZw   Y  |S )
zWrite matrix into a PHYLIP file.

        Note that this writes a relaxed PHYLIP format file, where the names
        are not truncated, nor checked for invalid characters.
        r   r   r  Nz.phyrp  z%d %d
r{   rr   )r)  r   r   r5   r  r  r   r   r   r   r   )r/   r)  r  re  r(   r(   r)   export_phylip  s   " 

"
zNexus.export_phylipc              	      s  sj  fddjD }|sdS t|dkr+fddtt|d  D S fddtt|d  D }|dd D ]}g }|D ]x}| |d   }	|	jksn|	jkrhj	d 
 d	ksn|	|d krt|| qI|	|d v s|d jksj	d 
 d	kr|d jkr||d j|	|	f qI|	jv rtj|	 t|d }
|
r||d d
|
f qI|}qCdd |D }|S )z+Return a list with all constant characters.c                    r  r(   r(   r   r  r(   r)   r[   1  r   z"Nexus.constant.<locals>.<listcomp>Nr?   c                    r   r(   r(   r   r  r(   r)   r[   5  r\   r   c                    s2   g | ]\}}| vr|j | | fqS r(   )rR  rd  r   )rW   rZ   r   )r  r/   r(   r)   r[   7  s
    r  r   r4   c                 S   s   g | ]}|d  qS )r   r(   rV   r(   r(   r)   r[   f      )r   r   rH   r   r   r   r   r   r`   r   r   rM   rR  rd  r_   r   r5   )r/   r   r  r  r  constantre  newconstantsiteseqsite	intersectcposr(   r  r  r   r/   r)   r  -  sH   "



zNexus.constantc                    s   fddj D }|sdS g  |D ];}j| |  }jddkr-|jkr-j}|r>|jkr>| vr= | q  fddj	| D  qj v rd|rdt
 dkrdfd	d D      S )
zSummarize character.

        narrow=True:  paup-mode (a c ? --> ac; ? ? ? --> ?)
        narrow=false:           (a c ? --> a c g t -; ? ? ? --> a c g t -)
        c                    r   r(   r(   r   r  r(   r)   r[   o  r\   z!Nexus.cstatus.<locals>.<listcomp>Nr  r   c                 3   s    | ]	}| vr|V  qd S rf   r(   )rW   r7   )cstatusr(   r)   r   {  r   z Nexus.cstatus.<locals>.<genexpr>r?   c                    s   g | ]	}| j kr|qS r(   )r   ry  r2   r(   r)   r[   }  r   )r   r   r   r   rd  r`   r   rM   r   rR  rH   sort)r/   r  r  narrowr  rD   rQ   r(   )r  r  r/   r)   r  i  s"   
 zNexus.cstatusrq   c           
         s   t | j| j} fddt| jD D ]+}| ||}t|dd D ]\}}||d d D ]}	|| |	 d q/q#q|	 
 j|dS )zCalculate a stepmatrix for weighted parsimony.

        See Wheeler (1990), Cladistics 6:269-275 and
        Felsenstein (1981), Biol. J. Linn. Soc. 16:183-196
        c                    r   r(   r(   rV   r  r(   r)   r[     r\   z-Nexus.weighted_stepmatrix.<locals>.<listcomp>Nr   r?   )rw   )rU   rT  r`   r   r   r  r   re   r   rm   rp   rz   )
r/   rw   r  r  r   r  r  r   b1b2r(   r  r)   weighted_stepmatrix  s   zNexus.weighted_stepmatrixc                    s   sj fdd D rtddt j g krg fddjD }|s1i S fdd|D }fddtt| D }|g krRdd	 |D S d
d dd t| D D }tt||S  fdd	jD S )z=Return a matrix without deleted taxa and excluded characters.c                    r  r(   r  r   r2   r(   r)   r[     r   z%Nexus.crop_matrix.<locals>.<listcomp>zUnknown taxa: r  c                    r  r(   r(   r   r  r(   r)   r[     r   c                    s   g | ]}t  | qS r(   r   r  r   r(   r)   r[     r\   c                    s   g | ]
\}}| vr|qS r(   r(   )rW   r   rX   r  r(   r)   r[     r   c                 S   s   i | ]}|t d qS rx  r	   r   r(   r(   r)   r     r   z%Nexus.crop_matrix.<locals>.<dictcomp>c                 S      g | ]}t |qS r(   r	   rV   r(   r(   r)   r[     r
  c                 s   s    | ]}d  |V  qdS )r4   Nr5   r   r(   r(   r)   r     s    z$Nexus.crop_matrix.<locals>.<genexpr>c                    s&   i | ]}|v r| vr|| qS r(   r(   r   r  r(   r)   r     s     )	r   r#   r5   r_   r  r   r   zipdict)r/   r   r  r  r  r   sitesmr(   r  r)   r    s&   zNexus.crop_matrixc                    s   |s| j }t|t| d  t}| j||d  si S  t  d  s) S  fdd| jD }|rCtt fdd|D  ntt fdd|D  fddtt	D }d	d t| D }|rod
d |D }t
t||S )zReturn a bootstrapped matrix.r   )r  r  c                    s   g | ]}| v r|qS r(   r(   r   cmr(   r)   r[     r\   z#Nexus.bootstrap.<locals>.<listcomp>c                 3   s    | ]	}t  | V  qd S rf   r   r   r  r(   r)   r     r   z"Nexus.bootstrap.<locals>.<genexpr>c                 3   s    | ]} | V  qd S rf   r(   r   r  r(   r)   r     r  c                    s$   g | ]} t d t d  qS )r   r?   )randomrandintrH   ry  )r  r(   r)   r[     s    c                 S      g | ]}d  |qS rx  r  r   r(   r(   r)   r[     r   c                 S   r  r(   r	   rV   r(   r(   r)   r[     r
  )r   r-  r-   r{  r
   r  r   r  r   rH   r  )r/   r   r  r  
seqobjectsr  bootstrapsitesmbootstrapseqsr(   )r  r  r)   	bootstrap  s*   

zNexus.bootstrapc                 C   s   |st d| jt| }|dk r| | j|  n|dkr%|| j| 7 }|| jv r1t| j|}n|}|| jv r<tdt	|| j|< |  j
d7  _
| j| | j| dS )z&Add a sequence (string) to the matrix.zNew sequence must have a namer   zVERROR. There is a discrepancy between taxlabels and matrix keys. Report this as a bug.r?   N)r#   r   rH   
insert_gapr   r   r   r   rB   r
   r   rM   r  )r/   rw   r   diffunique_namer(   r(   r)   add_sequence  s$   

zNexus.add_sequencer?   c                    s2  ddd}|dk s|j krtd| |dkrdS ttfddjD  }d	gtj g| |||< d
d t| D   fddtjD }t|_ j |7  _ j	
 D ]\}}|||||dj	|< q]jD ]}	j|	 
 D ]\}
}|||||dj|	 |
< qyqpj|g| d_jS )zAdd a gap into the matrix and adjust charsets and partitions.

        pos=0: first position
        pos=nchar: last position
        Fc                 S   s   |    d}t| D ]$\}}||kr|| | |< ||kr.|s,|dkr.| |d  |d kr.|}q
|dkr@tt||| | ||< | S )zAdjust character sets if gaps are inserted (PRIVATE).

            Takes care of new gaps within a coherent character set.
            r   r?   )r  r   r-   r   )r_   rZ   rz  
leftgreedyaddposr   rQ   r(   r(   r)   _adjust  s    z!Nexus.insert_gap.<locals>._adjustr   zIllegal gap position: %dNc                 3       | ]
}t  j| V  qd S rf   r   r   r   r2   r(   r)   r         z#Nexus.insert_gap.<locals>.<genexpr>r   c                 S   r!  rx  r  r   r(   r(   r)   r[     r   z$Nexus.insert_gap.<locals>.<listcomp>c                    s    g | ]\}}|t  | fqS r(   r	   )rW   r   re  )mappedr(   r)   r[     r   )r*  )insertF)r   r#   r-   r  r   rH   r   r  r   r   r   r   r  r   )r/   rE   r   r*  r,  r  listedr   rX   r=   spr(   )r0  r/   r)   r&    s&   


zNexus.insert_gapc                 C   s   |r|rt d| jsdS t| j}i }|rF|  |tj d}|D ]}||vrC||| kr:|d7 }||| ks0| j| ||| < q$|S |rt|  |tj d}|D ]}||| krh|d7 }||| ks^| j| ||| < qV|S | jS )z\Return adjusted indices of self.charlabels if characters are excluded or inserted (PRIVATE).z)Can't exclude and insert at the same timeNr   r?   )r#   r   r]   r  rM   sysmaxsize)r/   r  r1  r  r  excountrQ   icountr(   r(   r)   r    s:   
zNexus._adjust_charlabelsc                    s    fddt | jD S )z6Return all character indices that are not in charlist.c                    r   r(   r(   r~   charlistr(   r)   r[   +  r\   z Nexus.invert.<locals>.<listcomp>)r   r   )r/   r:  r(   r9  r)   invert)  s   zNexus.invertc                    sH   t j |r j tfddjD  } fddt|D S )zReturn gap-only sites.c                 3   r-  rf   r.  r   r2   r(   r)   r   2  r/  z Nexus.gaponly.<locals>.<genexpr>c                    s"   g | ]\}}t | r|qS r(   )r_   issubset)rW   r   r  )r`   r(   r)   r[   3  s   " z!Nexus.gaponly.<locals>.<listcomp>)r_   r`   re   r   r  r   r   )r/   include_missingr  r(   )r`   r/   r)   gaponly-  s
   
zNexus.gaponlyc           	      C   s   |s| j }| j | jg}|s|ddg | jD ]O}t| j| }t|}t||d\}}|dkr9|dkr9|| }n|d|d  ||| d   }|| ||d  }|t|kr_td| t	|| j|< qdS )z{Replace all terminal gaps with missing character.

        Mixtures like ???------??------- are properly resolved.
        r   N)r   r   Nr?   zJIllegal sequence manipulation in Nexus.terminal_gap_to_missing in taxon %s)
r   r`   r   r   r   r   rH   r   RuntimeErrorr
   )	r/   r   skip_nr   re  r   r   r   r   r(   r(   r)   terminal_gap_to_missing5  s*   

 zNexus.terminal_gap_to_missingrf   )r   )	NNNFr(   r(   Nr4   F)NNr(   r(   NFFNFTFT)r(   r(   FTF)Nr   )Nr(   r(   )r(   T)rq   r(   r(   )r?   Fr   r2  )NT)=r$   r%   r&   r'   r1   r!  r"  propertyoriginal_taxon_orderr  r0  r3  r2  r?  rA  rC  r]  r_  ra  rb  rc  rh  rs  rv  rw  r  r  r  r  r<  r  r  r  r  r  r  rn  r  r  r  rm  r  r  r  r  r  r  r  r  r	  r  r  r  r  r%  r)  r&  r  r;  r>  rB  r(   r(   r(   r)   r  k  s    
-


'h=~*

8?
C
 -

e


<




0
r  r?   )cnexuszImport of C module failed (z-). Falling back to slow Python implementationc                 C   s   t | }t|}|S rf   )r   r   )r5  r   r  r(   r(   r)   r.  X  s   r.  c                 C   s>   t | }|dks|dkrtd| t|td}|S )Nr   r   z
Unmatched    )rE  scanfiler#   r   r   chr)r5  decommentedr  r(   r(   r)   r.  _  s
   
__main__)run_doctestr2  )r   );r'   r   rn   r  r5  r  	functoolsr   Bior   r   r   Bio.Datar   Bio.Nexus.StandardDatar   Bio.Nexus.Treesr   Bio.Seqr
   
INTERLEAVEr  r1  rN   r}   r<   r   rn  r  r  r  r  r#   r*   rU   r   r   r   r   r   r   r   r   r   r   r   r   r   r
  r  r4   rE  ImportErrorexr   r.  r$   
Bio._utilsrK  r(   r(   r(   r)   <module>   s   o
O
)RH)	           p

