o
    RŀgB                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 d	Zd
ZG dd de
ZG dd de	ZG dd deZG dd deZG dd de
ZG dd deZdddZdS )a  AlignIO support for "phylip" format from Joe Felsenstein's PHYLIP tools.

You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).

Support for "relaxed phylip" format is also provided. Relaxed phylip differs
from standard phylip format in the following ways:

 - No whitespace is allowed in the sequence ID.
 - No truncation is performed. Instead, sequence IDs are padded to the longest
   ID length, rather than 10 characters. A space separates the sequence
   identifier from the sequence.

Relaxed phylip is supported by RAxML and PHYML.

Note
====

In TREE_PUZZLE (Schmidt et al. 2003) and PHYML (Guindon and Gascuel 2003)
a dot/period (".") in a sequence is interpreted as meaning the same
character as in the first sequence.  The PHYLIP documentation from 3.3 to 3.69
http://evolution.genetics.washington.edu/phylip/doc/sequence.html says:

"a period was also previously allowed but it is no longer allowed,
because it sometimes is used in different senses in other programs"

Biopython 1.58 or later treats dots/periods in the sequence as invalid, both
for reading and writing. Older versions did nothing special with a dot/period.
    N)MultipleSeqAlignment)Seq)	SeqRecord   )AlignmentIterator)SequentialAlignmentWriter
   z/PHYLIP format no longer allows dots in sequencec                   @      e Zd ZdZefddZdS )PhylipWriterzPhylip alignment writer.c                 C   s  | j }t|dkrtd| }|D ]}|t|jkr tdq|dkr)tdg }g }|D ]-}	 t|j|}||v rEtd||jf || t|j}	d|	v rWtt	||	 q/|
dt||f  d}
	 t||D ]H\}}	|
dkr|
|d	| | n|
d
|  tdD ]"}|
d |d  }|	||d  }|
d
|  |d |kr nq|
d qp|
d7 }
|
d |krd	S |
d qk)a&  Use this to write (another) single alignment to an open file.

        This code will write interlaced alignments (when the sequences are
        longer than 50 characters).

        Note that record identifiers are strictly truncated to id_width,
        defaulting to the value required to comply with the PHYLIP standard.

        For more information on the file format, please see:
        http://evolution.genetics.washington.edu/phylip/doc/sequence.html
        http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles
        r   Must have at least one sequence%Sequences must all be the same length Non-empty sequences are required<Repeated name %r (originally %r), possibly due to truncation. %i %s
TN    2   r   
r   )handlelen
ValueErrorget_alignment_lengthseqsanitize_nameidappendstr_NO_DOTSwritezipljustrange)self	alignmentid_widthr   length_of_seqsrecordnamesseqsnamesequenceblockchunkiseq_segment r0   H/var/www/html/myenv/lib/python3.10/site-packages/Bio/AlignIO/PhylipIO.pywrite_alignment6   s\   


zPhylipWriter.write_alignmentN__name__
__module____qualname____doc___PHYLIP_ID_WIDTHr2   r0   r0   r0   r1   r
   3       r
   c                   @   s0   e Zd ZdZeZdZdd Zdd Zdd Z	dS )	PhylipIteratora  Reads a Phylip alignment file returning a MultipleSeqAlignment iterator.

    Record identifiers are limited to at most 10 characters.

    It only copes with interlaced phylip files!  Sequential files won't work
    where the sequences are split over multiple lines.

    For more information on the file format, please see:
    http://evolution.genetics.washington.edu/phylip/doc/sequence.html
    http://evolution.genetics.washington.edu/phylip/doc/main.html#inputfiles
    Nc                 C   s^   |  }dd | D }t|dkrdS zt|d }t|d }W dS  ty.   Y dS w )Nc                 S      g | ]}|r|qS r0   r0   .0xr0   r0   r1   
<listcomp>       z-PhylipIterator._is_header.<locals>.<listcomp>   Fr   r   T)stripsplitr   intr   )r#   linepartsnumber_of_seqsr&   r0   r0   r1   
_is_header   s   zPhylipIterator._is_headerc                 C   s4   |d| j   }|| j d  dd}||fS )zExtract the sequence ID from a Phylip line (PRIVATE).

        Returning a tuple containing: (sequence_id, sequence_residues)

        The first 10 characters in the line are are the sequence id, the
        remainder are sequence data.
        Nr    )r%   rB   replace)r#   rE   seq_idr   r0   r0   r1   	_split_id   s   zPhylipIterator._split_idc                 C   s  | j }| jdu r| }n| j}d| _|st| }dd | D }t|dkr.tdzt|d }t|d }W n tyH   tddw | 	|sPJ | j
durc| j
|krctd|| j
f g }g }t|D ]"}|  }| |\}	}
||	 d	|
v rtt||
g qkd
}	 d
| kr| }|snd
| ks|sn;| 	|r|| _n2t|D ])}| dd
}
d	|
v rtt|| |
 | }|s|d |k rtdq|snqdd t||D }t|S ))Parse the next alignment from the handle.Nc                 S   r;   r0   r0   r<   r0   r0   r1   r?      r@   z+PhylipIterator.__next__.<locals>.<listcomp>rA   #First line should have two integersr   r   5Found %i records in this alignment, told to expect %ir   rI   Tr   zEnd of file mid-blockc                 s   s.    | ]\}}t td ||||dV  qdS )rI   r   r*   descriptionN)r   r   joinr=   r.   sr0   r0   r1   	<genexpr>  s
    
z*PhylipIterator.__next__.<locals>.<genexpr>)r   _headerreadlineStopIterationrB   rC   r   r   rD   rH   records_per_alignmentr"   rstriprL   r   r   rJ   r    r   r#   r   rE   rF   rG   r&   idsr)   r.   sequence_idrT   recordsr0   r0   r1   __next__   s|   






zPhylipIterator.__next__)
r4   r5   r6   r7   r8   r%   rV   rH   rL   r_   r0   r0   r0   r1   r:      s    r:   c                       s    e Zd ZdZ fddZ  ZS )RelaxedPhylipWriterzRelaxed Phylip format writer.c                    st   dd |D D ] t  fddtjD rtd  qt|dkr&d}ntdd |D d }t || dS )	z!Write a relaxed phylip alignment.c                 s   s    | ]}|j  V  qd S N)r   rB   r=   rT   r0   r0   r1   rU     s    z6RelaxedPhylipWriter.write_alignment.<locals>.<genexpr>c                 3   s    | ]}| v V  qd S ra   r0   )r=   cr*   r0   r1   rU     s    z&Whitespace not allowed in identifier: r   r   c                 s   s    | ]
}t |j V  qd S ra   )r   r   rB   rb   r0   r0   r1   rU     s    N)anystring
whitespacer   r   maxsuperr2   )r#   r$   r%   	__class__rd   r1   r2     s   z#RelaxedPhylipWriter.write_alignment)r4   r5   r6   r7   r2   __classcell__r0   r0   rj   r1   r`     s    r`   c                   @   s   e Zd ZdZdd ZdS )RelaxedPhylipIteratorzRelaxed Phylip format Iterator.c                 C   s(   | dd\}}| dd}||fS )zExtract the sequence ID from a Phylip line (PRIVATE).

        Returns a tuple containing: (sequence_id, sequence_residues)

        For relaxed format split at the first whitespace character.
        Nr   r   rI   )rC   rB   rJ   )r#   rE   rK   r+   r0   r0   r1   rL   &  s   zRelaxedPhylipIterator._split_idN)r4   r5   r6   r7   rL   r0   r0   r0   r1   rm   #  s    rm   c                   @   r	   )SequentialPhylipWriterz Sequential Phylip format Writer.c           	      C   s  | j }t|dkrtd| }|D ]}|t|jkr tdq|dkr)tdg }|D ]}t|j|}||v rBtd||jf || q-|dt||f  t	||D ]'\}}t
|j}d|v ritt||d| | || |d	 qXdS )
z'Write a Phylip alignment to the handle.r   r   r   r   r   r   r   Nr   )r   r   r   r   r   r   r   r   r   r    r   r   r!   )	r#   r$   r%   r   r&   r'   r(   r*   r+   r0   r0   r1   r2   5  s:   

z&SequentialPhylipWriter.write_alignmentNr3   r0   r0   r0   r1   rn   2  r9   rn   c                   @   s   e Zd ZdZdZdd ZdS )SequentialPhylipIteratora  Sequential Phylip format Iterator.

    The sequential format carries the same restrictions as the normal
    interleaved one, with the difference being that the sequences are listed
    sequentially, each sequence written in its entirety before the start of
    the next. According to the PHYLIP documentation for input file
    formatting, newlines and spaces may optionally be entered at any point
    in the sequences.
    Nc                 C   s  | j }| jdu r| }n| j}d| _|st| }dd | D }t|dkr.tdzt|d }t|d }W n tyH   tddw | 	|sPJ | j
durc| j
|krctd|| j
f g }g }t|D ]X}|  }| |\}	}
||	 t|
|k r|  }|sn(|d	krqd	|
| d
d	g}
t|
|krtdt|
|f t|
|k sd|
v rtt||
 qk	 | }|sn
| 	|r|| _nqdd t||D }t|S )rM   Nc                 S   r;   r0   r0   r<   r0   r0   r1   r?   }  r@   z5SequentialPhylipIterator.__next__.<locals>.<listcomp>rA   rN   r   r   rO   rI   r   z)Found a record of length %i, should be %ir   Tc                 s   s(    | ]\}}t t||||d V  qdS )rP   N)r   r   rS   r0   r0   r1   rU     s    
z4SequentialPhylipIterator.__next__.<locals>.<genexpr>)r   rV   rW   rX   rB   rC   r   r   rD   rH   rY   r"   rZ   rL   r   rR   rJ   r   r    r   r[   r0   r0   r1   r_   n  sx   







	z!SequentialPhylipIterator.__next__)r4   r5   r6   r7   rV   r_   r0   r0   r0   r1   ro   a  s    
ro   c                 C   sL   |   } dD ]}| |d} qdD ]}| |d} q|dur$| d| } | S )zSanitise sequence identifier for output.

    Removes the banned characters "[]()" and replaces the characters ":;"
    with "|". The name is truncated to "width" characters if specified.
    z[](),rI   z:;|N)rB   rJ   )r*   widthcharr0   r0   r1   r     s   r   ra   )r7   rf   	Bio.Alignr   Bio.Seqr   Bio.SeqRecordr   
Interfacesr   r   r8   r   r
   r:   r`   rm   rn   ro   r   r0   r0   r0   r1   <module>   s    ]~/X