o
    Rŀg7                     @   sH   d Z ddlmZ ddlmZ ddlmZ ddlmZ G dd deZ	d	S )
ai  Bio.AlignIO support for GCG MSF format.

The file format was produced by the GCG PileUp and and LocalPileUp tools,
and later tools such as T-COFFEE and MUSCLE support it as an optional
output format.

The original GCG tool would write gaps at ends of each sequence which could
be missing data as tildes (``~``), whereas internal gaps were periods (``.``)
instead. This parser replaces both with minus signs (``-``) for consistency
with the rest of ``Bio.AlignIO``.

You are expected to use this module via the Bio.AlignIO functions (or the
Bio.SeqIO functions if you want to work directly with the gapped sequences).
    )MultipleSeqAlignment)Seq)	SeqRecord   )AlignmentIteratorc                   @   s   e Zd ZdZdZdd ZdS )MsfIteratorzGCG MSF alignment iterator.Nc              	      s  | j }| jdu r| }n| j}d| _|stg d}|  d |vr6td|  d d|f |rFd|vrF| }|rFd|vs<|sLtd|d }|d	}||d
  dksl|d dvsl|d dkrrtd| z
t	||d  }W n ty   d}Y nw |dk rtd||d   ||d  }|dvrtd| g }g }	g }
g }| }|rR| dkrR| }| 
drId|v rBd|v rBd|v rB||dd d  }|d\}}|d\}}|d\}}| }|dr	|dd }||v rtd|d|v r!td||| |	t	|  |
t	|  |t|  ntd ||rR| dks|sYtd!|t|	kr{t|	 t fd"d#|	D }td$||t| f | }|std%| rtd&d'd( |D }d}||k rt|D ]
\}}| }|dkr| s|r| s| }|r| r|std)|  }|dkr`|r`|d |kr`zt	|d }W n ty   d}Y nw ||d kr
td*|d |f t|dkrVt|d
krd}nzt	|d }W n ty/   d}Y nw ||d+ |k r<|d+ n|krVtd,|d |d+ |k rP|d+ n||f | }|  }|s|	| |k rztd-|| |	| krzqtd.| d/||d |krt|dksJ ||| |dd  qtd.|d/||d+7 }| }| rtd0|||k s	 | }|sn| sn|  d |v r|| _n	td2|qd3d( |D }d4}tt|	|D ]#\}\}}t||k rt||krd1}|d5|t|   ||< q|r4ddl}dd6lm} |d7| d8d# t|||D }t|}| |krTtd9|| f |S ):z)Parse the next alignment from the handle.N)z!!NA_MULTIPLE_ALIGNMENTz!!AA_MULTIPLE_ALIGNMENTPileUpr   z$%s is not a known GCG MSF header: %sz, z MSF: z6Reached end of file without MSF/Type/Check header line
zMSF:   zType:)zCheck:z
CompCheck:z..zsGCG MSF header line should be '<optional text> MSF: <int> Type: <letter> <optional date> Check: <int> ..',  not: %rr   zCGCG MSF header line should have MDF: <int> for column count, not %r   )PNz]GCG MSF header line should have 'Type: P' (protein) or 'Type: N' (nucleotide), not 'Type: %s'z//zName: z Len: z Check: z	 Weight:    z oozDuplicated ID of  zSpace in ID zMalformed GCG MSF name line: z4End of file while looking for end of header // line.c                 3   s    | ]	}| krd V  qdS )r   N .0_
max_lengthr   E/var/www/html/myenv/lib/python3.10/site-packages/Bio/AlignIO/MsfIO.py	<genexpr>   s    z'MsfIterator.__next__.<locals>.<genexpr>zLGCG MSF header said alignment length %i, but %s of %i sequences said Len: %sz.End of file after // line, expected sequences.z4After // line, expected blank line before sequences.c                 S   s   g | ]}g qS r   r   r   r   r   r   
<listcomp>   s    z(MsfIterator.__next__.<locals>.<listcomp>z*End of file where expecting sequence data.z5Expected GCG MSF coordinate line starting %i, got: %r2   z2Expected GCG MSF coordinate line %i to %i, got: %r zExpected sequence for z, got: zExpected blank line, got: Tz)Unexpected line after GCG MSF alignment: c                 S   s&   g | ]}d  |ddddqS )r   ~-.)joinreplace)r   sr   r   r   r   .  s   & Fr   )BiopythonParserWarningzGOne of more alignment sequences were truncated and have been gap paddedc                 s   s0    | ]\}}}t t||||d |idV  qdS )weight)idnamedescriptionannotationsN)r   r   )r   ir"   wr   r   r   r   @  s
    
z5GCG MSF headers said alignment length %i, but have %i)handle_headerreadlineStopIterationstripsplit
ValueErrorr    indexint
startswithendswithNotImplementedErrorappendfloatmaxsumlen	enumerateextendzipwarningsBior#   warnr   get_alignment_length)selfr+   lineknown_headerspartsoffset
aln_lengthseq_typeidslengthschecksweightsrestr&   lengthcheckr$   	max_countseqscompleted_lengthidxwordsr)   paddedr"   r?   r#   recordsalignr   r   r   __next__#   sv  

'









P


zMsfIterator.__next__)__name__
__module____qualname____doc__r,   rY   r   r   r   r   r      s    r   N)
r]   	Bio.Alignr   Bio.Seqr   Bio.SeqRecordr   
Interfacesr   r   r   r   r   r   <module>   s   