o
    Rŀg3%                     @   s^   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 G dd	 d	ejZdS )
a  Bio.Align support for GCG MSF format.

The file format was produced by the GCG PileUp and LocalPileUp tools, and later
tools such as T-COFFEE and MUSCLE support it as an optional output format.

You are expected to use this module via the Bio.Align functions.
    N)BiopythonParserWarning)	Alignment)
interfaces)Seq)	SeqRecordc                   @   s   e Zd ZdZdZdd ZdS )AlignmentIteratorzGCG MSF alignment iterator.MSFc              	   C   s  zt |}W n ty   | dkrtdd Y d S w g d}|  d |vr;td|  d d|f |D ]}|d}d|v rO|drO nq=td	| }|	d
}||d  dksk|d dvrqtd| z
t
||d  }W n ty   td||d   d w ||d  }|dvrtd| g }g }	g }
g }|D ]x}| }|dkr nq|dr | }z|	d}|	d}|	d}|	d}W n ty   td|d w ||d  }t
||d  }t||d  }||d  }||v rtd||| |	| |
| || qtdzt |}W n ty8   tdd w | rBtdd gt| }|D ]r}| }|sVqK|d }z|	|}W n! ty   |D ]
}| st nqkY qKtd!| d"d w d |dd  }|	| t||d#  }|dk rtd$| ||  |7  < ||	|< td%d& |	D r nqKtd'|D ]}| d ksJ qtd(d& |D }||krtjd)||f tdd* |}t|D ]'\}}d |d+d#d,d#}t||k r|d#|t|  7 }|||< qd-d. |D }t|\}}d/d. t|||D }t||}|j}||krGtd)||f |S )0Nr   zEmpty file.)z!!NA_MULTIPLE_ALIGNMENTz!!AA_MULTIPLE_ALIGNMENTPileUpz$%s is not a known GCG MSF header: %sz, 
zMSF: z..z6Reached end of file without MSF/Type/Check header linezMSF:   zType:)Check:z
CompCheck:zsGCG MSF header line should be '<optional text> MSF: <int> Type: <letter> <optional date> Check: <int> ..',  not: %r   zCGCG MSF header line should have MSF: <int> for column count, not %r   )PNz]GCG MSF header line should have 'Type: P' (protein) or 'Type: N' (nucleotide), not 'Type: %s'z//zName: zName:zLen:zWeight:r   zMalformed GCG MSF name line: zDuplicated ID of z4End of file while looking for end of header // line.z.End of file after // line, expected sequences.z4After // line, expected blank line before sequences. zUnexpected line 'z
' in input-z-Received longer sequence than expected for %sc                 s   s    | ]}|d kV  qdS )r   N ).0lengthr   r   A/var/www/html/myenv/lib/python3.10/site-packages/Bio/Align/msf.py	<genexpr>       z9AlignmentIterator._read_next_alignment.<locals>.<genexpr>z*End of file where expecting sequence data.c                 s   s    | ]}t |V  qd S )N)lenr   seqr   r   r   r      r   z6GCG MSF headers said alignment length %i, but found %i)
stacklevel~.c                 S   s   g | ]}|  qS r   )encoder   r   r   r   
<listcomp>   s    z:AlignmentIterator._read_next_alignment.<locals>.<listcomp>c              	   S   s,   g | ]\}}}t t||||d |idqS )weight)idnamedescriptionannotations)r   r   )r   r$   r   r"   r   r   r   r!      s    )nextStopIterationtell
ValueErrorstripsplitjoinrstripendswithindexint
startswithfloatappendr   isdigitcountallmaxwarningswarnr   	enumeratereplacer   parse_printed_alignmentzipr   )selfstreamlineknown_headerspartsoffset
aln_lengthseq_typenames	remainingchecksweightswords
index_name	index_lenindex_weightindex_checkr$   r   r"   checkseqsr0   wordr   coordinatesrecords	alignmentcolumnsr   r   r   _read_next_alignment   s  

'












	




z&AlignmentIterator._read_next_alignmentN)__name__
__module____qualname____doc__fmtrW   r   r   r   r   r      s    r   )r[   r9   Bior   	Bio.Alignr   r   Bio.Seqr   Bio.SeqRecordr   r   r   r   r   r   <module>   s   