o
    Rŀg@                     @   st   d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ G dd dejZG d	d
 d
ejZdS )a  Bio.Align support for tabular output from BLAST or FASTA.

This module contains a parser for tabular output from BLAST run with the
'-outfmt 7' argument, as well as tabular output from William Pearson's
FASTA alignment tools using the '-m 8CB' or '-m 8CC' arguments.
    N)	Alignment)
interfaces)Seq)	SeqRecordc                   @   s0   e Zd ZdZe Ze Ze Ze Z	dS )Statez=Enumerate alignment states needed when parsing a BTOP string.N)
__name__
__module____qualname____doc__enumautoMATCH	QUERY_GAP
TARGET_GAPNONE r   r   E/var/www/html/myenv/lib/python3.10/site-packages/Bio/Align/tabular.pyr      s    r   c                   @   s<   e Zd ZdZdZdd Zdd Zdd Zd	d
 Zdd Z	dS )AlignmentIteratora<  Alignment iterator for tabular output from BLAST or FASTA.

    For reading (pairwise) alignments from tabular output generated by BLAST
    run with the '-outfmt 7' argument, as well as tabular output generated by
    William Pearson's FASTA alignment programs with the '-m 8CB' or '-m 8CC'
    output formats.
    Tabularc                 C   sP   zt |}W n ty   tdd w |dstd| }| || d S )NzEmpty file.# zMissing header.)nextStopIteration
ValueError
startswithrstrip_parse_header)selfstreamliner   r   r   _read_header-   s   

zAlignmentIterator._read_headerc              	   C   s  i }d}z|dd   d d\}}||vrtdW n1 tyK   |dd  |d< t|}|ds4J |dd    d d\|d< |d< d	| _Y nw |||d< |d< d
| _|D ]}| }|dsgJ z|dd   d\}}W n ty   d}	||	sJ t|dt	|	  }
Y  new |dkr|d dkr|
dd\}}|  \}}t|| _|dv sJ n|}d | _z| d d\| _| _W qZ ty   | | _d | _Y qZw |dkr||d< qZ|dkr| d| _qZ|dkr||d< qZ|| _d S )N)	BLASTNBLASTPBLASTXTBLASTNTBLASTX
DELTABLASTPSIBLASTRPSBLAST
RPSTBLASTN      zNot a BLAST programzCommand liner   ProgramVersionz# FASTA processed z# BLAST processed z: z hits foundQueryFASTAz - )ntaaDatabaseFieldsz, RID)splitr   r   r   r   _final_prefixstripendswithintlenrsplit_query_size	_query_id_query_description_fieldsmetadata)r   r   r   r?   blast_programsprogramversionprefixvaluesuffixhits
query_line
query_sizeunitr   r   r   r   7   sb   $
	




zAlignmentIterator._parse_headerc                  C   s  |D ]*}|  }|dr,|| jr%|dr%| `| `| `| `| ` d S | || q d }d }d }d }d }d }d }	d }
d }d }d }d }d }d }d }| j}|	d}t
|t
| jks^J i }i }i }t|| jD ]\}}|dkr|}| jd ur|| jksJ qj|dkr|}	qj|dkrt|||< qj|dkrt|}qj|dkrt|||< qj|d	krt|||< qj|d
krt|}
qj|dkrt|}qj|dkrt|}qj|dkrt|}qj|dkrt||d< qj|dkrt||d< qj|dkr| |}qj|dkr| |}qj|dkr||d< qj|dkr||d< qj|dkr'||d< |d u r&|}qj|dkr@|d u r6t|}qj|t|ks?J qj|dkrJ||d< qj|dkrT||d< qj|dkr^||d< qj|dkrh||d< qj|dkrr||d < qj|d!kr|||d"< qj|d#kr||d$< qj|d%kr||d&< qj|d'kr||d(< qj|d)kr||d*< qj|d+kr||d,< qj|d-kr||d.< qj|d/kr||d0< qj|d1krt||d2< qj|d3kr||d< |	d u r|}	qj|d4krt|}qj|d5kr|}qj|d6kr|}qj|d7krt|}qj|d8krt|}|||< qj|d9krt|||< qj|d:kr)t|||< qj|d;kr5t|||< qj|d<krAt|||< qj|d=krK|||< qj|d>krU||d?< qj|d@kr_||d?< qjtdA| | jdB }|d u rx|d urx||d< |
d ur|d ur|
|k r|
dC8 }
n|dC8 }|d ur|d ur||k r|dC8 }n|dC8 }|d u s|dDv r|
d ur|
|dE< |d ur||dF< n'|d ur|
|k r|dCd d f  |
7  < n|
|dCd d f  |dCd d f< |d u s|dGv r	|d ur||dE< |d ur||dF< n|d ur|dHd d f  |7  < |d u r.|d u r'd }n>td |dI}n7|dJdK}|dLkrMt
|||
 ksDJ t|
|i|dI}n|dMkr_|
|dE< ||dF< t|}ntdN| t||dO}| jd uru| j|_|r{||_| jdB dGv r||dP< |d u rd }nB|dJdK}t|}n7|d u r|d u rd }n*td |dI}n#|dJdK}|d ur|d urt
||| ksJ t||i|dI}t||	dO}|r||_||g}t||}||_|d ur||_|S )QNr   z queries	zquery idz
subject idz
% identityzalignment length
mismatchesz	gap openszq. startzq. endzs. startzs. endevaluez	bit scoreBTOPaln_codezquery gigiz
query acc.zacc.zquery acc.verzacc.verzquery lengthzsubject idsidsz
subject gizsubject gisgiszsubject acc.zsubject accs.zaccs.zsubject tax idsztax idszsubject sci namesz	sci nameszsubject com namesz	com nameszsubject blast nameszblast nameszsubject super kingdomszsuper kingdomszsubject titletitlezsubject titlestitleszsubject strandstrandz% subject coveragez
% coveragezsubject acc.verzsubject lengthz	query seqzsubject seqscore	identical	positivesgapsz% positivesz% hsp coveragezquery/sbjct frameszquery frameframezsbjct framezUnexpected field '%s'r+   r*   )r"   r$   startend)r#   r$   r   )length- r#   r$   zUnknown program %s)idr\   )r   r   r5   r7   r>   r<   r=   r;   r   r4   r9   zipfloatr8   
parse_btopparse_cigarr   r?   r   replace	Exceptionr   descriptionannotationsr   rU   ) r   r   r   alignment_lengthrV   btopcigarrU   query_id	target_idquery_start	query_endtarget_start
target_endquery_sequencetarget_sequencetarget_lengthcoordinatesrH   columnsrg   query_annotationstarget_annotationscolumnfieldrA   	query_seqquery
target_seqtargetrecords	alignmentr   r   r   _read_next_alignments   s  











































































 













z&AlignmentIterator._read_next_alignmentc           	   	   C   sT  g }g }| d | d tj}td|}|D ]}|dr?|tjkr6| |d  | |d  tj}|d  d7  < q|drc|tjkrZ| |d  | |d  tj}|d  d7  < qzt	|}W n t
yt   d}Y nw |tjkr|d  |7  < |d  |7  < q| |d |  | |d |  tj}qt||g}|S )zParse a BTOP string and return alignment coordinates.

        A BTOP (Blast trace-back operations) string is used by BLAST to
        describe a sequence alignment.
        r   z([A-Z-*]{2}|\d+)r]   r*   )appendr   r   refindallr   r   r7   r   r8   r   r   nparray)	r   ri   target_coordinatesquery_coordinatesstatetokenstokenr\   rt   r   r   r   rb   O  s@   






zAlignmentIterator.parse_btopc                 C   s   g }g }d}d}| | | | tj}td|}t|ddd |ddd D ]0\}}	t|}|	dkr?||7 }||7 }n|	dkrH||7 }n|	dkrP||7 }| | | | q*t||g}
|
S )	a  Parse a CIGAR string and return alignment coordinates.

        A CIGAR string, as defined by the SAM Sequence Alignment/Map format,
        describes a sequence alignment as a series of lengths and operation
        (alignment/insertion/deletion) codes.
        r   z(M|D|I|\d+)Nr)   r*   MID)	r   r   r   r   r   r`   r8   r   r   )r   rj   r   r   target_coordinatequery_coordinater   r   r\   	operationrt   r   r   r   rc   {  s*   

&


zAlignmentIterator.parse_cigarN)
r   r   r	   r
   fmtr   r   r   rb   rc   r   r   r   r   r   "   s    
< ],r   )r
   r   r   numpyr   	Bio.Alignr   r   Bio.Seqr   Bio.SeqRecordr   Enumr   r   r   r   r   r   <module>   s   	