o
    Rŀgg                     @   sd   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	 G dd dej
Z
G d	d
 d
ejZdS )a?  Bio.Align support for Exonerate output format.

This module provides support for Exonerate outputs. Exonerate is a generic
tool for pairwise sequence comparison that allows you to align sequences using
several different models.

Bio.Align.exonerate was tested on the following Exonerate versions and models:

    - version: 2.2
    - models:
      - affine:local                - cdna2genome
      - coding2coding               - est2genome
      - genome2genome               - ner
      - protein2dna                 - protein2genome
      - ungapped                    - ungapped:translated

Although model testing were not exhaustive, the parser should be able to cope
with all Exonerate models. Please file a bug report if you stumble upon an
unparsable file.

You are expected to use this module via the Bio.Align functions.
    N)	Alignment)
interfaces)Seq)	SeqRecordc                       sF   e Zd ZdZdZd fdd	Zdd Zdd	 Zd
d Zdd Z	  Z
S )AlignmentWriterzEAlignment file writer for the Exonerate cigar and vulgar file format.	Exoneratevulgarc                    s@   t  | |dkr| j| _dS |dkr| j| _dS td| )a  Create an AlignmentWriter object.

        Arguments:
         - target    - output stream or file name
         - fmt       - write alignments in the vulgar (Verbose Useful Labelled
                       Gapped Alignment Report) format (fmt="vulgar") or in
                       the cigar (Compact Idiosyncratic Gapped Alignment Report)
                       format (fmt="cigar").
                       Default value is 'vulgar'.

        r   cigarz8argument fmt should be 'vulgar' or 'cigar' (received %s)N)super__init___format_alignment_vulgarformat_alignment_format_alignment_cigar
ValueError)selftargetfmt	__class__ G/var/www/html/myenv/lib/python3.10/site-packages/Bio/Align/exonerate.pyr   +   s   zAlignmentWriter.__init__c                 C   sf   z|j }W n ty   d}d}Y nw |dd}|dd}|d| d |d| d dS )zWrite the header. Command lineHostnamezCommand line: [z]
zHostname: [N)metadataAttributeErrorgetwrite)r   stream
alignmentsr   commandlinehostnamer   r   r   write_headerA   s   
zAlignmentWriter.write_headerc                 C   s   | d dS )zWrite the footer.z -- completed exonerate analysis
N)r   )r   r   r   r   r   write_footerN   s   zAlignmentWriter.write_footerc              
   C   s  t |ts	td|j}|d }|d }|d }|d }t|}|j}|j}	z|j}
W n t	y7   d}
Y nw z|	j}W n t	yH   d}Y nw z|	j
d }W n t	tfy]   d	}Y nw |d
kred}n||krld}n||krd}|dd	d	f  |dd	d	f< z|j
d }W n t	tfy   d	}Y nw |d
krd}n||krd}n||krd}|dd	d	f  |dd	d	f< t|jd}d|
t|t|||t|t|||g
}z|j}W nf t	y>   | D ]V}|\}}||krd}|}n;|dkrd}|}n2|dkrd}|}n(|d
kr|d
krd}|}n|d
kr$|d
kr$d}|}n
td||||f || |t| qY nw t| | D ]t\}}|\}}|dkr||kr_|}nQ|d| krx|}|d
ksoJ |d
ksvJ n8|d| kr|}|d
ksJ |d
ksJ ntd|dkr|dkr|}d}n|dkr|}d}n ||ksJ |}d}n|dkr|dkr|}d}n|dkr|}d}ntd||f |dkr|dkr|}d}n|dkr|}d}n||ksJ |}d}n|dkr||ksJ |}d}n|dkr$|dksJ |}d}n|dkr3|dks0J |}n}|dkr`|dkrKd}|| |t| |dkr^d}|| |t| qH|dkr|dkrxd}|| |t| |dkrd}|| |t| qH|dkr|dkr|}d}n|dkr|}d}n
td td!| || |t| qHd"|d# }|S )$zBReturn a string with a single alignment formatted as a cigar line.Expected an Alignment objectr   r   r      r   r)   r'   queryr   molecule_typeNprotein.+-r   r)   gzcigar:MDIz<Unexpected step target %d, query %d for molecule type %s, %s   z6Unexpected steps target %d, query %s for operation 'M'5Nz0Unexpected intron with steps target %d, query %d3CUSFz*Expected target step or query step to be 0Unknown operation %s 
)
isinstancer   	TypeErrorcoordinatesnpdiffr+   r   idr   annotationsKeyErrorformatscorestr
operations	transposer   appendzipdecodejoin)r   	alignmentrB   target_start
target_endquery_start	query_endstepsr+   r   query_id	target_idtarget_molecule_typetarget_strandquery_molecule_typequery_strandrI   wordsrK   steptarget_step
query_step	operationliner   r   r   r   R   sn  










	%



























z'AlignmentWriter._format_alignment_cigarc                 C   s  t |ts	td|j}|d }|d }|d }|d }t|}|j}|j}	z|j}
W n t	y7   d}
Y nw z|	j}W n t	yH   d}Y nw z|	j
d }W n t	tfy]   d	}Y nw |d
kred}n||krld}n||krd}|dd	d	f  |dd	d	f< z|j
d }W n t	tfy   d	}Y nw |d
krd}n||krd}n||krd}|dd	d	f  |dd	d	f< t|jd}d|
t|t|||t|t||t|g
}z|j}W n] t	y7   | D ]M}|\}}||krd}n-|dkrd}n&|dkrd}n|d
kr|d
krd}n|d
kr|d
krd}ntd|| |t| |t| qY n?w | }| }t|}d}||k rv|| \}}|| }|dkr||kr`n|d| krv|d
ksnJ |d
ksuJ n|d| kr|d
ksJ |d
ksJ ntd||f |dkr|dks|dksJ n|dkrd}|dks|dksJ n|dkr|dks|dksJ n|dkr||ksJ n|dkr|dksJ d}nr|dkr|dksJ d}nc|dkrD|dkr|dksJ |d7 }|| \}}|dksJ |dkr6|dks%J |d7 }|| \}}|dks6J || }|dksAJ d}n|dkrL|}n|d krT|}ntd!| || |t| |t| |d7 }||k sKd"|d# }|S )$zEReturn a string with a single alignment formatted as one vulgar line.r$   r%   r&   r(   r*   r+   r   r,   Nr-   r.   r/   r0   r   r)   r1   zvulgar:r2   Gz#Both target and query step are zeror5   z6Unexpected steps target %d, query %d for operation 'M'r6      r7   r4   r8   r9   r3   r:   r;   r<   r=   r>   r?   )r@   r   rA   rB   rC   rD   r+   r   rE   r   rF   rG   rH   rI   rJ   rK   rL   r   rM   rO   lenrP   )r   rQ   rB   rR   rS   rT   rU   rV   r+   r   rW   rX   rY   rZ   r[   r\   rI   r]   rK   r^   r_   r`   ra   nidummyrb   r   r   r   r     s   


























8z(AlignmentWriter._format_alignment_vulgar)r   )__name__
__module____qualname____doc__r   r   r"   r#   r   r   __classcell__r   r   r   r   r   &   s     Ar   c                   @   s<   e Zd ZdZdZdd Zedd Zedd Zd	d
 Z	dS )AlignmentIteratora1  Alignment iterator for the Exonerate text, cigar, and vulgar formats.

    Each line in the file contains one pairwise alignment, which are loaded
    and returned incrementally.  Alignment score information such as the number
    of matches and mismatches are stored as attributes of each alignment.
    r   c                 C   s   i | _ d| j d< t|}d}||sJ |t|d   }|ds&J |ds-J |dd | j d< t|}d	}||sCJ |t|d   }|dsTJ |ds[J |dd | j d
< d S )N	exonerateProgramzCommand line: []r)   r'   r   z
Hostname: r   )r   next
startswithre   stripendswith)r   r   rb   prefixr    r!   r   r   r   _read_header  s    
zAlignmentIterator._read_headerc                 C   sT  | d }t | d }t | d }| d }| d }t | d }t | d }| d }t| d	 }	d}
d}t| d	 d }td|d ft }||d
< |
|d< tt| dd d | dd d D ]]\}\}}t |}|dkrt||7 }|
|7 }
n6|dkr|dkr|dkr|
|d 7 }
n#|
|7 }
n|dkr|dkr|dkr||d 7 }n||7 }ntd| ||d|d f< |
|d|d f< q]|dkr|dd d f  |7  < |}d }nE|dkr||dd d f  |dd d f< |}d }n,|dkr|dkr|dd d f d d |dd d f< |dd d f  |7  < d}|}|dkr+|dd d f  |7  < |}d }nE|dkrE||dd d f  |dd d f< |}d }n+|dkrp|dkr`|dd d f d  |dd d f< |dd d f  |7  < d}|}td |d}td |d}t	||dd}t	||dd}|d ur||j
d< |d ur||j
d< t||g|}|	|_|S )Nr   r)   rd   r5                  r%   r(   	   
   r2   r4   r.   r3   z$Unknown operation %s in cigar stringr/   r0   r-   lengthr   rE   descriptionr,   )intfloatre   rC   empty	enumeraterN   r   r   r   rF   r   rI   )r]   rW   rT   rU   r\   rX   rR   rS   rZ   rI   qstsrf   rB   rg   ra   r^   target_lengthrY   query_lengthr[   
target_seq	query_seqr   r+   rQ   r   r   r   _parse_cigar  s   .


 

$

 

"



zAlignmentIterator._parse_cigarc                 C   s:  | d }t | d }t | d }| d }| d }t | d }t | d }| d }t| d	 }	| d
d d }
d}d}t| d	 d |
d }td|d ft }||d< ||d< t|}d}t|
| dd d | dd d D ]\}}}t |}t |}|dkrn|dkr|dks|dksJ n|dkrd}n|dkr|dks|dksJ n|dkr|d dksJ |d dksJ ny|dkr|dkrd}nn|dkrd}ngtd||f |dkr!d}|dkr||7 }||d|d f< ||d|d f< t	|||< |d7 }|dkr ||7 }||d|d f< ||d|d f< t	|||< |d7 }qn|dkr'n|dkr-ntd| ||7 }||7 }||d|d f< ||d|d f< t	|||< |d7 }qn|dkrl|dd d f  |7  < |}d }n/|dkr||dd d f  |dd d f< |}d }n|dkr|dd d f  |7  < d}|}|dkr|dd d f  |7  < |}d }n/|dkr||dd d f  |dd d f< |}d }n|dkr|dd d f  |7  < d}|}t
d |d }t
d |d }t||d!d"}t||d!d"}|d ur||jd#< |d ur||jd#< t||g|}||_|	|_|S )$Nr   r)   rd   r5   ry   rz   r{   r|   r}   r~   r7   r%   r(   r      r2   r6   r4   r8   r9   rc   r3   z9Unexpected gap operation with steps %d, %d in vulgar liner:   r;   r<   z%Unknown operation %s in vulgar stringr/   r0   r.   r-   r   r   r   r,   )r   r   re   countrC   r   	bytearrayrN   r   ordr   r   rF   r   rK   rI   )r]   rW   rT   rU   r\   rX   rR   rS   rZ   rI   opsr   r   rf   rB   rK   rg   ra   r`   r_   r   rY   r   r[   r   r   r   r+   rQ   r   r   r   _parse_vulgar  s   *






 


 




zAlignmentIterator._parse_vulgarc              	   C   s   |D ]J}|  }|dkr!z	t| W td ty    Y  d S w |dr7|dd   }| |}|  S |drI|dd   }| |}|  S td)Nz-- completed exonerate analysiszIFound additional data after 'completed exonerate analysis'; corrupt file?zvulgar: r}   zcigar: r|   z>Failed to find 'completed exonerate analysis'; truncated file?)ru   rs   StopIterationr   rt   splitr   r   )r   r   rb   r]   rQ   r   r   r   _read_next_alignment|  s,   




z&AlignmentIterator._read_next_alignmentN)
ri   rj   rk   rl   r   rx   staticmethodr   r   r   r   r   r   r   rn     s    
M
mrn   )rl   numpyrC   	Bio.Alignr   r   Bio.Seqr   Bio.SeqRecordr   r   rn   r   r   r   r   <module>   s      