o
    Rŀg                      @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ dZe	d	ej
Ze	d
ej
Zdd ZG dd deZG dd deZedkrQddlmZ e  dS dS )z7Bio.SearchIO parser for Exonerate vulgar output format.    N)Type   )_BaseExonerateIndexer)_BaseExonerateParser)_STRAND_MAP)ExonerateVulgarParserExonerateVulgarIndexerz^vulgar:\s+
        (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+  # query: ID, start, end, strand
        (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+  # hit: ID, start, end, strand
        (\d+)(\s+.*)$                         # score, vulgar components
        a+  
        \s+(\S+) # vulgar label (C/M: codon/match, G: gap, N: ner, 5/3: splice
                 #               site, I: intron, S: split codon, F: frameshift)
        \s+(\d+) # how many residues to advance in query sequence
        \s+(\d+) # how many residues to advance in hit sequence
        c                 C   s^  | d g}g }| d g}g }g | d< g | d< g | d< g | d< | d }| d }| d dkr.d	nd
}| d dkr8d	nd
}	t t|}
t|
D ]\}}|d t|d	 t|d }}}|dv sdJ d| |dv r||
|d	  d dvr||| || |dkr||}}|||  }|||	  }t||t||}}t||t||}}| d ||f | d ||f ||| 7 }|||	 7 }|t|
d	 ks|dv r|
|d	  d dvr|| || qDdD ]2}| |d  }|dk r| |d  | |d  | |d < | |d < |dkr||}}q||}}qt	t
||| d< t	t
||| d< | S )z:Parse the vulgar components present in the hsp dictionary.query_start	hit_startquery_split_codonshit_split_codonsquery_ner_rangeshit_ner_rangesquery_strandr   r   
hit_strand   	MCGF53INSzUnexpected vulgar label: %rMCGSS)query_hit_strandendstartr   query_ranges
hit_ranges)refindall	_RE_VCOMP	enumerateintappendminmaxlenlistzip)hspvulgar_compqstartsqendshstartshendsqposhposqmovehmovevcompsidxmatchlabelqstephstepqstarthstartqendhendsqstartsqendshstartshendseq_typer    rA   ]/var/www/html/myenv/lib/python3.10/site-packages/Bio/SearchIO/ExonerateIO/exonerate_vulgar.pyparse_vulgar_comp'   s`   

$









rC   c                   @   s   e Zd ZdZdZdd ZdS )r   z$Parser for Exonerate vulgar strings.vulgarc                 C   s  |d }|d }|d }|  dd  tt| j}| jr|d |dks(J |d |d	ks3J |d
 |dks>J |d |dksIJ |d |dksTJ |d |dks_J |d |dksjJ |d |dksuJ |d |dksJ n?|d|d< |d	|d< |d|d
< |d|d< |d|d< |d|d< |d|d< |d|d< |d|d< t|d  |d< t|d  |d< t|d |d< t|d
 |d
< t|d |d< t|d |d< t|d |d< |d	 |d< t
||d }|||dS )zJParse alignment block for vulgar format, return query results, hits, hsps.qresulthitr(   c                 S   s
   |  dS )NrD   )
startswith)linerA   rA   rB   <lambda>   s   
 z=ExonerateVulgarParser.parse_alignment_block.<locals>.<lambda>idr   r	   r   	query_end   r         r
      hit_end   r      score	   
   r)   )rE   rF   r(   )
read_untilr   search
_RE_VULGARrH   has_c4_alignmentgroupr   r!   rstriprC   )selfheaderrE   rF   r(   vulgarsrA   rA   rB   parse_alignment_block   sD   z+ExonerateVulgarParser.parse_alignment_blockN)__name__
__module____qualname____doc__	_ALN_MARKr_   rA   rA   rA   rB   r   |   s    r   c                   @   s6   e Zd ZU dZeZee ed< dZ	dd Z
dd ZdS )	r   z)Indexer class for exonerate vulgar lines._parsers   vulgarc                 C   sF   | j }|| | }|| jsJ |tt| }|	dS )z/Return the query ID of the nearest vulgar line.r   )
_handleseekreadlinerG   _query_markr   rW   rX   decoderZ   )r\   poshandlerH   rJ   rA   rA   rB   get_qresult_id   s   

z%ExonerateVulgarIndexer.get_qresult_idc                 C   s~   | j }|| d}d}	 | }|s	 |S || jr:| t| }|du r.| |}n| |}||kr:	 |S ||7 }q)zJReturn the raw bytes string of a QueryResult object from the given offset.N    )rf   rg   rh   rG   ri   tellr%   rm   )r\   offsetrl   qresult_keyqresult_rawrH   cur_poscurr_keyrA   rA   rB   get_raw   s&   

zExonerateVulgarIndexer.get_rawN)r`   ra   rb   rc   r   re   r   r   __annotations__ri   rm   ru   rA   rA   rA   rB   r      s   
 
r   __main__)run_doctest)rc   r   typingr   _baser   r   r   __all__compileVERBOSErX   r   rC   r   r   r`   
Bio._utilsrx   rA   rA   rA   rB   <module>   s,   	U5)
