o
    Rŀg                     @   sX  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ dZdefdefdefdefdefdefdefdZdefdefdZdefdefdefdefdefdefdefd Zd!efd"efd#efd$efd%efd&efd'efd(efd)efd*efd+efd,Zd-efd.efd/efd0efd1efd2efd3efd4efd5efd6efd7efd8efd9efd:efd;Zd<efd=efd>efd?Zd@dAdBdCdDdEdFZdGZedHZ edIZ!edJZ"dKdL Z#G dMdN dNZ$G dOdP dPeZ%G dQdR dRe	Z&G dSdT dTZ'e(dUkr*ddVl)m*Z* e*  dS dS )Wz2Bio.SearchIO parser for BLAST+ XML output formats.    N)chain)ElementTree)escape)XMLGenerator)BiopythonParserWarning)SearchIndexer)Hit)HSP)HSPFragment)QueryResult)BlastXmlParserBlastXmlIndexerBlastXmlWriterstat_db_numstat_db_lenstat_eff_spacestat_hsp_len
stat_kappastat_lambdastat_entropy)zStatistics_db-numzStatistics_db-lenzStatistics_eff-spacezStatistics_hsp-lenStatistics_kappaStatistics_lambdaStatistics_entropy	accessionseq_len)Hit_accessionHit_lenbitscorebitscore_rawevalue	ident_numpos_numgap_numdensity)Hsp_bit-score	Hsp_score
Hsp_evalueHsp_identityHsp_positiveHsp_gapsHsp_densityquery_start	query_end	hit_starthit_endquery_frame	hit_framealn_spanpattern_startpattern_endhitquery)zHsp_query-fromzHsp_query-tozHsp_hit-fromz
Hsp_hit-toHsp_query-frameHsp_hit-frameHsp_align-lenHsp_pattern-fromHsp_pattern-toHsp_hseqHsp_qseqtargetprogramversion	referenceparam_evalue_thresholdparam_entrez_queryparam_filterparam_gap_extendparam_gap_openparam_includeparam_matrixparam_patternparam_score_matchparam_score_mismatch)BlastOutput_dbBlastOutput_programBlastOutput_versionBlastOutput_referenceParameters_expectParameters_entrez-queryParameters_filterzParameters_gap-extendzParameters_gap-openParameters_includeParameters_matrixParameters_patternParameters_sc-matchParameters_sc-mismatchiddescriptionlen)BlastOutput_query-IDBlastOutput_query-defzBlastOutput_query-len))r>   r>   )r?   r?   )r@   r@   )dbr=   zquery-IDrW   z	query-defrX   z	query-lenr   )paramN)	)matrixrG   )expectrA   )zsc-matchrI   )zsc-mismatchrJ   )zgap-openrE   )z
gap-extendrD   )filterrC   )patternrH   )zentrez-queryrB   )r]   r^   r_   ))zdb-numr   )zdb-lenr   )zhsp-lenr   )z	eff-spacer   )kappar   )lambdar   )entropyr   ))rW   rW   )defrX   )r   r   )rY   r   ))z	bit-scorer   )scorer   )r   r   )z
query-fromr+   )zquery-tor,   )zhit-fromr-   )zhit-tor.   )zpattern-fromr2   )z
pattern-tor3   )zquery-framer/   )z	hit-framer0   )identityr    )positiver!   )gapsr"   )z	align-lenr1   )r#   r#   )qseqr5   )hseqr4   )midlineN)preambler`   qresultstatr4   hsp)zBlastOutput_query-seqBlastOutput_mbstatIteration_query-defIteration_query-lenzIteration-hitsIteration_statIteration_messagerS   rR   rU   rV   rQ   rT   rP   Hit_hspsr9   r:   r6   r7   r'   r(   r)   r8   r*   Hsp_midlinez\d+\.\d+\.\d+\+?z +>z +c                 C   s   g }g }| }|  dr|}n| d | }dd tt|D }|D ]}t|dkr.|d ||d  ||d  q!|||fS )	aI  Extract IDs, descriptions, and raw ID from raw values (PRIVATE).

    Given values of the ``Hit_id`` and ``Hit_def`` elements, this function
    returns a tuple of three elements: all IDs, all descriptions, and the
    BLAST-generated ID. The BLAST-generated ID is set to ``None`` if no
    BLAST-generated IDs are present.

    zgnl|BL_ORD_ID| c                 S   s   g | ]
}t jt|d dqS )   )maxsplit)resplit_RE_ID_DESC_PATTERN).0x r   R/var/www/html/myenv/lib/python3.10/site-packages/Bio/SearchIO/BlastIO/blast_xml.py
<listcomp>   s    z*_extract_ids_and_descs.<locals>.<listcomp>    r   r|   )
startswithr~   r   _RE_ID_DESC_PAIRS_PATTERNrY   append)raw_idraw_descidsdescsblast_gen_idid_desc_lineid_desc_pairspairr   r   r   _extract_ids_and_descs   s   	



r   c                   @   sB   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dS )r   z Parser for the BLAST XML format.Fc                 C   s4   t tj|dd| _|| _|| _|  \| _| _dS )Initialize the class.)startend)eventsN)	iterr   	iterparsexml_iter_use_raw_query_ids_use_raw_hit_ids_parse_preamble_meta	_fallbackselfhandleuse_raw_query_idsuse_raw_hit_idsr   r   r   __init__   s   zBlastXmlParser.__init__c                 c   s    |   E dH  dS )z8Iterate over BlastXmlParser object yields query results.N)_parse_qresultr   r   r   r   __iter__   s   zBlastXmlParser.__iter__c                 C   s   i }i }| j D ][\}}|dkr1|jtv r1t|j \}}|tur'||j||< n|j||< |  q|dkrW|jtv rWt|j \}}|turM||j||< n|j||< |  q|dkrb|jdkrb nq|ddurwt	t
|d d|d< ||fS )z=Parse all tag data prior to the first query result (PRIVATE).r   r   	Iterationr?   Nr   )r   tag
_ELEM_METAstrtextclear_ELEM_QRESULT_FALLBACKgetr~   search_RE_VERSIONgroup)r   metafallbackeventelem	attr_namecasterr   r   r   r      s.   

zBlastXmlParser._parse_preamblec              	   c   s    | j D ]\}}|dkr|jdkr|d}|du r"| jd }|d}|du r0| jd }|d}|du r>| jd	 }|}| jsd|d
rd|dd}|d }z|d }W n tyc   d}Y nw g g }}	| |	d|D ]9}
|
r|
j
|	v rtd|
j
|
j|f t |
j
 d|
j |
_|
j|
_
|
D ]}|
j|_qn|	|
j
 ||
 qrt||}||_t||_||_| j D ]
\}}t||| q|	d}|dur|	d}t D ]&\}}||}|dur|d }|dur|tur||}t||d | q|  |V  qdS )zParse query results (PRIVATE).r   r   Iteration_query-IDNrW   ru   rX   rv   rY   )Query_zlcl|r{   r|   r   r   Iteration_hitszRenaming hit ID %r to a BLAST-generated ID %r since the ID was already matched by your query %r. Your BLAST database may contain duplicate entries.rw   
Statistics)r   r   findtextr   r   r   r   
IndexError
_parse_hitfindrW   warningswarnblast_idr   rX   hit_idr   r   intr   r   itemssetattr_ELEM_QRESULT_OPTr   r   )r   r   qresult_elemquery_id
query_desc	query_lenblast_query_idid_deschit_listkey_listr4   rs   rq   keyvaluestat_iter_elem	stat_elemval_infor   r   r   r   r   "  s   


















zBlastXmlParser._parse_qresultc                 c   s"   |du rg }|D ]}| d}| d}| js!t||\}}}n
|g|g|}}}|d |dd }	}
|d |dd }}t| |d||	}t|}||_|
|_||_	||_
t D ]%\}}| |}|dur|d }|dur~|tur~||}t||d | qa|  |V  q	dS )a1  Yield a generator object that transforms Iteration_hits XML elements into Hit objects (PRIVATE).

        :param root_hit_elem: root element of the Iteration_hits tag.
        :type root_hit_elem: XML element tag
        :param query_id: QueryResult ID of this Hit
        :type query_id: string

        NHit_idHit_defr   r|   ry   )r   r   r   list
_parse_hspr   r   rX   _id_alt_description_altr   	_ELEM_HITr   r   r   r   )r   root_hit_elemr   hit_elem
raw_hit_idraw_hit_descr   r   blast_hit_idr   alt_hit_idshit_descalt_hit_descshspsr4   r   r   r   r   r   r   r   r     s<   


zBlastXmlParser._parse_hitc              
   c   s   |du rg }|D ]}i }t ||}t D ]/\}}||}	|d }
|	durE|dr5|
|	||d < q|
tur=|
|	}	t||d |	 q|d|jd< dD ]2}|d }|d	 }z
|| }|| }W n	 tym   Y qPw t||t	||d  t||t
|| qP| jd
}|dkrd|_n|dv rd|_t|g}t D ]!\}}||}	|d }
|	dur|
tur|
|	}	t||d |	 q|  |V  q	dS )aK  Yield a generator object that transforms Hit_hsps XML elements into HSP objects (PRIVATE).

        :param root_hsp_frag_elem: the ``Hit_hsps`` tag
        :type root_hsp_frag_elem: XML element tag
        :param query_id: query ID
        :type query_id: string
        :param hit_id: hit ID
        :type hit_id: string

        Nr|   )z-fromz-tor   rz   
similarity)r5   r4   rd   _start_endr>   blastnDNA)blastpblastxtblastntblastxprotein)r
   
_ELEM_FRAGr   r   endswithr   r   aln_annotationKeyErrorminmaxr   r   molecule_typer	   	_ELEM_HSPr   )r   root_hsp_frag_elemr   r   hsp_frag_elemcoordsfragr   r   r   r   
coord_type
start_typeend_typer   r   progrs   r   r   r   r     s\   #




zBlastXmlParser._parse_hspN)FF)
__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   r      s    
,l<r   c                   @   s@   e Zd ZdZeZdZdZdZdd Z	dd Z
d	d
 Zdd ZdS )r   z#Indexer class for BLAST XML output.s   <Iteration>s   </Iteration>i @  c                 K   s6   t | | | j| jfi |}|j|j| _| _dS )r   N)r   r   _parser_handler   r   )r   filenamekwargsiter_objr   r   r   r   2  s   zBlastXmlIndexer.__init__c                 c   s   | j }| j}d}| j}| j}|d td}td}d}	 | }	| }
|
s-dS ||
vr2q |
	|dks=J d|

 |sHJ |
||
v rO|
}n0|
g}|
ro||
vro| }
||
vsdJ |
||
 |
ro||
vsX|
 |szJ |
d|}|	|dksJ d	| |	|dksJ d	| t||}z|d
}|d}W n ty   t||sJ | jd  }| jd  }Y nw ||r|ddd }| |	t|fV  |d7 }q!)zLIterate over BlastXmlIndexer yields qstart_id, start_offset, block's length.s   Query_r   sb   <Iteration_query-ID>(.*?)</Iteration_query-ID>\s+?<Iteration_query-def>(.*?)</Iteration_query-def>s   </Iteration_query-def>Tr|   zXML without line breaks?    zXML without line breaks? %rr   rX   rW       N)qstart_mark	qend_mark
block_sizer  seekr~   compiletellreadlinecountlstripr   r   rstripr   joinr   r   AttributeErrorr   encoder   decoderY   )r   r  r  blast_id_markr  r   re_descre_desc_endcounterstart_offsetlineblockregxqstart_desc	qstart_idr   r   r   r   9  s^   





zBlastXmlIndexer.__iter__c                 C   s0   | j |fi | j}| j|_| j|_tt|S )z~Overwrite SearchIndexer parse (PRIVATE).

        As we need to set the meta and fallback dictionaries to the parser.
        )r  _kwargsr   r   nextr   )r   r   	generatorr   r   r   _parser  s   zBlastXmlIndexer._parsec                 C   sv   | j }| j}|| | }| | jsJ ||vr'|| 7 }||vs| |s0J |	|dks9J |S )z6Return the raw record from the file as a bytes string.r|   )
r  r  r  r  r  r   r  r  r   r  )r   offsetr  r   qresult_rawr   r   r   get_raw|  s   
zBlastXmlIndexer.get_rawN)r  r  r  r  r   r  r  r  r  r   r   r'  r*  r   r   r   r   r   *  s    9
r   c                   @   sh   e Zd ZdZdddZdd ZdddZdd ZdddZdd Z	dd Z
dd ZdddZdd Zd	S )_BlastXmlGeneratorzEvent-based XML Generator.utf-8r{   r   c                 C   s*   t | || || _d| _|| _g | _dS )r   r   N)r   r   _indent_level
_increment_parent_stack)r   outencodingindent	incrementr   r   r   r     s
   
z_BlastXmlGenerator.__init__c                 C   s   |  d dS )zStart the XML document.z<?xml version="1.0"?>
<!DOCTYPE BlastOutput PUBLIC "-//NCBI//NCBI BlastOutput/EN" "http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd">
N)_writer   r   r   r   startDocument  s   z _BlastXmlGenerator.startDocumentNFc                 C   s0   |du ri }|  | j| j  t| || dS )a  Start an XML element.

        :param name: element name
        :type name: string
        :param attrs: element attributes
        :type attrs: dictionary {string: object}
        :param children: whether the element has children or not
        :type children: bool

        N)ignorableWhitespacer-  r.  r   startElement)r   nameattrschildrenr   r   r   r8    s   z_BlastXmlGenerator.startElementc                 C   s   t | | | d dS )z&End and XML element of the given name.
N)r   
endElementr5  r   r9  r   r   r   r=    s   z_BlastXmlGenerator.endElementc                 C   sF   |du ri }| j ||dd |  j| j7  _| d | j| dS )zStart an XML element which has children.

        :param name: element name
        :type name: string
        :param attrs: element attributes
        :type attrs: dictionary {string: object}

        NT)r;  r<  )r8  r.  r/  r5  r0  r   )r   r9  r:  r   r   r   startParent  s   	
z_BlastXmlGenerator.startParentc                 C   s:   | j  }|  j| j8  _| | j| j  | | dS )z!End an XML element with children.N)r0  popr.  r/  r7  r-  r=  r>  r   r   r   	endParent  s   
z_BlastXmlGenerator.endParentc                 G   s   |D ]}|  | qdS )z$Start XML elements without children.N)r?  )r   namesr9  r   r   r   startParents  s   z_BlastXmlGenerator.startParentsc                 C   s   t |D ]}|   qdS )z0End XML elements, according to the given number.N)rangerA  )r   numir   r   r   
endParents  s   
z_BlastXmlGenerator.endParentsc                 C   s*   | j |i d |r| | | | dS )z>Create an XML element without children with the given content.)r:  N)r8  
charactersr=  )r   r9  contentr   r   r   simpleElement  s   
z _BlastXmlGenerator.simpleElementc                 C   s4   t t|}dD ]
\}}|||}q| | dS )zReplace quotes and apostrophe.))"z&quot;)'z&apos;N)r   r   replacer5  )r   rI  abr   r   r   rH    s   z_BlastXmlGenerator.characters)r,  r{   r   )NFN)r  r  r  r  r   r6  r8  r=  r?  rA  rC  rG  rJ  rH  r   r   r   r   r+    s    



r+  c                   @   s\   e Zd ZdZdddZdd Zddd	Zd
d Zdd Zdd Z	dd Z
dd Zdd ZdS )r   zStream-based BLAST+ XML Writer.Tc                 C   s   t |d| _|| _|| _dS )r   r,  N)r+  xmlr   r   r   r   r   r   r     s   
zBlastXmlWriter.__init__c                 C   s   | j }d\| _| _| _| _t|}|  |d | | |d | 	t
|g| |d |  | j| j| j| jfS )z,Write the XML contents to the output handle.)r   r   r   r   BlastOutputBlastOutput_iterationsr   )rQ  qresult_counterhit_counterhsp_counterfrag_counterr%  r6  r?  _write_preamble_write_qresultsr   rG  endDocument)r   qresultsrQ  first_qresultr   r   r   
write_file  s   



zBlastXmlWriter.write_fileNc              
   C   s   |du ri }t | D ]9\}}|| }z	tt||}W n ty3   |tvr1td|d|dY q
w ||v r<|| }| j|| q
dS )a  Write sibling XML elements (PRIVATE).

        :param block_name: common element name prefix
        :type block_name: string
        :param map_name: name of mapping between element and attribute names
        :type map_name: string
        :param obj: object whose attribute value will be used
        :type obj: object
        :param opt_dict: custom element-attribute mapping
        :type opt_dict: dictionary {string: string}

        NElement  (attribute ) not found)_WRITE_MAPSr   getattrr  _DTD_OPT
ValueErrorrQ  rJ  )r   
block_namemap_nameobjopt_dictr   attrrI  r   r   r   _write_elem_block  s   z BlastXmlWriter._write_elem_blockc              
   C   s   | j }td D ]m\}}d| }|dkr"|| | | |  qz	tt||}W n tyC   |tvrAt	d| d| dY qw |dkrT|j
  d|j }n|jrn|d	kr_|j}n|d
krnd|j|jg }||| qdS )z&Write the XML file preamble (PRIVATE).rp   BlastOutput_BlastOutput_paramr^  r_  r`  rM   r{   rZ   r[   N)rQ  ra  r?  _write_paramrA  r   rb  r  rc  rd  r>   upperr?   r   r  rW   rX   striprJ  )r   rq   rQ  r   ri  rI  r   r   r   rX  /  s0   

zBlastXmlWriter._write_preamblec                 C   s*   | j }|d | dd| |  dS )z4Write the parameter block of the preamble (PRIVATE).
ParametersParameters_r`   N)rQ  r?  rj  rA  )r   rq   rQ  r   r   r   rm  I  s   
zBlastXmlWriter._write_paramc                 C   s   | j }t|D ]t\}}|d |dt|d  i }| jr+|j}|jd |j }n|j}|j}||d}| 	dd|| |rP|d | 
|j |  n|dd	 |d
d | 	dd| |d |sp|dd |  jd7  _|  qdS )z<Write QueryResult objects into iteration elements (PRIVATE).r   zIteration_iter-numr|   r{   )r   ru   
Iteration_rq   r   r   rw   r   Statistics_rr   r   rx   zNo hits foundN)rQ  	enumerater?  rJ  r   r   r   rW   rX   rj  _write_hitshitsrA  rC  rG  rT  )r   r[  rQ  rE  rq   rh  r   r   r   r   r   rY  P  s6   




zBlastXmlWriter._write_qresultsc              
   C   s   | j }t|D ]l\}}|d |dt|d  i }| jr2|j}ddd t|j	|j
D }n|j}|jddd t|j	dd |j
dd D  }||d	}| d
d|| |d | |j |  jd7  _|d qdS )zWrite Hit objects (PRIVATE).r   Hit_numr|   z >c                 S      g | ]\}}| d | qS r{   r   r   r   yr   r   r   r     s    z.BlastXmlWriter._write_hits.<locals>.<listcomp>c                 S   rx  ry  r   rz  r   r   r   r     s    N)r   r   Hit_r4   ry   r   )rQ  rt  r?  rJ  r   r   r   r  zipid_alldescription_allrW   rX   rj  _write_hspsr   rU  rG  )r   rv  rQ  rE  r4   rh  r   r   r   r   r   ru  v  s.   


zBlastXmlWriter._write_hitsc                 C   s   | j }t|D ]_\}}|d |dt|d  td D ]2\}}d| }z	| |||}W n tyG   |tvrEt	d| d| dY qw ||t| q|  j
d7  _
|  jt|j7  _|  qd	S )
zWrite HSP objects (PRIVATE).HspHsp_numr|   rs   Hsp_r^  r_  r`  N)rQ  rt  r?  rJ  r   ra  _adjust_outputr  rc  rd  rV  rW  rY   	fragmentsrA  )r   r   rQ  rE  rs   r   ri  rI  r   r   r   r    s$   

zBlastXmlWriter._write_hspsc                 C   s   |dv r>t ||d }d|v rt ||d }nt ||}|jdkr<|jdk r<|dkr1t |d}|S |dkr<t |dd }|S |dv rLtt ||j}|S |dkrW|jd	 }|S |d
v rfddt ||f }|S t ||}|S )zGAdjust output to mimic native BLAST+ XML as much as possible (PRIVATE).)r+   r,   r-   r.   r2   r3   r|   r   r   r-   r.   )r;   r<   rz   r   )r&   r$   z%.*g   )rb  r/   r0   r   seqr   )r   rs   r   ri  rI  r   r   r   r    s.   

	

zBlastXmlWriter._adjust_output)TTrP  )r  r  r  r  r   r]  rj  rX  rm  rY  ru  r  r  r   r   r   r   r     s    

& r   __main__)run_doctest)+r  r~   r   	itertoolsr   	xml.etreer   xml.sax.saxutilsr   r   Bior   Bio.SearchIO._indexr   Bio.SearchIO._modelr   r	   r
   r   __all__r   floatr   r   r   r   r   r   r   ra  rc  r  r   r   r   r   r   r   r+  r   r  
Bio._utilsr  r   r   r   r   <module>   s   
	A


#  Dc_ 
i
