o
    Rŀgd                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ G dd dZG dd dZG dd dZG dd dZdS )zCode to parse BLAST XML output, and to parse the BLAST DTD file defining the XML.

The BLAST XML DTD file is available on the NCBI site at:
https://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd
    N)deque)expat)Entrez)	Alignment)Hit)HSP)Record)reverse_complement)Seq)
SeqFeature)SimpleLocation)	SeqRecordc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )
DTDHandlerz"Parser for the BLAST XML DTD file.c                 C   s@   t  }|t j | j|_|| _i | _i | _| j| jft	_
dS )z7Initialize the parser and parse the BLAST XML DTD file.N)r   ParserCreateSetParamEntityParsingXML_PARAM_ENTITY_PARSING_ALWAYS_externalEntityRefHandlerExternalEntityRefHandlerparserstart_methodsend_methods
XMLHandler_dtd_methodsselfr    r   E/var/www/html/myenv/lib/python3.10/site-packages/Bio/Blast/_parser.py__init__%   s   zDTDHandler.__init__c                 C   sf   |}|dvrdD ]}| |d}q|  dd}d| }d| }tt|| j|< tt|| j|< d S )N)zBlastOutput_query-IDzBlastOutput_query-defzBlastOutput_query-len)BlastOutput_
Iteration_Parameters_Statistics_Hit_Hsp_ -__start__end_)replacelowergetattrr   r   r   )r   namemodelmethod_nameprefixstart_method
end_methodr   r   r   _elementDeclHandler/   s   	zDTDHandler._elementDeclHandlerc                 C   s&   |d u sJ |d u sJ |  | dS N   )	parseFile)r   contextbasesystemIdpublicIdr   r   r   r   I   s   
z$DTDHandler._externalEntityRefHandlerc                 C   sh   t jd }tj|d|}| jd}| j|_t	|d}|
| W d   dS 1 s-w   Y  dS )zParse a DTD file.r   DTDsNrb)r   __path__ospathjoinr   ExternalEntityParserCreater2   ElementDeclHandleropen	ParseFile)r   filename	directoryr>   r   streamr   r   r   r5   O   s   
"zDTDHandler.parseFileN)__name__
__module____qualname____doc__r   r2   r   r5   r   r   r   r   r   "   s    
r   c                   @   s    e Zd ZdZdd Zdd ZdS )SchemaHandlerzXML Schema parser used to parse NCBI_BlastOutput2.xsd.

    The XML Schema for Blast XML2 is available from
    http://www.ncbi.nlm.nih.gov/data_specs/schema_alt/NCBI_BlastOutput2.xsd
    c                 C   s$   || _ i | _i | _| j| jft_dS )z!Initialize the XML Schema parser.N)r   r   r   r   _schema_methodsr   r   r   r   r   `   s   zSchemaHandler.__init__c                 C   st  d}|dkr>|d }t jd }tj|d|}tjdd}| j|_t	|d}|
| W d	   d	S 1 s7w   Y  d	S |d
kr|d}	|	d	u rMd	S | d|	 }
|	dkrftj| j|
< tj| j|
< d	S |	dv rld	S |	dkrsd}	n|	dkrzd}	n|	dkrd}	n|	dkrd}	n|	dkrd}	|	 dd}d| }d| }|	dkr|d7 }tt|| j|
< tt|| j|
< d	S d	S )Found XML start tag.

        Arguments:
         - name       -- name of the tag
         - attributes -- tag attributes

        http://www.ncbi.nlm.nih.govz(http://www.w3.org/2001/XMLSchema includeschemaLocationr   XSDs namespace_separatorr;   Nz(http://www.w3.org/2001/XMLSchema elementr,   BlastOutput2)errorErrcodemessagesubjectsbl2seqziter-numparamsparamsearch
iterationsSearch	Iterationzquery-titlez	query-deftitledefr%   r&   r'   r(   	eff-space_xml2)r   r<   r=   r>   r?   r   r   _startElementHandlerStartElementHandlerrB   rC   getr   _start_blastoutputr   _end_blastoutput_xml2r   r*   r)   r+   )r   r,   
attributes	namespacerD   rE   r>   r   rF   tagkeyr.   r0   r1   r   r   r   re   g   sL   
"
	z"SchemaHandler._startElementHandlerN)rG   rH   rI   rJ   r   re   r   r   r   r   rK   Y   s    rK   c                   @   s   e Zd ZdZdS )
_HSP_cache)num	bit_scorescoreevalueidentitypositive
query_fromquery_toquery_framequery_strandhit_fromhit_to	hit_frame
hit_strandqseqhseqgaps	align_lendensitymidlineN)rG   rH   rI   	__slots__r   r   r   r   rn      s    rn   c                   @   s  e Zd ZdZdZdZdZdd Zdd Zdd	 Z	d
d Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Z d8d9 Z!d:d; Z"d<d= Z#d>d? Z$d@dA Z%dBdC Z&dDdE Z'dFdG Z(dHdI Z)dJdK Z*dLdM Z+dNdO Z,dPdQ Z-dRdS Z.dTdU Z/dVdW Z0dXdY Z1dZd[ Z2d\d] Z3d^d_ Z4d`da Z5dbdc Z6ddde Z7dfdg Z8dhdi Z9djdk Z:dldm Z;dndo Z<dpdq Z=drds Z>dtdu Z?dvdw Z@dxdy ZAdzd{ ZBd|d} ZCd~d ZDdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdd ZLdd ZMdd ZNdd ZOdd ZPdd ZQdd ZRdd ZSdd ZTdd ZUdd ZVdd ZWdd ZXdd ZYdd ZZdd Z[dd Z\dd Z]dd Z^dd Z_dd Z`dd Zadd Zbdd Zcdd Zddd ZeddÄ Zfddń ZgddǄ ZhddɄ Zidd˄ Zjdd̈́ Zkddτ Zlddф Zmddӄ ZnddՄ Zoddׄ Zpddل Zqddۄ Zrdd݄ Zsdd߄ Ztdd Zudd Zvdd Zwdd Zxdd Zydd Zzdd Z{dd Z|dd Z}dd Z~dd Zdd Zdd Zdd Zdd Zdd Zd d Zdd Zdd Zdd Zdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Zd<d= Zd>d? Zd@dA ZdBdC ZdDdE ZdFdG ZdHdI ZdJdK ZdLdM ZdNdO ZdPdQ ZdRdS ZdTdU ZdVdW ZdXdY ZdZd[ Zd\d] Zd^d_ Zd`da Zdbdc Zddde Zdfdg Zdhdi Zdjdk Zdldm Zdndo ZdS (p  r   zHandler for BLAST XML data.z)http://www.w3.org/2001/XMLSchema-instanceNc                 C   s   | j |_|tj || _dS )zInitialize the expat parser.N)_xmlDeclHandlerXmlDeclHandlerr   r   r   _parserr   r   r   r   r      s   
zXMLHandler.__init__c                 C   s    | j }|tjkr| j|_d S d S N)r   r   schema_namespace_start_blastxml2rf   )r   r/   urir   r   r   r   _startNamespaceDeclHandler   s   
z%XMLHandler._startNamespaceDeclHandlerc                 C      d S r   r   )r   r/   r   r   r   _endNamespaceDeclHandler      z#XMLHandler._endNamespaceDeclHandlerc                 C   s   dt j }|dksJ ||  \}}|dksJ t jdu r^tj|}tjd }tj	|d|}t
|d}	tjdd	}
t|
}|j|
_t
|d}	|
|	 W d   n1 sYw   Y  t j\| _| _| j}
| j|
_| j|
_| j|
_d
| _dS )z7Process the XML schema (before processing the element).z%s schemaLocationz%http://www.ncbi.nlm.nih.gov BlastXML2rN   Nr   rP   r;   rQ   rR   r$   )r   r   splitrL   r=   r>   basenamer   r<   r?   rB   r   r   rK   re   rf   rC   _start_methods_end_methodsr   _endElementHandlerEndElementHandler_characterDataHandlerCharacterDataHandler_characters)r   r,   rj   rm   domainurlrD   rE   r>   rF   r   handlerr   r   r   r      s*   




zXMLHandler._start_blastxml2c                 C      | j  dks	J d| _ d S Nr$   r   stripr   r,   rj   r   r   r   rh         
zXMLHandler._start_blastoutputc                 C   r   r   r   r   r   r   r   _start_program   r   zXMLHandler._start_programc                 C   r   r   r   r   r   r   r   _start_version   r   zXMLHandler._start_versionc                 C   r   r   r   r   r   r   r   _start_reference   r   zXMLHandler._start_referencec                 C   r   r   r   r   r   r   r   	_start_db   r   zXMLHandler._start_dbc                 C   r   r   r   r   r   r   r   _start_blastoutput_query_id   r   z&XMLHandler._start_blastoutput_query_idc                 C   r   r   r   r   r   r   r   _start_blastoutput_query_def   r   z'XMLHandler._start_blastoutput_query_defc                 C   r   r   r   r   r   r   r   _start_mbstat   r   zXMLHandler._start_mbstatc                 C   r   r   r   r   r   r   r   _start_param  r   zXMLHandler._start_paramc                 C   s$   | j  dks	J d| _ i | j_d S r   )r   r   _recordsr\   r   r   r   r   _start_parameters     zXMLHandler._start_parametersc                 C   r   r   r   r   r   r   r   _start_matrix  r   zXMLHandler._start_matrixc                 C   r   r   r   r   r   r   r   _start_expect  r   zXMLHandler._start_expectc                 C   r   r   r   r   r   r   r   _start_sc_match  r   zXMLHandler._start_sc_matchc                 C   r   r   r   r   r   r   r   _start_sc_mismatch  r   zXMLHandler._start_sc_mismatchc                 C   r   r   r   r   r   r   r   _start_include  r   zXMLHandler._start_includec                 C   r   r   r   r   r   r   r   _start_gap_open!  r   zXMLHandler._start_gap_openc                 C   r   r   r   r   r   r   r   _start_gap_extend%  r   zXMLHandler._start_gap_extendc                 C   r   r   r   r   r   r   r   _start_filter)  r   zXMLHandler._start_filterc                 C   r   r   r   r   r   r   r   
_start_cbs-  r   zXMLHandler._start_cbsc                 C   r   r   r   r   r   r   r   _start_db_gencode1  r   zXMLHandler._start_db_gencodec                 C   r   r   r   r   r   r   r   _start_query_gencode5  r   zXMLHandler._start_query_gencodec                 C   r   r   r   r   r   r   r   _start_bl2seq_mode9  r   zXMLHandler._start_bl2seq_modec                 C   r   r   r   r   r   r   r   _start_query_masking=  r   zXMLHandler._start_query_maskingc                 C   r   r   r   r   r   r   r   _start_rangeA  r   zXMLHandler._start_rangec                 C   r   r   r   r   r   r   r   _start_fromE  r   zXMLHandler._start_fromc                 C   r   r   r   r   r   r   r   	_start_toI  r   zXMLHandler._start_toc                 C   r   r   r   r   r   r   r   _start_patternM  r   zXMLHandler._start_patternc                 C   r   r   r   r   r   r   r   _start_entrez_queryQ  r   zXMLHandler._start_entrez_queryc                 C   s&   t  | j_| j dksJ d| _d S r   )r   r   _cacher   r   r   r   r   r   _start_iterationsU     

zXMLHandler._start_iterationsc                 C   r   r   r   r   r   r   r   _start_blastoutput_query_lenZ  r   z'XMLHandler._start_blastoutput_query_lenc                 C   r   r   r   r   r   r   r   _start_query_seq^  r   zXMLHandler._start_query_seqc                 C   s   t  }|| _d S r   )r   _record)r   r,   rj   recordr   r   r   _start_iterationb  s   
zXMLHandler._start_iterationc                 C   r   r   r   r   r   r   r   _start_iter_numf  r   zXMLHandler._start_iter_numc                 C   r   r   r   r   r   r   r   _start_query_idj  r   zXMLHandler._start_query_idc                 C   r   r   r   r   r   r   r   _start_query_defn  r   zXMLHandler._start_query_defc                 C   r   r   r   r   r   r   r   _start_query_lenr  r   zXMLHandler._start_query_lenc                 C   r   r   r   r   r   r   r   _start_hitsv  r   zXMLHandler._start_hitsc                 C   $   | j  dks	J d| _ t | _d S r   )r   r   r   _alignmentsr   r   r   r   
_start_hitz  r   zXMLHandler._start_hitc                 C   r   r   r   r   r   r   r   
_start_num  r   zXMLHandler._start_numc                 C   s   g | j _d S r   )r   targetsr   r   r   r   _start_description  s   zXMLHandler._start_descriptionc                 C   r   r   r   r   r   r   r   _start_hitdescr  r   zXMLHandler._start_hitdescrc                 C   r   r   r   r   r   r   r   	_start_id  r   zXMLHandler._start_idc                 C   r   r   r   r   r   r   r   
_start_def  r   zXMLHandler._start_defc                 C   r   r   r   r   r   r   r   _start_taxid  r   zXMLHandler._start_taxidc                 C   r   r   r   r   r   r   r   _start_sciname  r   zXMLHandler._start_scinamec                 C   r   r   r   r   r   r   r   _start_hsps  r   zXMLHandler._start_hspsc                 C   r   r   r   r   r   r   r   
_start_len  r   zXMLHandler._start_lenc                 C   r   r   r   r   r   r   r   _start_accession  r   zXMLHandler._start_accessionc                 C   r   r   )r   r   rn   _hspr   r   r   r   
_start_hsp  r   zXMLHandler._start_hspc                 C   r   r   r   r   r   r   r   _start_bit_score  r   zXMLHandler._start_bit_scorec                 C   r   r   r   r   r   r   r   _start_score  r   zXMLHandler._start_scorec                 C   r   r   r   r   r   r   r   _start_evalue  r   zXMLHandler._start_evaluec                 C   r   r   r   r   r   r   r   _start_query_from  r   zXMLHandler._start_query_fromc                 C   r   r   r   r   r   r   r   _start_query_to  r   zXMLHandler._start_query_toc                 C   r   r   r   r   r   r   r   _start_query_strand  r   zXMLHandler._start_query_strandc                 C   r   r   r   r   r   r   r   _start_hit_from  r   zXMLHandler._start_hit_fromc                 C   r   r   r   r   r   r   r   _start_hit_to  r   zXMLHandler._start_hit_toc                 C   r   r   r   r   r   r   r   _start_hit_strand  r   zXMLHandler._start_hit_strandc                 C   r   r   r   r   r   r   r   _start_pattern_from  r   zXMLHandler._start_pattern_fromc                 C   r   r   r   r   r   r   r   _start_pattern_to  r   zXMLHandler._start_pattern_toc                 C   r   r   r   r   r   r   r   _start_query_frame  r   zXMLHandler._start_query_framec                 C   r   r   r   r   r   r   r   _start_hit_frame  r   zXMLHandler._start_hit_framec                 C   r   r   r   r   r   r   r   _start_identity  r   zXMLHandler._start_identityc                 C   r   r   r   r   r   r   r   _start_positive  r   zXMLHandler._start_positivec                 C   r   r   r   r   r   r   r   _start_gaps  r   zXMLHandler._start_gapsc                 C   r   r   r   r   r   r   r   _start_align_len  r   zXMLHandler._start_align_lenc                 C   r   r   r   r   r   r   r   _start_density  r   zXMLHandler._start_densityc                 C   r   r   r   r   r   r   r   _start_qseq  r   zXMLHandler._start_qseqc                 C   r   r   r   r   r   r   r   _start_hseq  r   zXMLHandler._start_hseqc                 C   r   r   r   r   r   r   r   _start_midline  r   zXMLHandler._start_midlinec                 C   r   r   r   r   r   r   r   _start_stat  r   zXMLHandler._start_statc                 C   r   r   r   r   r   r   r   _start_message  r   zXMLHandler._start_messagec                 C   s"   | j  dks	J d| _ i | _d S r   )r   r   _statr   r   r   r   _start_statistics  s   
zXMLHandler._start_statisticsc                 C   r   r   r   r   r   r   r   _start_db_num  r   zXMLHandler._start_db_numc                 C   r   r   r   r   r   r   r   _start_db_len  r   zXMLHandler._start_db_lenc                 C   r   r   r   r   r   r   r   _start_hsp_len  r   zXMLHandler._start_hsp_lenc                 C   r   r   r   r   r   r   r   _start_eff_space  r   zXMLHandler._start_eff_spacec                 C   r   r   r   r   r   r   r   _start_kappa  r   zXMLHandler._start_kappac                 C   r   r   r   r   r   r   r   _start_lambda  r   zXMLHandler._start_lambdac                 C   r   r   r   r   r   r   r   _start_entropy#  r   zXMLHandler._start_entropyc                 C   r   r   r   r   r   r   r   _start_report'  r   zXMLHandler._start_reportc                 C   r   r   r   r   r   r   r   _start_search_target*  r   zXMLHandler._start_search_targetc                 C   r   r   r   r   r   r   r   _start_target-  r   zXMLHandler._start_targetc                 C   r   r   r   r   r   r   r   _start_results0  r   zXMLHandler._start_resultsc                 C   s:   | j  dks	J | j}d |_d |_d |_| ` | `| `d S r   )r   r   r   rf   r   r   r   )r   r,   r   r   r   r   _end_blastoutput3  s   zXMLHandler._end_blastoutputc                 C   s   | j  dks	J d S r   r   r   r,   r   r   r   ri   =     z XMLHandler._end_blastoutput_xml2c                 C   s   |  | d S r   )r   r   r   r   r   _end_blastxml2@  s   zXMLHandler._end_blastxml2c                 C   s   | j }|| _|| j_d| _ d S r   )r   _programr   program)r   r,   r  r   r   r   _end_programC  s   
zXMLHandler._end_programc                 C      | j | j_d| _ d S r   )r   r   versionr   r   r   r   _end_versionI     

zXMLHandler._end_versionc                 C   s   t | j| j_d| _d S r   )htmlunescaper   r   	referencer   r   r   r   _end_referenceM     
zXMLHandler._end_referencec                 C   r  r   )r   r   dbr   r   r   r   _end_dbQ  r	  zXMLHandler._end_dbc                 C      | j }td || j_d| _ d S r   )r   r   r   queryr   r,   query_idr   r   r   _end_blastoutput_query_idU     
z$XMLHandler._end_blastoutput_query_idc                 C      | j }|| jj_d| _ d S r   )r   r   r  descriptionr   r,   	query_defr   r   r   _end_blastoutput_query_defZ     

z%XMLHandler._end_blastoutput_query_defc                 C   &   t | j}td |d| jj_d| _d S N)lengthr$   )intr   r
   r   r  seqr   r,   r  r   r   r   _end_blastoutput_query_len_  r   z%XMLHandler._end_blastoutput_query_lenc                 C   s8   t | j}d| _t|t| jjjksJ || jj_d S r   )r
   r   lenr   r  r!  )r   r,   r!  r   r   r   _end_query_seqd  s   
zXMLHandler._end_query_seqc                 C   *   | j  dks	J d| _ | j| j_| `d S r   )r   r   r   r   mbstatr   r   r   r   _end_mbstatj     
zXMLHandler._end_mbstatc                 C   r   r   r   r   r   r   r   
_end_paramp  r   zXMLHandler._end_paramc                 C   r   r   r   r   r   r   r   _end_parameterst  r   zXMLHandler._end_parametersc                 C      | j | jjd< d| _ d S )Nmatrixr$   r   r   r\   r   r   r   r   _end_matrixx     
zXMLHandler._end_matrixc                 C      t | j| jjd< d| _d S )Nexpectr$   floatr   r   r\   r   r   r   r   _end_expect|  r   zXMLHandler._end_expectc                 C   r1  )Nzsc-matchr$   r   r   r   r\   r   r   r   r   _end_sc_match  r   zXMLHandler._end_sc_matchc                 C   r1  )Nzsc-mismatchr$   r6  r   r   r   r   _end_sc_mismatch  r   zXMLHandler._end_sc_mismatchc                 C   r1  )Nincluder$   r3  r   r   r   r   _end_include  r   zXMLHandler._end_includec                 C   r1  )Nzgap-openr$   r6  r   r   r   r   _end_gap_open  r   zXMLHandler._end_gap_openc                 C   r1  )Nz
gap-extendr$   r6  r   r   r   r   _end_gap_extend  r   zXMLHandler._end_gap_extendc                 C   r,  )Nfilterr$   r.  r   r   r   r   _end_filter  r0  zXMLHandler._end_filterc                 C   r1  )Ncbsr$   r6  r   r   r   r   _end_cbs  r   zXMLHandler._end_cbsc                 C   r1  )Nz
db-gencoder$   r6  r   r   r   r   _end_db_gencode  r   zXMLHandler._end_db_gencodec                 C   r1  )Nzbl2seq-moder$   r6  r   r   r   r   _end_bl2seq_mode  r   zXMLHandler._end_bl2seq_modec                 C   sB   | j  dks	J d| _ | j}| `t|dd}| jjj| d S )Nr$   masking)type)r   r   	_locationr   r   r  featuresappend)r   r,   locationfeaturer   r   r   _end_query_masking  s   zXMLHandler._end_query_maskingc                 C   s(   | j d }| ` | j}| `t||| _d S r3   )_from_tor   rE  )r   r,   startendr   r   r   
_end_range  s
   
zXMLHandler._end_rangec                 C      t | j| _d| _d S r   )r   r   rK  r   r   r   r   	_end_from     
zXMLHandler._end_fromc                 C   rP  r   )r   r   rL  r   r   r   r   _end_to  rR  zXMLHandler._end_toc                 C   r1  )Nzquery-gencoder$   r6  r   r   r   r   _end_query_gencode  r   zXMLHandler._end_query_gencodec                 C   r,  )Npatternr$   r.  r   r   r   r   _end_pattern  r0  zXMLHandler._end_patternc                 C   r,  )Nzentrez-queryr$   r.  r   r   r   r   _end_entrez_query  r0  zXMLHandler._end_entrez_queryc                 C   r   r   r   r   r   r   r   _end_iterations  r   zXMLHandler._end_iterationsc                 C   s0   | j  dks	J d| _ | jj| j | `d S r   )r   r   r   r   rG  r   r   r   r   r   _end_iteration  s   zXMLHandler._end_iterationc                 C      t | j| j_d| _d S r   )r   r   r   ro   r   r   r   r   _end_iter_num  r0  zXMLHandler._end_iter_numc                 C   r  r   )r   r   r   r  r  r   r   r   _end_query_id  r  zXMLHandler._end_query_idc                 C   r  r   )r   r   r  r  r  r   r   r   _end_query_def  r  zXMLHandler._end_query_defc                 C   r  r  )r   r   r
   r   r  r!  r"  r   r   r   _end_query_len  r   zXMLHandler._end_query_lenc                 C   r   r   r   r   r   r   r   	_end_hits  r   zXMLHandler._end_hitsc                 C   s2   | j  dks	J d| _ | j}| `| j| d S r   )r   r   r   r   rG  )r   r,   hitr   r   r   _end_hit  s
   zXMLHandler._end_hitc                 C   s   | j jd | j _d S )Nr   )r   r   targetr   r   r   r   _end_description  s   zXMLHandler._end_descriptionc                 C   s   | j j| j j d S r   )r   r   rG  rb  r   r   r   r   _end_hitdescr  r  zXMLHandler._end_hitdescrc                 C   r  r   )r   r   r   rb  )r   r,   hit_idr   r   r   _end_id  r  zXMLHandler._end_idc                 C   r  r   )r   r   rb  r  )r   r,   r  r   r   r   _end_def  r  zXMLHandler._end_defc                 C   s"   | j }t|| jjjd< d| _ d S )Ntaxidr$   )r   r   r   rb  annotations)r   r,   rh  r   r   r   
_end_taxid  s   
zXMLHandler._end_taxidc                 C   s   | j }|| jjjd< d| _ d S )Nscinamer$   )r   r   rb  ri  )r   r,   rk  r   r   r   _end_sciname  r  zXMLHandler._end_scinamec                 C   r  r   )r   r   rb  r,   )r   r,   	accessionr   r   r   _end_accession	  r  zXMLHandler._end_accessionc                 C   s^   t | j}td |d}| j}z|j}W n ty    ||j_Y n
w |jD ]}||_q$d| _d S r  )r   r   r
   r   r   AttributeErrorrb  r!  )r   r,   r  r!  
alignmentsr   rb  r   r   r   _end_len  s   



zXMLHandler._end_lenc                 C   r   r   r   r   r   r   r   	_end_hsps  r   zXMLHandler._end_hspsc                 C   s:   z| j }W n ty   | j}Y nw t| j|_d| _d S r   )r   ro  r   r   r   ro   )r   r,   elementr   r   r   _end_num!  s   


zXMLHandler._end_numc                 C   rZ  r   )r4  r   r   rp   r   r   r   r   _end_bit_score)  r0  zXMLHandler._end_bit_scorec                 C   rZ  r   )r4  r   r   rq   r   r   r   r   
_end_score-  r0  zXMLHandler._end_scorec                 C   rZ  r   )r4  r   r   rr   r   r   r   r   _end_evalue1  r0  zXMLHandler._end_evaluec                 C   rZ  r   )r   r   r   ru   r   r   r   r   _end_query_from5  r0  zXMLHandler._end_query_fromc                 C   rZ  r   )r   r   r   rv   r   r   r   r   _end_query_to9  r0  zXMLHandler._end_query_toc                 C   s$   | j }|dks	J || j_d| _ d S )NPlusr$   )r   r   rx   )r   r,   rx   r   r   r   _end_query_strand=     
zXMLHandler._end_query_strandc                 C   rZ  r   )r   r   r   ry   r   r   r   r   _end_hit_fromC  r0  zXMLHandler._end_hit_fromc                 C   rZ  r   )r   r   r   rz   r   r   r   r   _end_hit_toG  r0  zXMLHandler._end_hit_toc                 C   s$   | j }|dv s	J || j_d| _ d S )N)rz  Minusr$   )r   r   r|   )r   r,   r|   r   r   r   _end_hit_strandK  r|  zXMLHandler._end_hit_strandc                 C   rZ  r   )r   r   r   pattern_fromr   r   r   r   _end_pattern_fromQ  r0  zXMLHandler._end_pattern_fromc                 C   rZ  r   )r   r   r   
pattern_tor   r   r   r   _end_pattern_toU  r0  zXMLHandler._end_pattern_toc                 C   sn   t | j}| j}|dv r|dkrn|dv r|dv rn|dv r#|dkr#ntd| d| j || j_d	| _d S )
Nblastn	megablastr4   blastxtblastxr4         )blastptblastnrpsblastr   unexpected value z& in tag <Hsp_query-frame> for program r$   )r   r   r  
ValueErrorr   rw   )r   r,   rw   r  r   r   r   _end_query_frameY  s   

zXMLHandler._end_query_framec                 C   sn   t | j}| j}|dv r|dv rn|dv r|dkrn|dv r#|dv r#ntd| d| j || j_d	| _d S )
Nr  )r  r4   r  r  r  r   r  r  r  r  z$ in tag <Hsp_hit-frame> for program r$   )r   r   r  r  r   r{   )r   r,   r{   r  r   r   r   _end_hit_framei  s   

zXMLHandler._end_hit_framec                 C   rZ  r   )r   r   r   rs   r   r   r   r   _end_identity  r0  zXMLHandler._end_identityc                 C   rZ  r   )r   r   r   rt   r   r   r   r   _end_positive  r0  zXMLHandler._end_positivec                 C   rZ  r   )r   r   r   r   r   r   r   r   	_end_gaps  r0  zXMLHandler._end_gapsc                 C   rZ  r   )r   r   r   r   r   r   r   r   _end_align_len  r0  zXMLHandler._end_align_lenc                 C   rZ  r   )r   r   r   r   r   r   r   r   _end_density  r0  zXMLHandler._end_densityc                 C   r  r   )r   r   r}   r   r   r   r   	_end_qseq  r	  zXMLHandler._end_qseqc                 C   r  r   )r   r   r~   r   r   r   r   	_end_hseq  r	  zXMLHandler._end_hseqc                 C   r  r   )r   r   r   r   r   r   r   _end_midline  r	  zXMLHandler._end_midlinec           #      C   s:  | j  dks	J d| _ | j}| `| j}|j}| jj}|d u r#| jj}|j}|j	}t
|j}|j }	t
|	|ks;J |j }
t
|
|ksHJ t|
|	g\\}}}td ||d}|jd }|j}|dv r|| dt
| ksrJ tdt
|}| d|j d|j }|j}|dkr|d |d ksJ n|dk r|| d | d ksJ d	| d
}d|i}t|d|d}|j| n3|dd d f  |7  < || t
|ksJ ||i}|dkrz|j}W n	 ty   Y nw |dksJ t|||_| jj}|j}|j}|j	}t
|j}td |||d}|dv rz|j}W n ty9   |j }|dkr0d}n|dkr7d}Y nw |dkri|j!d }|j"}|dd d f  |7  < || t
|ks^J ||i}t|||_n|dkr|j"d }|j!}||dd d f  |dd d f< || t
|ksJ || |i}t||}|# |_n|dv r|j!d }|j"}|dd d f  |7  < || t
|ksJ ||i}t|||_nv|dv rA|j!d }|j"}|| dt
| ksJ td|}| d|j! d|j" }|j }|dkr|d |d ksJ n|dk r)|| d | d ks#J d	| d
}d|i}t|d|d}|j| t|||_nt$d| ||g} t%| |}!|j&|!_&|j'|!_'i }"|j(|"d< |j)|"d< |j*|"d< z|j+|"d< W n
 tyz   Y nw z|j,|"d< W n
 ty   Y nw |j-|"d< |"|!_.| j|! d S )Nr$   )r  r4   r  r  r   :z..zcomplement()coded_byCDS)rD  
qualifiersr  rz  r  r  r  r  r  zUnexpected program name '%s'z	bit scorerr   rs   rt   r   r   )/r   r   r   r  r   r   r  r   idr  r$  r!  r}   encoder~   r   parse_printed_alignmentr   ru   rv   r   rw   r   rF  rG  rx   ro  r
   r   rb  r,   r|   r{   ry   rz   r	   RuntimeErrorr   ro   rq   rp   rr   rs   rt   r   r   ri  )#r   r,   hspr  r   r  r  query_descriptionquery_lengthquery_seq_alignedtarget_seq_alignedtarget_seq_dataquery_seq_datacoordinatesquery_start	query_endrH  r  rw   r  rI  rx   rb  	target_idtarget_nametarget_descriptiontarget_lengthtarget_strandtarget_frametarget_start
target_endr!  	sequences	alignmentri  r   r   r   _end_hsp  s   














 













zXMLHandler._end_hspc                 C   r&  r   )r   r   r   r   statr   r   r   r   	_end_stat   r)  zXMLHandler._end_statc                 C   r  r   )r   r   rX   r   r   r   r   _end_message&  r	  zXMLHandler._end_messagec                 C   r   r   r   r   r   r   r   _end_statistics*  r   zXMLHandler._end_statisticsc                 C      t | j| jd< d| _d S )Nzdb-numr$   r   r   r   r   r   r   r   _end_db_num.  r  zXMLHandler._end_db_numc                 C   r  )Nzdb-lenr$   r  r   r   r   r   _end_db_len2  r  zXMLHandler._end_db_lenc                 C   r  )Nzhsp-lenr$   r  r   r   r   r   _end_hsp_len6  r  zXMLHandler._end_hsp_lenc                 C   s4   | j }| rt|}nt|}|| jd< d| _ d S Nrc   r$   )r   isdigitr   r4  r   )r   r,   
charactersvaluer   r   r   _end_eff_space:  s   


zXMLHandler._end_eff_spacec                 C   r  r  r  r   r   r   r   _end_eff_space_xml2C  r  zXMLHandler._end_eff_space_xml2c                 C   r  )Nkappar$   r4  r   r   r   r   r   r   
_end_kappaG  r  zXMLHandler._end_kappac                 C   r  )Nlambdar$   r  r   r   r   r   _end_lambdaK  r  zXMLHandler._end_lambdac                 C   r  )Nentropyr$   r  r   r   r   r   _end_entropyO  r  zXMLHandler._end_entropyc                 C   r   r   r   r   r   r   r   _end_reportS  r   zXMLHandler._end_reportc                 C   r   r   r   r   r   r   r   _end_search_targetV  r   zXMLHandler._end_search_targetc                 C   r   r   r   r   r   r   r   _end_targetY  r   zXMLHandler._end_targetc                 C   r   r   r   r   r   r   r   _end_results\  r   zXMLHandler._end_resultsc                 C   sF   | j }| j|_| j|_| j|_| j|_| j	|_
| j|_d| _d |_d S r   )r   r   r   r   StartNamespaceDeclHandlerr   EndNamespaceDeclHandlerre   rf   r   r   r   r   r   r   )r   r  encoding
standaloner   r   r   r   r   _  s   
zXMLHandler._xmlDeclHandlerc                 C   sh   |du sJ |du sJ |dvrt d|dksJ tjdu r+t }|d d| j_tj\| _| _dS )zHandle the DTD declaration.N)NCBI_BlastOutput.dtdz4http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtdz output from legacy BLAST programz-//NCBI//NCBI BlastOutput/ENr  r4   )	r  r   r   r   r5   r   r   r   r   )r   r6   r7   r8   r9   r   r   r   r   r   j  s   

z$XMLHandler._externalEntityRefHandlerc                 C   s:   | j |}|du rtd|| j  f || || dS )rM   Nz!Failed to find method for %s (%s))r   rg   r  keys)r   r,   rj   methodr   r   r   re   {  s   zXMLHandler._startElementHandlerc                 C   s.   | j |}|du rtd| || | dS )zLFound XML end tag.

        Arguments:
         - name -- tag name

        NzFailed to find method for %s)r   rg   r  )r   r,   r  r   r   r   r     s   zXMLHandler._endElementHandlerc                 C   s   |  j |7  _ dS )zWFound some text.

        Arguments:
         - characters -- characters read

        N)r   )r   r  r   r   r   r     s   z XMLHandler._characterDataHandlerc                 C   s   | S r   r   )r   r   r   r   __iter__  r   zXMLHandler.__iter__c                 C   s   z| j }W n ty   d }Y nw z| j}W n ty!   d }Y nw tt| }|d u r6|d u r6d| dS |d u rCd| d| dS |d u rPd| d| dS d| d| d| dS )	Nz(<Bio.Blast._parser.XMLHandler object at z with no stream or parser>z with parser z and no stream>z with stream z and no parser>z and parser >)_streamro  r   hexr  )r   rF   r   addressr   r   r   __repr__  s$   

zXMLHandler.__repr__)rG   rH   rI   rJ   r   rL   r   r   r   r   r   rh   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ri   r  r  r  r  r  r  r  r#  r%  r(  r*  r+  r/  r5  r7  r8  r:  r;  r<  r>  r@  rA  rB  rJ  rO  rQ  rS  rT  rV  rW  rX  rY  r[  r\  r]  r^  r_  ra  rc  rd  rf  rg  rj  rl  rn  rq  rr  rt  ru  rv  rw  rx  ry  r{  r}  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   re   r   r   r  r  r   r   r   r   r      sx   
 		r   )rJ   r
  os.pathr=   collectionsr   xml.parsersr   Bior   	Bio.Alignr   	Bio.Blastr   r   r   Bio.Seqr	   r
   Bio.SeqFeaturer   r   Bio.SeqRecordr   r   rK   rn   r   r   r   r   r   <module>   s&   
7E