o
    Rŀg{                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ de	fd	e	fd
e	fdZ
defdefdZdefdefdefdefdZG dd dZedkr^ddlmZ e  dS dS )z8Bio.SearchIO parser for InterProScan XML output formats.    N)ElementTree)Hit)HSP)HSPFragment)QueryResult	accessioniddescription)nameacdescbitscoreevalue)scorer   query_start	query_end	hit_starthit_end)startendz	hmm-startzhmm-endc                   @   sL   e Zd ZdZdd Zdd Zdd Zdd	 ZdddZdddZ	dd Z
d
S )InterproscanXmlParserz'Parser for the InterProScan XML format.c                 C   s"   t tj|dd| _|  | _dS )zInitialize the class.)r   r   )eventsN)iterr   	iterparsexml_iter_parse_header_meta)selfhandle r   `/var/www/html/myenv/lib/python3.10/site-packages/Bio/SearchIO/InterproscanIO/interproscan_xml.py__init__'   s   zInterproscanXmlParser.__init__c                 c   s    |   E dH  dS )zIterate qresults.N)_parse_qresult)r   r   r   r    __iter__,   s   zInterproscanXmlParser.__iter__c                 C   sF   t | j\}}i }d|d< d|d< |jd |d< tdd|j| _|S )	z8Parse the header for the InterProScan version (PRIVATE).InterProtargetInterProScanprogramzinterproscan-versionversionzprotein-matches )nextr   attribresubtagNS)r   eventelemmetar   r   r    r   0   s   z#InterproscanXmlParser._parse_headerc                 c   s   | j D ]{\}}|dkr|j| jd kr|| jd }|j}|| jd }|jd }|jd }g }| || jd ||D ] }	|D ]}
|
j|	jkrZ|	jD ]}|
j	| qO nqD|	|	 q@t
||}t|d| | j D ]
\}}t||| qq|V  qd	S )
zParse query results (PRIVATE).r   proteinsequencexrefr   r
   matchesr	   N)r   r.   r/   findtextr+   
_parse_hitr   hspsappendr   setattrr   items)r   r0   r1   seq	query_seqr5   query_id
query_deschit_listhit_newhithspqresultkeyvaluer   r   r    r"   ;   s6   




z$InterproscanXmlParser._parse_qresultNc                 c   s$   |du rg }|D ]}t d| j d|dj}|| jd }|jd }| || jd }t| || jd |||}	t	|	|}
t
|
d	| t D ]\}\}}|j|}|durht
|
||| qP||
jd
< || jd }t|jd|
jd< t|jd|
jd< |
V  q	dS )zParse hit (PRIVATE).Nz%s(\w+)-matchz\1.	signaturer   entry	locationsdbxrefszHit typezsignature-library-releaselibraryTargetr(   zTarget version)r,   r-   r/   r7   r.   r+   _parse_xrefslist
_parse_hspr   r<   	_ELEM_HITr=   get
attributesstr)r   root_hit_elemr@   r?   hit_elemhit_typerJ   hit_idxrefsr:   rD   rG   attrcasterrH   signature_libr   r   r    r9   `   s4   


z InterproscanXmlParser._parse_hitc                 c   s(   |du rg }|D ]}t ||}t|dd |dur t|d| t D ]3\}\}}	|j|}
|
durW|dr?|	|
d }
|dkrGt|
}|dkrOt|
}t|||	|
 q$t|d	||  t|g}t|d
| t|d| t	 D ]\}\}}	|j|}
|
durt|||	|
 qu|V  q	dS )zParse hsp (PRIVATE).Nmolecule_typer3   queryr      r   r   aln_spanr@   rZ   )
r   r<   
_ELEM_FRAGr=   r+   rT   endswithintr   	_ELEM_HSP)r   root_hsp_elemr@   rZ   r?   hsp_elemfragrG   r\   r]   rH   r   r   rE   r   r   r    rR      s<   


z InterproscanXmlParser._parse_hspc                 C   s   g }|dur| d|jd   |durFg }||| jd  }||| jd  }|D ]}|jd }d|vr@|jd d | }| | q,|S )	zParse xrefs (PRIVATE).NzIPR:r   zgo-xrefzpathway-xrefr   :db)r;   r+   findallr/   )r   root_entry_elemr[   
xref_elemsrK   r5   r   r   r    rP      s   
z"InterproscanXmlParser._parse_xrefs)N)__name__
__module____qualname____doc__r!   r#   r   r"   r9   rR   rP   r   r   r   r    r   $   s    
%
&%r   __main__)run_doctest)rr   r,   	xml.etreer   Bio.SearchIO._modelr   r   r   r   rV   rS   floatrf   re   rc   r   ro   
Bio._utilsrt   r   r   r   r    <module>   s.    
