o
    RŀgN                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 d	d
l
mZ dZedZedZedZdZeeZedZedZedZG dd dZG dd deZedkruddlmZ e  dS dS )z7Bio.SearchIO parser for HMMER plain text output format.    N)Hit)HSP)HSPFragment)QueryResult)read_forward)removesuffix   )_BaseHmmerTextIndexer)Hmmer3TextParserHmmer3TextIndexerz^# .*?(\w?hmm\w+) :: .*$z# \w+ ([\w+\.]+) .*; http.*$z^# (.+):\s+(.+)$z^Query:\s*(.*)\s+\[\w=(\d+)\]z)score:\s(-?\d+\.?\d+)\sbits.*value:\s(.*)z^(\s+)(.+)\s(\w+)z%^(\s+\S+\s+[0-9-]+ )(.+?)(\s+[0-9-]+)c                   @   sP   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd ZdS )r
   z%Parser for the HMMER 3.0 text output.c                 C   s    || _ t| j | _|  | _dS )zInitialize the class.N)handler   line_parse_preamble_meta)selfr    r   T/var/www/html/myenv/lib/python3.10/site-packages/Bio/SearchIO/HmmerIO/hmmer3_text.py__init__*   s   zHmmer3TextParser.__init__c                 c   s    |   E dH  dS )zIterate over query results.N)_parse_qresult)r   r   r   r   __iter__0   s   zHmmer3TextParser.__iter__c                 C   s$   	 | j r	|| j rdS t| j| _ q)zERead the file handle until the given function returns True (PRIVATE).TN)r   r   r   )r   	bool_funcr   r   r   _read_until4   s
   zHmmer3TextParser._read_untilc                 C   s   i }d}	 | j ds	 |S d| j v r|sd}nJ	 |S |s>tt| j }|r-|d|d< tt| j }|r=|d|d< n$|rbtt| j }d|dv rX|d	 |d< n
|d	||d< t	| j
| _ q)
z:Parse HMMER preamble (lines beginning with '#') (PRIVATE).FT#z- - -r   programversiontarget   )r   
startswithresearch_RE_PROGRAMgroup_RE_VERSION_RE_OPTstripr   r   )r   metahas_optsregxr   r   r   r   <   s4   
z Hmmer3TextParser._parse_preamblec           
      c   s   |  dd  | jrtt| j}|s#t| j| _tt| j}|r|d }t	|d| j
d| j
d| j
dd}d	}| jd
st| j| _| jdrh| j ddd }| |d< n| jdr| j ddd  }||d< | jd
rJ| jrd| jvr| ||}| jdr| jrd| jvrt| j| _| jrd| jvs| jrd| jvst||d}| D ]
\}}	t|||	 q|V  t| j| _| jdr| j | _d| jv rdS | jsdS dS )z%Parse a HMMER3 query block (PRIVATE).c                 S   
   |  dS )NzQuery:r   r   r   r   r   <lambda>h      
 z1Hmmer3TextParser._parse_qresult.<locals>.<lambda>r   r   r   r   r   )seq_lenr   r   r   z<unknown description>zScores for z
Accession: 	accessionzDescription:descriptionz//Internal pipeline)idhitsr   z[ok]N)r   r   r   r   _QRE_ID_LENr   r   r!   r$   intr   getr   split
_parse_hitr   itemssetattrreadline)
r   r'   qidqresult_attrsqdescacchit_listqresultattrvaluer   r   r   r   f   sR   



zHmmer3TextParser._parse_qresultc              
   C   sj  |  dd  t| j| _d}g }	 | jsg S | jdr&d}t| j| _n-| jdrD	 t| j| _| jdrCt|dksAJ g S q-| jd	rS| |||}|S d
d | j dD }t|dkrrd	|dd |d< nt|dk r|
d t|dksJ |d |t|d t|d t|d t|d t|d |d |d	}|
| t| j| _q)zAParse a HMMER3 hit block, beginning with the hit table (PRIVATE).c                 S   r(   )Nz    ------- ------ -----r)   r*   r   r   r   r+      r,   z-Hmmer3TextParser._parse_hit.<locals>.<lambda>Tz  ------ inclusionFz+   [No hits detected that satisfy reportingr1   r   zDomain annotation for each c                 S      g | ]}|r|qS r   r   .0xr   r   r   
<listcomp>       z/Hmmer3TextParser._parse_hit.<locals>.<listcomp>r.   
   	   N    r   r         )	r2   query_idevaluebitscorebiasdomain_exp_numdomain_obs_numr0   is_included)r   r   r   r   r   len_create_hitsr$   r7   joinappendfloatr5   )r   r<   r>   rV   hit_attr_listr@   row	hit_attrsr   r   r   r8      sN   






zHmmer3TextParser._parse_hitc                 C   sJ  |  dd  g }	 | jdrt|dksJ |S | jds"J | jtdd d	d
\}}| }|  dd  t| j| _g }	 | jdsc| jdsc| jdsc| jdsc| jdr|d}t	|}	|
 D ]\}
}|
dkrt|	|
}|r|r||rqpt|	|
| qp|	s||	_||	 ndd | j dD }t|dksJ t||}|r||_|r||_d|_| jddkrt|d d
 |_t|d |_t|d d
 |_t|d |_n)| jddv rt|d d
 |_t|d |_t|d d
 |_t|d |_d |_|_t|g}t|d |_|d
 dk|_t|d |_t|d |_ t|d  |_!t|d! |_"| jddkr[|d" |_#|d# |_$n| jddv rn|d# |_#|d" |_$t|d$ d
 |_%t|d% |_&|d& |_'t|d' |_(|| t| j| _qE| jdr| )||	j* q
)(zAParse a HMMER3 hsp block, beginning with the hsp table (PRIVATE).c                 S   r(   )N)r1   >>r)   r*   r   r   r   r+      r,   z/Hmmer3TextParser._create_hits.<locals>.<lambda>Tr1   r   r_   z>> Nz  r   c                 S   r(   )N)z ---   ------ ----- --------   [No individual domainsr)   r*   r   r   r   r+      s    z$   [No targets detected that satisfyr`   z%Internal pipeline statistics summary:z  Alignments for each domain:r0   c                 S   rD   r   r   rE   r   r   r   rH     rI   z1Hmmer3TextParser._create_hits.<locals>.<listcomp>r.      proteinr   hmmscanrN   rO   rK   rJ   	hmmsearchphmmer!r            rM                  )+r   r   r   rW   r7   r$   r   r   popr   r9   getattrr:   query_descriptionrZ   r   hit_descriptionmolecule_typer   r6   r5   	hit_starthit_endquery_start	query_end
hit_strandquery_strandr   domain_indexrV   r[   rR   rS   evalue_condrQ   hit_endtypequery_endtype	env_startenv_endenv_endtypeacc_avg_parse_aln_blockhsps)r   r^   r<   r>   r@   hidhdeschsp_listhit_attrhitrB   rC   cur_valparsedfraghspr   r   r   rX      s   














PzHmmer3TextParser._create_hitsc                 C   s:  t | j| _d}	 | jds| jdr|S | jd|d  s#J || d }d}d}i }d}| j | _	 d}	tt| j}	|	r|du rOt|		d}n|t|		dksZJ t|t|krj||		d	7 }nt|t|kry||		d	7 }t|t|ksJ n| jd
s| jds| jdr||_
| jddkr||_||_n| jddv r||_||_|d7 }d}d}i }d}nYt|t|krtt| j}	|	r|		d}
|
|v r||
  |		d	7  < n,|		d	||
< n$|durtt| j|d dd}d|vr||d< n|d  |7  < | j | _q8q	)z-Parse a HMMER3 HSP alignment block (PRIVATE).r   Tr_   r1   z  == domain %ir   rL   Nr   z  == domainr   rc   rd   rh   

similarity)r   r   r   r   r;   r   r   _HRE_ID_LINErW   r!   aln_annotationr   r6   r   query_HRE_ANNOT_LINEr   )r   r   r   dom_counterr   hmmseqaliseqannotaln_prefix_lenr'   
annot_namer   r   r   r   r   I  s|   






z!Hmmer3TextParser._parse_aln_blockN)__name__
__module____qualname____doc__r   r   r   r   r   r8   rX   r   r   r   r   r   r
   '   s    *=9mr
   c                   @   s$   e Zd ZdZeZdZdZdd ZdS )r   z*Indexer class for HMMER plain text output.s   Query: s   //c                 c   s    | j }|d | }tt }	 t|}| }|| j	r7t
||}|d }|t| }n|| jrH| |dfV  |}n|sLdS q)zFIterate over Hmmer3TextIndexer; yields query results' key, offsets, 0.r   Tr   N)_handleseektellr   compile_QRE_ID_LEN_PTNencoder   r   qresult_startr   r!   r$   rW   qresult_enddecode)r   r   start_offsetregex_idr   
end_offsetr'   qresult_keyr   r   r   r     s$   
zHmmer3TextIndexer.__iter__N)	r   r   r   r   r
   _parserr   r   r   r   r   r   r   r     s    r   __main__)run_doctest)r   r   Bio.SearchIO._modelr   r   r   r   Bio.SearchIO._utilsr   r   _baser	   __all__r   r    r"   r#   r   r4   _HRE_VALIDATEr   r   r
   r   r   
Bio._utilsr   r   r   r   r   <module>   s6   






    
