o
    Rŀg2                     @   s   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	l	m
Z
 d
ZedZG dd dZG dd dZG dd de
ZedkrVddlmZ e  dS dS )z,Bio.SearchIO parser for HMMER 2 text output.    N)Hit)HSP)HSPFragment)QueryResult)read_forward   )_BaseHmmerTextIndexer)Hmmer2TextParserHmmer2TextIndexerz(\S+):\s+domain (\d+) of (\d+)c                   @   s   e Zd Zdd ZdS )_HitPlaceholderc                 C   s:   t |}| j|_| j|_| j|_| jr| j|_| j|_|S )N)r   id_evaluebitscoredescriptiondomain_obs_num)selfhsp_listhit r   T/var/www/html/myenv/lib/python3.10/site-packages/Bio/SearchIO/HmmerIO/hmmer2_text.py	createHit   s   z_HitPlaceholder.createHitN)__name__
__module____qualname__r   r   r   r   r   r      s    r   c                   @   sb   e Zd ZdZdd Zdd ZdddZd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )r	   z'Iterator for the HMMER 2.0 text output.c                 C   s   || _ g | _|  | _dS )zInitialize the class.N)handlebufparse_preamble_meta)r   r   r   r   r   __init__'   s   zHmmer2TextParser.__init__c                 c   sD    |   D ]}| jd|_| jd|_| jd|_|V  qdS )z4Iterate over Hmmer2TextParser, yields query results.programtargetversionN)parse_qresultr   getr   r    r!   )r   qresultr   r   r   __iter__-   s   zHmmer2TextParser.__iter__Tc                 C   st   t | jdkr| j S | j | _| jr,|r,| j s,| j | _| jr,|r,| j r| jr7|r7| j | _| jS )z<Return the next non-empty line, trailing whitespace removed.r   )lenr   popr   readlinelinestriprstrip)r   r+   r   r   r   	read_next5   s   
zHmmer2TextParser.read_nextc                 C   s   | j | dS )z-Un-read a line that should not be parsed yet.N)r   append)r   r)   r   r   r   	push_backA   s   zHmmer2TextParser.push_backc                 C   s"   | j dd\}}| | fS )z(Parse key-value pair separated by colon.:r   )r)   splitr*   )r   keyvaluer   r   r   parse_key_valueE   s   z Hmmer2TextParser.parse_key_valuec                 C   s  i }d}|   r|dkrA| jdr| jdd  |d< n!| jdr&q| jdr6| j d |d	< n
| jdd
kr@d}q|dksGJ d|v sMJ | jddkrX	 |S |  \}}|d dkrn|dkrm||d< qn|d dkr}|dkr}||d< q|||< |   s|S )zParse HMMER2 preamble.GENERIChmm-r   r   zHMMER isHMMERr   r!   $   OPTIONS    	hmmsearchzSequence databaser    hmmpfamzHMM file)r,   r)   
startswithr0   r*   countr3   )r   metastater1   r2   r   r   r   r   J   s>   zHmmer2TextParser.parse_preamblec                 c   s   |   r| jdsdS |  \}}t|d| _d}|   rI| jdsI| jdr3|  d | j_| jdr?|  d }|   rI| jdr%|  }t|dkr\| 	| | 
  | jdsp|    | jsjn| jdrb| j| j |dur|| j_| jV  |   sdS dS )	zParse a HMMER2 query block.QueryN)idScores	Accessionr   Descriptionr   )r,   r)   r=   r3   r   r$   	accession
parse_hitsr&   
parse_hspsparse_hsp_alignmentsr   r-   r   )r   _r   r   hit_placeholdersr   r   r   r"   m   s6   
zHmmer2TextParser.parse_qresultc           	      C   s   g }|   rk| jdr	 |S | jddkr	 |S | jds,| jds,| jdr-q| j }|d}t| }t| }t| }d|	 }t
 }||_||_||_||_||_|| |   s|S )	z7Parse a HMMER2 hit block, beginning with the hit table.Parsedzno hitsSequenceModelz	-------- r    )r,   r)   r=   findr0   r'   intfloatjoinr*   r   r   r   r   r   r   r-   )	r   rK   fieldsr   r   r   r   r   r   r   r   r   rG      s<   





zHmmer2TextParser.parse_hitsc              
      s  i }|   r| jds| jds| jdkrn| jds*| jds*| jdr+q| j \
 }}}}}}}	}
}t | jj}d|_| jd d	krdt	|d
 |_
t	||_t	|d
 |_t	||_n| jd dkrt	|d
 |_t	||_t	|d
 |_
t	||_t|g}t||_t|
|_t	|dd |_| jd d	kr|	|_||_n| jd dkr|	|_||_ |vrч fdd|D d }||g}|| < n|  }|j|_|| |   s|D ]}| j||j  qdS )z7Parse a HMMER2 hsp block, beginning with the hsp table.
Alignments	Histogram//rO   rN   z--------proteinr   r<   r   r;   /r   c                    s   g | ]	}|j  kr|qS r   r   ).0pr[   r   r   
<listcomp>   s    z/Hmmer2TextParser.parse_hsps.<locals>.<listcomp>N)r,   r)   r=   r0   r   r$   rB   molecule_typer   rR   	hit_starthit_endquery_start	query_endr   rS   r   r   domain_indexhit_endtypequery_endtyper   r   hit_descriptionr-   r   )r   rK   unordered_hitsdomainseq_fseq_t	seq_complhmm_fhmm_t	hmm_complscorer   fraghspplaceholderr   r]   r   r[   r   rH      sx   













>zHmmer2TextParser.parse_hspsc                 C   s`  | j dsdS |  r.| j dks| j drdS tt| j }|du r&q|d}t|d}t|d}| j| }|j	|krDq||d  d }d	}d	}d	}	d	}
d}|  r| j d
r| j dd dkry|
| j dd 
 7 }
|  synu| j dd dkr| j dd }d}n	| j dd }d}||7 }t|}| jddsnJ|| j d| d| |  7 }t|t| }|d
| 7 }|  sn(| j dd  }t|dkr|	| j dd  d 
 7 }	|  r| j d
s`| | j  |dr|dd }|dd }||jd< |
r|
|jd< | jd dkr"||_|	|_n|	|_||_|  sdS dS )z#Parse a HMMER2 HSP alignment block.rV   NrX   rW   r         r    rP         CS      z*->F)r+   z<-*
similarityr   r<   )r)   r=   r,   research_HSP_ALIGN_LINEgrouprR   r$   r   r*   r&   r0   r.   endswithaln_annotationr   r   query)r   matchr   idxnumr   rq   hmmseq	consensusotherseqstructureseqpadseqline_lenextra_paddingpartsr   r   r   rI      sp   



%

z%Hmmer2TextParser.parse_hsp_alignmentsN)T)r   r   r   __doc__r   r%   r,   r.   r3   r   r"   rG   rH   rI   r   r   r   r   r	   $   s    
#!Fr	   c                   @   s$   e Zd ZdZeZdZdZdd ZdS )r
   zIndexer for hmmer2-text format.s   Querys   //c           	      c   s    | j }|d | }td}d}t|}|drd}	 | }|| jr>t||}|	d
 }|t| }n|| jrO| |dfV  |}n|s]|r[| |dfV  dS t|}q )zFIterate over Hmmer2TextIndexer; yields query results' key, offsets, 0.r   s!   Query\s*(?:sequence|HMM)?:\s*(.*)Fs	   hmmsearchTr   N)_handleseektellr~   compiler   r=   qresult_startr   r   r*   r&   qresult_enddecode)	r   r   start_offsetregex_idis_hmmsearchr)   
end_offsetregxqresult_keyr   r   r   r%   S  s0   


zHmmer2TextIndexer.__iter__N)	r   r   r   r   r	   _parserr   r   r%   r   r   r   r   r
   J  s    r
   __main__)run_doctest)r   r~   Bio.SearchIO._modelr   r   r   r   Bio.SearchIO._utilsr   _baser   __all__r   r   r   r	   r
   r   
Bio._utilsr   r   r   r   r   <module>   s&   
  (,
