o
    Rŀg$                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 dZ
ed	Zed
ZedZedZedZdZdZdZG dd dZdS )zIBio.SearchIO parser for HHSUITE version 2 and 3 plain text output format.    N)Hit)HSP)HSPFragment)QueryResult)read_forward)Hhsuite2TextParserz^Query\s+(.+)\s?$z^No +(\d+)\s+$z>(\S+)\s+(.*)$z,^Q\s+(.+) +(\d+) +([A-Z-]+) +(\d+) +\(\d+\)$z,^T\s+(.+) +(\d+) +([A-Z-]+) +(\d+) +\(\d+\)$zDone!HHSUITEi  c                   @   sb   e Zd ZdZdd Zdd ZdefddZd	d
 Zdd Z	dd Z
edd Zdd Zdd ZdS )r   z3Parser for the HHSUITE version 2 and 3 text output.c                 C   s(   || _ t| j | _d| _d| _d| _dS )zInitialize the class.FN)handler   linedonequery_idseq_len)selfr	    r   X/var/www/html/myenv/lib/python3.10/site-packages/Bio/SearchIO/HHsuiteIO/hhsuite2_text.py__init__/   s
   
zHhsuite2TextParser.__init__c                 c   s    |   E dH  dS )z9Iterate over query results - there will only ever be one.N)_parse_qresult)r   r   r   r   __iter__7   s   zHhsuite2TextParser.__iter__Tc                 C   sH   d}	 |r
| j s
dS || j rdS t| j| _ |d7 }||kr#tdq)zERead the file handle until the given function returns True (PRIVATE).r   TN   z&Exceeded max_read_until in _read_until)r
   r   r	   RuntimeError)r   	bool_funcstop_on_blankmax_read_untilcountr   r   r   _read_until;   s   

zHhsuite2TextParser._read_untilc                 C   sF   g }|    | jdd dd | js|  }|| | jr| |S )z$Parse HHSUITE output file (PRIVATE).c                 S   s   t t| S )N)research_RE_HIT_BLOCK_START)r
   r   r   r   <lambda>N   s    z3Hhsuite2TextParser._parse_qresult.<locals>.<lambda>F)r   )_parse_preambler   r   _parse_hit_blockappend_create_qresult)r   hit_block_datahit_dictr   r   r   r   I   s   

z!Hhsuite2TextParser._parse_qresultc                 C   sf   i }| j r1tt| j }|r|d| _| j dr&t| j  	 d | _
| j  | _ | j s|S )z%Parse metadata about query (PRIVATE).r   Match_columns)r
   r   r   	_RE_QUERYgroupr   
startswithintstripsplitr   r	   readline)r   metaregxr   r   r   r   U   s   z"Hhsuite2TextParser._parse_preamblec                 C   s   t | j| _tt| j}|std| j d|d|dddddddddddd}| j	 | _| 
| j| 	 t | j| _| j rN| jtrSd	| _|S tt| jr\|S | | q=)
zParse a hit block (PRIVATE).z*Unexpected content in HIT_BLOCK_DESC line''r      z ;N )hit_iddescriptionevalue	hit_starthit_endhit_seqprobquery_start	query_end	query_seqscoreT)r   r	   r
   r   r   _RE_HIT_BLOCK_DESCr   r'   lstripr,   _process_score_liner*   r(   _END_OF_FILE_MARKERr   r   _parse_hit_match_block)r   matchhit_datar   r   r   r    a   s:   
z#Hhsuite2TextParser._parse_hit_blockc              
   C   sv   dddd}|    D ],}|d\}}||v r8z
t|||| < W q ty7   td| d|   Y qw qdS )	a  Parse the scores from the line and populate hit_data dict (PRIVATE).

        Lines are of the form:
        Probab=99.95  E-value=3.7e-34  Score=210.31  Aligned_cols=171  Identities=100%  Similarity=2.050  Sum_probs=166.9

        E-value could be in decimal or scientific notation, so split the string rather then use regexp - this
        also means we should be tolerant of additional fields being added/removed
        r4   r<   r8   )zE-valueScoreProbab=z"HHsuite parser: unable to extract z from line: N)r*   r+   floatKeyErrorwarningswarn)r
   rC   	score_map
score_pairkeyvaluer   r   r   r?      s   
z&Hhsuite2TextParser._process_score_linec                 C   s   dd }	 | j  sdS tt| j }|r?||r?|d  |d 7  < |d du r5t|d|d< t|d	|d
< n2tt| j }|rq||rq|d  |d 7  < |d du rht|d|d< t|d	|d< | j	 | _ q)a  Parse a single block of hit sequence data (PRIVATE).

        Parses block such as ::

            Q ss_pred             ceecchHHHHHHHHHHHHHHHHHHHhhhhhcCCCCccc
            Q 4P79:A|PDBID|C  160 YELGPALYLGWSASLLSILGGICVFSTAAASSKEEPAT  197 (198)
            Q Consensus       160 ~~~g~sf~l~~~~~~l~~~~~~l~~~~~~~~~~~~~~~  197 (198)
                                  .++|||||++|++.++.+++++++++..+..++++..+
            T Consensus       327 ~~~GwS~~l~~~s~~l~lia~~l~~~~~~~~~~~~~~~  364 (364)
            T 5B2G_A          327 REMGASLYVGWAASGLLLLGGGLLCCSGPSSGENLYFQ  364 (364)
            T ss_dssp             EEECTHHHHHHHHHHHHHHHHHHHHCC-----------
            T ss_pred             cccchHHHHHHHHHHHHHHHHHHHHhcCCCCCCccccC

        c                 S   s   |  d dkS )zReturn True if match is not a Consensus column (PRIVATE).

            It's not possible to distinguish a sequence line from a Consensus line with
            a regexp, so need to check the ID column.
            r   	Consensus)r'   r*   )rB   r   r   r   match_is_valid   s   zAHhsuite2TextParser._parse_hit_match_block.<locals>.match_is_validTNr;      r9   r0      r:   r7   r5   r6   )
r
   r*   r   rB   _RE_MATCH_BLOCK_QUERY_SEQr'   r)   _RE_MATCH_BLOCK_HIT_SEQr	   r,   )r   hit_match_datarP   rB   r   r   r   rA      s$   
z)Hhsuite2TextParser._parse_hit_match_blockc                 C   s8  | j }i }t|D ]\}}|d }t||}d|_|d d |_|d |_|d d |_|d |_|d |_|d	 |_	t
|g}||_||_||_ |d
 |_d}	|	|_|d |_|d |_|d |_||vrt|g|}
|d
 |
_|	|
_|d |
_|d |
_|
||< q	|| | q	t| |}t|_| j|_|gS )zDCreate the Biopython data structures from the parsed data (PRIVATE).r2   proteinr9   r   r:   r5   r6   r7   r;   r3   Tr4   r<   r8   )r   	enumerater   molecule_typer9   r:   r5   r6   hitqueryr   r2   output_indexhit_descriptionis_includedr4   r<   r8   r   r3   r!   r   values_PROGRAMprogramr   )r   
hit_blocksr   r$   r[   blockr2   fraghspr]   rY   qresultr   r   r   r"      sD   













z"Hhsuite2TextParser._create_qresultN)__name__
__module____qualname____doc__r   r   MAX_READ_UNTILr   r   r   r    staticmethodr?   rA   r"   r   r   r   r   r   ,   s    "
*r   )ri   r   rI   Bio.SearchIO._modelr   r   r   r   Bio.SearchIO._utilsr   __all__compiler&   r   r=   rS   rT   r@   r_   rj   r   r   r   r   r   <module>   s$   




