o
    Rŀg                      @   sR  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dZ	i d	d
dddddddddddddddddddddddd d!d"d#d$d%d&d'd(i d)d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdT
Z
dUdV ZdWdX ZdYefdZefd[efd\efd]efd^Zi ddYefdAd_efddZefdd[efdId`efdCd]efdEdaefdd\efdJdbefdKdcefdLddefdMdeefdNdfefdOdgefdPdhefdQdiefdRdjefZdefdkefdefdlefdmefdnefdoefdpefdqefdrefdGefdjefdsZdtefduefdvefdwefdxefdyefdzefd7efd{efd|efd}
Zeeeee ee ee Zg d~Zh dZh dZedZdd Zdd ZG dd dZG dd deZG dd dZ e!dkrddl"m#Z# e#  dS dS )zKBio.SearchIO parser for BLAST+ tab output format, with or without comments.    N)SearchIndexer)Hit)HSP)HSPFragment)QueryResult)BlastTabIndexerBlastTabParserBlastTabWriterzquery idqseqidz
query acc.qacczquery acc.verqaccverzquery lengthqlenz
subject idsseqidzsubject acc.sacczsubject acc.versaccverzsubject lengthslenzalignment lengthlengthz	bit scorebitscorescoreevalue	identicalnidentz
% identitypident	positivespositivez% positivesppos
mismatchesmismatchgapszq. startqstartzq. endqendzs. startsstartzs. endsendzquery frameqframezsbjct framesframezquery/sbjct framesframesz	query seqqseqzsubject seqsseqz	gap opensgapopenzquery giqgizsubject ids	sallseqidz
subject gisgizsubject gissallgiBTOPbtopzsubject accs.sallaccstaxids	sscinames	scomnamessblastnames
sskingdomsstitle
salltitlessstrandqcovsqcovhsp)
zsubject tax idszsubject sci nameszsubject com nameszsubject blast nameszsubject super kingdomszsubject titlezsubject titleszsubject strandz% subject coveragez% hsp coveragec                 C   
   |  dS )N;splits r@   R/var/www/html/myenv/lib/python3.10/site-packages/Bio/SearchIO/BlastIO/blast_tab.py_list_semicolJ      
rB   c                 C   r:   )N<>r<   r>   r@   r@   rA   _list_diamondN   rC   rE   id	accessionaccession_versionseq_lengi)r
   r   r   r   r)   id_allaccession_allgi_alltax_ids	sci_names	com_namesblast_namessuper_kingdomstitle	title_allstrandquery_coveragebitscore_raw	ident_num	ident_pctpos_numpos_pctmismatch_numgap_numgapopen_num)r   r   r   r   r   r   r   r   r   r(   r.   r9   aln_spanquery_start	query_end	hit_starthit_endquery_frame	hit_framequeryhit)
r   r   r    r!   r"   r#   r$   r%   r&   r'   )r
   r   r   r   r   r(   r   r    r!   r"   r   r   >   r   r
   r   >   r   r   r   r*   z\w-c                 C   s6   d}dD ]}t t| |j}|ttt|7 }q|S )z=Return the number of gap openings in the given HSP (PRIVATE).r   rf   rg   )strgetattrseqlenrefindall_RE_GAPOPEN)hspr(   seq_typerk   r@   r@   rA   _compute_gapopen_num   s
   rr   c                 C   s*  t | |sf|dsf|dkr| j| j | j | _nM|dr,t| || j| j | j  n:|dr?t| || j| j | j  n'|dkrQt| || j| j | j  n|dkrft | dr_t | dsatt	| | _
|d	kru| j| j d
 | _dS |dkr| j| j d
 | _dS |dkr| j| j d
 | _dS dS )z9Calculate the given HSP attribute, for writing (PRIVATE)._pctr_   identgapr\   r^   rf   rg   rY   d   r[   gap_pctN)hasattrendswithrX   r\   r]   r_   
startswithsetattrAttributeErrorrr   r^   rY   rZ   r[   rw   )rp   attrr@   r@   rA   _augment_blast_hsp   s(   


r~   c                   @   sn   e Zd ZdZdefddZdd Zdd Zd	d
 Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd ZdS )r   z$Parser for the BLAST tabular format.Fc                 C   s,   || _ || _| || _| j   | _dS Initialize the class.N)handlehas_comments_prep_fieldsfieldsreadlinestriplineselfr   commentsr   r@   r@   rA   __init__   s   zBlastTabParser.__init__c                 c   sD    | j sdS | jr| j}n| j drtd| j}| E dH  dS )z2Iterate over BlastTabParser, yields query results.N#zsEncountered unexpected character '#' at the beginning of a line. Set comments=True if the file is a commented file.)r   r   _parse_commented_qresultrz   
ValueError_parse_qresult)r   iterfuncr@   r@   rA   __iter__   s   zBlastTabParser.__iter__c                 C   sr   t |tr| d}d|v r%|d}|d| t ||d d  }t|tr3t|t	s7t
d|S )zEValidate and format the given fields for use by the parser (PRIVATE). stdN   z-Required query and/or hit ID field not found.)
isinstanceri   r   r=   index_DEFAULT_FIELDSsetintersection_MIN_QUERY_FIELDS_MIN_HIT_FIELDSr   )r   r   idxr@   r@   rA   r      s   

 zBlastTabParser._prep_fieldsc                 c   s    	 |   }|rAz|d | _|  }W n ty(   d|vs J tt g}Y nw |D ]}| D ]
\}}t||| q1|V  q+ndS q)z>Yield ``QueryResult`` objects from a commented file (PRIVATE).Tr   N)_parse_commentsr   r   KeyErroriterr   itemsr{   )r   r   	qres_iterqresultkeyvaluer@   r@   rA   r      s$   
z'BlastTabParser._parse_commented_qresultc                 C   sB  i }	 d| j v r(d| j vr(| j tdd d}|d  |d< |d	 |d
< ngd| j v rM| j tdd dd	}|d |d< t|dkrL|d	 |d< nBd| j v r^| j tdd |d< n1d| j v ro| j tdd |d< n d| j v r{|  |d< nd| j v sd| j v r| j  | _ |S | j | _ | j s|S | j  | _ q)z;Return a dictionary containing tab file comments (PRIVATE).TBLAST	processedz #Nr   r   programr   versionQuery	# Query: rF      descriptionDatabasez# Database: targetRIDz# RID: ridFieldsr   z hits found)r   rl   r=   lower_parse_fields_liner   r   r   )r   r   program_line
query_liner@   r@   rA   r     s4   



zBlastTabParser._parse_commentsc                 C   s4   | j tdd }|d}dd |D }| |S )zDReturn column short names line from 'Fields' comment line (PRIVATE).z
# Fields: N, c                 S   s   g | ]}t | qS r@   )_LONG_SHORT_MAP).0	long_namer@   r@   rA   
<listcomp>E  s    z5BlastTabParser._parse_fields_line.<locals>.<listcomp>)r   rl   r=   r   )r   raw_field_strlong_fieldsr   r@   r@   rA   r   A  s   

z!BlastTabParser._parse_fields_linec                 C   s   | j }| j d}t|t|krtdt|t|f i i i i f\}}}}t|D ]=\}}|| }	d}
|tf|tf|t	f|t
ffD ]\}}|	|v ra||	 \}}|tur[||}|||< d}
qE|
sj|	tvsjJ q-||||dS )z3Return a dictionary of parsed row values (PRIVATE).	zExpected %i columns, found: %iFT)r   rg   rp   frag)r   r   r   r=   rl   r   	enumerate_COLUMN_QRESULT_COLUMN_HIT_COLUMN_HSP_COLUMN_FRAGri   _SUPPORTED_FIELDS)r   r   columnsr   rg   rp   r   r   r   sname
in_mappingparsed_dictmapping	attr_namecasterr@   r@   rA   _parse_result_rowH  s4   z BlastTabParser._parse_result_rowc                 C   sP   | d}|du rd|v r| dd }|du r| d}|du r&| d}|S )zNReturn the value used for a QueryResult or Hit ID from a parsed row (PRIVATE).rF   NrK   r   rG   rH   )get)r   parsedid_cacher@   r@   rA   _get_idk  s   


zBlastTabParser._get_idc                 c   s   d}d}d}d}d}d}d}d}d}	d}
d}d}d\}}g g }}	 |dur-|}|	}|
}| j rI| j d	sI|  }| |d
 }	| |d }
n|}d\}	}
||	krV|}n|}||
ks`||krc|}n|}|durJt||}|d  D ]4\}}dD ]'}||d krt||d |d  d }q{||d krt||d |d  }q{t||| qudD ]$}| 	|||d }t|d| | | 
|||d }t|d| | qt|g}|d  D ]
\}}t||| q|| ||krt|}|d  D ]\}}|dkrt||| qt|d|dd  q|| g }||ks'||krJt||}|d
  D ]\}}t||| q2|V  ||krHdS g }| j  | _ q#)z$Yield QueryResult objects (PRIVATE).r   r      r      N)NNTr   r   rg   r   rh   _start_end)rg   rf   %s_frame	%s_strandrp   rK   _id_alt)r   rz   r   r   r   r   minmaxr{   _get_frag_frame_get_frag_strandr   appendr   r   r   r   r   )r   	state_EOFstate_QRES_NEWstate_QRES_SAMEstate_HIT_NEWstate_HIT_SAME
qres_state	hit_state
file_statecur_qidcur_hidprev_qidprev_hidcurprevhit_listhsp_listr   r}   r   rq   framerU   rp   rg   r   r@   r@   rA   r   z  s   









zBlastTabParser._parse_qresultc                 C   sZ   |dv sJ t |d| d}|dur|S d|v r+|dkrdnd}t|d d| S dS )	zReturn fragment frame for given object (PRIVATE).

        Returns ``HSPFragment`` frame given the object, its sequence type,
        and its parsed dictionary values.
        rh   r   Nr%   rf   r   r   /)rj   intr=   )r   r   rq   	parsedictr   r   r@   r@   rA   r     s   zBlastTabParser._get_frag_framec                 C   sl   |dv sJ t |d| d}|dur|S |d| }|d| }|dur2|dur4||kr0dS dS dS dS )zReturn fragment strand for given object (PRIVATE).

        Returns ``HSPFragment`` strand given the object, its sequence type,
        and its parsed dictionary values.
        rh   r   N%s_start%s_endr   )rj   r   )r   r   rq   r   rU   startendr@   r@   rA   r     s   	zBlastTabParser._get_frag_strandN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r   r   r   r@   r@   r@   rA   r      s    )#kr   c                   @   sR   e Zd ZdZeZdefddZdd Zdd Z	d	d
 Z
dd Zdd Zdd ZdS )r   z$Indexer class for BLAST+ tab output.Fc                 C   sp   t j| |||d | jd s6d|v r|d| _dS d|v r&|d| _dS d|v r2|d| _dS tddS )r   )r   r   r   r
   r   r   zdCustom fields is missing an ID column. One of these must be present: 'qseqid', 'qacc', or 'qaccver'.N)r   r   _kwargsr   _key_idxr   )r   filenamer   r   r@   r@   rA   r     s   
zBlastTabIndexer.__init__c                 c   sP    | j }|d | jd s| j}n| j}| D ]\}}}| ||fV  qdS )zCIterate over the file handle; yields key, start offset, and length.r   r   N)_handleseekr   _qresult_index_qresult_index_commenteddecode)r   r   r   r   offsetr   r@   r@   rA   r   %  s   

zBlastTabIndexer.__iter__c           	      c   s    | j }|d d}d}d}d}	 | }| }|du r#|}|}n*||r5|t|d  d }n||ks>||rI|||| fV  |}n|sMdS q)z4Indexer for commented BLAST tabular files (PRIVATE).r   Ns	   # Query:    # BLAST processed)r   r   tellr   rz   rl   r=   )	r   r   start_offset
query_markqid_markend_mark
end_offsetr   qresult_keyr@   r@   rA   r   2  s*   

z(BlastTabIndexer._qresult_index_commentedc                 c   s    | j }|d d}d}| j}	 | }| }|du r%|d| }n%z	|d| }W n ty9   d}Y nw ||krJ|||| fV  |}|}|sNdS q)z7Indexer for noncommented BLAST tabular files (PRIVATE).r   NT   	    )r   r   r   r  r   r=   
IndexError)r   r   r  r
  key_idxr	  r   curr_keyr@   r@   rA   r   M  s.   
zBlastTabIndexer._qresult_indexc                 C   s&   | j d r| j}||S | j}||S )zJReturn the raw bytes string of a QueryResult object from the given offset.r   )r   _get_raw_qresult_commented_get_raw_qresult)r   r  getfuncr@   r@   rA   get_rawm  s
   
zBlastTabIndexer.get_rawc                 C   s   | j }|| d}| j}d}	 | }|du r |d| }nz	|d| }W n ty4   d}Y nw ||kr<	 |S ||7 }q)zWReturn the raw bytes string of a single QueryResult from a noncommented file (PRIVATE).r  NTr  )r   r   r   r   r=   r  )r   r  r   qresult_rawr  r
  r   r  r@   r@   rA   r  v  s&   
z BlastTabIndexer._get_raw_qresultc                 C   sf   | j }|| d}d}d}| }|r1|du r|}n||ks$||r'	 |S ||7 }| }|s|S )zTReturn the bytes raw string of a single QueryResult from a commented file (PRIVATE).r  r  N)r   r   r   rz   )r   r  r   r  r  r  r   r@   r@   rA   r    s    
z*BlastTabIndexer._get_raw_qresult_commentedN)r   r   r   r   r   _parserr   r   r   r   r   r  r  r  r@   r@   r@   rA   r     s     	r   c                   @   sF   e Zd ZdZdefddZdd Zdd Zd	d
 Zdd Z	dd Z
dS )r	   z#Writer for blast-tab output format.Fc                 C   s   || _ || _|| _dS r   )r   r   r   r   r@   r@   rA   r     s   
zBlastTabWriter.__init__c                 C   s   | j }d\}}}}|D ]A}| jr|| | |rE|| | | js)|d7 }|t|7 }|tdd |D 7 }|tdd |D 7 }| jrL|d7 }q| jrW|d|  ||||fS )zFWrite to the handle, return how many QueryResult objects were written.)r   r   r   r   r   c                 s   s    | ]}t |V  qd S N)rl   r   rg   r@   r@   rA   	<genexpr>      z,BlastTabWriter.write_file.<locals>.<genexpr>c                 s   s    | ]}t |jV  qd S r  )rl   	fragmentsr  r@   r@   rA   r    s    z# BLAST processed %i queries)r   r   write_build_comments_build_rowsrl   sum)r   qresultsr   qresult_counterhit_counterhsp_counterfrag_counterr   r@   r@   rA   
write_file  s$   zBlastTabWriter.write_filec                 C   sX  h d}d}|D ]}|D ]}g }| j D ]}|tv r#t|t| d }nc|tv r;|dkr1t|d}nUt|t| d }nK|dkrHd|j|jf }n>|tv rqzt|t| d }W n. typ   t| d }	t||	 t||	}Y nw |t	v rt|t	| d }n|t
vsJ q||v r| |||}| ||}|| qd|}
||
d	 7 }qq|S )
zLReturn a string containing tabular rows of the QueryResult object (PRIVATE).>   r    r"   r   r!    r   r*   rK   r%   z%i/%ir   
)r   r   rj   r   rd   re   r   r|   r~   r   r   _adjust_coords_adjust_outputr   join)r   r   coordinatesqresult_linesrg   rp   r   fieldr   r}   hsp_liner@   r@   rA   r    sD   


)zBlastTabWriter._build_rowsc                 C   s   |dv sJ | drdnd}t|d| d}|du r#tdd|  |dk rE|d	r5t|d
| }|S |drCt|d| d }|S |d	rN|d7 }|S )z?Adjust start and end coordinates according to strand (PRIVATE).)r   r    r!   r"   qrf   rg   r   Nz Required attribute %r not found.r   r   r   r   r   r   )rz   rj   r   ry   )r   r,  r   rp   rq   rU   r@   r@   rA   r'    s"   



zBlastTabWriter._adjust_coordsc                 C   s:  |dv rt |j}|S |dkrO|dk rd}|S |dk r!d| }|S |dk r+d| }|S |d	k r5d
| }|S |dk r?d| }|S |dk rId| }|S d| }|S |dv rYd| }|S |dkrw|dkrgd| }|S |dkrqd| }|S d| }|S |dv rd| }|S |dkrd|}|S |dv rd|}|S t |}|S )zPAdjust formatting of given field and value to mimic native tab output (PRIVATE).)r&   r'   r   gXz0.0g>N}a+z%2.0egH}M?z%3.0eg?z%4.3fg      ?z%3.2fg      $@z%2.1fz%5.0f)r   r   z%.2fr   i'  z%4.3egX@z%4.0dz%4.1f)r9   r8   z%.0fr6   rD   )r*   r/   r0   r1   r2   r3   r4   r;   )ri   rk   r)  )r   r,  r   r@   r@   rA   r(    s^   
;531/-+)%

	zBlastTabWriter._adjust_outputc                    s  g }dd t  D  |j }z|j}W n ty"   d| }Y n	w d| d| }|| |jdu r>|d|j  n|d|j d|j  z
|d	|j	  W n	 ty^   Y nw |d
|j
  |r{|dd fdd| jD   |dt|  d|d S )z9Return QueryResult tabular comment as a string (PRIVATE).c                 S   s   i | ]\}}||qS r@   r@   )r   kvr@   r@   rA   
<dictcomp>Z  s    z2BlastTabWriter._build_comments.<locals>.<dictcomp>z# %sz# r   Nz# Query: %sr   z	# RID: %sz# Database: %sz# Fields: %sr   c                 3   s    | ]} | V  qd S r  r@   )r   r,  inv_field_mapr@   rA   r  t  r  z1BlastTabWriter._build_comments.<locals>.<genexpr>z# %i hits foundr&  )r   r   r   upperr   r|   r   r   rF   r   r   r)  r   rl   )r   qresr   r   r   r   r@   r2  rA   r  U  s6   



zBlastTabWriter._build_commentsN)r   r   r   r   r   r   r$  r  r'  r(  r  r@   r@   r@   rA   r	     s    0Ar	   __main__)run_doctest)$r   rm   Bio.SearchIO._indexr   Bio.SearchIO._modelr   r   r   r   __all__r   rB   rE   ri   r   r   floatr   r   r   r   listr   r   r   r   compilero   rr   r~   r   r   r	   r   
Bio._utilsr7  r@   r@   r@   rA   <module>   s^  	
 !"#2	

	   >  
S
