o
    Rŀg|8                  0   @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ dZdZdZeeddddddddddddddddddddddddddddddddddddddddddddddf/ddZde_dd ZdS )aM  Code to invoke the NCBI BLAST server over the internet.

This module provides code to work with the WWW version of BLAST
provided by the NCBI. https://blast.ncbi.nlm.nih.gov/

Variables:

    - email        Set the Blast email parameter (default is None).
    - tool         Set the Blast tool parameter (default is ``biopython``).

    N)StringIO)	urlencode)build_opener)HTTPBasicAuthHandler)HTTPPasswordMgrWithDefaultRealm)install_opener)Request)urlopen)BiopythonWarning)function_with_previous	biopythonz(https://blast.ncbi.nlm.nih.gov/Blast.cgiz(none)g      $@2   i  XMLblastc2           E      C   s2  g d}2| |2vrt d|  dd|2 | r3| dkr3d} t|dk r3d}	d	}d
}d}
d}tdt i d|d|d|d|d|d|d|	d|
d|d|d|d|d|d|d|-d|d|i d|d|d|d |d!| d"|d#|d$|d%|d&|d'|d(|d)| d*|.d+|/d,|d-||d.d/}3|1durt }4|4d||0|1 t|4}5t	|5}6t
|6 |tkr|3ttd0 d1d2 |3 D }3t|3 }7t||7d3d4i}8t|8}9t|9\}:};|!|"|#|$|%|&|'|(|)|*|:|+||,d5d6}3d7d2 |3 D }3t|3 }7d8}<t }=	 t }>tj|< |> }?|?d:kr-t|? |>|? t_n|>t_|<d;k r<|tkr<d;}<t |= }@|@d<krQtd=|: d>t t||7d3d4i}8t|8}9|9  }A|Ad?krjqd@|Avrt	 t|AS |Ad@}B|AdA|B}C|A|Btd@ |C  }D|D dBkr	 t|AS q)Ca 	  BLAST search using NCBI's QBLAST server or a cloud service provider.

    Supports all parameters of the old qblast API for Put and Get.

    Please note that NCBI uses the new Common URL API for BLAST searches
    on the internet (http://ncbi.github.io/blast-cloud/dev/api.html). Thus,
    some of the parameters used by this function are not (or are no longer)
    officially supported by NCBI. Although they are still functioning, this
    may change in the future.

    The Common URL API (http://ncbi.github.io/blast-cloud/dev/api.html) allows
    doing BLAST searches on cloud servers. To use this feature, please set
    ``url_base='http://host.my.cloud.service.provider.com/cgi-bin/blast.cgi'``
    and ``format_object='Alignment'``. For more details, please see
    https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=CloudBlast

    Some useful parameters:

     - program        blastn, blastp, blastx, tblastn, or tblastx (lower case)
     - database       Which database to search against (e.g. "nr").
     - sequence       The sequence to search.
     - ncbi_gi        TRUE/FALSE whether to give 'gi' identifier.
     - descriptions   Number of descriptions to show.  Def 500.
     - alignments     Number of alignments to show.  Def 500.
     - expect         An expect value cutoff.  Def 10.0.
     - matrix_name    Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
     - filter         "none" turns off filtering.  Default no filtering
     - format_type    "HTML", "Text", "ASN.1", or "XML".  Def. "XML".
     - entrez_query   Entrez query to limit Blast search
     - hitlist_size   Number of hits to return. Default 50
     - megablast      TRUE/FALSE whether to use MEga BLAST algorithm (blastn only)
     - short_query    TRUE/FALSE whether to adjust the search parameters for a
                      short query sequence. Note that this will override
                      manually set parameters like word size and e value. Turns
                      off when sequence length is > 30 residues. Default: None.
     - service        plain, psi, phi, rpsblast, megablast (lower case)

    This function does no checking of the validity of the parameters
    and passes the values to the server as is.  More help is available at:
    https://ncbi.github.io/blast-cloud/dev/api.html

    )blastnblastpblastxtblastntblastxzProgram specified is z. Expected one of z, r   N   i        z"SHORT_QUERY_ADJUST" is incorrectly implemented (by NCBI) for blastn. We bypass the problem by manually adjusting the search parameters. Thus, results may slightly differ from web page searches.AUTO_FORMATCOMPOSITION_BASED_STATISTICSDATABASEDB_GENETIC_CODE	ENDPOINTSENTREZ_QUERYEXPECTFILTERGAPCOSTSGENETIC_CODEHITLIST_SIZEI_THRESHLAYOUT
LCASE_MASK	MEGABLASTMATRIX_NAMENUCL_PENALTYNUCL_REWARDOTHER_ADVANCED
PERC_IDENTPHI_PATTERNPROGRAMQUERY
QUERY_FILEQUERY_BELIEVE_DEFLINE
QUERY_FROMQUERY_TOSEARCHSP_EFFSERVICESHORT_QUERY_ADJUSTTEMPLATE_TYPETEMPLATE_LENGTH	THRESHOLDUNGAPPED_ALIGNMENTPut)	WORD_SIZECMD)emailtoolc                 S      i | ]\}}|d ur||qS N .0keyvaluerA   rA   E/var/www/html/myenv/lib/python3.10/site-packages/Bio/Blast/NCBIWWW.py
<dictcomp>       zqblast.<locals>.<dictcomp>z
User-AgentBiopythonClientGet)
ALIGNMENTSALIGNMENT_VIEWDESCRIPTIONSENTREZ_LINKS_NEW_WINDOW
EXPECT_LOWEXPECT_HIGHFORMAT_ENTREZ_QUERYFORMAT_OBJECTFORMAT_TYPENCBI_GIRIDRESULTS_FILEr4   SHOW_OVERVIEWr<   c                 S   r?   r@   rA   rB   rA   rA   rF   rG      rH      Tr   <   iX  zBLAST request z9 is taking longer than 10 minutes, consider re-issuing itz

zStatus=
READY)
ValueErrorjoinlenwarningswarnr
   r   add_passwordr   r   r   NCBI_BLAST_URLupdater=   r>   itemsr   encoder   r	   _parse_qblast_ref_pagetimeqblastprevioussleepreaddecodeindexstripupperr   )Eprogramdatabasesequenceurl_baseauto_formatcomposition_based_statisticsdb_genetic_code	endpointsentrez_queryexpectfiltergapcostsgenetic_codehitlist_sizei_threshlayout
lcase_maskmatrix_namenucl_penaltynucl_rewardother_advanced
perc_identphi_pattern
query_filequery_believe_defline
query_fromquery_tosearchsp_effservice	thresholdungapped_alignment	word_sizeshort_query
alignmentsalignment_viewdescriptionsentrez_links_new_window
expect_lowexpect_highformat_entrez_queryformat_objectformat_typencbi_giresults_fileshow_overview	megablasttemplate_typetemplate_lengthusernamepasswordprograms
parameterspassword_mgrhandleropenermessagerequesthandleridrtoedelay
start_timecurrentwaitelapsedresultsijstatusrA   rA   rF   rh   ,   s0  _	
 !"#$%)






rh   c                 C   s  |    }|d}|dkrd}n|d|}||td |  }|d}|dkr0d}n|d|}||td |  }|s|s|d}|dkrt||td d  }|ddd	 ddd	  }|rttd
| |d}|dkr||td d  }|ddd	 ddd	  }|rtd
| |d}|dkr||d ddd	 ddd	  }td
| td|std|d|std|dz|t|fW S  ty   td|dw )zExtract a tuple of RID, RTOE from the 'please wait' page (PRIVATE).

    The NCBI FAQ pages use TOE for 'Time of Execution', so RTOE is probably
    'Request Time of Execution' and RID would be 'Request Identifier'.
    zRID =NrZ   zRTOE =z<div class="error msInf">z</div>r   r   zError message from NCBI: z<p class="error">z</p>zMessage ID#<zNo RID and no RTOE found in the 'please wait' page, there was probably an error in your request but we could not extract a helpful error message.z9No RID found in the 'please wait' page. (although RTOE = )z9No RTOE found in the 'please wait' page. (although RID = z4A non-integer RTOE found in the 'please wait' page, )rk   rl   findr^   rn   splitr\   int)r   sr   r   r   r   msgrA   rA   rF   rf   4  s`   


 
 
(

rf   )__doc__rg   r_   ior   urllib.parser   urllib.requestr   r   r   r   r   r	   Bior
   
Bio._utilsr   r=   r>   rb   rh   ri   rf   rA   rA   rA   rF   <module>   s   
  