o
    Rŀg                    @   s6  d Z ddlmZ ddlmZ ddlmZ G dd deZG dd deZG d	d
 d
eZG dd deZG dd deZ	G dd deZ
G dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd  d eZG d!d" d"eZd#d$ Zed%kre  d&S d&S )'a  Definitions for interacting with BLAST related applications (OBSOLETE).

Wrappers for the new NCBI BLAST+ tools (written in C++):

 - NcbiblastpCommandline - Protein-Protein BLAST
 - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
 - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
 - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
 - NcbitblastxCommandline - Translated Query-Protein Subject BLAST
 - NcbipsiblastCommandline - Position-Specific Initiated BLAST
 - NcbirpsblastCommandline - Reverse Position Specific BLAST
 - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
 - NcbideltablastCommandline - Protein-Protein domain enhanced lookup time accelerated blast
 - NcbiblastformatterCommandline - Convert ASN.1 to other BLAST output formats
 - NcbimakeblastdbCommandline - Application to create BLAST databases

For further details, see:

Camacho et al. BLAST+: architecture and applications
BMC Bioinformatics 2009, 10:421
https://doi.org/10.1186/1471-2105-10-421

We have decided to remove this module in future, and instead recommend
building your command and invoking it via the subprocess module directly.
    )_Option)_Switch)AbstractCommandlinec                   @   "   e Zd ZdZdddZdd ZdS )_NcbibaseblastCommandlinezBase Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpsblast, etc
    AND blast_formatter).
    Nc                 K   s   |d usJ t ddgdt ddgdt ddgd	td
dgddddtddgddddt ddgdtddgdddtddgdddtddgdddt d d!gd"t d#d$gd%g}z|| j | _W n tyj   || _Y nw tj| |fi | d S )&N-hhz5Print USAGE and DESCRIPTION;  ignore other arguments.-helphelpKPrint USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments.-versionversion.Print version number;  ignore other arguments.-outoutOutput file for alignment.TFfilenameequatez-outfmtoutfmtzAlignment view.  Typically an integer 0-14 but for some formats can be named columns like '6 qseqid sseqid'.  Use 5 for XML output (differs from classic BLAST which used 7 for XML).z	-show_gisshow_giszShow NCBI GIs in deflines?z-num_descriptionsnum_descriptionszNumber of database sequences to show one-line descriptions for.

Integer argument (at least zero). Default is 500. See also num_alignments.r   z-num_alignmentsnum_alignmentszNumber of database sequences to show num_alignments for.

Integer argument (at least zero). Default is 200. See also num_alignments.z-line_lengthline_lengthzLine length for formatting alignments (integer, at least 1, default 60).

Not applicable for outfmt > 4. Added in BLAST+ 2.2.30.z-htmlhtmlz0Produce HTML output? See also the outfmt option.z-parse_deflinesparse_deflinesz2Should the query and subject defline(s) be parsed?)r   r   
parametersAttributeErrorr   __init__selfcmdkwargsextra_parameters r%   J/var/www/html/myenv/lib/python3.10/site-packages/Bio/Blast/Applications.pyr   0   sl   
?
z"_NcbibaseblastCommandline.__init__c                 C   sF   |D ]}|  |r || D ]}|  |rtd| d| dqqdS )zdValidate parameters for incompatibilities (PRIVATE).

        Used by the _validate method.
        Options  and  are incompatible.N)_get_parameter
ValueErrorr!   incompatiblesabr%   r%   r&   _validate_incompatibilitiesz   s   

z5_NcbibaseblastCommandline._validate_incompatibilitiesN)__name__
__module____qualname____doc__r   r0   r%   r%   r%   r&   r   (   s    
Jr   c                   @   r   )_NcbiblastCommandlinezBase Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools (blastn, rpsblast, rpsblast, etc).
    Nc                 K   s  |d usJ t ddgddddt ddgd	dd
t ddgddd
t ddgddd
t ddgddd
t ddgddd
tddgdt ddgddddt ddgddddt d d!gd"dddt d#d$gd%dddt d&d'gd(dd
t d)d*gd+dd
t d,d-gd.dd
t d/d0gd1dd
t d2d3gd4dd
t d5d6gd7dd
t d8d9gd:dd
td;d<gd=t d>d?gd@dd
t dAdBgdCdd
t dDdEgdFdd
t dGdHgdIdd
t dJdKgdLdd
t dMdNgdOdddt dPdQgdRdddt dSdTgdUdd
tdVdWgdXg}z|| j | _W n ty   || _Y nw tj| |fi | d S )YNz-queryqueryzThe sequence to search with.TFr   z
-query_loc	query_locz4Location on the query sequence (Format: start-stop).r   z-dbdbzThe database to BLAST against.z-evalueevaluezExpectation value cutoff.z
-word_size	word_sizez8Word size for wordfinder algorithm.

Integer. Minimum 2.z-soft_maskingsoft_maskingzBApply filtering locations as soft masks (Boolean, Default = true).z-lcase_maskinglcase_maskingz:Use lower case filtering in query and subject sequence(s)?z-gilistgilistzRestrict search of database to list of GI's.

Incompatible with: negative_gilist, seqidlist, negative_seqidlist, remote, subject, subject_locz-negative_gilistnegative_gilistzRestrict search of database to everything except the listed GIs.

Incompatible with: gilist, seqidlist, remote, subject, subject_locz
-seqidlist	seqidlistzyRestrict search of database to list of SeqID's.

Incompatible with: gilist, negative_gilist, remote, subject, subject_locz-negative_seqidlistnegative_seqidlistzRestrict search of database to everything except listed SeqID's.

Incompatible with: gilist, seqidlist, remote, subject, subject_locz-entrez_queryentrez_queryz>Restrict search with the given Entrez query (requires remote).z-qcov_hsp_percqcov_hsp_perczJPercent query coverage per hsp (float, 0 to 100).

Added in BLAST+ 2.2.30.-max_target_seqsmax_target_seqszDMaximum number of aligned sequences to keep (integer, at least one).z-dbsizedbsizez+Effective length of the database (integer).z	-searchspsearchspz/Effective length of the search space (integer).z-max_hsps_per_subjectmax_hsps_per_subjectzNOverride max number of HSPs per subject saved for ungapped searches (integer).z	-max_hspsmax_hspszMSet max number of HSPs saved per subject sequence

Ddefault 0 means no limit.z-sum_statisticssum_statisticszUse sum statistics.z
-sum_stats	sum_statsz6Use sum statistics (boolean).

Added in BLAST+ 2.2.30.z-xdrop_ungapxdrop_ungapz:X-dropoff value (in bits) for ungapped extensions (float).z
-xdrop_gap	xdrop_gapzDX-dropoff value (in bits) for preliminary gapped extensions (float).z-xdrop_gap_finalxdrop_gap_finalz=X-dropoff value (in bits) for final gapped alignment (float).z-window_sizewindow_sizezFMultiple hits window size, use 0 to specify 1-hit algorithm (integer).z-import_search_strategyimport_search_strategyzBSearch strategy to use.

Incompatible with: export_search_strategyz-export_search_strategyexport_search_strategyzXFile name to record the search strategy used.

Incompatible with: import_search_strategyz-num_threadsnum_threadszoNumber of threads to use in the BLAST search.

Integer, at least one. Default is one. Incompatible with: remotez-remoteremotezcExecute search remotely?

Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ...)r   r   r   r   r   r   r    r%   r%   r&   r      s  
  $
z_NcbiblastCommandline.__init__c                 C   sF   g ddgdgg dd}|  | | jr| jstdt|  d S )N)r>   r?   rR   rQ   r?   )r>   r?   rS   )rS   rP   r>   r@   z+Option entrez_query requires remote option.)r0   rB   rS   r+   r   	_validater!   r-   r%   r%   r&   rT   ;  s   
z_NcbiblastCommandline._validater1   r2   r3   r4   r5   r   rT   r%   r%   r%   r&   r6      s
    
 /r6   c                   @   r   )_Ncbiblast2SeqCommandlinea  Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the BLAST tools supporting two-sequence BLAST
    (blastn, psiblast, etc) but not rpsblast or rpstblastn.
    Nc                 K   s   |d usJ t ddgdddt ddgdddt d	d
gddddt ddgdddt ddgdddt ddgdddt ddgdddg}z|| j | _W n tyU   || _Y nw tj| |fi | d S )Nz-gapopengapopenzCost to open a gap (integer).Fr   z
-gapextend	gapextendzCost to extend a gap (integer).z-subjectsubjectzSubject sequence(s) to search.

Incompatible with: db, gilist, seqidlist, negative_gilist, negative_seqidlist, db_soft_mask, db_hard_mask

See also subject_loc.Tr   z-subject_locsubject_loczLocation on the subject sequence (Format: start-stop).

Incompatible with: db, gilist, seqidlist, negative_gilist, negative_seqidlist, db_soft_mask, db_hard_mask, remote.

See also subject.-culling_limitculling_limitzHit culling limit (integer).

If the query range of a hit is enveloped by that of at least this many higher-scoring hits, delete the hit.

Incompatible with: best_hit_overhang, best_hit_score_edge.-best_hit_overhangbest_hit_overhangzBest Hit algorithm overhang value (float, recommended value: 0.1)

Float between 0.0 and 0.5 inclusive. Incompatible with: culling_limit.-best_hit_score_edgebest_hit_score_edgezBest Hit algorithm score edge value (float).

Float between 0.0 and 0.5 inclusive. Recommended value: 0.1

Incompatible with: culling_limit.)r   r   r   r6   r   r    r%   r%   r&   r   P  sT   
		4
z"_Ncbiblast2SeqCommandline.__init__c                 C   s0   g dddgg dd}|  | t|  d S )N)r9   r>   r?   r@   rS   r_   ra   )r9   r>   r?   r@   )r[   r]   rZ   r0   r6   rT   rU   r%   r%   r&   rT     s   
z#_Ncbiblast2SeqCommandline._validater1   rV   r%   r%   r%   r&   rW   H  s    
?rW   c                   @   r   )_NcbiblastMain2SeqCommandlinea  Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to the main BLAST tools blastp, blastn, blastx, tblastx, tblastn
    but not psiblast, rpsblast or rpstblastn.
    Nc                 K   sr   |d usJ t ddgdddt ddgdddg}z|| j | _W n ty,   || _Y nw tj| |fi | d S )	Nz-db_soft_maskdb_soft_maskzFiltering algorithm for soft masking (integer).

Filtering algorithm ID to apply to BLAST database as soft masking. Incompatible with: db_hard_mask, subject, subject_locFr   z-db_hard_maskdb_hard_maskzFiltering algorithm for hard masking (integer).

Filtering algorithm ID to apply to BLAST database as hard masking. Incompatible with: db_soft_mask, subject, subject_loc)r   r   r   rW   r   r    r%   r%   r&   r     s$   
z&_NcbiblastMain2SeqCommandline.__init__c                 C   s*   g dg dd}|  | t|  d S )N)re   rZ   r[   )rd   rZ   r[   )rd   re   r0   rW   rT   rU   r%   r%   r&   rT     s
   
z'_NcbiblastMain2SeqCommandline._validater1   rV   r%   r%   r%   r&   rc     s    
rc   c                   @      e Zd ZdZdddZdS )NcbiblastpCommandlineay  Create a commandline for the NCBI BLAST+ program blastp (for proteins).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastp.

    >>> from Bio.Blast.Applications import NcbiblastpCommandline
    >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True, ungapped=True)
    >>> cline
    NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
    >>> print(cline)
    blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    blastpc              	   K   s   t ddgddd ddt dd	gd
t ddgdddt ddgddd ddt ddgdddtddgdtddgdg| _tj| |fi | dS )Initialize the class.-tasktaskzHTask to execute (string, blastp (default), blastp-fast or blastp-short).c                 S      | dv S )N)ri   zblastp-fastzblastp-shortr%   valuer%   r%   r&   <lambda>      z0NcbiblastpCommandline.__init__.<locals>.<lambda>Fchecker_functionr   -matrixmatrix'Scoring matrix name (default BLOSUM62).
-threshold	thresholdFMinimum score for words to be added to the BLAST lookup table (float).r   -comp_based_statscomp_based_stats"  Use composition-based statistics (string, default 2, i.e. True).

0, F or f: no composition-based statistics

2, T or t, D or d : Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties

Note that tblastn also supports values of 1 and 3.c                 S   rm   N0Ft2TtDdr%   rn   r%   r%   r&   rp         -segsegzyFilter query sequence with SEG (string).

Format: "yes", "window locut hicut", or "no" to disable
Default is "12 2.2 2.5"	-ungappedungapped Perform ungapped alignment only?-use_sw_tbackuse_sw_tback2Compute locally optimal Smith-Waterman alignments?Nr   r   r   rc   r   r!   r"   r#   r%   r%   r&   r     s<   *zNcbiblastpCommandline.__init__N)ri   r2   r3   r4   r5   r   r%   r%   r%   r&   rh     s    rh   c                   @   "   e Zd ZdZdddZdd ZdS )	NcbiblastnCommandlinea  Wrapper for the NCBI BLAST+ program blastn (for nucleotides).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastn.

    For example, to run a search against the "nt" nucleotide database using the
    FASTA nucleotide file "m_code.fasta" as the query, with an expectation value
    cut off of 0.001, saving the output to a file in XML format:

    >>> from Bio.Blast.Applications import NcbiblastnCommandline
    >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
    ...                               evalue=0.001, out="m_cold.xml", outfmt=5)
    >>> cline
    NcbiblastnCommandline(cmd='blastn', out='m_cold.xml', outfmt=5, query='m_cold.fasta', db='nt', evalue=0.001, strand='plus')
    >>> print(cline)
    blastn -out m_cold.xml -outfmt 5 -query m_cold.fasta -db nt -evalue 0.001 -strand plus

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    blastnc                 K   s>  t ddgddd ddt dd	gd
dd ddt ddgdddt ddgdddt ddgdddt ddgdddt ddgdddt ddgdddt dd gd!ddt d"d#gd$ddt d%d&gd'ddt d(d)gd*d+d ddt d,d-gd.d/d ddtd0d1gd2t d3d4gd5ddtd6d7gd8t d9d:gd;ddg| _tj| |fi | d<S )=rj   -strandstrandjQuery strand(s) to search against database/subject.

Values allowed are "both" (default), "minus", "plus".c                 S   rm   N)bothminusplusr%   rn   r%   r%   r&   rp   '  r   z0NcbiblastnCommandline.__init__.<locals>.<lambda>Frr   rk   rl   zTask to execute (string, default 'megablast')

Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast' (the default), or 'vecscreen'.c                 S   rm   )N)r   zblastn-shortzdc-megablast	megablast	vecscreenr%   rn   r%   r%   r&   rp   0  rq   z-penaltypenaltyz:Penalty for a nucleotide mismatch (integer, at most zero).r   z-rewardrewardz7Reward for a nucleotide match (integer, at least zero).z
-use_index	use_indexz7Use MegaBLAST database index (Boolean, Default = False)z-index_name
index_namezMegaBLAST database index name.z-dustdustzzFilter query sequence with DUST (string).

Format: 'yes', 'level window linker', or 'no' to disable.

Default = '20 64 1'.z-filtering_dbfiltering_dbz<BLAST database containing filtering elements (i.e. repeats).z-window_masker_taxidwindow_masker_taxidz=Enable WindowMasker filtering using a Taxonomic ID (integer).z-window_masker_dbwindow_masker_dbzCEnable WindowMasker filtering using this repeats database (string).z-perc_identityperc_identityz,Percent identity (real, 0 to 100 inclusive).z-template_typetemplate_typezDiscontiguous MegaBLAST template type (string).

Allowed values: 'coding', 'coding_and_optimal' or 'optimal'.
Requires: template_length.c                 S   rm   )N)codingcoding_and_optimaloptimalr%   rn   r%   r%   r&   rp   k  rq   z-template_lengthtemplate_lengthziDiscontiguous MegaBLAST template length (integer).

Allowed values: 16, 18, 21.

Requires: template_type.c                 S   rm   )N)         161821r%   rn   r%   r%   r&   rp   t  r   z
-no_greedy	no_greedyz,Use non-greedy dynamic programming extensionz-min_raw_gapped_scoremin_raw_gapped_scorezgMinimum raw gapped score to keep an alignment in the preliminary gapped and traceback stages (integer).r   r   r   z-off_diagonal_rangeoff_diagonal_rangezNumber of off-diagonals to search for the 2nd hit (integer).

Expects a positive integer, or 0 (default) to turn off.Added in BLAST 2.2.23+Nr   r   r%   r%   r&   r     s   			jzNcbiblastnCommandline.__init__c                 C   s.   | j r| jr| jr| j stdt|  d S )Nz;Options template_type and template_type require each other.)r   r   r+   rc   rT   )r!   r%   r%   r&   rT     s   zNcbiblastnCommandline._validateN)r   rV   r%   r%   r%   r&   r     s    
nr   c                   @   rg   )NcbiblastxCommandlinea  Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database).

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p blastx.

    >>> from Bio.Blast.Applications import NcbiblastxCommandline
    >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
    >>> cline
    NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
    >>> print(cline)
    blastx -query m_cold.fasta -db nr -evalue 0.001

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    blastxc                 K   s   t ddgddd ddt dd	gd
dd ddt ddgdddt ddgdddt ddgdddt ddgdddt ddgdddt ddgdddt dd gd!ddtd"d#gd$td%d&gd'g| _tj| |fi | d(S ))rj   rk   rl   z:Task to execute (string, blastx (default) or blastx-fast).c                 S   rm   )N)r   zblastx-fastr%   rn   r%   r%   r&   rp     r   z0NcbiblastxCommandline.__init__.<locals>.<lambda>Frr   r   r   r   c                 S   rm   r   r%   rn   r%   r%   r&   rp     r   -query_gencodequery_gencode<Genetic code to use to translate query (integer, default 1).r   -frame_shift_penaltyframe_shift_penaltyiFrame shift penalty (integer, at least 1, default ignored) (OBSOLETE).

This was removed in BLAST 2.2.27+-max_intron_lengthmax_intron_lengthMaximum intron length (integer).

Length of the largest intron allowed in a translated nucleotide sequence when linking multiple distinct alignments (a negative value disables linking). Default zero.rt   ru   rv   rw   rx   ry   rz   r{   a  Use composition-based statistics for blastp, blastx, or tblastn.

D or d: default (equivalent to 2 )

0 or F or f: no composition-based statistics

1: Composition-based statistics as in NAR 29:2994-3005, 2001

2 or T or t : Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties

3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally.

For programs other than tblastn, must either be absent or be D, F or 0

Default = 2.r   r   yFilter query sequence with SEG (string).

Format: "yes", "window locut hicut", or "no" to disable.Default is "12 2.2 2.5"r   r   r   r   r   r   Nr   r   r%   r%   r&   r     sl   MzNcbiblastxCommandline.__init__N)r   r   r%   r%   r%   r&   r         r   c                   @   rg   )NcbitblastnCommandlinea  Wrapper for the NCBI BLAST+ program tblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastn.

    >>> from Bio.Blast.Applications import NcbitblastnCommandline
    >>> cline = NcbitblastnCommandline(help=True)
    >>> cline
    NcbitblastnCommandline(cmd='tblastn', help=True)
    >>> print(cline)
    tblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    tblastnc                 K   s   t ddgddd ddt dd	gd
ddt ddgdddt ddgdddt ddgdddt ddgdddt ddgddd ddt ddgdddtdd gd!td"d#gd$t d%d&gd'd(dd)g| _tj| |fi | d*S )+rj   rk   rl   z<Task to execute (string, tblastn (default) or tblastn-fast).c                 S   rm   )N)r   ztblastn-fastr%   rn   r%   r%   r&   rp     r   z1NcbitblastnCommandline.__init__.<locals>.<lambda>Frr   -db_gencode
db_gencoder   r   r   r   r   r   r   r   rt   ru   rv   rw   rx   ry   rz   r{   a  Use composition-based statistics (string, default 2, i.e. True).

0, F or f: no composition-based statistics

1: Composition-based statistics as in NAR 29:2994-3005, 2001

2, T or t, D or d : Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties

3: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally

Note that only tblastn supports values of 1 and 3.c                 S   rm   )N
0Ft12TtDd3r%   rn   r%   r%   r&   rp   >  r   r   r   {Filter query sequence with SEG (string).

Format: "yes", "window locut hicut", or "no" to disable.

Default is "12 2.2 2.5"r   r   r   r   r   r   -in_pssmin_pssmz<PSI-BLAST checkpoint file.

Incompatible with: remote, queryTr   Nr   r   r%   r%   r&   r     sn   IzNcbitblastnCommandline.__init__N)r   r   r%   r%   r%   r&   r     r   r   c                   @   rg   )NcbitblastxCommandlinea  Wrapper for the NCBI BLAST+ program tblastx.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastx.

    >>> from Bio.Blast.Applications import NcbitblastxCommandline
    >>> cline = NcbitblastxCommandline(help=True)
    >>> cline
    NcbitblastxCommandline(cmd='tblastx', help=True)
    >>> print(cline)
    tblastx -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    tblastxc                 K   s   t ddgddd ddt dd	gd
ddt ddgd
ddt ddgdddt ddgdddt ddgdddt ddgdddg| _tj| |fi | dS )rj   r   r   r   c                 S   rm   r   r%   rn   r%   r%   r&   rp   u  r   z1NcbitblastxCommandline.__init__.<locals>.<lambda>Frr   r   r   r   r   r   r   r   r   r   rt   ru   rv   rw   rx   ry   r   r   r   N)r   r   rc   r   r   r%   r%   r&   r   m  sL   0zNcbitblastxCommandline.__init__N)r   r   r%   r%   r%   r&   r   [  r   r   c                   @   r   )	NcbipsiblastCommandlinea  Wrapper for the NCBI BLAST+ program psiblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastpgp tool with a similar tool psiblast. This wrapper
    therefore replaces BlastpgpCommandline, the wrapper for blastpgp.

    >>> from Bio.Blast.Applications import NcbipsiblastCommandline
    >>> cline = NcbipsiblastCommandline(help=True)
    >>> cline
    NcbipsiblastCommandline(cmd='psiblast', help=True)
    >>> print(cline)
    psiblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    psiblastc                 K   s>  t ddgdddt ddgdddt d	d
gddd ddt ddgdddt ddgdddtddgdt ddgdddt ddgddddt d d!gd"dddtd#d$gd%td&d'gd(t d)d*gd+dddt d,d-gd.ddt d/d0gd1dddt d2d3gd4ddt d5d6gd7ddtd8d9gd:t d;d<gd=dddg| _tj| |fi | d>S )?rj   rt   ru   rv   Fr   rw   rx   ry   rz   r{   r|   c                 S   rm   r}   r%   rn   r%   r%   r&   rp     r   z2NcbipsiblastCommandline.__init__.<locals>.<lambda>rr   r   r   zFilter query sequence with SEG (string).

Format: "yes", "window locut hicut", or "no" to disable. Default is "12 2.2 2.5"-gap_triggergap_triggerz6Number of bits to trigger gapping (float, default 22).r   r   r   -num_iterationsnum_iterationszcNumber of iterations to perform (integer, at least one).

Default is one. Incompatible with: remote	-out_pssmout_pssm#File name to store checkpoint file.Tr   -out_ascii_pssmout_ascii_pssm)File name to store ASCII version of PSSM.-save_pssm_after_last_roundsave_pssm_after_last_round)Save PSSM after the last database search.-save_each_pssmsave_each_pssmz]Save PSSM after each iteration

File name is given in -save_pssm or -save_ascii_pssm options.z-in_msain_msazaFile name of multiple sequence alignment to restart PSI-BLAST.

Incompatible with: in_pssm, queryz-msa_master_idxmsa_master_idxzIndex of sequence to use as master in MSA.

Index (1-based) of sequence to use as the master in the multiple sequence alignment. If not specified, the first sequence is used.r   r   zIPSI-BLAST checkpoint file.

Incompatible with: in_msa, query, phi_pattern-pseudocountpseudocountzJPseudo-count value used when constructing PSSM.

Integer. Default is zero.-inclusion_ethreshinclusion_ethreshzKE-value inclusion threshold for pairwise alignments (float, default 0.002).z-ignore_msa_masterignore_msa_masterzIgnore the master sequence when creating PSSM.

Requires: in_msa
Incompatible with: msa_master_idx, in_pssm, query, query_loc, phi_patternz-phi_patternphi_patternzCFile name containing pattern to search.

Incompatible with: in_pssmNr   r   r   rW   r   r   r%   r%   r&   r     s   vz NcbipsiblastCommandline.__init__c                 C   s4   dgddgg dg dd}|  | t|  d S )NrS   r   r7   )r   r7   r   )r   r   r7   r8   r   )r   r   r   r   rf   rU   r%   r%   r&   rT   .  s   
z!NcbipsiblastCommandline._validateN)r   rV   r%   r%   r%   r&   r     s    
zr   c                   @   r   )	NcbirpsblastCommandlinea  Wrapper for the NCBI BLAST+ program rpsblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old rpsblast tool with a similar tool of the same name. This
    wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast.

    >>> from Bio.Blast.Applications import NcbirpsblastCommandline
    >>> cline = NcbirpsblastCommandline(help=True)
    >>> cline
    NcbirpsblastCommandline(cmd='rpsblast', help=True)
    >>> print(cline)
    rpsblast -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    rpsblastc              
   K   s   t ddgdddt ddgdddt d	d
gdddt ddgdddt ddgddd ddtddgdg| _tj| |fi | dS )rj   r   r   r   Fr   r\   r]   zHit culling limit (integer).

If the query range of a hit is enveloped by that of at least this many higher-scoring hits, delete the hit. Incompatible with: best_hit_overhang, best_hit_score_edge.r^   r_   zBest Hit algorithm overhang value (recommended value: 0.1).

Float between 0.0 and 0.5 inclusive. Incompatible with: culling_limit.r`   ra   zBest Hit algorithm score edge value (recommended value: 0.1).

Float between 0.0 and 0.5 inclusive. Incompatible with: culling_limit.rz   r{   Use composition-based statistics.

D or d: default (equivalent to 0)

0 or F or f: Simplified Composition-based statistics as in Bioinformatics 15:1000-1011, 1999

1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001

Default = 0.c                 S   rm   NDd0Ff1Ttr%   rn   r%   r%   r&   rp     r   z2NcbirpsblastCommandline.__init__.<locals>.<lambda>rr   r   r   r   Nr   r   r   r6   r   r   r%   r%   r&   r   Q  s@   3z NcbirpsblastCommandline.__init__c                 C   s$   dddgi}|  | t|  d S )Nr]   r_   ra   rb   rU   r%   r%   r&   rT     s   
z!NcbirpsblastCommandline._validateN)r   rV   r%   r%   r%   r&   r   ?  s    
;r   c                   @   rg   )NcbirpstblastnCommandlinea  Wrapper for the NCBI BLAST+ program rpstblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old rpsblast tool with a similar tool of the same name, and a
    separate tool rpstblastn for Translated Reverse Position Specific BLAST.

    >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
    >>> cline = NcbirpstblastnCommandline(help=True)
    >>> cline
    NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
    >>> print(cline)
    rpstblastn -help

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    
rpstblastnc              	   K   s   t ddgddd ddt dd	gd
ddt ddgdddt ddgddd ddtddgdtddgdg| _tj| |fi | dS )rj   r   r   r   c                 S   rm   r   r%   rn   r%   r%   r&   rp     r   z4NcbirpstblastnCommandline.__init__.<locals>.<lambda>Frr   r   r   r   r   r   r   r   rz   r{   r   c                 S   rm   r   r%   rn   r%   r%   r&   rp     r   r   r   r   r   r   r   Nr   r   r%   r%   r&   r     s:   ,z"NcbirpstblastnCommandline.__init__N)r   r   r%   r%   r%   r&   r     r   r   c                   @   r   )	NcbiblastformatterCommandlinea:  Wrapper for the NCBI BLAST+ program blast_formatter.

    With the release of BLAST 2.2.24+ (i.e. the BLAST suite rewritten in C++
    instead of C), the NCBI added the ASN.1 output format option to all the
    search tools, and extended the blast_formatter to support this as input.

    The blast_formatter command allows you to convert the ASN.1 output into
    the other output formats (XML, tabular, plain text, HTML).

    >>> from Bio.Blast.Applications import NcbiblastformatterCommandline
    >>> cline = NcbiblastformatterCommandline(archive="example.asn", outfmt=5, out="example.xml")
    >>> cline
    NcbiblastformatterCommandline(cmd='blast_formatter', out='example.xml', outfmt=5, archive='example.asn')
    >>> print(cline)
    blast_formatter -out example.xml -outfmt 5 -archive example.asn

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.

    Note that this wrapper is for the version of blast_formatter from BLAST
    2.2.24+ (or later) which is when the NCBI first announced the inclusion
    this tool. There was actually an early version in BLAST 2.2.23+ (and
    possibly in older releases) but this did not have the -archive option
    (instead -rid is a mandatory argument), and is not supported by this
    wrapper.
    blast_formatterc                 K   sV   t ddgdddt ddgdd	dd
t ddgddd ddg| _tj| |fi | dS )rj   z-ridridz8BLAST Request ID (RID), not compatible with archive arg.Fr   z-archivearchivez5Archive file of results, not compatible with rid arg.Tr   rD   rE   z,Maximum number of aligned sequences to keep.c                 S   s   | dkS )N   r%   rn   r%   r%   r&   rp     r   z8NcbiblastformatterCommandline.__init__.<locals>.<lambda>rr   N)r   r   r   r   r   r%   r%   r&   r     s&   z&NcbiblastformatterCommandline.__init__c                 C   s"   ddgi}|  | t|  d S )Nr   r   )r0   r   rT   rU   r%   r%   r&   rT     s   

z'NcbiblastformatterCommandline._validateN)r   rV   r%   r%   r%   r&   r     s    
r   c                   @   rg   )NcbideltablastCommandlinea  Create a commandline for the NCBI BLAST+ program deltablast (for proteins).

    This is a wrapper for the deltablast command line command included in
    the NCBI BLAST+ software (not present in the original BLAST).

    >>> from Bio.Blast.Applications import NcbideltablastCommandline
    >>> cline = NcbideltablastCommandline(query="rosemary.pro", db="nr",
    ...                               evalue=0.001, remote=True)
    >>> cline
    NcbideltablastCommandline(cmd='deltablast', query='rosemary.pro', db='nr', evalue=0.001, remote=True)
    >>> print(cline)
    deltablast -query rosemary.pro -db nr -evalue 0.001 -remote

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    
deltablastc                 K   s  t ddgdt ddgdddt d	d
gddd ddt ddgdddt ddgdddtddgdt ddgdddt ddgddddt d d!gd"dddtd#d$gd%td&d'gd(t d)d*gd+ddt d,d-gd.ddt d/d0gd1ddt d2d3gd4ddtd5d6gd7g| _tj| |fi | d8S )9rj   rt   ru   rv   rw   rx   ry   Fr   rz   r{   a#  Use composition-based statistics (string, default 2, i.e. True).

0, F or f: no composition-based statistics.

2, T or t, D or d : Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties

Note that tblastn also supports values of 1 and 3.c                 S   rm   r}   r%   rn   r%   r%   r&   rp   7  r   z4NcbideltablastCommandline.__init__.<locals>.<lambda>rr   r   r   r   r   r   z0Number of bits to trigger gapping. Default = 22.r   r   r   r   r   zXNumber of iterations to perform. (integer >=1, Default is 1).

Incompatible with: remoter   r   r   Tr   r   r   r   r   r   r   r   r   z^Save PSSM after each iteration.

File name is given in -save_pssm or -save_ascii_pssm options.r   r   zDPseudo-count value used when constructing PSSM (integer, default 0).z-domain_inclusion_ethreshdomain_inclusion_ethreshz\E-value inclusion threshold for alignments with conserved domains.

(float, Default is 0.05)r   r   zFPairwise alignment e-value inclusion threshold (float, default 0.002).z-rpsdbrpsdbz;BLAST domain database name (dtring, Default = 'cdd_delta').z-show_domain_hitsshow_domain_hitsz5Show domain hits?

Incompatible with: remote, subjectNr   r   r%   r%   r&   r   &  s   ]z"NcbideltablastCommandline.__init__N)r   r   r%   r%   r%   r&   r     r   r   c                   @   s*   e Zd ZdZd
ddZdd Zdd Zd	S )NcbimakeblastdbCommandlinea2  Wrapper for the NCBI BLAST+ program makeblastdb.

    This is a wrapper for the NCBI BLAST+ makeblastdb application
    to create BLAST databases. By default, this creates a blast database
    with the same name as the input file. The default output location
    is the same directory as the input.

    >>> from Bio.Blast.Applications import NcbimakeblastdbCommandline
    >>> cline = NcbimakeblastdbCommandline(dbtype="prot",
    ...                                    input_file="NC_005816.faa")
    >>> cline
    NcbimakeblastdbCommandline(cmd='makeblastdb', dbtype='prot', input_file='NC_005816.faa')
    >>> print(cline)
    makeblastdb -dbtype prot -in NC_005816.faa

    You would typically run the command line with cline() or via the Python
    subprocess module, as described in the Biopython tutorial.
    makeblastdbc                 K   sv  t ddgdt ddgdt ddgd	td
dgddddtddgdddd dtddgddddd dtddgddddtddgd dd| jd!td"d#gd$dddt d%d&gd't d(d)gd*td+d,gd-dddtd.d/gd0dddtd1d2gd3dddt d4d5gd6td7d8gd9dddtd:d;gd<dddtd=d>gd?dddtd@dAgdBdddCd d!tdDdEgdFdddg| _tj| |fi | dGS )Hrj   r   r   z4Print USAGE and DESCRIPTION; ignore other arguments.r	   r
   r   r   r   r   r   r   r   TFr   z-blastdb_versionblastdb_versionzeVersion of BLAST database to be created. Tip: use BLAST database version 4 on 32 bit CPU. Default = 5c                 S      | dkp| dkS )N      r%   xr%   r%   r&   rp         z5NcbimakeblastdbCommandline.__init__.<locals>.<lambda>)r   rs   z-dbtypedbtypez.Molecule type of target db ('nucl' or 'prot').c                 S   r   )Nnuclprotr%   r   r%   r%   r&   rp     r   )r   is_requiredrs   z-in
input_filezInput file/database name.z-input_type
input_typezTType of the data specified in input_file.

Default = 'fasta'. Added in BLAST 2.2.26.)r   r   rs   z-titletitlezTitle for BLAST database.z-parse_seqidsparse_seqidsziOption to parse seqid for FASTA input if set.

For all other input types, seqids are parsed automaticallyz-hash_index
hash_indexz%Create index of sequence hash values.z
-mask_data	mask_datazComma-separated list of input files containing masking data as produced by NCBI masking applications (e.g. dustmasker, segmasker, windowmasker).z-mask_idmask_idzKComma-separated list of strings to uniquely identify the masking algorithm.z
-mask_desc	mask_desczTComma-separated list of free form strings to describe the masking algorithm details.z-gi_maskgi_maskzCreate GI indexed masking data.z-gi_mask_namegi_mask_namez2Comma-separated list of masking data output files.z-max_file_szmax_file_szz<Maximum file size for BLAST database files. Default = '1GB'.z-logfilelogfilez3File to which the program log should be redirected.z-taxidtaxidz'Taxonomy ID to assign to all sequences.c                 S   s   t | t| | kS r1   )typeintr   r%   r%   r&   rp     s    z
-taxid_map	taxid_mapzZText file mapping sequence IDs to taxonomy IDs.

Format:<SequenceId> <TaxonomyId><newline>N)r   r   _input_type_checkerr   r   r   r   r%   r%   r&   r     s   yz#NcbimakeblastdbCommandline.__init__c                 C   s   |dv S )N)asn1_binasn1_txtblastdbfastar%   )r!   commandr%   r%   r&   r    s   z.NcbimakeblastdbCommandline._input_type_checkerc                 C   s   dgdgdgd}|D ]}|  |r)|| D ]}|  |r(td| d| dqq| jr4| js4td| jr>| js>td	| jrH| jsHtd
| jrU| jrQ| jsUtd| jr_| js_tdt	
|  d S )Nr  r  r  )r  r  r  r'   r(   r)   z,Option mask_id requires mask_data to be set.z,Option mask_desc requires mask_id to be set.z/Option gi_mask requires parse_seqids to be set.z=Option gi_mask_name requires mask_data and gi_mask to be set.z1Option taxid_map requires parse_seqids to be set.)r*   r+   r  r  r  r  r  r  r  r   rT   r,   r%   r%   r&   rT     s0   	

z$NcbimakeblastdbCommandline._validateN)r   )r2   r3   r4   r5   r   r  rT   r%   r%   r%   r&   r     s
    
}r   c                  C   s   ddl } | jdd dS )z;Run the Bio.Blast.Applications module's doctests (PRIVATE).r   Nr   )verbose)doctesttestmod)r  r%   r%   r&   _test;  s   r  __main__N)r5   Bio.Applicationr   r   r   r   r6   rW   rc   rh   r   r   r   r   r   r   r   r   r   r   r  r2   r%   r%   r%   r&   <module>   s6   ^ CQ-B d`G SG;t 4
