o
    Rŀg(                     @   sn   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ G dd deZed	kr5dd
l	m
Z
 e
  dS dS )z;Command line wrapper for the motif finding program XXmotif.    N)	_Argument)_Option)_Switch)AbstractCommandlinec                   @   s   e Zd ZdZdddZdS )XXmotifCommandlineu  Command line wrapper for XXmotif.

    http://xxmotif.genzentrum.lmu.de/

    Notes
    -----
    Last checked against version: 1.3

    References
    ----------
    Luehr S, Hartmann H, and Söding J. The XXmotif web server for eXhaustive,
    weight matriX-based motif discovery in nucleotide sequences,
    Nucleic Acids Res. 40: W104-W109 (2012).

    Hartmann H, Guthoehrlein EW, Siebert M., Luehr S, and Söding J. P-value
    based regulatory motif discovery using positional weight matrices,
    Genome Res. 23: 181–194 (2013)

    Examples
    --------
    >>> from Bio.motifs.applications import XXmotifCommandline
    >>> out_dir = "results"
    >>> in_file = "sequences.fasta"
    >>> xxmotif_cline = XXmotifCommandline(outdir=out_dir, seqfile=in_file, revcomp=True)
    >>> print(xxmotif_cline)
    XXmotif results sequences.fasta --revcomp

    You would typically run the command line with xxmotif_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    XXmotifc                    s.  t d tddgddddd dtd	d
gddddd dtg dddddtg ddtg ddtg ddtg ddtg dddd ddtg dddd ddtg d d!d"d ddtg d#d$d%d ddtg d&d'd(d ddtg d)d*tg d+d,d-d ddtg d.d/tg d0d1d2d ddtg d3d4 fd5dddtg d6d7d8d ddtg d9d:d;d ddtg d<d=tg d>d?d@d ddtg dAdB fdCdddtg dDdEdddtg dFdGdHd ddtg dIdJdKd ddtdLdMgdNtdOdPgdQtdRdSgdTg| _tj| |fi | dUS )VzInitialize the class.ACGTNXoutdirOUTDIRz output directory for all resultsTc                 S   s   d| vS )N  xr   r   T/var/www/html/myenv/lib/python3.10/site-packages/Bio/motifs/applications/_xxmotif.py<lambda>?       z-XXmotifCommandline.__init__.<locals>.<lambda>)filenameis_requiredchecker_functionseqfileSEQFILEz:file name with sequences from positive set in FASTA formatc                 S   s   t j| d dkS )Nr    )ospathsplitr   r   r   r   r   G   s    )z--negSetnegSetNEGSETnegsetz4sequence set which has to be used as a reference setF)r   equate)z--zoopsZOOPSzoopsz7use zero-or-one occurrence per sequence model (DEFAULT))z--mopsMOPSmopsz*use multiple occurrence per sequence model)z--oopsOOPSoopsz%use one occurrence per sequence model)z	--revcompREVCOMPrevcompz?search in reverse complement of sequences as well (DEFAULT: NO))z--background-model-orderzbackground-model-orderzBACKGROUND-MODEL-ORDERbackground_model_orderz;order of background distribution (DEFAULT: 2, 8(--negset) )c                 S   
   t | tS N
isinstanceintr   r   r   r   r   f      
 )r   r   )z--pseudoPSEUDOpseudoz-percentage of pseudocounts used (DEFAULT: 10)c                 S   r(   r)   r*   r   r   r   r   r   l   r-   )z-gz--gapsGAPSgapsz>maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)c                 S      | dv S )N)r   r   r   r   r   r   r   r   )z--typeTYPEtypezdefines what kind of start seeds are used (DEFAULT: ALL)possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEMc                 S   r2   )N)ALLallFIVEMERSfivemers
PALINDROME
palindromeTANDEMtandemNOPALINDROMEnopalindromeNOTANDEMnotandemr   r   r   r   r   r   y       )z--merge-motif-thresholdzmerge-motif-thresholdzMERGE-MOTIF-THRESHOLDmerge_motif_thresholdzddefines the similarity threshold for merging motifs (DEFAULT: HIGH)possible modes: LOW, MEDIUM, HIGHc                 S   r2   )N)LOWlowMEDIUMmediumHIGHhighr   r   r   r   r   r      rB   )z--no-pwm-length-optimizationzno-pwm-length-optimizationzNO-PWM-LENGTH-OPTIMIZATIONno_pwm_length_optimizationz=do not optimize length during iterations (runtime advantages))z--max-match-positionszmax-match-positionszMAX-MATCH-POSITIONSmax_match_positionsz^max number of positions per motif (DEFAULT: 17, higher values will lead to very long runtimes)c                 S   r(   r)   r*   r   r   r   r   r      r-   )z--batchBATCHbatchz:suppress progress bars (reduce output size for batch jobs))z--maxPosSetSizemaxPosSetSizeMAXPOSSETSIZEmaxpossetsizezEmaximum number of sequences from the positive set used [DEFAULT: all]c                 S   r(   r)   r*   r   r   r   r   r      r-   )z--trackedMotiftrackedMotifTRACKEDMOTIFtrackedmotifzEinspect extensions and refinement of a given seed (DEFAULT: not used)c                       t  fdd| D S )Nc                 3       | ]}| v V  qd S r)   r   .0c_valid_alphabetr   r   	<genexpr>       @XXmotifCommandline.__init__.<locals>.<lambda>.<locals>.<genexpr>anyr   rY   r   r   r          )z--formatFORMATformatzEdefines what kind of format the input sequences have (DEFAULT: FASTA)c                 S   r2   )N)FASTAfastaMFASTAmfastar   r   r   r   r   r      r   )z--maxMultipleSequencesmaxMultipleSequencesMAXMULTIPLESEQUENCESmaxmultiplesequencesz?maximum number of sequences used in an alignment [DEFAULT: all]c                 S   r(   r)   r*   r   r   r   r   r      r-   )z--localizationLOCALIZATIONlocalizationzfuse localization information to calculate combined P-values(sequences should have all the same length))z--downstream
DOWNSTREAM
downstreamzJnumber of residues in positive set downstream of anchor point (DEFAULT: 0)c                 S   r(   r)   r*   r   r   r   r   r      r-   )z-mz--startMotif
startMotif
STARTMOTIF
startmotifzStart motif (IUPAC characters)c                    rT   )Nc                 3   rU   r)   r   rV   rY   r   r   r[      r\   r]   r^   r   rY   r   r   r      r`   )z-pz--profileFileprofileFilePROFILEFILEprofilefilezprofile file)z--startRegionstartRegionSTARTREGIONstartregionzWexpected start position for motif occurrences relative to anchor point (--localization)c                 S   r(   r)   r*   r   r   r   r   r      r-   )z--endRegion	endRegion	ENDREGION	endregionzUexpected end position for motif occurrences relative to anchor point (--localization)c                 S   r(   r)   r*   r   r   r   r   r      r-   z
--XXmaskermaskerzImask the input sequences for homology, repeats and low complexity regionsz--XXmasker-pos	maskerposzKmask only the positive set for homology, repeats and low complexity regionsz--no-graphics
nographicsz$run XXmotif without graphical outputN)setr   r   r   
parametersr   __init__)selfcmdkwargsr   rY   r   r   3   s0  			

  KzXXmotifCommandline.__init__N)r   )__name__
__module____qualname____doc__r   r   r   r   r   r      s     r   __main__)run_doctest)r   r   Bio.Applicationr   r   r   r   r   r   
Bio._utilsr   r   r   r   r   <module>   s    t
