o
    RŀgAF                    @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z ddl	mZ ddl	mZ ddlmZ dd	lmZ d
dddddddddddddddZeZdefddZG dd dZG dd  d eZG d!d" d"eZG d#d$ d$eZG d%d& d&eZG d'd( d(eZG d)d* d*eZG d+d, d,eZG d-d. d.eZG d/d0 d0eZ G d1d2 d2eZ!G d3d4 d4eZ"G d5d6 d6eZ#G d7d8 d8eZ$G d9d: d:eZ%G d;d< d<eZ&G d=d> d>eZ'G d?d@ d@eZ(G dAdB dBeZ)G dCdD dDe*Z+edEZ,e+ Z-G dFdG dGe+eZ.e+ Z/e+ Z0e1 D ]7\Z2\Z3Z4e5dHdI e3D Z6e7ed e6i Z8e4D ]Z9e8e9e6ee9 Z:e:; r?e/<e: q)e0<e: q)qe+e/Z=e=>e0 dJdK e=D Z?e@ >eAeBe?e= dLe5e? ZC[9[4[2[3[6[?dS )Ma  Restriction Enzyme classes.

Notes about the diverses class of the restriction enzyme implementation::

            RestrictionType is the type of all restriction enzymes.
        -----------------------------------------------------------------------
            AbstractCut implements some methods that are common to all enzymes.
        -----------------------------------------------------------------------
            NoCut, OneCut,TwoCuts   represent the number of double strand cuts
                                    produced by the enzyme.
                                    they correspond to the 4th field of the
                                    rebase record emboss_e.NNN.
                    0->NoCut    : the enzyme is not characterised.
                    2->OneCut   : the enzyme produce one double strand cut.
                    4->TwoCuts  : two double strand cuts.
        -----------------------------------------------------------------------
            Meth_Dep, Meth_Undep    represent the methylation susceptibility to
                                    the enzyme.
                                    Not implemented yet.
        -----------------------------------------------------------------------
            Palindromic,            if the site is palindromic or not.
            NotPalindromic          allow some optimisations of the code.
                                    No need to check the reverse strand
                                    with palindromic sites.
        -----------------------------------------------------------------------
            Unknown, Blunt,         represent the overhang.
            Ov5, Ov3                Unknown is here for symmetry reasons and
                                    correspond to enzymes that are not
                                    characterised in rebase.
        -----------------------------------------------------------------------
            Defined, Ambiguous,     represent the sequence of the overhang.
            NotDefined
                                    NotDefined is for enzymes not characterised
                                    in rebase.

                                    Defined correspond to enzymes that display
                                    a constant overhang whatever the sequence.
                                    ex : EcoRI. G^AATTC -> overhang :AATT
                                                CTTAA^G

                                    Ambiguous : the overhang varies with the
                                    sequence restricted.
                                    Typically enzymes which cut outside their
                                    restriction site or (but not always)
                                    inside an ambiguous site.
                                    ex:
                                    AcuI CTGAAG(22/20)  -> overhang : NN
                                    AasI GACNNN^NNNGTC  -> overhang : NN
                                         CTGN^NNNNNCAG

                note : these 3 classes refers to the overhang not the site.
                   So the enzyme ApoI (RAATTY) is defined even if its
                   restriction site is ambiguous.

                        ApoI R^AATTY -> overhang : AATT -> Defined
                             YTTAA^R
                   Accordingly, blunt enzymes are always Defined even
                   when they cut outside their restriction site.
        -----------------------------------------------------------------------
            Not_available,          as found in rebase file emboss_r.NNN files.
            Commercially_available
                                    allow the selection of the enzymes
                                    according to their suppliers to reduce the
                                    quantity of results.
                                    Also will allow the implementation of
                                    buffer compatibility tables. Not
                                    implemented yet.

                                    the list of suppliers is extracted from
                                    emboss_s.NNN
        -----------------------------------------------------------------------

    N)BiopythonWarning)PrintFormat)	rest_dict)	suppliers)typedict)
MutableSeq)SeqARWMHVDNCYSMHBVNGRSKBVDNTYWKHBDNABDGHKMNSRWVCBDHKMNSTWVYABDHKMNRTWVYCBDGHKMNSRVYACBDHMNSRWVYBDGHKNSRTWVYACBDHKMNSRTWVYCBDGHKMNSRTWVYACBDGHKMNSRWVYABDGHKMNSRTWVYACBDGHKMNSRTWVY)ACGTRYWSMKHBVDNreturnc                  C   s>   t d} tdtd }dD ]}|| |< || || < qt| S )N   r   as   ABCDGHKMNRSTVWY)	bytearrayordbytes)tableupper_to_lowerc r0   O/var/www/html/myenv/lib/python3.10/site-packages/Bio/Restriction/Restriction.py_make_FormattedSeq_tablew   s   r2   c                   @   s   e Zd ZdZej ej  Ze	 Z
dddZdd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )FormattedSeqa  A linear or circular sequence object for restriction analysis.

    Translates a Bio.Seq into a formatted sequence to be used with Restriction.

    Roughly: remove anything which is not IUPAC alphabet and then add a space
             in front of the sequence to get a biological index instead of a
             python index (i.e. index of the first base is 1 not 0).

    Retains information about the shape of the molecule linear (default) or
    circular. Restriction sites are search over the edges of circular sequence.
    Tc                 C   s   t |ttfr:| | _t|}|j| j| jd| _	d| j	v r(t
d|  d| j	d | _	|| _|j| _dS t |trQ|j| _|j	| _	|j| _|j| _dS t
dt| )a  Initialize ``FormattedSeq`` with sequence and topology (optional).

        ``seq`` is either a ``Bio.Seq``, ``Bio.MutableSeq`` or a
        ``FormattedSeq``. If ``seq`` is a ``FormattedSeq``, ``linear``
        will have no effect on the shape of the sequence.
        )deleter   zInvalid character found in  ASCIIz expected Seq or MutableSeq, got N)
isinstancer   r   islowerlowerr,   	translate_table_remove_charsdata	TypeErrordecodelinear	__class__klassr3   type)selfseqr@   r=   r0   r0   r1   __init__   s   


zFormattedSeq.__init__c                 C   s   t | jd S )zkReturn length of ``FormattedSeq``.

        ``FormattedSeq`` has a leading space, thus subtract 1.
           )lenr=   rD   r0   r0   r1   __len__      zFormattedSeq.__len__c                 C   s   d| dd d| j dS )z-Represent ``FormattedSeq`` class as a string.zFormattedSeq(rG   Nz	, linear=)r@   rI   r0   r0   r1   __repr__   s   zFormattedSeq.__repr__c                 C   s&   t |trt| t|krdS dS dS )z8Implement equality operator for ``FormattedSeq`` object.TF)r7   r3   reprrD   otherr0   r0   r1   __eq__   s
   
zFormattedSeq.__eq__c                 C   
   d| _ dS )zCircularise sequence in place.FNrM   rI   r0   r0   r1   circularise      
zFormattedSeq.circularisec                 C   rS   )zLinearise sequence in place.TNrM   rI   r0   r0   r1   	linearise   rU   zFormattedSeq.linearisec                 C      |  | }d|_|S )z*Make a new instance of sequence as linear.TrA   r@   rD   newr0   r0   r1   	to_linear      
zFormattedSeq.to_linearc                 C   rW   )z,Make a new instance of sequence as circular.FrX   rY   r0   r0   r1   to_circular   r\   zFormattedSeq.to_circularc                 C      | j S )z8Return if sequence is linear (True) or circular (False).rM   rI   r0   r0   r1   	is_linear      zFormattedSeq.is_linearc                 C   s:   |   r| j}n
| j| jd|  }dd t||D S )an  Return a list of a given pattern which occurs in the sequence.

        The list is made of tuple (location, pattern.group).
        The latter is used with non palindromic sites.
        Pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        Size is the size of the restriction enzyme recognition-site size.
        rG   c                 S   s   g | ]	}|  |jfqS r0   )startgroup.0ir0   r0   r1   
<listcomp>       z)FormattedSeq.finditer.<locals>.<listcomp>)r_   r=   refinditer)rD   patternsizer=   r0   r0   r1   ri      s   	zFormattedSeq.finditerc                 C   s*   | j r| | j|   S | | j| S )a  Return substring of ``FormattedSeq``.

        The class of the returned object is the class of the respective
        sequence. Note that due to the leading space, indexing is 1-based:

        >>> from Bio.Seq import Seq
        >>> from Bio.Restriction.Restriction import FormattedSeq
        >>> f_seq = FormattedSeq(Seq('ATGCATGC'))
        >>> f_seq[1]
        Seq('A')

        )r9   rB   r=   )rD   re   r0   r0   r1   __getitem__   s   zFormattedSeq.__getitem__NT)__name__
__module____qualname____doc__string
whitespaceencodedigitsr<   r2   r;   rF   rJ   rN   rR   rT   rV   r[   r]   r_   ri   rl   r0   r0   r0   r1   r3      s    
	r3   c                   @   s   e Zd ZdZd)ddZdd Zd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( ZdS )*RestrictionTypezjRestrictionType. Type from which all enzyme classes are derived.

    Implement the operator methods.
     r0   Nc                 C   sb   d|v rt d|dz
t| j| _W dS  ty    Y dS  ty0   t d| jddw )zInitialize RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.
        See below.
        -zProblem with hyphen in z as enzyme namez-Problem with regular expression, re.compiled(rL   N)
ValueErrorrh   compilecompsiteAttributeError	Exception)clsnamebasesdctr0   r0   r1   rF      s   zRestrictionType.__init__c                 C   s.   t |trt| |gS t |tr|| S t)zAdd restriction enzyme to a RestrictionBatch().

        If other is an enzyme returns a batch of the two enzymes.
        If other is already a RestrictionBatch add enzyme to it.
        )r7   rv   RestrictionBatchadd_nocheckr>   r~   rQ   r0   r0   r1   __add__  s
   


zRestrictionType.__add__c                 C   
   |  |S )zOverride '/' operator to use as search method.

        >>> from Bio.Restriction import EcoRI
        >>> EcoRI/Seq('GAATTC')
        [2]

        Returns RE.search(other).
        searchr   r0   r0   r1   __truediv__!     
	zRestrictionType.__truediv__c                 C   r   )zOverride division with reversed operands to use as search method.

        >>> from Bio.Restriction import EcoRI
        >>> Seq('GAATTC')/EcoRI
        [2]

        Returns RE.search(other).
        r   r   r0   r0   r1   __rtruediv__,  r   zRestrictionType.__rtruediv__c                 C   r   )zOverride '//' operator to use as catalyse method.

        >>> from Bio.Restriction import EcoRI
        >>> EcoRI//Seq('GAATTC')
        (Seq('G'), Seq('AATTC'))

        Returns RE.catalyse(other).
        catalyser   r0   r0   r1   __floordiv__7  r   zRestrictionType.__floordiv__c                 C   r   )zAs __floordiv__, with reversed operands.

        >>> from Bio.Restriction import EcoRI
        >>> Seq('GAATTC')//EcoRI
        (Seq('G'), Seq('AATTC'))

        Returns RE.catalyse(other).
        r   r   r0   r0   r1   __rfloordiv__B  r   zRestrictionType.__rfloordiv__c                 C   r^   )z(Return the name of the enzyme as string.rn   r~   r0   r0   r1   __str__M  r`   zRestrictionType.__str__c                 C   s   | j  S )z\Implement repr method.

        Used with eval or exec will instantiate the enzyme.
        r   r   r0   r0   r1   rN   Q  s   zRestrictionType.__repr__c                 C   s   z| j W S  ty   Y dS w )z3Return length of recognition site of enzyme as int.r   )rk   r|   r   r0   r0   r1   rJ   X  s
   zRestrictionType.__len__c                 C   s   t | S )zImplement ``hash()`` method for ``RestrictionType``.

        Python default is to use ``id(...)``
        This is consistent with the ``__eq__`` implementation
        idr   r0   r0   r1   __hash__c     zRestrictionType.__hash__c                 C   s   t | t |kS )zOverride '==' operator.

        True if RE and other are the same enzyme.

        Specifically this checks they are the same Python object.
        r   r   r0   r0   r1   rR   k  s   zRestrictionType.__eq__c                 C   s"   t |tsdS | j|jkrdS dS )ae  Override '!=' operator.

        Isoschizomer strict (same recognition site, same restriction) -> False
        All the other-> True

        WARNING - This is not the inverse of the __eq__ method

        >>> from Bio.Restriction import SacI, SstI
        >>> SacI != SstI  # true isoschizomers
        False
        >>> SacI == SstI
        False
        TF)r7   rv   characr   r0   r0   r1   __ne__u  s
   
zRestrictionType.__ne__c                 C   s.   t |tsdS | j|jkr| j|jkrdS dS )a0  Override '>>' operator to test for neoschizomers.

        neoschizomer : same recognition site, different restriction. -> True
        all the others :                                             -> False

        >>> from Bio.Restriction import SmaI, XmaI
        >>> SmaI >> XmaI
        True
        FT)r7   rv   siter   r   r0   r0   r1   
__rshift__  s
   

zRestrictionType.__rshift__c                 C   s(   t |tstdt| d| |S )zOverride '%' operator to test for compatible overhangs.

        True if a and b have compatible overhang.

        >>> from Bio.Restriction import XhoI, SalI
        >>> XhoI % SalI
        True
        zexpected RestrictionType, got  instead)r7   rv   r>   rC   _mod1r   r0   r0   r1   __mod__  s   
	
zRestrictionType.__mod__c                 C   sD   t |tstt| t|krdS | jt|kr | j|jkr dS dS )a  Compare length of recognition site of two enzymes.

        Override '>='. a is greater or equal than b if the a site is longer
        than b site. If their site have the same length sort by alphabetical
        order of their names.

        >>> from Bio.Restriction import EcoRI, EcoRV
        >>> EcoRI.size
        6
        >>> EcoRV.size
        6
        >>> EcoRI >= EcoRV
        False
        TFr7   rv   NotImplementedErrorrH   rk   rn   r   r0   r0   r1   __ge__  s   
zRestrictionType.__ge__c                 C   sD   t |tstt| t|krdS | jt|kr | j|jkr dS dS )zCompare length of recognition site of two enzymes.

        Override '>'. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        TFr   r   r0   r0   r1   __gt__  s   
	zRestrictionType.__gt__c                 C   sF   t |tstt| t|k rdS t| t|kr!| j|jkr!dS dS )zCompare length of recognition site of two enzymes.

        Override '<='. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        TFr7   rv   r   rH   rn   r   r0   r0   r1   __le__     
	zRestrictionType.__le__c                 C   sF   t |tstt| t|k rdS t| t|kr!| j|jk r!dS dS )zCompare length of recognition site of two enzymes.

        Override '<'. Sorting order:

        1. size of the recognition site.
        2. if equal size, alphabetical order of the names.

        TFr   r   r0   r0   r1   __lt__  r   zRestrictionType.__lt__)rw   r0   N)rn   ro   rp   rq   rF   r   r   r   r   r   r   rN   rJ   r   rR   r   r   r   r   r   r   r   r0   r0   r0   r1   rv      s(    

rv   c                   @   s   e Zd ZdZedddZedd Zedd Zed	d
 Zedd Z	edd Z
edddZedddZedddZedd ZdS )AbstractCutzImplement the methods that are common to all restriction enzymes.

    All the methods are classmethod.

    For internal use only. Not meant to be instantiated.
    Tc                 C   s,   t |tr|| _|  S t||| _|  S )a  Return a list of cutting sites of the enzyme in the sequence.

        Compensate for circular sequences and so on.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the restriction sites that span over the boundaries
        will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.
        )r7   r3   dna_searchr~   r   r@   r0   r0   r1   r     s
   
zAbstractCut.searchc                    s   t  j j r fddfdd jD  _dS t jD ]\}}|dk r5 j|  7  < q# t jddd D ]\}}|krU j|d    8  < q@ dS dS )Remove cuts that are outsite of the sequence (PRIVATE).

        For internal use only.

        Drop the site that are situated outside the sequence in linear
        sequence. Modify the index for site in circular sequences.
        c                    s:   |  j  }d|   k okn  od|  k okS   S )NrG   )ovhg)cut_on_watsoncut_on_crick)r~   lengthr0   r1   filtering_function'  s   
0z-AbstractCut._drop.<locals>.filtering_functionc                    s   g | ]} |r|qS r0   r0   )rd   cut)r   r0   r1   rf   +      z%AbstractCut._drop.<locals>.<listcomp>rG   N)rH   r   r_   results	enumerate)r~   indexlocationr0   )r~   r   r   r1   _drop  s   
	
zAbstractCut._dropc                 C   s(   t dd t D }td| dS )z.Print all the suppliers of restriction enzyme.c                 s   s    | ]}|d  V  qdS r   Nr0   rd   xr0   r0   r1   	<genexpr>;      z,AbstractCut.all_suppliers.<locals>.<genexpr>z,
N)sortedsuppliers_dictvaluesprintjoinr~   supplyr0   r0   r1   all_suppliers8     zAbstractCut.all_suppliersc                 C   s
   | |k S )ao  Test for real isoschizomer.

        True if other is an isoschizomer of RE, but not an neoschizomer,
        else False.

        Equischizomer: same site, same position of restriction.

        >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
        >>> SacI.is_equischizomer(SstI)
        True
        >>> SmaI.is_equischizomer(XmaI)
        False

        r0   r   r0   r0   r1   is_equischizomer>  s   
zAbstractCut.is_equischizomerc                 C   s   | |? S )zTest for neoschizomer.

        True if other is an isoschizomer of RE, else False.
        Neoschizomer: same site, different position of restriction.
        r0   r   r0   r0   r1   is_neoschizomerP  s   zAbstractCut.is_neoschizomerc                 C   s   | |k p| |? S )a5  Test for same recognition site.

        True if other has the same recognition site, else False.

        Isoschizomer: same site.

        >>> from Bio.Restriction import SacI, SstI, SmaI, XmaI
        >>> SacI.is_isoschizomer(SstI)
        True
        >>> SmaI.is_isoschizomer(XmaI)
        True

        r0   r   r0   r0   r1   is_isoschizomerY  s   zAbstractCut.is_isoschizomerNc                    6   |st } fdd|D }| }||= |  |S )zList equischizomers of the enzyme.

        Return a tuple of all the isoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes.

        Equischizomer: same site, same position of restriction.
        c                    s   g | ]} |ks|qS r0   r0   r   r   r0   r1   rf   u  r   z.AbstractCut.equischizomers.<locals>.<listcomp>
AllEnzymesr   sortr~   batchrre   r0   r   r1   equischizomersj  s   	
zAbstractCut.equischizomersc                    s"   |st }t fdd|D }|S )zList neoschizomers of the enzyme.

        Return a tuple of all the neoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes.

        Neoschizomer: same site, different position of restriction.
        c                 3   s    | ]	} |? r|V  qd S Nr0   r   r   r0   r1   r         z,AbstractCut.neoschizomers.<locals>.<genexpr>)r   r   r~   r   r   r0   r   r1   neoschizomers{  s   	zAbstractCut.neoschizomersc                    r   )zList all isoschizomers of the enzyme.

        Return a tuple of all the equischizomers and neoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes.
        c                    s    g | ]} |? s |ks|qS r0   r0   r   r   r0   r1   rf          z-AbstractCut.isoschizomers.<locals>.<listcomp>r   r   r0   r   r1   isoschizomers  s   
zAbstractCut.isoschizomersc                 C   r^   )zReturn the theoretically cutting frequency of the enzyme.

        Frequency of the site, given as 'one cut per x bases' (int).
        )freqr   r0   r0   r1   	frequency  s   zAbstractCut.frequencyrm   r   )rn   ro   rp   rq   classmethodr   r   r   r   r   r   r   r   r   r   r0   r0   r0   r1   r     s,    




r   c                   @   L   e Zd ZdZedd Zedd Zedd Zedd	 Zed
d Z	dS )NoCutaf  Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown or enzymes for
    which the pattern of cut is to complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the start
    of the restriction site.

    Their catalyse() method returns a TypeError.

    Unknown and NotDefined are also part of the base classes of these enzymes.

    Internal use only. Not meant to be instantiated.
    c                 C      dS yReturn if the cutting pattern has one cut.

        True if the enzyme cut the sequence one time on each strand.
        Fr0   r   r0   r0   r1   cut_once     zNoCut.cut_oncec                 C   r   wReturn if the cutting pattern has two cuts.

        True if the enzyme cut the sequence twice on each strand.
        Fr0   r   r0   r0   r1   	cut_twice  r   zNoCut.cut_twicec                 c       |V  dS )a  Return a generator that moves the cutting position by 1 (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.
        _modify returns the real place where the enzyme will cut.

        Example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        If the enzyme cut twice _modify will returns two integer corresponding
        to each cutting site.
        Nr0   r~   r   r0   r0   r1   _modify  s   
zNoCut._modifyc                 c   r   )zReturn a generator that moves the cutting position by 1 (PRIVATE).

        For internal use only.

        As _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic.
        Nr0   r   r0   r0   r1   _rev_modify  s   
	zNoCut._rev_modifyc                 C   s   dddd| j fS )  Return a list of the enzyme's characteristics as tuple.

        the tuple contains the attributes:

        - fst5 -> first 5' cut ((current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd5 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        Nr   r   r0   r0   r1   characteristic  s   zNoCut.characteristicN
rn   ro   rp   rq   r   r   r   r   r   r   r0   r0   r0   r1   r     s    




r   c                   @   r   )OneCutzImplement the methods for enzymes that cut the DNA only once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    c                 C   r   )r   Tr0   r   r0   r0   r1   r     r   zOneCut.cut_oncec                 C   r   r   r0   r   r0   r0   r1   r   
  r   zOneCut.cut_twicec                 c   s    || j  V  dS )a  Return a generator that moves the cutting position by 1 (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.
        _modify returns the real place where the enzyme will cut.

        Example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        if the enzyme cut twice _modify will returns two integer corresponding
        to each cutting site.
        N)fst5r   r0   r0   r1   r     s   zOneCut._modifyc                 c   s    || j  V  dS )zReturn a generator that moves the cutting position by 1 (PRIVATE).

        For internal use only.

        As _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic
        N)fst3r   r0   r0   r1   r   .  s   	zOneCut._rev_modifyc                 C   s   | j | jdd| jfS )a  Return a list of the enzyme's characteristics as tuple.

        The tuple contains the attributes:

        - fst5 -> first 5' cut ((current strand) or None
        - fst3 -> first 3' cut (complementary strand) or None
        - scd5 -> second 5' cut (current strand) or None
        - scd5 -> second 3' cut (complementary strand) or None
        - site -> recognition site.

        N)r   r   r   r   r0   r0   r1   r   9  s   zOneCut.characteristicNr   r0   r0   r0   r1   r     s    




r   c                   @   r   )TwoCutszImplement the methods for enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    c                 C   r   r   r0   r   r0   r0   r1   r   Q  r   zTwoCuts.cut_oncec                 C   r   )r   Tr0   r   r0   r0   r1   r   Y  r   zTwoCuts.cut_twicec                 c   s    || j  V  || j V  dS )a  Return a generator that moves the cutting position by 1 (PRIVATE).

        For internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.
        _modify returns the real place where the enzyme will cut.

        example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        if the enzyme cut twice _modify will returns two integer corresponding
        to each cutting site.
        N)r   scd5r   r0   r0   r1   r   a  s   zTwoCuts._modifyc                 c   s    || j  V  || j V  dS )zReturn a generator that moves the cutting position by 1 (PRIVATE).

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic
        N)r   scd3r   r0   r0   r1   r   ~  s   	zTwoCuts._rev_modifyc                 C   s   | j | j| j| j| jfS )r   )r   r   r   r   r   r   r0   r0   r1   r     s   zTwoCuts.characteristicNr   r0   r0   r0   r1   r   I  s    



r   c                   @      e Zd ZdZedd ZdS )Meth_DepzpImplement the information about methylation.

    Enzymes of this class possess a site which is methylable.
    c                 C   r   )mReturn if recognition site can be methylated.

        True if the recognition site is a methylable.
        Tr0   r   r0   r0   r1   is_methylable  r   zMeth_Dep.is_methylableNrn   ro   rp   rq   r   r   r0   r0   r0   r1   r         r   c                   @   r   )
Meth_UndepzwImplement information about methylation sensitibility.

    Enzymes of this class are not sensible to methylation.
    c                 C   r   )r   Fr0   r   r0   r0   r1   r     r   zMeth_Undep.is_methylableNr   r0   r0   r0   r1   r     r   r   c                   @   (   e Zd ZdZedd Zedd ZdS )Palindromica  Implement methods for enzymes with palindromic recognition sites.

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated.
    c                    s:    j  j j} fdd|D  _ jr    jS )zReturn a list of cutting sites of the enzyme in the sequence (PRIVATE).

        For internal use only.

        Implement the search method for palindromic enzymes.
        c                    s$   g | ]\}}  |D ]}|qqS r0   )r   )rd   sgr   r   r0   r1   rf        $ z'Palindromic._search.<locals>.<listcomp>)r   ri   r{   rk   r   r   )r~   sitelocr0   r   r1   r     s
   zPalindromic._searchc                 C   r   )8Return if the enzyme has a palindromic recognition site.Tr0   r   r0   r0   r1   is_palindromic     zPalindromic.is_palindromicNrn   ro   rp   rq   r   r   r   r0   r0   r0   r1   r     s    
r   c                   @   r   )NonPalindromiczImplement methods for enzymes with non-palindromic recognition sites.

    Palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated.
    c                 C   s   | j | j| j}g | _| j}| j}t| }g | _|D ]\}}||r/|  jt	||7  _q|  jt	||7  _q|  j| j7  _| jrO| j
  |   | jS )zReturn a list of cutting sites of the enzyme in the sequence (PRIVATE).

        For internal use only.

        Implement the search method for non palindromic enzymes.
        )r   ri   r{   rk   r   r   r   stron_minuslistr   r   )r~   iteratormodifrevmodifr   ra   rb   r0   r0   r1   r     s   
zNonPalindromic._searchc                 C   r   )r   Fr0   r   r0   r0   r1   r     r  zNonPalindromic.is_palindromicNr  r0   r0   r0   r1   r    s    
r  c                   @   sj   e Zd ZdZedddZeZedd Zedd Zed	d
 Z	edd Z
edd Zedd ZdS )UnknownzImplement methods for enzymes that produce unknown overhangs.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    Tc                 C   s   t | j d)i  List the sequence fragments after cutting dna with enzyme.

        Return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        z restriction is unknown.)r   rn   r   r0   r0   r1   r   
  s   zUnknown.catalysec                 C   r   Return if the enzyme produces blunt ends.

        True if the enzyme produces blunt end.

        Related methods:

        - RE.is_3overhang()
        - RE.is_5overhang()
        - RE.is_unknown()

        Fr0   r   r0   r0   r1   is_blunt     zUnknown.is_bluntc                 C   r   Return if the enzymes produces 5' overhanging ends.

        True if the enzyme produces 5' overhang sticky end.

        Related methods:

        - RE.is_3overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        Fr0   r   r0   r0   r1   is_5overhang)  r  zUnknown.is_5overhangc                 C   r   Return if the enzyme produces 3' overhanging ends.

        True if the enzyme produces 3' overhang sticky end.

        Related methods:

        - RE.is_5overhang()
        - RE.is_blunt()
        - RE.is_unknown()

        Fr0   r   r0   r0   r1   is_3overhang8  r  zUnknown.is_3overhangc                 C   r   )~Return the type of the enzyme's overhang as string.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        unknownr0   r   r0   r0   r1   overhangG  r   zUnknown.overhangc                 C      g S )=List all enzymes that produce compatible ends for the enzyme.r0   r   r0   r0   r1   compatible_endO  r  zUnknown.compatible_endc                 C   r   Test if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Fr0   r   r0   r0   r1   r   T  s   zUnknown._mod1Nrm   rn   ro   rp   rq   r   r   catalyzer  r  r  r  r  r   r0   r0   r0   r1   r
    s"    




r
  c                   @   sl   e Zd ZdZedddZeZedd Zedd Zed	d
 Z	edd Z
edddZedd ZdS )BluntzImplement methods for enzymes that produce blunt ends.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    Tc                       |  ||| j s dd fS g }td }  rH| dd   |r9| fddt|D 7 }| d d  t|S | d d  dd    |sat|S | fddt|D 7 }t|S )r  rG   Nr   c                    $   g | ]} | |d    qS rG   r0   r   dr   r0   r1   rf     r   z"Blunt.catalyse.<locals>.<listcomp>r   c                    r"  r#  r0   r   r$  r0   r1   rf     r   r   r   rH   r_   appendrangetupler~   r   r@   	fragmentsr   r0   r$  r1   r   h  "   &zBlunt.catalysec                 C   r   )r  Tr0   r   r0   r0   r1   r    r  zBlunt.is_bluntc                 C   r   r  r0   r   r0   r0   r1   r    r  zBlunt.is_5overhangc                 C   r   r  r0   r   r0   r0   r1   r    r  zBlunt.is_3overhangc                 C   r   )r  bluntr0   r   r0   r0   r1   r    r   zBlunt.overhangNc                 C   s"   |st }tdd tt D }|S )r  c                 s   s    | ]	}|  r|V  qd S r   r  r   r0   r0   r1   r     r   z'Blunt.compatible_end.<locals>.<genexpr>r   r   iterr   r0   r0   r1   r    s   zBlunt.compatible_endc                 C   s
   t | tS )zTest if other enzyme produces compatible ends for enzyme (PRIVATE).

        For internal use only

        Test for the compatibility of restriction ending of RE and other.
        )
issubclassr   )rQ   r0   r0   r1   r     s   
zBlunt._mod1rm   r   )rn   ro   rp   rq   r   r   r  r  r  r  r  r  staticmethodr   r0   r0   r0   r1   r   _  s"    /



r   c                   @   l   e Zd ZdZedddZeZedd Zedd Zed	d
 Z	edd Z
edddZedd ZdS )Ov5zImplement methods for enzymes that produce 5' overhanging ends.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated.
    Tc                    s   |  ||| j s dd fS td }g }  rH| dd   |r9| fddt|D 7 }| d d  t|S | d d  dd    |sat|S | fddt|D 7 }t|S )r  rG   Nr   c                    r"  r#  r0   r   r$  r0   r1   rf     r   z Ov5.catalyse.<locals>.<listcomp>r   c                    r"  r#  r0   r   r$  r0   r1   rf     r   r&  )r~   r   r@   r   r+  r0   r$  r1   r     s"   &zOv5.catalysec                 C   r   r  r0   r   r0   r0   r1   r    r  zOv5.is_bluntc                 C   r   )r  Tr0   r   r0   r0   r1   r  +  r  zOv5.is_5overhangc                 C   r   r  r0   r   r0   r0   r1   r  :  r  zOv5.is_3overhangc                 C   r   )r  z5' overhangr0   r   r0   r0   r1   r  I  r   zOv5.overhangNc                    &   |st }t fddtt D }|S )r  c                 3   $    | ]}|  r|  r|V  qd S r   r  r   r   r0   r1   r   V     " z%Ov5.compatible_end.<locals>.<genexpr>r/  r   r0   r   r1   r  Q     zOv5.compatible_endc                 C      t |tr
| |S dS r  )r1  r4  _mod2r   r0   r0   r1   r   Y  s   

z	Ov5._mod1rm   r   r  r0   r0   r0   r1   r4    "    /



r4  c                   @   r3  )Ov3zImplement methods for enzymes that produce 3' overhanging ends.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated.
    Tc                    r!  )r  rG   Nr   c                    r"  r#  r0   r   r$  r0   r1   rf     r   z Ov3.catalyse.<locals>.<listcomp>r   c                    r"  r#  r0   r   r$  r0   r1   rf     r   r&  r*  r0   r$  r1   r   o  r,  zOv3.catalysec                 C   r   r  r0   r   r0   r0   r1   r    r  zOv3.is_bluntc                 C   r   r  r0   r   r0   r0   r1   r    r  zOv3.is_5overhangc                 C   r   )r  Tr0   r   r0   r0   r1   r    r  zOv3.is_3overhangc                 C   r   )r  z3' overhangr0   r   r0   r0   r1   r    r   zOv3.overhangNc                    r5  )r  c                 3   r6  r   r  r   r   r0   r1   r     r8  z%Ov3.compatible_end.<locals>.<genexpr>r/  r   r0   r   r1   r    r9  zOv3.compatible_endc                 C   r:  r  )r1  r=  r;  r   r0   r0   r1   r     s   

z	Ov3._mod1rm   r   r  r0   r0   r0   r1   r=  g  r<  r=  c                   @   r   )Definedaj  Implement methods for enzymes with defined recognition site and cut.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.
    c                 C   r   )-  Return if recognition sequence and cut are defined.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        Related methods:

        - RE.is_ambiguous()
        - RE.is_unknown()

        Tr0   r   r0   r0   r1   
is_defined     zDefined.is_definedc                 C   r   1  Return if recognition sequence and cut may be ambiguous.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        Related methods:

        - RE.is_defined()
        - RE.is_unknown()

        Fr0   r   r0   r0   r1   is_ambiguous  rB  zDefined.is_ambiguousc                 C   r   Return if recognition sequence is unknown.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        Related methods:

        - RE.is_defined()
        - RE.is_ambiguous()

        Fr0   r   r0   r0   r1   
is_unknown  r  zDefined.is_unknownc                 C   s0  | j }| j}| j}|  rd}|S |  rX||  krdkr*n n	d| j d }|S |dkr@|d| d ||d  d }|S |d| d |||  d ||d  }|S |  rl|d| d ||d  }|S ||  krvdkrn nd	| d
 }|S |d| d |||  d ||d  }|S )aE  Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        %cut twice, not yet implemented sorry.r   N^_NN^_z^_N_^N)r   r   r   r   r  r  )r~   f5f3r   rh   r0   r0   r1   	elucidate*  s.    
,,zDefined.elucidatec                 C   s(   |j | j krdS t|tr|| S dS )r  TF)ovhgseqr1  	Ambiguousr;  r   r0   r0   r1   r;  S  s
   

zDefined._mod2N)
rn   ro   rp   rq   r   rA  rE  rH  rR  r;  r0   r0   r0   r1   r?    s    



(r?  c                   @   r   )rT  ax  Implement methods for enzymes that produce variable overhangs.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.

    Notes:
        Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated.

    c                 C   r   r@  Fr0   r   r0   r0   r1   rA  t  rB  zAmbiguous.is_definedc                 C   r   )rD  Tr0   r   r0   r0   r1   rE    rB  zAmbiguous.is_ambiguousc                 C   r   rF  r0   r   r0   r0   r1   rH    r  zAmbiguous.is_unknownc                 C   s   t | jt |jkrdS | j}|D ]'}|dv r	 |dv r$d|d}|dv r8dt|  d }|||}qt||jrBdS dS )	r  FATCGr&   .
RYWMSKHDBV[]T)rH   rS  r   splitmatchingrh   match)r~   rQ   sebaseexpandr0   r0   r1   r;    s   zAmbiguous._mod2c                 C   s  | j }| j}t| }| j}|  rd}|S |  r||  kr#dkr-n nd| d }|S d|  kr7|kr_n n&d||   krE|kr_n n|d| d |||  d ||d  }|S d|  kri|krn n|d| d ||d  |d  d }|S d||   kr|krn ndt|d  |d|  d ||d  }|S || dk rdt|d  d t|| d  | }|S ||kr||| d  d || | d  d }|S dt|d  | |d  d }|S |  r|dk rd	t|d  | }|S ||kr||| d  d
 }|S td| j	|f |dkr?|dkr1d| d }|S |d || d  d }|S d||   k rM|krun n&d|  kr[|krun n|d| d |||  d ||d  }|S d||   k r|krn n|d| d ||d  || d  d }|S d|  kr|krn ndd||   |d|  d ||d  }|S |dkr||d  d || | d  d }|S |dk rdt|| | d  d t|d  | }|S dt|| d  | || d  d }|S )F  Return a string representing the recognition site and cuttings.

        Return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:

        >>> from Bio.Restriction import EcoRI, KpnI, EcoRV, SnaI
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()     # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        rI  r   rJ  rK  NrL  rM  r&   zN^_z^_Nz%s.easyrepr() : error f5=%irN  rO  )
r   r   rH   r   r   r  absr  ry   r   )r~   rP  rQ  r   r   rh   r0   r0   r1   rR    sv   )&4,$(", (( 




<, ,	,
$
,(zAmbiguous.elucidateN)
rn   ro   rp   rq   r   rA  rE  rH  r;  rR  r0   r0   r0   r1   rT  f  s    



rT  c                       s`   e Zd ZdZe fddZedd Zedd Zedd	 Zed
d Z	edd Z
  ZS )
NotDefinedzImplement methods for enzymes with non-characterized overhangs.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated.
    c                    s   | j  rdS t   dS )r   N)r   r_   superr   r   rA   r0   r1   r     s   
	zNotDefined._dropc                 C   r   rU  r0   r   r0   r0   r1   rA    rB  zNotDefined.is_definedc                 C   r   rC  r0   r   r0   r0   r1   rE  *  rB  zNotDefined.is_ambiguousc                 C   r   )rG  Tr0   r   r0   r0   r1   rH  :  r  zNotDefined.is_unknownc                 C   s   t dt| t|t| f )r  z0%s.mod2(%s), %s : NotDefined. pas glop pas glop!)ry   r  r   r0   r0   r1   r;  I  s
   zNotDefined._mod2c                 C   s   d| j  dS )ra  z? z ?r   r   r0   r0   r1   rR  ]  s   zNotDefined.elucidate)rn   ro   rp   rq   r   r   rA  rE  rH  r;  rR  __classcell__r0   r0   re  r1   rc    s    



rc  c                   @   s@   e Zd ZdZedd Zedd Zedd Zedd	 Zd
S )Commercially_availablezzImplement methods for enzymes which are commercially available.

    Internal use only. Not meant to be instantiated.
    c                 C   s$   | j D ]}tt| d d  qdS )(Print a list of suppliers of the enzyme.r   ,N)supplr   r   )r~   r   r0   r0   r1   r     s   
z Commercially_available.suppliersc                    s    fddt  D S ))Return a list of suppliers of the enzyme.c                    s"   g | ]\}}| j v r|d  qS r   )rj  rd   kvr   r0   r1   rf        " z8Commercially_available.supplier_list.<locals>.<listcomp>r   itemsr   r0   r   r1   supplier_list  s   z$Commercially_available.supplier_listc                 C   r   )eReturn the recommended buffer of the supplier for this enzyme.

        Not implemented yet.
        Nr0   r~   supplierr0   r0   r1   buffers  s    zCommercially_available.buffersc                 C   r   )WReturn if enzyme is commercially available.

        True if RE has suppliers.
        Tr0   r   r0   r0   r1   is_comm  r   zCommercially_available.is_commN)	rn   ro   rp   rq   r   r   rs  rw  ry  r0   r0   r0   r1   rg  t  s    



rg  c                   @   s@   e Zd ZdZedd Zedd Zedd Zedd	 Z	d
S )Not_availablez~Implement methods for enzymes which are not commercially available.

    Internal use only. Not meant to be instantiated.
    c                   C   r   )rh  Nr0   r0   r0   r0   r1   r     r  zNot_available.suppliersc                 C   r  )rk  r0   r   r0   r0   r1   rs    r  zNot_available.supplier_listc                 C   s   t d)rt  z"Enzyme not commercially available.)r>   ru  r0   r0   r1   rw    r   zNot_available.buffersc                 C   r   )rx  Fr0   r   r0   r0   r1   ry    r   zNot_available.is_commN)
rn   ro   rp   rq   r2  r   r   rs  rw  ry  r0   r0   r0   r1   rz    s    


rz  c                   @   s   e Zd ZdZd8ddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zd9ddZdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zed0d1 Zed2d3 Zd:d5d6Zd7S );r   z-Class for operations on more than one enzyme.r0   c                    sV    fdd|D }|dd |D 7 }t  | t  _d _dd |D  _dS )z=Initialize empty RB or pre-fill with enzymes (from supplier).c                    s   g | ]}  |qS r0   )formatr   rI   r0   r1   rf         z-RestrictionBatch.__init__.<locals>.<listcomp>c                 S   s&   g | ]}t | d  D ]}t|q
qS r#  )r   eval)rd   nr   r0   r0   r1   rf     s   & Nc                 S   s   g | ]}|t v r|qS r0   r   r   r0   r0   r1   rf     r   )setrF   dictfromkeysmappingalready_mappedr   )rD   firstr   r0   rI   r1   rF     s   zRestrictionBatch.__init__c                 C   sL   t | dk rd|  S dd|  dd d|  dd fS )z=Return a readable representation of the ``RestrictionBatch``.   +z...N   )rH   r   elementsrI   r0   r0   r1   r     s
   *zRestrictionBatch.__str__c                 C   s   d|    dS )z?Represent ``RestrictionBatch`` class as a string for debugging.zRestrictionBatch(rL   )r  rI   r0   r0   r1   rN     s   zRestrictionBatch.__repr__c                 C   s0   z|  |}W n
 ty   Y dS w t| |S )z*Implement ``in`` for ``RestrictionBatch``.F)r{  ry   r  __contains__rP   r0   r0   r1   r    s   zRestrictionBatch.__contains__c                 C   r   )z.Override '/' operator to use as search method.r   rP   r0   r0   r1   __div__  rU   zRestrictionBatch.__div__c                 C   r   )zAOverride division with reversed operands to use as search method.r   rP   r0   r0   r1   __rdiv__  rU   zRestrictionBatch.__rdiv__c                 C   r   )z\Override Python 3 division operator to use as search method.

        Like __div__.
        r   rP   r0   r0   r1   r        
zRestrictionBatch.__truediv__c                 C   r   )zIAs __truediv___, with reversed operands.

        Like __rdiv__.
        r   rP   r0   r0   r1   r     r  zRestrictionBatch.__rtruediv__Fc                 C   s:   |  |}|| v r|S |r| | |S td|j d)a3  Check if enzyme is in batch and return it.

        If add is True and enzyme is not in batch add enzyme to batch.
        If add is False (which is the default) only return enzyme.
        If enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        zenzyme z is not in RestrictionBatch)r{  addry   rn   )rD   enzymer  er0   r0   r1   get  s   

zRestrictionBatch.getc                 C   s2   t t|| }t }tt|dgt| |_|S )zFilter enzymes in batch with supplied function.

        The new batch will contain only the enzymes for which
        func return True.
        Tr  filterr   r  ziprH   _data)rD   funcr%  rZ   r0   r0   r1   lambdasplit
  s   zRestrictionBatch.lambdasplitc                 C   s4   t | }| j| |d D ]	}| t| qdS )zAdd all enzymes from a given supplier to batch.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        Returns None.
        Raise a KeyError if letter is not a supplier code.
        rG   N)r   r   r'  r   r}  )rD   letterrv  r   r0   r0   r1   add_supplier  s
   zRestrictionBatch.add_supplierc                 C   s   t dd | jD }|S )zList the current suppliers for the restriction batch.

        Return a sorted list of the suppliers which have been used to
        create the batch.
        c                 s   s    | ]	}t | d  V  qdS r   r  r   r0   r0   r1   r   (  r   z5RestrictionBatch.current_suppliers.<locals>.<genexpr>)r   r   )rD   
suppl_listr0   r0   r1   current_suppliers"  s   z"RestrictionBatch.current_suppliersc                 C   s   |  | | S )ziOverride '+=' for use with sets.

        b += other -> add other to b, check the type of other.
        )r  rP   r0   r0   r1   __iadd__+  s   
zRestrictionBatch.__iadd__c                 C   s   |  | }|| |S )zTOverride '+' for use with sets.

        b + other -> new RestrictionBatch.
        )rA   r  )rD   rQ   rZ   r0   r0   r1   r   3  s   

zRestrictionBatch.__add__c                 C      t | | |S )a&  Remove enzyme from restriction batch.

        Safe set.remove method. Verify that other is a RestrictionType or can
        be evaluated to a RestrictionType.
        Raise a ValueError if other can not be evaluated to a RestrictionType.
        Raise a KeyError if other is not in B.
        )r  remover{  rP   r0   r0   r1   r  <  s   zRestrictionBatch.removec                 C   r  )a  Add a restriction enzyme to the restriction batch.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        Raise a ValueError if other can not be evaluated to a RestrictionType.
        )r  r  r{  rP   r0   r0   r1   r  F  s   zRestrictionBatch.addc                 C   s   t | |S )z:Add restriction enzyme to batch without checking its type.)r  r  rP   r0   r0   r1   r   O  s   zRestrictionBatch.add_nocheckc              	   C   sX   zt |tr	|W S t tt|trt|W S W n ttfy#   Y nw t|j d)zEvaluate enzyme (name) and return it (as RestrictionType).

        If y is a RestrictionType return y.
        If y can be evaluated to a RestrictionType return eval(y).
        Raise a ValueError in all other case.
        z is not a RestrictionType)r7   rv   r}  r  	NameErrorSyntaxErrorry   rA   rD   yr0   r0   r1   r{  S  s   

zRestrictionBatch.formatc                 C   s   t |tpt tt|tS )zgReturn if enzyme (name) is a known enzyme.

        True if y or eval(y) is a RestrictionType.
        )r7   rv   r}  r  r  r0   r0   r1   is_restrictionc  s   zRestrictionBatch.is_restrictionc                    s@    fdd}t t|| }t }tt|dgt| |_|S )zExtract enzymes of a certain class and put in new RestrictionBatch.

        It works but it is slow, so it has really an interest when splitting
        over multiple conditions.
        c                    s<   D ]}  |jd}t| |r|rq dS |r dS qdS )NTF)r  rn   r1  )elementrB   bboolclassesr0   r1   	splittests  s   
z)RestrictionBatch.split.<locals>.splittestTr  )rD   r  r  r  r%  rZ   r0   r  r1   r[  l  s
   zRestrictionBatch.splitc                 C   s   t dd | D S )zList the enzymes of the RestrictionBatch as list of strings.

        Give all the names of the enzymes in B sorted alphabetically.
        c                 s       | ]}t |V  qd S r   r  rd   r  r0   r0   r1   r     r   z,RestrictionBatch.elements.<locals>.<genexpr>)r   rI   r0   r0   r1   r    s   zRestrictionBatch.elementsc                 C   s   dd | D S )zList the names of the enzymes of the RestrictionBatch.

        Return a list of the name of the elements of the batch.
        c                 S      g | ]}t |qS r0   r  r  r0   r0   r1   rf         z.RestrictionBatch.as_string.<locals>.<listcomp>r0   rI   r0   r0   r1   	as_string  rK   zRestrictionBatch.as_stringc                 C   s   dd t  D }|S )zYReturn a dictionary with supplier codes.

        Letter code for the suppliers.
        c                 S   s   i | ]	\}}||d  qS rl  r0   rm  r0   r0   r1   
<dictcomp>  rg   z0RestrictionBatch.suppl_codes.<locals>.<dictcomp>rq  r   r0   r0   r1   suppl_codes  s   zRestrictionBatch.suppl_codesc                 C   s(   dd |    D }td| dS )zPrint a list of supplier codes.c                 S   s   g | ]}d  |qS )z = )r   rc   r0   r0   r1   rf     r|  z/RestrictionBatch.show_codes.<locals>.<listcomp>
N)r  rr  r   r   r   r0   r0   r1   
show_codes  r   zRestrictionBatch.show_codesTc                    s   t | dsd| _t tr2t |f| jkr| jS t |f| _t |fdd| D | _| jS t trYt  jf| jkrD| jS t  jf| _ fdd| D | _| jS tdt	  d)z?Return a dic of cutting sites in the seq for the batch enzymes.r  Nc                       i | ]}||  qS r0   r   r   )fseqr0   r1   r    r   z+RestrictionBatch.search.<locals>.<dictcomp>c                    r  r0   r   r   )r   r0   r1   r    r   z)Expected Seq or MutableSeq instance, got r   )
hasattrr  r7   DNAr  r  r3   r@   r>   rC   )rD   r   r@   r0   )r   r  r1   r     s    



zRestrictionBatch.searchN)r0   r0   )Frm   )rn   ro   rp   rq   rF   r   rN   r  r  r  r   r   r  r  r  r  r  r   r  r  r   r{  r  r[  r  r  r   r  r  r   r0   r0   r0   r1   r     s:    
		
		
		

r   rw   c                   @   s  e Zd ZdZeedfddZdd Zdd Zd	d
 Z	dd Z
dd Zd9ddZd9ddZdd Zd:ddZd;ddZd;ddZd;ddZd;dd Zd;d!d"Zd;d#d$Zd;d%d&Zd;d'd(Zd;d)d*Zd;d+d,Zd;d-d.Zd;d/d0Zd;d1d2Zd;d3d4Zd;d5d6Zd;d7d8ZdS )<Analysisz:Provide methods for enhanced analysis and pretty printing.Tc                 C   s<   t | | || _|| _|| _| jr| | j| j dS dS )a  Initialize an Analysis with RestrictionBatch and sequence.

        For most of the methods of this class if a dictionary is given it will
        be used as the base to calculate the results.
        If no dictionary is given a new analysis using the RestrictionBatch
        which has been given when the Analysis class has been instantiated,
        will be carried out and used.
        N)r   rF   rbsequencer@   r   )rD   restrictionbatchr  r@   r0   r0   r1   rF     s   zAnalysis.__init__c                 C   s   d| j d| jd| j dS )z)Represent ``Analysis`` class as a string.z	Analysis(ri  rL   )r  r  r@   rI   r0   r0   r1   rN     s   zAnalysis.__repr__c                    s    fdd| j  D S )zFilter result for keys which are in wanted (PRIVATE).

        Internal use only. Returns a dict.

        Screen the results through wanted set.
        Keep only the results for which the enzymes is in wanted set.
        c                       i | ]\}}| v r||qS r0   r0   rm  wantedr0   r1   r        z%Analysis._sub_set.<locals>.<dictcomp>r  rr  )rD   r  r0   r  r1   _sub_set  s   	zAnalysis._sub_setc                 C   s   t |tstdt| dt |tstdt| d|dk r)|t| j7 }|dk r4|t| j7 }||k r9n||}}||k rH||| jfS dS )zSet boundaries to correct values (PRIVATE).

        Format the boundaries for use with the methods that limit the
        search to only part of the sequence given to analyse.
        zexpected int, got r   rG   N)r7   intr>   rC   rH   r  _test_normal)rD   ra   endr0   r0   r1   _boundaries  s   


zAnalysis._boundariesc                 C   s   ||  ko	|k S   S )zTTest if site is between start and end (PRIVATE).

        Internal use only
        r0   rD   ra   r  r   r0   r0   r1   r  	  s   zAnalysis._test_normalc                 C   s6   ||  kot | jkn  pd|  ko|k S   S )zmTest if site is between end and start, for circular sequences (PRIVATE).

        Internal use only.
        rG   )rH   r  r  r0   r0   r1   _test_reverse	  s   6zAnalysis._test_reverseNrw   c                 C   s   |s| j }t| |||S )zhCollect data and pass to PrintFormat.

        If dct is not given the full dictionary is used.
        )r  r   format_outputrD   r   titles1r0   r0   r1   r  	  s   zAnalysis.format_outputc                 C   s   t | ||| dS )zPrint the output of the analysis.

        If dct is not given the full dictionary is used.
        s1: Title for non-cutting enzymes
        This method prints the output of A.format_output() and it is here
        for backwards compatibility.
        N)r   r  r  r0   r0   r1   
print_that	  s   zAnalysis.print_thatc                 K   s   |  D ]n\}}|dv r!t| || | j| j | _| j| j | _q|dkr4t| d| | | j| j q|dkrCt	
| || j| j} q|dkrUt| d| | | j| q|dv r`t| || q|dv rltd| dtd	| d
S )a  Change parameters of print output.

        It is possible to change the width of the shell by setting
        self.ConsoleWidth to what you want.
        self.NameWidth refer to the maximal length of the enzyme name.

        Changing one of these parameters here might not give the results
        you expect. In which case, you can settle back to a 80 columns shell
        or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
        you get it right.
        )	NameWidthConsoleWidthr  r  r@   )IndentMaxsize)Cmodulo	PrefWidthz
To change z&, change NameWidth and/or ConsoleWidthzAnalysis has no attribute N)rr  setattrr  r  r  r  r   r  r@   r  rF   r|   )rD   whatrn  ro  r0   r0   r1   change'	  s*   
zAnalysis.changec                 C   r^   )zPerform analysis with all enzymes of batch and return all results.

        Full Restriction Map of the sequence, as a dictionary.
        )r  )rD   r@   r0   r0   r1   fullI	  s   zAnalysis.fullc                 C      |s| j }dd | D S )z&Return only cuts that have blunt ends.c                 S      i | ]\}}|  r||qS r0   r.  rm  r0   r0   r1   r  T	  r  z"Analysis.blunt.<locals>.<dictcomp>r  rD   r   r0   r0   r1   r-  P	     zAnalysis.bluntc                 C   r  )z(Return only cuts that have 5' overhangs.c                 S   r  r0   r7  rm  r0   r0   r1   r  Z	  r  z&Analysis.overhang5.<locals>.<dictcomp>r  r  r0   r0   r1   	overhang5V	  r  zAnalysis.overhang5c                 C   r  )z(Return only cuts that have 3' overhangs.c                 S   r  r0   r>  rm  r0   r0   r1   r  `	  r  z&Analysis.overhang3.<locals>.<dictcomp>r  r  r0   r0   r1   	overhang3\	  r  zAnalysis.overhang3c                 C   r  )z@Return only results from enzymes that produce defined overhangs.c                 S   r  r0   )rA  rm  r0   r0   r1   r  f	  r  z$Analysis.defined.<locals>.<dictcomp>r  r  r0   r0   r1   definedb	  r  zAnalysis.definedc                 C   r  )z6Return only results from enzyme with at least one cut.c                 S   s   i | ]	\}}|r||qS r0   r0   rm  r0   r0   r1   r  l	  rg   z'Analysis.with_sites.<locals>.<dictcomp>r  r  r0   r0   r1   
with_sitesh	  r  zAnalysis.with_sitesc                 C   r  )z=Return only results from enzymes that don't cut the sequence.c                 S   s   i | ]	\}}|s||qS r0   r0   rm  r0   r0   r1   r  r	  rg   z)Analysis.without_site.<locals>.<dictcomp>r  r  r0   r0   r1   without_siten	  r  zAnalysis.without_sitec                        |s| j } fdd| D S )z?Return only results from enzymes that cut the sequence N times.c                    s"   i | ]\}}t | kr||qS r0   rH   rm  r&   r0   r1   r  x	  rp  z)Analysis.with_N_sites.<locals>.<dictcomp>r  )rD   r&   r   r0   r  r1   with_N_sitest	     zAnalysis.with_N_sitesc                    r  )z<Return only results from enzymes that cut (x,y,z,...) times.c                    s"   i | ]\}}t | v r||qS r0   r  rm  r  r0   r1   r  ~	  rp  z-Analysis.with_number_list.<locals>.<dictcomp>r  )rD   r  r   r0   r  r1   with_number_listz	  r  zAnalysis.with_number_listc                    s\   t |D ]\}}|tvrtd| t ||= q s%t|| j| jS  fdd|D S )z8Return only results from enzymes which names are listed.zno data for the enzyme: c                    s   i | ]}| v r| | qS r0   r0   )rd   r~  r   r0   r1   r  	  r  z&Analysis.with_name.<locals>.<dictcomp>)	r   r   warningswarnr   r   r   r  r@   )rD   namesr   re   r  r0   r  r1   	with_name	  s   zAnalysis.with_namec                    s<    fdd| D }|st || jS  fdd| D S )z8Return only results form enzymes with a given site size.c                    s   g | ]	}|j  kr|qS r0   )rk   )rd   r   	site_sizer0   r1   rf   	  rg   z+Analysis.with_site_size.<locals>.<listcomp>c                    r  r0   r0   rm  r  r0   r1   r  	  r  z+Analysis.with_site_size.<locals>.<dictcomp>)r   r   r  rr  )rD   r  r   sitesr0   r  r1   with_site_size	  s   zAnalysis.with_site_sizec           	      C   sd   |  ||\}}}|s| j}t|}| D ]\}}|s ||= q|D ]}||||r+q"||=  q|S )zAReturn only results from enzymes that only cut within start, end.r  r  r  rr  	rD   ra   r  r   testr%  keyr  r   r0   r0   r1   only_between	  s   zAnalysis.only_betweenc           	      C   sX   |  ||\}}}i }|s| j}| D ]\}}|D ]}||||r(|||<  nqq|S )zReturn only results from enzymes that cut at least within borders.

        Enzymes that cut the sequence at least in between start and end.
        They may cut outside as well.
        r  r  rr  r  r0   r0   r1   between	  s   zAnalysis.betweenc                    s`   g } kr fdd|   | D }t|S  fdd|   | D }t|S )zReturn only results from within start, end.

        Enzymes must cut inside start/end and may also cut outside. However,
        only the cutting positions within start/end will be returned.
        c                    (   g | ]\}}| fd d|D fqS )c                    s(   g | ]}|  kr krn n|qS r0   r0   rd   vvr  ra   r0   r1   rf   	  s   ( 9Analysis.show_only_between.<locals>.<listcomp>.<listcomp>r0   rm  r  r0   r1   rf   	      z.Analysis.show_only_between.<locals>.<listcomp>c                    r  )c                    s    g | ]}|ks| kr|qS r0   r0   r  r  r0   r1   rf   	  r   r  r0   rm  r  r0   r1   rf   	  r  )r  rr  r  rD   ra   r  r   r%  r0   r  r1   show_only_between	  s   	zAnalysis.show_only_betweenc           	      C   sf   |  ||\}}}|s| j}t|}| D ]\}}|s ||= q|D ]}||||r/||=  nq"q|S )zReturn only results from enzymes that only cut outside start, end.

        Enzymes that cut the sequence outside of the region
        in between start and end but do not cut inside.
        r  r  r0   r0   r1   only_outside	  s   zAnalysis.only_outsidec           	      C   sV   |  ||\}}}|s| j}i }| D ]\}}|D ]}||||r#q|||<  q|S )zReturn only results from enzymes that at least cut outside borders.

        Enzymes that cut outside the region in between start and end.
        They may cut inside as well.
        r  r  r0   r0   r1   outside	  s   zAnalysis.outsidec                 C   s*   |s| j }|  }|| ||| |S )z@Return only results from enzymes that don't cut between borders.)r  r  updater   r  r0   r0   r1   
do_not_cut	  s
   zAnalysis.do_not_cut)Nrw   rw   rm   r   ) rn   ro   rp   rq   _restrictionbatch
_empty_DNArF   rN   r  r  r  r  r  r  r  r  r-  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r0   r0   r0   r1   r    s:    


	

"















r  c                 c   r  r   )r}  r   r0   r0   r1   r   $
  r   r   c                 C   r  r0   r  r   r0   r0   r1   rf   B
  r  rf   )r3   r  r   r   CommOnlyNonComm)Drq   	itertoolsrh   rr   r  Bior   Bio.Restriction.PrintFormatr   &Bio.Restriction.Restriction_Dictionaryr   
enzymedictr   r   r   Bio.Seqr   r   r\  r  r,   r2   r3   rC   rv   r   r   r   r   r   r   r   r  r
  r   r4  r=  r?  rT  rc  rg  rz  r  r   r  r  r  r  r  rr  TYPEr   enzymesr)  bases2__new__r   rn  newenzry  r   r   r  r  localsr  r  __all__r0   r0   r0   r1   <module>   s   
J	v   +YOQ *]   	w p&(  	  E

