o
    Rŀgh?                     @   s   d Z ddlZG dd dZdS )a	  Print the results of restriction enzyme analysis.

PrintFormat prints the results from restriction analysis in 3 different
format: list, column or map.

The easiest way to use it is:

    >>> from Bio.Restriction.PrintFormat import PrintFormat
    >>> from Bio.Restriction.Restriction import RestrictionBatch
    >>> from Bio.Seq import Seq
    >>> pBs_mcs = Seq('GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC')
    >>> restriction_batch = RestrictionBatch(['EcoRI', 'BamHI', 'ApaI'])
    >>> result = restriction_batch.search(pBs_mcs)
    >>> my_map = PrintFormat()
    >>> my_map.print_that(result, 'My pBluescript mcs analysis:\n',
    ...               'No site:\n')
    My pBluescript mcs analysis:
    ApaI       :  12.
    EcoRI      :  50.
    No site:
    BamHI     
    <BLANKLINE>
    >>> my_map.sequence = pBs_mcs
    >>> my_map.print_as("map")
    >>> my_map.print_that(result)
               12 ApaI
               |                                                
               |                                     50 EcoRI
               |                                     |          
    GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC
    ||||||||||||||||||||||||||||||||||||||||||||||||||||||
    CCATGGCCCGGGGGGGAGCTCCAGCTGCCATAGCTATTCGAACTATAGCTTAAG
    1                                                   54
    <BLANKLINE>
    <BLANKLINE>
       Enzymes which do not cut the sequence.
    <BLANKLINE>
    BamHI     
    <BLANKLINE>
    >>>

Some of the methods of PrintFormat are meant to be overridden by derived
class.

Use the following parameters to control the appearance:

- ConsoleWidth : width of the console used default to 80.
                 should never be less than 60.
- NameWidth    : space attributed to the name in PrintList method.
- Indent       : Indent of the second line.
- MaxSize      : Maximal size of the sequence (default=6:
                 -> 99 999 bp + 1 trailing ','
                 people are unlikely to ask for restriction map of sequences
                 bigger than 100.000 bp. This is needed to determine the
                 space to be reserved for sites location.

                 - MaxSize = 5  =>   9.999 bp
                 - MaxSize = 6  =>  99.999 bp
                 - MaxSize = 7  => 999.999 bp

Example output::

    <------------ ConsoleWidth --------------->
    <- NameWidth ->
    EcoRI         :   1, 45, 50, 300, 400, 650,
                          700, 1200, 2500.
                      <-->
                        Indent

    Nc                   @   s   e Zd ZdZdZdZdZee Zee ZdZ	ee Z
d$ddZd%d
dZd%ddZd&ddZdd Zdd Zdd Zdd Zd'ddZd'ddZd'ddZd'dd Zd!d" Zd#S )(PrintFormatzBPrintFormat allow the printing of results of restriction analysis.P   
         listc                 C   s4   |dkr
| j | _dS |dkr| j| _dS | j| _dS )aL  Print the results as specified.

        Valid format are:
            'list'      -> alphabetical order
            'number'    -> number of sites in the sequence
            'map'       -> a map representation of the sequence with the sites.

        If you want more flexibility over-ride the virtual method make_format.
        mapnumberN)	_make_mapmake_format_make_number
_make_list)selfwhat r   O/var/www/html/myenv/lib/python3.10/site-packages/Bio/Restriction/PrintFormat.pyprint_as`   s
   
zPrintFormat.print_as c                 C   sT   |s| j }g g }}| D ]\}}|r|||f q|| q| ||||S )a  Summarise results as a nicely formatted string.

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search()
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        The format of print_that is a list.
        )resultsitemsappendr   )r   dcttitles1lsnckvr   r   r   format_outputq   s   
zPrintFormat.format_outputc                 C   s   t | ||| dS )a*  Print the output of the format_output method (OBSOLETE).

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search()
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        This method prints the output of A.format_output() and it is here
        for backwards compatibility.
        N)printr   )r   r   r   r   r   r   r   
print_that   s   zPrintFormat.print_thatr   c                 C   s   |  ||||S )zVirtual method used for formatting results.

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.
        )r   )r   cutr   r   r   r   r   r   r      s   zPrintFormat.make_formatc                 C      |  ||| || S )a  Summarise a list of positions by enzyme (PRIVATE).

        Return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.

        Arguments:
         - ls is a tuple or list of cutting enzymes.
         - title is the title.
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        )_make_list_only_make_nocut_onlyr   r   r   r   r   r   r   r   r      s   zPrintFormat._make_listc                 C   r"   )a  Summarise mapping information as a string (PRIVATE).

        Return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        )_make_map_onlyr$   r%   r   r   r   r
      s   zPrintFormat._make_mapc                 C   r"   )a  Format cutting position information as a string (PRIVATE).

        Returns a string in the form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        )_make_number_onlyr$   r%   r   r   r   r      s   zPrintFormat._make_numberc                 C   s   ||  || S )a  Summarise non-cutting enzymes (PRIVATE).

        Return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        Arguments:
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        )r$   r%   r   r   r   _make_nocut   s   zPrintFormat._make_nocutc           	      C   st   |s|S d}|p	d}dj }t|D ]}||tt|| jf}t|| jkr0|||df}d}q|||df}|S )zSummarise non-cutting enzymes (PRIVATE).

        Return a formatted string of the non cutting enzymes.

        Arguments:
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        r   z,
   Enzymes which do not cut the sequence.


)joinsortedstrljust	NameWidthlenlinesize)	r   r   r   r   r   st
stringsiteJoinkeyr   r   r   r$      s   	zPrintFormat._make_nocut_onlyc                 C   s   |s|S |  ||S )a  Summarise list of positions per enzyme (PRIVATE).

        Return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.
            ...

        Arguments:
         - ls is a tuple or list of results.
         - title is a string.
         - Non cutting enzymes are not included.
        )_PrintFormat__next_sectionr%   r   r   r   r#     s   zPrintFormat._make_list_onlyc                 C   s   |s|S |j dd d t|}d}g }|D ](\}}	t|	}
|
|kr7|d| 7 }| ||}||	fg|
}}q|||	f q|d| 7 }| ||S )a  Summarise number of cuts as a string (PRIVATE).

        Return a string of form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list of results.
         - title is a string.
         - Non cutting enzymes are not included.
        c                 S   s   t | d S )N   )r/   )xr   r   r   <lambda>2  s    z/PrintFormat._make_number_only.<locals>.<lambda>)r4   r6   z 

enzymes which cut %i times :

)sortiterr/   r5   r   )r   r   r   r   r   iteratorcur_lennew_sectnamesiteslengthr   r   r   r'     s    zPrintFormat._make_number_onlyc           "      C   sz  |s|S t dd |D }|pd}i }|D ]\}}	|	D ]}
|
|v r+||
 t| qt|g||
< qqt | }i }ddt| j}}}td|dD ]$}|d }g }|||< g }|D ]}||kri|| q]|| q]|}qM|||< t| j}t| j }d}d\}}d}dj}td|dD ]}|d }|}|| D ]}d}||kr|| D ]	}d	||f}q|dd
 }||t||df}|||df}|||f}|||f} nR|| D ]	}d	||f}q|d } ||d| d  t||df}||d| d  ||| d f}||d| d  ||| d df}|||f}|||f}qd||| |d ||| |t	t|d ddt
t|ddff}!|||!f}qd}|| D ]}d}||kr|| D ]
}||d	|f}qm|d|d  }||t||df}|||df}|||f}|||f} nT|| D ]
}||d	|f}q|d } ||d| d  t||df}||d| d  ||| d f}||d| d  ||| d df}|||f}|||f}q`d}!|||| df}!||!|||  df}!||!||| df}!||!|t	t|d dd	|| d  t
t|ddff}!|||!f}|S )a  Make string describing cutting map (PRIVATE).

        Return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        Arguments:
         - ls is a list of results.
         - title is a string.
         - Non cutting enzymes are not included.
        c                 s   s    | ]	\}}t |V  qd S Nr,   ).0r7   yr   r   r   	<genexpr>U  s    z-PrintFormat._make_map_only.<locals>.<genexpr>r   r   <   |)r   r   z<                                                             ;   r)   r6   N   z                              z

   )r+   r   r,   keysr/   sequencerange
complementr*   r-   rjust)"r   r   r   r   r   
resultKeysr   	enzymemapenzymer!   cmappingcutlocr7   counterr@   loc	remainingr4   rM   revsequenceabase	emptyliner3   linesnchunklineoline2linetotr   mapunitr   r   r   r&   A  s   
 "$


 "$zPrintFormat._make_map_onlyc                 C   s   d| j | j d  }| j| j }td| }ddj}}t|D ]<\}}	d}
|ddd |	D df}t||krLd	d
 t	||D }||}
n|}
||t
|| j d|
df}q!|S )a  Next section (PRIVATE).

        Arguments:
         - ls is a tuple/list of tuple (string, [int, int]).
         - into is a string to which the formatted ls will be added.

        Format ls as a string of lines:
        The form is::

            enzyme1     :   position1.
            enzyme2     :   position2, position3.

        then add the formatted ls to tot
        return tot.
        r)   rH   z([\w,\s()]){1,%i}[,\.]r   z, c                 s   s    | ]}t |V  qd S rA   rB   )rC   siter   r   r   rE     s    z-PrintFormat.__next_section.<locals>.<genexpr>.c                 S   s   g | ]}|  qS r   )group)rC   r7   r   r   r   
<listcomp>  s    z.PrintFormat.__next_section.<locals>.<listcomp>z :  )r.   Indentr0   MaxSizerecompiler*   r+   r/   finditerr,   r-   )r   r   intoindentationr0   patseveralr3   r>   r?   r2   outputr   r   r   __next_section  s   zPrintFormat.__next_sectionN)r   )r   r   )r   r   r   r   )r   r   )__name__
__module____qualname____doc__ConsoleWidthr.   rk   Cmodulo	PrefWidthrj   r0   r   r   r    r   r   r
   r   r(   r$   r#   r'   r&   r5   r   r   r   r   r   U   s.    







& r   )rx   rl   r   r   r   r   r   <module>   s   	H