o
    Rŀg8                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 dd	lm
Z
 d
d Zdd ZG dd de	ZG dd de	ZG dd de
ZG dd deZdd Zdd ZedkrmddlmZ edd dS dS )zBio.SeqIO support for the "fasta" (aka FastA or Pearson) file format.

You are expected to use this module via the Bio.SeqIO functions.
    )Seq)	SeqRecord   )_clean)_get_seq_string)_TextIOSource)SequenceIterator)SequenceWriterc                 c   s    | D ]}|d dkr|dd   } nqdS g }| D ]*}|d dkr?|d|ddddfV  g }|dd   }q||   q|d|ddddfV  dS )a  Iterate over Fasta records as string tuples.

    Arguments:
     - handle - input stream opened in text mode

    For each record a tuple of two strings is returned, the FASTA title
    line (without the leading '>' character), and the sequence (with any
    whitespace removed). The title line is not divided up into an
    identifier (the first word) and comment or description.

    >>> with open("Fasta/dups.fasta") as handle:
    ...     for values in SimpleFastaParser(handle):
    ...         print(values)
    ...
    ('alpha', 'ACGTA')
    ('beta', 'CGTC')
    ('gamma', 'CCGCC')
    ('alpha (again - this is a duplicate entry to test the indexing code)', 'ACGTA')
    ('delta', 'CGCGC')

    r   >r   N  )rstripjoinreplaceappend)handlelinetitlelines r   E/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqIO/FastaIO.pySimpleFastaParser   s     $r   c                 c   s    d}t | D ]9\}}|d dkr(|d dkrtd| d|dd  }q|d dkr9td	| d
| d|| fV  q|dkrGdS |d dkrUtd| d|d dks_J ddS )a  Iterate over no-wrapping Fasta records as string tuples.

    Arguments:
     - handle - input stream opened in text mode

    Functionally the same as SimpleFastaParser but with a strict
    interpretation of the FASTA format as exactly two lines per
    record, the greater-than-sign identifier with description,
    and the sequence with no line wrapping.

    Any line wrapping will raise an exception, as will excess blank
    lines (other than the special case of a zero-length sequence
    as the second line of a record).

    Examples
    --------
    This file uses two lines per FASTA record:

    >>> with open("Fasta/aster_no_wrap.pro") as handle:
    ...     for title, seq in FastaTwoLineParser(handle):
    ...         print("%s = %s..." % (title, seq[:3]))
    ...
    gi|3298468|dbj|BAA31520.1| SAMIPF = GGH...

    This equivalent file uses line wrapping:

    >>> with open("Fasta/aster.pro") as handle:
    ...     for title, seq in FastaTwoLineParser(handle):
    ...         print("%s = %s..." % (title, seq[:3]))
    ...
    Traceback (most recent call last):
       ...
    ValueError: Expected FASTA record starting with '>' character. Perhaps this file is using FASTA line wrapping? Got: 'MTFGLVYTVYATAIDPKKGSLGTIAPIAIGFIVGANI'

       r   r
   ziExpected FASTA record starting with '>' character. Perhaps this file is using FASTA line wrapping? Got: ''r   NzoTwo '>' FASTA lines in a row. Missing sequence line if this is strict two-line-per-record FASTA format. Have '>z' and 'zjMissing sequence line at end of file if this is strict two-line-per-record FASTA format. Have title line 'z+line[0] == '>' ; this should be impossible!)	enumerate
ValueErrorr   strip)r   idxr   r   r   r   r   FastaTwoLineParserI   s:   $r    c                       sB   e Zd ZdZ	ddeddddf fddZdd	 Zd
d Z  ZS )FastaIteratorzParser for Fasta files.Nsourcealphabetreturnc                    s&   |durt dt j|ddd dS )a|  Iterate over Fasta records as SeqRecord objects.

        Arguments:
         - source - input stream opened in text mode, or a path to a file
         - alphabet - optional alphabet, not used. Leave as None.

        By default this will act like calling Bio.SeqIO.parse(handle, "fasta")
        with no custom handling of the title lines:

        >>> with open("Fasta/dups.fasta") as handle:
        ...     for record in FastaIterator(handle):
        ...         print(record.id)
        ...
        alpha
        beta
        gamma
        alpha
        delta

        If you want to modify the records before writing, for example to change
        the ID of each record, you can use a generator function as follows:

        >>> def modify_records(records):
        ...     for record in records:
        ...         record.id = record.id.upper()
        ...         yield record
        ...
        >>> with open('Fasta/dups.fasta') as handle:
        ...     for record in modify_records(FastaIterator(handle)):
        ...         print(record.id)
        ...
        ALPHA
        BETA
        GAMMA
        ALPHA
        DELTA

        Nz,The alphabet argument is no longer supportedtFastamodefmt)r   super__init__)selfr"   r#   	__class__r   r   r+      s   +zFastaIterator.__init__c                 C      |  |}|S z9Start parsing the file, and return a SeqRecord generator.iterater,   r   recordsr   r   r   parse      
zFastaIterator.parsec              	   c   j    t |D ]-\}}z
|ddd }W n ty&   |r"J t|d}Y nw tt||||dV  qdS z.Parse the file and generate SeqRecord objects.Nr   r   r   )idnamedescription)r   split
IndexErrorreprr   r   r,   r   r   sequence
first_wordr   r   r   r2         
zFastaIterator.iterateN)	__name__
__module____qualname____doc__r   r+   r5   r2   __classcell__r   r   r-   r   r!      s    /r!   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )FastaTwoLineIteratorz9Parser for Fasta files with exactly two lines per record.c                    s   t  j|ddd dS )a  Iterate over two-line Fasta records (as SeqRecord objects).

        Arguments:
         - source - input stream opened in text mode, or a path to a file

        This uses a strict interpretation of the FASTA as requiring
        exactly two lines per record (no line wrapping).

        Only the default title to ID/name/description parsing offered
        by the relaxed FASTA parser is offered.
        r%   FASTAr'   Nr*   r+   )r,   r"   r-   r   r   r+      s   zFastaTwoLineIterator.__init__c                 C   r/   r0   r1   r3   r   r   r   r5      r6   zFastaTwoLineIterator.parsec              	   c   r7   r8   )r    r<   r=   r>   r   r   r?   r   r   r   r2      rB   zFastaTwoLineIterator.iterate)rD   rE   rF   rG   r+   r5   r2   rH   r   r   r-   r   rI      s
    rI   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	FastaWriterzClass to write Fasta format files (OBSOLETE).

    Please use the ``as_fasta`` function instead, or the top level
    ``Bio.SeqIO.write()`` function instead using ``format="fasta"``.
    <   Nc                    s,   t  | |r|dk rt|| _|| _dS )a  Create a Fasta writer (OBSOLETE).

        Arguments:
         - target - Output stream opened in text mode, or a path to a file.
         - wrap -   Optional line length used to wrap sequence lines.
           Defaults to wrapping the sequence at 60 characters
           Use zero (or None) for no wrapping, giving a single
           long line for the sequence.
         - record2title - Optional function to return the text to be
           used for the title line of each record.  By default
           a combination of the record.id and record.description
           is used.  If the record.description starts with the
           record.id, then just the record.description is used.

        You can either use::

            handle = open(filename, "w")
            writer = FastaWriter(handle)
            writer.write_file(myRecords)
            handle.close()

        Or, follow the sequential file writer system, for example::

            handle = open(filename, "w")
            writer = FastaWriter(handle)
            writer.write_header() # does nothing for Fasta files
            ...
            Multiple writer.write_record() and/or writer.write_records() calls
            ...
            writer.write_footer() # does nothing for Fasta files
            handle.close()

        r   N)r*   r+   r   wraprecord2title)r,   targetrN   rO   r-   r   r   r+      s   "
zFastaWriter.__init__c                 C   s  | j r| |  |}n'| |j}| |j}|r'|ddd |kr'|}n|r1| d| }n|}d|vs9J d|vs?J | jd| d t|}d|vsSJ d|vsYJ | jryt	dt
|| jD ]}| j|||| j  d  qedS | j|d  dS )z(Write a single Fasta record to the file.Nr   r   r   
r   r
   )rO   cleanr9   r;   r<   r   writer   rN   rangelen)r,   recordr   r9   r;   datair   r   r   write_record$  s(    zFastaWriter.write_record)rM   N)rD   rE   rF   rG   r+   rY   rH   r   r   r-   r   rL      s    )rL   c                       s"   e Zd ZdZd fdd	Z  ZS )FastaTwoLineWritera_  Class to write 2-line per record Fasta format files (OBSOLETE).

    This means we write the sequence information  without line
    wrapping, and will always write a blank line for an empty
    sequence.

    Please use the ``as_fasta_2line`` function instead, or the top level
    ``Bio.SeqIO.write()`` function instead using ``format="fasta"``.
    Nc                    s   t  j|d|d dS )aO  Create a 2-line per record Fasta writer (OBSOLETE).

        Arguments:
         - handle - Handle to an output file, e.g. as returned
           by open(filename, "w")
         - record2title - Optional function to return the text to be
           used for the title line of each record.  By default
           a combination of the record.id and record.description
           is used.  If the record.description starts with the
           record.id, then just the record.description is used.

        You can either use::

            handle = open(filename, "w")
            writer = FastaWriter(handle)
            writer.write_file(myRecords)
            handle.close()

        Or, follow the sequential file writer system, for example::

            handle = open(filename, "w")
            writer = FastaWriter(handle)
            writer.write_header() # does nothing for Fasta files
            ...
            Multiple writer.write_record() and/or writer.write_records() calls
            ...
            writer.write_footer() # does nothing for Fasta files
            handle.close()

        N)rN   rO   rK   )r,   r   rO   r-   r   r   r+   N  s   zFastaTwoLineWriter.__init__rC   )rD   rE   rF   rG   r+   rH   r   r   r-   r   rZ   C  s    
rZ   c                 C   s   t | j}t | j}|r|ddd |kr|}n|r#| d| }n|}d|vs+J d|vs1J d| dg}t| }d|vsBJ d|vsHJ tdt|dD ]}||||d  d  qPd	|S )
zTurn a SeqRecord into a FASTA formatted string.

    This is used internally by the SeqRecord's .format("fasta")
    method and by the SeqIO.write(..., ..., "fasta") function.
    Nr   r   r   rQ   r   r
   rM   r   )	r   r9   r;   r<   r   rT   rU   r   r   )rV   r9   r;   r   r   rW   rX   r   r   r   as_fastap  s    


r[   c                 C   s   t | j}t | j}|r|ddd |kr|}n|r#| d| }n|}d|vs+J d|vs1J t| }d|vs;J d|vsAJ d| d| dS )zTurn a SeqRecord into a two-line FASTA formatted string.

    This is used internally by the SeqRecord's .format("fasta-2line")
    method and by the SeqIO.write(..., ..., "fasta-2line") function.
    Nr   r   r   rQ   r   r
   )r   r9   r;   r<   r   )rV   r9   r;   r   rW   r   r   r   as_fasta_2line  s   

r\   __main__)run_doctest)verboseN)rG   Bio.Seqr   Bio.SeqRecordr   
Interfacesr   r   r   r   r	   r   r    r!   rI   rL   rZ   r[   r\   rD   
Bio._utilsr^   r   r   r   r   <module>   s(   
/BE$O-