o
    Rŀg*                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddl
mZ dd	lmZ d
dlmZ dd Zdd Zdd Zdd Zd%ddZdd Zdd ZeeeedZdd Zd&dd Zd&d!d"ZG d#d$ d$eZdS )'zBio.SeqIO support for the SnapGene file format.

The SnapGene binary format is the native format used by the SnapGene program
from GSL Biotech LLC.
    )datetimesub)unpack)parseString)Seq)
SeqFeature)SimpleLocation)	SeqRecord   )SequenceIteratorc                 c   s    	 |  d}t|dk rdS td|d }|  d}t|dk r%tdtd|d }|  |}t||k r;td|||fV  q)	a?  Iterate over the packets of a SnapGene file.

    A SnapGene file is made of packets, each packet being a TLV-like
    structure comprising:

      - 1 single byte indicating the packet's type;
      - 1 big-endian long integer (4 bytes) indicating the length of the
        packet's data;
      - the actual data.
    Tr   Nz>Br      zUnexpected end of packetz>I)readlenr   
ValueError)handlepacket_typelengthdata r   H/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqIO/SnapGeneIO.py_iterate   s   


r   c                 C   sb   |j rtdtd| d  |\}}t|d|_ d|jd< |d@ r*d|jd< d
S d	|jd< d
S )z}Parse a DNA sequence packet.

    A DNA sequence packet contains a single byte flag followed by the
    sequence itself.
    z*The file contains more than one DNA packetz>B%dsr   ASCIIDNAmolecule_typecirculartopologylinearN)seqr   r   r   decodeannotations)r   r   recordflagssequencer   r   r   _parse_dna_packet7   s   
r$   c                 C   s   t |d}t|d}|dkrd|jd< nd|jd< t|d}|r+t|d|jd	< t|d
}|r5||_t|d}|rP|ddd |_||_	|sR|j|_dS dS dS )zParse a 'Notes' packet.

    This type of packet contains some metadata about the sequence. They
    are stored as a XML string with a 'Notes' root node.
    UTF-8Type	SyntheticSYNdata_file_divisionUNCLastModifiedz%Y.%m.%ddateAccessionNumberComments r   r   N)
r   r   _get_child_valuer    r   strptimeidsplitnamedescription)r   r   r!   xmltyper,   acccommentr   r   r   _parse_notes_packetI   s&   




r:   c                 C   s,   t d|\}}}}|ddkrtddS )zParse a SnapGene cookie packet.

    Every SnapGene file starts with a packet of this type. It acts as
    a magic cookie identifying the file as a SnapGene file.
    z>8sHHHr   SnapGenez%The file is not a valid SnapGene fileN)r   r   r   )r   r   r!   cookieseq_typeexp_versionimp_versionr   r   r   _parse_cookie_packetf   s   r@   Fc           	      C   sz   dd |  dD \}}|d }|r|d7 }|d7 }||kr4t|t||d}td||d}|| }|S t|||d}|S )Nc                 s   s    | ]}t |V  qd S N)int).0xr   r   r   	<genexpr>r   s    z"_parse_location.<locals>.<genexpr>-r   )strandr   )r3   r	   r   )		rangespecrG   r!   	is_primerstartendl1l2locationr   r   r   _parse_locationq   s   rO   c                    s(  t |d}|dD ]}i }t|ddd}d}tt|ddd}|d	kr)d
}d}	g }
d |dD ]:}t|dddkr?q4t|d} d7  t|||}|	sS|}	n|d
kr\||	 }	n|	| }	t|d}|rn|
 |g q4t|
dkr|d
krt fdd|
D }
d	dd |
D g|d< |	st
d|dD ]I}t|ddd}g }|dD ]4}|dr|t|jd j q|dr|t|jd j q|dr|t|jd j q|||< qt|d}|rd |vr|g|d < n||d  vr|g|d< t|	||d!}|j| qdS )"zParse a sequence features packet.

    This packet stores sequence features (except primer binding sites,
    which are in a dedicated Primers packet). The data is a XML string
    starting with a 'Features' root node.
    r%   Featurer7   misc_featuredefaultr   directionality1   Nr   Segmentstandardgapranger4   c                    s    g | ]\}} | d  |gqS )r   r   rC   ir4   n_partsr   r   
<listcomp>   s     z*_parse_features_packet.<locals>.<listcomp>;c                 s   s"    | ]\}}| d | V  qdS ):Nr   r\   r   r   r   rE      s     z)_parse_features_packet.<locals>.<genexpr>partszMissing feature locationQzMissing qualifier nameerrorVtextpredefrB   labelr7   
qualifiers)r   r   getElementsByTagName_get_attribute_valuerB   rO   appendr   reversedjoinr   hasAttribute_decode
attributesvaluer   features)r   r   r!   r6   featurequalsr7   rG   rT   rN   subpartssegmentrngnext_locationr4   	qualifierqnameqvaluesru   r   r^   r   _parse_features_packet   sr   








r   c                 C   s   t |d}|dD ]_}i }t|d}|r|g|d< g }|dD ]G}t|ddd}	tt|d	d
d}
|
dkr<d}
nd}
t|	|
|dd}tt|dd
ddk}|rX||v rXq#|| t|d|d}|j| q#qdS )zParse a Primers packet.

    A Primers packet is similar to a Features packet but specifically
    stores primer binding features. The data is a XML string starting
    with a 'Primers' root node.
    r%   Primerr4   rj   BindingSiterN   zMissing binding site locationre   boundStrand0rR   r   rW   T)rI   
simplifiedprimer_bindrk   N)	r   r   rm   rn   rB   rO   ro   r   rv   )r   r   r!   r6   primerrx   r4   	locationssiter{   rG   rN   r   rw   r   r   r   _parse_primers_packet   s8   


r   )r         
   c                 C   s   t dd| S )Nz<[^>]+> r   )rh   r   r   r   rs     s   rs   Nc                 C   s*   |  |rt| j| jS |rt||S rA   )rr   rs   rt   ru   r   )noder4   rS   rf   r   r   r   rn     s
   
rn   c                 C   sJ   |  |}|r|d jr|d jj| jkrt|d jjS |r#t||S )Nr   )rm   
childNodes
firstChildnodeType	TEXT_NODErs   r   r   )r   r4   rS   rf   childrenr   r   r   r0     s   
r0   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )SnapGeneIteratorzParser for SnapGene files.c                    s   t  j|ddd dS )a   Parse a SnapGene file and return a SeqRecord object.

        Argument source is a file-like object or a path to a file.

        Note that a SnapGene file can only contain one sequence, so this
        iterator will always return a single record.
        br;   )modefmtN)super__init__)selfsource	__class__r   r   r   &  s   zSnapGeneIterator.__init__c                 C   s   |  |}|S )z9Start parsing the file, and return a SeqRecord generator.)iterate)r   r   recordsr   r   r   parse0  s   
zSnapGeneIterator.parsec                 c   s    t d}t|}z	t|\}}}W n ty   tddw |dkr'tdt||| |D ]\}}}t|}|durC|||| q/|jsKtd|V  dS )z.Iterate over the records in the SnapGene file.NzEmpty file.	   z5The file does not start with a SnapGene cookie packetzNo DNA packet in file)	r
   r   nextStopIterationr   r@   _packet_handlersgetr   )r   r   r!   packetsr   r   r   handlerr   r   r   r   5  s&   


zSnapGeneIterator.iterate)__name__
__module____qualname____doc__r   r   r   __classcell__r   r   r   r   r   #  s
    
r   )F)NN)r   r   rer   structr   xml.dom.minidomr   Bio.Seqr   Bio.SeqFeaturer   r	   Bio.SeqRecordr
   
Interfacesr   r   r$   r:   r@   rO   r   r   r   rs   rn   r0   r   r   r   r   r   <module>   s4   
O*

	