o
    Rŀg                      @   s  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 dd	l
mZ dd
l
mZ ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ eee d ZG dd deZG dd deZG dd deZG dd deZG dd deZd1ddZdd Zd d! ZG d"d# d#eZG d$d% d%eZG d&d' d'eZ G d(d) d)e Z!d*d+ Z"d,d- Z#e$d.krdd/l%m&Z& e&dd0 dS dS )2a  Bio.SeqIO support for the "genbank" and "embl" file formats.

You are expected to use this module via the Bio.SeqIO functions.
Note that internally this module calls Bio.GenBank to do the actual
parsing of GenBank, EMBL and IMGT files.

See Also:
International Nucleotide Sequence Database Collaboration
http://www.insdc.org/

GenBank
http://www.ncbi.nlm.nih.gov/Genbank/

EMBL Nucleotide Sequence Database
http://www.ebi.ac.uk/embl/

DDBJ (DNA Data Bank of Japan)
http://www.ddbj.nig.ac.jp/

IMGT (use a variant of EMBL format with longer feature indents)
http://imgt.cines.fr/download/LIGM-DB/userman_doc.html
http://imgt.cines.fr/download/LIGM-DB/ftable_doc.html
http://www.ebi.ac.uk/imgt/hla/docs/manual.html

    N)datetime)ascii_letters)digits)BiopythonWarning)
SeqFeature)SeqIO)_ImgtScanner)EmblScanner)GenBankScanner)UndefinedSequenceError   )_get_seq_string)SequenceIterator)SequenceWriterz_-'*c                       (   e Zd ZdZ fddZdd Z  ZS )GenBankIteratorzParser for GenBank files.c                       t  j|ddd dS )a  Break up a Genbank file into SeqRecord objects.

        Argument source is a file-like object opened in text mode or a path to a file.
        Every section from the LOCUS line to the terminating // becomes
        a single SeqRecord with associated annotation and features.

        Note that for genomes or chromosomes, there is typically only
        one record.

        This gets called internally by Bio.SeqIO for the GenBank file format:

        >>> from Bio import SeqIO
        >>> for record in SeqIO.parse("GenBank/cor6_6.gb", "gb"):
        ...     print(record.id)
        ...
        X55053.1
        X62281.1
        M81224.1
        AJ237582.1
        L31939.1
        AF297471.1

        Equivalently,

        >>> with open("GenBank/cor6_6.gb") as handle:
        ...     for record in GenBankIterator(handle):
        ...         print(record.id)
        ...
        X55053.1
        X62281.1
        M81224.1
        AJ237582.1
        L31939.1
        AF297471.1

        tGenBankmodefmtNsuper__init__selfsource	__class__ E/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqIO/InsdcIO.pyr   A   s   %zGenBankIterator.__init__c                 C      t dd|}|S z9Start parsing the file, and return a SeqRecord generator.r   )debug)r
   parse_recordsr   handlerecordsr    r    r!   parseh      zGenBankIterator.parse__name__
__module____qualname____doc__r   r)   __classcell__r    r    r   r!   r   >   s    'r   c                       r   )EmblIteratorzParser for EMBL files.c                    r   )aS  Break up an EMBL file into SeqRecord objects.

        Argument source is a file-like object opened in text mode or a path to a file.
        Every section from the LOCUS line to the terminating // becomes
        a single SeqRecord with associated annotation and features.

        Note that for genomes or chromosomes, there is typically only
        one record.

        This gets called internally by Bio.SeqIO for the EMBL file format:

        >>> from Bio import SeqIO
        >>> for record in SeqIO.parse("EMBL/epo_prt_selection.embl", "embl"):
        ...     print(record.id)
        ...
        A00022.1
        A00028.1
        A00031.1
        A00034.1
        A00060.1
        A00071.1
        A00072.1
        A00078.1
        CQ797900.1

        Equivalently,

        >>> with open("EMBL/epo_prt_selection.embl") as handle:
        ...     for record in EmblIterator(handle):
        ...         print(record.id)
        ...
        A00022.1
        A00028.1
        A00031.1
        A00034.1
        A00060.1
        A00071.1
        A00072.1
        A00078.1
        CQ797900.1

        r   EMBLr   Nr   r   r   r    r!   r   q   s   +zEmblIterator.__init__c                 C   r"   r#   )r	   r%   r&   r    r    r!   r)      r*   zEmblIterator.parser+   r    r    r   r!   r1   n   s    -r1   c                       r   )ImgtIteratorzParser for IMGT files.c                    r   )au  Break up an IMGT file into SeqRecord objects.

        Argument source is a file-like object opened in text mode or a path to a file.
        Every section from the LOCUS line to the terminating // becomes
        a single SeqRecord with associated annotation and features.

        Note that for genomes or chromosomes, there is typically only
        one record.
        r   IMGTr   Nr   r   r   r    r!   r      s   
zImgtIterator.__init__c                 C   r"   r#   )r   r%   r&   r    r    r!   r)      r*   zImgtIterator.parser+   r    r    r   r!   r3      s    r3   c                       r   )GenBankCdsFeatureIteratorzDParser for GenBank files, creating a SeqRecord for each CDS feature.c                    r   )ah  Break up a Genbank file into SeqRecord objects for each CDS feature.

        Argument source is a file-like object opened in text mode or a path to a file.

        Every section from the LOCUS line to the terminating // can contain
        many CDS features.  These are returned as with the stated amino acid
        translation sequence (if given).
        r   r   r   Nr   r   r   r    r!   r         	z"GenBankCdsFeatureIterator.__init__c                 C      t dd|S r#   )r
   parse_cds_featuresr   r'   r    r    r!   r)         zGenBankCdsFeatureIterator.parser+   r    r    r   r!   r5          r5   c                       r   )EmblCdsFeatureIteratorzAParser for EMBL files, creating a SeqRecord for each CDS feature.c                    r   )ae  Break up a EMBL file into SeqRecord objects for each CDS feature.

        Argument source is a file-like object opened in text mode or a path to a file.

        Every section from the LOCUS line to the terminating // can contain
        many CDS features.  These are returned as with the stated amino acid
        translation sequence (if given).
        r   r2   r   Nr   r   r   r    r!   r      r6   zEmblCdsFeatureIterator.__init__c                 C   r7   r#   )r	   r8   r9   r    r    r!   r)      r:   zEmblCdsFeatureIterator.parser+   r    r    r   r!   r<      r;   r<   c                    s   t | tjrd|    S t | tjrd| j  | j  f S t | tjr0d| j  | j  f S t | tjr<d|    S t | tjrHd|    S t | tj	r]dd
 fdd	| jD  S t | tjrgtd
td)zBuild a GenBank/EMBL position string (PRIVATE).

    Use offset=1 to add one to convert a start position from python counting.
    z%iz(%i.%i)z(%i^%i)z<%iz>%iz
one-of(%s),c                 3       | ]}t | V  qd S N)_insdc_feature_position_string.0poffsetr    r!   	<genexpr>   s    

z1_insdc_feature_position_string.<locals>.<genexpr>z)Please report this as a bug in Biopython.z&Expected a SeqFeature position object.)
isinstancer   ExactPositionWithinPosition_left_rightBetweenPositionBeforePositionAfterPositionOneOfPositionjoinposition_choicesPositionNotImplementedError
ValueError)posrE   r    rD   r!   r@      s.   
r@   c                 C   sN  | j r
| j  d}nd}| jrJ t| jtjr;t| jtjr;| j| jkr;| j|kr0d||f S d|| j| jd f S t| jtjrXt| jtjrX| jd | jkrXd|| jf S t| jtjsft| jtjrt| jtjrxt| jtjrxtdt| jtjrd|| jt	| jf S d	|t	| jd| jd f S |t	| jd d
 t	| j S )N: z%s%i^1z%s%i^%ir   z%s%izFeature with unknown locationz	%s<%i..%sz	%s%s..>%iz..)
refref_dbrG   startr   rH   endUnknownPositionrT   r@   )location
rec_lengthrX   r    r    r!   6_insdc_location_string_ignoring_strand_and_subfeatures   sX   



r_   c              	      s   z1| j }| jdkr d| jd fdd|ddd D f W S d| jd fdd|D f W S  tyN   t|  }| jdkrJd	| d
 Y S | Y S w )aQ  Build a GenBank/EMBL location from a (Compound) SimpleLocation (PRIVATE).

    There is a choice of how to show joins on the reverse complement strand,
    GenBank used "complement(join(1,10),(20,100))" while EMBL used to use
    "join(complement(20,100),complement(1,10))" instead (but appears to have
    now adopted the GenBank convention). Notice that the order of the entries
    is reversed! This function therefore uses the first form. In this situation
    we expect the CompoundLocation and its parts to all be marked as
    strand == -1, and to be in the order 19:100 then 0:10.
    zcomplement(%s(%s))r=   c                 3   r>   r?   )r_   rA   r^   r    r!   rF   P  s    
z)_insdc_location_string.<locals>.<genexpr>Nz%s(%s)c                 3   r>   r?   )_insdc_location_stringrA   ra   r    r!   rF   Z  s    zcomplement())partsstrandoperatorrP   AttributeErrorr_   )r]   r^   rd   locr    ra   r!   rb   >  s*   


rb   c                   @   sd   e Zd ZdZdZdZde ZdZdZddd	Z	d
d Z
dd ZedddZedd Zdd ZdS )_InsdcWriterz2Base class for GenBank and EMBL writers (PRIVATE).P       z     %s                )	anticodoncitationcodon_startcompare	directionestimated_lengthmod_basenumberrpt_typerpt_unit_rangetag_peptidetransl_excepttransl_tableNc                 C   s  t |std| dt t|dkrtd| dt |d u r2| j| j d| d d S t	|t
r=|dd}|d u rPt	|tsK|| jv rNd	}nd
}|r_| j d| d| d}n| j d| d| }t|| jkr{| j|d  d S | rt|| jkr| j|d  d S ttt|d | j| jd dD ]
}|| dkr nq|| dkr| j}|| jksJ | j|d | d  | j||d    }| sd S d S )NzFeature qualifier key '.' contains characters not allowed by standard.   zF' is longer than maximum length specified by standard (20 characters)./
"z""FTz="=r   r`   rl   )#_allowed_table_component_name_chars
issupersetwarningswarnr   lenr'   writeQUALIFIER_INDENT_STRrG   strreplaceintFTQUAL_NO_QUOTE	MAX_WIDTHlstriprangeminQUALIFIER_INDENT)r   keyvaluequotelineindexr    r    r!   _write_feature_qualifier~  sV   



z%_InsdcWriter._write_feature_qualifierc                 C   s|   | j | j }t||kr|S |d| d}|dkr&td| t |S |d|d  d | j | ||d d  S )z@Split a feature location into lines (break at commas) (PRIVATE).Nr=   r`   zCouldn't split location:
r   r}   )	r   r   r   rfindr   r   r   r   _wrap_location)r   r]   lengthr   r    r    r!   r     s   z_InsdcWriter._wrap_locationc           	      C   s   |j sJ ||j dd}t|std| dt t|dkr-td| dt t|j	|}| j
| d| j | | d }| j| |j D ]\}}t|ttfrf|D ]}| || q\qO| || qOdS )	z=Write a single SeqFeature object to features table (PRIVATE).rl   _zFeature key 'rz      zF' is longer than maximum length specified by standard (15 characters).Nr}   )typer   r   r   r   r   r   r   rb   r]   QUALIFIER_INDENT_TMPr   r   r'   r   
qualifiersitemsrG   listtupler   )	r   featurerecord_lengthf_typer]   r   r   valuesr   r    r    r!   _write_feature  s6   


z_InsdcWriter._write_feature.Fc                 C   sX   z| j | }W n ty   | Y S w t|tr(|s"t|dks"J t|d S t|S )a  Get an annotation dictionary entry (as a string) (PRIVATE).

        Some entries are lists, in which case if just_first=True the first entry
        is returned.  If just_first=False (default) this verifies there is only
        one entry before returning it.
        r   r   )annotationsKeyErrorrG   r   r   r   )recordr   default
just_firstanswerr    r    r!   _get_annotation_str  s   
z _InsdcWriter._get_annotation_strc                 C   s  |   } t| |kr| gS |  }d} |r@t| d t|d  |kr@| d|d 7 } |   } |r@t| d t|d  |ks#| g}|r~|d} |rwt| d t|d  |krw| d|d 7 } |   } |rwt| d t|d  |ksZ||  |sE|rJ |S )zReturn a list of strings (PRIVATE).

        Any single words which are too long get returned as a whole line
        (e.g. URLs) without an exception or warning.
        rW   r   r   rl   )stripr   splitpopappend)textmax_lenwordsr   r    r    r!   _split_multi_line  s(     
  
z_InsdcWriter._split_multi_linec                 C   s   |j dd}t|ttfrd|}| |}g }|rSt||krG|d|d  d}|dkr5t	d|d|d  ||d d }}n|d}}|
| |s|S )z5Return a list of strings, splits on commas (PRIVATE).contigrW   Nr   r=   r`   zCould not break up CONTIG)r   getrG   r   r   rP   cleanr   r   rT   r   )r   r   r   r   r   rU   r   r    r    r!   _split_contig  s   

$


z_InsdcWriter._split_contig)NN)r   F)r,   r-   r.   r/   r   r   r   r   r   r   r   r   staticmethodr   r   r   r    r    r    r!   ri   g  s    
8#
ri   c                   @   s   e Zd ZdZdZdZdZdZdZdZ	dZ
d	d
 Zdd Zdd Zedd Zedd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!S )"GenBankWriterzGenBank writer.   rk   z-START##z-END##z :: <   	   c                 C   s   t || jk s	J t || j| j kr-|r#td|d|dt n
td|dt | jd|| j|	ddf  dS )	zrWrite single line in each GenBank record (PRIVATE).

        Used in the 'header' of each GenBank record.
        zAnnotation z too long for z line	 too longz%s%s
r}   rl   N)
r   HEADER_WIDTHr   r   r   r   r'   r   ljustr   )r   tagr   r    r    r!   _write_single_line=  s   z GenBankWriter._write_single_linec                 C   sJ   | j | j }| ||}| ||d  |dd D ]}| d| qdS )zuWrite multiple lines in each GenBank record (PRIVATE).

        Used in the 'header' of each GenBank record.
        r   r   NrW   r   r   r   r   r   r   r   r   linesr   r    r    r!   _write_multi_lineO  s   zGenBankWriter._write_multi_linec                 C   s8   t |D ]\}}|dkr| || q| d| qd S )Nr   rW   )	enumerater   )r   r   	text_listir   r    r    r!   _write_multi_entries[  s
   z"GenBankWriter._write_multi_entriesc                 C   s   d}z| j d }W n ty   | Y S w t|tr$t|dkr$|d }t|tr0|d }g d}t|tr?t|dkrA|S ztt	|dd  |
|d	d
 d t	|dd  W |S  tyk   |}Y |S w )Nz01-JAN-1980dater   r   z%d-%b-%Y)JANFEBMARAPRMAYJUNJULAUGSEPOCTNOVDEC            )r   r   rG   r   r   r   strftimeupperr   r   r   rT   )r   r   r   monthsr    r    r!   	_get_dated  s(   
8zGenBankWriter._get_datec                 C   sz   z| j d }W n ty   d}Y nw |dv rnddddddd}z|| }W n ty2   d}Y nw t|d	ks;J |S )
Ndata_file_divisionUNK)PRIRODMAMVRTINVPLNBCTVRLPHGSYNUNAESTPATSTSGSSHTGHTCENVCONTSAr   r   r   r   )FUNHUMMUSPROUNCXXXr   r   r   r   )r   divisionembl_to_gbkr    r    r!   _get_data_division  s*   -z GenBankWriter._get_data_divisionc                 C   s:   t d}| j|ddd}|rt ||kr||S d| S )z>Set the topology to 'circular', 'linear' if defined (PRIVATE).circulartopologyrW   r   rl   )r   r   r   )r   r   max_topology_lenr  r    r    r!   _get_topology  s
   
zGenBankWriter._get_topologyc           
   	   C   s0  |j }|r	|dkr|j}|r|dkr| j|ddd}t|dkr6t|d ttt| dkr6td	t t| dkrFt	d
|dt|dkrRtdt | |dd}|du rat	d|rt|dkr|
dd
dd}t|dkrtd|dt d}|dv rd}|dkrd}nd}| |}| |}t|dkrttt|dt|d  kr|d tt| }n%tt|d}||t|d  }t|dksJ |d|v sJ |t|dksJ t|dksJ d|||d||| |f }t|d krI| }	|	d d!vrt	d"| |	d  dksAd|	d#   v sAd$|	d#   v sAt	d%| | j| dS t|d ksVJ t||d&d'  |tt|gkskJ ||d'd( d)vrzt	d"| |d(d* d+vrt	d,| |d*d-  dksd|d*d-   v sd$|d*d-   v st	d%| |d-d. dkrt	d/| |d.d0  d1vrt	d2| |d0d3 dkrt	d4| |d5d6 dkrt	d7| |d8d9 d:krt	d;| |d<d= d:krt	d>| | j| dS )?zWrite the LOCUS line (PRIVATE).z<unknown name>z<unknown id>	accessionTr      r      zoIncreasing length of locus line to allow long name. This will result in fields that are not in usual positions.zInvalid whitespace in z for LOCUS linel   g] zThe sequence length is very long. The LOCUS line will be increased in length to compensate. This may cause unexpected behavior.molecule_typeN$missing molecule_type in annotations   zunassigned rW   zgenomic zMolecule type r   DNA)proteinPROTEINaabpr   rl   r   r   z!LOCUS       %s %s    %s %s %s %s
rj   )r  r  z=LOCUS line does not contain size units at expected position:
   RNAzALOCUS line does not contain valid sequence type (DNA, RNA, ...):
r   (   ,   )z bp z aa /   )   zss-zds-zms-zCLOCUS line does not have valid strand type (Single stranded, ...):
6   7   z2LOCUS line does not contain space at position 55:
?   )rW   linearr  zALOCUS line does not contain valid entry (linear, circular, ...):
@   z2LOCUS line does not contain space at position 64:
C   D   z2LOCUS line does not contain space at position 68:
F   G   -z6LOCUS line does not contain - at position 71 in date:
J   K   z6LOCUS line does not contain - at position 75 in date:
)nameidr   r   r   r   r   r   r   rT   r   r  r  rjustr   r   r   r   r'   r   repr)
r   r   locusmol_typeunitsr  r   name_lengthr   	splitliner    r    r!   _write_the_first_line  s    

,	*	z#GenBankWriter._write_the_first_linec                 C   s*  d}|j d D ]}t|tjsq|d7 }t|}|jrFt|jdkrF|j d}|r1d|v r1d}nd}|d||jd jd |jd j	f 7 }| 
d	| |jrV| d
|j |jr`| d|j |jrj| d|j |jrt| d|j |jr~| d|j |jr| d|j |jr| d|j qd S )Nr   
referencesr   r  r  residuesbasesz  (%s %i to %i)	REFERENCEz	  AUTHORSz	  CONSRTMz  TITLEz	  JOURNALz	  MEDLINEz	   PUBMEDz  REMARK)r   rG   r   	Referencer   r]   r   r   rZ   r[   r   authorsr   consrtmtitlejournal
medline_id	pubmed_idcomment)r   r   rt   rX   datar  r+  r    r    r!   _write_references  sB   
zGenBankWriter._write_referencesc                 C   sX  g }d|j v rj|j d }d}| D ]\}}| D ]\}}t||kr(t|n|}qq| D ]9\}}|d| | j  | D ]\}}d|t|  }	|| |	 | j |  qC|d| | j  q0d|j v r|j d }t|tr||	d7 }nt|t
tfr|t
|7 }ntd| d|d  |d	d  D ]}
| d
|
 qd S )Nstructured_commentr   z##rl   r:  r}   'Could not understand comment annotationCOMMENTr   rW   )r   r   r   r   STRUCTURED_COMMENT_STARTSTRUCTURED_COMMENT_DELIMSTRUCTURED_COMMENT_ENDrG   r   r   r   r   rT   r   )r   r   r   r:  paddingr   r;  subkeysubdataspacesr   r    r    r!   _write_comment  s6   




zGenBankWriter._write_commentc                 C   sJ   | j | j }| ||}| d|d  |dd  D ]}| d| qd S )NCONTIGr   r   rW   r   r   r   r   r   r   r   r   r   r    r    r!   _write_contig  s   zGenBankWriter._write_contigc              	   C   s   zt |}W n ty#   d|jv r| | Y d S | jd Y d S w | }t|}| jd td|| j	D ]4}| jt
|d | j t|t|| j	 |dD ]}| jd|||d    qU| jd q9d S )Nr   zORIGIN
r   r   
   rl   r}   )r   r   r   rK  r'   r   lowerr   r   LETTERS_PER_LINEr   r'  SEQUENCE_INDENTr   )r   r   r;  seq_lenline_numberr   r    r    r!   _write_sequence  s(   


 zGenBankWriter._write_sequencec                 C   sD  | j }| | |j}|ddkr(||dd d  r(|jddd }| j|d|dd}|}|j|d rVzd|t	|jddd f }W n	 t
yU   Y nw | j|d	dd}|j}|d
krgd}|d7 }| d| | d| |dkr| d| d|  n| d| g }|jD ]}	d|	vr|	dd}	||	 q| d| ~zd|jd }
|
ds|
d7 }
W n ty   d}
Y nw | d|
 d|jv r|jd }t|trt|dksJ ||d }| d| | d| |d | |d}t|| j| j kr|d| j| j d  d }| d| zd|jd }|ds5|d7 }W n tyB   d}Y nw | d| d|jv rd|jd }t|tr^|d }| d | d!|jv ro| | d"|jv s{d#|jv r| | |d$ t|}|jD ]	}| || q|  | |d% dS )&)Write a single record to the output file.r   r   Nr   r  Tr  z%s.%igi<unknown description>rW   
DEFINITION	ACCESSIONVERSIONz  GI:z: rV   DBLINK; keywordsKEYWORDSsegmentSEGMENTSOURCEr   organismr  z...z
  ORGANISMtaxonomy	db_sourceDBSOURCEr/  r:  r=  z)FEATURES             Location/Qualifiers
//
)!r'   r.  r&  countr   isdigitr   r   
startswithr   rT   descriptionr   r   dbxrefsr   r   r   rP   r   endswithr   rG   r   r   r   r   r<  rG  r   featuresr   rR  )r   r   r'   r   r  acc_with_versionrT  descrdbxrefs_with_spacexr[  r]  orgra  rb  r^   r   r    r    r!   write_record  s   
(











zGenBankWriter.write_recordN)r,   r-   r.   r/   r   r   r@  rB  rA  rN  rO  r   r   r   r   r   r  r  r.  r<  rG  rK  rR  rq  r    r    r    r!   r   2  s0    	
"
W
  ,$r   c                   @   s   e Zd ZdZdZdZdded   ZdZdZd	Z	d
Z
e	e
 Zd	Zdd Zdd Zdd Zdd Zdd Zedd Zdd Zdd Zdd Zdd ZdS ) 
EmblWriterzEMBL writer.   rk   FTrl   r   zFT   %s                z,FH   Key             Location/Qualifiers
FH
rL  r   c                 C   s2   | j | j }| ||}|D ]}| d| qd S )NCOrI  rJ  r    r    r!   rK    
   zEmblWriter._write_contigc              	   C   s  | j }zt|}W n ty%   d|jv r| | Y d S |d Y d S w | }t|}|jd}|d ur|d|v r||	d|	d }|	d|	d }|	d	|	d
 }|	d|	d }	||| | |	  }
|d|||||	|
f  n|d t
|| j D ]=}|d t
| jD ]}| j| | j|  }|d|||| j    q|t|d | j | j |d q|| j r|| j }|d t
| jD ]}| j| | j|  }|d|||| j   d q|t|| j |d d S d S )Nr   zSQ   
r  r  AaCcGgTr   z7SQ   Sequence %i BP; %i A; %i C; %i G; %i T; %i other;
z    rl   r   r}   r   )r'   r   r   r   rK  r   rM  r   r   re  r   rN  BLOCKS_PER_LINELETTERS_PER_BLOCKr   r'  POSITION_PADDINGr   )r   r   r'   r;  rP  r  a_countc_countg_countt_countotherrQ  blockr   r    r    r!   rR    s^   




 

&zEmblWriter._write_sequencec                 C   sR   t |dksJ |d | }t || jkrtd|dt | j|d  d S )Nr   r  zLine r   r}   )r   r   r   r   r   r'   r   )r   r   r   r   r    r    r!   r     s
   zEmblWriter._write_single_linec                 C   s2   | j | j }| ||}|D ]}| || qd S r?   r   r   r    r    r!   r     rv  zEmblWriter._write_multi_linec           
      C   s  d|j v r,|j ddd  r,d|j ddd  }| j|d|j ddd dd}nd}| j|d|j dd}d	|v rDtd
| dd|v rPtd| d| j|ddd}|jd}|du rftd|dvrstd| t	 |
 }d|v r~d}nd|v rd}nd|v rd}d}ntd| d| |}| j}	| dd|||||t||f  |	d | d|d	  |	d dS )z$Write the ID and AC lines (PRIVATE).r   r   zSV r  r   Tr  rW   ;z+Cannot have semi-colon in EMBL accession, ''rl   z'Cannot have spaces in EMBL accession, 'r  r  r  Nr  )r  zgenomic DNAzunassigned DNAmRNAr  r  zNon-standard molecule type: r  BPr  r  AAz$failed to understand molecule_type 'IDz%s; %s; %s; %s; ; %s; %i %s.XX
AC)r&  rsplitrf  r   rT   r   r   r   r   r   r   r  r'   r   r   r   )
r   r   versionr  r  r*  mol_type_upperr+  r   r'   r    r    r!   _write_the_first_lines  sN    

	
z!EmblWriter._write_the_first_linesc                 C   sr   z| j d }W n ty   d}Y nw |dv rnddd}z|| }W n ty.   d}Y nw t|dks7J |S )Nr   r   )r   r   r   r   r   r   r   r   r   r   r   r   TGNr   r   r   r   )r   r   r   r   )r   r   gbk_to_emblr    r    r!   r  "  s   &
zEmblWriter._get_data_divisionc                 C   s,   |j d D ]}| d| q| jd d S )Nr[  KWr  )r   r   r'   r   )r   r   keywordr    r    r!   _write_keywords]  s   zEmblWriter._write_keywordsc                 C   s   d}|j d D ]v}t|tjsq|d7 }| dd|  |jr:t|jdkr:| dd|jd jd |jd jf  |j	rH| dd	|j	 d
 |j
rS| d|j
  |jr_| d|jd  |jrm| dd|j d |jrw| d|j | jd qd S )Nr   r/  r   RNz[%i]RPz%i-%iRXzPUBMED; r   RGRAr  RTr~   z";RLr  )r   rG   r   r3  r   r]   r   rZ   r[   r9  r5  r4  r   r6  r7  r'   r   )r   r   rt   rX   r    r    r!   r<  f  s.   zEmblWriter._write_referencesc                 C   sj   |j d }t|tr|d}nt|ttfr|}ntd|s"d S |D ]}| d| q$| j	d d S )Nr:  r}   r>  CCr  )
r   rG   r   r   r   r   rT   r   r'   r   )r   r   r:  r   r   r    r    r!   rG    s   

zEmblWriter._write_commentc                 C   s  | j }| | t|jD ].}|dr'| d|dd d  |d  n|dr;| d|d  |d  nq|j}|dkrEd	}| d
| |d d|j	v rZ| 
| | d| |d zd|j	d d	 }W n ty{   d	}Y nw | d| |d d|j	v r| | d|j	v r| | || j t|}|jD ]}| || q|d | | |d dS )rS  zBioProject:PRr   Nr  r  zProject:rU  r   DEr[  OSr`  rZ  ra  OCr/  r:  rd  )r'   r  sortedri  rg  r   r   rh  r   r   r  r   rP   r   r<  rG  FEATURE_HEADERr   rk  r   rR  )r   r   r'   xrefrm  ra  r^   r   r    r    r!   rq    sL   















zEmblWriter.write_recordN)r,   r-   r.   r/   r   r   r   r   r  r  r~  rN  r  rK  rR  r   r   r  r   r  r  r<  rG  rq  r    r    r    r!   rr    s,    ;E
:	 rr  c                   @   s0   e Zd ZdZdZdZdded   ZdZdZd	S )

ImgtWriterz"IMGT writer (EMBL format variant).rs     rt  rl   r   zFT   %s                    z0FH   Key                 Location/Qualifiers
FH
N)	r,   r-   r.   r/   r   r   r   r   r  r    r    r    r!   r    s    r  c                 C      t  j| dd}t||dS )z Fast GenBank to FASTA (PRIVATE).Fdo_featuresfasta)r
   r%   r   r   in_fileout_filer(   r    r    r!   _genbank_convert_fasta     r  c                 C   r  )zFast EMBL to FASTA (PRIVATE).Fr  r  )r	   r%   r   r   r  r    r    r!   _embl_convert_fasta  r  r  __main__)run_doctest)verbose)r   )'r/   r   r   stringr   r   Bior   r   r   Bio.GenBank.Scannerr   r	   r
   Bio.Seqr   
Interfacesr   r   r   setr   r   r1   r3   r5   r<   r@   r_   rb   ri   r   rr  r  r  r  r,   
Bio._utilsr  r    r    r    r!   <module>   sR   
06
!>) L    R  e
