o
    RŀgZ                     @   sZ   d Z ddlZdddZdd ZG d	d
 d
ZG dd dZG dd dZG dd dZdS )a  Hold GenBank data in a straightforward format.

Classes:
 - Record - All of the information in a GenBank record.
 - Reference - hold reference data for a record.
 - Feature - Hold the information in a Feature Table.
 - Qualifier - Qualifiers on a Feature.

    N    c                 C   s  t j| }| s	dS |r| |}nd}g }|t| k r0|| |||   ||7 }|t| k sg }d}|D ]-}	t|d t|	 |krV|rS|dkrN||7 }|| |	}q6|dkr]|	}q6|||	 7 }q6|rk|| |d d }
|dd D ]}|
d| | d 7 }
qw|
S )av  Write a line of GenBank info that can wrap over multiple lines (PRIVATE).

    This takes a line of information which can potentially wrap over
    multiple lines, and breaks it up with carriage returns and
    indentation so it fits properly into a GenBank record.

    Arguments:
     - information - The string holding the information we want
       wrapped in GenBank method.
     - indent - The indentation on the lines we are writing.
     - wrap_space - Whether or not to wrap only on spaces in the
       information.
     - split_char - A specific character to split the lines on. By default
       spaces are used.

    z.
r    r   r   
N)RecordGB_LINE_LENGTHsplitlenappend)informationindent
wrap_space
split_charinfo_length
info_partscur_posoutput_partscur_part	info_partoutput_infooutput_part r   F/var/www/html/myenv/lib/python3.10/site-packages/Bio/GenBank/Record.py_wrapped_genbank   s:   


r   c                 C   s@   |  d}|d d }|dd D ]}|d| | d 7 }q|S )a&  Write out information with the specified indent (PRIVATE).

    Unlike _wrapped_genbank, this function makes no attempt to wrap
    lines -- it assumes that the information already has newlines in the
    appropriate places, and will add the specified indent to the start of
    each line.
    r   r   r   Nr   )r   )r   r   r   r   r   r   r   r   _indent_genbankO   s
   
	r   c                   @   sh  e Zd ZdZdZdZdZdZdZdZ	dZ
d	ee d
 Zde d	 eee  d
 Zde d	 eee  d
 Zd	ee d
 Zde	 d	 eee	  d
 Zdee
 d
 Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Z d+d, Z!d-d. Z"d/d0 Z#d1d2 Z$d3d4 Z%d5d6 Z&d7d8 Z'd9d: Z(d;S )<r   a#	  Hold GenBank information in a format similar to the original record.

    The Record class is meant to make data easy to get to when you are
    just interested in looking at GenBank data.

    Attributes:
     - locus - The name specified after the LOCUS keyword in the GenBank
       record. This may be the accession number, or a clone id or something else.
     - size - The size of the record.
     - residue_type - The type of residues making up the sequence in this
       record. Normally something like RNA, DNA or PROTEIN, but may be as
       esoteric as 'ss-RNA circular'.
     - data_file_division - The division this record is stored under in
       GenBank (ie. PLN -> plants; PRI -> humans, primates; BCT -> bacteria...)
     - date - The date of submission of the record, in a form like '28-JUL-1998'
     - accession - list of all accession numbers for the sequence.
     - nid - Nucleotide identifier number.
     - pid - Proteint identifier number
     - version - The accession number + version (ie. AB01234.2)
     - db_source - Information about the database the record came from
     - gi - The NCBI gi identifier for the record.
     - keywords - A list of keywords related to the record.
     - segment - If the record is one of a series, this is info about which
       segment this record is (something like '1 of 6').
     - source - The source of material where the sequence came from.
     - organism - The genus and species of the organism (ie. 'Homo sapiens')
     - taxonomy - A listing of the taxonomic classification of the organism,
       starting general and getting more specific.
     - references - A list of Reference objects.
     - comment - Text with any kind of comment about the record.
     - features - A listing of Features making up the feature table.
     - base_counts - A string with the counts of bases for the sequence.
     - origin - A string specifying info about the origin of the sequence.
     - sequence - A string with the sequence itself.
     - contig - A string of location information for a CONTIG in a RefSeq file
     - project - The genome sequencing project numbers
       (will be replaced by the dblink cross-references in 2009).
     - dblinks - The genome sequencing project number(s) and other links.
       (will replace the project information in 2009).

    O                  	   z%-sr   %c                 C   s   g | _ d| _d| _d| _d| _d| _d| _g | _d| _g | _	d| _
g | _d| _d| _d| _d| _d| _d| _g | _g | _g | _d| _d| _d| _d| _d| _g | _d| _d| _d| _g | _dS Initialize the class.r   N)	accessionbase_countscommentcontigdata_file_divisiondate	db_sourcedblinks
definitionfeaturesgikeywordslocusmolecule_typenidorganismoriginpidprimaryprojects
referencesresidue_typesegmentsequencesizesourcetaxonomytopologyversionwgs
wgs_scafldselfr   r   r   __init__   s>   
zRecord.__init__c                 C   s4  |   }||  7 }||  7 }||  7 }||  7 }||  7 }||  7 }||  7 }||  7 }|| 	 7 }|| 
 7 }||  7 }||  7 }| jD ]}|t|7 }qO||  7 }||  7 }| jD ]}|t|7 }qg||  7 }||  7 }||  7 }||  7 }||  7 }||  7 }|d7 }|S )a  Provide a GenBank formatted output option for a Record.

        The objective of this is to provide an easy way to read in a GenBank
        record, modify it somehow, and then output it in 'GenBank format.'
        We are striving to make this work so that a parsed Record that is
        output using this function will look exactly like the original
        record.

        Much of the output is based on format description info at:

        ftp://ncbi.nlm.nih.gov/genbank/gbrel.txt
        z//)_locus_line_definition_line_accession_line_version_line_project_line_dblink_line	_nid_line	_pid_line_keywords_line_db_source_line_segment_line_source_line_organism_liner:   str_comment_line_features_liner/   _base_count_line_origin_line_sequence_line	_wgs_line_wgs_scafld_line_contig_line)rF   output	referencefeaturer   r   r   __str__   s6   

zRecord.__str__c                 C   s   d}|d7 }|d| j  7 }|d7 }|d| j 7 }d| jv r"|d7 }n|d7 }d	| jv r3|d
| j 7 }n d| jv rD|d| j 7 }|d7 }n|d7 }|d| j 7 }|d7 }|d7 }|d| j 7 }|d7 }|d| j 7 }|d7 }|S )z7Provide the output string for the LOCUS line (PRIVATE).LOCUSz       z%-9sr   z%7sPROTEINz aaz bp circularz%17s-z
          z   z%-4s  z%3sz%11sr   )r2   r>   r;   r*   r+   rF   r^   r   r   r   rH      s,   




zRecord._locus_linec                 C   s$   t jd }|t| jd t j7 }|S )z1Provide output for the DEFINITION line (PRIVATE).
DEFINITION.)r   BASE_FORMATr   r.   GB_BASE_INDENTrg   r   r   r   rI     s   
zRecord._definition_linec                 C   sR   | j r%tjd }d}| j D ]	}|| d7 }q| }|t|tj7 }|S d}|S )z(Output for the ACCESSION line (PRIVATE).	ACCESSIONr   r   )r&   r   rj   rstripr   rk   )rF   r^   acc_infor&   r   r   r   rJ     s   

zRecord._accession_linec                 C   s>   | j rtjd }|| j 7 }|d7 }|| j d7 }|S d}|S )z&Output for the VERSION line (PRIVATE).VERSIONz  GI:r   r   )rB   r   rj   r0   rg   r   r   r   rK   +  s   

zRecord._version_linec                 C   s6   d}t | jdkrtjd }|d| j d7 }|S )Nr   r   PROJECTrf   r   )r	   r9   r   rj   joinrg   r   r   r   rL   6  s
   
zRecord._project_linec                 C   s<   d}t | jdkrtjd }d| j}|t|tj7 }|S )Nr   r   DBLINKr   )r	   r-   r   rj   rq   r   rk   )rF   r^   dblink_infor   r   r   rM   =  s   
zRecord._dblink_linec                 C   ,   | j rtjd }|| j  d7 }|S d}|S )zKOutput for the NID line. Use of NID is obsolete in GenBank files (PRIVATE).NIDr   r   )r4   r   rj   rg   r   r   r   rN   E     
zRecord._nid_linec                 C   rt   )zFOutput for PID line. Presumedly, PID usage is also obsolete (PRIVATE).PIDr   r   )r7   r   rj   rg   r   r   r   rO   N  rv   zRecord._pid_linec                 C   s^   d}| j r-|tjd 7 }d}| j D ]	}|| d7 }q|dd }|d7 }|t|tj7 }|S )z'Output for the KEYWORDS line (PRIVATE).r   KEYWORDS; Nri   )r1   r   rj   r   rk   )rF   r^   keyword_infokeywordr   r   r   rP   W  s   
zRecord._keywords_linec                 C   rt   )z#Output for DBSOURCE line (PRIVATE).DBSOURCEr   r   )r,   r   rj   rg   r   r   r   rQ   g  rv   zRecord._db_source_linec                 C   .   d}| j r|tjd 7 }|t| j tj7 }|S )z&Output for the SEGMENT line (PRIVATE).r   SEGMENT)r<   r   rj   r   rk   rg   r   r   r   rR   p  
   zRecord._segment_linec                 C   s    t jd }|t| jt j7 }|S )z?Output for SOURCE line on where the sample came from (PRIVATE).SOURCE)r   rj   r   r?   rk   rg   r   r   r   rS   x  s   
zRecord._source_linec                 C   sp   t jd }|t| jt j7 }|dt j 7 }d}| jD ]	}|| d7 }q|dd }|d7 }|t|t j7 }|S )z6Output for ORGANISM line with taxonomy info (PRIVATE).ORGANISMr   r   ry   Nrz   ri   )r   INTERNAL_FORMATr   r5   rk   r@   )rF   r^   taxonomy_infotaxr   r   r   rT   ~  s   

zRecord._organism_linec                 C   r~   )z'Output for the COMMENT lines (PRIVATE).r   COMMENT)r(   r   rj   r   rk   rg   r   r   r   rV     r   zRecord._comment_linec                 C   s,   d}t | jdkr|tjd 7 }|d7 }|S )z'Output for the FEATURES line (PRIVATE).r   r   FEATURESzLocation/Qualifiers
)r	   r/   r   BASE_FEATURE_FORMATrg   r   r   r   rW     s
   zRecord._features_linec                 C   s   d}| j rQ|tjd 7 }| j d}d|v r|d d|v st|d dkrHt|dkrG|d}|d}||dd| 7 }t|dks-n|| j 7 }|d7 }|S )z?Output for the BASE COUNT line with base information (PRIVATE).r   zBASE COUNT  r   r   r   z>7r   )r'   r   rj   r   remover	   pop)rF   r^   count_parts
count_info
count_typer   r   r   rX     s"   



	zRecord._base_count_linec                 C   s@   d}| j r|tjd 7 }| jr|t| jtj7 }|S |d7 }|S )z%Output for the ORIGIN line (PRIVATE).r   ORIGINr   )r=   r   rj   r6   r   rk   rg   r   r   r   rY     s   zRecord._origin_linec                 C   s   d}| j rRd}|t| j k rR|tjt|d  7 }tdD ]%}||d  }|d }| j || }|d|  7 }|t| j krB nq|d7 }|d7 }|t| j k s|S )	z)Output for all of the sequence (PRIVATE).r   r   r      
   r   r   <   )r=   r	   r   SEQUENCE_FORMATrU   rangelower)rF   r^   cur_seq_possection	start_posend_posseq_sectionr   r   r   rZ     s"   zRecord._sequence_linec                 C   &   d}| j r|tjd 7 }|| j 7 }|S )Nr   WGS)rC   r   rj   rg   r   r   r   r[     
   
zRecord._wgs_linec                 C   r   )Nr   
WGS_SCAFLD)rD   r   rj   rg   r   r   r   r\     r   zRecord._wgs_scafld_linec                 C   s2   d}| j r|tjd 7 }|t| j tjdd7 }|S )z=Output for CONTIG location information from RefSeq (PRIVATE).r   CONTIG,r   )r)   r   rj   r   rk   rg   r   r   r   r]     s   
zRecord._contig_lineN))__name__
__module____qualname____doc__r   rk   GB_FEATURE_INDENTGB_INTERNAL_INDENTGB_OTHER_INTERNAL_INDENTGB_FEATURE_INTERNAL_INDENTGB_SEQUENCE_INDENTrU   rj   r   OTHER_INTERNAL_FORMATr   INTERNAL_FEATURE_FORMATr   rG   ra   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rV   rW   rX   rY   rZ   r[   r\   r]   r   r   r   r   r   b   sj    +

") 			r   c                   @   s`   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd ZdS )	ReferenceaJ  Hold information from a GenBank reference.

    Attributes:
     - number - The number of the reference in the listing of references.
     - bases - The bases in the sequence the reference refers to.
     - authors - String with all of the authors.
     - consrtm - Consortium the authors belong to.
     - title - The title of the reference.
     - journal - Information about the journal where the reference appeared.
     - medline_id - The medline id for the reference.
     - pubmed_id - The pubmed_id for the reference.
     - remark - Free-form remarks about the reference.

    c                 C   s:   d| _ d| _d| _d| _d| _d| _d| _d| _d| _dS r$   )	numberbasesauthorsconsrtmtitlejournal
medline_id	pubmed_idremarkrE   r   r   r   rG     s   
zReference.__init__c                 C   s`   |   }||  7 }||  7 }||  7 }||  7 }||  7 }||  7 }||  7 }|S )z1Convert the reference to a GenBank format string.)_reference_line_authors_line_consrtm_line_title_line_journal_line_medline_line_pubmed_line_remark_linerg   r   r   r   ra     s   zReference.__str__c                 C   sJ   t jd }| jr| jr|d| j 7 }|| j 7 }n|| j 7 }|d7 }|S )z%Output for REFERENCE lines (PRIVATE).	REFERENCEz%-3sr   )r   rj   r   r   rg   r   r   r   r     s   
zReference._reference_linec                 C   r~   )z)Output for AUTHORS information (PRIVATE).r   AUTHORS)r   r   r   r   rk   rg   r   r   r   r   (  r   zReference._authors_linec                 C   r~   )z)Output for CONSRTM information (PRIVATE).r   CONSRTM)r   r   r   r   rk   rg   r   r   r   r   0  r   zReference._consrtm_linec                 C   r~   )z'Output for TITLE information (PRIVATE).r   TITLE)r   r   r   r   rk   rg   r   r   r   r   8  r   zReference._title_linec                 C   r~   )z)Output for JOURNAL information (PRIVATE).r   JOURNAL)r   r   r   r   rk   rg   r   r   r   r   @  r   zReference._journal_linec                 C   *   d}| j r|tjd 7 }|| j d 7 }|S )z)Output for MEDLINE information (PRIVATE).r   MEDLINEr   )r   r   r   rg   r   r   r   r   H  
   zReference._medline_linec                 C   r   )z(Output for PUBMED information (PRIVATE).r   PUBMEDr   )r   r   r   rg   r   r   r   r   P  r   zReference._pubmed_linec                 C   r~   )z(Output for REMARK information (PRIVATE).r   REMARK)r   r   r   r   rk   rg   r   r   r   r   X  r   zReference._remark_lineN)r   r   r   r   rG   ra   r   r   r   r   r   r   r   r   r   r   r   r   r     s    r   c                   @   *   e Zd ZdZd
ddZdd Zdd Zd	S )Featurea  Hold information about a Feature in the Feature Table of GenBank record.

    Attributes:
     - key - The key name of the feature (ie. source)
     - location - The string specifying the location of the feature.
     - qualifiers - A list of Qualifier objects in the feature.

    r   c                 C   s   || _ || _g | _dS r%   N)keylocation
qualifiers)rF   r   r   r   r   r   rG   k  s   
zFeature.__init__c                 C      d| j d| jdS )6Representation of the object for debugging or logging.zFeature(key=z, location=))r   r   rE   r   r   r   __repr__q     zFeature.__repr__c                 C   s>   t j| j }|t| jt jdd7 }| jD ]}|t|7 }q|S )z*Return feature as a GenBank format string.r   r   )r   r   r   r   r   r   r   rU   )rF   r^   	qualifierr   r   r   ra   u  s   

zFeature.__str__Nr   r   r   r   r   r   rG   r   ra   r   r   r   r   r   a  s
    
	r   c                   @   r   )	QualifierzHold information about a qualifier in a GenBank feature.

    Attributes:
     - key - The key name of the qualifier (ie. /organism=)
     - value - The value of the qualifier ("Dictyostelium discoideum").

    r   c                 C   s   || _ || _dS r   r   value)rF   r   r   r   r   r   rG     s   
zQualifier.__init__c                 C   r   )r   zQualifier(key=z, value=r   r   rE   r   r   r   r     r   zQualifier.__repr__c                 C   sF   dt j }d}tjjjD ]	}|| jv rd}q|t| j| j t j| S )z4Return feature qualifier as a GenBank format string.r   r   r   )	r   r   BioGenBank_BaseGenBankConsumerremove_space_keysr   r   r   )rF   r^   
space_wrapno_space_keyr   r   r   ra     s   

zQualifier.__str__Nr   r   r   r   r   r   r     s
    
r   )r   r   )	r   Bio.GenBankr   r   r   r   r   r   r   r   r   r   r   <module>   s   

<   o