o
    Rŀg3U                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 G dd	 d	eZd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZG d d! d!e
Zd"S )#a  Implementations of Biopython-like Seq objects on top of BioSQL.

This allows retrieval of items stored in a BioSQL database using
a biopython-like SeqRecord and Seq interface.

Note: Currently we do not support recording per-letter-annotations
(like quality scores) in BioSQL.
    )Optional)
SeqFeature)Seq)SequenceDataAbstractBaseClass)_RestrictedDict)	SeqRecordc                       s6   e Zd ZdZdZd
 fdd	Zdd Zdd	 Z  ZS )_BioSQLSequenceDataz9Retrieves sequence data from a BioSQL database (PRIVATE).)
primary_idadaptor_lengthstartr   c                    s&   || _ || _|| _|| _t   dS )aU  Create a new _BioSQLSequenceData object referring to a BioSQL entry.

        You wouldn't normally create a _BioSQLSequenceData object yourself,
        this is done for you when retrieving a DBSeqRecord object from the
        database, which creates a Seq object using a _BioSQLSequenceData
        instance as the data provider.
        N)r	   r
   r   r   super__init__)selfr	   r
   r   length	__class__ A/var/www/html/myenv/lib/python3.10/site-packages/BioSQL/BioSeq.pyr   #   s
   z_BioSQLSequenceData.__init__c                 C   s   | j S )z"Return the length of the sequence.)r   r   r   r   r   __len__1   s   z_BioSQLSequenceData.__len__c           
      C   s(  t |tr|| j\}}}tt|||}|dkrdS n2|}|dk r1|| j7 }|dk r0t|n	|| jkr:t|| j| j	| j
| | j
| d }t|S |dkr{|dkro|| jkro| j| j	| j
| j
| j }|dS t| j	| j| j
| |S | j| j	| j
| | j
| }	|	dd| dS )z@Return a subsequence as a bytes or a _BioSQLSequenceData object.r          ASCIIN)
isinstancesliceindicesr   lenrange
IndexErrorr
   get_subseq_as_stringr	   r   ordencoder   )
r   keyr   endstepsizeicsequencefullr   r   r   __getitem__5   s>   



z_BioSQLSequenceData.__getitem__)r   r   )	__name__
__module____qualname____doc__	__slots__r   r   r+   __classcell__r   r   r   r   r      s    r   c                 C   sH   |  d|f}|sd S t|dkrtdt| d|d \}t|S )Nz5SELECT length FROM biosequence WHERE bioentry_id = %sr   Expected 1 response, got .r   )execute_and_fetchallr   
ValueErrorint)r
   r	   seqsgiven_lengthr   r   r   _retrieve_seq_len^   s   
r9   c           	      C   s,  |  d|f}|sd S t|dkrtdt| d|d \}}}zt|}t|}||kr9td| d| d}W nD ty   |d urOtd	| d|  d
|f}t|dkrftdt| d|d \}}}|rwtd| dt|}d}~Y nw ~|rt|| d|d}t|S td |dS )NzLSELECT alphabet, length, length(seq) FROM biosequence WHERE bioentry_id = %sr   r2   r3   r   z''length' differs from sequence length, z, Tz$Expected 'length' to be 'None', got zDSELECT alphabet, length, seq FROM biosequence WHERE bioentry_id = %sz*Expected 'seq' to have a falsy value, got F)r   r   r   )r4   r   r5   r6   	TypeErrorr   r   )	r
   r	   r7   moltyper8   r   have_seqseqdatar   r   r   _retrieve_seqk   sL   r@   c                 C   sZ   g }|  d|f}|D ]\}}}|r|dkr| d| }n|}|| d|  q|S )zBRetrieve the database cross references for the sequence (PRIVATE).z{SELECT dbname, accession, version FROM bioentry_dbxref join dbxref using (dbxref_id) WHERE bioentry_id = %s ORDER BY "rank"0r3   :)r4   append)r
   r	   _dbxrefsdbxrefsdbname	accessionversionvr   r   r   _retrieve_dbxrefs   s   rJ   c                 C   s  d}|  ||f}g }|D ][\}}}|  d|f}i }	|D ]\}
}|	|
g | q|  d|f}|D ]\}
}|
 d| }|	dg | q5|  d|f}g }|D ]X\}}}}|ra|d8 }|dkrgd }|d	vrstd
||f |d ur|d ur||k rdd l}ddlm} |d|||f | |d u rt	 }|d u rt	 }|||||f qU|  d|f}i }|D ]"\}}}}|r|dkr| d| }n|}|dkrd }||f||< qtj|d}||_
|	|_t|dkrnut|dkr |d \}}}}t| ||_||d\}}t|||_||j_||j_||j_nDg }|D ]}|\}}}}||d\}}|tj|||||d q$dd |D }t|dkr]d|v r]|d d d }t|d|_|| q|S )NzSELECT seqfeature_id, type.name, "rank" FROM seqfeature join term type on (type_term_id = type.term_id) WHERE bioentry_id = %s ORDER BY "rank"zvSELECT name, value FROM seqfeature_qualifier_value  join term using (term_id) WHERE seqfeature_id = %s ORDER BY "rank"zSELECT dbxref.dbname, dbxref.accession FROM dbxref join seqfeature_dbxref using (dbxref_id) WHERE seqfeature_dbxref.seqfeature_id = %s ORDER BY "rank"rB   db_xrefzeSELECT location_id, start_pos, end_pos, strand FROM location WHERE seqfeature_id = %s ORDER BY "rank"r   r   )r   Nz8Invalid strand %s found in database for seqfeature_id %s)BiopythonWarningz<Inverted location start/end (%i and %i) for seqfeature_id %szsSELECT location_id, dbname, accession, version FROM location join dbxref using (dbxref_id) WHERE seqfeature_id = %srA   r3    )type)NN)strandrefref_dbc                 S   s   h | ]}|j qS r   )rP   ).0_r   r   r   	<setcomp>,  s    z%_retrieve_features.<locals>.<setcomp>rL   join)r4   
setdefaultrC   r5   warningsBiorM   warnr   UnknownPosition_seqfeature_id
qualifiersr   "_retrieve_location_qualifier_valuelocation_operatorgetSimpleLocationlocationrP   rR   rQ   CompoundLocation)r
   r	   sqlresultsseq_feature_listseqfeature_idseqfeature_typeseqfeature_rankqvsr]   qv_nameqv_valuevalue	locationslocation_idr   r$   rP   rX   rM   remote_resultslookuprF   rG   rH   rI   featurelocsrb   strandsr   r   r   _retrieve_features   s   

ru   c                 C   s.   |  d|f}z|d W S  ty   Y dS w )NzASELECT value FROM location_qualifier_value WHERE location_id = %sr   rN   )execute_and_fetch_col0r   )r
   ro   rm   r   r   r   r^   :  s   
r^   c                 C   sZ   i }| t| | | t| | | t| | | t| || | t| | |S N)update_retrieve_alphabet_retrieve_qualifier_value_retrieve_reference_retrieve_taxon_retrieve_comment)r
   r	   taxon_idannotationsr   r   r   _retrieve_annotationsE  s   r   c                 C   s   |  d|f}t|dkrtdt| d|d }t|dkr+tdt| d|d }|dkr6d}n|d	kr=d
}n	|dkrDd}nd }|d urNd|iS i S )Nz7SELECT alphabet FROM biosequence WHERE bioentry_id = %sr   r2   r3   r   z%Expected 1 alphabet in response, got dnaDNArnaRNAproteinmolecule_type)r4   r   r5   )r
   r	   re   	alphabetsalphabetr   r   r   r   ry   O  s&   ry   c                 C   s^   |  d|f}i }|D ]!\}}|dkrd}n|dkrd}n|dkr#d}||g | q|S )NzqSELECT name, value FROM bioentry_qualifier_value JOIN term USING (term_id) WHERE bioentry_id = %s ORDER BY "rank"keywordkeywordsdate_changeddatesecondary_accession
accessions)r4   rW   rC   )r
   r	   rj   r]   namerm   r   r   r   rz   g  s   rz   c                 C   s   |  d|f}g }|D ]F\}}}}}}	}
t }|d us |d ur0|d ur(|d8 }t||g|_|r5||_|r:||_||_|	dkrE|
|_n|	dkrL|
|_	|
| q|rXd|iS i S )NzSELECT start_pos, end_pos,  location, title, authors, dbname, accession FROM bioentry_reference JOIN reference USING (reference_id) LEFT JOIN dbxref USING (dbxref_id) WHERE bioentry_id = %s ORDER BY "rank"r   PUBMEDMEDLINE
references)r4   r   	Referencera   rb   authorstitlejournal	pubmed_id
medline_idrC   )r
   r	   refsr   r   r$   rb   r   r   rF   rG   	referencer   r   r   r{   |  s0   r{   c                 C   s   i }|  d|f}|r|d |d< |  d|f}|r |d |d< |  d|f}|r9|d r9|d dkr9|d |d< g }|rV| d	|f\}}	}
||
krLn
|d| |
}|s=|r\||d
< |S )NzVSELECT name FROM taxon_name WHERE taxon_id = %s AND name_class = 'genbank common name'r   sourcezRSELECT name FROM taxon_name WHERE taxon_id = %s AND name_class = 'scientific name'organismz3SELECT ncbi_taxon_id FROM taxon WHERE taxon_id = %srA   
ncbi_taxidzSELECT taxon_name.name, taxon.node_rank, taxon.parent_taxon_id FROM taxon, taxon_name WHERE taxon.taxon_id=taxon_name.taxon_id AND taxon_name.name_class='scientific name' AND taxon.taxon_id = %staxonomy)rv   execute_oneinsert)r
   r	   r~   acommon_namesscientific_namesncbi_taxidsr   r   rankparent_taxon_idr   r   r   r|     s@   
r|   c                 C   s,   |  d|f}dd |D }|rd|iS i S )NzESELECT comment_text FROM comment WHERE bioentry_id=%s ORDER BY "rank"c                 S   s   g | ]}|d  qS )r   r   )rS   commr   r   r   
<listcomp>  s    z%_retrieve_comment.<locals>.<listcomp>comment)r4   )r
   r	   rj   commentsr   r   r   r}     s   r}   c                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zeeeed
Z	ede
e fddZejde
e ddfddZejdddZdd Zdd Zdd ZeeeedZedejfddZejdeej ddfddZejdddZdS )DBSeqRecordz4BioSQL equivalent of the Biopython SeqRecord object.c              	   C   sx   || _ || _| j d| jf\| _| _| _}}| _| _| _|r,|dkr,| d| | _	n|| _	t
||}t|d| _dS )a8  Create a DBSeqRecord object.

        Arguments:
         - adaptor - A BioSQL.BioSeqDatabase.Adaptor object
         - primary_id - An internal integer ID used by BioSQL

        You wouldn't normally create a DBSeqRecord object yourself,
        this is done for you when using a BioSeqDatabase object
        zSELECT biodatabase_id, taxon_id, name, accession, version, identifier, division, description FROM bioentry WHERE bioentry_id = %srA   r3   r:   N)_adaptor_primary_idr   _biodatabase_id	_taxon_idr   _identifier	_divisiondescriptionidr9   r   _per_letter_annotations)r   r
   r	   rG   rH   r   r   r   r   r     s(   

zDBSeqRecord.__init__c                 C       t | dst| j| j| _| jS )N_seq)hasattrr@   r   r   r   r   r   r   r   	__get_seq     
zDBSeqRecord.__get_seqc                 C   
   || _ d S rw   r   )r   r>   r   r   r   	__set_seq     
zDBSeqRecord.__set_seqc                 C      | ` d S rw   r   r   r   r   r   	__del_seq     zDBSeqRecord.__del_seqz
Seq objectreturnc                 C   r   )zDatabase cross references.rD   )r   rJ   r   r   rD   r   r   r   r   rE      s   
zDBSeqRecord.dbxrefsrm   Nc                 C   r   rw   rD   r   rm   r   r   r   rE   '  r   c                 C   r   rw   r   r   r   r   r   rE   +     c                 C   r   )N	_features)r   ru   r   r   r   r   r   r   r   __get_features/  r   zDBSeqRecord.__get_featuresc                 C   r   rw   r   )r   featuresr   r   r   __set_features4  s   
zDBSeqRecord.__set_featuresc                 C   r   rw   r   r   r   r   r   __del_features7  r   zDBSeqRecord.__del_featuresFeaturesc                 C   sH   t | ds!t| j| j| j| _| jr| j| jd< | jr!| j| jd< | jS )zAnnotations._annotationsgidata_file_division)r   r   r   r   r   r   r   r   r   r   r   r   r   <  s   
zDBSeqRecord.annotationsc                 C   s   |r|| _ d S i | _ d S rw   r   r   r   r   r   r   I  s   

c                 C   r   rw   r   r   r   r   r   r   P  r   )r   N)r,   r-   r.   r/   r   _DBSeqRecord__get_seq_DBSeqRecord__set_seq_DBSeqRecord__del_seqpropertyr>   liststrrE   setterdeleter_DBSeqRecord__get_features_DBSeqRecord__set_features_DBSeqRecord__del_featuresr   r   _AnnotationsDictr   r   r   r   r   r   r     s.    (r   N)r/   typingr   rY   r   Bio.Seqr   r   Bio.SeqRecordr   r   r   r9   r@   rJ   ru   r^   r   ry   rz   r{   r|   r}   r   r   r   r   r   <module>   s*   	@1 
'7