o
    Rŀg$,                     @   s   d Z zddlZW n ey   ddlmZ eddw ddlmZ ddlmZ ddl	m
Z
 dd	lmZ dd
lmZ G dd deZG dd deZdS )aS  Bio.SeqIO support for UCSC's "twoBit" (.2bit) file format.

This parser reads the index stored in the twoBit file, as well as the masked
regions and the N's for each sequence. It also creates sequence data objects
(_TwoBitSequenceData objects), which support only two methods: __len__ and
__getitem__. The former will return the length of the sequence, while the
latter returns the sequence (as a bytes object) for the requested region.

Using the information in the index, the __getitem__ method calculates the file
position at which the requested region starts, and only reads the requested
sequence region. Note that the full sequence of a record is loaded only if
specifically requested, making the parser memory-efficient.

The TwoBitIterator object implements the __getitem__, keys, and __len__
methods that allow it to be used as a dictionary.
    N)MissingPythonDependencyErrorzVInstall NumPy if you want to use Bio.SeqIO with TwoBit files.See http://www.numpy.org/)Seq)SequenceDataAbstractBaseClass)	SeqRecord   )	_twoBitIO)SequenceIteratorc                       sD   e Zd ZdZdZ fddZdd Zdd Zd	d
 Zdd Z	  Z
S )_TwoBitSequenceDataa;  Stores information needed to retrieve sequence data from a .2bit file (PRIVATE).

    Objects of this class store the file position at which the sequence data
    start, the sequence length, and the start and end position of unknown (N)
    and masked (lowercase) letters in the sequence.

    Only two methods are provided: __len__ and __getitem__. The former will
    return the length of the sequence, while the latter returns the sequence
    (as a bytes object) for the requested region. The full sequence of a record
    is loaded only if explicitly requested.
    )streamoffsetlengthnBlocks
maskBlocksc                    s    || _ || _|| _t   dS )zBInitialize the file stream and file position of the sequence data.N)r
   r   r   super__init__)selfr
   r   r   	__class__ F/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqIO/TwoBitIO.pyr   l   s   z_TwoBitSequenceData.__init__c              
   C   s  | j }t|tr||\}}}tt|||}|dkrdS n|dk r/||7 }|dk r/td|}|d }d}d}|d }|d d }|| }	| j}
z
|
| j	|  W n t
yl } zt|dkrgt
dd	 d	}~ww tj|
d
|	d}t||||| j| j}t|tr|S t|S )zJReturn the sequence contents (as a bytes object) for the requested region.r       zindex out of ranger         zseek of closed filez(cannot retrieve sequence: file is closedNuint8dtypecount)r   
isinstancesliceindiceslenrange
IndexErrorr
   seekr   
ValueErrorstrnpfromfiler   convertr   r   ord)r   keyr   startendstepsize	byteStartbyteEndbyteSizer
   	exceptiondatasequencer   r   r   __getitem__s   sD   


z_TwoBitSequenceData.__getitem__c                 C   s   | j S )zGet the sequence length.)r   r   r   r   r   __len__   s   z_TwoBitSequenceData.__len__c                 C   s>   t | j| j| j}| jddddf |_tjddd|_|S )zRemove the sequence mask.N)r      uint32r   )r	   r
   r   r   r   r&   emptyr   r   r3   r   r   r   upper   s   z_TwoBitSequenceData.upperc                 C   sF   t | j| j| j}| jddddf |_tjd| jggdd|_|S )z.Extend the sequence mask to the full sequence.Nr   r9   r:   )r	   r
   r   r   r   r&   arrayr   r<   r   r   r   lower   s   z_TwoBitSequenceData.lower)__name__
__module____qualname____doc__	__slots__r   r5   r7   r=   r?   __classcell__r   r   r   r   r	   ]   s    $r	   c                       s@   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Z  Z	S )TwoBitIteratorz%Parser for UCSC twoBit (.2bit) files.c                    s  t  j|ddd d| _| j}|d}|stdd}d}t||D ]\}}t||}|d	kr3 nq#td
|| _	|d}tj||dd}	|	dkrPtd|	dkrZtd|	 |d}tj||dd}
|d}tj||dd}|dkr|tdi }t
|
D ],}|d}tj||dd}||}|d}|d}tj||dd}||f||< q|| _| D ]\}\}}|| |d}tj||dd}t|||}|d}tj||dd}tj|||d}tj|||d}tj|dfdd|_||jdddf< || |jdddf< |d}tj||dd}tj|||d}tj|||d}tj|dfdd|_||jdddf< || |jdddf< |d}tj||dd}|dkrbtd| | |_|||< qdS )zRead the file index.btwoBit)modefmtFr   zEmpty file.)littlebig)z<u4z>u4iC'AzUnknown signature)signedr   zPversion-1 twoBit files with 64-bit offsets for index are currently not supportedr   z*Found unexpected file version %u; abortingz'Found non-zero reserved field; abortingASCIIr   r8   r9   r:   Nz Found non-zero reserved field %u)r   r   should_close_streamr
   readr$   zipint
from_bytes	byteorderr!   decode	sequencesitemsr#   r	   r&   r'   r;   r   r   tellr   )r   sourcer
   r3   
byteordersdtypesrT   r   	signatureversionsequenceCountreservedrV   inameSizenamer   dnaSizer4   nBlockCountnBlockStartsnBlockSizesmaskBlockCountmaskBlockStartsmaskBlockSizesr   r   r   r      s   















zTwoBitIterator.__init__c                 c   s4    | j  D ]\}}t|}t||d}|V  qdS )z'Iterate over the sequences in the file.idN)rV   rW   r   r   )r   r
   rb   r4   recordr   r   r   parse   s   zTwoBitIterator.parsec                 C   s<   z| j | }W n ty   t|dw t|}t||dS )zAReturn sequence associated with given name as a SeqRecord object.Nrj   )rV   r$   KeyErrorr   r   )r   rb   r4   r   r   r   r5      s   
zTwoBitIterator.__getitem__c                 C   s
   | j  S )z:Return a list with the names of the sequences in the file.)rV   keysr6   r   r   r   ro        
zTwoBitIterator.keysc                 C   s
   t | jS )zReturn number of sequences.)r    rV   r6   r   r   r   r7     rp   zTwoBitIterator.__len__)
r@   rA   rB   rC   r   rm   r5   ro   r7   rE   r   r   r   r   rF      s    D	rF   )rC   numpyr&   ImportErrorBior   Bio.Seqr   r   Bio.SeqRecordr    r   
Interfacesr   r	   rF   r   r   r   r   <module>   s$   DM