o
    Rŀg                     @   s   d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 G dd	 d	e	ZG d
d de
ZedkrGddlmZ edd dS dS )a  Bio.SeqIO support for the UCSC nib file format.

Nib stands for nibble (4 bit) representation of nucleotide sequences.
The two nibbles in a byte each store one nucleotide, represented numerically
as follows:

    - ``0`` - T
    - ``1`` - C
    - ``2`` - A
    - ``3`` - G
    - ``4`` - N (unknown)

As the first bit in a nibble is set if the nucleotide is soft-masked, we
additionally have:

    - ``8`` - t
    - ``9`` - c
    - ``a`` - a
    - ``b`` - g
    - ``c`` - n (unknown)

A nib file contains only one sequence record.
You are expected to use this module via the Bio.SeqIO functions under
the format name "nib":

    >>> from Bio import SeqIO
    >>> record = SeqIO.read("Nib/test_even_bigendian.nib", "nib")
    >>> print("%i %s..." % (len(record), record.seq[:20]))
    50 nAGAAGagccgcNGgCActt...

For detailed information on the file format, please see the UCSC
description at https://genome.ucsc.edu/FAQ/FAQformat.html.
    N)Seq)	SeqRecord   )SequenceIterator)SequenceWriterc                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )NibIteratorzParser for nib files.c                    s   t  j|ddd dS )a  Iterate over a nib file and yield a SeqRecord.

            - source - a file-like object or a path to a file in the nib file
              format as defined by UCSC; the file must be opened in binary mode.

        Note that a nib file always contains only one sequence record.
        The sequence of the resulting SeqRecord object should match the sequence
        generated by Jim Kent's nibFrag utility run with the -masked option.

        This function is used internally via the Bio.SeqIO functions:

        >>> from Bio import SeqIO
        >>> record = SeqIO.read("Nib/test_even_bigendian.nib", "nib")
        >>> print("%s %i" % (record.seq, len(record)))
        nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50

        You can also call it directly:

        >>> with open("Nib/test_even_bigendian.nib", "rb") as handle:
        ...     for record in NibIterator(handle):
        ...         print("%s %i" % (record.seq, len(record)))
        ...
        nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50

        bNib)modefmtNsuper__init__)selfsource	__class__ C/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqIO/NibIO.pyr   8   s   zNibIterator.__init__c                 C   sR   | d}|std| }|dkrd}n|dkrd}ntd| ||}|S )z9Start parsing the file, and return a SeqRecord generator.   zEmpty file.3a3de96blittle6be93d3abigz"unexpected signature in nib header)read
ValueErrorhexiterate)r   handleword	signature	byteorderrecordsr   r   r   parseT   s   
zNibIterator.parsec                 c   s    | d}t||}|  }t|}|d dkr&t||kr%tdn|d dkr>t||d kr8td|d| }t|dsItdt	
dd	}||}t|}	t|	}
|
V  dS )
z)Iterate over the records in the nib file.r      r   zUnexpected file sizer   N
   0123489abcz&Unexpected sequence data found in file
   TCAGNtcagn)r   int
from_bytesbinasciihexlifylenr   setissubsetbytes	maketrans	translater   r   )r   r   r!   numberlengthdataindicestablenucleotidessequencerecordr   r   r   r   c   s(   



zNibIterator.iterate)__name__
__module____qualname____doc__r   r#   r   __classcell__r   r   r   r   r   5   s
    r   c                       s@   e Zd ZdZ fddZ fddZdd Z fdd	Z  ZS )
	NibWriterzNib file writer.c                    s   t  j|dd dS )zInitialize a Nib writer object.

        Arguments:
         - target - output stream opened in binary mode, or a path to a file

        wb)r
   Nr   )r   targetr   r   r   r   |   s   zNibWriter.__init__c                    sT   t    | j}tj}|dkrd}n|dkrd}ntd| |t| dS )zWrite the file header.r   r   r   r   zunexpected system byte order N)	r   write_headerr   sysr!   RuntimeErrorwriter.   fromhex)r   r   r!   r    r   r   r   rA      s   
zNibWriter.write_headerc           
      C   s   | j }|j}t|}t|}|td| tdd}|d }|d }||7 }t|	ds4t
d||}	|t|	 dS )	z)Write a single record to the output file.ir&   r%   r$      Ts
   ACGTNacgtnz0Sequence should contain A,C,G,T,N,a,c,g,t,n onlyN)r   seqr.   r+   rD   structpackr/   r,   r-   r   r0   r)   	unhexlify)
r   r8   r   r7   r6   r2   r5   paddingsuffixr4   r   r   r   write_record   s   
zNibWriter.write_recordc                    s   t  j|ddd}|S )zKWrite the complete file with the records, and return the number of records.r   )mincountmaxcount)r   
write_file)r   r"   countr   r   r   rQ      s   zNibWriter.write_file)	r9   r:   r;   r<   r   rA   rN   rQ   r=   r   r   r   r   r>   y   s    	r>   __main__)run_doctest)verbose)r<   r)   rI   rB   Bio.Seqr   Bio.SeqRecordr   
Interfacesr   r   r   r>   r9   
Bio._utilsrT   r   r   r   r   <module>   s   "D/