o
    Rŀg @                    @   s^  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
ZddlmZ ddlmZ ddlmZ ddlmZ ed	d
ZG dd deZeddedddd
edddd
edddd
edddd
edddd
edddd
edddd
edd d!d
edd"d#d
ed$d%d&d
ed'd(d)d
ed'd*d+d
ge_G d,d- d-ejZG d.d/ d/ejZG d0d1 d1ej	ZG d2d3 d3ZG d4d5 d5eZG d6d7 d7ZG d8d9 d9ZG d:d; d;eZG d<d= d=ZG d>d? d?eZ G d@dA dAZ!G dBdC dCZ"G dDdE dEe!Z#G dFdG dGZ$G dHdI dIZ%G dJdK dKZ&G dLdM dMZ'G dNdO dOZ(G dPdQ dQZ)dS )Ra  Bio.Align support for alignment files in the bigBed format.

The bigBed format stores a series of pairwise alignments in a single indexed
binary file. Typically they are used for transcript to genome alignments. As
in the BED format, the alignment positions and alignment scores are stored,
but the aligned sequences are not.

See http://genome.ucsc.edu/goldenPath/help/bigBed.html for more information.

You are expected to use this module via the Bio.Align functions.
    N)
namedtuple)BytesIO)	Alignment)
interfaces)Seq)	SeqRecordField)as_typenamecommentc                       sZ   e Zd ZU dZd ed< dd Zedd Zedd Zd	d
 Z	dd Z
 fddZ  ZS )AutoSQLTablezJAutoSQL table describing the columns of an (possibly extended) BED format.defaultc                 C   s   || _ || _|| dd< dS )zKCreate an AutoSQL table describing the columns of an (extended) BED format.N)r
   r   )selfr
   r   fields r   D/var/www/html/myenv/lib/python3.10/site-packages/Bio/Align/bigbed.py__init__M   s   zAutoSQLTable.__init__c                 C   s  | dsJ |dd  }|dd\}}|dksJ |dd\}}|ds,J |dd}|d| }||d d  }|dsIJ | dsPJ |dd  }g }|r|d}|d|d }||d | }	|d|  }
|
 d	sJ |
dd dd\}}| d
r|d}|d| }n|}|dv sJ t|||	}|	| ||d d  }|s\t
|||S )z?Return an AutoSQLTable initialized using the bytes object data.    N   table"();][)
intuintshortushortbyteubytefloatcharstringlstring)endswithdecodesplit
startswithfindstripindexrsplitr   appendr   )clsdatatextwordr
   ir   r   jfield_comment
definition
field_type
field_name	data_typefieldr   r   r   
from_bytesS   s<   



zAutoSQLTable.from_bytesc                 C   s   |  | d S )z@Return an AutoSQLTable initialized using the string object data.r   )r<   encode)r0   r1   r   r   r   from_string   s   zAutoSQLTable.from_stringc                 C   s   t dd | D }t dd | D d }g }|d| j  |d| j  |d | D ]}|jd }|d	| d
| d|j||jf  q-|d d|S )Nc                 s   s    | ]
}t t|jV  qd S N)lenstrr	   .0r;   r   r   r   	<genexpr>   s    z'AutoSQLTable.__str__.<locals>.<genexpr>c                 s   s    | ]}t |jV  qd S r?   )r@   r
   rB   r   r   r   rD      s    r   z	table %s
z"%s"
z(
r   z   %-zs %-z
s    "%s"
z)
 )maxr/   r
   r   r	   join)r   
type_width
name_widthlinesr;   r
   r   r   r   __str__   s$   



zAutoSQLTable.__str__c                 C   s   t |  d S )Nr   )rA   r=   r   r   r   r   	__bytes__   s   zAutoSQLTable.__bytes__c                    s2   t |trt |}t| j| j|S t |S r?   )
isinstanceslicesuper__getitem__r   r
   r   )r   r4   r   	__class__r   r   rQ      s   
zAutoSQLTable.__getitem__)__name__
__module____qualname____doc____annotations__r   classmethodr<   r>   rK   rM   rQ   __classcell__r   r   rR   r   r   H   s   
 
,
r   bedzBrowser Extensible Datar%   chromz)Reference sequence chromosome or scaffoldr   
chromStartzStart position in chromosomechromEndzEnd position in chromosomer
   zName of item.scorezScore (0-1000)zchar[1]strandz+ or - for strand
thickStartz4Start of where display should be thick (start codon)thickEndz1End of where display should be thick (stop codon)reservedz Used as itemRgb as of 2004-11-22r   
blockCountzNumber of blockszint[blockCount]
blockSizesz#Comma separated list of block sizeschromStartsz&Start positions relative to chromStartc                       s`   e Zd ZdZdZdZ								d fd
d	Zdd Zdd Zdd Z	dd Z
dd Z  ZS )AlignmentWriterz1Alignment file writer for the bigBed file format.bigBedwb   NT      r   c	           	         sR   |dk s|dkrt dt | || _|| _|| _|| _|| _|| _|| _	dS )a  Create an AlignmentWriter object.

        Arguments:
         - target       - output stream or file name.
         - bedN         - number of columns in the BED file.
                          This must be between 3 and 12; default value is 12.
         - declaration  - an AutoSQLTable object declaring the fields in the
                          BED file.
                          Required only if the BED file contains extra (custom)
                          fields.
                          Default value is None.
         - targets      - A list of SeqRecord objects with the chromosomes in
                          the order as they appear in the alignments. The
                          sequence contents in each SeqRecord may be undefined,
                          but the sequence length must be defined, as in this
                          example:

                          SeqRecord(Seq(None, length=248956422), id="chr1")

                          If targets is None (the default value), the alignments
                          must have an attribute .targets providing the list of
                          SeqRecord objects.
         - compress     - If True (default), compress data using zlib.
                          If False, do not compress data.
                          Use compress=False for faster searching.
         - blockSize    - Number of items to bundle in r-tree.
                          See UCSC's bedToBigBed program for more information.
                          Default value is 256.
         - itemsPerSlot - Number of data points bundled at lowest level.
                          See UCSC's bedToBigBed program for more information.
                          Use itemsPerSlot=1 for faster searching.
                          Default value is 512.
         - extraIndex   - List of strings with the names of extra columns to be
                          indexed.
                          Default value is an empty list.
           rj   zbedN must be between 3 and 12N)

ValueErrorrP   r   bedNdeclarationtargetscompressextraIndexNamesitemsPerSlot	blockSize)	r   targetro   rp   rq   rr   rt   ru   
extraIndexrR   r   r   r      s   /
zAlignmentWriter.__init__c                 C   s  | j du r	|j }n| j }t }| j|_| jdu r6z|jd| j | _W n ty5   tjd| j | _Y nw | j}t||_	t
| j|}| |||\}}|t|j |ttj | |_|t| | |_|ttj | |_|t|j | |_t |t| jt|| | |_t|}	|t|dtj |t| |  |||	|\}
}| j!rt"|
| j#t$j |_%nd|_%| |_&t' || jd|j&| | (|||j&|j ||	\}}t||_)|D ]}| |_*|j+,  t |j+| j| q|-d |t| |t| |-|j |t| |j| ks6J |.| |-dt/j0 |j1dtj}|| dS )zWrite the alignments to the file stream, and return the number of alignments.

        alignments - A list or iterator returning Alignment objects
        stream     - Output file stream.
        N   r   r      )2rq   _Headerro   definedFieldCountrp   AttributeErrorr   r   r@   
fieldCount_ExtraIndicesrs   _get_chrom_usagewritebytessize_ZoomLevelstellautoSqlOffsettotalSummaryOffset_SummaryextraIndicesOffsetchromosomeTreeOffset_BPlusTreeFormatterminru   fullDataOffsetcalculate_reductionsto_bytessys	byteorder
initializewrite_alignmentsrr   rF   rt   _RegionSummaryuncompressBufSizefullIndexOffset_RTreeFormatter_write_zoom_levels
zoomLevels
fileOffsetchunkssortseektofileioSEEK_END	signature)r   stream
alignmentsrq   headerrp   extra_indiceschromUsageListaveSize
reductionsmaxBlockSizeregionszoomListtotalSumextra_indexr1   r   r   r   
write_file  s   















zAlignmentWriter.write_filec                 C   s  d}d}d}d}d}g }	d}
d}t j}|D ]}|jd j}|jd }|jd }|D ]}|| q)||krCtd| d| d| d	|d
7 }||| 7 }||kr||kr]td| d	|rk|	|||f |d
7 }|D ]	}|j|krv nqmtd| d| d	|}t|
t	|}
t	|}d}||krtd| d| d| d| d	|dkr|| }||k r|dk rtd| d	|}|}q|r|	|||f t
j|	dd|
 fddgd}	|dkr|| }||_|	|fS )Nr   rE   r   r   r   r   r   zend (z) before start (z) in alignment [r   r   z7alignments are not sorted by target name at alignment [zfailed to find target 'z' in target list at alignment [zend coordinate z bigger than z	 size of z at alignment [z]'z(alignments are not sorted at alignment [r
   S)id=i4r   r   dtype)r   maxsize	sequencesr   coordinatesupdateMaxFieldSizern   r/   rF   r@   nparray_len)r0   r   rq   r   r   chromId
totalBasesbedCountr
   r   keySize	chromSizeminDiff	alignmentr\   startendr   rv   	lastStartdiffr   r   r   r   f  s~   




z AlignmentWriter._get_chrom_usagec                 C   s  t  }t }t|dkrd|_d|_||fS | j}| j}	| j}
|d }| |d _	|D ]}|d t
j }|	r;|d }||krA nq,|d }|d | |
t
j }|	rZt||}nt||}g }g }t||}t|d }|t j }|D ]4}tj }|||}|D ]%}|| || || |jk rt|}|j}|| q||7 }qqv|  t||d ksJ |d |d _| }||d _t |||
|| |	rt||}nt|t
j}||||||
 ||fS )Nr              r   scale) r   r   r@   minValmaxValru   rr   rt   r   
dataOffsetr   r   r   _ZippedBufferedStream_BufferedStream
_RangeTreegenerater   bbiResIncrementr   r   generate_summariesr   r/   r   copyr   flushreductionLevelindexOffsetr   reduce)r   r   outputdataSizer   r   r   r   ru   
doCompressrt   maxReducedSizeinitialReductionreducedSizer   bufferr   rezoomedListtreesr   doubleReductionSizetreer   	summariessummaryrezoomedr   r   r   r   r     sp   5






	


z"AlignmentWriter._write_zoom_levelsc                 C   s  | j }g }|jj}t|dkrtd| dt|dk r!td|jd }|jd }||k r:td| d	| d
|dkr\|jj}|dkrId}nt|dkrWtd| d|| |dkrz|j}W n t	yp   d}Y nw |dk sy|dkrtd| dt
|}|| |dkr|jd |jd krd}	nd}	||	 |dkrz|j}
W n t	y   |}
Y nw |t
|
 |dkr-z|j}W n t	y   |}Y nSw ||
k rtd|
 d| d
|
dkr|
|k s|
|krtd| d| d| d |
 d| 
|dkr&||k s||kr&td!| d| d| d |
 d| 
|t
| |d"kr|z|j}W n t	yC   d}Y n4w |d#d#}t|dkr^td$d% |D r^ndt|  krld&k ron nntd'| d(|| |d)krt|j}t|dkdd*k}|d| }t|}|t
| |d+kr|d#d,d% |D d#  |d-kr|jdd d.f | | }|d#d/d% |D d#  |d0kr|d1krtd2| d3|jd4 }|jd5 }t|}|t|ks J |t
| |d#| |d#d6d% |D  | j|d  D ]2}|j|j }t|t
r8|| q#t|ttfrI|t
| q#|d#tt
| q#d7| }||||fS )8N   zalignment target name 'z.' is too long (must not exceed 254 characters)r   z.alignment target name cannot be blank or emptyr   r   zchromStart after chromEnd (z > r   rm   rE   .zalignment query name 'z-' is too long (must not exceed 255 charactersry   r   i  zscore (z) must be between 0 and 1000   )r   r   )r   r   +-      zthickStart (z) after thickEnd (zthickStart out of range for :z, thick:zthickEnd out of range for rx   ,c                 s   s,    | ]}d t |  kodk n  V  qdS )r   rl   N)r   )rC   colorr   r   r   rD   0  s   * z2AlignmentWriter._extract_fields.<locals>.<genexpr>l        z?Expecting color to consist of r,g,b values from 0 to 255. Got ''	   r   
   c                 s       | ]}t |V  qd S r?   rA   )rC   ru   r   r   r   rD   @         r   c                 s   r   r?   r   )rC   r]   r   r   r   rD   C  r   rj      zUnexpected value z for bedN in _extract_fieldsexpIds	expScoresc                 s   r   r?   r   )rC   expScorer   r   r   rD   M  r   	)ro   rv   r   r@   rn   r   queryr/   r_   r|   rA   ra   rb   itemRgbrstripr)   allr   r   r   sumrF   rG   annotationsrp   r
   rN   r#   mapr=   )r   r   ro   rowr\   r]   r^   r
   r_   r`   ra   rb   r   colorsstepsalignedre   rd   rf   r   r   expCountr;   valuerestr   r   r   _extract_fields  s   











  

" 








zAlignmentWriter._extract_fieldsc                 C   sB  | j }d}d}d}d}	d}
g }| jdu rt }nt }d}td}d}d}t|}	 zt|}W n ty>   |}d}Y nw | 	|\}}}}||
kr\|
durR|}|}
|d7 }d|d< ||kr|
 }|
 }||krn|}| }|| |d |d |r|
 }|| }|D ]
}|||||	 q|	}||_|du r	 ||fS d}|dkrt|||}|| n||jkr||_|d7 }|D ]6}||d kr|d	  d7  < ||d
  |d< ||d kr|d	  d7  < |d  |d
 7  < ||d ksq|r|D ]	}|||	 q|	d7 }	||||}||| d  q+)zWrite alignments to the output file, and return the number of alignments.

        alignments - A list or iterator returning Alignment objects
        stream     - Output file stream.
        r   r   NTz=IIIFr   r   r   r   r   )rt   rr   _ZippedStreamr   structStructiternextStopIterationr  r   getvaluer   r   truncateaddOffsetSizeoffset_Regionr/   r   addKeysFromRowpack)r   r   r   r   r   rt   r   itemIxsectionStartIxsectionEndIxcurrentChromr   r   r   	formatterdoneregionr   r\   r   r   r
  blockStartOffsetr   r1   blockEndOffsetru   r   r  r   r   r   r   Y  s   





z AlignmentWriter.write_alignments)rj   NNTrk   rl   r   )rT   rU   rV   rW   fmtmoder   r   r   r   r  r   rZ   r   r   rR   r   rg      s"    :NC>rrg   c                   @   sb   e Zd ZdZdZdZdd Zdd Zdd	 Zd
d Z	dd Z
dd Zdd Zdd ZdddZdS )AlignmentIteratorzAlignment iterator for bigBed files.

    The pairwise alignments stored in the bigBed file are loaded and returned
    incrementally.  Additional alignment information is stored as attributes
    of each alignment.
    rh   bc              
   C   s2  t |}|j}|j}|| _|j}|j}|j}t|}|| t	|j
}	|D ]&}
|
j}|| ||	j}|	|\	}}}}}}}}| _|t	jksMJ q'| ||| _|| t|d |d\}|| _|jdkrrd| _nd| _||j t||| _||j t	||| _| || _d S )NQrx   r   TF)rz   fromfiler   r   r}   r{   r   r   readr   formatter_headerr   r   r   unpackrt   r   _read_autosqlrp   r  _lengthr   _compressedr   r   rq   r   r   _iterate_index_data)r   r   r   r   r   r}   r{   r   r   r  	zoomLevelr   r1   r   ru   nItemsstartChromId	startBase
endChromIdendBaseendFileOffset	dataCountr   r   r   _read_header  sL   





zAlignmentIterator._read_headerc                 C   sN   |j |j }|j}|j| _||j ||}t|}| 	||| j |S r?   )
r   r   r}   r{   ro   r   r(  r   r<   _analyze_fields)r   r   r   autoSqlSizer}   r1   rp   r   r   r   r+    s   

zAlignmentIterator._read_autosqlc                 C   s  d}t | jD ]}|| j}||| krtd|| |f q||kr&g | _t ||D ]X}|| j}|| j}d|v rMd|v rMd}	|d\}}
| }nd}	|dv rVt}n|dv r]t	}n|d	krdt
}n|d
v rkt}ntd| |	r{|}|fdd}| j||g q+d S )N)r\   r]   r^   r
   r_   r`   ra   rb   rc   rd   re   rf   z$Expected field name '%s'; found '%s'r   r   TF)r   r   r   r    )r!   r"   r#   )r#   r$   r%   r&   zUnknown field type %sc                    s"   |  dd} fdd|D S )Nr   c                    s   g | ]} |qS r   r   )rC   r	  item_converterr   r   
<listcomp>#      zHAlignmentIterator._analyze_fields.<locals>.converter.<locals>.<listcomp>)r   r)   )r1   r<  valuesr   r;  r   	converter!  s   z4AlignmentIterator._analyze_fields.<locals>.converter)rangero   r
   rn   _custom_fieldsr	   r)   r,   r   r   r#   rA   	Exceptionr/   )r   r   r}   r{   namesr4   r
   r9   r8   
make_array_r@  r<  r   r   r   r9    s@   



z!AlignmentIterator._analyze_fieldsc                 c   sF   t | jd }|j}| j}	 z|j}W n ty   ||j |	|j
}| jdkr2t|}|ra||d | \}}}	|d|d }
|||
 }||
d  }|||	|dt|fV  |s4	 |j}|d u rlY d S t|jD ]\}}t|t|kr nqqtdz	|j|d  }W n ty   |}Y nw nqbY nw |d }q)NIIITr   r   Failed to find child node)r  r  r   r   r   childrenr|   r   r   r(  r   r-  zlib
decompressr*  r-   r@   parent	enumerater   RuntimeError
IndexError)r   r   r  r   noderI  r1   r   r]   r^   r4   r
  rL  r-   childr   r   r   r.  '  sN   


z AlignmentIterator._iterate_indexc              
   c   sP   t | jd }|j}|d }|d }| j}		 z|	j}
W n ty   ||	j |	|	j
}| jdkr:t|}| jdkro||d | \}}}||krn||k rW||k sc||krn||ksc||krn|||||t|fV  nVd}t|}||k r|| }|||| \}}}|}|d|d }||| }|}||krqu||ks||kr||krqu||kr||krqu||||dt|fV  ||k syY n%w d}|
D ]}|j|jf||fk rq||f|j|jfk rqd} |r|}	q	 |	j}|d u rd S t|jD ]\}}t|	t|kr
 nqtdz	|j|d  }	W n ty$   |}	Y nw nqq)NrG  r   Tr   r   FrH  )r  r  r   r   r   rI  r|   r   r   r(  r   r-  rJ  rK  rt   r*  r@   r-   
endChromIxr5  startChromIxr3  rL  rM  r   rN  rO  )r   r   chromIxr   r   r  r   padded_start
padded_endrP  rI  r1   child_chromIxchild_chromStartchild_chromEndr4   nr5   r
  visit_childrQ  rL  r-   r   r   r   _search_indexP  s   





	2zAlignmentIterator._search_indexc           	      C   sH   zt | j}W n
 ty   Y d S w |\}}}}}}| ||||||S r?   )r  r/  r  _create_alignment)	r   r   r  r   r]   r^   r
  	dataStartdataEndr   r   r   _read_next_alignment  s   z&AlignmentIterator._read_next_alignmentc                 C   sL  ||d  dks
J |||d  }|r|  d}ng }| j| }| jdkr-|d }	nd }	| jdkr9|d }
nd}
| jdkrt|d	 }t|d
 ddt}t|d ddt}t||krrt	dt||f t||krt	dt||f d}d}||gg}t
||D ] \}}||kr|||g |}||7 }||7 }|||g qt| }t|}n|| }td|gd|gg}|}|dd d f  |7  < td |d}t||	d}||g}|
dkr||dd d f  |dd d f< ||d krt	d||d f ||d kr t	d||d f t||}t|| jd krPi |_t
|| jd d  | jD ]\}}|\}	}|||j|	< q?| jdkrX|S |d }zt|}W n
 t	yl   Y n
w | rvt|}||_| jd	kr|S t|d |_| jd
kr|S t|d |_| jdkr|S |d |_|S )Nr   r   r   rm   r   r   r   r   r   r   r   rx   z:Inconsistent number of block sizes (%d found, expected %d)zDInconsistent number of block start positions (%d found, expected %d)lengthr   r   r   z/Inconsistent chromStart found (%d, expected %d)r   z-Inconsistent chromEnd found (%d, expected %d)ry   )r(   r)   rq   ro   r   r   fromiterr   r@   rn   zipr/   r   	transposer  r   r   r   r  rB  r#   
is_integerr_   ra   rb   r   )r   r   r]   r^   r
  r^  r_  wordstarget_recordr
   r`   rd   re   blockStarts	tPosition	qPositionr   ru   
blockStartqSizequery_sequencequery_recordrecordsr   r3   custom_fieldr@  r_   r   r   r   r]    s   









 


"

z#AlignmentIterator._create_alignmentc                 C   s   | j S r?   )r,  rL   r   r   r   __len__  s   zAlignmentIterator.__len__Nc              	   c   s    | j }|du r|dus|durtdn2t| jD ]\}}|j|kr% nqtd| |du r?|du r;d}t|}ntd|du rG|d }| ||||}|D ]}|\}}	}
}}}| ||	|
|||}|V  qQdS )a  Iterate over alignments overlapping the specified chromosome region..

        This method searches the index to find alignments to the specified
        chromosome that fully or partially overlap the chromosome region
        between start and end.

        Arguments:
         - chromosome - chromosome name. If None (default value), include all
           alignments.
         - start      - starting position on the chromosome. If None (default
           value), use 0 as the starting position.
         - end        - end position on the chromosome. If None (default value),
           use the length of the chromosome as the end position.

        Nz5start and end must both be None if chromosome is NonezFailed to find %s in alignmentsr   z!end must be None if start is Noner   )_streamrn   rM  rq   r   r@   r\  r]  )r   
chromosomer   r   r   rT  rv   r1   r  r]   r^   r
  r^  r_  r   r   r   r   search  s8   

zAlignmentIterator.search)NNN)rT   rU   rV   rW   r"  r#  r8  r+  r9  r.  r\  r`  r]  rs  rv  r   r   r   r   r$    s    ,
3)[
\r$  c                       s   e Zd Z fddZ  ZS )r  c                    s   t   }t|S r?   )rP   r  rJ  rr   r   r1   rR   r   r   r  B  s   

z_ZippedStream.getvalue)rT   rU   rV   r  rZ   r   r   rR   r   r  A  s    r  c                   @   s$   e Zd Zdd Zdd Zdd ZdS )r   c                 C   s   t  | _|| _|| _d S r?   )r   r   r   r   )r   r   r   r   r   r   r   H  s   
z_BufferedStream.__init__c                 C   sb   | j  |_t|}| j| | j | jkr/| j | j  | jd | j	d d S d S Nr   )
r   r   r  r   r   r   r   r  r   r  r   itemr1   r   r   r   r   M  s   z_BufferedStream.writec                 C   s.   | j | j  | jd | jd d S rx  )r   r   r   r  r   r  rL   r   r   r   r   V  s   z_BufferedStream.flushN)rT   rU   rV   r   r   r   r   r   r   r   r   G  s    	r   c                   @   s   e Zd Zdd Zdd ZdS )r   c                 C   sh   | j  |_t|}| j| | j | jkr2| j t| j	  | j
d | jd d S d S rx  )r   r   r  r   r   r   r   rJ  rr   r  r   r  ry  r   r   r   r   ]  s   z_ZippedBufferedStream.writec                 C   s4   | j t| j  | jd | jd d S rx  )r   r   rJ  rr   r   r  r   r  rL   r   r   r   r   f  s   z_ZippedBufferedStream.flushN)rT   rU   rV   r   r   r   r   r   r   r   \  s    	r   c                   @   s<   e Zd ZdZedZejZdZdZ	e
dd Zdd Zd	S )
rz   )r   r   r   r   r   r}   r{   r   r   r   r   z=IHHQQQHHQQIQl   r ry   c           
      C   s   | d}tj|ddtjkrd}ntj|ddtjkrd}ntdt|d }t }||_|j	}| |}|
|\}|_|_|_|_|_|_|_|_|_|_|tjksYJ |j}	|	d	k sd|	d
krjtd|	 |S )Nry   littler   <big>znot a bigBed fileHHQQQHHQQIQrm   rj   z+expected between 3 and 12 columns, found %d)r(  r   r<   rz   r   rn   r  r  r   r   r*  r   r   r   r   r}   r{   r   r   r   r   bbiCurrentVersion)
r0   r   magicr   r  r   r   r1   versionr{   r   r   r   r'    s>   

z_Header.fromfilec                 C   s:   t jt jt j| j| j| j| j| j	| j
| j| j| j| jS r?   )rz   r  r  r   r  r   r   r   r   r}   r{   r   r   r   r   rL   r   r   r   rM     s   z_Header.__bytes__N)rT   rU   rV   	__slots__r  r  r  r   r   r  rY   r'  rM   r   r   r   r   rz   l  s    

"rz   c                   @   sB   e Zd ZdZedZdd Zdd Zdd Z	d	d
 Z
dd ZdS )_ExtraIndex)
indexFieldmaxFieldSizer   r   	get_valuez=xxHQxxxxHxxc                    s   d| _ d | _t|D ]\}}|j kr nq
td d |jdkr%td|| _ dkr3dd | _d S  dkr>d	d | _d S  fd
d| _d S )Nr   zCextraIndex field %s not a standard bed field or found in 'as' file.r%   z+Sorry for now can only index string fields.r\   c                 S      | j jS r?   )rv   r   r   r   r   r   <lambda>      z&_ExtraIndex.__init__.<locals>.<lambda>r
   c                 S   r  r?   )r   r   r  r   r   r   r    r  c                    s
   | j   S r?   )r  r  r
   r   r   r    s   
 )r  r   rM  r
   rn   r	   r  r  )r   r
   rp   r-   r;   r   r  r   r     s(   

z_ExtraIndex.__init__c                 C   s*   |  |}t|}|| jkr|| _d S d S r?   )r  r@   r  )r   r   r	  r   r   r   r   r     s
   


z_ExtraIndex.updateMaxFieldSizec                 C   s    |  |}| | j| d< d S )Nr
   )r  r=   r   )r   r   recordIxr	  r   r   r   r    s   
z_ExtraIndex.addKeysFromRowc                 C   s(   || j || d< || j || d< d S )Nr  r   )r   )r   r  r   startIxendIxr   r   r   r    s   z_ExtraIndex.addOffsetSizec                 C   s   d}| j || j| jS Nr   )r  r  r   r  )r   indexFieldCountr   r   r   rM     s   z_ExtraIndex.__bytes__N)rT   rU   rV   r  r  r  r  r   r   r  r  rM   r   r   r   r   r    s    
r  c                   @   s:   e Zd ZedZdd Zedd Zdd Z	dd	 Z
d
S )r~   z=HHQ52xc                    s    fdd|D | d d < d S )Nc                    s   g | ]}t | qS r   )r  )rC   r
   rp   r   r   r=    s    z*_ExtraIndices.__init__.<locals>.<listcomp>r   )r   rD  rp   r   r  r   r     s   z_ExtraIndices.__init__c                 C   s   | j jtj jt|   S r?   )r  r   r  r@   rL   r   r   r   r     s   z_ExtraIndices.sizec                 C   sJ   |dkrd S | D ]}|j }tdd| fddg}tj||d|_qd S )Nr   r
   z=S)r  =u8)r   r  r   )r  r   r   zerosr   )r   r   r   r   r   r   r   r   r     s   z_ExtraIndices.initializec                 C   sx   | j j}t| dkr-| | }| j |t| |}|| | D ]	}|t| q!d S | j |dd}|| d S rx  )r  r   r@   r   r  r   r   )r   r   r   r  r1   r   r   r   r   r     s   
z_ExtraIndices.tofileN)rT   rU   rV   r  r  r  r   propertyr   r   r   r   r   r   r   r~     s    

r~   c                   @   s.   e Zd Zg dZd
ddZdd Zdd Zd	S )
_ZoomLevel)r   r   r   r  =c                 C   s   t |d | _d S )NIxxxxQQ)r  r  r  r   r   r   r   r   r     s   z_ZoomLevel.__init__c                 C   s   | j | j| j| jS r?   )r  r  r   r   r   rL   r   r   r   rM     s   z_ZoomLevel.__bytes__c                 C   sB   | | jj}| j|\}}}|dkrt|| _|| _|| _d S rx  )r(  r  r   r*  r  r   r   r   )r   r   r1   r   r   r   r   r   r   r(  $  s   
z_ZoomLevel.readNr  )rT   rU   rV   r  r   rM   r(  r   r   r   r   r    s
    
r  c                   @   sR   e Zd ZdZdZedjje ZdddZdd Z	dd	 Z
ed
d Zdd ZdS )r   ry   r   r  c                    s$    fddt tjD | d d < d S )Nc                    s   g | ]}t  qS r   )r  )rC   r4   r|  r   r   r=  4  r>  z(_ZoomLevels.__init__.<locals>.<listcomp>)rA  r   bbiMaxZoomLevelsr  r   r|  r   r   3  s   $z_ZoomLevels.__init__c                 C   s.   d dd | D }|ttjt| 7 }|S )N    c                 s   r   r?   )r   )rC   rz  r   r   r   rD   7  r   z(_ZoomLevels.__bytes__.<locals>.<genexpr>)rG   r   r   r   r@   rw  r   r   r   rM   6  s   z_ZoomLevels.__bytes__c              	   C   sF   t tjD ]}z	| | | W q ty    | |d = Y  d S w d S r?   )rA  r   r  r(  r  )r   r   r   r   r   r   r(  ;  s   
z_ZoomLevels.readc                 C   sx   t j}tj|g dd}d}tt||}t|jd j}t|D ]}||kr* n||| d< |t j	9 }q"|d | S )N))r   r   r   )r   r   r   r   r   )
r   r  r   r  rF   r   iinfor   rA  r   )r0   r   r  r   minZoomresmaxIntresTryr   r   r   r   C  s   z _ZoomLevels.calculate_reductionsc                 C   s6  |d }t |d tj }|j}t }	tdtjD ]z}
t|}||kr% no|}| | |
 _	|
dtj}|| |D ]}|| q<|  | }|	||||| || |
 _|| |
 _|tj9 }d}d }|D ]#}|j|ksv|j|kr|j| }|j}|}|||< |d7 }qj||7 }qj||d = q| |
d = d S )Nr   r   r   ry   r   )r   r   r   r   r   rA  r  r@   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   ru   rt   	zoomCount	reductionr   r  r   rezoomCountr1   r   r   r4   r   r   currentSummaryr   r   r   r   T  s@   






z_ZoomLevels.reduceNr  )rT   rU   rV   r   r  r  r  r   r   rM   r(  rY   r   r   r   r   r   r   r   .  s    

r   c                   @   s8   e Zd ZdZedZejZdd Zdd Z	dd Z
d	S )
r   
validCountr   r   sumData
sumSquaresz=Qddddc                 C   s(   d| _ tj| _tj | _d| _d| _d S Nr   r   )r  r   r   r   r   r  r  rL   r   r   r   r   ~  s
   

z_Summary.__init__c                 C   sZ   |  j |7  _ || jk r|| _|| jkr|| _|  j|| 7  _|  j|| | 7  _d S r?   r  )r   r   valr   r   r   update  s   

z_Summary.updatec                 C   s   | j | j| j| j| j| jS r?   )r  r  r  r   r   r  r  rL   r   r   r   rM     s   z_Summary.__bytes__N)rT   rU   rV   r  r  r  r  r   r   r  rM   r   r   r   r   r   x  s    
	r   c                   @   s   e Zd ZdZdd ZdS )r  )r   r   r   r  c                 C      || _ || _|| _d S r?   )r   r   r   )r   r   r   r   r   r   r   r        
z_Region.__init__N)rT   rU   rV   r  r   r   r   r   r   r    s    r  c                   @   sH   e Zd Zejej ZedZej	Z	dd Z
dd Zdd Zdd	 Zd
S )r   z	=IIIIffffc                 C   sR   || _ || _|| _d| _t|| _t|| _td| _td| _	d | _
d S r  )r   r   r   r  r   float32r   r   r  r  r  )r   r   r   r   r	  r   r   r   r     s   
z_RegionSummary.__init__c                 C   sd   |j | _ |  j|j7  _t| j|j| _t| j|j| _t| j|j | _t| j	|j	 | _	| S r?   )
r   r  r   r   rF   r   r   r  r  r  )r   otherr   r   r   __iadd__  s   z_RegionSummary.__iadd__c                 C   sn   |  j |7  _ | j|krt|| _| j|k rt|| _t| j||  | _t| j|| |  | _d S r?   )r  r   r   r  r   r  r  )r   overlapr  r   r   r   r    s   

z_RegionSummary.updatec              
   C   s*   | j | j| j| j| j| j| j| j| j	S r?   )
r  r  r   r   r   r  r   r   r  r  rL   r   r   r   rM     s   z_RegionSummary.__bytes__N)rT   rU   rV   r  r  r   r  r  r  r   r   r  r  rM   r   r   r   r   r     s    
		r   c                   @   s$   e Zd Zg dZdd Zdd ZdS )
_RTreeNode)rI  rL  r2  r3  r4  r5  startFileOffsetr6  c                 C   s   d | _ g | _d S r?   )rL  rI  rL   r   r   r   r     s   
z_RTreeNode.__init__c                 C   sD   ||  d7  < |d7 }|t |krd S | jD ]}||| qd S r  )r@   rI  calcLevelSizes)r   
levelSizeslevelrQ  r   r   r   r    s   
z_RTreeNode.calcLevelSizesN)rT   rU   rV   r  r   r  r   r   r   r   r    s    r  c                   @   sL   e Zd ZdZdd Zedd Zdd Zdd	 Zd
d Z	dd Z
dd ZdS )r   )rootrZ  freeListstackr   r   c                 C   s"   d | _ d| _g | _|| _|| _d S rx  )r  rZ  r  r   r   )r   r   r   r   r   r   r     s
   
z_RangeTree.__init__c              	   c   s    t |}d }|D ]8\}}}| }t||}|d ur || 	 zt|}W n	 ty0   Y nw |jj|kr8n|| q!|V  q	d S r?   )r  r(   r   addToCoverageDepthr  r  rv   r   )r0   r   r   r   	chromNamer   r   r   r   r   r   r     s(   


z_RangeTree.generatec                 c   s:   | j  }t|\}}}| j}| j}t||t|| ||}		 t|| d}
||
| |	j	|kr>t||t|| ||}	||	j	krvt||	j	t||	j
 }|dksUJ |	|| |
|8 }
|	j	}|	V  t||t|| ||}	||	j	ksC|	|
| z	t|\}}}W n	 ty   Y n
w |	j	|kr|	V  q |	V  d S )NTr   r   )r  traverser  r   r   r   r   rF   r  r   r   r  )r   r   r   rangesr   r   r  r   r   r   r   r  r   r   r   r     sZ   





z_RangeTree.generate_summariesc                 C   sL   | j }|d ur$||jjkr|j}n|jj|kr|j}n|jS |d usd S d S r?   )r  rz  r   leftr   right)r   r   r   pr   r   r   r+   2  s   z_RangeTree.findc                 C   s   t | j}||ju r-||ju r|}||_|j|_||_n3|}|j|_||_|j|_||_n"||ju rC|}|j|_||_|j|_||_n|}|j|_||_||_|dkri| j|d  }||ju rd||_|S ||_|S || _|S Nr   r   )r@   r  r  r  r  )r   xyztosmidNoderL  r   r   r   restructure<  s>   




z_RangeTree.restructurec                 C   s  g | _ z| j }W n ty   t }Y nw |j| _d |_d |_||_d |_| j	}|d urk	 | j 
| |j|jjkrM|j}|d u rL| j  }||_nn|jj|jkre|j}|d u rd| j  }||_nnd S q/d}n|| _	d}||_|  jd7  _t| j dkr|jdu r| j  }||jkr|j}n|j}|d u s|jdu r| |||}d|_d|j_d|j_d S d|_d|_t| j dkrd S d|_|}| j  }|jdu sd S d S d S )NTFr   r   )r  r  poprO  _RedBlackTreeNoder  r  rz  r   r  r/   r   r   rZ  r@   r  )r   rz  r  r  colmqr   r   r   add`  st   






z_RangeTree.addc           
      C   s  |j d }|j d }||kr||}}| ||}|d u r+t||dd}| | d S |j|krh|j|krh|j|k rJt|j||j}||_| | |j|kr_t||j|j}||_| | | jd7  _d S t| j	||}|}|}|D ]P}	||	jk rt||	jd}|	j}| | n||	jkrt|	j||	j}||	_| | |	j|k r|	j|krt||	j|	j}||	_| | |	 jd7  _|	j}qw||k rt||d}| | d S d S )Nr   r   r   )r  )
r   r+   _Ranger  r   r   r  listr  traverse_range)
r   r   r   r   existingritemsserz  r   r   r   r    sP   










z_RangeTree.addToCoverageDepthN)rT   rU   rV   r  r   rY   r   r   r+   r  r  r  r   r   r   r   r     s    
+
$9r   c                   @       e Zd ZdZdd Zdd ZdS )r  )r  r   r   r  c                 C   r  r?   )r   r   r  )r   r   r   r  r   r   r   r     r  z_Range.__init__c                 C   s   t | j| j| jfS r?   )r  r   r   r  rL   r   r   r   __iter__  s   z_Range.__iter__N)rT   rU   rV   r  r   r  r   r   r   r   r    s    r  c                   @   r  )r  )r  r  r   rz  c                 c   sF    | j d ur| j  E d H  | jV  | jd ur!| j E d H  d S d S r?   )r  r  rz  r  rL   r   r   r   r    s   

z_RedBlackTreeNode.traversec                 c   s    | j j|kr| jd ur| j||E d H  d S d S || j jkr3| jd ur1| j||E d H  d S d S | jd urB| j||E d H  | j V  | jd urW| j||E d H  d S d S r?   )rz  r   r  r  r   r  )r   r   r   r   r   r   r    s   



z _RedBlackTreeNode.traverse_rangeN)rT   rU   rV   r  r  r  r   r   r   r   r    s    r  c                   @   sB   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dS )r   ih$r  c                 C   sD   t |d | _t |d | _t |d | _t |d | _d S )NIIQIIIIQIxxxx?xHIIIIQIIIIQQ)r  r  r)  formatter_nodeformatter_nonleafformatter_leafr  r   r   r   r     s   	z_RTreeFormatter.__init__c              	   C   s  t dg d}t dg d}|| jj}| j|\	}}}}}	}
}}}|tjks+J | j}| j}| j	}|d g ||	|
|d }|}d}	 ||j}||\}}|r|j
}t|D ]"}||j}||\}}	}
}}}||||	|
|||}|| qY||7 }	 |j}|d u r||ksJ |S t|j
D ]\}}t|t|kr nqtdz	|j
|d  }W n ty   |}Y nw nqn/|j
}t|D ]!}||j}||\}}	}
}}||g ||	|
||}|| q|}|d }||j qC)	NNonLeaf)rL  rI  rS  r3  rR  r5  r   Leaf)rL  rS  r3  rR  r5  r   r   r   TrH  r   )r   r(  r)  r   r*  r   r   r  r  r  rI  rA  r/   rL  rM  r   rN  rO  r   r   )r   r   r  r  r1   r  ru   	itemCountrS  r3  rR  r5  r6  rt   r  r  r  r  rP  itemsCountedisLeafcountrI  r4   r   r   rQ  rL  r-   r   r   r   r(    s   
		z_RTreeFormatter.readc                 C   s  t |}|dkr
d S g }|d j}d}d}||k rt }	||	 || }
|
j |	_|	_|
j|	_|
j	|	_
||	_d}|
}t|d |D ]}|| }|j}||	jkrR n|d7 }qB|}||	_||d ||  D ]@}|j|	jk rw|j|	_|j|	_n|j|	jkr|j|	jk r|j|	_|j|	jkr|j|	_|j	|	_
qf|j|	jkr|j	|	j
kr|j	|	_
qf||7 }||k sd}	 g }|}|D ]}}	||krd}t }|	j|_|	j|_|	j|_|	j|_|	j
|_
|	j|_|	j|_|| nG|d7 }|	j|jk r|	j|_|	j|_n|	j|jkr
|	j|jk r
|	j|_|	j|jkr|	j|_|	j
|_
n|	j|jkr,|	j
|j
kr,|	j
|_
|j|	 ||	_q|d7 }t |dkrF	 ||fS |}qr  )r@   r  r  r/   r   r2  r4  r   r3  r   r5  r  rA  r6  rL  rI  )r   ru   r  r6  r  rI  
nextOffsetoneSizer4   rQ  	startItemendItemr5   rz  
levelCountparents	slotsUsedrL  r   r   r   rTreeFromChromRangeArray  s   





!

z(_RTreeFormatter.rTreeFromChromRangeArrayc              
   C   s   | j }||krHd}| j|t|j}	||	 |jD ]}
||
j|
j|
j|
j	|
j
|
j|
j
 }	||	 q|t|t|j | jj  d S |jD ]}
| |||
|d || qKd S )NTr   )r  r  r  r@   rI  r   r2  r3  r4  r5  r  r6  r   r  r   rWriteLeaves)r   rt   	lNodeSizer   curLevel	leafLevelr   r  r  r1   rQ  r   r   r   r    s.   



z_RTreeFormatter.rWriteLeavesc              
   C   s   |}| j }	||krGd}
| j|
t|j}|| |jD ]}|	|j|j|j|j	|}|| ||7 }q|t
|t|j | j j  n|jD ]}| ||||d |||}qJ| }||krmtd| d| d|S )NFr   z!Internal error: offset mismatch (z vs r   )r  r  r  r@   rI  r   r2  r3  r4  r5  r   r   rWriteIndexLevelr   rN  )r   rL  ru   childNodeSizer  	destLevelr  r   previous_offsetr  r  r1   rQ  positionr   r   r   r    sF   




	z _RTreeFormatter.rWriteIndexLevelc                 C   s   |  |||\}}| jtj|t||j|j|j|j	||	}|
| |d u r)d S t|t}	|j|	dd | jj| jj|  }
| }t|d D ]%}||	| |
 7 }||d krd| jj| jj|  }
| |||
d||| qJ|d }| ||
|d|| d S )Nr   )r  r   rm   )r  r)  r  r   r   r@   r2  r3  r4  r5  r   r   r  r   r  r  r   r  r   rA  r  r  r  )r   r  ru   rt   r6  r   r  r  r1   r  r   levelOffsetr4   r  r   r   r   r     s8   
z_RTreeFormatter.writeNr  )
rT   rU   rV   r   r   r(  r  r  r  r   r   r   r   r   r     s    
%sS(r   c                   @   s*   e Zd ZdZd
ddZdd Zdd Zd	S )r   ixr  c                 C   s4   t |d | _t |d | _|d | _|| _d S )NIIIIQxxxxxxxxr  z{keySize}sQ)r  r  r)  r  fmt_nonleafr   r  r   r   r   r   @  s   

z_BPlusTreeFormatter.__init__c                 C   s  | j }| j}||j}||\}}}}}	|tjksJ | j}
t	| j
j|d}t	| | d}||j| ks=J |dksCJ tdddg}g }d }	 ||
j}|
|\}}|rt|D ]0}||j}||\}}}|d }|t|ksJ td |d	}t||d
}|| qbn"g }t|D ]}||j}||\}}|| q|}|||}	 |d u rt||	ksJ |S |j}z|d}W n ty   |j}Y nw nq|| qO)Nr   sIIrx   NoderL  rI  Tr   ra  rc  r   )r   r)  r(  r   r*  r   r   r  r  r  r  formatr   rA  r   r(   r@   r   r   r/   rI  r  rO  rL  r   )r   r   r   r  r1   r  ru   r   valSizer  r  r  r  r  rq   rP  r  r  r4   keyr   r   r
   sequencerecordrI  posrL  r   r   r   r(  W  s^   	


z_BPlusTreeFormatter.readc                 C   s  t j}|jd j}|j| }t|}| j}||||||}	||	 | j}
t	
| jj|d}d}||krFttd||}|d7 }||ks6t|}|
j||j  }|
j||j  }d}| }t|d ddD ]f}|| }|| }td||}|dkr|}n|}t|| }|| }|}|D ]<}|||| | }t|}||
|| |D ]}||d |}	||	 ||7 }qt|| |j }	||	 q|}qhd}td|D ]1}||||  }t|}|dkr d S ||
|| || t|| |j }	||	 qd S )Nr
   r  r   r   Fr   T)r   r   r   itemsizer@   r)  r  r   r  r  r  r  r  rA  r   r   r   	itertoolsr  r   )r   r  ru   r   r   r   r  r  r  r1   r  r  levelsbytesInIndexBlockbytesInLeafBlockr  r   r  slotSizePernodeSizePerindicesbytesInNextLevelBlock	levelSizeendLevel	nextChildr-   blockrZ  rz  r   r   r   r     sf   




z_BPlusTreeFormatter.writeNr  )rT   rU   rV   r   r   r(  r   r   r   r   r   r   =  s
    
8r   )*rW   r   r   r  r  r   rJ  collectionsr   r   numpyr   	Bio.Alignr   r   Bio.Seqr   Bio.SeqRecordr   r   r  r   r   rg   r$  r  r   r   rz   r  r~   r  r   r   r  r   r  r   r  r  r   r   r   r   r   r   <module>   s   .
\4   ^   V-%J 	0 \  U