o
    Rŀg5                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ eddeddddeddddeddddeddddgZG dd de
jZG dd de
jejZdS )a'  Bio.Align support for the "bigmaf" multiple alignment format.

The bigMaf format stores multiple alignments in a format compatible with
the MAF (Multiple Alignment Format) format. BigMaf files are binary and are
indexed as a bigBed file.

See https://genome.ucsc.edu/goldenPath/help/bigMaf.html
    N)
_aligncore)	Alignment)
Alignments)bigbed)maf)AutoSQLTable)Field)Seq)	SeqRecordbedMafzBed3 with MAF blockstringchromz)Reference sequence chromosome or scaffold)as_typenamecommentuint
chromStartzStart position in chromosomechromEndzEnd position in chromosomelstringmafBlockz	MAF blockc                       s6   e Zd ZdZdZ				d fdd	Zd	d
 Z  ZS )AlignmentWriterz1Alignment file writer for the bigMaf file format.bigMafNT      c              	      s   t  j|dt||||d dS )aw  Create an AlignmentWriter object.

        Arguments:
         - target       - output stream or file name.
         - targets      - A list of SeqRecord objects with the chromosomes in
                          the order as they appear in the alignments. The
                          sequence contents in each SeqRecord may be undefined,
                          but the sequence length must be defined, as in this
                          example:

                          SeqRecord(Seq(None, length=248956422), id="chr1")

                          If targets is None (the default value), the alignments
                          must have an attribute .targets providing the list of
                          SeqRecord objects.
         - compress     - If True (default), compress data using zlib.
                          If False, do not compress data.
                          Use compress=False for faster searching.
         - blockSize    - Number of items to bundle in r-tree.
                          See UCSC's bedToBigBed program for more information.
                          Default value is 256.
         - itemsPerSlot - Number of data points bundled at lowest level.
                          See UCSC's bedToBigBed program for more information.
                          Use itemsPerSlot=1 for faster searching.
                          Default value is 512.

           )bedNdeclarationtargetscompress	blockSizeitemsPerSlotN)super__init__r   )selftargetr   r   r   r    	__class__ D/var/www/html/myenv/lib/python3.10/site-packages/Bio/Align/bigmaf.pyr"   A   s   #
zAlignmentWriter.__init__c                 C   s  t  }|D ]J}t|tstdt|ddd dd}|j}|js$q|dd }|jj	
dd	\}}||j_	|d
 |d k sBJ i |_||jd< || q|jdd d |jd }	|	j	
dd	\}}t|j}
t|	j|d|
d< |
|_tj|dt| jd| dS )zWrite the file.zExpected an Alignment objectr   N
;   .   r   r   )r   r)   r   c                 S   s   | j j| jd fS )Nr/   )r$   idcoordinates)	alignmentr'   r'   r(   <lambda>   s    z,AlignmentWriter.write_file.<locals>.<lambda>)keyr   )r0   r   )r   r   r   )r   
isinstancer   	TypeErrorformatreplacer1   sizer$   r0   splitannotationsappendsortr   listr
   seqr   r   r   r   write)r#   stream
alignmentsfixed_alignmentsr2   r   r1   	reference
chromosomerecordr   r'   r'   r(   
write_filen   s6   




zAlignmentWriter.write_file)NTr   r   )__name__
__module____qualname____doc__fmtr"   rG   __classcell__r'   r'   r%   r(   r   <   s    -r   c                       sD   e Zd ZdZdZdZ fddZdd Z fdd	Zd
d Z	  Z
S )AlignmentIteratora  Alignment iterator for bigMaf files.

    The file may contain multiple alignments, which are loaded and returned
    incrementally.

    Alignment annotations are stored in the ``.annotations`` attribute of the
    ``Alignment`` object, except for the alignment score, which is stored as an
    attribute.  Sequence information of empty parts in the alignment block
    (sequences that connect the previous alignment block to the next alignment
    block, but do not align to the current alignment block) is stored in the
    alignment annotations under the ``"empty"`` key.  Annotations specific to
    each line in the alignment are stored in the ``.annotations`` attribute of
    the corresponding sequence record.
    r   bc                    s   d| _ t | dS )z|Create an AlignmentIterator object.

        Arguments:
        - source - input file stream, or path to input file
        N)rD   r!   r"   )r#   sourcer%   r'   r(   r"      s   zAlignmentIterator.__init__c                 C   s  t | jd }|j}| j}	 z|j}W n	 ty   Y nw |d }q| }||j	 d}d}d}		 |
|}
| jrJ|	|
7 }	t }||	}n||
7 }z|d|}W n	 ty_   Y q3w ||d d   }t|dkrqnq4|d }|| |d	d\}}| S )
NIIITr   r       s   ;sr.   r,      .)structStruct	byteorderr9   treechildrenAttributeErrortellseek
dataOffsetread_compressedzlibdecompressobj
decompressindex
ValueErrorr:   lendecode)r#   rA   	formatterr9   noderX   fileposdataSizedatacompressed_datachunkdecompressoriwordsr   rD   rE   r'   r'   r(   _read_reference   sH   


z!AlignmentIterator._read_referencec                    sJ   t  | | jd u r| || _d| _d| j| jd jf | jd _d S )Nr   z%s.%s)r!   _read_headerrD   rp   _indexr   r0   )r#   rA   r%   r'   r(   rq      s
   
"zAlignmentIterator._read_headerc           -      C   s  ||d  dks
J t |}||| }g }g }	i }
d }td}d}g }	 |d }|||d  }|dkrJtd||d  }|| d  }nJ|dkrtd||d  }|| d  }|||  }|dd   }|D ]0}|d	\}}|d
krt|}qr|dkrt	|}|dkrt
d| ||
d< qrt
d|  n|dkr1td||d  }|| d  }|||  }|d d}t|dkrt
d|d  }t	|d }t	|d }|d }t	|d }|}|||| \}}t||krt
dt||f t||i|d}t||ddd}|| || }|d }|	| nc|dkrtd||d  }|| d  }|||  }|d d}t|dks_J |d  |ksjJ |d  } t	|d }!|d  }"t	|d }#| tjv sJ |"tjv sJ | |jd< |!|jd< |"|jd< |#|jd< n|d kr>td||d  }|| d  }|||  }|d d}t|d!ksJ |d  }t	|d }t	|d }|d }t	|d }|d  }$|$tjv sJ td |d}t||ddd}|| }%|d"kr||%f}&n|| ||% f}&||&|$f}'|
d#}(|(d u r8g }(|(|
d#< |(|' nV|d$krtd||d  }|| d  }|||  }|d d}t|dkslJ |d  |kswJ |d d%d&}| |jd'< n|d(krnt
d)| q&|j})t|)tj}*||* t||	|*D ]+\}}}+|d"kr|+|jjd d 7 }+q|j |_|jjd d |+ |+d d < qt ||*},|
d ur|
|,_|d ur||,_!|,S )*Nr.   r      ;r)   T   #s   ^[^;]*   a   =s   scores   passz&pass value must be positive (found %d)passz Unknown annotation variable '%s'   ss$   ^s\s*\S*\s*\d*\s*\d*\s*[+-]\s*\d*\s*      z5Error parsing alignment - 's' line must have 7 fieldsr,   r      z2sequence size is incorrect (found %d, expected %d))length )r0   r   description   i
leftStatus	leftCountrightStatus
rightCount   e      +empty   q   -rR   quality    z+Error parsing alignment - unexpected line:
)"
memoryviewr   PrintedAlignmentParserrematchspantobytesr:   floatintrc   re   rd   feedr	   r
   r<   rN   status_charactersr;   empty_status_charactersgetr8   shapenpr   int64fillzipr?   defined_rangesreverse_complementr   score)-r#   chromIdr   r   rj   	dataStartdataEndbufferrecordsstrandsr;   r   printed_alignment_parserj	sequencesrn   prefixmlinero   wordr4   valuesrcstartr9   strandsrcSizensequencer?   rF   r   r   r   r   statusendsegmentr   
annotationr   r1   rowr2   r'   r'   r(   _create_alignment   s
  


















n

 


z#AlignmentIterator._create_alignment)rH   rI   rJ   rK   rL   moder"   rp   rq   r   rM   r'   r'   r%   r(   rN      s    	)rN   )rK   r   rT   r_   numpyr   	Bio.Alignr   r   r   r   r   Bio.Align.bigbedr   r   Bio.Seqr	   Bio.SeqRecordr
   r   r   rN   r'   r'   r'   r(   <module>   sR   	P