o
    Rŀgv                     @   s   d Z ddlZddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddl	mZ ddl	mZ dd	lmZ G d
d dejZG dd dejZdS )a  Bio.Align support for the "sam" pairwise alignment format.

The Sequence Alignment/Map (SAM) format, created by Heng Li and Richard Durbin
at the Wellcome Trust Sanger Institute, stores a series of alignments to the
genome in a single file. Typically they are used for next-generation sequencing
data. SAM files store the alignment positions for mapped sequences, and may
also store the aligned sequences and other information associated with the
sequence.

See http://www.htslib.org/ for more information.

You are expected to use this module via the Bio.Align functions.

Coordinates in the SAM format are defined in terms of one-based start
positions; the parser converts these to zero-based coordinates to be consistent
with Python and other alignment formats.
    N)chain)	Alignment)
interfaces)reverse_complement)Seq)UndefinedSequenceError)	SeqRecordc                       s8   e Zd ZdZdZd fdd	Zdd Zdd	d
Z  ZS )AlignmentWriterzGAlignment file writer for the Sequence Alignment/Map (SAM) file format.SAMFc                    s   t  | || _dS )zCreate an AlignmentWriter object.

        Arguments:
         - md - If True, calculate the MD tag from the alignment and include it
                in the output.
                If False (default), do not include the MD tag in the output.

        N)super__init__md)selftargetr   	__class__ A/var/www/html/myenv/lib/python3.10/site-packages/Bio/Align/sam.pyr   +   s   	
zAlignmentWriter.__init__c              	   C   s  z|j }W n ty   i }Y nw z|j}W n ty!   i }Y nw |d}|durVdd|d  g}| D ]\}}|dkr@q7|d||f  q7d|d }	||	 |D ]}
d	g}|d
|
j  t	|
j
}|d|  |
j D ]n\}}|dkr|d|  qv|dkr|dd|  qv|dkr|d|  qv|dkr|d|  qv|dkr|d|  qv|dkr|dv sJ |d|  qv|dkr|d|  qv|d|dd |f  qvz|
j}W n	 ty   Y nw |dkr|d|  d|d }	||	 qX| D ]6\}}|dkrq|D ]'}d| g}| D ]\}}|d||f  q(d|d }	||	 qqdS ) zWrite the SAM header.HDNz@HDzVN:%sVNz%s:%s	
z@SQzSN:%szLN:%dalternate_locuszAH:%snameszAN:%s,assemblyzAS:%sMD5zM5:%sspecieszSP:%stopologylinearcircularzPP:%sURIzUR:%s   z<unknown description>zDS:%s@)metadataAttributeErrortargetsgetitemsappendjoinwriteidlenseqannotationsdescription)r   stream
alignmentsr%   r'   valuesfieldskeyvaluelinerecordlengthr1   tagrowsrowr   r   r   write_header7   sz   







zAlignmentWriter.write_headerNc           '      C   s  t |ts	td|j }|j\}}d}d}z|j}W n ty)   d}d}	Y nIw z|jd }W n tt	fy<   Y nw z|jd }W n tt	fyO   Y nw z|j
d }
W n tt	fyd   d}	Y nw dd	d
 |
D }	|j}t|}z|j}W n ty   d}Y nw |j}|d |d kr|dddddf }|d |d k rd}n d}t|}t|}||dddf  |dddf< ||}}z||jO }W n	 ty   Y nw zt|}W n ty   Y n ty   d}Y nw t|d}|dddf \}}|d }d}|dur|d| 7 }|dkr|d| 7 }z|j}W nU tyx   d}|ddddf D ]>\}}|| }|| }|dkrR|d| 7 }|}q7|dkra|d| 7 }|}q7||krjtd|d| 7 }|}|}q7Y n~w t||ddddf D ]o\}\}}|| }|| }|dkr|tdksJ |d| 7 }|}q|dkr|tdkr|d| 7 }n|tdkr|d| 7 }ntd| |}q||krtd|td ksJ |d| 7 }|}|}q||k r|d||  7 }|dur|d| 7 }z|j}W n ty   d!}Y nw z|j}W n ty1   d}Y nw ||kr9d"}z|j}W n tyJ   d}Y nw |d7 }d}|t||t|t|||t|t|||	g}|du rp| j}|d#u r|dkr~td$|dddf \}}d}d}|du r|ddddf D ]j\}}|| }|| }|dkr|}q|dkr	 |r|t|7 }d}|d%|||  7 }|}q||krtdt||| ||| D ]\}}||kr|d7 }q|t|| 7 }d}q|}|}q|r|t|7 }nt||ddddf D ]r\}\}}|| }|| }|dkr4|}q|dkrY|tdkrU|rK|t|7 }d}|d%|||  7 }|}q||krbtdt||| ||| D ]\}}||kr~|d7 }qo|t|| 7 }d}qo|}|}q|r|t|7 }d&| } ||  z|j}!W n
 ty   Y n
w d'|! } ||  z|j}"W n
 ty   Y nw |" D ]\}#}$t |$t rd(}%t|$}$nat |$t!rd)}%t|$}$nTt |$tr t|$dkrd*}%nDd+}%nAt |$trd,}%dt"t|$}$n0t |$tjrAd-}%t#|$j$tj%r$nt#|$j$t!r-ntd.|$j$ d/|# d0dt"t|$}$|# d1|% d1|$ } ||  qd2|d3 }&|&S )4zBReturn a string with a single alignment formatted as one SAM line.zExpected an Alignment objectNquery*hard_clip_lefthard_clip_rightphred_quality c                 s   s    | ]	}t |d  V  qdS )!   N)chr).0r7   r   r   r   	<genexpr>   s    z3AlignmentWriter.format_alignment.<locals>.<genexpr>r   )r   r   )r   rI   )r      )rI   rJ   r      rJ   ASCIIz%dHz%dSz%dIz%dDzUnequal step sizes in alignmentz%dMINz%dNDzUnexpected operation M   =Tz(requested MD tag with undefined sequence^zMD:Z:%sz	AS:i:%.0fifAZHBz Array of incompatible data type z in annotation '':r   r   )&
isinstancer   	TypeErrorcoordinates	transpose	sequencesr-   r&   r0   KeyErrorletter_annotationsr+   r/   r.   r   nparrayflagbytesr   str
operations
ValueErrorzipordmapqrnextpnextr   r*   scorer)   intfloatmap
issubdtypedtypeinteger)'r   	alignmentr   r^   r   r?   rA   rB   qNamequalphredqSizernamere   tStartqStartposcigarrh   tEndqEndtCountqCount	operationrl   rm   rn   tLenr5   numbertcqcfieldro   r0   r6   r7   datatyper8   r   r   r   format_alignmentu   s  





 







&















"
&


"






z AlignmentWriter.format_alignment)F)N)	__name__
__module____qualname____doc__fmtr   r>   r   __classcell__r   r   r   r   r	   &   s    >r	   c                   @   s$   e Zd ZdZdZdd Zdd ZdS )AlignmentIteratora  Alignment iterator for Sequence Alignment/Map (SAM) files.

    Each line in the file contains one genomic alignment, which are loaded
    and returned incrementally.  The following columns are stored as attributes
    of the alignment:

      - flag: The FLAG combination of bitwise flags;
      - mapq: Mapping Quality (only stored if available)
      - rnext: Reference sequence name of the primary alignment of the next read
               in the alignment (only stored if available)
      - pnext: Zero-based position of the primary alignment of the next read in
               the template (only stored if available)
      - tlen: signed observed template length (only stored if available)

    Other information associated with the alignment by its tags are stored in
    the annotations attribute of each alignment.

    Any hard clipping (clipped sequences not present in the query sequence)
    are stored as 'hard_clip_left' and 'hard_clip_right' in the annotations
    dictionary attribute of the query sequence record.

    The sequence quality, if available, is stored as 'phred_quality' in the
    letter_annotations dictionary attribute of the query sequence record.
    r
   c                 C   s  i | _ g | _|D ]}|ds|| _ n|dd   d}|d }i }|dkri }d }|dd  D ]u}|dd\}	}
t|	dksEJ |	dkrL|
}q3|	d	krUt|
}q3|	d
kr^|
|d< q3|	dkrj|
d|d< q3|	dkrs|
|d< q3|	dkrz|
}q3|	dkr|
|d< q3|	dkr|
|d< q3|	dkr|
dv sJ |
|d< q3|	dkr|
|d< q3|
||	< q3td |d}t	||d|d}|d ur||_
| j| q|dd  D ]}|dd\}	}
t|	dksJ |
||	< q|dkr|| j |< q|| j vrg | j |< | j | | qdd  t| jD | _d S )!Nr$   rJ   r   r   SQr[   r#   SNLNAHr   ANr   r   ASr   DSM5r   SPr   TPr   r   URr"   r:   rD   )r-   r1   r0   r   c                 S   s   i | ]\}}|j |qS r   )r-   )rG   indexr9   r   r   r   
<dictcomp>  s    z2AlignmentIterator._read_header.<locals>.<dictcomp>)r%   r'   
startswith_linestripsplitr.   rp   r   r   r1   r*   	enumerate_target_indices)r   r2   r8   r5   r;   r4   r0   r1   r   r6   r7   r{   r:   sequencer9   r   r   r   _read_header  sp   











zAlignmentIterator._read_headerc           /         sZ	  z| j }W n ty   |}Y n	w t|g|}| ` |D ]}| }t|dk r0tdt| |d }t|d }|d }t|d d }t|d }	|d }
|d	 }t|d
 d }t|d }|d }|d }d }d }i }|dd  D ]}|dd\}} |dkr|dksJ t }qv|dkr|dksJ  }qv|dkrt  nV|dkrt  nM|dv rnH|dkrt }t fddt	d|dD  n0|dkr d } dd  d |dv rt}n|dkrt}ntd| d| dt
 |  ||< qv|d@ r	d}nd }d }d }d!}|d@ rd }d }n|d u rd}||gg}d"} t }!|
D ]}|d#krCt| }"||"7 }||"7 }n|d$v rWt| }"||"7 }||"7 }d%}nr|d&kret| }"||"7 }nd|d'krt| }"|dkr}|d d  |"7  < ||"7 }d"} q/|d(krt| }"||"7 }n6|d)krt| }"||"7 }d%}n&|dkr|dkrt| }nt| }d"} q/|d*krtd+| |7 } q/|||g |!t| d"} q/| j|}#|#d u r| jrtd,| d-td |d"d.}n| j|# }nd}||gg}|}$d"}|g}%d}&g }'d"} t }!|
D ]}|d#v rAt| }"||"7 }||"7 }||$d |" 7 }|$|"d  }$|&|"7 }&n|d$v rgt| }"||"7 }||"7 }||$d |" 7 }|$|"d  }$|&|"7 }&d%}n|d&kr{t| }"||"7 }|$|"d  }$n|d'krt| }"|dkr|d d  |"7  < ||"7 }|$|"d  }$d"} q|d(krt| }"||"7 }|&|"7 }&|%| |'|& d}&nB|d)krt| }"||"7 }|%| |'|& d}&d%}n&|dkr|dkrt| }nt| }d"} q|d*krtd+| |7 } q|||g |!t| d"} q|'|& |}$d"}d"} t|}(|(D ]b}|d/v rN| rBt| } ||$d |  7 }|$| d  }$d"} ||7 }|$dd  }$q$|d0kr| rjt| } ||$d |  7 }|$| d  }$d"} |(D ]}|d/vru n||7 }ql n
|} q$| |7 } q$| rt| } ||$d |  7 }|}$| j| }#t| j|# }t|j}"i })d}#t|%|'D ]\}*}&|$|#|#|&  |)|*< |#|&7 }#qt|)|"d1|_|d urt
| }|dkr||dd d f  |dd d f< |d2kr|}"td |"d1}+nt|}+|d@ st||ksJ |dkr|+ }+t|+|d"d.}|dkr)||}}|d ur3||jd3< |d ur=||jd4< |d2krNd5d6 |D },|,|jd7< ||g}-t|-|}.||._|	d8krb|	|._ |d9krk||._!n|d2krs||._!|dkr{||._"|dkr||._#|d ur||._$|r||._|d ur||._%|d ur||._&|r|!|._'|.  S d S ):N   z)line has %d columns; expected at least 11r   rJ   r#                     	   
   r[   r   rT   MDrW   rU   )rV   rW   rX   c                 3   s$    | ]}t  ||d   V  qdS )r#   N)rp   )rG   rT   r7   r   r   rH     s   " z9AlignmentIterator._read_next_alignment.<locals>.<genexpr>rY   r   cCsSiIzUnknown number type 'z
' in tag 'rZ   rK   -+FrD   rP   z=XTrM   SrO   rN   Pz'padding operator is not yet implementedzFound target z missing from header)r-   r1   
ACGTNacgtnrS   r   r@   rA   rB   c                 S   s   g | ]}t |d  qS )rE   )rk   )rG   cr   r   r   
<listcomp>  s    z:AlignmentIterator._read_next_alignment.<locals>.<listcomp>rC   rQ   rR   )(r   r&   r   r   r.   ri   rp   rq   rf   rangerc   rd   	bytearrayNotImplementedErrorr*   rk   r   r(   r'   r   itercopydeepcopyr/   rj   r   r_   r   r0   rb   r   re   rl   rm   rn   tlenro   rA   rB   rh   )/r   r2   r8   linesr5   qnamere   r{   
target_posrl   r   rm   rn   r   r?   rx   r   ro   r0   r   r;   r   nletterrt   strandrA   rB   store_operationsr   r^   	query_posr   rh   r:   r   r/   startssizesizeslettersdatastartr   ry   recordsrv   r   r   r   _read_next_alignment  s"  





 












































 



















  z&AlignmentIterator._read_next_alignmentN)r   r   r   r   r   r   r   r   r   r   r   r   t  s
    =r   )r   r   	itertoolsr   numpyrc   	Bio.Alignr   r   Bio.Seqr   r   r   Bio.SeqRecordr   r	   r   r   r   r   r   <module>   s     P