o
    RŀgEV                     @   s   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ G dd dejZG dd dejZdS )a  Bio.Align support for the "psl" pairwise alignment format.

The Pattern Space Layout (PSL) format, described by UCSC, stores a series
of pairwise alignments in a single file. Typically they are used for
transcript to genome alignments. PSL files store the alignment positions
and alignment scores, but do not store the aligned sequences.

See http://genome.ucsc.edu/FAQ/FAQformat.html#format2

You are expected to use this module via the Bio.Align functions.

Coordinates in the PSL format are defined in terms of zero-based start
positions (like Python) and aligning region sizes.

A minimal aligned region of length one and starting at first position in the
source sequence would have ``start == 0`` and ``size == 1``.

As we can see in this example, ``start + size`` will give one more than the
zero-based end position. We can therefore manipulate ``start`` and
``start + size`` as python list slice boundaries.
    )chainN)	Alignment)
interfaces)reverse_complement)Seq)UndefinedSequenceError)CompoundLocation)ExactPosition)
SeqFeature)SimpleLocation)	SeqRecordc                       s6   e Zd ZdZdZd fdd	Zdd	 Zd
d Z  ZS )AlignmentWriterzEAlignment file writer for the Pattern Space Layout (PSL) file format.PSLTNNc                    sL   t  | || _|dur|dkrt| }nt| }|| _|| _dS )a  Create an AlignmentWriter object.

        Arguments:
         - target    - output stream or file name
         - header    - If True (default), write the PSL header consisting of
                       five lines containing the PSL format version and a
                       header for each column.
                       If False, suppress the PSL header, resulting in a simple
                       tab-delimited file.
         - mask      - Specify if repeat regions in the target sequence are
                       masked and should be reported in the `repMatches` field
                       of the PSL file instead of in the `matches` field.
                       Acceptable values are
                       None   : no masking (default);
                       "lower": masking by lower-case characters;
                       "upper": masking by upper-case characters.
         - wildcard  - Report alignments to the wildcard character in the
                       target or query sequence in the `nCount` field of the
                       PSL file instead of in the `matches`, `misMatches`, or
                       `repMatches` fields.
                       Default value is 'N'.

        Nupper)super__init__headerordlowerr   wildcardmask)selftargetr   r   r   	__class__ A/var/www/html/myenv/lib/python3.10/site-packages/Bio/Align/psl.pyr   2   s   
zAlignmentWriter.__init__c                 C   sN   | j sdS z|j}W n ty   d}Y nw |dd}|d| d dS )zWrite the PSL header.N3psLayout versionzpsLayout version a  

match	mis- 	rep. 	N's	Q gap	Q gap	T gap	T gap	strand	Q        	Q   	Q    	Q  	T        	T   	T    	T  	block	blockSizes 	qStarts	 tStarts
     	match	match	   	count	bases	count	bases	      	name     	size	start	end	name     	size	start	end	count
---------------------------------------------------------------------------------------------------------------------------------------------------------------
)r   metadataAttributeErrorgetwrite)r   stream
alignmentsr    versionr   r   r   write_headerT   s   
zAlignmentWriter.write_headerc           (      C   s  t |ts	td|j}|jsdS |j\}}z|j}W n ty&   d}Y nw z|j}W n	 ty5   Y nw z|j}W n tyF   d}Y nw z|j}W n	 tyU   Y nw t	|}t	|}t
t|d}	|	ddk}
|	dd|
f d\}}||krd}|d	 |d
 kr|dddddf }|d |d krd}t|}| }||dddf  |dddf< n9d}n6|d| krd}|d	 |d
 krd}t|}| }||dddf  |dddf< nd}ntd||f | j}| j}d}d}d}d}d}d}d}d}g }g }g }|dddf \}}|ddddf  D ]\}}||krC|dkr?||k r?|d7 }||| 7 }|}q!||kr`|dkr\||k r\|d7 }||| 7 }|}q!|| }|| }|| || || ||kr|du sJ n|d| ksJ |du sJ ||| } ||| }!zt| } W n ty   t| d} Y n ty   d} Y nw zt|!}!W n ty   t|!d}!Y n ty   d}!Y nw | du s|!du r||7 }n|dkr2t|  |! | D ]0\}"}#}$|"|ks|#|kr|d7 }q |"|#kr+|"|$kr%|d7 }q |d7 }q |d7 }q no|dkrst|  |! | D ]0\}"}#}$|"|ksP|#|krV|d7 }qA|"|#krl|"|$krf|d7 }qA|d7 }qA|d7 }qAn.t|  |! D ]$\}"}#|"|ks|#|kr|d7 }q||"|#kr|d7 }q||d7 }q||}|}q!z|j}W n
 ty   Y nw z|j}W n
 ty   Y nw z|j}W n
 ty   Y nw z|j}W n
 ty   Y nw |d }|d }|d | }|d | }|dkr|du r|| || }}n	|| || }}t	|}%dt t!|d }dt t!|d }dt t!|d }|rAd| }t!|t!|t!|t!|t!|t!|t!|t!|||t!|t!|t!||t!|t!|t!|t!|%|||g}&d|&d }'|'S )zBReturn a string with a single alignment formatted as one PSL line.zExpected an Alignment object queryr      r   NFr   r   r   r-   r*   r   r*   r-   -+   Tzinconsistent step sizes %d, %dASCIIr   r   ,	
)"
isinstancer   	TypeErrorcoordinatessize	sequencesidr!   seqlenabsnpdiffminsumr   copy
ValueErrorr   r   	transposeappendbytesr   zipr   r   matches
misMatches
repMatchesnCountjoinmapstr)(r   	alignmentr9   r   r)   qNametNametSizeqSizestepsalignedtCountqCountdnaxstrandr   r   rJ   rK   rL   rM   
qNumInsertqBaseInsert
tNumInserttBaseInsert
blockSizesqStartstStartstStartqStarttEndqEndtSeqqSequ1u2c1
blockCountwordsliner   r   r   format_alignmentj   sx  





"""




















z AlignmentWriter.format_alignment)TNr   )	__name__
__module____qualname____doc__fmtr   r'   ro   __classcell__r   r   r   r   r   -   s    "r   c                   @   s$   e Zd ZdZdZdd Zdd ZdS )AlignmentIteratora$  Alignment iterator for Pattern Space Layout (PSL) files.

    Each line in the file contains one pairwise alignment, which are loaded
    and returned incrementally.  Alignment score information such as the number
    of matches and mismatches are stored as attributes of each alignment.
    r   c                 C   s   t |}|drA| }|d dkrtd|d  d|d i| _t |}t |}t |}t |}|d dkr?td	d S || _d S )
Nz	psLayout r*   r&   z#Unexpected word '%s' in header liner      r0   r(   zEnd of header not found)next
startswithsplitrE   r    lstripstrip_line)r   r$   rn   rm   r   r   r   _read_header@  s   

zAlignmentIterator._read_headerc           .      C   s|  z| j }W n ty   |}Y n	w | ` t|g|}|D ]}| }t|dkr+d}nt|dkr4d}ntdt| |d }|d }t|d }|d	 }	t|d
 }
t|d }dd |d ddD }dd |d ddD }dd |d ddD }t||krtdt||f t||krtdt||f t||krtdt||f t	|}t	|}t	|}|dv rd| }n|}|d }|d }||gg}t
||||D ]0\}}}}||kr|||g |}||kr|||g |}||7 }||7 }|||g qt	| }d}d}d}d}|d d df \}}|d d dd f  D ]G\}}|| }|| }|dkrb|dkr^||k r^|d7 }||7 }|}q;|dkr}|dkry||
k ry|d7 }||7 }|}q;|}|}q;|t|d krtd|d |f |t|d krtd|d |f |t|d krtd |d |f |t|d! krtd"|d! |f t|d# }t|d$ }t|d% }t|d& }|d'kr||}}||dd d f  |dd d f< n|d(kr||}}|
|dd d f  |dd d f< ||d) kr-td*||d) f ||d+ kr>td,||d+ f ||d- krOtd.||d- f ||d/ kr`td0||d/ f d } |du rn|d dd}!|d1 dd}"tt
||!}#|dv rQtd |
d2}$t|#|d2}%|d3kr|d d df \}}g }&|d d dd f  D ]#\}}||k r||k rtt|t|dd4}'|&|' |}|}qt|&dkrt|&d5}'d6|"}(d7|(gi})t|'d8|)d9} n|d(krP|d d df \}}g }&|d d dd f  D ]#\}}||k r-||k r-tt|t|d:d4}'|&|' |}|}qt|&dkr?t|&d5}'d6|"}(d7|(gi})t|'d8|)d9} n)tt
||"}(t|(|
d2}$t|#|d2}%|d'krm|% }%ntd |
d2}$td |d2}%t|$|	d6d;}*t|%|d6d;}+| d ur|*j|  |*|+g},t|,|}-t|d |-_t|d |-_t|d< |-_t|d |-_|-  S d S )=N   T   Fz&line has %d columns; expected 21 or 23   	   
            c                 S      g | ]}t |qS r   int).0	blockSizer   r   r   
<listcomp>f  s    z:AlignmentIterator._read_next_alignment.<locals>.<listcomp>   r4   c                 S   r   r   r   r   startr   r   r   r   i         c                 S   r   r   r   r   r   r   r   r   j  r      z5Inconsistent number of blocks (%d found, expected %d)zDInconsistent number of query start positions (%d found, expected %d)zEInconsistent number of target start positions (%d found, expected %d))+++-r2   r   r*      z/Inconsistent qNumInsert found (%s, expected %d)   z0Inconsistent qBaseInsert found (%s, expected %d)   z/Inconsistent tNumInsert found (%s, expected %d)   z0Inconsistent tBaseInsert found (%s, expected %d)            r0   r   r+   z+Inconsistent tStart found (%d, expected %d)r,   z)Inconsistent tEnd found (%d, expected %d)r.   z+Inconsistent qStart found (%d, expected %d)r/   z)Inconsistent qEnd found (%d, expected %d)   )lengthr   )r[   rN   r(   translationCDS)type
qualifiersr-   )r<   descriptionrw   )r}   r!   r   rz   r>   rE   r   rstripr@   arrayrI   rG   rF   dictr   r   r	   r   rN   r
   r   r   featuresr   rJ   rK   rL   rM   ).r   r$   rn   linesrm   pslxr[   rR   rU   rS   rT   rl   r`   ra   rb   qBlockSizestBlockSizes	qPosition	tPositionr9   
tBlockSize
qBlockSizerc   rd   r\   r]   r^   r_   re   rf   rX   rY   featureqSeqstSeqsrh   target_sequencequery_sequence	locationslocationrg   r   target_recordquery_recordrecordsrQ   r   r   r   _read_next_alignmentP  s  










 







"

 






 




 






 z&AlignmentIterator._read_next_alignmentN)rp   rq   rr   rs   rt   r~   r   r   r   r   r   rv   6  s
    rv   )rs   	itertoolsr   numpyr@   	Bio.Alignr   r   Bio.Seqr   r   r   Bio.SeqFeaturer   r	   r
   r   Bio.SeqRecordr   r   rv   r   r   r   r   <module>   s"     