o
    RŀgF                     @   sd   d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ G dd dej	Z	G d	d
 d
ej
Z
dS )ax  Bio.Align support for the "maf" multiple alignment format.

The Multiple Alignment Format, described by UCSC, stores a series of
multiple alignments in a single file. It is suitable for whole-genome
to whole-genome alignments, metadata such as source chromosome, start
position, size, and strand can be stored.

See http://genome.ucsc.edu/FAQ/FAQformat.html#format5

You are expected to use this module via the Bio.Align functions.

Coordinates in the MAF format are defined in terms of zero-based start
positions (like Python) and aligning region sizes.

A minimal aligned region of length one and starting at first position in the
source sequence would have ``start == 0`` and ``size == 1``.

As we can see on this example, ``start + size`` will give one more than the
zero-based end position. We can therefore manipulate ``start`` and
``start + size`` as python list slice boundaries.
    N)	Alignment)
interfaces)Seq)	SeqRecordc                   @   s4   e Zd ZdZdZdd Zdd Zdd Zd	d
 ZdS )AlignmentWriterz-Accepts Alignment objects, writes a MAF file.MAFc                 C   s   | d | D ]E\}}|dv rn)|dkr!|dvr td| n|dkr0|dvr/td| n|d	kr:d
|}nq	d
|v rCd| }| d
| d|  q	| d d S )NtracknamedescriptionframesmafDotonoffz2mafDot value must be 'on' or 'off' (received '%s')
visibilitydensepackfullzCvisibility value must be 'dense', 'pack', or 'full' (received '%s')speciesOrder z"%s"=
)writeitems
ValueErrorjoin)selfstreammetadatakeyvalue r#   A/var/www/html/myenv/lib/python3.10/site-packages/Bio/Align/maf.py_write_trackline-   s2   
z AlignmentWriter._write_tracklinec           	      C   s"  z|j }W n ty   ddi}Y nw d}|D ]}||v r%| ||  nq|d | D ]<\}}||v r8q/|dkr=q/|dkrL|dkrItdd}n|dkrSd	}n|d
krZd}ntd| |d| d|  q/|d |d}|dur|D ]}|d| d q||d dS dS )zWrite the MAF header.MAF Version1)r
   r   r   r   r   r   ##mafCommentsMAF version must be 1versionScoringscoringProgramprogramzUnexpected key '%s' for headerr   r   r   Nz# )r    AttributeErrorr%   r   r   r   get)	r   r   
alignmentsr    
track_keysr!   r"   commentscommentr#   r#   r$   write_headerF   sD   



zAlignmentWriter.write_headerc                 C   sV   z|j }W n ty   d}Y nw d|d}|d}|d ur'|d| 7 }|d S )Naza score=z.6fpassz pass=r   )scorer0   r1   )r   	alignmentannotationsr9   liner"   r#   r#   r$   _format_score_linep   s   

z"AlignmentWriter._format_score_linec           "      C   s0  t |ts	tdz|j}W n ty   i }Y nw g }| ||}|| d}d}d}d}t|j}	t	|	D ]^}
|j|
 }|j
|
 }z|j}W n tyV   d|
 }Y nw |d }|d }t|}||k rl|| }n|| }|| }t|t|}t|tt|}t|tt|}t|tt|}q8t|dg D ]Z\}
}|\}}}z|j}W n ty   d|
|	  }Y nw t|t|}|\}}t|j}||kr|| }n|| }|| }t|tt|}t|tt|}t|tt|}q|| | | d }t	|	D ]}
|j|
 }|j
|
 }z|j}W n ty(   d|
 }Y nw |d }|d }t|}||k rA|| }d}n
|| }|| }d}||
 }||}t||}t||}t||}d	| d
| d
| d
| d
| d
| d}|| z|j}W n ty   d}Y nw |dur|d}|durd}d}|D ]}|dkr|d7 }q||| 7 }|d7 }q||}d| d
| d}|| z|d }|d }|d } |d }!W n ty   Y qw ||}d| d
| d
| d
|  d
|! d}|| qt|dg D ]s\}
}|\}}}z|j}W n ty8   d|
|	  }Y nw ||}|\}}t|j}||krS|| }d}n
|| }|| }d}t||}t||}t||}d| d
| d
| d
| d
| d
| d}|| q|d d|S )zAReturn a string with a single alignment formatted as a MAF block.zExpected an Alignment objectr   zsequence_%dempty   +-s r   r   Nquality    q 
leftStatus	leftCountrightStatus
rightCounti ze )
isinstancer   	TypeErrorr;   r0   r=   appendlen	sequencesrangecoordinatesidmaxstr	enumerater1   seqljustrjustKeyErrorr   )"r   r:   alignment_annotationslinesr<   
name_widthstart_width
size_widthlength_widthnirecordrS   r
   startendlengthsizer?   segmentstatusquality_width	record_idstrandtextr;   rD   gapped_qualityjletterrH   rI   rJ   rK   r#   r#   r$   format_alignment|   s  
















*








$





*

z AlignmentWriter.format_alignmentN)	__name__
__module____qualname____doc__fmtr%   r6   r=   rr   r#   r#   r#   r$   r   (   s    *r   c                   @   s,   e Zd ZdZdZdZdZdd Zdd Zd	S )
AlignmentIteratoraJ  Alignment iterator for Multiple Alignment Format files.

    The file may contain multiple concatenated alignments, which are loaded
    and returned incrementally.

    File meta-data are stored in the ``.metadata`` attribute of the returned
    iterator.  Alignment annotations are stored in the ``.annotations``
    attribute of the ``Alignment`` object, except for the alignment score,
    which is stored as an attribute.  Sequence information of empty parts in
    the alignment block (sequences that connect the previous alignment block to
    the next alignment block, but do not align to the current alignment block)
    is stored in the alignment annotations under the ``"empty"`` key.
    Annotations specific to each line in the alignment are stored in the
    ``.annotations`` attribute of the corresponding sequence record.
    r   )CINrb   MT)ry   rz   r|   rb   c           
      C   s  i }t |}|drZt|}|dd  D ]?}|d\}}|dv r$n-|dkr3|dvr2td| n|dkrB|d	vrAtd
| n|dkrK| }ntd| |||< qt |}| }|d dkrhtd|dd  D ](}|d\}}|dkr~d}n|dkrd}n|dkrd}ntd| |||< qn|ddkrtdg }|D ]$}| r|ds|dsJ || _ n|dd   }	||	 q| 	  |r||d< || _
d S )Nztrack rF   r   r	   r   r   z7Variable mafDot in track line has unexpected value '%s'r   r   z;Variable visibility in track line has unexpected value '%s'r   z&Unexpected variable '%s' in track liner   r(   z%header line does not start with ##mafr+   r&   r-   r,   r/   r.   z'Unexpected variable '%s' in header liner'   r*   #r7   r)   )next
startswithshlexsplitr   r1   strip_alinerO   _closer    )
r   r   r    r<   wordswordr!   r"   r4   r5   r#   r#   r$   _read_header  st   







zAlignmentIterator._read_headerc           &      C   s  | j }|d u r	d S g }g }g }g }d }g }i }	|dd   }
|
D ].}|d\}}|dkr3t|}q!|dkrJt|}|dkrEtd| ||	d< q!td| |D ]}|dr[qR|d	rf|| _  nu|d
r|  }
t|
dkr{td|
d }t|
d }t|
d }|
d }t|
d }|
d }dD ]}||d}q|	|
  td |d}t||ddd}|	| |	| |	| |	| qR|dr#|  }
t|
dksJ |
d |ksJ |
d }t|
d }|
d }t|
d }|tjv sJ |tjv sJ ||jd< ||jd< ||jd< ||jd< qR|dr|dd   }
t|
dks:J |
d }t|
d }t|
d }|
d }t|
d }|
d }|tjv s`J td |d}t||ddd}|| }|dkr|||f}n|| || f}|||f} |	d}!|!d u rg }!|!|	d< |!	|  qR|dr|  }
t|
dksJ |
d |ksJ |
d dd}||jd < qR| sqRtd!| d | _ t|\}"}#t|||"|D ]&\}}}}t|j}t||krtd"t||f t||i|d|_qt|||#D ]+\}}}$|dkr4|$d# |$d  |$ |$d d < |j |_|jjd d }|$|7 }$qt||#}%|	d urO|	|%_|d urW||%_|%S )$NrF   r   r9   r8   r   z&pass value must be positive (found %d)z Unknown annotation variable '%s'r~   r7   rC      z5Error parsing alignment - 's' line must have 7 fields         r@      z.=_rB   )rg   rE   )rT   r
   r   rL   rH   rI   rJ   rK   erA   r?   rG   rD   z+Error parsing alignment - unexpected line:
z2sequence size is incorrect (found %d, expected %d)r>   )r   r   floatintr   r   r   rP   replacerO   encoder   r   rx   status_charactersr;   empty_status_charactersr1   r   parse_printed_alignmentziprX   reverse_complementdefined_rangesr9   )&r   r   alinerecordsstartssizesstrandsr9   aligned_sequencesr;   r   r   r!   r"   r<   srcre   rh   rm   srcSizern   gap_charrX   rd   rH   rI   rJ   rK   rj   sequencerf   ri   r?   
annotationrQ   rS   rowr:   r#   r#   r$   _read_next_alignmentO  s   























z&AlignmentIterator._read_next_alignmentN)	rs   rt   ru   rv   rw   r   r   r   r   r#   r#   r#   r$   rx      s    :rx   )rv   r   	Bio.Alignr   r   Bio.Seqr   Bio.SeqRecordr   r   rx   r#   r#   r#   r$   <module>   s   	 X