o
    RŀgO                     @   s<  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	l	m
Z
 d
ZedZedZedZedZedZedZedZdd Zd/ddZd0ddZdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( ZG d)d* d*eZG d+d, d,eZ e!d-krdd.l"m#Z# e#  dS dS )1z;Bio.SearchIO parser for Exonerate plain text output format.    N)chain   )_BaseExonerateIndexer)_BaseExonerateParser)_parse_hit_or_query_line)_STRAND_MAP)
_RE_VULGAR)ExonerateTextParserExonerateTextIndexerz\s*\d+\s+: (.*) :\s+\d+z@[atgc ]{2}?(?:(?:[<>]+ \w+ Intron \d+ [<>]+)|(?:\.+))[atgc ]{2}?z%(?:(\d+) bp // (\d+) bp)|(?:(\d+) bp)z--<\s+\d+\s+>--z--<\s+(\d+)\s+>--z\{(\w{1,2})\}$z^\{(\w{1,2})\}c                 C   sN   d\}}t | |D ]\}}|dkr||7 }||7 }q	||7 }||7 }q	||fS )z=Flips the codon characters from one seq to another (PRIVATE).) r    )zip)	codon_seq
target_seqabchar1char2 r   [/var/www/html/myenv/lib/python3.10/site-packages/Bio/SearchIO/ExonerateIO/exonerate_text.py_flip_codons)   s   

r   Fc           	      C   sh   d}g }|s	t }nt}| |d  }t||D ]}|||d |7 }|t| }|||f q|S )zWReturn a list of start, end coordinates for each given block in the sequence (PRIVATE).r   queryN)_RE_EXON_RE_NERresplitfindlenappend)	
parsed_seqrow_dicthas_nerstartcoordssplitterseqblockendr   r   r   _get_block_coords8   s   r(   c                 C   s   |dkr&dd | D }t t| dd }t t|ddd |ddd S t t|  dd }t t|ddd |ddd S )zReturn list of pairs covering intervening ranges (PRIVATE).

    From the given pairs of coordinates, returns a list of pairs
    covering the intervening ranges.
    c                 S   s$   g | ]\}}t ||t||fqS r   )maxmin).0r   r   r   r   r   
<listcomp>U   s   $ z%_get_inter_coords.<locals>.<listcomp>r   N   )listr   r   )r#   strandsorted_coordsinter_coordsr   r   r   _get_inter_coordsL   s   ""r3   c              	      s*  zt dd | D }| D ]
}t||ksJ qW n8 tyP   t| D ]*\ }t||krMt|d |ks7J dt|d  g| dt|d  g |  < q#Y nw g }t| d D ]\ }d fdd| D }|| qYt|dkrt|d |d	 \|d< |d	< t|d
 |d \|d
< |d< |S )zQStitches together the parsed alignment rows and returns them in a list (PRIVATE).c                 s   s    | ]}t |V  qd S N)r   r,   xr   r   r   	<genexpr>c       z_stitch_rows.<locals>.<genexpr>r.   r   r   r   c                 3   s    | ]}|  V  qd S r4   r   )r,   aln_rowidxr   r   r7   p   r8      r         )r*   r   AssertionError	enumeratejoinr   r   )raw_rowsmax_lenrow	cmbn_rowscmbn_rowr   r:   r   _stitch_rows]   s*   ,rG   c                 C   s   i }| dkrd|d< d|d< d|d< d\|d	< |d
< |S | dkrZd|v r:d|d< d|d< d|d< d|d
< d|d	< |S d|v rTd|d< d|d< d|d< d|d
< d|d	< |S t d| | dkrtd|d	< d|d< d|d< d|d< d|d
< |S t d|  )zJReturn a dictionary of row indices for parsing alignment blocks (PRIVATE).r>   r   r   r   midliner.   hit)NNqannothannotr=   protein2N2proteinzUnexpected model: r<   z+Unexpected row count in alignment block: %i)
ValueError)row_lenmodelr;   r   r   r   _get_row_dict~   s>   rQ   c                 C   s   dD ]}||v s
J qg }|D ]S\}}i }| |d  || |d< | |d  || |d< | |d  || |d< |d durK| |d  || |d< |d	 dur]| |d	  || |d
< | | q|S )zNReturn a list of dictionaries of sequences split by the coordinates (PRIVATE).)r   rI   rH   rJ   rK   r   rI   rH   
similarityrJ   Nquery_annotationrK   hit_annotation)r   )rowsr#   r;   idx_nameblocksr"   r'   r&   r   r   r   _get_blocks   s   rX   c                 C   s   g g d}|D ]I}g }| D ]>}t t|| }t t|| }|r0t|d}||df n|d |rFt|d}|d|f q|d q|||< q|S )zRGet a dictionary of split codon locations relative to each fragment end (PRIVATE).r   rI   r   r   )r   r   )r   search_RE_SCODON_START_RE_SCODON_ENDr   groupr   )tmp_seq_blocksscodon_movesseq_typescoordsr&   m_startm_endr   r   r   _get_scodon_moves   s    


rd   c                 C   sB   g }| D ]}|D ]}||  dd dd||< q|| q|S )zMRemove curly braces (split codon markers) from the given sequences (PRIVATE).{r   })replacer   )r^   
seq_blocks	seq_block	line_namer   r   r   _clean_blocks   s   rk   c           	         s    dkrdnd} fdd|D }t |t |ksJ g }t||D ]7\}}|rRt|dd rA|dkr:t|d nt|d }n|d rLt|d }ntd	| d}|| q"|S )
z9Return the length of introns between fragments (PRIVATE).r   rI   c                    s   g | ]}d |  v qS )Intronr   r5   r`   r   r   r-      s    z%_comp_intron_lens.<locals>.<listcomp>Nr.   r   r   z$Unexpected intron parsing result: %r)r   r   allintrN   r   )	r`   inter_blocksraw_inter_lensopp_typehas_intron_after
inter_lensflag
parsed_len
intron_lenr   rm   r   _comp_intron_lens   s   rx   c                 C   s   |dv sJ | d|  dkrdnd}| d|  }|t | | d ddd	dd
d|  }||fg}t| | dd D ]#\}}|d d || |  }	|	|t |dd  }
||	|
f q?|dkr~t|D ]\}}|| d || d f||< qk|S )zAFill the block coordinates of the given hsp dictionary (PRIVATE).)rI   r   	%s_strandr   r   r)   %s_start-r   ><N)r   rg   r@   r   )hspr`   rt   seq_stepfstartfendr#   r;   r&   bstartbendcoordr   r   r   _comp_coords  s$   &
r   c                 C   s   g }t t|| D ]k}|| | }t|sq
t|rJ |\}}| d|  |d  }| d|  dkr5dnd}	|rQ|	dkr?tnt}
|
|}|||	 d  |}}n|rh|	dkrYtnt}
|
|}|||	  |}}|t||t||f q
|S )zKCompute positions of split codons, store in given HSP dictionary (PRIVATE).	%s_rangesr.   ry   r   r   r)   )ranger   anyrn   r*   r+   r   )r~   r`   r_   scodonsr;   pairr   r   anchor_pairr0   funcanchorstart_cend_cr   r   r   _comp_split_codons  s&   r   c                   @   s$   e Zd ZdZdZdd Zdd ZdS )r	   z'Parser for Exonerate plain text output.zC4 Alignment:c              	      s  |d }|d }|d }dD ]
}||v sJ |q|   \}}t|}tt||d }	d|d  v }
t||	|
}t|||	}t|}t|}t	|d  |d< t	|d  |d< t
|d	 |d	< t
|d
 |d
< t
|d |d< t
|d |d< t
|d |d< dd |D |d< dd |D |d< i |d< d|d v sd|d v sd|d v rd|d< dD ] z fdd|D |d  < W q ty   Y qw |
st|}t|||	}tt||	d  }dD ]X}|
s|dkrdnd}t|||}n|}dd tt||	|  D }t|t|| d kr"tdt|t|| d f t||||d| < |
s9t||||d | < qdD ]!}|d!|  d"kr\d#| }d$| }|| || ||< ||< q<|||d%S )&z7Parse alignment block, return query result, hits, hsps.qresultrI   r~   )query_start	query_end	hit_starthit_endquery_strand
hit_strandrP   NERr   r   r   r   r   r   scorec                 S      g | ]}|d  qS )r   r   r5   r   r   r   r-   i      z=ExonerateTextParser.parse_alignment_block.<locals>.<listcomp>r   c                 S   r   )rI   r   r5   r   r   r   r-   j  r   aln_annotationrL   coding2rM   proteinmolecule_type)rR   rS   rT   c                    s   g | ]}|  qS r   r   r5   
annot_typer   r   r-   w  r   rH   rY   c                 S   s   g | ]}t |qS r   )ro   r5   r   r   r   r-     s    r   zLength mismatch: %r vs %rr   z%s_split_codonsry   r)   rz   z%s_end)r   rI   r~   )_read_alignmentrG   rQ   r   upperr(   rX   rd   rk   r   ro   KeyErrorr3   r   findall_RE_EXON_LENrx   _RE_NER_LENrN   r   r   )selfheaderr   rI   r~   val_nameraw_aln_blocksvulgar_comprE   r    r!   
seq_coordsr^   r_   rh   r2   rp   rq   r`   rr   rt   n_startn_endr   r   r   parse_alignment_block>  s   z)ExonerateTextParser.parse_alignment_blockc           	      C   s   g }d}d}	 t t| j }|r(|s(| j|d}t|d}d}g }|r6|| j|||   |rK|rKt|dkrK|| d}d}d}| j	
 | _| jdrct t| j}|d}| jrm| j| jrx| jssd| _	 ||fS q)zGRead the raw alignment block strings, returns them in a list (PRIVATE).FNTr   vulgar
   mock)r   rZ   _RE_ALN_ROWlinestripindexr]   r   r   handlereadline
startswithr   	_ALN_MARK)	r   r   
in_aln_rowr   match	start_idxrO   raw_aln_blockr   r   r   r   r     s6   

z#ExonerateTextParser._read_alignmentN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r	   9  s
    yr	   c                   @   s(   e Zd ZdZeZdZdd Zdd ZdS )r
   z'Indexer class for Exonerate plain text.s   C4 Alignmentc                 C   sL   | j }|| d}	 |  }||rn|stqt| \}}|S )z3Return the query ID from the nearest "Query:" line.s   Query:)_handleseekr   r   r   StopIterationr   decode)r   posr   sentinelr   qiddescr   r   r   get_qresult_id  s   

z#ExonerateTextIndexer.get_qresult_idc                 C   s   | j }|| d}d}	 | }|s	 |S || jr;| }|du r*| |}n| |}||kr6	 |S || ||7 }q)zDReturn the raw string of a QueryResult object from the given offset.N    )r   r   r   r   _query_marktellr   )r   offsetr   qresult_keyqresult_rawr   cur_poscurr_keyr   r   r   get_raw  s(   


zExonerateTextIndexer.get_rawN)	r   r   r   r   r	   _parserr   r   r   r   r   r   r   r
     s    r
   __main__)run_doctest)F)r   )$r   r   	itertoolsr   _baser   r   r   r   exonerate_vulgarr   __all__compiler   r   r   r   r   r[   r\   r   r(   r3   rG   rQ   rX   rd   rk   rx   r   r   r	   r
   r   
Bio._utilsr   r   r   r   r   <module>   sH   







!(# +0
