o
    Rŀg*                  
   @   s   d Z ddlZdd Zdd ZG dd dZed	ed
ededededededdZdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zd d! ZdS )"a*  Code to deal with COMPASS output, a program for profile/profile comparison.

Compass is described in:

Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein
alignments with assessment of statistical significance. J Mol Biol. 2003 Feb
7;326(1):317-36.

Tested with COMPASS 1.24.
    Nc              	   C   s   d}z2t | }t }t|| t | }t|| t | }t|| t | }t|| t | }t|| W n tyG   |sBtddtddw | D ],}|	 sQqJt
|| zt | }t|| t | }t|| W qJ tyv   tddw |S )z2Read a COMPASS file containing one COMPASS record.NzNo record found in handleUnexpected end of stream.)nextRecord__read_names__read_threshold__read_lengths__read_profilewidth__read_scoresStopIteration
ValueErrorstrip__read_query_alignment__read_positive_alignment__read_hit_alignmenthandlerecordline r   H/var/www/html/myenv/lib/python3.10/site-packages/Bio/Compass/__init__.pyread   s>   








r   c              	   c   s   d}zt | }W n
 ty   Y dS w 	 z.t }t|| t | }t|| t | }t|| t | }t|| t | }t|| W n tyO   tddw | D ]5}|	 sYqRd|v rb|V   n+t
|| zt | }t|| t | }t|| W qR ty   tddw |V  dS q)z'Iterate over records in a COMPASS file.NTr   Ali1:)r   r
   r   r   r   r   r   r	   r   r   r   r   r   r   r   r   r   parse8   sP   







r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	r   zQHold information from one compass hit.

    Ali1 is the query, Ali2 the hit.
    c                 C   sp   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	d| _
d| _d| _d| _d| _d| _d| _d| _dS )zInitialize the class. r   N)queryhitgap_thresholdquery_lengthquery_filtered_lengthquery_nseqsquery_neffseqs
hit_lengthhit_filtered_length	hit_nseqshit_neffseqssw_scoreevaluequery_start	hit_start	query_alnhit_aln	positives)selfr   r   r   __init__f   s$   
zRecord.__init__c                 C      | j dd}t|S )z8Return the length of the query covered in the alignment.=r   )r*   replacelenr-   sr   r   r   query_coverage{      zRecord.query_coveragec                 C   r/   )z6Return the length of the hit covered in the alignment.r0   r   )r+   r1   r2   r3   r   r   r   hit_coverage   r6   zRecord.hit_coverageN)__name__
__module____qualname____doc__r.   r5   r7   r   r   r   r   r   `   s
    r   z Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+z4Threshold of effective gap content in columns: (\S+)zOlength1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)\s+filtered_length2=(\S+)z7Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)z-Smith-Waterman score = (\S+)\s+Evalue = (\S+)z(\d+)z^.{15}(\S+)z
^.{15}(.+))names	thresholdlengthsprofilewidthscoresstartalignpositive_alignmentc                 C   s@   d|vrt d| td |}|d| _|d| _d S )Nr   zLine does not contain 'Ali1:':
r<         )r   __regexsearchgroupr   r   r   r   mr   r   r   r      s
   r   c                 C   s:   | dstd| td |}t|d| _d S )N	Thresholdz&Line does not start with 'Threshold':
r=   rD   )
startswithr   rF   rG   floatrH   r   rI   r   r   r   r      s   
r   c                 C   sj   | dstd| td |}t|d| _t|d| _t|d| _	t|d| _
d S )Nzlength1=z%Line does not start with 'length1=':
r>   rD   rE         )rL   r   rF   rG   intrH   r   rM   r   r"   r#   rI   r   r   r   r      s   
r   c                 C   sh   d|vrt d| td |}t|d| _t|d| _t|d| _t|d| _	d S )NNseqs1z Line does not contain 'Nseqs1':
r?   rD   rE   rN   rO   )
r   rF   rG   rP   rH   r    rM   r!   r$   r%   rI   r   r   r   r      s   r   c                 C   s^   | dstd| td |}|r't|d| _t|d| _d S d| _d| _d S )NzSmith-Watermanz+Line does not start with 'Smith-Waterman':
r@   rD   rE   r   g      )	rL   r   rF   rG   rP   rH   r&   rM   r'   rI   r   r   r   r	      s   

r	   c                 C   X   t d |}|rt|d| _t d |}|d us J d|  j|d7  _d S NrA   rD   rB   invalid match)rF   rG   rP   rH   r(   matchr*   rI   r   r   r   r         r   c                 C   s6   t d |}|d usJ d|  j|d7  _d S )NrC   rT   rD   )rF   rU   r,   rH   rI   r   r   r   r      s   r   c                 C   rR   rS   )rF   rG   rP   rH   r)   rU   r+   rI   r   r   r   r      rV   r   )r;   rer   r   r   compilerF   r   r   r   r   r	   r   r   r   r   r   r   r   <module>   s4   "()


	