o
    Rŀg                     @   s(   d Z ddlZdd Zdd Zdd ZdS )	z'Methods for parsing yn00 results files.    Nc           
      C   s   g }| D ]g}t d|}|durkt d|d}dd |D }|d }|| i ||< tdt|d	D ]2}i }	|| |	d
< ||d  |	d< ||d  |	d< d|	i|| ||d	  < d|	i|||d	   |< q8q||fS )a:  Parse the Nei & Gojobori (1986) section of the results.

    Nei_Gojobori results are organized in a lower
    triangular matrix, with the sequence names labeling
    the rows and statistics in the format:
    w (dN dS) per column
    Example row (2 columns):
    0.0000 (0.0000 0.0207) 0.0000 (0.0000 0.0421)
    z1^([^\s]+?)(\s+-?\d+\.\d+.*$|\s*$|-1.0000\s*\(.*$)N
-*\d+\.\d+   c                 S      g | ]}t |qS  float.0valr   r   N/var/www/html/myenv/lib/python3.10/site-packages/Bio/Phylo/PAML/_parse_yn00.py
<listcomp>,       zparse_ng86.<locals>.<listcomp>   r      omegadNdSNG86)rematchfindallgroupstripappendrangelen)
linesresults	sequenceslinematrix_row_resline_floats_resline_floatsseq_nameir   r   r   r   
parse_ng86   s(   

r%   c                 C   s  | D ]}t d|}dd |D }t d|}|durt|d}t|d}||d  }	||d  }
i }|d |d	< |d |d
< |d |d< |d |d< |d |d< |d |d< |d |d< |d |d< |d |d< |||	 |
 d< |||
 |	 d< d}	d}
q|S )zParse the Yang & Nielsen (2000) part of the results.

    Yang & Nielsen results are organized in a table with
    each row comprising one pairwise species comparison.
    Rows are labeled by sequence number rather than by
    sequence name.
    r   c                 S   r   r   r   r   r   r   r   r   J   r   zparse_yn00.<locals>.<listcomp>z\s+(\d+)\s+(\d+)Nr   r   r   SNtr   kappa   r      r      zdN SE   r      zdS SEYN00)r   r   r   intr   )r   r   r   r   r!   r"   row_resseq1seq2	seq_name1	seq_name2r/   r   r   r   
parse_yn00;   s2   r6   c              
   C   sN  d}d}| D ]}t d|}|dur|d}|d}q|dur|durd|v ri }|dd  }t d|}	|	D ])}
|
dd	  }|
dd  }zt|||< W q< tye   d||< Y q<w d
|v r{||| | d< ||| | d< qd|v r||| | d< ||| | d< qd|v r||| | d< ||| | d< q|S )a  Parse the results from the other methods.

    The remaining methods are grouped together. Statistics
    for all three are listed for each of the pairwise
    species comparisons, with each method's results on its
    own line.
    The stats in this section must be handled differently
    due to the possible presence of NaN values, which won't
    get caught by my typical "line_floats" method used above.
    Nz\d+ \((.+)\) vs. \d+ \((.+)\)r   r   zdS =:z[dSNwrho]{1,3} =.{7,8}?=r   zLWL85:LWL85LWL85mLPB93)r   r   r   splitr   r   r   
ValueError)r   r   r   r4   r5   r   comp_resstats
line_statsres_matches	stat_pairstatvaluer   r   r   parse_othersb   s>   
rE   )__doc__r   r%   r6   rE   r   r   r   r   <module>   s
   .'