o
    Rŀg4                     @   sh   d Z ddlmZ g dZG dd dZdd Zdd	 Zd
d Zdd Ze	dkr2ddl
mZ e  dS dS )aI  Parser for PHD files output by PHRED and used by PHRAP and CONSED.

This module can be used directly, which will return Record objects
containing all the original data in the file.

Alternatively, using Bio.SeqIO with the "phd" format will call this module
internally.  This will give SeqRecord objects for each contig sequence.
    )Seq)CHROMAT_FILEABI_THUMBPRINTPHRED_VERSIONCALL_METHODQUALITY_LEVELSTIMETRACE_ARRAY_MIN_INDEXTRACE_ARRAY_MAX_INDEXTRIMTRACE_PEAK_AREA_RATIOCHEMDYEc                   @   s   e Zd ZdZdd ZdS )Recordz!Hold information from a PHD file.c                 C   s:   d| _ i | _tD ]	}d| j| < qg | _d| _d| _dS )zInitialize the class. N)	file_namecomments	CKEYWORDSlowersitesseqseq_trimmed)selfkw r   F/var/www/html/myenv/lib/python3.10/site-packages/Bio/Sequencing/Phd.py__init__(   s   
zRecord.__init__N)__name__
__module____qualname____doc__r   r   r   r   r   r   %   s    r   c                 C   sd   t | }z#t|}z	t| W td ty'   | Y W || ur&|  S S w || ur1|  w w )aW  Read one PHD record from the file and return it as a Record object.

    Argument source is a file-like object opened in text mode, or a path
    to a file.

    This function reads PHD file data line by line from the source, and
    returns a single Record object. A ValueError is raised if more than
    one record is found in the file.
    zMore than one PHD record found)_open_readnextStopIterationclose
ValueErrorsourcehandlerecordr   r   r   read3   s   



r+   c                 c   sP    t | }z	 t|}|sW || ur|  dS dS |V  q|| ur'|  w w )aL  Iterate over a file yielding multiple PHD records.

    Argument source is a file-like object opened in text mode, or a path
    to a file.

    The data is read line by line from the source.

    Typical usage::

        records = parse(handle)
        for record in records:
            # do something with the record object

    TN)r!   r"   r%   r'   r   r   r   parseK   s   
r,   c                 C   s@   zt | }W |S  ty   | }|ddkrtdd Y |S w )Nr   r   z&PHD files must be opened in text mode.)open	TypeErrorr+   r&   )r(   r)   r   r   r   r!   i   s   

r!   c           	      C   s  | D ]}| drt }|dd   |_ nqd S | D ]	}| dr% nqtd| D ]Z}| }|s5q,|dkr; nP|dd\}}| }| }|dv rU||j|< q,|d	v rat	||j|< q,|d
krmt
||j|< q,|dkr| \}}}t	|t	|t
|f|j|< q,td| D ]	}| dr nqtd| D ]}| dr n| }t|dv r|jt| qtd| D ]	}| dr nqtdtddd |jD |_|jd d ur|jd d d \}}|j|| |_|S )NBEGIN_SEQUENCE   BEGIN_COMMENTz!Failed to find BEGIN_COMMENT lineEND_COMMENT:   )chromat_filephred_versioncall_methodchemdyetimebasecaller_versiontrace_processor_version)abi_thumbprintquality_levelstrace_array_min_indextrace_array_max_indextrace_peak_area_ratiotrimzFailed to find END_COMMENT line	BEGIN_DNAzFailed to find BEGIN_DNA lineEND_DNA)      zODNA line must contain a base and quality score, and optionally a peak location.END_SEQUENCEz Failed to find END_SEQUENCE liner   c                 s   s    | ]}|d  V  qdS )r   Nr   ).0nr   r   r   	<genexpr>   s    z_read.<locals>.<genexpr>rE   )
startswithr   rstripr   r&   stripsplitr   r   intfloatlenr   appendtupler   joinr   r   )	r)   liner*   keywordvaluefirstlastprobpartsr   r   r   r"   s   sp   





r"   __main__)run_doctestN)r    Bior   r   r   r+   r,   r!   r"   r   
Bio._utilsr]   r   r   r   r   <module>   s   		
V
