o
    RŀgR                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
 dd	lmZ d
e	dee fddZG dd de
ZedkrNddlmZ e  dS dS )a  Bio.SeqIO support for the "phd" file format.

PHD files are output by PHRED and used by PHRAP and CONSED.

You are expected to use this module via the Bio.SeqIO functions, under the
format name "phd". See also the underlying Bio.Sequencing.Phd module.

For example, using Bio.SeqIO we can read in one of the example PHRED files
from the Biopython unit tests:

    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("Phd/phd1", "phd"):
    ...     print(record.id)
    ...     print("%s..." % record.seq[:10])
    ...     print("%s..." % record.letter_annotations["phred_quality"][:10])
    34_222_(80-A03-19).b.ab1
    ctccgtcgga...
    [9, 9, 10, 19, 22, 37, 28, 28, 24, 22]...
    425_103_(81-A03-19).g.ab1
    cgggatccca...
    [14, 17, 22, 10, 10, 10, 15, 8, 8, 9]...
    425_7_(71-A03-19).b.ab1
    acataaatca...
    [10, 10, 10, 10, 8, 8, 6, 6, 6, 6]...

Since PHRED files contain quality scores, you can save them as FASTQ or as
QUAL files, for example using Bio.SeqIO.write(...), or simply with the format
method of the SeqRecord object:

    >>> print(record[:50].format("fastq"))
    @425_7_(71-A03-19).b.ab1
    acataaatcaaattactnaccaacacacaaaccngtctcgcgtagtggag
    +
    ++++))'''')(''')$!$''')''''(+.''$!$))))+)))'''''''
    <BLANKLINE>

Or,

    >>> print(record[:50].format("qual"))
    >425_7_(71-A03-19).b.ab1
    10 10 10 10 8 8 6 6 6 6 8 7 6 6 6 8 3 0 3 6 6 6 8 6 6 6 6 7
    10 13 6 6 3 0 3 8 8 8 8 10 8 8 8 6 6 6 6 6 6 6
    <BLANKLINE>

Note these examples only show the first 50 bases to keep the output short.
    )Iterator)	SeqRecord)Phd   )	_IOSource)_TextIOSource)SequenceWriter)_get_phred_qualitysourcereturnc              	   c   s    t | }|D ]C}|jddd }t|j|||jd}|j|_d|jd< dd |jD |j	d	< zd
d |jD |j	d< W n	 t
yG   Y nw |V  qdS )zReturn SeqRecord objects from a PHD file.

    Arguments:
     - source - input stream opened in text mode, or a path to a file

    This uses the Bio.Sequencing.Phd module to do the hard work.
    Nr   r   )idnamedescriptionDNAmolecule_typec                 S      g | ]}t |d  qS )r   int.0site r   C/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqIO/PhdIO.py
<listcomp>Y       zPhdIterator.<locals>.<listcomp>phred_qualityc                 S   r   )   r   r   r   r   r   r   ]   r   peak_location)r   parse	file_namesplitr   seqcommentsannotationssitesletter_annotations
IndexError)r
   phd_records
phd_recordr   
seq_recordr   r   r   PhdIteratorB   s*   

r*   c                       s2   e Zd ZdZdeddf fddZdd Z  ZS )		PhdWriterz Class to write Phd format files.handler   Nc                    s   t  | dS )zInitialize the class.N)super__init__)selfr,   	__class__r   r   r.   k   s   zPhdWriter.__init__c           	      C   s  |j sJ dt|}|jd}t|j t|krtd|r-t|j t|kr-tdd|v r5td|j|j drC|j}n	|j d|j }| j	
d| | d	 d
d tjD D ]B}d}|dkrw|jdrvd|jd  }n|dkr|jdr|jd d}n|j|}|s|dkr| j	
|  d| d qa| j	
d t|j D ]&\}}|r| j	
d|t|| || f  q| j	
d|t|| f  q| j	
d dS )z&Write a single Phd record to the file.z No sequence present in SeqRecordr   z>Number of phd quality scores does not match length of sequencez@Number of peak location scores does not match length of sequenceNz!A quality value of None was found zBEGIN_SEQUENCE z
BEGIN_COMMENT
c                 S   s   g | ]}|  qS r   )lower)r   kr   r   r   r      s    z*PhdWriter.write_record.<locals>.<listcomp>trimz
%s %s %.4ftrace_peak_area_ratioz.4fr   z: 
zEND_COMMENT
BEGIN_DNA
z	%s %i %i
z%s %i
zEND_DNA
END_SEQUENCE
)r!   r	   r%   getlen
ValueErrorr   
startswithr   r,   writecleanr   	CKEYWORDSr#   upper	enumerateround)	r/   recordphred_qualitiespeak_locationstitleannotvalueir   r   r   r   write_recordo   sP   zPhdWriter.write_record)__name__
__module____qualname____doc__r   r.   rI   __classcell__r   r   r0   r   r+   h   s    r+   __main__)run_doctestN)rM   collections.abcr   Bio.SeqRecordr   Bio.Sequencingr   
Interfacesr   r   r   	QualityIOr	   r*   r+   rJ   
Bio._utilsrP   r   r   r   r   <module>   s   /&8
