o
    Rŀg3                     @   s|   d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ dd	 Zd
d Zdd Zdd ZdS )a  Bio.SeqIO support for the Graphical Fragment Assembly format.

This format is output by many assemblers and includes linkage information for
how the different sequences fit together, however, we just care about the
segment (sequence) information.

Documentation:
- Version 1.x: https://gfa-spec.github.io/GFA-spec/GFA1.html
- Version 2.0: https://gfa-spec.github.io/GFA-spec/GFA2.html
    N)BiopythonWarning)	as_handle)_UndefinedSequenceData)Seq)	SeqRecordc                 C   s   |D ]j}|dd dkr>t | dkrtt|dd | _qt|dd t | kr=td|dd  dt |  dt q|dd d	krltt	| 
  }| |dd krltd
|dd  d| dt qdS )z:Check a segment line's tags for inconsistencies (PRIVATE).N   LNr      z,Segment line has incorrect length. Expected z	 but got .SHz.Segment line has incorrect checksum. Expected )lenr   int_datawarningswarnr   hashlibsha256strencode	hexdigestupper)seqtagstagchecksum r   C/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqIO/GfaIO.py_check_tags   s&   r   c                 C   sT  i }| D ]"}| d}t|dk rtd| dtd|d du r0td|d  d	t d|d
d |d
< |d |d
 f||d < |d dvrYtd|d  t q|d dkrvtd|d
 du rvtd|d
  dt q|d dkrtd|d
 du rtd|d
  dt q|d dkrtd|d
 du rtd|d
  dt q|d dkrtd|d
 du rtd|d
  dt q|d dkrtd|d
 du rtd|d
  dt q|d dkr	td|d
 du r	td|d
  dt q|d dkr'td |d
 du r'td!|d
  dt q|S )"z>Build an annotations dictionary from a list of tags (PRIVATE).:   zSegment line has invalid tag: r
   z[A-Za-z][A-Za-z0-9]r   NzTag has invalid name: z. Are they tab delimited?r      AifZJHBzTag has invalid type: Az[!-~]z:Tag has incorrect type. Expected printable character, got iz[-+]?[0-9]+z5Tag has incorrect type. Expected signed integer, got fz&[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?z,Tag has incorrect type. Expected float, got Zz[ !-~]+z7Tag has incorrect type. Expected printable string, got JzQTag has incorrect type. Expected JSON excluding new-line and tab characters, got Hz	[0-9A-F]+z?Tag has incorrect type. Expected byte array in hex format, got Bz3[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+zBTag has incorrect type. Expected array of integers or floats, got )	splitr   
ValueErrorre	fullmatchr   r   r   join)r   annotationsr   partsr   r   r   _tags_to_annotations.   sv   

    $r0   c                 c   s    t | e}|D ]Y}|dkrtdt q|dd}|d dkr$qt|dk r2td| d|d	 d
kr?tddd}nt|d	 }|dd }t	|| t
|}t||d |d |dV  qW d   dS 1 smw   Y  dS )z`Parser for GFA 1.x files.

    Documentation: https://gfa-spec.github.io/GFA-spec/GFA1.html
    
GFA data has a blank line.	r   Sr   z1Segment line must have name and sequence fields: r
   r   *Nlengthr    idnamer.   )r   r   r   r   stripr)   r   r*   r   r   r0   r   sourcehandlelinefieldsr   r   r.   r   r   r   Gfa1Iteratorq   s,   


"rA   c              
   c   s   t | ~}|D ]r}|dkrtdt q|dd}|d dkr$qt|dk r2td| dzt|d	  W n tyJ   td
| ddw |d dkrXt	ddd}nt	|d }|dd }t
|| t|}t||d |d |dV  qW d   dS 1 sw   Y  dS )znParser for GFA 2.0 files.

    Documentation for version 2: https://gfa-spec.github.io/GFA-spec/GFA2.html
    r1   r2   r3   r   r4      z:Segment line must have name, length, and sequence fields: r
   r   z*Segment line must have an integer length: Nr   r5   r6   r    r8   )r   r   r   r   r;   r)   r   r*   r   r   r   r0   r   r<   r   r   r   Gfa2Iterator   s>   



"rC   )__doc__r   r+   r   Bior   Bio.Filer   Bio.Seqr   r   Bio.SeqRecordr   r   r0   rA   rC   r   r   r   r   <module>   s    C