o
    Rŀg=                     @   s   d Z ddlZg dZg dZg dZg dZg dZg dZg d	Zd
d Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zefd$d%Zefd&d'Zd(d) Zed*krwdd+lmZ edd, dS dS )-a  Parsers for the GAF, GPA and GPI formats from UniProt-GOA.

Uniprot-GOA README + GAF format description:
ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/README

Gene Association File, GAF formats:
http://geneontology.org/docs/go-annotation-file-gaf-format-2.2/
http://geneontology.org/docs/go-annotation-file-gaf-format-2.1/
http://geneontology.org/docs/go-annotation-file-gaf-format-2.0/

Gene Product Association Data  (GPA format) README:
http://geneontology.org/docs/gene-product-association-data-gpad-format/

Gene Product Information (GPI format) README:
http://geneontology.org/docs/gene-product-information-gpi-format/

Go Annotation files are located here:
ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/
    N)DBDB_Object_IDDB_Object_Symbol	QualifierGO_IDDB:ReferenceEvidenceWithAspectDB_Object_NameSynonymDB_Object_TypeTaxon_IDDateAssigned_ByAnnotation_ExtensionGene_Product_Form_ID)r   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   )r   r   r   r   r   zEvidence coder	   Interacting_taxon_IDr   Assigned_byr   Spliceform_ID)r   r   r   r   r   ECO_Evidence_coder	   r   r   r   zAnnotation ExtensionAnnotation_Properties)r   	DB_subsetr   r   r   DB_Object_Synonymr   TaxonAnnotation_Target_SetAnnotation_CompletedParent_Object_ID)	r   r   r   r   r   r   r   DB_XrefGene_Product_Properties)
r   r   r   r   r   r   r   r   r   r   c                 c   sp    | D ]2}|d dkrq| dd}t|dkrq|d d|d< |d d|d< ttt|V  qd	S )
zRead GPI 1.0 format files (PRIVATE).

    This iterator is used to read a gp_information.goa_uniprot
    file which is in the GPI 1.0 format.
    r   !
	      |   N)rstripsplitlendictzipGPI10FIELDShandleinlineinrec r1   C/var/www/html/myenv/lib/python3.10/site-packages/Bio/UniProt/GOA.py_gpi10iterator   s   r3   c                 c       | D ]D}|d dkrq| dd}t|dkrq|d d|d< |d d|d< |d	 d|d	< |d
 d|d
< ttt|V  qdS )zRead GPI 1.1 format files (PRIVATE).

    This iterator is used to read a gp_information.goa_uniprot
    file which is in the GPI 1.1 format.
    r   r    r!   r"   r#      r%         r&   N)r'   r(   r)   r*   r+   GPI11FIELDSr-   r1   r1   r2   _gpi11iterator      r9   c                 c   r4   )zRead GPI 1.2 format files (PRIVATE).

    This iterator is used to read a gp_information.goa_uniprot
    file which is in the GPI 1.2 format.
    r   r    r!   r"   r#   r6   r%      r&   	   N)r'   r(   r)   r*   r+   GPI12FIELDSr-   r1   r1   r2   _gpi12iterator   r:   r>   c                 C   sh   |   }| dkrt| S | dkrt| S | dkr"t| S | dkr,tdtd| d)zRead GPI format files.

    This function should be called to read a
    gp_information.goa_uniprot file. At the moment, there is
    only one format, but this may change, so
    this function is a placeholder a future wrapper.
    z!gpi-version: 1.2z!gpi-version: 1.1z!gpi-version: 1.0z!gpi-version: 2.1z1Sorry, parsing GPI version 2 not implemented yet.zUnknown GPI version r!   )readlinestripr>   r9   r3   NotImplementedError
ValueErrorr.   r/   r1   r1   r2   gpi_iterator   s   rD   c                 c   r4   )zRead GPA 1.0 format files (PRIVATE).

    This iterator is used to read a gp_association.*
    file which is in the GPA 1.0 format. Do not call directly. Rather,
    use the gpaiterator function.
    r   r    r!   r"   r#   r5   r%   r;      
   N)r'   r(   r)   r*   r+   GPA10FIELDSr-   r1   r1   r2   _gpa10iterator      rH   c                 c   r4   )zRead GPA 1.1 format files (PRIVATE).

    This iterator is used to read a gp_association.goa_uniprot
    file which is in the GPA 1.1 format. Do not call directly. Rather
    use the gpa_iterator function
    r   r    r!   r"   r#   r5   r%   r;   rE   rF   N)r'   r(   r)   r*   r+   GPA11FIELDSr-   r1   r1   r2   _gpa11iterator   rI   rK   c                 C   s@   |   }| dkrt| S | dkrt| S td| d)zRead GPA format files.

    This function should be called to read a
    gene_association.goa_uniprot file. Reads the first record and
    returns a gpa 1.1 or a gpa 1.0 iterator as needed
    z!gpa-version: 1.1z!gpa-version: 1.0zUnknown GPA version r!   )r?   r@   rK   rH   rB   rC   r1   r1   r2   gpa_iterator  s   rL   c                 c       | D ]M}|d dkrq| dd}t|dkrq|d d|d< |d d|d< |d	 d|d	< |d
 d|d
< |d d|d< ttt|V  qd S Nr   r    r!   r"   r#   r6   r%   r$   r7   rF      )r'   r(   r)   r*   r+   GAF20FIELDSr-   r1   r1   r2   _gaf20iterator     rQ   c                 c   rM   rN   )r'   r(   r)   r*   r+   GAF10FIELDSr-   r1   r1   r2   _gaf10iterator.  rR   rT   c                 c       d }g }| D ]m}|d dkrq| dd}t|dkrq|d d|d< |d d|d< |d	 d|d	< |d
 d|d
< |d d|d< ttt|}|d |krk|rkt|}|g}|d }|V  q|d }|| qd S Nr   r    r!   r"   r#   r6   r%   r$   r7   rF   rO   r   )r'   r(   r)   r*   r+   rS   copyappendr.   cur_idid_rec_listr/   r0   cur_recret_listr1   r1   r2   _gaf10byproteiniterator=  .   
r^   c                 c   rU   rV   )r'   r(   r)   r*   r+   rP   rW   rX   rY   r1   r1   r2   _gaf20byproteiniteratorV  r_   r`   c                 C   sh   |   }| dkrt| S | dkrt| S | dkr"t| S | dkr,t| S td| d)a  Iterate over records in a gene association file.

    Returns a list of all consecutive records with the same DB_Object_ID
    This function should be called to read a
    gene_association.goa_uniprot file. Reads the first record and
    returns a gaf 2.0 or a gaf 1.0 iterator as needed
    2016-04-09: added GAF 2.1 iterator & fixed bug in iterator assignment
    In the meantime GAF 2.1 uses the GAF 2.0 iterator
    !gaf-version: 2.0!gaf-version: 1.0!gaf-version: 2.1!gaf-version: 2.2Unknown GAF version r!   )r?   r@   r`   r^   rB   rC   r1   r1   r2   gafbyproteiniteratoro  s   
rf   c                 C   sh   |   }| dkrt| S | dkrt| S | dkr"t| S | dkr,t| S td| d)a  Iterate over a GAF 1.0 or 2.x file.

    This function should be called to read a
    gene_association.goa_uniprot file. Reads the first record and
    returns a gaf 2.x or a gaf 1.0 iterator as needed

    Example: open, read, interat and filter results.

    Original data file has been trimmed to ~600 rows.

    Original source ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/YEAST/goa_yeast.gaf.gz

    >>> from Bio.UniProt.GOA import gafiterator, record_has
    >>> Evidence = {'Evidence': set(['ND'])}
    >>> Synonym = {'Synonym': set(['YA19A_YEAST', 'YAL019W-A'])}
    >>> Taxon_ID = {'Taxon_ID': set(['taxon:559292'])}
    >>> with open('UniProt/goa_yeast.gaf', 'r') as handle:
    ...     for rec in gafiterator(handle):
    ...         if record_has(rec, Taxon_ID) and record_has(rec, Evidence) and record_has(rec, Synonym):
    ...             for key in ('DB_Object_Name', 'Evidence', 'Synonym', 'Taxon_ID'):
    ...                 print(rec[key])
    ...
    Putative uncharacterized protein YAL019W-A
    ND
    ['YA19A_YEAST', 'YAL019W-A']
    ['taxon:559292']
    Putative uncharacterized protein YAL019W-A
    ND
    ['YA19A_YEAST', 'YAL019W-A']
    ['taxon:559292']
    Putative uncharacterized protein YAL019W-A
    ND
    ['YA19A_YEAST', 'YAL019W-A']
    ['taxon:559292']

    ra   rc   rd   rb   re   r!   )r?   r@   rQ   rT   rB   rC   r1   r1   r2   gafiterator  s   %rg   c                 C   s   d}|dd D ]'}t | | tr'| | D ]}||d 7 }q|dd d }q|| | d 7 }q|| |d  d 7 }|| dS )zWrite a single UniProt-GOA record to an output stream.

    Caller should know the  format version. Default: gaf-2.0
    If header has a value, then it is assumed this is the first record,
    a header is written.
     Nr%   r"   r!   )
isinstancelistwrite)outrecr.   fieldsoutstrfieldsubfieldr1   r1   r2   writerec  s   rr   c                 C   s   | D ]	}t |||d qdS )aO  Write a list of GAF records to an output stream.

    Caller should know the  format version. Default: gaf-2.0
    If header has a value, then it is assumed this is the first record,
    a header is written. Typically the list is the one read by fafbyproteinrec, which
    contains all consecutive lines with the same DB_Object_ID
    )rn   N)rr   )
outprotrecr.   rn   rm   r1   r1   r2   writebyproteinrec  s   rt   c                 C   sN   d}|D ] }t | | tr| | h}nt| | }||| @ r$d} |S q|S )zAccept a record, and a dictionary of field values.

    The format is {'field_name': set([val1, val2])}.
    If any field in the record has  a matching value, the function returns
    True. Otherwise, returns False.
    FT)rj   strset)r0   	fieldvalsretvalrp   set1r1   r1   r2   
record_has  s   rz   __main__)run_doctest)verbose)__doc__rW   rP   rS   rG   rJ   r,   r8   r=   r3   r9   r>   rD   rH   rK   rL   rQ   rT   r^   r`   rf   rg   rr   rt   rz   __name__
Bio._utilsr|   r1   r1   r1   r2   <module>   s:   :