o
    Rŀg                     @   sN   d Z dd Zdd ZG dd deZdd Zed	kr%d
dlmZ e  dS dS )aX  Parser for the cellosaurus.txt file from ExPASy.

See https://web.expasy.org/cellosaurus/

Tested with the release of Version 18 (July 2016).

Functions:
 - read       Reads a file containing one cell line entry
 - parse      Reads a file containing multiple cell line entries

Classes:
 - Record     Holds cell line data.

Examples
--------
This example downloads the Cellosaurus database and parses it. Note that
urlopen returns a stream of bytes, while the parser expects a stream of plain
string, so we use TextIOWrapper to convert bytes to string using the UTF-8
encoding. This is not needed if you download the cellosaurus.txt file in
advance and open it (see the comment below).

    >>> from urllib.request import urlopen
    >>> from io import TextIOWrapper
    >>> from Bio.ExPASy import cellosaurus
    >>> url = "ftp://ftp.expasy.org/databases/cellosaurus/cellosaurus.txt"
    >>> bytestream = urlopen(url)
    >>> textstream = TextIOWrapper(bytestream, "UTF-8")
    >>> # alternatively, use
    >>> # textstream = open("cellosaurus.txt")
    >>> # if you downloaded the cellosaurus.txt file in advance.
    >>> records = cellosaurus.parse(textstream)
    >>> for record in records:
    ...     if 'Homo sapiens' in record['OX'][0]:
    ...         print(record['ID'])  # doctest:+ELLIPSIS
    ...
    #15310-LN
    #W7079
    (L)PC6
    0.5alpha
    ...

c                 c   s    	 t | }|s
dS |V  q)zParse cell line records.

    This function is for parsing cell line files containing multiple
    records.

    Arguments:
     - handle   - handle to the file.

    TN)__read)handlerecord r   J/var/www/html/myenv/lib/python3.10/site-packages/Bio/ExPASy/cellosaurus.pyparse2   s   
r   c                 C   s    t | }|  }|rtd|S )zRead one cell line record.

    This function is for parsing cell line files containing
    exactly one record.

    Arguments:
     - handle   - handle to the file.

    z$More than one cell line record found)r   read
ValueError)r   r   	remainderr   r   r   r   C   s
   
r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	Recorda$  Holds information from an ExPASy Cellosaurus record as a Python dictionary.

    Each record contains the following keys:

    =========  ==============================  =======================
    Line code  Content                         Occurrence in an entry
    =========  ==============================  =======================
    ID         Identifier (cell line name)     Once; starts an entry
    AC         Accession (CVCL_xxxx)           Once
    AS         Secondary accession number(s)   Optional; once
    SY         Synonyms                        Optional; once
    DR         Cross-references                Optional; once or more
    RX         References identifiers          Optional: once or more
    WW         Web pages                       Optional; once or more
    CC         Comments                        Optional; once or more
    ST         STR profile data                Optional; twice or more
    DI         Diseases                        Optional; once or more
    OX         Species of origin               Once or more
    HI         Hierarchy                       Optional; once or more
    OI         Originate from same individual  Optional; once or more
    SX         Sex of cell                     Optional; once
    AG         Age of donor at sampling        Optional; once
    CA         Category                        Once
    DT         Date (entry history)            Once
    //         Terminator                      Once; ends an entry
    =========  ==============================  =======================

    c                 C   s   t |  d| d< d| d< d| d< d| d< g | d< g | d< g | d< g | d	< g | d
< g | d< g | d< g | d< g | d< d| d< d| d< d| d< d| d< dS )zInitialize the class. IDACASSYDRRXWWCCSTDIOXHIOISXAGCADTN)dict__init__selfr   r   r   r   s   s$   
zRecord.__init__c                 C   sX   | d r%| d r| j j d| d  d| d  dS | j j d| d  dS | j j dS )z@Return the canonical string representation of the Record object.r   r   z (z, )z ( ))	__class____name__r   r   r   r   __repr__   s
   "zRecord.__repr__c                 C   s4  d| d  }|d| d  7 }|d| d  7 }|d| d  7 }|d	t | d
  7 }|dt | d  7 }|dt | d  7 }|dt | d  7 }|dt | d  7 }|dt | d  7 }|dt | d  7 }|dt | d  7 }|dt | d  7 }|d| d  7 }|d| d  7 }|d| d   7 }|d!| d"  7 }|S )#z=Return a readable string representation of the Record object.zID: r   z AC: r   z AS: r   z SY: r   z DR: r   z RX: r   z WW: r   z CC: r   z ST: r   z DI: r   z OX: r   z HI: r   z OI: r   z SX: r   z AG: r   z CA: r   z DT: r   )repr)r    outputr   r   r   __str__   s$   zRecord.__str__N)r#   
__module____qualname____doc__r   r$   r'   r   r   r   r   r
   U   s
    
r
   c                 C   s   d }| D ]Z}|d d |dd    }}|dkr!t }||d< q|dv r.||  |7  < q|dv r:|| | q|dkrS|d\}}|d | | f q|dkr^|r]|  S qq|retd	d S )
N      r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   ;z//zUnexpected end of stream)rstripr
   appendsplitstripr   )r   r   linekeyvaluekvr   r   r   r      s*   
r   __main__    )run_doctestN)	r*   r   r   r   r
   r   r#   
Bio._utilsr9   r   r   r   r   <module>   s   ,V"
