o
    Rŀg                  	   @   s   d Z ddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 edZdedee fd	d
Zdedee fddZdedefddZG dd dZ	ddedeee  dedefddZedkrmddlmZ e  dS dS )a  Code for dealing with assorted UniProt file formats and interacting with the UniProt database.

This currently include parsers for the GAF, GPA and GPI formats
from UniProt-GOA as the module Bio.UniProt.GOA.

See also Bio.SwissProt and the "swiss" support in Bio.SeqIO for
the legacy plain text sequence format still used in UniProt.

See also Bio.SeqIO.SwissIO for the "uniprot-xml" support in
Bio.SeqIO.
    N)HTTPResponse)Optional)urlopenz<(.+)>; rel="next"responsereturnc                 C   s6   | j }d|v r|d rt|d }|r|dS d S )NLink   )headers_re_next_linkmatchgroup)r   r	   r    r   H/var/www/html/myenv/lib/python3.10/site-packages/Bio/UniProt/__init__.py_get_next_link   s   
r   c                 C   s   t |   d S )Nresults)jsonloadsreaddecode)r   r   r   r   _get_results(   s   r   c                 C   s&   | j }d|v r|d rt|d S dS )Nzx-total-resultsr   )r	   int)r   r	   r   r   r   _get_search_result_count,   s   r   c                   @   sj   e Zd ZdZdefddZdefddZdd	 Zde	fd
dZ
de	ddfddZdefddZdd ZdS )_UniProtSearchResultszA sequence over the results of a UniProt search.

    Do not use this class directly. Instead, use the :meth:`UniProt.search` method.
    r   c                 C   s\   | j d usJ t| j }|  jt|7  _t|| _ |W  d    S 1 s'w   Y  d S N)next_urlr   results_cacher   r   )selfr   r   r   r   _fetch_next_batch;   s   
$z'_UniProtSearchResults._fetch_next_batch	first_urlc                 C   s(   || _ g | _d| _|  }t|| _d S )Nr   )r   r   next_result_indexr   r   search_result_count)r   r   r   r   r   r   __init__B   s
   z_UniProtSearchResults.__init__c                 C   s   | S r   r   r   r   r   r   __iter__I   s   z_UniProtSearchResults.__iter__c                 C   s   | j S )zHReturn the total number of search results, regardless of the batch size.)r    r"   r   r   r   __len__L   s   z_UniProtSearchResults.__len__indexNc                 C   s@   |t t| v s
J |t| jkr|   |t| jksdS dS )z4Fetch batches until the given index is in the cache.N)rangelenr   r   )r   r%   r   r   r   
_fetch_forP   s   z _UniProtSearchResults._fetch_forc                 C   sR   | j t| k r'| | j  z| j| j  }|  j d7  _ |W S  ty&   tw t)Nr   )r   r'   r(   r   
IndexErrorStopIteration)r   next_resultr   r   r   __next__V   s   z_UniProtSearchResults.__next__c                 C   s   t |trP|t| \}}}d|  krt| k r,n J d|  kr+t| ks.J  J |dkrB||krA|dkrA| |d  n,||krO|dkrO| | nt |trn|tt|  t| vretd| |t|   | j| S )Nr   r   zIndex out of bounds.)	
isinstancesliceindicesr'   r(   r   r&   r)   r   )r   r%   startstopstepr   r   r   __getitem__b   s   
>


z!_UniProtSearchResults.__getitem__)__name__
__module____qualname____doc__r   r   strr!   r#   r   r$   r(   dictr,   r3   r   r   r   r   r   5   s    r     queryfields
batch_sizec                 C   s8   | |dd}|rd ||d< dtj| }t|S )aa  Search the UniProt database.

    Consider using `query syntax <https://www.uniprot.org/help/text-search>`_ and
    `query fields <https://www.uniprot.org/help/query-fields>`_ to refine your search.

    See the API details `here <https://www.uniprot.org/help/api_queries>`_.

    >>> from Bio import UniProt
    >>> from itertools import islice
    >>> # Get the first 10 results
    >>> results = UniProt.search("(organism_id:2697049) AND (reviewed:true)")[:10]

    :param query: The query string to search UniProt with
    :type query: str
    :param fields: The columns to retrieve in the results, defaults to all fields
    :type fields: List[str], optional
    :param batch_size: The number of results to retrieve in each batch, defaults to 500
    :type batch_size: int
    :return: An iterator over the search results
    :rtype: _UniProtSearchResults
    r   )r;   sizeformat,r<   z*https://rest.uniprot.org/uniprotkb/search?)joinurllibparse	urlencoder   )r;   r<   r=   
parametersurlr   r   r   searchu   s   rG   __main__)run_doctest)Nr:   )r7   r   reurllib.parserB   http.clientr   typingr   urllib.requestr   compiler
   r8   r   listr9   r   r   r   r   rG   r4   
Bio._utilsrI   r   r   r   r   <module>   s4   
	A

$
