o
    Rŀg5z                  	   @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ G d	d
 d
ZedkrdZee dZdZdZeejdkrejd ZeedZeejdkrejdd D ]"Zedkrde_q}edkrdZq}edv redd Zq}edkrdZq}n
e Ze Zde_eejdkrejd dkrede  ej eed dS ejd dkrej!ed erej"ed dS dS ejd dkrej#eed dS ejd dkrej"ed dS eejd dkrFejd d $ rFejd Z%ej&e%eeed  erBe' Z(e(D ]\Z)Z*e)e%kr@ej+e%e*eeed  q-dS dS ejd d d!kre,d"ejd Z-e-D ]0Z.ej&e.eeed  ere' Z(e(D ]\Z)Z*e)e.krej+e.e*eeed  qoq[dS dS dS dS )#z?Access the PDB over the internet (e.g. to download structures).    N)ThreadPoolExecutor)Optional)Request)
urlcleanup)urlopen)urlretrievec                   @   s^  e Zd ZdZdZ				d/ddZedd	 Zed
d Zdd Z	dd Z
dd Z	d0ddZd1ddZ					d2dee dedee dee dedee fddZd3ded eeeef  fd!d"Z	d4d#d$Z			d5d%ee dee dee fd&d'Z			d5d%ee dee dee fd(d)Z			d5d%ee dee dee fd*d+Zd6d-d.ZdS )7PDBLista  Quick access to the structure lists on the PDB or its mirrors.

    This class provides quick access to the structure lists on the
    PDB server or its mirrors. The structure lists contain
    four-letter PDB codes, indicating that structures are
    new, have been modified or are obsolete. The lists are released
    on a weekly basis.

    It also provides a function to retrieve PDB files from the server.
    To use it properly, prepare a directory /pdb or the like,
    where PDB files are stored.

    All available file formats (PDB, PDBx/mmCif, PDBML, mmtf) are supported.
    Please note that large structures (containing >62 chains
    and/or 99999 ATOM lines) are no longer stored as a single PDB file
    and by default (when PDB format selected) are not downloaded.

    Large structures can be downloaded in other formats, including PDBx/mmCif
    or as a .tar file (a collection of PDB-like formatted files for a given
    structure).

    If you want to use this module from inside a proxy, add
    the proxy variable to your environment, e.g. in Unix:
    export HTTP_PROXY='http://realproxy.charite.de:888'
    (This can also be added to ~/.bashrc)
    a  
    The Protein Data Bank: a computer-based archival file for macromolecular structures.
    F.C.Bernstein, T.F.Koetzle, G.J.B.Williams, E.F.Meyer Jr, M.D.Brice, J.R.Rodgers, O.Kennard, T.Shimanouchi, M.Tasumi
    J. Mol. Biol. 112 pp. 535-542 (1977)
    http://www.pdb.org/.
    https://files.wwpdb.orgNTc                 C   sP   d| _ || _|r|| _nt | _|| _|r|| _n	tj| jd| _d| _	dS )zInitialize the class with the default server or a custom one.

        Argument pdb is the local path to use, defaulting to the current
        directory at the moment of initialisation.
        NobsoleteF)

assemblies
pdb_server	local_pdbosgetcwd_verboseobsolete_pdbpathjoin	flat_tree)selfserverpdbr   verbose r   C/var/www/html/myenv/lib/python3.10/site-packages/Bio/PDB/PDBList.py__init__Y   s   

zPDBList.__init__c                 C   s   | du rt jd dS | S )zPrint a warning to stdout (PRIVATE).

        Temporary warning (similar to a deprecation warning) that files
        are being downloaded in mmCIF.
        NzHWARNING: The default download format has changed from PDB to PDBx/mmCif
mmCif)sysstderrwritefile_formatr   r   r   _print_default_format_warningx   s   z%PDBList._print_default_format_warningc                 C   sh   t t| #}g }|D ]}| }t|dksJ ||  qW d   |S 1 s-w   Y  |S )zRetrieve a list of pdb codes in the weekly pdb status file from given URL.

        Used by get_recent_changes. Typical contents of the list files parsed
        by this method is now very simply - one PDB name per line.
           N)
contextlibclosingr   striplenappenddecode)urlhandleanswerliner   r   r   r   get_status_list   s   
zPDBList.get_status_listc                 C   s>   | j d }| |d }| |d }| |d }|||gS )a  Return three lists of the newest weekly files (added,mod,obsolete).

        Reads the directories with changed entries from the PDB server and
        returns a tuple of three URL's to the files of new, modified and
        obsolete entries from the most recent list. The directory with the
        largest numerical name is used.
        Returns None if something goes wrong.

        Contents of the data/status dir (20031013 would be used);:

            drwxrwxr-x   2 1002     sysadmin     512 Oct  6 18:28 20031006
            drwxrwxr-x   2 1002     sysadmin     512 Oct 14 02:14 20031013
            -rw-r--r--   1 1002     sysadmin    1327 Mar 12  2001 README
        z/pub/pdb/data/status/latest/z	added.pdbzmodified.pdbzobsolete.pdb)r   r.   )r   r   addedmodifiedr
   r   r   r   get_recent_changes   s
   

zPDBList.get_recent_changesc                 C   sf   | j d }| jrtd tt|}dd | dd D }W d   |S 1 s,w   Y  |S )zRetrieve the big file containing all the PDB entries and some annotation.

        Returns a list of PDB codes in the index file.
        z'/pub/pdb/derived_data/index/entries.idxz)Retrieving index file. Takes about 27 MB.c                 S   s(   g | ]}t |d kr|dd   qS )r#   N)r'   r)   ).0r-   r   r   r   
<listcomp>   s    "z+PDBList.get_all_entries.<locals>.<listcomp>   N)r   r   printr$   r%   r   	readlines)r   r*   r+   all_entriesr   r   r   get_all_entries   s   

zPDBList.get_all_entriesc                 C   s   | j d }tt|+}g }|D ]}|dsq| d }t|dks'J ||  qW d   |S 1 s:w   Y  |S )a  Return a list of all obsolete entries ever in the PDB.

        Returns a list of all obsolete pdb codes that have ever been
        in the PDB.

        Gets and parses the file from the PDB server in the format
        (the first pdb_code column is the one used). The file looks
        like this::

             LIST OF OBSOLETE COORDINATE ENTRIES AND SUCCESSORS
            OBSLTE    31-JUL-94 116L     216L
            ...
            OBSLTE    29-JAN-96 1HFT     2HFT
            OBSLTE    21-SEP-06 1HFV     2J5X
            OBSLTE    21-NOV-03 1HG6
            OBSLTE    18-JUL-84 1HHB     2HHB 3HHB
            OBSLTE    08-NOV-96 1HID     2HID
            OBSLTE    01-APR-97 1HIU     2HIU
            OBSLTE    14-JAN-04 1HKE     1UUZ
            ...

        z!/pub/pdb/data/status/obsolete.dats   OBSLTE r4   r#   N)	r   r$   r%   r   
startswithsplitr'   r(   r)   )r   r*   r+   r
   r-   r   r   r   r   get_all_obsolete   s   




zPDBList.get_all_obsoleteFc              	   C   s  |  |}| }d| d| d| d| | dd}||vr1td| dd	| d
|| }|dv rc|s=dnd}|dkrEdn|dkrKdnd}	| jd| d|	 d|dd  d|  }
n|dkrz| jd|dd  d| d|  }
nd| }
|du r|s| jn| j}| jstj	||dd }n|}t
|tjst| tj	||}d| d| d| d| d| dd}tj	||| }|stj	|r| jrtd| d |S | jrtd | d! z
t  t|
| W n ty	   td" Y |S w t|d##}t|d$}|| W d   n	1 s'w   Y  W d   n	1 s7w   Y  t| |S )%a  Fetch PDB structure file from PDB server, and store it locally.

        The PDB structure's file name is returned as a single string.
        If obsolete ``==`` True, the file will be saved in a special file tree.

        NOTE. The default download format has changed from PDB to PDBx/mmCif

        :param pdb_code: 4-symbols structure Id from PDB (e.g. 3J92).
        :type pdb_code: string

        :param file_format:
            File format. Available options:

            * "mmCif" (default, PDBx/mmCif file),
            * "pdb" (format PDB),
            * "xml" (PDBML/XML format),
            * "mmtf" (highly compressed),
            * "bundle" (PDB formatted archive for large structure)

        :type file_format: string

        :param overwrite: if set to True, existing structure files will be overwritten. Default: False
        :type overwrite: bool

        :param obsolete:
            Has a meaning only for obsolete structures. If True, download the obsolete structure
            to 'obsolete' folder, otherwise download won't be performed.
            This option doesn't work for mmtf format as obsoleted structures aren't stored in mmtf.
            Also doesn't have meaning when parameter pdir is specified.
            Note: make sure that you are about to download the really obsolete structure.
            Trying to download non-obsolete structure into obsolete folder will not work
            and you face the "structure doesn't exists" error.
            Default: False

        :type obsolete: bool

        :param pdir: put the file in this directory (default: create a PDB-style directory tree)
        :type pdir: string

        :return: filename
        :rtype: string
        r   z.ent.gz.cif.gzz.xml.gzz-pdb-bundle.tar.gz)r   r   xmlmmtfbundlezSpecified file_format zF does not exist or is not supported. Please use one of the following: z, .)r   r   r=   dividedr
   r   mmCIFXMLz/pub/pdb/data/structures//      r?   z/pub/pdb/compatible/pdb_bundle/zhttp://mmtf.rcsb.org/v1.0/full/Nz.entz.cifz.xmlz.mmtfz-pdb-bundle.tarStructure exists: '' zDownloading PDB structure ''...zDesired structure doesn't existrbwb)r"   lower
ValueErrorr   r   r   r   r   r   r   accessF_OKmakedirsexistsr   r5   r   r   OSErrorgzipopen
writelinesremove)r   pdb_coder
   pdirr!   	overwritearchive_dictarchivepdb_dir	file_typer*   r   filenamefinal
final_filegzoutr   r   r   retrieve_pdb_file   s   
.
"




zPDBList.retrieve_pdb_filec                 C   s  t j| js	J t j| jrt j| jsJ | |}|  \}}}|| D ]B}z#| j||d |rM| 	 }|D ]\}}	||krLt
j||	|dd q;W q) tyk }
 ztd| d|
 d W Y d}
~
q)d}
~
ww |D ]}| jrt j| jd| d	| }| j}nt j| j|d
d d| d	| }t j| j|d
d }t j|d| d	| }t j|rt j|dd zt|| W qn ty   td| d Y qnw t j|r| jrtd| d qn| jrtd| d qndS )a,  Update your local copy of the PDB files.

        I guess this is the 'most wanted' function from this module.
        It gets the weekly lists of new and modified pdb entries and
        automatically downloads the according PDB files.
        You can call this module as a weekly cron job.
        r    T)r!   rY   zerror z: 
Nr   r@   rE   rF   )exist_okzCould not move z to obsolete folderzObsolete file z already movedz is missing)r   r   isdirr   rQ   r   r"   r1   rc   get_all_assembliesplretrieve_assembly_file	Exceptionr5   r   r   isfilerP   shutilmover   )r   r!   with_assembliesnewr0   r
   rW   r   
a_pdb_codeassembly_numerrold_filenew_dirnew_filer   r   r   
update_pdb\  s^   
"
zPDBList.update_pdb	pdb_codesr
   rX   r!   rY   max_num_threadsc              
   C   sV   |  |}t|}|tj| j||||d| W d   dS 1 s$w   Y  dS )aJ  Fetch set of PDB structure files from the PDB server and store them locally.

        :param pdb_codes: A list of 4-symbol PDB structure IDs

        :param obsolete:
            Has a meaning only for obsolete structures.
            If True, download the obsolete structure to 'obsolete' folder.
            Otherwise, the download won't be performed.
            This option doesn't work for mmtf format as obsolete structures are not available as mmtf.
            (default: ``False``)

        :param pdir: Put the file in this directory. By default, create a PDB-style directory tree.

        :param file_format: File format. Available options:

            * "mmCif" (default, PDBx/mmCif file),
            * "pdb" (format PDB),
            * "xml" (PMDML/XML format),
            * "mmtf" (highly compressed),
            * "bundle" (PDB formatted archive for large structure).

        :param overwrite: If set to true, existing structure files will be overwritten. (default: ``False``)

        :param max_num_threads: The maximum number of threads to use when downloading files
        )r
   rX   r!   rY   N)r"   r   map	functoolspartialrc   )r   rw   r
   rX   r!   rY   rx   executorr   r   r   download_pdb_files  s   
#
"zPDBList.download_pdb_files returnc           	      C   s   t | dr| jr| jrtd | jS | jrtd ddidd}t|d}d	tt|d
}t	d||}t
|}t| dd }W d   n1 sRw   Y  dtdtttf fdd}tt||}|| _|S )a,  Retrieve the list of PDB entries with an associated bio assembly.

        The requested list will be cached to avoid multiple calls to the server.

        :param str file_format: A legacy parameter that is left to avoid breaking changes
        :return: the assemblies
        :rtype: list
        r   z%Retrieving cached list of assemblies.z7Retrieving list of assemblies. This might take a while.return_all_hitsTassembly)request_optionsreturn_typezutf-8zapplication/json; charset=utf-8)zContent-TypezContent-Lengthz+https://search.rcsb.org/rcsbsearch/v2/query
result_setNr   c                 S   s"   | d  d}|d  |d fS )N
identifier-r   )r:   rL   )r   r:   r   r   r   	transform  s   z-PDBList.get_all_assemblies.<locals>.transform)hasattrr   r   r5   jsondumpsencodestrr'   r   r   loadsreadr)   dicttuplelistry   )	r   r!   bodydataheadersrequestresponser   r   r   r   r   rg     s,   	

zPDBList.get_all_assembliesc              
   C   s  |  }t|}| d| d| d| dd}| |}|  }||vr.td| d|| }|dkr?| jd	|  }n|d
krL| jd|  }ntd| d|du rj| j}	| jsitj	
|	|dd }	n|}	t|	tjsxt|	 tj	
|	|}
tj	
|	|dd }|stj	|r| jrtd| d |S | jrtd| d| d z
t  t||
 W n ty } ztd|  W Y d}~|S d}~ww t|
d"}t|d}|| W d   n1 sw   Y  W d   n1 sw   Y  t|
 |S )aI  Fetch one or more assembly structures associated with a PDB entry.

        Unless noted below, parameters are described in ``retrieve_pdb_file``.

        :type  assembly_num: str
        :param assembly_num: assembly number to download.

        :rtype : str
        :return: file name of the downloaded assembly file.
        z.pdbz.gzz	-assemblyr<   )r   mmcifzSpecified file_format 'z?' is not supported. Use one of the following: 'mmcif' or 'pdb'.r   z#/pub/pdb/data/assemblies/mmCIF/all/r   z/pub/pdb/data/biounit/PDB/all/zfile_format 'z' not supportedNrE   rF   rG   rH   zDownloading assembly (z) for PDB entry 'rI   z<Download failed! Maybe the desired assembly does not exist: rJ   rK   )rL   intr"   rj   r   rM   r   r   r   r   r   rN   rO   rP   rQ   r   r5   r   r   rR   rS   rT   rU   rV   )r   rW   rq   rX   r!   rY   r[   
archive_fnr*   r   assembly_gz_fileassembly_final_filerr   ra   rb   r   r   r   ri     sl   



zPDBList.retrieve_assembly_filelistfilec                    s   |  |}|  }t|}|D ]\ |tj| j|d  qW d   n1 s,w   Y  |rXt|d}| fdd|D  W d   dS 1 sQw   Y  dS dS )a  Retrieve all biological assemblies not in the local PDB copy.

        :param listfile: File name to which all assembly codes will be written

        :param file_format: Format in which to download the entries.
            Available options are "mmCif" or "pdb". Defaults to "mmCif".

        :param max_num_threads: The maximum number of threads to use while downloading the assemblies
        r    Nwc                 3   s     | ]} d   dV  qdS )r@   rd   Nr   r2   xrq   rW   r   r   	<genexpr>_  s    z2PDBList.download_all_assemblies.<locals>.<genexpr>)	r"   rg   r   submitrz   r{   ri   rT   rU   )r   r   r!   rx   r   r|   outfiler   r   r   download_all_assembliesA  s$   


"zPDBList.download_all_assembliesc                 C   s   |  |}|  }t|}|tj| j|d| W d   n1 s$w   Y  |rMt|d}|dd |D  W d   dS 1 sFw   Y  dS dS )ah  Retrieve all PDB entries not present in the local PDB copy.

        NOTE: The default download format has changed from PDB to PDBx/mmCif.

        :param listfile: Filename to which all PDB codes will be written

        :param file_format: File format. Available options:

            * "mmCif" (default, PDBx/mmCif file),
            * "pdb" (format PDB),
            * "xml" (PMDML/XML format),
            * "mmtf" (highly compressed),
            * "bundle" (PDB formatted archive for large structure)

        :param max_num_threads: The maximum number of threads to use while downloading PDB entries
        r    Nr   c                 s       | ]}|d  V  qdS rd   Nr   r   r   r   r   r         z.PDBList.download_entire_pdb.<locals>.<genexpr>)	r"   r8   r   ry   rz   r{   rc   rT   rU   r   r   r!   rx   entriesr|   r   r   r   r   download_entire_pdba  s   

"zPDBList.download_entire_pdbc                 C   s   |  |}|  }t|}|tj| jd|d| W d   n1 s%w   Y  |rNt|d}|dd |D  W d   dS 1 sGw   Y  dS dS )a	  Retrieve all obsolete PDB entries not present in local obsolete PDB copy.

        NOTE: The default download format has changed from PDB to PDBx/mmCif.

        :param listfile: Filename to which all PDB codes will be written

        :param file_format: File format. Available options:

            * "mmCif" (default, PDBx/mmCif file),
            * "pdb" (PDB format),
            * "xml" (PMDML/XML format).

        :param max_num_threads: The maximum number of threads to use while downloading PDB entries
        T)r
   r!   Nr   c                 s   r   r   r   r   r   r   r   r     r   z4PDBList.download_obsolete_entries.<locals>.<genexpr>)	r"   r;   r   ry   rz   r{   rc   rT   rU   r   r   r   r   download_obsolete_entries  s   

"z!PDBList.download_obsolete_entriespdb_seqres.txtc                 C   s&   | j rtd | jd }t|| dS )zKRetrieve and save a (big) file containing all the sequences of PDB entries.z-Retrieving sequence file (takes over 110 MB).z$/pub/pdb/derived_data/pdb_seqres.txtN)r   r5   r   r   )r   savefiler*   r   r   r   get_seqres_file  s   
zPDBList.get_seqres_file)r	   NNT)FNNF)NF)FNNFN)r~   )NNF)NNN)r   )__name__
__module____qualname____doc__PDB_REFr   staticmethodr"   r.   r1   r8   r;   rc   rv   r   r   boolr   r   r}   r   rg   ri   r   r   r   r   r   r   r   r   r   6   s    	


%

~B
 0+
N
"
%
#r   __main__a  PDBList.py
    (c) Kristian Rother 2003, Wiktoria Karwicka & Jacek Smietanski 2016
    Contributed to Biopython

    Usage::

        PDBList.py update <pdb_path> [options]   - write weekly PDB updates to
                                                   local pdb tree.
        PDBList.py all    <pdb_path> [options]   - write all PDB entries to
                                                   local pdb tree.
        PDBList.py obsol  <pdb_path> [options]   - write all obsolete PDB
                                                   entries to local pdb tree.
        PDBList.py assemb <pdb_path> [options]   - write all assemblies for each
                                                   PDB entry to local pdb tree.
        PDBList.py <PDB-ID> <pdb_path> [options] - retrieve single structure
        PDBList.py (<PDB-ID1>,<PDB-ID2>,...) <pdb_path> [options] - retrieve a set
                                                   of structures

    Options:
     -d       A single directory will be used as <pdb_path>, not a tree.
     -o       Overwrite existing structure files.
     -pdb     Downloads structures in PDB format
     -xml     Downloads structures in PDBML (XML) format
     -mmtf    Downloads structures in mmtf format
     -with-assemblies    Downloads assemblies along with regular entries.

    Maximum one format can be specified simultaneously (if more selected, only
    the last will be considered). By default (no format specified) structures are
    downloaded as PDBx/mmCif files.
    r   Fr4   )r   rF   z-dTz-o)z-pdbz-xmlz-mmtfrE   z-with-assembliesupdatezupdating local PDB at )r!   rn   allr    obsolassembr#   )rX   r!   rY   (z[0-9A-Za-z]{4})/r   r$   rz   rS   r   r   rerl   r   concurrent.futuresr   typingr   urllib.requestr   r   r   r   r   r   docr5   r!   rY   rn   r'   argvpdb_pathrh   optionr   r   rv   r   r   r   isdigitrW   rc   rg   r   rp   rq   ri   findallpdb_idspdb_idr   r   r   r   <module>   s   #    
}

(



9(