o
    Rŀg<^                     @   sp   d Z ddlZddlZddlmZ ddlmZ ddlm	Z	 ddlm
Z
 ddlmZ G dd	 d	ZG d
d dZdS )zmmCIF parsers.    N)	as_handle)
MMCIF2Dict)PDBConstructionException)PDBConstructionWarning)StructureBuilderc                   @   sD   e Zd ZdZ	dddZdd Zd	d
 Zdd Zdd Zdd Z	dS )MMCIFParserz1Parse a mmCIF file and return a Structure object.NTFc                 C   sL   |dur|| _ nt | _ d| _d| _d| _t|| _t|| _t|| _dS )a  Create a PDBParser object.

        The mmCIF parser calls a number of standard methods in an aggregated
        StructureBuilder object. Normally this object is instantiated by the
        MMCIParser object itself, but if the user provides his/her own
        StructureBuilder object, the latter is used instead.

        Arguments:
         - structure_builder - an optional user implemented StructureBuilder class.
         - auth_chains - True by default. If true, use the author chain IDs.
           If false, use the re-assigned mmCIF chain IDs.
         - auth_residues - True by default. If true, use the author residue numbering.
           If false, use the mmCIF "label" residue numbering, which has no insertion
           codes, and strictly increments residue numbers.
           NOTE: Non-polymers such as water don't have a "label" residue number,
           and will be skipped.

         - QUIET - Evaluated as a Boolean. If true, warnings issued in constructing
           the SMCRA data will be suppressed. If false (DEFAULT), they will be shown.
           These warnings might be indicative of problems in the mmCIF file!

        Nr   )	_structure_builderr   headerline_counterbuild_structureboolauth_chainsauth_residuesQUIETselfstructure_builderr   r   r    r   G/var/www/html/myenv/lib/python3.10/site-packages/Bio/PDB/MMCIFParser.py__init__   s   

zMMCIFParser.__init__c                 C   sj   t  $ | jrt jdtd t|| _| | | j	| 
  W d   n1 s+w   Y  | j S )zReturn the structure.

        Arguments:
         - structure_id - string, the id that will be used for the structure
         - filename - name of mmCIF file, OR an open text mode file handle

        ignorecategoryN)warningscatch_warningsr   filterwarningsr   r   _mmcif_dict_build_structurer   
set_header_get_headerget_structure)r   structure_idfilenamer   r   r   r    =   s   



zMMCIFParser.get_structurec                 C   s$   ||v r|| d }d|kr|S |S )Nr   ?r   )r   keydictdefltrsltr   r   r   
_mmcif_getP   s
   zMMCIFParser._mmcif_getc              
   C   sb   | j }|D ])}||}z|d }W n ttfy   Y qw |dkr.|dkr.|| j|<  d S qd S )Nr   r#   .)r   get	TypeError
IndexErrorr	   )r   
target_keykeysmdr$   valitemr   r   r   _update_header_entryW   s   

z MMCIFParser._update_header_entryc                 C   s   dddddd d| _ | dg d | ddg | ddd	g | d
dg | ddg | dg d | j d d ur]zt| j d | j d< W | j S  ty\   d | j d< Y | j S w | j S )N )nameheadidcodedeposition_datestructure_method
resolutionr6   )	_entry_idz_exptl.entry_idz_struct.entry_idr4   z_struct.titler5   z_struct_keywords.pdbx_keywordsz_struct_keywords.textr7   z3_pdbx_database_status.recvd_initial_deposition_dater8   z_exptl.methodr9   )z_refine.ls_d_res_highz_refine_hist.d_res_highz _em_3d_reconstruction.resolution)r	   r2   float
ValueError)r   r   r   r   r   c   s>   	zMMCIFParser._get_headerc           @      C   sp  ddh}| j }|d }|d }|d }z|d }W n ty$   d }Y nw | jr-|d }n|d }d	d
 |d D }	dd
 |d D }
dd
 |d D }|d }|d }|d }|d }|d }zdd
 |d D }W n tyv   d }Y n ty   tdd w z|d }|d }|d }|d }|d }|d }d}W n ty   d}Y nw | jrd |v r|d  }n	|d! }n|d! }d }d }d }| j}|| |d" d#}d#}t	t
|D ]v} ||  zt||  }!W n ty   ||  }!td$t Y nw |	|  }"|
|  }#||  }$||  }%||  }&||  }'|'|v rd"}'||  }(|(dkrUz|d  |  })d%|& d&|) d'}*W n ttfyK   d%|& d'}*Y nw td(|* t qt|(}+||  },|,|v rdd"},||  }-zt||  }.W n ty}   td)d w zt||  }/W n ty   td*d w ||  }0|0d+kr|%d,ks|%d-krd.}1nd/}1nd"}1|1|+|,f}(|d ur||  }2||2kr|2}|d7 }||| d }d }d }n|| ||&kr|&}|| d }d }||(ks||%kr |(}|%}||%|1|+|, t|"|#|$fd0}3|r||   nd }4|j|-|3|.|/|'|-|!|4d1 |dkrS| t
|k rS||  ||  ||  ||  ||  ||  f}5d2d
 |5D }6t|6d0}7||7 qzXt|d3 d }8t|d4 d }9t|d5 d }:t|d6 d };t|d7 d }<t|d8 d }=t|8|9|:|;|<|=fd0}>|d9 d }?|?dd# }?|?d u rt||?|> W d S  ty   Y d S w ):Nr)   r#   _atom_site.id_atom_site.label_atom_id_atom_site.label_comp_id_atom_site.type_symbol_atom_site.auth_asym_id_atom_site.label_asym_idc                 S      g | ]}t |qS r   r;   .0xr   r   r   
<listcomp>       z0MMCIFParser._build_structure.<locals>.<listcomp>_atom_site.Cartn_xc                 S   rC   r   rD   rE   r   r   r   rH      rI   _atom_site.Cartn_yc                 S   rC   r   rD   rE   r   r   r   rH      rI   _atom_site.Cartn_z_atom_site.label_alt_id_atom_site.pdbx_PDB_ins_code_atom_site.B_iso_or_equiv_atom_site.occupancy_atom_site.group_PDBc                 S   rC   r   intrF   nr   r   r   rH      rI   _atom_site.pdbx_PDB_model_numInvalid model number_atom_site_anisotrop.U[1][1]_atom_site_anisotrop.U[1][2]_atom_site_anisotrop.U[1][3]_atom_site_anisotrop.U[2][2]_atom_site_anisotrop.U[2][3]_atom_site_anisotrop.U[3][3]   r   _atom_site.auth_seq_id_atom_site.label_seq_id zBPDBConstructionWarning: Some atom serial numbers are not numerical"Non-existing residue ID in chain '', residue ''PDBConstructionWarning: Invalid or missing B factorInvalid or missing occupancyHETATMHOHWATWHfserial_numberelementc                 S   rC   r   rD   rF   _r   r   r   rH   8  rI   z_cell.length_az_cell.length_bz_cell.length_cz_cell.angle_alphaz_cell.angle_betaz_cell.angle_gammaz_symmetry.space_group_name_H-M)r   KeyErrorr   r<   r   r   r   init_structureinit_segrangelenset_line_counterrS   r   warnr   r,   r;   
init_model
init_chaininit_residuenparrayupper	init_atom
set_anisou	Exceptionset_symmetry)@r   r!   _unassigned
mmcif_dictatom_serial_listatom_id_listresidue_id_listelement_listchain_id_listx_listy_listz_listalt_list
icode_listb_factor_listoccupancy_listfieldname_listserial_list	aniso_u11	aniso_u12	aniso_u13	aniso_u22	aniso_u23	aniso_u33
aniso_flagseq_id_listcurrent_chain_idcurrent_residue_idcurrent_resnamer   current_model_idcurrent_serial_idiserialrG   yzresnamechainidaltlocresseq
msg_resseqmsg
int_resseqicoder4   
tempfactor	occupancy	fieldnamehetatm_flag	serial_idcoordrq   umapped_anisouanisou_arrayabcalphabetagammacell
spacegroupr   r   r   r      sL  





















zMMCIFParser._build_structureNTTF)
__name__
__module____qualname____doc__r   r    r(   r2   r   r   r   r   r   r   r      s    
'%r   c                   @   s,   e Zd ZdZ	dddZdd Zd	d
 ZdS )FastMMCIFParserz2Parse an MMCIF file and return a Structure object.NTFc                 C   sF   |dur|| _ nt | _ d| _d| _t|| _t|| _t|| _dS )aR  Create a FastMMCIFParser object.

        The mmCIF parser calls a number of standard methods in an aggregated
        StructureBuilder object. Normally this object is instantiated by the
        parser object itself, but if the user provides his/her own
        StructureBuilder object, the latter is used instead.

        The main difference between this class and the regular MMCIFParser is
        that only 'ATOM' and 'HETATM' lines are parsed here. Use if you are
        interested only in coordinate information.

        Arguments:
         - structure_builder - an optional user implemented StructureBuilder class.
         - auth_chains - True by default. If true, use the author chain IDs.
           If false, use the re-assigned mmCIF chain IDs.
         - auth_residues - True by default. If true, use the author residue numbering.
           If false, use the mmCIF "label" residue numbering, which has no insertion
           codes, and strictly increments residue numbers.
           NOTE: Non-polymers such as water don't have a "label" residue number,
           and will be skipped.

         - QUIET - Evaluated as a Boolean. If true, warnings issued in constructing
           the SMCRA data will be suppressed. If false (DEFAULT), they will be shown.
           These warnings might be indicative of problems in the mmCIF file!

        Nr   )r   r   r
   r   r   r   r   r   r   r   r   r   r   P  s   

zFastMMCIFParser.__init__c              	   C   sz   t  , | jrt jdtd t|}| || W d   n1 s$w   Y  W d   n1 s3w   Y  | j S )zReturn the structure.

        Arguments:
         - structure_id - string, the id that will be used for the structure
         - filename - name of the mmCIF file OR an open filehandle

        r   r   N)	r   r   r   r   r   r   r   r   r    )r   r!   r"   handler   r   r   r    z  s   


zFastMMCIFParser.get_structurec           B      C   sZ  ddh}d\}}g g }}g g }}	|D ]G}
|
 dr%d}||
  q|
 dr4d}||
  q|r>|
 dr>d}q|rH|
 drHd}q|rR||
  q|r[|	|
  qtttj| }tttj|	 }tt||}|tt|| |d	 }|d
 }|d }z|d }W n t	y   d }Y nw | j
r|d }n|d }dd |d D }dd |d D }dd |d D }|d }|d }|d }|d }|d }zdd |d D }W n t	y   d }Y n ty   tdd w z|d }|d }|d  }|d! }|d" } |d# }!d$}"W n t	y    d%}"Y nw | jr4d&|v r/|d& }#n	|d' }#n|d' }#d }$d }%d }&| j}'|'| |'d( d)}(d)})tt|D ]T}*|'|* ||* }+||* },||* }-||* }.||* }/||* }0||* }1|1|v rd(}1|#|* }2|2dkrz|d& |* }3d*|0 d+|3 d,}4W n t	tfy   d*|0 d,}4Y nw td-|4 t qUt|2}5||* }6|6|v rd(}6||* d.}7zt||* }8W n ty   td/d w zt||* }9W n ty   td0d w ||* }:|:d1krd2};nd(};|;|5|6f}2|d ur-||* }<|)|<kr,|<})|(d$7 }(|'|(|) d }$d }%d }&n|'|( |$|0krB|0}$|'|$ d }%d }&|%|2ksL|&|/krX|2}%|/}&|'|/|;|5|6 t|,|-|.fd3}=|rh||* nd }>|'j|7|=|8|9|1|7|+|>d4 |"d$kr|*t|k r||* ||* ||* ||* | |* |!|* f}?d5d |?D }@t|@d3}A|' |A qUd S )6Nr)   r#   )FFz_atom_site.Tz_atom_site_anisotrop.#Fr=   r>   r?   r@   rA   rB   c                 S   rC   r   rD   rE   r   r   r   rH     rI   z4FastMMCIFParser._build_structure.<locals>.<listcomp>rJ   c                 S   rC   r   rD   rE   r   r   r   rH     rI   rK   c                 S   rC   r   rD   rE   r   r   r   rH     rI   rL   rM   rN   rO   rP   rQ   c                 S   rC   r   rR   rT   r   r   r   rH     rI   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r   r_   r`   ra   rb   rc   rd   re   rf   "rg   rh   ri   rm   rn   ro   c                 S   rC   r   rD   rr   r   r   r   rH   W  rI   )!
startswithappendstripzipmapstrsplitr%   updatert   r   r<   r   r   r   ru   rv   rw   rx   ry   r,   r   rz   r   rS   r;   r{   r|   r}   r~   r   r   r   )Br   r!   
filehandler   	read_atom
read_aniso_fields_records_anisof_anisorsline_record_tbl_anisob_tblr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rG   r   r   r   r   r   r   r   r   r   r   r4   r   r   r   r   r   r   rq   r   r   r   r   r   r   r     sF  

























z FastMMCIFParser._build_structurer   )r   r   r   r   r   r    r   r   r   r   r   r   M  s    
*r   )r   r   numpyr~   Bio.Filer   Bio.PDB.MMCIF2Dictr   Bio.PDB.PDBExceptionsr   r   Bio.PDB.StructureBuilderr   r   r   r   r   r   r   <module>   s     <