o
    Rŀg7                     @   sP  d Z ddlZddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 dd	l	mZ dd
lmZ eee dd d \ZZeZi Zi Zi Zi ZedD ]Zee Zee Zeee< eee< eee< eee< qRdd Zdd Zdd Zdd Zd$ddZ d$ddZ!G dd de"Z#G dd dZ$G d d! d!e$Z%G d"d# d#e$Z&dS )%a  Polypeptide-related classes (construction and representation).

Simple example with multiple chains,

    >>> from Bio.PDB.PDBParser import PDBParser
    >>> from Bio.PDB.Polypeptide import PPBuilder
    >>> structure = PDBParser().get_structure('2BEG', 'PDB/2BEG.pdb')
    >>> ppb=PPBuilder()
    >>> for pp in ppb.build_peptides(structure):
    ...     print(pp.get_sequence())
    LVFFAEDVGSNKGAIIGLMVGGVVIA
    LVFFAEDVGSNKGAIIGLMVGGVVIA
    LVFFAEDVGSNKGAIIGLMVGGVVIA
    LVFFAEDVGSNKGAIIGLMVGGVVIA
    LVFFAEDVGSNKGAIIGLMVGGVVIA

Example with non-standard amino acids using HETATM lines in the PDB file,
in this case selenomethionine (MSE):

    >>> from Bio.PDB.PDBParser import PDBParser
    >>> from Bio.PDB.Polypeptide import PPBuilder
    >>> structure = PDBParser().get_structure('1A8O', 'PDB/1A8O.pdb')
    >>> ppb=PPBuilder()
    >>> for pp in ppb.build_peptides(structure):
    ...     print(pp.get_sequence())
    DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW
    TETLLVQNANPDCKTILKALGPGATLEE
    TACQG

If you want to, you can include non-standard amino acids in the peptides:

    >>> for pp in ppb.build_peptides(structure, aa_only=False):
    ...     print(pp.get_sequence())
    ...     print("%s %s" % (pp.get_sequence()[0], pp[0].get_resname()))
    ...     print("%s %s" % (pp.get_sequence()[-7], pp[-7].get_resname()))
    ...     print("%s %s" % (pp.get_sequence()[-6], pp[-6].get_resname()))
    MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG
    M MSE
    M MSE
    M MSE

In this case the selenomethionines (the first and also seventh and sixth from
last residues) have been shown as M (methionine) by the get_sequence method.
    N)nucleic_letters_3to1)nucleic_letters_3to1_extended)protein_letters_3to1)protein_letters_3to1_extended)PDBException)
calc_angle)calc_dihedral)Seqc                 C   s   | d S )N    )xr   r   G/var/www/html/myenv/lib/python3.10/site-packages/Bio/PDB/Polypeptide.py<lambda>A   s    r   )key   c                 C      t |  S )zyIndex to corresponding one letter amino acid name.

    >>> index_to_one(0)
    'A'
    >>> index_to_one(19)
    'Y'
    )dindex_to_1)indexr   r   r   index_to_oneS      r   c                 C   r   )z`One letter code to index.

    >>> one_to_index('A')
    0
    >>> one_to_index('Y')
    19
    )d1_to_indexsr   r   r   one_to_index^   r   r   c                 C   r   )zIndex to corresponding three letter amino acid name.

    >>> index_to_three(0)
    'ALA'
    >>> index_to_three(19)
    'TYR'
    )dindex_to_3)ir   r   r   index_to_threei   r   r   c                 C   r   )zjThree letter code to index.

    >>> three_to_index('ALA')
    0
    >>> three_to_index('TYR')
    19
    )d3_to_indexr   r   r   r   three_to_indext   r   r   Fc                 C   2   t | ts|  d} |  } |r| tv S | tv S )a  Return True if residue object/string is an amino acid.

    :param residue: a L{Residue} object OR a three letter amino acid code
    :type residue: L{Residue} or string

    :param standard: flag to check for the 20 AA (default false)
    :type standard: boolean

    >>> is_aa('ALA')
    True

    Known three letter codes for modified amino acids are supported,

    >>> is_aa('FME')
    True
    >>> is_aa('FME', standard=True)
    False
    <3s)
isinstancestrget_resnameupperr   r   residuestandardr   r   r   is_aa   s   
r(   c                 C   r   )a
  Return True if residue object/string is a nucleic acid.

    :param residue: a L{Residue} object OR a three letter code
    :type residue: L{Residue} or string

    :param standard: flag to check for the 8 (DNA + RNA) canonical bases.
        Default is False.
    :type standard: boolean

    >>> is_nucleic('DA ')
    True

    >>> is_nucleic('A  ')
    True

    Known three letter codes for modified nucleotides are supported,

    >>> is_nucleic('A2L')
    True
    >>> is_nucleic('A2L', standard=True)
    False
    r    )r!   r"   r#   r$   r   r   r%   r   r   r   
is_nucleic   s   
r)   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )Polypeptidez5A polypeptide is simply a list of L{Residue} objects.c                 C   s$   g }| D ]}|d }| | q|S )zGet list of C-alpha atoms in the polypeptide.

        :return: the list of C-alpha atoms
        :rtype: [L{Atom}, L{Atom}, ...]
        CA)append)selfca_listrescar   r   r   get_ca_list   s
   zPolypeptide.get_ca_listc              	   C   sB  g }t | }t|D ]}| | }z|d  }|d  }|d  }W n ty<   |d d|jd< d|jd< Y q
w |dkrb| |d	  }z|d  }	t|	|||}
W n tya   d}
Y nw d}
||d	 k r| |d	  }z|d  }t||||}W n ty   d}Y nw d}||
|f |
|jd< ||jd< q
|S )
z+Return the list of phi/psi dihedral angles.Nr+   C)NNNPHIPSIr   r
   )lenrange
get_vector	Exceptionr,   xtrar   )r-   ppllngr   r/   nr0   crpcpphirnnnpsir   r   r   get_phi_psi_list   sH   



zPolypeptide.get_phi_psi_listc                 C   s   |   }g }tt|d D ]:}|| ||d  ||d  ||d  f}dd |D \}}}}t||||}	||	 ||d   }
|	|
jd< q|S )z?List of tau torsions angles for all 4 consecutive Calpha atoms.   r
      c                 s       | ]}|  V  qd S Nr8   .0ar   r   r   	<genexpr>       z+Polypeptide.get_tau_list.<locals>.<genexpr>TAU)r1   r7   r6   r   r,   
get_parentr:   )r-   r.   tau_listr   	atom_listv1v2v3v4taur/   r   r   r   get_tau_list   s   (
zPolypeptide.get_tau_listc           
      C   s   g }|   }tt|d D ]3}|| ||d  ||d  f}dd |D \}}}t|||}|| ||d   }	||	jd< q|S )z8List of theta angles for all 3 consecutive Calpha atoms.rG   r
   c                 s   rH   rI   rJ   rK   r   r   r   rN     rO   z-Polypeptide.get_theta_list.<locals>.<genexpr>THETA)r1   r7   r6   r   r,   rQ   r:   )
r-   
theta_listr.   r   rS   rT   rU   rV   thetar/   r   r   r   get_theta_list  s   
zPolypeptide.get_theta_listc                 C   s   d dd | D }t|S )znReturn the AA sequence as a Seq object.

        :return: polypeptide sequence
        :rtype: L{Seq}
         c                 s   s     | ]}t | d V  qdS )XN)r   getr#   )rL   r/   r   r   r   rN     s    
z+Polypeptide.get_sequence.<locals>.<genexpr>)joinr	   )r-   r   r   r   r   get_sequence  s   
zPolypeptide.get_sequencec                 C   s2   | d   d }| d   d }d| d| dS )zReturn string representation of the polypeptide.

        Return <Polypeptide start=START end=END>, where START
        and END are sequence identifiers of the outer residues.
        r   r
   z<Polypeptide start=z end=>)get_id)r-   startendr   r   r   __repr__  s   zPolypeptide.__repr__N)
__name__
__module____qualname____doc__r1   rE   rY   r]   rb   rh   r   r   r   r   r*      s    -r*   c                   @   s*   e Zd ZdZdd Zdd Zd
ddZd	S )
_PPBuilderzBase class to extract polypeptides.

    It checks if two consecutive residues in a chain are connected.
    The connectivity test is implemented by a subclass.

    This assumes you want both standard and non-standard amino acids.
    c                 C   s
   || _ dS )z`Initialize the base class.

        :param radius: distance
        :type radius: float
        Nradiusr-   ro   r   r   r   __init__2  s   
z_PPBuilder.__init__c                 C   s8   t ||drdS |sd|jv rtd|   dS dS )z0Check if the residue is an amino acid (PRIVATE).)r'   Tr+   z5Assuming residue %s is an unknown modified amino acidF)r(   
child_dictwarningswarnr#   )r-   r&   standard_aa_onlyr   r   r   _accept:  s   z_PPBuilder._acceptr
   c              	   C   s  | j }| j}| }|dkr|d }| }n|dkr | }n|dkr(|g}ntdg }|D ]U}	t|	}
zt|
}|||sIt|
}|||r@W n	 tyS   Y q0w d}|
D ],}|||r|||r|||r|du rzt }|	| |	| |	| nd}|}qXq0|S )a  Build and return a list of Polypeptide objects.

        :param entity: polypeptides are searched for in this object
        :type entity: L{Structure}, L{Model} or L{Chain}

        :param aa_only: if 1, the residue needs to be a standard AA
        :type aa_only: int
        Sr   Mr3   z+Entity should be Structure, Model or Chain.N)
_is_connectedrv   	get_levelget_listr   iternextStopIterationr*   r,   )r-   entityaa_onlyis_connectedacceptlevelmodel
chain_listpp_listchainchain_itprev_resppnext_resr   r   r   build_peptidesK  sP   	





z_PPBuilder.build_peptidesN)r
   )ri   rj   rk   rl   rq   rv   r   r   r   r   r   rm   )  s
    rm   c                   @   s"   e Zd ZdZdddZdd ZdS )	CaPPBuilderz)Use CA--CA distance to find polypeptides.333333@c                 C      t | | dS zInitialize the class.Nrm   rq   rp   r   r   r   rq        zCaPPBuilder.__init__c           
      C   s   ||fD ]
}| ds dS q|d }|d }| r | }n|g}| r,| }n|g}|D ]}|D ]}	||	 | jk rB  dS q5q1dS )Nr+   FT)has_idis_disordereddisordered_get_listro   )
r-   r   r   rr=   pnlistplistrC   r   r   r   r   ry     s&   


zCaPPBuilder._is_connectedN)r   )ri   rj   rk   rl   rq   ry   r   r   r   r   r     s    
r   c                   @   s*   e Zd ZdZd
ddZdd Zdd Zd	S )	PPBuilderz'Use C--N distance to find polypeptides.?c                 C   r   r   r   rp   r   r   r   rq     r   zPPBuilder.__init__c                 C   s   | dsdS | dsdS | j}|d }|d }| r"| }n|g}| r.| }n|g}|D ]6}|D ]1}	| }
|	 }|
|ksM|
dksM|dkrh|||	rh| r[|| | rd||
   dS q7q3dS )Nr3   Fr2    T)r   
_test_distr   r   
get_altlocdisordered_select)r-   r   r   	test_distr>   r=   clistr   rC   ccn_altlocc_altlocr   r   r   ry     s6   






zPPBuilder._is_connectedc                 C   s   || | j k r	dS dS )z4Return 1 if distance between atoms<radius (PRIVATE).r
   r   rn   )r-   r>   r=   r   r   r   r     s   zPPBuilder._test_distN)r   )ri   rj   rk   rl   rq   ry   r   r   r   r   r   r     s
    
#r   )F)'rl   rs   Bio.Data.PDBDatar   r   r   r   Bio.PDB.PDBExceptionsr   Bio.PDB.vectorsr   r   Bio.Seqr	   zipsorteditemsaa3aa1standard_aa_namesr   r   r   r   r7   r   n1n3r   r   r   r   r(   r)   listr*   rm   r   r   r   r   r   r   <module>   sB   -


 nV