o
    Rŀg                     @   st   d Z dddddZdddd	dd
ZdddZddddddddZdZG dd dZedkr8ddlm	Z	 e	  dS dS )aR  Calculate isoelectric points of polypeptides using methods of Bjellqvist.

pK values and the methods are taken from::

    * Bjellqvist, B.,Hughes, G.J., Pasquali, Ch., Paquet, N., Ravier, F.,
    Sanchez, J.-Ch., Frutiger, S. & Hochstrasser, D.F.
    The focusing positions of polypeptides in immobilized pH gradients can be
    predicted from their amino acid sequences. Electrophoresis 1993, 14,
    1023-1031.

    * Bjellqvist, B., Basse, B., Olsen, E. and Celis, J.E.
    Reference points for comparisons of two-dimensional maps of proteins from
    different human cell types defined in a pH scale where isoelectric points
    correlate with polypeptide compositions. Electrophoresis 1994, 15, 529-539.

I designed the algorithm according to a note by David L. Tabb, available at:
http://fields.scripps.edu/DTASelect/20010710-pI-Algorithm.pdf
g      @g      $@g      (@gQ@)NtermKRHgffffff@333333@g@g      "@)CtermDECYg333333@g      @)r   r   g\(\@g      @gQ@gQ @gHzG@g(\@g@)AMSPTVr   )r   r   r   r   r   r	   r
   c                   @   s<   e Zd ZdZdddZdd Zdd Zd	d
 ZdddZdS )IsoelectricPointa  A class for calculating the IEP or charge at given pH of a protein.

    Parameters
    ----------
    :protein_sequence: A ``Bio.Seq`` or string object containing a protein
                       sequence.
    :aa_content: A dictionary with amino acid letters as keys and its
                 occurrences as integers, e.g. ``{"A": 3, "C": 0, ...}``.
                 Default: ``None``. If ``None``, the dic will be calculated
                 from the given sequence.

    Methods
    -------
    :charge_at_pH(pH):  Calculates the charge of the protein for a given pH
    :pi():              Calculates the isoelectric point


    Examples
    --------
    The methods of this class can either be accessed from the class itself
    or from a ``ProtParam.ProteinAnalysis`` object (with partially different
    names):

    >>> from Bio.SeqUtils.IsoelectricPoint import IsoelectricPoint as IP
    >>> protein = IP("INGAR")
    >>> print(f"IEP of peptide {protein.sequence} is {protein.pi():.2f}")
    IEP of peptide INGAR is 9.75
    >>> print(f"Its charge at pH 7 is {protein.charge_at_pH(7.0):.2f}")
    Its charge at pH 7 is 0.76


    >>> from Bio.SeqUtils.ProtParam import ProteinAnalysis as PA
    >>> protein = PA("PETER")
    >>> print(f"IEP of {protein.sequence}: {protein.isoelectric_point():.2f}")
    IEP of PETER: 4.53
    >>> print(f"Charge at pH 4.53: {protein.charge_at_pH(4.53):.2f}")
    Charge at pH 4.53: 0.00

    Nc                 C   sH   |  | _|sddlm} || j }| || _|  \| _| _	dS )zInitialize the class.    )ProteinAnalysisN)
uppersequenceBio.SeqUtils.ProtParamr   count_amino_acids_select_chargedcharged_aas_content_update_pKs_tablespos_pKsneg_pKs)selfprotein_sequence
aa_content_PA r!   Q/var/www/html/myenv/lib/python3.10/site-packages/Bio/SeqUtils/IsoelectricPoint.py__init__R   s   
zIsoelectricPoint.__init__c                 C   s2   i }t D ]
}t|| ||< qd|d< d|d< |S )N      ?r   r   )charged_aasfloat)r   r   chargedaar!   r!   r"   r   _   s   z IsoelectricPoint._select_chargedc                 C   sV   t  }t }| jd | jd }}|tv rt| |d< |tv r't| |d< ||fS )z@Update pKs tables with seq specific values for N- and C-termini.r   r   r   )positive_pKscopynegative_pKsr   pKnterminalpKcterminal)r   r   r   ntermctermr!   r!   r"   r   g   s   z#IsoelectricPoint._update_pKs_tablesc                 C   s   d}| j  D ]\}}dd||  d  }|| j| | 7 }qd}| j D ]\}}dd||  d  }|| j| | 7 }q&|| S )z.Calculate the charge of a protein at given pH.        r$   
   )r   itemsr   r   )r   pHpositive_charger(   pKpartial_chargenegative_charger!   r!   r"   charge_at_pHr   s   	zIsoelectricPoint.charge_at_pH皙@r      c                 C   sF   |  |}|| dkr!|dkr|}n|}|| d }| |||S |S )a  Calculate and return the isoelectric point as float.

        This is a recursive function that uses bisection method.
        Wiki on bisection: https://en.wikipedia.org/wiki/Bisection_method

        Arguments:
         - pH: the pH at which the current charge of the protein is computed.
           This pH lies at the centre of the interval (mean of `min_` and `max_`).
         - min\_: the minimum of the interval. Initial value defaults to 4.05,
           which is below the theoretical minimum, when the protein is composed
           exclusively of aspartate.
         - max\_: the maximum of the the interval. Initial value defaults to 12,
           which is above the theoretical maximum, when the protein is composed
           exclusively of arginine.
        g-C6?r1      )r9   pi)r   r4   min_max_chargenext_pHr!   r!   r"   r=      s   
zIsoelectricPoint.pi)N)r:   r   r;   )	__name__
__module____qualname____doc__r#   r   r   r9   r=   r!   r!   r!   r"   r   )   s    
(r   __main__r   )run_doctestN)
rE   r*   r,   r.   r-   r%   r   rB   
Bio._utilsrG   r!   r!   r!   r"   <module>   s$   
	{
