"""Maximum Entropy code.

Uses Improved Iterative Scaling.
"""

import warnings
from functools import reduce

try:
    import numpy as np
except ImportError:
    from Bio import MissingPythonDependencyError

    raise MissingPythonDependencyError(
        "Please install NumPy if you want to use Bio.MaxEntropy. "
        "See http://www.numpy.org/"
    ) from None

from Bio import BiopythonDeprecationWarning

warnings.warn(
    "The 'Bio.MaxEntropy' module is deprecated and will be removed in a "
    "future release of Biopython. Consider using scikit-learn instead.",
    BiopythonDeprecationWarning,
)


class MaxEntropy:
    """Hold information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    Car data taken from the Naive Bayes Classifier example by Eric Meisner, November 22, 2003:
    http://www.inf.u-szeged.hu/~ormandi/teaching

    >>> from Bio.MaxEntropy import train, classify
    >>> xcar = [
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Red', 'Sports', 'Domestic'],
    ...     ['Yellow', 'Sports', 'Domestic'],
    ...     ['Yellow', 'Sports', 'Imported'],
    ...     ['Yellow', 'SUV', 'Imported'],
    ...     ['Yellow', 'SUV', 'Imported'],
    ...     ['Yellow', 'SUV', 'Domestic'],
    ...     ['Red', 'SUV', 'Imported'],
    ...     ['Red', 'Sports', 'Imported']]
    >>> ycar = ['Yes','No','Yes','No','Yes','No','Yes','No','No','Yes']

    Requires some rules or features

    >>> def udf1(ts, cl):
    ...     return ts[0] != 'Red'
    ...
    >>> def udf2(ts, cl):
    ...     return ts[1] != 'Sports'
    ...
    >>> def udf3(ts, cl):
    ...     return ts[2] != 'Domestic'
    ...
    >>> user_functions = [udf1, udf2, udf3]  # must be an iterable type
    >>> xe = train(xcar, ycar, user_functions)
    >>> for xv, yv in zip(xcar, ycar):
    ...     xc = classify(xe, xv)
    ...     print('Pred: %s gives %s y is %s' % (xv, xc, yv))
    ...
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is Yes
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is No
    Pred: ['Red', 'Sports', 'Domestic'] gives No y is Yes
    Pred: ['Yellow', 'Sports', 'Domestic'] gives No y is No
    Pred: ['Yellow', 'Sports', 'Imported'] gives No y is Yes
    Pred: ['Yellow', 'SUV', 'Imported'] gives No y is No
    Pred: ['Yellow', 'SUV', 'Imported'] gives No y is Yes
    Pred: ['Yellow', 'SUV', 'Domestic'] gives No y is No
    Pred: ['Red', 'SUV', 'Imported'] gives No y is No
    Pred: ['Red', 'Sports', 'Imported'] gives No y is Yes
    """

    def __init__(self):
        """Initialize the class."""
        self.classes = []
        self.alphas = []
        self.feature_fns = []


def calculate(me, observation):
    """Calculate the log of the probability for each class.

    me is a MaxEntropy object that has been trained.  observation is a vector
    representing the observed data.  The return value is a list of
    unnormalized log probabilities for each class.
    """
    scores = []
    assert len(me.feature_fns) == len(me.alphas)
    for klass in me.classes:
        lprob = 0.0
        for fn, alpha in zip(me.feature_fns, me.alphas):
            lprob += fn(observation, klass) * alpha
        scores.append(lprob)
    return scores


def classify(me, observation):
    """Classify an observation into a class."""
    scores = calculate(me, observation)
    max_score, klass = scores[0], me.classes[0]
    for i in range(1, len(scores)):
        if scores[i] > max_score:
            max_score, klass = scores[i], me.classes[i]
    return klass
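

# Illustrative sketch, not part of the original Bio.MaxEntropy API: the values
# returned by calculate() are unnormalized log probabilities, so exponentiating
# and renormalizing them (a softmax) yields proper class probabilities.  The
# helper name below is hypothetical and nothing in the module calls it.
def _scores_to_probabilities(me, observation):
    """Map each class to its normalized probability (illustrative sketch)."""
    scores = calculate(me, observation)
    shift = max(scores)  # subtract the maximum score for numerical stability
    exps = [np.exp(s - shift) for s in scores]
    total = sum(exps)
    return {klass: e / total for klass, e in zip(me.classes, exps)}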


def _eval_feature_fn(fn, xs, classes):
    """Evaluate a feature function on every instance of the training set and class (PRIVATE).

    fn is a callback function that takes two parameters: a
    training instance and a class.  Return a dictionary of (training
    set index, class index) -> non-zero value.  Values of 0 are not
    stored in the dictionary.
    """
    values = {}
    for i in range(len(xs)):
        for j in range(len(classes)):
            f = fn(xs[i], classes[j])
            if f != 0:
                values[(i, j)] = f
    return values


def _calc_empirical_expects(xs, ys, classes, features):
    """Calculate the expectation of each function from the data (PRIVATE).

    This is the constraint for the maximum entropy distribution. Return a
    list of expectations, parallel to the list of features.
    """
    # Empirical expectation of feature f_i: E[f_i] = (1/N) * SUM_n f_i(x_n, y_n),
    # summed over the N training pairs.
    class2index = {}
    for index, key in enumerate(classes):
        class2index[key] = index
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(s / N)
    return expect
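

# Illustrative sketch, not part of the original Bio.MaxEntropy API: on the car
# data from the MaxEntropy docstring, the empirical expectation of a feature is
# simply the fraction of training pairs on which it fires.  The function name
# is hypothetical and it is never called by the module itself.
def _example_empirical_expectation():
    """Compute the empirical expectation of udf1 on the car data (illustrative sketch)."""
    xcar = [
        ["Red", "Sports", "Domestic"],
        ["Red", "Sports", "Domestic"],
        ["Red", "Sports", "Domestic"],
        ["Yellow", "Sports", "Domestic"],
        ["Yellow", "Sports", "Imported"],
        ["Yellow", "SUV", "Imported"],
        ["Yellow", "SUV", "Imported"],
        ["Yellow", "SUV", "Domestic"],
        ["Red", "SUV", "Imported"],
        ["Red", "Sports", "Imported"],
    ]
    ycar = ["Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No", "No", "Yes"]

    def udf1(ts, cl):
        return ts[0] != "Red"

    classes = sorted(set(ycar))
    features = [_eval_feature_fn(udf1, xcar, classes)]
    # udf1 fires on the five 'Yellow' cars, so the expectation is 5/10 = 0.5.
    return _calc_empirical_expects(xcar, ycar, classes, features)[0]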


def _calc_model_expects(xs, classes, features, alphas):
    """Calculate the expectation of each feature from the model (PRIVATE).

    This is not used in maximum entropy training, but provides a good function
    for debugging.
    """
    # Model expectation: (1/N) * SUM_x SUM_y P(y|x) f(x, y).
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)
    expects = []
    for feature in features:
        s = 0.0
        for (i, j), f in feature.items():
            s += p_yx[i][j] * f
        expects.append(s / len(xs))
    return expects


def _calc_p_class_given_x(xs, classes, features, alphas):
    """Calculate conditional probability P(y|x) (PRIVATE).

    y is the class and x is an instance from the training set.
    Return a XSxCLASSES matrix of probabilities.
    """
    prob_yx = np.zeros((len(xs), len(classes)))
    # Sum the weighted feature values to get the unnormalized log P(y, x).
    assert len(features) == len(alphas)
    for feature, alpha in zip(features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f
    # Exponentiate, then normalize each row over the classes to get P(y|x).
    prob_yx = np.exp(prob_yx)
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z
    return prob_yx


def _calc_f_sharp(N, nclasses, features):
    """Calculate a matrix of f sharp values (PRIVATE)."""
    # f#(x, y) is the sum of all feature values at (x, y).
    f_sharp = np.zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp


def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx, max_newton_iterations, newton_converge):
    """Solve delta using Newton's method (PRIVATE)."""
    delta = 0.0
    iters = 0
    while iters < max_newton_iterations:
        # Evaluate the function whose root gives delta, and its derivative.
        f_newton = df_newton = 0.0
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * np.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        ratio = f_newton / df_newton
        delta -= ratio
        if np.fabs(ratio) < newton_converge:  # converged
            break
        iters += 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta


def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical, max_newton_iterations, newton_converge):
    """Do one iteration of hill climbing to find better alphas (PRIVATE)."""
    # Pre-calculate P(y|x) under the current alphas.
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)
    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(
            N, features[i], f_sharp, e_empirical[i], p_yx,
            max_newton_iterations, newton_converge,
        )
        newalphas[i] += delta
    return newalphas


def train(training_set, results, feature_fns, update_fn=None,
          max_iis_iterations=10000, iis_converge=1e-5,
          max_newton_iterations=100, newton_converge=1e-10):
    """Train a maximum entropy classifier, returns MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that is called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    The maximum number of iterations and the convergence criterion for IIS
    are given by max_iis_iterations and iis_converge, respectively, while
    max_newton_iterations and newton_converge are the maximum number
    of iterations and the convergence criterion for Newton's method.
    """
    if not training_set:
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    # Rename variables for convenience.
    xs, ys = training_set, results

    # Get a list of all the classes that need to be trained.
    classes = sorted(set(results))

    # Cache values for all features on the training data.
    features = [_eval_feature_fn(fn, training_set, classes) for fn in feature_fns]
    # Cache values for f#.
    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

    # Now train the alpha parameters to weigh each feature.
    alphas = [0.0] * len(features)
    iters = 0
    while iters < max_iis_iterations:
        nalphas = _train_iis(
            xs, classes, features, f_sharp, alphas, e_empirical,
            max_newton_iterations, newton_converge,
        )
        diff = [np.fabs(x - y) for x, y in zip(alphas, nalphas)]
        diff = reduce(np.add, diff, 0)
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < iis_converge:  # converged
            break
        iters += 1
    else:
        raise RuntimeError("IIS did not converge")
    return me
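

# Illustrative sketch, not part of the original Bio.MaxEntropy API: train() calls
# update_fn once per IIS iteration with the partially trained classifier, so a
# minimal progress monitor could look like the function below (its name is
# hypothetical).  It would be passed as, e.g.,
# train(xcar, ycar, user_functions, update_fn=_show_progress).
def _show_progress(me):
    """Print the current alphas after each IIS iteration (illustrative sketch)."""
    print("alphas:", me.alphas)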


if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest(verbose=0)