Source code for gklr.kernel_estimator

"""GKLR kernel_estimator module."""
import sys
from typing import Optional, Tuple, Any, Dict

import numpy as np

from .kernel_calcs import KernelCalcs
from .logger import *
from .kernel_utils import *
from .estimation import Estimation

class KernelEstimator(Estimation):
    """Estimation object for the Kernel Logistic Regression (KLR) model."""

    def __init__(self,
                 calcs: KernelCalcs,
                 pmle: Optional[str] = None,
                 pmle_lambda: float = 0.0,
                 method: str = "L-BFGS-B",
                 verbose: int = 1,
                 ) -> None:
        """Constructor.

        Args:
            calcs: Calcs object.
            pmle: Indicates the penalization method for the penalized maximum
                likelihood estimation. If 'None' a maximum likelihood
                estimation without penalization is performed. Default: None.
            pmle_lambda: The value of the regularization parameter for the
                PMLE method. Default: 0.0.
            method: The optimization method. Default: "L-BFGS-B".
            verbose: Indicates the level of verbosity of the function. If 0,
                no output will be printed. If 1, basic information about the
                estimation procedure will be printed. If 2, the information
                about each iteration will be printed. Default: 1.
        """
        if pmle not in VALID_PMLE_METHODS:
            msg = (f"'pmle' = {pmle} is not a valid value for the penalization"
                   f" method. Valid methods are: {VALID_PMLE_METHODS}.")
            logger_error(msg)
            raise ValueError(msg)
        super().__init__(calcs, pmle, pmle_lambda, method, verbose)
        self.calcs = calcs
        self.alpha_shape = (calcs.K.get_num_cols(),
                            calcs.K.get_num_alternatives())
        self.P_cache = None       # Cache for the matrix of probabilities P
        self.prev_params = None   # Previous parameters used in the objective function
        self.prev_indices = None  # Previous indices used in the objective function
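
A minimal construction sketch (not part of the module source): `calcs` stands in for an already-built KernelCalcs instance, whose setup lives outside this module.

# Sketch only: `calcs` is assumed to be an already-built KernelCalcs
# instance wrapping a kernel matrix K.
estimator = KernelEstimator(
    calcs,
    pmle=None,         # plain maximum likelihood (assumes None is in VALID_PMLE_METHODS)
    pmle_lambda=0.0,
    method="L-BFGS-B",
    verbose=1,
)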

    def objective_function(self,
                           params: np.ndarray,
                           indices: Optional[np.ndarray] = None
                           ) -> float:
        """Compute the objective function for the Kernel Logistic Regression
        (KLR) model.

        Args:
            params: The model parameters. Shape: (n_params,).
            indices: The indices of the samples to be used in the computation
                of the objective function. If 'None' all the samples will be
                used. Default: None.

        Returns:
            The value of the objective function: the negative of the
            (penalized) log-likelihood.
        """
        # Reshape the flat params vector into the alpha matrix
        alpha = params.reshape(self.alpha_shape)
        if self.prev_params is None or not np.array_equal(params, self.prev_params) or \
                (indices is not None and self.prev_indices is None) or \
                (indices is None and self.prev_indices is not None) or \
                (indices is not None and self.prev_indices is not None and
                 not np.array_equal(indices, self.prev_indices)):
            # Compute the matrix of probabilities P and store it in the cache
            P = self.calcs.calc_probabilities(alpha, indices=indices)
            self.P_cache = P
            self.prev_params = params
            self.prev_indices = indices
        else:
            # Reuse the cached matrix of probabilities P
            P = self.P_cache
        # Compute the log-likelihood
        ll = self.calcs.log_likelihood(alpha, P=P, pmle=self.pmle,
                                       pmle_lambda=self.pmle_lambda,
                                       indices=indices)
        self.history["loss"].append(-ll)
        if self.verbose >= 2:
            print(f"Current objective function: {-ll:,.4f}", end="\r")
            sys.stdout.flush()
        return -ll
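
The optimizer passes the parameters as a flat vector, which the method reshapes to `alpha_shape`. A single-evaluation sketch, reusing the hypothetical `estimator` above:

import numpy as np

n_rows, n_alts = estimator.alpha_shape
params0 = np.zeros(n_rows * n_alts)           # flat initial parameter vector
loss = estimator.objective_function(params0)  # negative (penalized) log-likelihood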

    def gradient(self,
                 params: np.ndarray,
                 indices: Optional[np.ndarray] = None,
                 ) -> np.ndarray:
        """Compute the gradient of the objective function for the Kernel
        Logistic Regression (KLR) model.

        This function is used by optimization methods that do not require the
        value of the objective function. If the objective function is also
        required, it is more efficient to use the
        'objective_function_with_gradient' method.

        Args:
            params: The model parameters. Shape: (n_params,).
            indices: The indices of the samples to be used in the computation
                of the gradient. If 'None' all the samples will be used.
                Default: None.

        Returns:
            The gradient of the objective function with respect to the model
            parameters with shape: (num_rows_kernel_matrix * num_alternatives,).
        """
        # Reshape the flat params vector into the alpha matrix
        alpha = params.reshape(self.alpha_shape)
        if self.prev_params is None or not np.array_equal(params, self.prev_params) or \
                (indices is not None and self.prev_indices is None) or \
                (indices is None and self.prev_indices is not None) or \
                (indices is not None and self.prev_indices is not None and
                 not np.array_equal(indices, self.prev_indices)):
            # Compute the matrix of probabilities P and store it in the cache
            P = self.calcs.calc_probabilities(alpha, indices=indices)
            self.P_cache = P
            self.prev_params = params
            self.prev_indices = indices
        else:
            # Reuse the cached matrix of probabilities P
            P = self.P_cache
        # Compute the gradient
        gradient = self.calcs.gradient(alpha, P=P, pmle=self.pmle,
                                       pmle_lambda=self.pmle_lambda,
                                       indices=indices)
        return gradient
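
Because `gradient` shares the probability cache with `objective_function`, calling both with identical `params` and `indices` triggers only one `calc_probabilities` pass. A purely illustrative finite-difference check of the first coordinate, under the same assumptions as the sketches above:

import numpy as np

eps = 1e-6
g = estimator.gradient(params0)
e0 = np.zeros_like(params0)
e0[0] = eps
# Central difference approximation of the first partial derivative
fd = (estimator.objective_function(params0 + e0)
      - estimator.objective_function(params0 - e0)) / (2 * eps)
print(g[0], fd)  # the two values should roughly agree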

    def objective_function_with_gradient(self,
                                         params: np.ndarray,
                                         indices: Optional[np.ndarray] = None
                                         ) -> Tuple[float, np.ndarray]:
        """Compute the objective function for the Kernel Logistic Regression
        (KLR) model and its gradient.

        Args:
            params: The model parameters. Shape: (n_params,).
            indices: The indices of the samples to be used in the computation
                of the objective function. If 'None' all the samples will be
                used. Default: None.

        Returns:
            A tuple with the value of the objective function and its gradient.
            The first element of the tuple is the value of the objective
            function and the second element is the gradient of the objective
            function with respect to the model parameters with shape:
            (num_rows_kernel_matrix * num_alternatives,).
        """
        # Compute the objective function and its gradient
        obj = self.objective_function(params, indices=indices)
        gradient = self.gradient(params, indices=indices)
        return (obj, gradient)
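
The (value, gradient) pair returned here is exactly the shape that SciPy's gradient-based optimizers accept with `jac=True`; the base `Estimation.minimize` presumably wraps something equivalent. A direct-call sketch:

from scipy.optimize import minimize

res = minimize(
    estimator.objective_function_with_gradient,
    params0,
    method="L-BFGS-B",
    jac=True,  # the callable returns (objective, gradient)
)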

    def minimize(self,
                 params: np.ndarray,
                 loss_tol: float = 1e-06,
                 options: Optional[Dict[str, Any]] = None,
                 **kargs: Dict[str, Any],
                 ) -> Dict[str, Any]:
        """Minimize the objective function.

        Args:
            params: The initial values of the model parameters.
                Shape: (n_params,).
            loss_tol: The tolerance for the loss function. Default: 1e-06.
            options: A dict with advanced options for the optimization
                method. Default: None.
            **kargs: Additional arguments for the minimization function.

        Returns:
            A dict with the results of the optimization.
        """
        results = super().minimize(params, loss_tol, options, **kargs)
        # Reshape the flat estimated params into the alpha coefficient matrix
        results["alpha"] = results["params"].reshape(self.alpha_shape)
        return results
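
End to end, an estimation run then reduces to a single call; the returned dict carries whatever the base class reports, plus the reshaped coefficients added above. Assuming `params0` from the earlier sketch:

results = estimator.minimize(params0, loss_tol=1e-6)
alpha_hat = results["alpha"]  # shape: (num_cols_of_K, num_alternatives)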