Source code for pyuoi.utils

import numpy as np
import sys
import logging


[docs]def softmax(y, axis=-1): """Calculates the softmax distribution. Parameters ---------- y : ndarray Log-probabilities. """ yp = y - y.max(axis=axis, keepdims=True) epy = np.exp(yp) return epy / np.sum(epy, axis=axis, keepdims=True)
[docs]def sigmoid(x): """Calculates the bernoulli distribution. Parameters ---------- x : ndarray Log-probabilities. """ return np.exp(-np.logaddexp(0, -x))
[docs]def log_likelihood_glm(model, y_true, y_pred): """Calculates the log-likelihood of a generalized linear model given the true response variables and the "predicted" response variables. The "predicted" response variable varies by the specific generalized linear model under consideration. Parameters ---------- model : string The generalized linear model to calculate the log-likelihood for. y_true : nd-array, shape (n_samples,) Array of true response values. y_pred : nd-array, shape (n_samples,) Array of predicted response values (conditional mean). Returns ------- ll : float The log-likelihood. """ if model == 'normal': # this log-likelihood is calculated under the assumption that the # variance is the value that maximizes the log-likelihood rss = (y_true - y_pred)**2 n_samples = y_true.size ll = -n_samples / 2 * (1 + np.log(np.mean(rss))) elif model == 'poisson': if not np.any(y_pred): if np.any(y_true): ll = -np.inf else: ll = 0. else: ll = np.mean(y_true * np.log(y_pred) - y_pred) else: raise ValueError('Model is not available.') return ll
[docs]def BIC(ll, n_features, n_samples): """Calculates the Bayesian Information Criterion. Parameters ---------- ll : float The log-likelihood of the model. n_features : int The number of features used in the model. n_samples : int The number of samples in the dataset being tested. Returns ------- BIC : float Bayesian Information Criterion """ BIC = n_features * np.log(n_samples) - 2 * ll return BIC
[docs]def AIC(ll, n_features): """Calculates the Akaike Information Criterion. Parameters ---------- ll : float The log-likelihood of the model. n_features : int The number of features used in the model. n_samples : int The number of samples in the dataset being tested. Returns ------- AIC : float Akaike Information Criterion """ AIC = 2 * n_features - 2 * ll return AIC
[docs]def AICc(ll, n_features, n_samples): """Calculate the corrected Akaike Information Criterion. This criterion is useful in cases when the number of samples is small. If the number of features is equal to the number of samples plus one, then the AIC is returned (the AICc is undefined in this case). Parameters ---------- ll : float The log-likelihood of the model. n_features : int The number of features used in the model. n_samples : int The number of samples in the dataset being tested. Returns ------- AIC : float Akaike Information Criterion """ AICc = AIC(ll, n_features) if n_samples > (n_features + 1): AICc += 2 * (n_features**2 + n_features) / (n_samples - n_features - 1) return AICc
def check_logger(logger, name='uoi', comm=None): ret = logger if ret is None: if comm is not None and comm.Get_size() > 1: r, s = comm.Get_rank(), comm.Get_size() name += " " + str(r).rjust(int(np.log10(s)) + 1) ret = logging.getLogger(name=name) handler = logging.StreamHandler(sys.stdout) fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' handler.setFormatter(logging.Formatter(fmt)) ret.addHandler(handler) return ret