Source code for figaro.diagnostic

import numpy as np
import matplotlib.pyplot as plt
import warnings
from numba import njit, prange
from pathlib import Path
from figaro.exceptions import FIGAROException
from figaro import plot_settings

log2e = np.log2(np.e)


[docs]
@njit
def angular_coefficient(x, y):
    """
    Angular coefficient obtained from linear regression.
    
    Arguments:
        np.ndarray x: independent variables
        np.ndarray y: dependent variables
    
    Returns:
        double: angular coefficient
    """
    return np.sum((x - np.mean(x))*(y - np.mean(y)))/np.sum((x - np.mean(x))**2)

    

[docs]
def compute_angular_coefficients(x, L = None):
    """
    Given an array of points x, computes the angular coefficient for each adjacent chunk of length L.
    
    Arguments:
        np.ndarray x: array of points
        int L:        window length
    
    Returns:
        np.ndarray: array of angular coefficients
    """
    if L is None:
       L = len(x)//10
    
    if L > len(x):
        raise FIGAROException("L must be smaller than the number of points you have")
        
    L = np.min([int(L), len(x)])
    N = np.arange(len(x))+1
    a = np.zeros(len(x) - int(L), dtype = np.float64)
    for i in range(len(a)):
        a[i] = angular_coefficient(N[i:i+L], x[i:i+L])
    return a



[docs]
def plot_angular_coefficient(entropy, L = 500, ac_expected = None, out_folder = '.', name = 'event', step = 1, show = False, save = True):
    """
    Compute entropy angular coefficient and produce the relevant plot
    
    Arguments:
        iterable entropy:       container of mixture instances
        int L:                  window lenght
        double ac_expected:     expected angular coefficient
        str or Path out_folder: output folder
        str name:               name to be given to outputs
        int step:               number of draws between entropy samples (if downsampled by some other method, for plotting purposes only)
        bool save:              whether to save the plot or not
        bool show:              whether to show the plot during the run or not
    
    Returns:
        np.ndarray: angular coefficients
    """
    S = compute_angular_coefficients(entropy, L = L)
    fig, ax = plt.subplots()
    if ac_expected is not None:
        ax.axhline(ac_expected, lw = 0.5, ls = '--', c = 'r')
    ax.plot(np.arange(len(S))*step+L, S, ls = '--', marker = '', color = 'steelblue', lw = 0.7)
    ax.set_ylabel('$\\frac{dS(N)}{dN}$')
    ax.set_xlabel('$N$')
    if show:
        plt.show()
    if save:
        fig.savefig(Path(out_folder, name+'_angular_coefficient.pdf'), bbox_inches = 'tight')
    plt.close()
    return S



[docs]
@njit
def compute_autocorrelation(draws, mean, dx):
    """
    Computes autocorrelation of subsequent draws as <∫(draw[i]-mean)*(draw[i+t]-mean)*dx/∫(draw[i]-mean)**2*dx>
    
    Arguments:
        np.ndarray draws: evaluated mixtures (2d array)
        np.ndarray mean:  bin-wise mean of evaluated mixtures (1d array)
        double dx:        integration measure
    
    Returns:
        int: upper bound of autocorrelation length
        np.ndarray: autocorrelation function
    """
    taumax          = draws.shape[0]//2
    n_draws         = draws.shape[0]
    autocorrelation = np.zeros(taumax, dtype = np.float64)
    N = 0.
    for i in prange(n_draws):
        N += np.sum((draws[i] - mean)*(draws[i] - mean))*dx/n_draws
        
    for tau in prange(taumax):
        sum = 0.
        for i in prange(n_draws):
            sum += np.sum((draws[i] - mean)*(draws[(i+tau)%n_draws] - mean))*dx/(N*n_draws)
        autocorrelation[tau] = sum
    return taumax, autocorrelation



[docs]
def autocorrelation(draws, bounds = None, out_folder = '.', name = 'event', n_points = 1000, save = True, show = False):
    """
    Compute autocorrelation of a set of draws and produce the relevant plot
    
    Arguments:
        iterable draws:         container of mixture instances
        iterable bounds:        bounds of the interval over which the distributions are evaluated. It has to be passed as [[xmin,xmax]]. If None, draws bounds are used.
        str or Path out_folder: output folder
        str name:               name to be given to outputs
        int n_points:           number of points for linspace
        bool save:              whether to save the plot or not
        bool show:              whether to show the plot during the run or not
    
    Returns:
        np.ndarray: autocorrelation
    """
    all_bounds = np.atleast_2d([d.bounds[0] for d in draws])
    x_min = np.max(all_bounds[:,0])
    x_max = np.min(all_bounds[:,1])

    if bounds is not None:
        if not bounds[0] >= x_min:
            warnings.warn("The provided lower bound is invalid for at least one draw. {0} will be used instead.".format(x_min))
        else:
            x_min = bounds[0]
        if not bounds[1] <= x_max:
            warnings.warn("The provided upper bound is invalid for at least one draw. {0} will be used instead.".format(x_max))
        else:
            x_max = bounds[1]
    x  = np.linspace(x_min, x_max, n_points)
    dx = x[1] - x[0]
    
    functions = np.array([mix.pdf(x) for mix in draws])
    mean      = np.mean(functions, axis = 0)
    
    taumax, ac = compute_autocorrelation(functions, mean, dx)
    
    fig, ax = plt.subplots()
    ax.axhline(0, lw = 0.5, ls = '--', c = 'r')
    ax.plot(np.arange(taumax), ac, ls = '--', marker = '', lw = 0.7)
    ax.set_xlabel('$\\tau$')
    ax.set_ylabel('$C(\\tau)$')
    if show:
        plt.show()
    if save:
        fig.savefig(Path(out_folder, name+'_autocorrelation.pdf'), bbox_inches = 'tight')
    plt.close()
    return ac

    

[docs]
def compute_entropy_single_draw(mixture, n_draws = 1e3, return_error = False):
    """
    Compute entropy for a single realisation of the DPGMM using Monte Carlo integration
    
    Arguments:
        mixture mixture: instance of mixture class (see mixture.py for definition)
        double n_draws:  number of MC draws
    
    Returns:
        double: entropy value
    """
    samples = mixture.rvs(int(n_draws))
    logP    = mixture.logpdf(samples)
    entropy = np.mean(-logP)*log2e
    if not return_error:
        return entropy
    else:
        dentropy = np.std(-logP)/(np.sqrt(n_draws)/log2e)
        return entropy, dentropy



[docs]
def compute_entropy(draws, n_draws = 1e3, return_error = False):
    """
    Compute entropy for a list of realisations of the DPGMM using Monte Carlo integration
    
    Arguments:
        iterable draws: container of mixture class instaces (see mixture.py for definition)
        double n_draws: number of MC draws
    
    Returns:
        np.ndarray: entropy values
    """
    S = np.zeros(len(draws))
    if not return_error:
        for i, d in enumerate(draws):
            S[i] = compute_entropy_single_draw(d, n_draws)
        return S
    else:
        dS = np.zeros(len(draws))
        for i, d in enumerate(draws):
            S[i], dS[i] = compute_entropy_single_draw(d, n_draws, return_error = return_error)
        return S, dS



[docs]
def entropy(draws, out_folder = '.', exp_entropy = None, name = 'event', n_draws = 1e4, step = 1, show = False, save = True):
    """
    Compute entropy of a set of draws and produce the relevant plot
    
    Arguments:
        iterable draws:         container of mixture instances
        str or Path out_folder: output folder
        double exp_entropy:     expected value for entropy, expressed in bits
        str name:               name to be given to outputs
        int n_draws:            number of MC draws
        int step:               number of draws between entropy samples (if downsampled by some other method, for plotting purposes only)
        bool save:              whether to save the plot or not
        bool show:              whether to show the plot during the run or not
    
    Return:
        np.ndarray: entropy
    """
    S = compute_entropy(draws, int(n_draws))
    fig, ax = plt.subplots()
    ax.plot(np.arange(1, len(draws)+1)*step, S, ls = '--', marker = '', lw = 0.7)
    if exp_entropy is not None:
        ax.axhline(exp_entropy, lw = 0.5, ls = '--', c = 'r')
    ax.set_xlabel('$N$')
    ax.set_ylabel('$S(N)\\ [\\mathrm{bits}]$')
    if show:
        plt.show()
    if save:
        fig.savefig(Path(out_folder, name+'_entropy.pdf'), bbox_inches = 'tight')
    plt.close()
    return S