Source code for plotypus.utils

from os import makedirs
from os.path import join, isdir
from sys import stderr
from multiprocessing import Pool
from numpy import absolute, concatenate, median, resize

# Public API of this module: the names bound by a star-import of it.
# NOTE(review): ``rowvec`` and ``sanitize_latex`` are defined in this module
# but are not listed here -- presumably deliberate; confirm before adding.
__all__ = [
    'verbose_print',
    'pmap',
    'make_sure_path_exists',
    'get_signal',
    'get_noise',
    'colvec',
    'mad',
    'autocorrelation'
]


def verbose_print(message, *, operation, verbosity):
    """
    Writes *message* to stderr when *operation* has been selected for
    reporting, i.e. when *verbosity* contains either *operation* itself or
    the catch-all entry "all".

    **Parameters**

    message : str
        The text to print.
    operation : str
        Label of the operation the message belongs to (keyword-only).
    verbosity : [str] or None
        Operations for which messages should be printed (keyword-only).
        An entry of "all" enables every operation. None disables printing
        entirely.

    **Returns**

    None
    """
    if verbosity is None:
        # Nothing has been selected for printing.
        return

    selected = (operation in verbosity) or ("all" in verbosity)
    if selected:
        print(message, file=stderr)
def pmap(func, args, processes=None, callback=lambda *_, **__: None,
         **kwargs):
    """pmap(func, args, processes=None, callback=do_nothing, **kwargs)

    Parallel equivalent of ``map(func, args)``, with the additional ability
    of providing keyword arguments to *func*, and a callback function which
    is applied to each element in the returned list. Unlike map, the output
    is a non-lazy list. If *processes* is 1, no process pool is used.

    **Parameters**

    func : function
        The function to map. When a pool is used it is sent to worker
        processes via :class:`multiprocessing.Pool`.
    args : iterable
        The arguments to map *func* over.
    processes : int or None, optional
        The number of worker processes in the pool. If equal to 1, no pool
        is created and *func* is called directly in the current process, to
        avoid useless overhead. If None, the number is chosen based on your
        system by :class:`multiprocessing.Pool` (default None).
    callback : function, optional
        Function to call on the return value of ``func(arg)`` for each *arg*
        in *args* (default do_nothing).
    kwargs : dict
        Extra keyword arguments are unpacked in each call of *func*.

    **Returns**

    results : list
        A list equivalent to ``[func(x, **kwargs) for x in args]``.
    """
    # Compare by value: ``is 1`` depends on small-int caching and misses
    # equal values of other integer types (e.g. numpy integers).
    if processes == 1:
        results = []
        for arg in args:
            result = func(arg, **kwargs)
            results.append(result)
            callback(result)
        return results

    # Pool(None) picks the worker count itself, exactly like Pool().
    with Pool(processes) as pool:
        pending = [pool.apply_async(func, (arg,), kwargs, callback)
                   for arg in args]
        # Collect inside the ``with`` block: leaving it terminates the pool,
        # which would abandon any task that has not finished yet.
        return [task.get() for task in pending]
def make_sure_path_exists(path):
    """
    Ensures that the directory *path* exists, creating it (and any missing
    parent directories) when necessary. An *OSError* raised during creation
    is propagated unless *path* turns out to be an existing directory.

    **Parameters**

    path : str
        Directory path to create.

    **Returns**

    None
    """
    try:
        makedirs(path)
    except OSError:
        if isdir(path):
            # The directory is there after all, so the failure is harmless.
            return
        raise
def get_signal(data):
    """
    Extracts the entries of *data* that are not masked (the non-outliers)
    and arranges them into rows with the same number of columns as *data*.

    **Parameters**

    data : masked array
        Two-dimensional masked array.

    **Returns**

    signal : array
        Non-masked values in *data*, with shape ``(-1, data.shape[1])``.
    """
    unmasked = data[~data.mask]
    # ``.data`` drops the mask and exposes the underlying values.
    values = unmasked.data
    return values.reshape(-1, data.shape[1])
def get_noise(data):
    """
    Extracts the masked entries of *data* (the identified outliers) and
    arranges them into rows with the same number of columns as *data*.

    **Parameters**

    data : masked array
        Two-dimensional masked array.

    **Returns**

    noise : array
        Masked values in *data*, with shape ``(-1, data.shape[1])``.
    """
    masked = data[data.mask]
    # ``.data`` drops the mask and exposes the underlying values.
    values = masked.data
    return values.reshape(-1, data.shape[1])
def colvec(X):
    """
    Turns the row-vector *X* into a column-vector with one entry per row.

    **Parameters**

    X : array-like, shape = [n_samples]
        Must expose a ``shape`` attribute.

    **Returns**

    out : array-like, shape = [n_samples, 1]
    """
    n_samples = X.shape[0]
    column_shape = (n_samples, 1)
    return resize(X, column_shape)
def rowvec(X):
    """
    Turns the column-vector *X* into a flat row-vector.

    **Parameters**

    X : array-like, shape = [n_samples, 1]
        Must expose a ``shape`` attribute.

    **Returns**

    out : array-like, shape = [n_samples]
    """
    row_shape = (1, X.shape[0])
    as_single_row = resize(X, row_shape)
    # Drop the leading length-1 axis to get a one-dimensional result.
    return as_single_row[0]
def mad(data, axis=None):
    """
    Computes the median absolute deviation of *data* along a given *axis*.
    See `link <https://en.wikipedia.org/wiki/Median_absolute_deviation>`_
    for details.

    **Parameters**

    data : array-like
        Values to compute the deviation of.
    axis : int or None, optional
        Axis along which the medians are taken. If None, the median is
        taken over all of *data* (default None).

    **Returns**

    mad : number or array-like
        ``median(|data - median(data)|)``, reduced along *axis*.
    """
    # keepdims=True keeps the reduced axis as length 1 so the median
    # broadcasts back against *data* along the axis it was computed over.
    # Without it, any axis other than 0 fails to broadcast or, for square
    # input, silently subtracts along the wrong axis.
    center = median(data, axis=axis, keepdims=True)
    return median(absolute(data - center), axis=axis)
def autocorrelation(X, lag=1):
    """
    Computes the autocorrelation of *X* with the given *lag*.

    The mean-subtracted series is multiplied element-wise by a copy of
    itself shifted by *lag* positions, where the shift wraps around (the
    first *lag* entries are moved to the end). The sum of those products is
    divided by the sum of squared deviations from the mean. See `link
    <https://en.wikipedia.org/wiki/Autocorrelation>`_ for details.

    **Parameters**

    X : array-like, shape = [n_samples]
        Must provide a ``mean`` method and support slicing.
    lag : int, optional
        Index difference between points being compared (default 1).

    **Returns**

    autocorrelation : number
    """
    centered = X - X.mean()
    # Circular shift: entry i is paired with entry (i + lag) modulo n.
    shifted = concatenate((centered[lag:], centered[:lag]))
    numerator = (centered * shifted).sum()
    denominator = (centered**2).sum()
    return numerator / denominator
# Pairs of (character that is special to LaTeX, escaped replacement text).
# NOTE(review): in LaTeX ``\\`` is a line break and ``\~`` is an accent
# command, not a literal backslash/tilde -- confirm that these two
# replacements are what callers expect before changing them.
_latex_replacements = [
    ('\\', '\\\\'),
    ('{', '\\{'),
    ('}', '\\}'),
    ('$', '\\$'),
    ('&', '\\&'),
    ('#', '\\#'),
    ('^', '\\textasciicircum{}'),
    ('_', '\\textunderscore{}'),
    ('~', '\\~'),
    ('%', '\\%'),
    ('<', '\\textless{}'),
    ('>', '\\textgreater{}'),
    ('|', '\\textbar{}')
]

# Single-pass lookup table built from the pairs above. Translating in one
# pass means text inserted by one replacement (such as the braces in
# ``\textless{}``) is never re-escaped by another, whatever the pair order.
_latex_translation = str.maketrans(dict(_latex_replacements))


def sanitize_latex(string):
    """
    Sanitize a string for input to LaTeX.

    Every character listed in ``_latex_replacements`` is replaced by its
    escaped form; all other characters are left untouched.

    Replacements taken from `Stack Overflow
    <http://stackoverflow.com/questions/2627135/how-do-i-sanitize-latex-input>`_

    **Parameters**

    string : str
        Text to escape.

    **Returns**

    sanitized_string : str
        Copy of *string* with the special characters escaped.
    """
    return string.translate(_latex_translation)