Source code for matrixprofile.algorithms.pairwise_dist

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

# Rebind ``range`` to ``xrange`` when ``__builtins__`` exposes that attribute
# (Python 2); otherwise the existing built-in ``range`` is kept unchanged.
range = getattr(__builtins__, 'xrange', range)
# end of py2 compatibility boilerplate

import itertools

import numpy as np

from matrixprofile import core
from matrixprofile.algorithms.mpdist import mpdist

def compute_dist(args):
    """
    Evaluate one MPDist work item and tag the result with its position.

    All inputs are packed into a single tuple so the function can be used
    as the callable of a pool ``map`` call, which passes exactly one
    argument per work item.

    Parameters
    ----------
    args : tuple
        Work item laid out as ``(k, ts_a, ts_b, window_size, threshold)``.
        ``k`` is returned untouched; the remaining entries are forwarded to
        ``mpdist`` (``threshold`` as a keyword argument).

    Returns
    -------
    values : tuple
        ``(k, distance)`` where ``distance`` is the value returned by
        ``mpdist`` for this work item.
    """
    position = args[0]
    series_a, series_b, window = args[1], args[2], args[3]

    return (position, mpdist(series_a, series_b, window, threshold=args[4]))


def pairwise_dist(X, window_size, threshold=0.05, n_jobs=1):
    """
    Utility function to compute all pairwise distances between the time
    series in X using MPDist.

    Note
    ----
    scipy.spatial.distance.pdist cannot be used because it does not allow
    jagged arrays; however, its code was used as a reference in creating
    this function.
    https://github.com/scipy/scipy/blob/master/scipy/spatial/distance.py#L2039

    Parameters
    ----------
    X : array_like
        An array_like object containing the time series to compute distances
        for. ``len(X)`` is taken as the number of series ``m``.
    window_size : int
        The window size to use in computing the MPDist.
    threshold : float
        The threshold used to compute MPDist.
    n_jobs : int
        Number of CPU cores to use during computation. When it equals 1 the
        distances are computed in the calling process; for any other value
        the pairs are mapped over the pool created by
        ``core.mp_pool()(n_jobs)``.

    Returns
    -------
    Y : np.ndarray
        A condensed distance matrix of length ``m * (m - 1) // 2`` with
        dtype ``np.double``. For each :math:`i` and :math:`j` with
        :math:`i < j < m`, the distance between ``X[i]`` and ``X[j]`` is
        stored at index ``m * i + j - ((i + 2) * (i + 1)) // 2``, i.e. the
        pairs appear in the order (0, 1), (0, 2), ..., (1, 2), ... . The
        result is empty when fewer than two series are given.

    Raises
    ------
    ValueError
        If X is not array_like.
    """
    if not core.is_array_like(X):
        raise ValueError('X must be array_like!')

    # len() already equals shape[0] for a np.ndarray, so lists of (possibly
    # jagged) series and 2D arrays are handled uniformly.
    m = len(X)
    dm = np.empty((m * (m - 1)) // 2, dtype=np.double)

    # Nothing to compare; avoid starting worker processes for no work.
    if m < 2:
        return dm

    # combinations() yields (i, j) with i < j in the same order as a nested
    # "for i / for j > i" loop, so enumerate() gives the condensed index.
    pairs = itertools.combinations(range(m), 2)

    if n_jobs == 1:
        for k, (i, j) in enumerate(pairs):
            dm[k] = mpdist(X[i], X[j], window_size, threshold=threshold,
                           n_jobs=n_jobs)
        return dm

    args = [
        (k, X[i], X[j], window_size, threshold)
        for k, (i, j) in enumerate(pairs)
    ]

    with core.mp_pool()(n_jobs) as pool:
        results = pool.map(compute_dist, args)

    # Each result carries its own condensed index, so placement does not
    # depend on the order in which the pool hands results back.
    for k, distance in results:
        dm[k] = distance

    return dm