Source code for matrixprofile.io.__io

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

# Rebind ``range`` to ``xrange`` when the builtins object exposes that name
# (Python 2); otherwise keep the existing ``range``.
# NOTE(review): outside of ``__main__`` CPython may expose ``__builtins__`` as
# a dict rather than a module, in which case getattr falls back to ``range``
# -- confirm this is acceptable.
range = getattr(__builtins__, 'xrange', range)
# end of py2 compatibility boilerplate

# Public names exported by a star-import of this module.
__all__ = ['to_json', 'from_json', 'to_disk', 'from_disk']

import json.tool

import numpy as np

from matrixprofile import core
from matrixprofile.io.protobuf.protobuf_utils import (
    to_mpf,
    from_mpf
)


# File extensions (without the leading dot) that can be read from disk.
SUPPORTED_EXTS = {'json', 'mpf'}

# Serialization formats that can be requested when reading or writing.
SUPPORTED_FORMATS = {'json', 'mpf'}

def JSONSerializer(obj):
    """
    Default JSON serializer to write numpy arrays and other non-supported
    data types.

    Intended to be passed as the ``default`` argument of ``json.dumps``.

    Borrowed from:
    https://stackoverflow.com/a/52604722

    Parameters
    ----------
    obj : object
        The object that the JSON encoder could not serialize on its own.

    Returns
    -------
    list or scalar :
        ``obj.tolist()`` for numpy arrays (including subclasses) and
        ``obj.item()`` for numpy scalars.

    Raises
    ------
    TypeError :
        When obj is neither a numpy array nor a numpy scalar.
    """
    # isinstance (rather than inspecting ``type(obj).__module__``) also
    # covers ndarray subclasses defined in numpy sub-modules and avoids
    # calling ``item`` on numpy objects that do not provide it.
    if isinstance(obj, np.ndarray):
        return obj.tolist()

    if isinstance(obj, np.generic):
        return obj.item()

    # A single formatted message; passing two positional arguments would
    # make the exception render as a tuple.
    raise TypeError('Unknown type: {}'.format(type(obj)))


def from_json(profile):
    """
    Converts a JSON formatted profile into a profile data structure.

    Parameters
    ----------
    profile : str or file_like
        The profile as a JSON formatted string, or an open file-like object
        (anything exposing a ``read`` method) holding the JSON document.

    Returns
    -------
    profile : dict_like
        A MatrixProfile or Pan-MatrixProfile data structure.

    Raises
    ------
    ValueError :
        When the decoded JSON is neither a MatrixProfile nor a
        Pan-MatrixProfile.
    """
    # from_disk passes an open file while to_json produces a string; accept
    # both so that from_json(to_json(profile)) round-trips.
    if hasattr(profile, 'read'):
        dct = json.load(profile)
    else:
        dct = json.loads(profile)

    # handle pmp and convert to appropriate types
    if core.is_pmp_obj(dct):
        dct['pmp'] = np.array(dct['pmp'], dtype='float64')
        dct['pmpi'] = np.array(dct['pmpi'], dtype=int)
        dct['data']['ts'] = np.array(dct['data']['ts'], dtype='float64')
        dct['windows'] = np.array(dct['windows'], dtype=int)

    # handle mp
    elif core.is_mp_obj(dct):
        dct['mp'] = np.array(dct['mp'], dtype='float64')
        dct['pi'] = np.array(dct['pi'], dtype=int)

        # left/right profiles are only converted when both the profile and
        # its index were stored as lists
        has_l = isinstance(dct['lmp'], list) and isinstance(dct['lpi'], list)
        if has_l:
            dct['lmp'] = np.array(dct['lmp'], dtype='float64')
            dct['lpi'] = np.array(dct['lpi'], dtype=int)

        has_r = isinstance(dct['rmp'], list) and isinstance(dct['rpi'], list)
        if has_r:
            dct['rmp'] = np.array(dct['rmp'], dtype='float64')
            dct['rpi'] = np.array(dct['rpi'], dtype=int)

        dct['data']['ts'] = np.array(dct['data']['ts'], dtype='float64')

        # the query is left untouched unless it was stored as a list
        if isinstance(dct['data']['query'], list):
            dct['data']['query'] = np.array(dct['data']['query'], dtype='float64')
    else:
        raise ValueError('File is not of type profile!')

    return dct
def to_json(profile):
    """
    Serializes a profile object to a JSON formatted string.

    Parameters
    ----------
    profile : dict_like
        A MatrixProfile or Pan-MatrixProfile data structure.

    Returns
    -------
    str :
        The profile as a JSON formatted string.

    Raises
    ------
    ValueError :
        When profile is not a MatrixProfile or Pan-MatrixProfile.
    """
    if core.is_mp_or_pmp_obj(profile):
        # JSONSerializer handles the numpy values json cannot encode itself
        return json.dumps(profile, default=JSONSerializer)

    raise ValueError('profile is expected to be of type MatrixProfile or PMP')
def add_extension_to_path(file_path, extension):
    """
    Utility function to add the file extension when it is not provided by
    the user in the file path.

    Parameters
    ----------
    file_path : str
        The file path.
    extension : str
        The extension to ensure, without the leading dot (e.g. ``json``).

    Returns
    -------
    str :
        The file path, with ``.<extension>`` appended when it did not
        already end with it.
    """
    end = '.{}'.format(extension)

    # Compare case-insensitively, matching infer_file_format which lowercases
    # the extension; otherwise 'x.JSON' would become 'x.JSON.json'.
    if not file_path.lower().endswith(end.lower()):
        file_path = '{}{}'.format(file_path, end)

    return file_path


def infer_file_format(file_path):
    """
    Attempts to determine the file type based on the extension. The
    extension is assumed to be the last dot suffix.

    Note
    ----
    When the path contains no dot at all, the whole path is treated as the
    extension.

    Parameters
    ----------
    file_path : str
        The file path to infer the file format of.

    Returns
    -------
    str :
        The lowercased file extension.

    Raises
    ------
    RuntimeError :
        When the extension is not one of the supported extensions.
    """
    pieces = file_path.split('.')
    extension = pieces[-1].lower()

    if extension not in SUPPORTED_EXTS:
        raise RuntimeError('Unsupported file type with extension {}'.format(extension))

    return extension
def to_disk(profile, file_path, format='json'):
    """
    Writes a profile object of type MatrixProfile or PMP to disk, as a JSON
    formatted file by default.

    Note
    ----
    The JSON format is human readable whereas the mpf format is binary and
    cannot be read when opened in a text editor. When the file path does not
    include the extension, it is appended for you.

    Parameters
    ----------
    profile : dict_like
        A MatrixProfile or Pan-MatrixProfile data structure.
    file_path : str
        The path to write the file to.
    format : str, default json
        The format of the file to be written. Options include json, mpf

    Raises
    ------
    ValueError :
        When profile is not a MatrixProfile or PMP, or when format is not
        supported.
    """
    if not core.is_mp_or_pmp_obj(profile):
        raise ValueError('profile is expected to be of type MatrixProfile or PMP')

    if format not in SUPPORTED_FORMATS:
        raise ValueError('Unsupported file format {} given.'.format(format))

    target = add_extension_to_path(file_path, format)

    # format -> (open mode, serializer); json is text, mpf is binary
    writers = {
        'json': ('w', to_json),
        'mpf': ('wb', to_mpf),
    }
    writer = writers.get(format)
    if writer is None:
        return

    mode, serialize = writer
    with open(target, mode) as out:
        # serialization happens after the file has been opened
        out.write(serialize(profile))
def from_disk(file_path, format='infer'):
    """
    Reads a profile object of type MatrixProfile or PMP from disk into the
    respective object type. By default the format is inferred from the file
    extension.

    Parameters
    ----------
    file_path : str
        The path to read the file from.
    format : str, default infer
        The file format type to read from disk. Options include:
        infer, json, mpf

    Returns
    -------
    profile : dict_like, None
        A MatrixProfile or Pan-MatrixProfile data structure.

    Raises
    ------
    ValueError :
        When an explicit format is given that is not supported.
    """
    if format == 'infer':
        format = infer_file_format(file_path)
    elif format not in SUPPORTED_FORMATS:
        raise ValueError('format supplied {} is not supported'.format(format))

    if format == 'json':
        # json is read as text; from_json consumes the open file directly
        with open(file_path) as handle:
            return from_json(handle)

    if format == 'mpf':
        # mpf is binary; the raw bytes are handed to the decoder
        with open(file_path, 'rb') as handle:
            return from_mpf(handle.read())

    return None