# Source code for ensign.cp_decomp

# ENSIGN rights
"""CANDECOMP-PARAFAC (CP) sparse tensor decomposition tools.

This module contains functions for reading, writing, and performing CP 
decompositions along with a class for representing decomposition results.
"""
from ctypes import *
import functools
import json
import multiprocessing as mltprc
import os
import random
import re
import string
import sys

import numpy as np
import pandas as pd

from ensign.constants import *
import ensign.constants as C
import ensign.ensign_ctypes.ensign_types as et
import ensign.ensign_io.ensign_logging as ensign_logging
import ensign.ensign_io.decomp_io as dio
import ensign.ensign_io.sptensor_io as sio
import ensign.sptensor as spt

# Handle to the native shared library, loaded once at import time.
API = cdll.LoadLibrary('libapi.so')

# Used when calculating backtrack data
# Determines which values (relative to max in mode) should be considered 0
REL_CHOP_POINT=1e-4
# Size in bytes of one element of numpy's default float dtype.
DOUBLE_BYTES = np.dtype(float).itemsize

# Integer identifiers for the CP decomposition variants.
# NOTE(review): presumably these are the algorithm selectors expected by the
# native library -- confirm against the C headers.
CP_ALS = 0
CP_ALS_NN = 1
CP_APR = 2
CP_APR_PDNR = 3
CP_APR_PQNR = 4
# NOTE(review): presumably the native status code reported when an allocation
# fails -- confirm against the C headers.
MALLOC_ERROR = 3

# Logger shared by every function in this module.
logger = ensign_logging.get_logger()

class StreamingDatalessFit:
    """
    Container for the quantities needed to evaluate the fit of a streaming
    decomposition when the base tensor data values are not available.

    Attributes
    ----------
    norm : float
        L2 norm of the base tensor.
    residual_norm : float
        Residual L2 norm from fitting the base tensor to the base tensor
        decomposition.
    inner_product : numpy.ndarray (float)
        Terms constituting the inner product of the base tensor and the
        tensor reconstructed from the factor matrices of the base tensor
        decomposition.
    """
    def __init__(self, norm, res_norm, inner_product):
        """Store the norm, residual norm and inner-product terms as given."""
        self.norm, self.residual_norm = norm, res_norm
        self.inner_product = inner_product

    def __eq__(self, other):
        """Return True only for an object of the same type whose norm,
        residual norm and inner-product terms all match exactly."""
        if type(self) is not type(other):
            return False

        # Scalars are compared first so the array comparison is skipped
        # as soon as a cheap mismatch is found.
        if self.norm != other.norm or self.residual_norm != other.residual_norm:
            return False

        return bool(np.array_equal(self.inner_product, other.inner_product))

    def __str__(self):
        """Render the three stored values, one per line."""
        return f'Norm: {self.norm}\nResidual-Norm: {self.residual_norm}\nInner-Product: {self.inner_product}'

class CPDecomp:
    """Represents the results of a CP decomposition.

    Attributes
    ----------
    rank : int
        Rank of decomposition.
    order : int
        Order of decomposed tensor.
    sptensor : SPTensor
        SPTensor that was decomposed.
    weights : numpy.ndarray
        1D array of length ``rank`` containing weights for each component of
        the decomposition. ``None`` until a decomposition is stored.
    factors : list of numpy.ndarray
        List of 2D factor matrices accessed by ``mode_id``. Each factor matrix
        has ``shape = (mode_size, rank)`` and ``dtype = "float64"``. All values
        in a factor matrix lie inside the closed interval ``[-1, 1]``. Each
        column of a factor matrix represents a single component.
    factors_init : list of numpy.ndarray
        List of 2D factor matrices as initialized for the decomposition. Only
        saved if requested by the decomposition call.
    labels : list of list of str, optional
        An optional list of per-mode index labels accessed by ``mode_id``. Each
        list of labels corresponds to the labels for each index of the mode.
        Suppose we have a mode with ``mode_id = 3`` and ``"Yes"``, ``"No"``,
        ``"Maybe"`` mapped to indices 0-2. For this mode, ``labels[3]`` is
        equal to ``["Yes", "No", "Maybe"]``. Decompositions are not required to
        have labels.
    mode_names : list of str, optional
        Each mode's name accessed by ``mode_id``. Decompositions are not
        required to have mode names.
    mode_sizes : list of int
        The number of indices in each mode accessed by ``mode_id``.
    streaming_dataless_fit : StreamingDatalessFit
        Used by decomposition algorithms in a streaming setting to determine
        final fit. ``None`` when not available.
    metrics : dict
        Dictionary with keys: [time, fit, cosine_sim, norm_scaling, cp_total_iter]
        representing the time taken to run the decomposition, the final fit of
        the decomposition, the cosine similarity of the decomposition to the
        original tensor, the scale factor used to normalize the decompositions,
        and the number of optimization steps used to reach the final
        decomposition values, respectively.
    cpd_backtrack : list of lists of int
        One entry per component. Each list contains tensor entries that
        contributed to the corresponding component. If CPDecomp object has two
        components, and the first one tracks to tensor entries 0 and 2 and the
        second tracks to tensor entries 1 and 3, then the backtrack will be
        [[0,2], [1,3]]. ``None`` until computed or read from disk.

    See also
    --------
    ensign.sptensor.SPTensor : Sparse tensor class
    """

    def __init__(self):
        """Create an empty decomposition (rank 0, order 0, no data)."""
        self.filenames = None
        self.rank = 0
        self.order = 0
        self.sptensor = None
        self.weights = None
        self.streaming_dataless_fit = None
        self.factors = []
        self.labels = []
        self.mode_names = []
        self.mode_sizes = []
        self.factors_init = None
        self.metrics = None
        # Always defined so code that reads the attribute (for example when
        # writing a decomposition to disk) works on a fresh object.
        self.cpd_backtrack = None

    def __eq__(self, d):
        """Compare rank, order, weights and factor matrices for exact equality.

        The streaming dataless fit is only compared when both objects carry
        one. Objects of a different type are never equal.
        """
        if type(self) != type(d):
            return False

        if self.rank != d.rank or self.order != d.order:
            return False
        if len(self.factors) != len(d.factors):
            return False

        # Weights may legitimately be None on an empty decomposition; two
        # empty decompositions compare equal, an empty and a filled one do not.
        if self.weights is None or d.weights is None:
            if self.weights is not d.weights:
                return False
        elif not np.array_equal(self.weights, d.weights):
            # array_equal also returns False when the shapes differ.
            return False

        if (self.streaming_dataless_fit is not None
                and d.streaming_dataless_fit is not None
                and not (self.streaming_dataless_fit == d.streaming_dataless_fit)):
            return False

        return all(np.array_equal(mine, theirs)
                   for mine, theirs in zip(self.factors, d.factors))

    def __ne__(self, d):
        """Logical negation of ``__eq__``."""
        return not self.__eq__(d)

    def __str__(self):
        """Return a JSON string with the rank, order and weights."""
        weights = None if self.weights is None else np.asarray(self.weights).tolist()
        ret = {'rank': self.rank, 'order': self.order,
               'weights': weights}
        return json.dumps(ret)

    def compute_cpd_backtrack(self, out_dir=None):
        """Computes CP Decomposition backtracking information.

        The result is stored in ``self.cpd_backtrack``.

        Parameters
        ----------
        out_dir : str
            Directory to save backtracking information in. Nothing is written
            when this is ``None`` or empty.
        """
        self.cpd_backtrack = _compute_cpd_backtrack(self)
        if out_dir:
            dio.write_decomp_backtrack(out_dir, self.cpd_backtrack)

def _get_sparse_co_tensor(py_sptensor):
    """Build an ``et.C_SparseCoTensor`` ctypes struct from a Python sparse tensor.

    Reads ``order``, ``nnz``, ``mode_sizes`` and ``entries`` from
    ``py_sptensor`` and fills the struct fields ``nModes``, ``nnz``, ``size``,
    ``NNZ`` (entry values) and ``SM`` (entry coordinates).

    NOTE(review): the struct fields receive raw pointers into the local numpy
    arrays ``py_entries`` and ``py_coord_index``; it is not evident from this
    function that those arrays are kept alive after it returns -- confirm the
    buffers outlive every native call that uses the returned struct.
    """
    order = c_long(py_sptensor.order)
    nnz = c_long(py_sptensor.nnz)
    mode_sizes = (c_long * py_sptensor.order)(*py_sptensor.mode_sizes)

    # Entry values: the column at index `order` of the entries table, as float64.
    py_entries = py_sptensor.entries.values[:, py_sptensor.order].astype(np.float64)
    entries = py_entries.ctypes.data_as(POINTER(c_double))

    # Coordinates: every column except the last, as int64, flattened row by row
    # so each entry's indices are contiguous.
    py_coord_index = py_sptensor.entries.values[:, :-1].astype(np.int64).ravel()
    # An array of nnz pointer slots is allocated, but only slot 0 is set: it
    # points at the start of the flattened coordinate buffer.
    index = cast((POINTER(c_long) * py_sptensor.nnz)(), POINTER(POINTER(c_long)))
    index[0] = py_coord_index.ctypes.data_as(POINTER(c_long))

    sparse_co_tensor = et.C_SparseCoTensor(nModes=order, nnz=nnz, size=mode_sizes, NNZ=entries, SM=index)

    return sparse_co_tensor

def _get_py_decomp_obj(ktensor, sptensor, rank, metrics, options):
    """Convert native decomposition results into a ``CPDecomp`` object.

    Weights are read from the ``lambda`` field of ``ktensor`` and sorted in
    descending order; the columns of every factor matrix and the streaming
    inner-product terms are permuted with the same ordering so components
    stay aligned. Mode names, mode sizes, labels and the tensor itself are
    taken from ``sptensor``; the five metric values are copied from
    ``metrics`` into a dictionary.

    Parameters
    ----------
    ktensor : object exposing ``lambda`` and ``matrices``
        Native decomposition result.
    sptensor : SPTensor
        The tensor that was decomposed.
    rank : int
        Number of components.
    metrics : object exposing time, fit, cosine_sim, norm_scaling, cp_total_iter
        Native metrics record.
    options : object exposing ``streaming_dataless_fit``
        Native options record; its streaming fit pointer is dereferenced.

    Returns
    -------
    CPDecomp
        The populated decomposition.
    """
    result = CPDecomp()
    result.rank = rank
    result.order = sptensor.order

    # 'lambda' is a Python keyword, so the field must be fetched with getattr.
    native_weights = getattr(ktensor, 'lambda')
    unsorted_weights = np.array([native_weights[k] for k in range(rank)])
    descending = np.argsort(unsorted_weights)[::-1]
    result.weights = unsorted_weights[descending]

    # Indexing with the permutation array yields a new array, so the stored
    # factors do not alias the native buffers.
    result.factors = [
        np.ctypeslib.as_array(
            ktensor.matrices[mode][0].data[0],
            shape=(sptensor.mode_sizes[mode], rank))[:, descending]
        for mode in range(result.order)
    ]

    fit_state = options.streaming_dataless_fit.contents
    permuted_ip = [fit_state.ip[k] for k in descending]
    result.streaming_dataless_fit = StreamingDatalessFit(
        fit_state.norm,
        fit_state.res_norm,
        np.array(permuted_ip)
    )

    result.mode_names = sptensor.mode_names
    result.mode_sizes = sptensor.mode_sizes
    result.labels = sptensor.labels
    result.sptensor = sptensor

    metric_names = ('time', 'fit', 'cosine_sim', 'norm_scaling', 'cp_total_iter')
    result.metrics = {name: getattr(metrics, name) for name in metric_names}

    return result

def _get_init_guess(py_decomp, options):
    """ Adds initial guesses for factor matrices to CPDecomp object 

    Copies initial factor matrix guesses from options to CPDecomp object.

    Parameters
    ----------
    py_decomp : CPDecomp
        CPDecomp object defined in this module.
    options : decomp_options
        decomp_options struct as defined in 'ensign_types.h'
    """
    py_decomp.factors_init = [np.zeros((py_decomp.mode_sizes[i], py_decomp.rank)) for i in range(py_decomp.order)]
    for i in range(py_decomp.order):
        for j in range(py_decomp.mode_sizes[i]):
            for k in range(py_decomp.rank):
                py_decomp.factors_init[i][j][k] = options.factor_matrices_initial_guess[i][j * py_decomp.rank + k]

def _get_ctype_decomp_options_obj(seed=0, verbose=False, memory_limit=2e9, 
                           factor_matrices_initial_guess=POINTER(POINTER(c_double))(),
                           streaming_dataless_fit=POINTER(et.C_StreamingDatalessFit)(),
                           output_init_guess=False,
                           cp_als_memory_limit=C.CP_ALS_MEM_LIMIT_GB*(1024**3),
                           cp_als_max_iter=C.CP_ALS_MAX_ITER, 
                           cp_als_stop_tol=C.CP_ALS_STOP_TOL, 
                           cp_als_nn_max_iter=C.CP_ALS_NN_MAX_ITER, 
                           cp_als_nn_memory_limit=C.CP_ALS_MEM_LIMIT_GB*(1024**3), 
                           cp_als_nn_stop_tol=C.CP_ALS_NN_STOP_TOL, 
                           cp_apr_memory_limit=C.CP_APR_MEM_LIMIT_GB*(1024**3), 
                           cp_apr_max_outer_iter=C.CP_APR_MAX_OUTER_ITER, 
                           cp_apr_max_inner_iter=C.CP_APR_MAX_INNER_ITER, 
                           cp_apr_stop_tol=C.CP_APR_STOP_TOL, 
                           cp_apr_pqnr_memory_limit=C.CP_APR_PQNR_MEM_LIMIT_GB*(1024**3), 
                           cp_apr_pqnr_max_outer_iter=C.CP_APR_PQNR_MAX_OUTER_ITER, 
                           cp_apr_pqnr_max_inner_iter=C.CP_APR_PQNR_MAX_INNER_ITER,
                           cp_apr_pqnr_stop_tol=C.CP_APR_PQNR_STOP_TOL,
                           cp_apr_pqnr_is_inexact=C.CP_APR_PQNR_IS_INEXACT,
                           cp_apr_pqnr_lbfgs_m=C.CP_APR_PQNR_LBFGS_M,
                           cp_apr_pqnr_skooch_mode=C.CP_APR_PQNR_SKOOCH_MODE,
                           cp_apr_pdnr_memory_limit=C.CP_APR_PDNR_MEM_LIMIT_GB*(1024**3),
                           cp_apr_pdnr_max_outer_iter=C.CP_APR_PDNR_MAX_OUTER_ITER,
                           cp_apr_pdnr_max_inner_iter=C.CP_APR_PDNR_MAX_INNER_ITER,
                           cp_apr_pdnr_stop_tol=C.CP_APR_PDNR_STOP_TOL,
                           cp_apr_pdnr_is_inexact=C.CP_APR_PDNR_IS_INEXACT):
    """Pack decomposition options into an ``et.C_DecompOptions`` ctypes struct.

    Every argument is converted to a ctypes value (``c_int``, ``c_long`` or
    ``c_double``) and passed positionally, so the order of the values below
    must match the field order of ``et.C_DecompOptions``. Note that
    ``output_init_guess`` is passed *before* the two pointer arguments even
    though it follows them in this function's signature.

    Memory limits are given in bytes and truncated with ``int()``; the
    per-algorithm defaults are gigabyte constants from ``ensign.constants``
    multiplied by ``1024**3``.

    The two pointer defaults are created without a target, once, when the
    function is defined, so every call that omits them receives the same
    pointer objects.

    NOTE(review): the default for ``cp_als_nn_memory_limit`` is built from
    ``C.CP_ALS_MEM_LIMIT_GB`` (the CP-ALS constant), not a CP-ALS-NN specific
    one -- confirm this is intentional.

    Returns
    -------
    et.C_DecompOptions
        The populated options struct.
    """
    return et.C_DecompOptions(c_int(seed), 
                              c_int(verbose), 
                              c_long(int(memory_limit)),
                              c_int(output_init_guess),
                              factor_matrices_initial_guess,
                              streaming_dataless_fit,
                              c_long(int(cp_als_memory_limit)),
                              c_long(cp_als_max_iter),
                              c_double(cp_als_stop_tol),
                              c_long(cp_als_nn_max_iter),
                              c_long(int(cp_als_nn_memory_limit)),
                              c_double(cp_als_nn_stop_tol),
                              c_long(int(cp_apr_memory_limit)),
                              c_long(cp_apr_max_outer_iter),
                              c_long(cp_apr_max_inner_iter),
                              c_double(cp_apr_stop_tol),
                              c_long(int(cp_apr_pqnr_memory_limit)),
                              c_long(cp_apr_pqnr_max_outer_iter),
                              c_long(cp_apr_pqnr_max_inner_iter),
                              c_double(cp_apr_pqnr_stop_tol),
                              c_long(cp_apr_pqnr_is_inexact),
                              c_long(cp_apr_pqnr_lbfgs_m),
                              c_long(int(cp_apr_pqnr_skooch_mode)),
                              c_long(int(cp_apr_pdnr_memory_limit)),
                              c_long(cp_apr_pdnr_max_outer_iter),
                              c_long(cp_apr_pdnr_max_inner_iter),
                              c_double(cp_apr_pdnr_stop_tol),
                              c_long(cp_apr_pdnr_is_inexact))

def _get_decomp_metrics():
    """Create an ``et.C_DecompMetrics`` struct initialised with zeros.

    The struct is built from four ``0.0`` values followed by an integer
    ``0``, passed positionally.
    """
    initial_values = (0.0, 0.0, 0.0, 0.0, 0)
    return et.C_DecompMetrics(*initial_values)

def _get_k_tensor(order, rank):
    """Allocate an ``et.C_KTensor`` struct for a decomposition result.

    Parameters
    ----------
    order : int
        Number of modes; sets the length of the factor-matrix pointer array.
    rank : int
        Number of components; sets the length of the weight array.

    Returns
    -------
    et.C_KTensor
        Struct built from the mode count, the factor-matrix pointer array,
        a weight array of ``rank`` ones, the component count and a scratch
        matrix pointer created without a target.
    """
    num_modes = c_long(order)
    num_factors = c_long(rank)
    # Weight buffer of `rank` doubles, all initialised to 1.0.
    weights = POINTER(c_double)((c_double * rank)(*([1.0] * rank)))

    # Array of pointers
    matrices = (POINTER(et.C_FactorMatrix) * order) 
    # Instantiate array of pointers
    # NOTE(review): the list multiplication repeats ONE pointer object, so all
    # `order` slots initially refer to the same C_FactorMatrix instance.
    # Presumably the native code replaces each slot -- confirm.
    matrices = matrices(*([pointer(et.C_FactorMatrix())] * order))
    # Cast to double pointer
    matrices = POINTER(POINTER(et.C_FactorMatrix))(matrices) 

    scratch_matrix = POINTER(et.C_FactorMatrix)()

    return et.C_KTensor(num_modes, matrices, weights, num_factors, scratch_matrix)

def _validate_params(sptensor, rank, max_iter, stop_tol, mem_lim, outer_iter=1, inner_iter=1):
    """Validate the common arguments of the decomposition entry points.

    Checks run in the order of the parameters below and stop at the first
    failure. Each failure is logged and raised.

    Parameters
    ----------
    sptensor : SPTensor or str
        Tensor object or the filename of a tensor. Must not be ``None``.
    rank : int
        Rank of the decomposition; must be at least 1.
    max_iter : number
        Maximum iterations; must be at least 1.
    stop_tol : number
        Stop tolerance; must be greater than 0.
    mem_lim : number
        Memory limit in GB; must be greater than 0.
    outer_iter, inner_iter : number, optional
        Maximum outer / inner iterations; each must be at least 1.

    Returns
    -------
    int
        Always ``1`` when every check passes.

    Raises
    ------
    TypeError
        For every failed check. Range violations also raise ``TypeError``,
        as callers may rely on that exception type.
    """
    def _reject(msg):
        # Log and raise in one place so every check reports the same way.
        logger.error(msg)
        raise TypeError(msg)

    # The None check must come first: None also fails the isinstance test, so
    # checking it afterwards could never trigger.
    if sptensor is None:
        _reject("sptensor is none")
    if not isinstance(sptensor, (spt.SPTensor, str)):
        _reject("sptensor is not of class SPTensor or an SPTensor filename.")
    if not isinstance(rank, int):
        _reject("Rank must be an int.")
    if rank <= 0:
        _reject("Rank of decomposition cannot be less than 1.")
    if max_iter < 1:
        _reject("Maximum iterations cannot be less than 1.")
    if outer_iter < 1:
        _reject("Maximum outer iterations cannot be less than 1.")
    if inner_iter < 1:
        _reject("Maximum inner iterations cannot be less than 1.")
    if stop_tol <= 0.0:
        _reject("Stop tolerance cannot be less than or equal to 0.")
    if mem_lim <= 0:
        _reject("Memory Limit (GB) cannot be less than or equal to 0.")
    return 1

def reconstruct_into(decomp, tensor, comp_ids=None, orig_entries=False):
    """ Reconstructs a CP decomposition 'into' a sparse tensor.

    Reconstructs a tensor from a decomposition ``decomp`` by computing and summing
    selected outer products. If a list of component IDs is given in ``comp_ids``
    then only components in this list will contribute to the reconstruction.
    Only nonzero indices of ``tensor`` are included in the reconstructed tensor.

    Parameters
    ----------
    decomp : CPDecomp
        Decomposition to reconstruct using sum-of-outer-products.
    tensor : SPTensor
        The sparse tensor that contains the nonzero entries to reconstruct into.
    comp_ids : list, optional
        A list of components to reconstruct the SPTensor from. If ``None`` (default),
        all components are used. Items are converted with ``int()``.
    orig_entries : bool, optional
        This parameter controls the value of entries in the reconstructed tensor.
        When ``True``, every entry keeps the value it has in ``tensor``
        regardless of the reconstructed value.
        When ``False`` (default) entries are given the value produced by
        the sum-of-outer-products reconstruction.

    Returns
    -------
    reconstructed_spt : SPTensor
        The sparse tensor produced by reconstructing selected components of
        ``decomp`` into nonzero values of ``tensor``.

    Raises
    ------
    ValueError
        If an argument is missing, of the wrong type, or inconsistent
        (order, mode sizes, component ids, ``orig_entries`` flag).
    """
    def _reject(msg):
        logger.error(msg)
        raise ValueError(msg)

    # Validate decomposition and sparse tensor
    if decomp is None:
        _reject("Parameter 'decomp' must be a CPDecomp object (not 'None')")

    if tensor is None:
        _reject("Parameter 'tensor' must be a SPTensor object (not 'None')")

    # Both arguments must have the right type; rejecting only when BOTH are
    # wrong would let a single bad argument through.
    if not (isinstance(decomp, CPDecomp) and isinstance(tensor, spt.SPTensor)):
        _reject("Parameters 'decomp' and 'tensor' must be, respectively, CPDecomp and SPTensor objects")

    if decomp.order != tensor.order:
        _reject("Parameters 'decomp' and 'tensor' must have the same order")

    if len(decomp.mode_sizes) != len(tensor.mode_sizes):
        _reject("Parameters 'decomp' and 'tensor' must have the same order")

    for mode_id in range(decomp.order):
        if decomp.mode_sizes[mode_id] != tensor.mode_sizes[mode_id]:
            _reject("Parameters 'decomp' and 'tensor' must have identical mode sizes")

    # Validate component list, keeping the normalised integer ids so the
    # values that were checked are the values that get used as indices.
    selected = None
    if comp_ids is not None:
        selected = []
        for comp_id in comp_ids:
            try:
                comp_idx = int(comp_id)
            except (TypeError, ValueError):
                _reject("All items in parameter 'comp_ids' must be integers")
            if comp_idx >= len(decomp.weights) or comp_idx < 0:
                _reject("All items in parameter 'comp_ids' must be on the interval [0, 'd.rank')")
            selected.append(comp_idx)

    # Validate original entries flag
    if (orig_entries != True and orig_entries != False):
        _reject("Flag 'orig_entries' must be 'True' or 'False'")

    # Copy original sparse tensor
    reconstructed_spt = spt.SPTensor(tensor.order, tensor.nnz, tensor.mode_sizes,
                                     tensor.entries.copy())
    value_col = decomp.order

    if orig_entries == True:
        # Entries keep their original values; no outer products are needed.
        reconstructed_spt.entries.iloc[:, value_col] = tensor.entries.values[:, -1]
        return reconstructed_spt

    # Weight distributes to every dot product calculation for a component.
    # Components that are not selected get weight 0 and drop out of the sum.
    base_product = np.copy(decomp.weights)
    if selected is not None:
        mask = np.zeros(decomp.rank)
        for comp_idx in selected:
            mask[comp_idx] = 1.0
        base_product = mask * decomp.weights

    # For every index tuple in the original sparse tensor, multiply the
    # matching factor-matrix rows of all modes component-wise and sum over
    # components. Rows are processed in bounded chunks so the temporary
    # (rows x rank) array stays small for large tensors.
    coords = tensor.entries.values[:, :decomp.order].astype("int64")
    factors = [np.asarray(factor) for factor in decomp.factors]
    n_entries = coords.shape[0]
    values = np.empty(n_entries)
    chunk_rows = 1 << 16
    for start in range(0, n_entries, chunk_rows):
        block = coords[start:start + chunk_rows]
        products = np.tile(base_product, (block.shape[0], 1))
        for mode_id in range(decomp.order):
            products = products * factors[mode_id][block[:, mode_id]]
        values[start:start + block.shape[0]] = products.sum(axis=1)

    reconstructed_spt.entries.iloc[:, value_col] = values

    return reconstructed_spt

def get_fit_per_entry(decomp, tensor, top_k=None):
    """ Calculates fit for each entry of a reconstructed tensor present in the original tensor.

    This function evaluates how well each entry of the tensor reconstructed
    from ``decomp`` matches the entry at the same index in ``tensor``. This
    evaluation is calculated as fit, a floating point number on the interval
    (-infinity, 1.0]. A fit of 1.0 is an exact match between the reconstructed
    and original entry. Output is sorted by fit (ascending).

    Lower fit values indicate a reconstructed entry is different than the original
    entry. Low fit values may be indicative of anomalous data.

    Parameters
    ----------
    decomp : CPDecomp
        Decomposition used to calculate per-element fit. A sparse tensor is
        reconstructed from ``decomp`` using sum-of-outer-products and the
        results are compared to ``tensor``.
    tensor : SPTensor
        The sparse tensor that is the basis for comparison.
    top_k : int, optional
        The number of elements to return. If ``top_k`` is ``None`` (default),
        fit values for all entries are returned. If ``top_k`` is positive, the
        ``top_k`` lowest fit elements are returned. If ``top_k`` is negative the
        ``abs(top_k)`` highest fit elements are returned. If ``top_k`` is zero
        an empty ``ndarray`` is returned. The value is converted with ``int()``.

    Returns
    -------
    fit_vals : numpy.ndarray
        A ``numpy.ndarray`` of the same shape and format as ``tensor.entries``. The
        first ``tensor.order`` columns of ``fit_vals`` are indices into
        each mode of ``tensor``. The final column of ``fit_vals`` contains the
        entry's fit value.

    Raises
    ------
    ValueError
        If an argument is missing, of the wrong type, or inconsistent
        (order, mode sizes, ``top_k``).
    """
    def _reject(msg):
        logger.error(msg)
        raise ValueError(msg)

    # Validate decomposition and sparse tensor
    if decomp is None:
        _reject("Parameter 'decomp' must be a CPDecomp object (not 'None')")

    if tensor is None:
        _reject("Parameter 'tensor' must be a SPTensor object (not 'None')")

    if not (isinstance(decomp, CPDecomp) and isinstance(tensor, spt.SPTensor)):
        _reject("Parameters 'decomp' and 'tensor' must be, respectively, CPDecomp and SPTensor objects")

    if decomp.order != tensor.order:
        _reject("Parameters 'decomp' and 'tensor' must have the same order")

    if len(decomp.mode_sizes) != len(tensor.mode_sizes):
        _reject("Parameters 'decomp' and 'tensor' must have the same order")

    for mode_id in range(decomp.order):
        if decomp.mode_sizes[mode_id] != tensor.mode_sizes[mode_id]:
            _reject("Parameters 'decomp' and 'tensor' must have identical mode sizes")

    # Validate top_k and keep the normalised integer, so the value that was
    # checked is the value used for slicing below.
    limit = None
    if top_k is not None:
        try:
            limit = int(top_k)
        except (TypeError, ValueError):
            _reject("Parameter 'top_k' must be integer or None")
        if limit == 0:
            return np.empty(shape=(0,0))

    # Reconstruct decomposition into original tensor
    r = reconstruct_into(decomp, tensor)

    # Compute difference between original and reconstructed nonzero values
    value_col = decomp.order
    reconstructed_vals = r.entries.values[:, value_col]
    orig_vals = tensor.entries.values[:, value_col]
    diff_vals = orig_vals - reconstructed_vals

    # Fit of original value in reconstruction is 1 - |diff / orig|
    # NOTE: Negative values indicate bad fit, and larger magnitude is worse
    # e.g., -1 is bad, -100 is worse
    fit_orig = 1.0 - np.absolute(diff_vals / orig_vals)

    # Copy original entries and replace value of each index tuple with fit
    fit_vals = tensor.entries.values.copy()
    fit_vals[:, value_col] = fit_orig

    # Sort entries by fit (ascending). Smallest fit values are anomalies.
    # Viewing each row as one structured record lets numpy sort whole rows in
    # place, keyed on the fit field (the last one).
    view_spec = ",".join(["float64"] * (decomp.order + 1))
    field_spec = "f" + str(decomp.order)
    fit_vals.view(view_spec).sort(order=[field_spec], axis=0)

    # Return the sorted ndarray (limit is never 0 here; that case returned above)
    if limit is None:
        return fit_vals
    if limit > 0:
        return fit_vals[:limit, :]
    return fit_vals[limit:, :]

def are_close(decomp0, decomp1, rtol=1e-5, atol=1e-08):
    """Checks if weights and factor matrices are close according to tolerance.

    Each element of the weights and of every factor matrix is tested with

        abs(d0 - d1) <= (atol + rtol * abs(d1))

    and ``True`` is returned only when the test holds everywhere. Two
    ``None`` arguments are considered close; a single ``None`` is not.
    Decompositions whose rank, order, weight shape, factor count or factor
    shapes differ are never close.

    Parameters
    ----------
    decomp0, decomp1 : CPDecomp
        Decompositions to compare for closeness
    rtol : float, optional
        Relative tolerance
    atol : float, optional
        Absolute tolerance

    Returns
    -------
    are_close : bool
        True if the decomposition weights and factor matrices are element-wise
        equal within a tolerance.
    """
    # Missing decompositions: close only when both are missing.
    if decomp0 is None or decomp1 is None:
        return decomp0 is None and decomp1 is None

    # Structural comparison first; values are only compared on equal layouts.
    same_layout = (
        decomp0.rank == decomp1.rank
        and decomp0.order == decomp1.order
        and decomp0.weights.shape == decomp1.weights.shape
        and len(decomp0.factors) == len(decomp1.factors)
    )
    same_layout = same_layout and all(
        left.shape == right.shape
        for left, right in zip(decomp0.factors, decomp1.factors)
    )
    if not same_layout:
        return False

    tolerances = {'rtol': rtol, 'atol': atol}
    if not np.allclose(decomp0.weights, decomp1.weights, **tolerances):
        return False

    return all(
        np.allclose(left, right, **tolerances)
        for left, right in zip(decomp0.factors, decomp1.factors)
    )

def read_cp_decomp_dir(decomp_dir):
    """Reads a CP decomposition from the filesystem.

    Factor matrices are read from the ``decomp_mode_<i>.txt`` files, ordered
    by the numeric mode id ``<i>``. Weights come from ``weights.txt`` and,
    when present, the streaming fit from ``streaming.txt``. Labels and mode
    names are taken from the tensor (``tensor_data.txt``) if it is present,
    otherwise from ``map_mode_<i>.txt`` files, otherwise generated
    placeholders are used.

    Parameters
    ----------
    decomp_dir : str
        Path to the directory containing CP decomposition.

    Returns
    -------
    decomposition : CPDecomp
        The decomposition in ``decomp_dir``.

    Raises
    ------
    TypeError
        If ``decomp_dir`` is not a string.
    ValueError
        If ``decomp_dir`` is empty.
    IOError
        If no factor matrix files are found.
    Exception
        If the decomposition cannot be read or is not well formed.

    See also
    --------
    ensign.cp_decomp.CPDecomp : CP decomposition class

    """
    if not isinstance(decomp_dir, str):
        msg = "decomp_dir is not of type String."
        logger.error(msg)
        raise TypeError(msg)
    if len(decomp_dir) < 1:
        msg = "decomp_dir is not a valid directory."
        logger.error(msg)
        raise ValueError(msg)

    if decomp_dir[-1] != '/':
        decomp_dir += '/'

    def _mode_files(pattern, names):
        # Keep names matching `pattern` and order them by the captured mode
        # number. A plain string sort would place mode 10 before mode 2.
        hits = []
        for name in names:
            match = pattern.fullmatch(name)
            if match is not None:
                hits.append((int(match.group(1)), name))
        return [name for _, name in sorted(hits)]

    dir_entries = os.listdir(decomp_dir)

    # Initialize decomposition object and build list of decomp_mode_<i>.txt files
    decomp = CPDecomp()
    decomp_mode_fns = _mode_files(re.compile(r'decomp_mode_([0-9]+)\.txt'), dir_entries)
    if len(decomp_mode_fns) == 0:
        msg = 'No decomp_mode_<i>.txt (factor matrices) files found in {}.'.format(decomp_dir)
        logger.error(msg)
        raise IOError(msg)

    # Assign decomposition specific values
    decomp.order = len(decomp_mode_fns)
    decomp.weights, decomp.rank = dio.read_weights(os.path.join(decomp_dir, 'weights.txt'))
    decomp.factors = [dio.read_factor_matrix(os.path.join(decomp_dir, fn))[0]
                      for fn in decomp_mode_fns]
    decomp.mode_sizes = [factor.shape[0] for factor in decomp.factors]
    if 'streaming.txt' in dir_entries:
        decomp.streaming_dataless_fit = dio.read_streaming(os.path.join(decomp_dir, 'streaming.txt'))

    # Assign sptensor specific values
    mode_map_fns = _mode_files(re.compile(r'map_mode_([0-9]+)\.txt'), dir_entries)
    if 'tensor_data.txt' in dir_entries:
        decomp.sptensor = spt.read_sptensor(decomp_dir)
        decomp.labels = decomp.sptensor.labels
        decomp.mode_names = decomp.sptensor.mode_names
    elif mode_map_fns:
        decomp.mode_names, decomp.labels = sio.read_many_labels(decomp_dir, mode_map_fns)
    else:
        # No label information on disk: generate placeholder names.
        decomp.mode_names = ['mode_'+str(x) for x in range(len(decomp.mode_sizes))]
        decomp.labels = [['label_{}-{}'.format(str(i), str(x)) for x in range(mode_size)]
                         for i, mode_size in enumerate(decomp.mode_sizes)]

    # Read backtracking data if available
    decomp.cpd_backtrack = dio.read_decomp_backtrack(decomp_dir)

    return decomp

def write_cp_decomp_dir(decomp_dir, decomp, write_tensor=False):
    """Writes a CP decomposition to the filesystem.

    Parameters
    ----------
    decomp_dir : str
        Path to the destination directory of the CP decomposition. This
        directory will be created if nonexistent. If it exists, previously
        written decomposition files in it (factor matrices, initial guesses,
        weights, streaming fit and, when ``write_tensor`` is set, label maps
        and tensor data) **will be erased and overwritten**.
    decomp : CPDecomp
        The decomposition to be written.
    write_tensor : bool
        Write the tensor_data.txt and map_mode_<x>.txt files.

    Raises
    ------
    Exception
        If the decomposition could not be written.

    See also
    --------
    ensign.cp_decomp.CPDecomp : CP decomposition class
    """
    if not os.path.exists(decomp_dir):
        os.makedirs(decomp_dir)
    else:
        # Remove stale output so files of a previous, larger decomposition do
        # not survive next to the new ones.
        stale = r'decomp_mode_[0-9]+\.txt|initial_guess_[0-9]+\.txt|streaming\.txt|weights\.txt'
        if write_tensor:
            stale += r'|map_mode_[0-9]+\.txt|tensor_data\.txt'
        fn_pattern = re.compile(stale)
        for fn in os.listdir(decomp_dir):
            if fn_pattern.fullmatch(fn) is not None:
                os.remove(os.path.join(decomp_dir, fn))

    # Scores
    for mode_id, factors in enumerate(decomp.factors):
        dio.write_factor_matrix(decomp_dir, mode_id, factors)

    # Weights
    dio.write_weights(decomp_dir, decomp.weights)

    # Streaming dataless fit
    if decomp.streaming_dataless_fit is not None:
        dio.write_streaming(decomp_dir, decomp.streaming_dataless_fit)

    # Initial Score Guesses
    if decomp.factors_init is not None:
        for mode_id, factors in enumerate(decomp.factors_init):
            dio.write_factor_matrix(decomp_dir, mode_id, factors, 'initial_guess')

    # Labels
    if write_tensor:
        for mode_id, labels in enumerate(decomp.labels):
            sio.write_labels(decomp_dir, mode_id, decomp.mode_names[mode_id], labels)

    # Tensor
    if decomp.sptensor is not None and write_tensor:
        sio.write_sptensor_entries(decomp_dir, decomp.sptensor.mode_sizes, decomp.sptensor.entries.values)

    # Backtracking information. The attribute may be absent on objects that
    # never computed or read a backtrack, so a missing attribute is treated
    # the same as None.
    backtrack = getattr(decomp, 'cpd_backtrack', None)
    backtrack_path = os.path.join(decomp_dir, 'cpd_backtrack.txt')
    if backtrack is not None:
        dio.write_decomp_backtrack(decomp_dir, backtrack)
    elif os.path.isfile(backtrack_path):
        os.remove(backtrack_path)
        
def _filter_cartesian_product(index_lists, valid_indices):
    """
        Computes the component backtrack info from a component of a decompositon 
        to the tensor by finding the intersection of valid_indices with the Cartesian 
        product  of the lists in index_lists. index_lists can be of arbitary 
        length. 

        Parameters
        ----------
        index_lists : list of lists
            A list containing one list for each decomposition mode. Each list
            contains indices that are hot in the component in question.
        valid_indices : iterable of tuples
            All index tuples appearing in the tensor

        Returns
        -------
        product : list of tuples
            All tuples in Cartesian product of index_lists. These correspond to
            the tensor indices that are hot in the component.
    """
    index_list_lengths = list(map(len, index_lists))
    # if elements in cart. prod. were to reside in a multi-d array, the ith
    # element of flat_lengths is the number of elements in a slice where the
    # first i indices of that multi-d array are specified
    flat_lengths = [functools.reduce(lambda x,y:x*y, index_list_lengths[i:]) 
                    for i in range(len(index_lists))]
    n_entries_in_cart_prod = flat_lengths.pop(0)
    product = []
    for i in range(n_entries_in_cart_prod):
        idx = i
        # compute which indices should be chosen in constructing the current
        # element of the cart. prod.
        element_indices = () 
        for length in flat_lengths:
            element_indices += (idx // length,)
            idx %= length
        element_indices += (idx,)
        # construct the element given the indices that should be chosen
        element = tuple(index_lists[j][element_indices[j]] for j in range(len(index_lists)))
        # only store the element if it is specified as valid
        if element in valid_indices:
            product.append(element)
    return product

def _forwardtrack_decomp_component(decomp, comp_id):
    """
        Computes the component backtrack from a component of a decomposition to
        the tensor by looking "forward" from the tensor to the decomposition to
        check which indices appear in the component. This method is faster than
        _filter_cartesian_product if the component has more hot indices than
        the original tensor.

        Parameters
        ----------
        decomp : CPDecomp
            Decomposition object in question. Its ``sptensor.entries.values``
            rows are read with every column but the last treated as
            coordinates, and ``factors`` is read one matrix per coordinate
            column.
        comp_id : int
            The component whose backtrack info is being calculated.

        Returns
        -------
        forward_track : list of tuples
            All indices in the tensor that are hot in the component, in tensor
            row order. An entry is dropped as soon as one of its coordinates
            has a factor value below REL_CHOP_POINT times that mode's maximum
            for the component.
    """
    coordinates = decomp.sptensor.entries.values[:, :-1]
    if len(coordinates) == 0:
        return []

    # The component column and its chop threshold depend only on the mode,
    # not on the tensor entry, so compute them once instead of per entry.
    columns = [factor_matrix[:, comp_id] for factor_matrix in decomp.factors]
    thresholds = [REL_CHOP_POINT * column.max() for column in columns]

    forward_track = []
    # iterate over all hot indices in the tensor
    for coordinate_row in coordinates:
        # use a relative chop point below which a value is "zero"; any()
        # stops at the first mode that zeroes the outer-product element
        below_chop = any(
            column[int(label_index)] < threshold
            for label_index, column, threshold
            in zip(coordinate_row, columns, thresholds)
        )
        if not below_chop:
            forward_track.append(tuple(coordinate_row))
    return forward_track

def _compute_cpd_backtrack(decomp):
    """
        Computes the backtrack info from each component of a decomposition to
        the original tensor. Sets the appropriate field in the CPDecomp object.

        Parameters
        ----------
        decomp : CPDecomp
            Decomposition object in question. Its ``cpd_backtrack`` attribute
            is overwritten with the result.

        Returns
        -------
        cpd_backtrack : list of lists of int
            One list per component holding the line numbers (row positions in
            ``decomp.sptensor.entries``) of the tensor entries that are hot in
            that component.
    """
    indices = [tuple(map(int, t))
               for t in decomp.sptensor.entries.values[:, :-1]]
    # save a map from indices to tensor line numbers (if an index tuple
    # repeats, the last line number wins)
    line_nums = {index: line_num for line_num, index in enumerate(indices)}
    n_tensor_entries = len(decomp.sptensor.entries)
    cpd_backtrack = []
    # for each comp keep track of hot indices in each mode
    for comp_id in range(decomp.rank):
        non_zero_indices = []
        for mode_id in range(decomp.order):
            mode_vec = decomp.factors[mode_id][:, comp_id]
            mode_max = mode_vec.max()
            hot_mode_indices = list(np.argwhere(mode_vec > REL_CHOP_POINT * mode_max).ravel())
            non_zero_indices.append(hot_mode_indices)
        # check if there are more hot indices in the component than in tensor.
        # The count is multiplied out with Python ints: a fixed-width numpy
        # product can wrap around for large modes and pick the wrong strategy.
        num_comp_entries = functools.reduce(
            lambda x, y: x * y, map(len, non_zero_indices), 1)
        if num_comp_entries > n_tensor_entries:
            indices_to_track = _forwardtrack_decomp_component(decomp, comp_id)
        else:
            indices_to_track = _filter_cartesian_product(non_zero_indices, line_nums)
        # convert from indices to line numbers
        entries_to_track = [line_nums[i] for i in indices_to_track]
        cpd_backtrack.append(entries_to_track)
    decomp.cpd_backtrack = cpd_backtrack
    return cpd_backtrack

def cp_als(sptensor, rank, out_dir=None, seed=CP_ALS_SEED, max_iter=CP_ALS_MAX_ITER,
           stop_tol=CP_ALS_STOP_TOL, mem_limit_gb=CP_ALS_MEM_LIMIT_GB,
           output_init_guess=CP_ALS_OUTPUT_INIT_GUESS,
           verbose=CP_ALS_VERBOSE, gen_backtrack=CP_ALS_BACKTRACK):
    """Performs a CP decomposition using the alternating least squares (ALS) method. [1]_ [2]_

    Parameters
    ----------
    sptensor : SPTensor or str
        Tensor to decompose. If sptensor is of type str, it should contain a
        path to a directory containing an SPTensor or directly to a
        tensor_data.txt file.
    rank : int
        Rank of the decomposition.
    out_dir : str
        Path to directory where decomposition results will be written. If not
        set, the decomposition will not be written to disk.

    Returns
    -------
    decomp : CPDecomp
        Results of the decomposition.

    Other Parameters
    ----------------
    seed : int, optional
        Random seed used to initialize decomposition.
        Default value is ``0``.
    max_iter : int, optional
        Maximum iterations before stopping decomposition.
        Default value is ``100``.
    stop_tol : float, optional
        Minimum change in fit required for convergence.
        Default value is ``1.0e-5``.
    mem_limit_gb : int, optional
        Maximum memory used during decomposition, in GiB.
        Default value is ``2``.
    output_init_guess : bool, optional
        Write the initial guess of decomposition to ``out_dir/input_<mode_id>.txt``
        Default value is ``False``.
    verbose : bool, optional
        Output verbose decomposition progress to ``out_dir/output.txt``
        Default value is ``False``.
    gen_backtrack : bool, optional
        Output component-wise backtracking information to ``out_dir/cpd_backtrack.txt``
        Default value is ``False``.

    Raises
    ------
    TypeError
        If the parameters fail validation.
    IOError
        If sptensor is a str that is neither an existing directory nor an
        existing file.
    SystemExit
        If the native routine reports ``MALLOC_ERROR``; the exit status is
        ``MALLOC_ERROR``.

    See also
    --------
    ensign.cp_decomp.CPDecomp : CP decomposition class
    ensign.sptensor.SPTensor : Sparse tensor class

    References
    ----------
    .. [1] Kolda, T., Bader, B., "Tensor Decompositions and Applications,"
           SIAM Review, 51(3), pp. 455-500, 2009.

    .. [2] Baskaran, M., Meister, B., Vasilache, N., Lethin, R., "Efficient and
           Scalable Computations with Sparse Tensors," IEEE HPEC, 2012.

    """
    if not _validate_params(sptensor, rank, max_iter, stop_tol, mem_limit_gb):
        msg = "Invalid Parameters"
        logger.error(msg)
        raise TypeError(msg)

    if isinstance(sptensor, str):
        if os.path.isdir(sptensor):
            sptensor = spt.read_sptensor(sptensor)
        elif os.path.isfile(sptensor):
            # A bare file name has an empty dirname; fall back to '.' so the
            # tensor is read from the current directory instead of from '/'.
            tensor_dir = (os.path.dirname(sptensor) or '.') + '/'
            sptensor = spt.read_sptensor(tensor_dir, os.path.basename(sptensor))
        else:
            msg = '{} is not a valid sptensor file or directory.'.format(sptensor)
            logger.error(msg)
            raise IOError(msg)

    # The limit is handed to the native options in bytes.
    mem_limit_bytes = mem_limit_gb*(1024**3)

    tensor = _get_sparse_co_tensor(sptensor)
    options = _get_ctype_decomp_options_obj(
        seed=seed,
        cp_als_max_iter=max_iter,
        cp_als_stop_tol=stop_tol,
        cp_als_memory_limit=mem_limit_bytes,
        verbose=verbose,
        memory_limit=mem_limit_bytes,
        output_init_guess=output_init_guess
    )
    ktensor = _get_k_tensor(sptensor.order, rank)
    metrics = _get_decomp_metrics()

    # Declare the native signature before calling so ctypes converts the
    # arguments correctly. The local name avoids shadowing this function.
    native_cp_als = API.cp_als
    native_cp_als.argtypes = [POINTER(et.C_SparseCoTensor), c_int,
        POINTER(et.C_DecompOptions), POINTER(et.C_KTensor),
        POINTER(et.C_DecompMetrics)]
    ret = native_cp_als(pointer(tensor), rank, pointer(options),
        pointer(ktensor), pointer(metrics))

    # Only MALLOC_ERROR is inspected here; other return codes are not checked.
    if ret == MALLOC_ERROR:
        logger.error('MALLOC_ERROR. Try increasing the memory limit by using the mem_limit_gb argument.')
        # sys.exit raises SystemExit like the site-provided exit() helper,
        # but does not depend on the site module having been loaded.
        sys.exit(MALLOC_ERROR)

    py_decomp = _get_py_decomp_obj(ktensor, sptensor, rank, metrics, options)

    if output_init_guess:
        _get_init_guess(py_decomp, options)

    if gen_backtrack:
        _compute_cpd_backtrack(py_decomp)
    else:
        py_decomp.cpd_backtrack = None

    if out_dir:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(out_dir, exist_ok=True)
        write_cp_decomp_dir(out_dir, py_decomp)

    return py_decomp

def cp_als_nn(sptensor, rank, out_dir=None, seed=CP_ALS_NN_SEED, max_iter=CP_ALS_NN_MAX_ITER,
              stop_tol=CP_ALS_NN_STOP_TOL, mem_limit_gb=CP_ALS_NN_MEM_LIMIT_GB,
              output_init_guess=CP_ALS_NN_OUTPUT_INIT_GUESS,
              verbose=CP_ALS_NN_VERBOSE, gen_backtrack=CP_ALS_NN_BACKTRACK):
    """Performs a nonnegative CP decomposition using the alternating least squares (ALS) method.

    Nonnegativity constraints are on output factor matrices. Input tensor
    entries must be nonnegative. [1]_ [2]_

    Parameters
    ----------
    sptensor : SPTensor or str
        Tensor to decompose. If sptensor is of type str, it should contain a
        path to a directory containing an SPTensor or directly to a
        tensor_data.txt file.
    rank : int
        Rank of the decomposition.
    out_dir : str
        Path to directory where decomposition results will be written. If not
        set, the decomposition will not be written to disk.

    Returns
    -------
    decomp : CPDecomp
        Results of the decomposition.

    Other Parameters
    ----------------
    seed : int, optional
        Random seed used to initialize decomposition.
        Default value is ``0``.
    max_iter : int, optional
        Maximum iterations before stopping decomposition.
        Default value is ``100``.
    stop_tol : float, optional
        Minimum change in fit required for convergence.
        Default value is ``1.0e-5``.
    mem_limit_gb : int, optional
        Maximum memory used during decomposition, in GiB.
        Default value is ``2``.
    output_init_guess : bool, optional
        Write the initial guess of decomposition to ``out_dir/input_<mode_id>.txt``
        Default value is ``False``.
    verbose : bool, optional
        Output verbose decomposition progress to ``out_dir/output.txt``
        Default value is ``False``.
    gen_backtrack : bool, optional
        Output component-wise backtracking information to ``out_dir/cpd_backtrack.txt``
        Default value is ``False``.

    Raises
    ------
    TypeError
        If the parameters fail validation.
    IOError
        If sptensor is a str that is neither an existing directory nor an
        existing file.
    SystemExit
        If the native routine reports ``MALLOC_ERROR``; the exit status is
        ``MALLOC_ERROR``.

    See also
    --------
    ensign.cp_decomp.CPDecomp : CP decomposition class
    ensign.sptensor.SPTensor : Sparse tensor class

    References
    ----------
    .. [1] Kolda, T., Bader, B., "Tensor Decompositions and Applications,"
           SIAM Review, 51(3), pp. 455-500, 2009.

    .. [2] Baskaran, M., Meister, B., Vasilache, N., Lethin, R., "Efficient and
           Scalable Computations with Sparse Tensors," IEEE HPEC, 2012.

    """
    if not _validate_params(sptensor, rank, max_iter, stop_tol, mem_limit_gb):
        msg = "Invalid Parameters"
        logger.error(msg)
        raise TypeError(msg)

    if isinstance(sptensor, str):
        if os.path.isdir(sptensor):
            sptensor = spt.read_sptensor(sptensor)
        elif os.path.isfile(sptensor):
            # A bare file name has an empty dirname; fall back to '.' so the
            # tensor is read from the current directory instead of from '/'.
            tensor_dir = (os.path.dirname(sptensor) or '.') + '/'
            sptensor = spt.read_sptensor(tensor_dir, os.path.basename(sptensor))
        else:
            msg = '{} is not a valid sptensor file or directory.'.format(sptensor)
            logger.error(msg)
            raise IOError(msg)

    # The limit is handed to the native options in bytes.
    mem_limit_bytes = mem_limit_gb*(1024**3)

    tensor = _get_sparse_co_tensor(sptensor)
    # NOTE(review): the plain cp_als_* options are set alongside the
    # cp_als_nn_* ones and no cp_als_nn_memory_limit is passed; presumably
    # the native routine reads the cp_als memory limit - confirm.
    options = _get_ctype_decomp_options_obj(
        seed=seed,
        cp_als_max_iter=max_iter,
        cp_als_stop_tol=stop_tol,
        cp_als_memory_limit=mem_limit_bytes,
        verbose=verbose,
        cp_als_nn_max_iter=max_iter,
        cp_als_nn_stop_tol=stop_tol,
        memory_limit=mem_limit_bytes,
        output_init_guess=output_init_guess
    )
    ktensor = _get_k_tensor(sptensor.order, rank)
    metrics = _get_decomp_metrics()

    # Declare the native signature before calling so ctypes converts the
    # arguments correctly. The local name avoids shadowing this function.
    native_cp_als_nn = API.cp_als_nn
    native_cp_als_nn.argtypes = [POINTER(et.C_SparseCoTensor), c_int,
        POINTER(et.C_DecompOptions), POINTER(et.C_KTensor),
        POINTER(et.C_DecompMetrics)]
    ret = native_cp_als_nn(pointer(tensor), rank, pointer(options),
        pointer(ktensor), pointer(metrics))

    # Only MALLOC_ERROR is inspected here; other return codes are not checked.
    if ret == MALLOC_ERROR:
        logger.error('MALLOC_ERROR. Try increasing the memory limit by using the mem_limit_gb argument.')
        # sys.exit raises SystemExit like the site-provided exit() helper,
        # but does not depend on the site module having been loaded.
        sys.exit(MALLOC_ERROR)

    py_decomp = _get_py_decomp_obj(ktensor, sptensor, rank, metrics, options)

    if output_init_guess:
        _get_init_guess(py_decomp, options)

    if gen_backtrack:
        _compute_cpd_backtrack(py_decomp)
    else:
        py_decomp.cpd_backtrack = None

    if out_dir:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(out_dir, exist_ok=True)
        write_cp_decomp_dir(out_dir, py_decomp)

    return py_decomp

def cp_apr(sptensor, rank, out_dir=None, seed=CP_APR_SEED,
           max_outer_iter=CP_APR_MAX_OUTER_ITER,
           max_inner_iter=CP_APR_MAX_INNER_ITER,
           stop_tol=CP_APR_STOP_TOL, mem_limit_gb=CP_APR_MEM_LIMIT_GB,
           output_init_guess=CP_APR_OUTPUT_INIT_GUESS, verbose=CP_APR_VERBOSE,
           gen_backtrack=CP_APR_BACKTRACK):
    """Performs a CP decomposition using the alternating Poisson regression (APR) method. [1]_ [2]_

    Parameters
    ----------
    sptensor : SPTensor or str
        Tensor to decompose. If sptensor is of type str, it should contain a
        path to a directory containing an SPTensor or directly to a
        tensor_data.txt file.
    rank : int
        Rank of the decomposition.
    out_dir : str
        Path to directory where decomposition results will be written. If not
        set, the decomposition will not be written to disk.

    Returns
    -------
    decomp : CPDecomp
        Results of the decomposition.

    Other Parameters
    ----------------
    seed : int, optional
        Random seed used to initialize decomposition.
        Default value is ``0``.
    max_outer_iter : int, optional
        Maximum iterations before stopping decomposition.
        Default value is ``200``.
    max_inner_iter : int, optional
        Maximum inner iterations per outer iteration.
        Default value is ``10``.
    stop_tol : float, optional
        Minimum change in KKT violation required for convergence.
        Default value is ``1.0e-4``.
    mem_limit_gb : int, optional
        Maximum memory used during decomposition, in GiB.
        Default value is ``2``.
    output_init_guess : bool, optional
        Write the initial guess of decomposition to ``out_dir/input_<mode_id>.txt``
        Default value is ``False``.
    verbose : bool, optional
        Output verbose decomposition progress to ``out_dir/output.txt``
        Default value is ``False``.
    gen_backtrack : bool, optional
        Output component-wise backtracking information to ``out_dir/cpd_backtrack.txt``
        Default value is ``False``.

    Raises
    ------
    TypeError
        If the parameters fail validation.
    IOError
        If sptensor is a str that is neither an existing directory nor an
        existing file.
    SystemExit
        If the native routine reports ``MALLOC_ERROR``; the exit status is
        ``MALLOC_ERROR``.

    See also
    --------
    ensign.cp_decomp.CPDecomp : CP decomposition class
    ensign.sptensor.SPTensor : Sparse tensor class

    References
    ----------
    .. [1] Chi, E., Kolda, T., "On Tensors, Sparsity, and Nonnegative
           Factorizations," SIAM Journal on Matrix Analysis and Applications
           33.4, pp. 1272-1299, 2012.

    .. [2] Baskaran, M., Meister, B., Vasilache, N., Lethin, R., "Efficient and
           Scalable Computations with Sparse Tensors," IEEE HPEC, 2012.

    """
    # The third positional argument is a fixed 1; the outer and inner
    # iteration limits are validated through the trailing arguments.
    if not _validate_params(
        sptensor, rank, 1, stop_tol,
        mem_limit_gb, max_outer_iter, max_inner_iter
    ):
        msg = "Invalid Parameters"
        logger.error(msg)
        # Carry the message on the exception as well as in the log.
        raise TypeError(msg)

    if isinstance(sptensor, str):
        if os.path.isdir(sptensor):
            sptensor = spt.read_sptensor(sptensor)
        elif os.path.isfile(sptensor):
            # A bare file name has an empty dirname; fall back to '.' so the
            # tensor is read from the current directory instead of from '/'.
            tensor_dir = (os.path.dirname(sptensor) or '.') + '/'
            sptensor = spt.read_sptensor(tensor_dir, os.path.basename(sptensor))
        else:
            msg = '{} is not a valid sptensor file or directory.'.format(sptensor)
            logger.error(msg)
            raise IOError(msg)

    # The limit is handed to the native options in bytes.
    mem_limit_bytes = mem_limit_gb*(1024**3)

    tensor = _get_sparse_co_tensor(sptensor)
    options = _get_ctype_decomp_options_obj(
        seed=seed,
        cp_apr_max_outer_iter=max_outer_iter,
        cp_apr_stop_tol=stop_tol,
        cp_apr_max_inner_iter=max_inner_iter,
        cp_apr_memory_limit=mem_limit_bytes,
        verbose=verbose,
        memory_limit=mem_limit_bytes,
        output_init_guess=output_init_guess
    )
    ktensor = _get_k_tensor(sptensor.order, rank)
    metrics = _get_decomp_metrics()

    # Declare the native signature before calling so ctypes converts the
    # arguments correctly. The local name avoids shadowing this function.
    native_cp_apr = API.cp_apr
    native_cp_apr.argtypes = [POINTER(et.C_SparseCoTensor), c_int,
        POINTER(et.C_DecompOptions), POINTER(et.C_KTensor),
        POINTER(et.C_DecompMetrics)]
    ret = native_cp_apr(pointer(tensor), rank, pointer(options),
        pointer(ktensor), pointer(metrics))

    # Only MALLOC_ERROR is inspected here; other return codes are not checked.
    if ret == MALLOC_ERROR:
        logger.error('MALLOC_ERROR. Try increasing the memory limit by using the mem_limit_gb argument.')
        # sys.exit raises SystemExit like the site-provided exit() helper,
        # but does not depend on the site module having been loaded.
        sys.exit(MALLOC_ERROR)

    py_decomp = _get_py_decomp_obj(ktensor, sptensor, rank, metrics, options)

    if output_init_guess:
        _get_init_guess(py_decomp, options)

    if gen_backtrack:
        _compute_cpd_backtrack(py_decomp)
    else:
        py_decomp.cpd_backtrack = None

    if out_dir:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(out_dir, exist_ok=True)
        write_cp_decomp_dir(out_dir, py_decomp)

    return py_decomp

def cp_apr_pdnr(sptensor, rank, out_dir=None, seed=CP_APR_SEED,
                max_outer_iter=CP_APR_PDNR_MAX_OUTER_ITER,
                max_inner_iter=CP_APR_PDNR_MAX_INNER_ITER,
                stop_tol=CP_APR_PDNR_STOP_TOL, mem_limit_gb=CP_APR_PDNR_MEM_LIMIT_GB,
                output_init_guess=CP_APR_PDNR_OUTPUT_INIT_GUESS,
                load_balance=CP_APR_PDNR_LOAD_BALANCE,
                verbose=CP_APR_PDNR_VERBOSE, gen_backtrack=CP_APR_PDNR_BACKTRACK):
    """Performs a CP decomposition using alternating Poisson regression (APR) with projected damped Newton row (PDNR) subproblem. [1]_

    Parameters
    ----------
    sptensor : SPTensor or str
        Tensor to decompose. If sptensor is of type str, it should contain a
        path to a directory containing an SPTensor or directly to a
        tensor_data.txt file.
    rank : int
        Rank of the decomposition.
    out_dir : str
        Path to directory where decomposition results will be written. If not
        set, the decomposition will not be written to disk.

    Returns
    -------
    decomp : CPDecomp
        Results of the decomposition.

    Other Parameters
    ----------------
    seed : int, optional
        Random seed used to initialize decomposition.
        Default value is ``0``.
    max_outer_iter : int, optional
        Maximum iterations before stopping decomposition.
        Default value is ``200``.
    max_inner_iter : int, optional
        Maximum inner iterations per outer iteration.
        Default value is ``10``.
    stop_tol : float, optional
        Minimum change in KKT violation required for convergence.
        Default value is ``1.0e-4``.
    mem_limit_gb : int, optional
        Maximum memory used during decomposition, in GiB.
        Default value is ``2``.
    output_init_guess : bool, optional
        Write the initial guess of decomposition to ``out_dir/input_<mode_id>.txt``
        Default value is ``False``.
    load_balance : int, optional
        The load balancing technique to use for parallel execution.
        ``ensign.constants.NO_LOAD_BALANCE`` (default) is no load balancing,
        ``ensign.constants.LOAD_BALANCE`` is standard load balancing, and
        ``ensign.constants.LOAD_BALANCE_UNSORTED`` is unsorted load balancing.
    verbose : bool, optional
        Output verbose decomposition progress to ``out_dir/output.txt``
        Default value is ``False``.
    gen_backtrack : bool, optional
        Output component-wise backtracking information to ``out_dir/cpd_backtrack.txt``
        Default value is ``False``.

    Raises
    ------
    TypeError
        If the parameters fail validation.
    IOError
        If sptensor is a str that is neither an existing directory nor an
        existing file.
    SystemExit
        If the native routine reports ``MALLOC_ERROR``; the exit status is
        ``MALLOC_ERROR``.

    See also
    --------
    ensign.cp_decomp.CPDecomp : CP decomposition class
    ensign.sptensor.SPTensor : Sparse tensor class

    References
    ----------
    .. [1] Hansen, S., Plantenga, T., Kolda, T., "Newton-based Optimization for
           Kullback-Leibler Nonnegative Tensor Factorizations,"  Optimization
           Methods and Software 30(5), pp. 1002-1029, 2015.

    """
    # NOTE(review): the seed default above is CP_APR_SEED, while the other
    # defaults use CP_APR_PDNR_* constants - confirm this is intended.
    # The third positional argument is a fixed 1; the outer and inner
    # iteration limits are validated through the trailing arguments.
    if not _validate_params(sptensor, rank, 1, stop_tol, mem_limit_gb, max_outer_iter, max_inner_iter):
        msg = 'Invalid Parameters'
        logger.error(msg)
        raise TypeError(msg)

    if isinstance(sptensor, str):
        if os.path.isdir(sptensor):
            sptensor = spt.read_sptensor(sptensor)
        elif os.path.isfile(sptensor):
            # A bare file name has an empty dirname; fall back to '.' so the
            # tensor is read from the current directory instead of from '/'.
            tensor_dir = (os.path.dirname(sptensor) or '.') + '/'
            sptensor = spt.read_sptensor(tensor_dir, os.path.basename(sptensor))
        else:
            msg = '{} is not a valid sptensor file or directory.'.format(sptensor)
            # Log before raising, as the sibling decomposition functions do.
            logger.error(msg)
            raise IOError(msg)

    # The limit is handed to the native options in bytes.
    mem_limit_bytes = mem_limit_gb*(1024**3)

    tensor = _get_sparse_co_tensor(sptensor)
    # NOTE(review): load_balance is accepted and documented but is never
    # forwarded to the options built here - confirm whether it should be.
    options = _get_ctype_decomp_options_obj(
        seed=seed,
        cp_apr_pdnr_max_outer_iter=max_outer_iter,
        cp_apr_pdnr_stop_tol=stop_tol,
        cp_apr_pdnr_max_inner_iter=max_inner_iter,
        cp_apr_pdnr_memory_limit=mem_limit_bytes,
        verbose=verbose,
        memory_limit=mem_limit_bytes,
        output_init_guess=output_init_guess
    )
    ktensor = _get_k_tensor(sptensor.order, rank)
    metrics = _get_decomp_metrics()

    # Declare the native signature before calling so ctypes converts the
    # arguments correctly. The local name avoids shadowing this function.
    native_cp_apr_pdnr = API.cp_apr_pdnr
    native_cp_apr_pdnr.argtypes = [POINTER(et.C_SparseCoTensor), c_int,
        POINTER(et.C_DecompOptions), POINTER(et.C_KTensor),
        POINTER(et.C_DecompMetrics)]
    ret = native_cp_apr_pdnr(pointer(tensor), rank, pointer(options),
        pointer(ktensor), pointer(metrics))

    # Only MALLOC_ERROR is inspected here; other return codes are not checked.
    if ret == MALLOC_ERROR:
        logger.error('MALLOC_ERROR. Try increasing the memory limit by using the mem_limit_gb argument.')
        # sys.exit raises SystemExit like the site-provided exit() helper,
        # but does not depend on the site module having been loaded.
        sys.exit(MALLOC_ERROR)

    py_decomp = _get_py_decomp_obj(ktensor, sptensor, rank, metrics, options)

    if output_init_guess:
        _get_init_guess(py_decomp, options)

    if gen_backtrack:
        _compute_cpd_backtrack(py_decomp)
    else:
        py_decomp.cpd_backtrack = None

    if out_dir:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(out_dir, exist_ok=True)
        write_cp_decomp_dir(out_dir, py_decomp)

    return py_decomp

def cp_apr_pqnr(sptensor, rank, out_dir=None, seed=CP_APR_PQNR_SEED,
                max_outer_iter=CP_APR_PQNR_MAX_OUTER_ITER,
                max_inner_iter=CP_APR_PQNR_MAX_INNER_ITER,
                stop_tol=CP_APR_PQNR_STOP_TOL, mem_limit_gb=CP_APR_PQNR_MEM_LIMIT_GB,
                output_init_guess=CP_APR_PQNR_OUTPUT_INIT_GUESS,
                load_balance=CP_APR_PQNR_LOAD_BALANCE,
                verbose=CP_APR_PQNR_VERBOSE, gen_backtrack=CP_APR_PQNR_BACKTRACK):
    """Performs a CP decomposition using alternating Poisson regression (APR) with projected quasi-Newton row (PQNR) subproblem. [1]_

    Parameters
    ----------
    sptensor : SPTensor or str
        Tensor to decompose. If sptensor is of type str, it should contain a
        path to a directory containing an SPTensor or directly to a
        tensor_data.txt file.
    rank : int
        Rank of the decomposition.
    out_dir : str
        Path to directory where decomposition results will be written. If not
        set, the decomposition will not be written to disk.

    Returns
    -------
    decomp : CPDecomp
        Results of the decomposition.

    Other Parameters
    ----------------
    seed : int, optional
        Random seed used to initialize decomposition.
        Default value is ``0``.
    max_outer_iter : int, optional
        Maximum iterations before stopping decomposition.
        Default value is ``200``.
    max_inner_iter : int, optional
        Maximum inner iterations per outer iteration.
        Default value is ``10``.
    stop_tol : float, optional
        Minimum change in KKT violation required for convergence.
        Default value is ``1.0e-4``.
    mem_limit_gb : int, optional
        Maximum memory used during decomposition, in GiB.
        Default value is ``2``.
    output_init_guess : bool, optional
        Write the initial guess of decomposition to ``out_dir/input_<mode_id>.txt``
        Default value is ``False``.
    load_balance : int, optional
        The load balancing technique to use for parallel execution.
        ``ensign.constants.NO_LOAD_BALANCE`` (default) is no load balancing,
        ``ensign.constants.LOAD_BALANCE`` is standard load balancing, and
        ``ensign.constants.LOAD_BALANCE_UNSORTED`` is unsorted load balancing.
    verbose : bool, optional
        Output verbose decomposition progress to ``out_dir/output.txt``
        Default value is ``False``.
    gen_backtrack : bool, optional
        Output component-wise backtracking information to ``out_dir/cpd_backtrack.txt``
        Default value is ``False``.

    Raises
    ------
    TypeError
        If the parameters fail validation.
    IOError
        If sptensor is a str that is neither an existing directory nor an
        existing file.
    SystemExit
        If the native routine reports ``MALLOC_ERROR``; the exit status is
        ``MALLOC_ERROR``.

    See also
    --------
    ensign.cp_decomp.CPDecomp : CP decomposition class
    ensign.sptensor.SPTensor : Sparse tensor class

    References
    ----------
    .. [1] Hansen, S., Plantenga, T., Kolda, T., "Newton-based Optimization for
           Kullback-Leibler Nonnegative Tensor Factorizations,"  Optimization
           Methods and Software 30(5), pp. 1002-1029, 2015.

    """
    # The third positional argument is a fixed 1; the outer and inner
    # iteration limits are validated through the trailing arguments.
    if not _validate_params(sptensor, rank, 1, stop_tol, mem_limit_gb, max_outer_iter, max_inner_iter):
        msg = "Invalid Parameters"
        logger.error(msg)
        raise TypeError(msg)

    if isinstance(sptensor, str):
        if os.path.isdir(sptensor):
            sptensor = spt.read_sptensor(sptensor)
        elif os.path.isfile(sptensor):
            # A bare file name has an empty dirname; fall back to '.' so the
            # tensor is read from the current directory instead of from '/'.
            tensor_dir = (os.path.dirname(sptensor) or '.') + '/'
            sptensor = spt.read_sptensor(tensor_dir, os.path.basename(sptensor))
        else:
            msg = '{} is not a valid sptensor file or directory.'.format(sptensor)
            logger.error(msg)
            raise IOError(msg)

    # The limit is handed to the native options in bytes.
    mem_limit_bytes = mem_limit_gb*(1024**3)

    tensor = _get_sparse_co_tensor(sptensor)
    # NOTE(review): load_balance is accepted and documented but is never
    # forwarded to the options built here - confirm whether it should be.
    options = _get_ctype_decomp_options_obj(
        seed=seed,
        cp_apr_pqnr_max_outer_iter=max_outer_iter,
        cp_apr_pqnr_stop_tol=stop_tol,
        cp_apr_pqnr_max_inner_iter=max_inner_iter,
        cp_apr_pqnr_memory_limit=mem_limit_bytes,
        verbose=verbose,
        memory_limit=mem_limit_bytes,
        output_init_guess=output_init_guess
    )
    ktensor = _get_k_tensor(sptensor.order, rank)
    metrics = _get_decomp_metrics()

    # Declare the native signature before calling so ctypes converts the
    # arguments correctly. The local name avoids shadowing this function.
    native_cp_apr_pqnr = API.cp_apr_pqnr
    native_cp_apr_pqnr.argtypes = [POINTER(et.C_SparseCoTensor), c_int,
        POINTER(et.C_DecompOptions), POINTER(et.C_KTensor),
        POINTER(et.C_DecompMetrics)]
    ret = native_cp_apr_pqnr(pointer(tensor), rank, pointer(options),
        pointer(ktensor), pointer(metrics))

    # Only MALLOC_ERROR is inspected here; other return codes are not checked.
    if ret == MALLOC_ERROR:
        logger.error('MALLOC_ERROR. Try increasing the memory limit by using the mem_limit_gb argument.')
        # sys.exit raises SystemExit like the site-provided exit() helper,
        # but does not depend on the site module having been loaded.
        sys.exit(MALLOC_ERROR)

    py_decomp = _get_py_decomp_obj(ktensor, sptensor, rank, metrics, options)

    if output_init_guess:
        _get_init_guess(py_decomp, options)

    if gen_backtrack:
        _compute_cpd_backtrack(py_decomp)
    else:
        py_decomp.cpd_backtrack = None

    if out_dir:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(out_dir, exist_ok=True)
        write_cp_decomp_dir(out_dir, py_decomp)

    return py_decomp