Source code for tdamapper.utils.metrics

"""
Utilities for computing metrics.

This module provides functions to calculate various distance metrics. A metric,
or distance function, is a function that maps two points to a double value,
representing the "distance" between them. For a function to qualify as a valid
metric, it must satisfy the following properties:

1. Symmetry: The distance between two points is the same regardless of the
    order, i.e.:
    :math:`d(x, y) = d(y, x)` for all x and y.
2. Positivity: The distance between two distinct points is always positive,
    i.e.:
    :math:`d(x, y) > 0` for all distinct x and y, and :math:`d(x, x) = 0`
    for every x.
3. Triangle inequality: The distance between two points is less than or equal
    to the sum of the distances from a third point, i.e.:
    :math:`d(x, z) \\leq d(x, y) + d(y, z)` for all points x, y, z.

Supported distance metrics include:
- Euclidean: The square root of the sum of squared differences between the
components of vectors.
- Minkowski: A generalization of the Euclidean and Chebyshev distances,
parameterized by an order `p`.
- Chebyshev: The maximum absolute difference between the components of vectors.
- Cosine: A distance on unit vectors based on cosine similarity.
"""

import numpy as np
import tdamapper.utils._metrics as _metrics


_EUCLIDEAN = 'euclidean'
_MANHATTAN = 'manhattan'
_MINKOWSKI = 'minkowski'
_MINKOWSKI_P = 'p'
_CHEBYSHEV = 'chebyshev'
_COSINE = 'cosine'


[docs] def get_supported_metrics(): """ Return a list of supported metric names. :return: A list of supported metric names. :rtype: list of str """ return [ _EUCLIDEAN, _MANHATTAN, _MINKOWSKI, _CHEBYSHEV, _COSINE, ]
[docs] def euclidean(): """ Return the Euclidean distance function for vectors. The Euclidean distance is defined as the square root of the sum of the squared differences between the components of the vectors. :return: The Euclidean distance function. :rtype: callable """ return _metrics.euclidean
[docs] def manhattan(): """ Return the Manhattan distance function for vectors. The Manhattan distance is defined as the sum of the absolute differences between the components of the vectors. :return: The Manhattan distance function. :rtype: callable """ return _metrics.manhattan
[docs] def chebyshev(): """ Return the Chebyshev distance function for vectors. The Chebyshev distance is defined as the maximum absolute difference between the components of the vectors. :return: The Chebyshev distance function. :rtype: callable """ return _metrics.chebyshev
[docs] def minkowski(p): """ Return the Minkowski distance function for order p on vectors. The Minkowski distance is a generalization of the Euclidean and Chebyshev distances. When p = 1, it is equivalent to the Manhattan distance, and when p = 2, it is equivalent to the Euclidean distance. When p is infinite, it is equivalent to the Chebyshev distance. :param p: The order of the Minkowski distance. :type p: int :return: The Minkowski distance function. :rtype: callable """ if p == 1: return manhattan() elif p == 2: return euclidean() elif np.isinf(p): return chebyshev() return lambda x, y: _metrics.minkowski(p, x, y)
[docs] def cosine(): """ Return the cosine distance function for vectors. The cosine similarity between the input vectors ranges from -1.0 to 1.0. - A value of 1.0 indicates that the vectors are in the same direction. - A value of 0.0 indicates orthogonality (the vectors are perpendicular). - A value of -1.0 indicates that the vectors are diametrically opposed. The cosine distance is derived from the cosine similarity :math:`s` and is defined as: :math:`d(x, y) = \\sqrt{2 \\cdot (1 - s(x, y))}` This definition ensures that the cosine distance satisfies the triangle inequality on unit vectors. :return: The cosine distance function. :rtype: callable """ return _metrics.cosine
[docs] def get_metric(metric, **kwargs): """ Return a distance function based on the specified string or callable. :param metric: The metric to use. If a callable function is provided, it is returned directly. Otherwise, predefined metric names returned by `get_supported_metrics()` are supported. :type metric: str or callable :param kwargs: Additional keyword arguments (e.g., 'p' for Minkowski distance). :type kwargs: dict :return: The selected distance metric function. :rtype: callable :raises ValueError: If an invalid metric string is provided. """ if callable(metric): return metric elif metric == _EUCLIDEAN: return euclidean() elif metric == _MANHATTAN: return manhattan() elif metric == _MINKOWSKI: p = kwargs.get(_MINKOWSKI_P, 2) return minkowski(p) elif metric == _CHEBYSHEV: return chebyshev() elif metric == _COSINE: return cosine() else: raise ValueError('metric must be a string or callable')