"""
Utilities for computing metrics.
This module provides functions to calculate various distance metrics. A metric,
or distance function, is a function that maps a pair of points to a
non-negative real number representing the "distance" between them. For a
function to qualify as a valid metric, it must satisfy the following properties:
1. Symmetry: The distance between two points is the same regardless of the
order, i.e.:
:math:`d(x, y) = d(y, x)` for all :math:`x` and :math:`y`.
2. Positivity: The distance between two distinct points is always positive,
i.e.:
:math:`d(x, y) > 0` for all distinct :math:`x` and :math:`y`, and
:math:`d(x, x) = 0` for every :math:`x`.
3. Triangle inequality: The distance between two points is less than or equal
to the sum of the distances from a third point, i.e.:
:math:`d(x, z) \\leq d(x, y) + d(y, z)` for all points :math:`x, y, z`.
Supported distance metrics include:
- *Euclidean*: The square root of the sum of squared differences between the
components of vectors.
- *Manhattan*: The sum of the absolute differences between the components of
vectors.
- *Minkowski*: A generalization of the Manhattan, Euclidean, and Chebyshev
distances, parameterized by an order `p`.
- *Chebyshev*: The maximum absolute difference between the components of vectors.
- *Cosine*: A distance on unit vectors based on cosine similarity.
"""
from typing import Any, Literal, Union, get_args
import numpy as np
import tdamapper.utils._metrics as _metrics
from tdamapper.protocols import Metric
# Closed set of metric names accepted wherever a metric is given as a string.
MetricLiteral = Literal["euclidean", "manhattan", "minkowski", "chebyshev", "cosine"]
[docs]
def get_supported_metrics() -> list[MetricLiteral]:
    """
    List the names of the built-in metrics.

    The names are read from the arguments of ``MetricLiteral``, in their
    declaration order.

    :return: A new list containing the supported metric names.
    """
    return [*get_args(MetricLiteral)]
[docs]
def euclidean() -> Metric[Any]:
    """
    Provide the Euclidean metric for vectors.

    The distance between two vectors is the square root of the sum of the
    squared differences between their components.

    :return: The Euclidean distance function.
    """
    distance: Metric[Any] = _metrics.euclidean
    return distance
[docs]
def manhattan() -> Metric[Any]:
    """
    Provide the Manhattan metric for vectors.

    The distance between two vectors is the sum of the absolute differences
    between their components.

    :return: The Manhattan distance function.
    """
    distance: Metric[Any] = _metrics.manhattan
    return distance
[docs]
def chebyshev() -> Metric[Any]:
    """
    Provide the Chebyshev metric for vectors.

    The distance between two vectors is the largest absolute difference
    between their components.

    :return: The Chebyshev distance function.
    """
    distance: Metric[Any] = _metrics.chebyshev
    return distance
[docs]
def minkowski(p: Union[int, float]) -> Metric[Any]:
    """
    Provide the Minkowski metric of order p for vectors.

    The Minkowski distance generalizes several other metrics, and the
    dedicated implementation is returned for the orders that match one:

    - p = 1 gives the Manhattan distance,
    - p = 2 gives the Euclidean distance,
    - an infinite p gives the Chebyshev distance.

    For any other order, the returned function binds p and evaluates the
    general Minkowski distance.

    :param p: The order of the Minkowski distance.
    :return: The Minkowski distance function.
    """

    def dist(x: Any, y: Any) -> float:
        # General case: p is captured from the enclosing call.
        return _metrics.minkowski(p, x, y)

    selected: Metric[Any]
    if p == 1:
        selected = manhattan()
    elif p == 2:
        selected = euclidean()
    elif np.isinf(p):
        selected = chebyshev()
    else:
        selected = dist
    return selected
[docs]
def cosine() -> Metric[Any]:
    """
    Provide the cosine metric for vectors.

    The cosine similarity :math:`s` of two vectors lies between -1.0 and 1.0:

    - 1.0 means the vectors point in the same direction,
    - 0.0 means the vectors are orthogonal (perpendicular),
    - -1.0 means the vectors are diametrically opposed.

    The cosine distance is obtained from the similarity as
    :math:`d(x, y) = \\sqrt{2 \\cdot (1 - s(x, y))}`, a definition chosen so
    that the triangle inequality holds on unit vectors.

    :return: The cosine distance function.
    """
    distance: Metric[Any] = _metrics.cosine
    return distance
[docs]
def get_metric(metric: Union[MetricLiteral, Metric[Any]], **kwargs: Any) -> Metric[Any]:
    """
    Resolve a metric specification into a distance function.

    :param metric: Either a callable, which is handed back unchanged, or one
        of the predefined names returned by
        :func:`tdamapper.utils.metrics.get_supported_metrics`.
    :param kwargs: Additional keyword arguments forwarded to the factory of
        the named metric (e.g., 'p' for Minkowski distance). They are not
        used when `metric` is callable.
    :return: The selected distance metric function.
    :raises ValueError: If `metric` is neither callable nor a supported name.
    """
    if callable(metric):
        return metric
    # Names are compared with ``==`` in declaration order, so values that are
    # unhashable or unknown fall through to the ValueError below.
    factories: tuple[tuple[str, Any], ...] = (
        ("euclidean", euclidean),
        ("manhattan", manhattan),
        ("minkowski", minkowski),
        ("chebyshev", chebyshev),
        ("cosine", cosine),
    )
    for name, factory in factories:
        if metric == name:
            return factory(**kwargs)
    raise ValueError("metric must be a known string or callable")