Digits dataset

[1]:

import numpy as np

from sklearn.datasets import load_digits
from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import PCA

from tdamapper.core import MapperAlgorithm
from tdamapper.cover import CubicalCover
from tdamapper.clustering import FailSafeClustering
from tdamapper.plot import MapperPlot


# Load the labelled digits dataset: X holds the samples, y the digit labels.
X, y = load_digits(return_X_y=True)

# Compute the lens values as a 2-component PCA projection of X.
# random_state is pinned because, depending on the scikit-learn version, the
# default 'auto' solver may select the randomized SVD for data of this size;
# without a fixed seed the lens (and anything built from it) could then vary
# slightly between runs.
lens = PCA(n_components=2, random_state=42).fit_transform(X)

Build Mapper graph

[2]:

# Cubical cover configured with 10 intervals and an overlap fraction of 0.65.
lens_cover = CubicalCover(n_intervals=10, overlap_frac=0.65)

# Agglomerative clustering with 10 clusters, wrapped in FailSafeClustering to
# prevent clustering failures; verbose output is turned off.
safe_clustering = FailSafeClustering(
    clustering=AgglomerativeClustering(n_clusters=10),
    verbose=False,
)

# Assemble the Mapper algorithm and build the graph from the data and its lens.
mapper_algo = MapperAlgorithm(cover=lens_cover, clustering=safe_clustering)
mapper_graph = mapper_algo.fit_transform(X, lens)

Plot Mapper graph with mean

[3]:

# Plot options for the "mean" view of the Mapper graph.
mean_plot_options = dict(
    colors=y,          # color according to digit values
    cmap='jet',        # Jet colormap, used for classes
    agg=np.nanmean,    # aggregate colors on graph nodes by their mean
    dim=2,
    iterations=400,    # presumably layout iterations -- confirm in tdamapper docs
    seed=42,           # fixed seed so the plot is reproducible
)
mapper_plot = MapperPlot(X, mapper_graph, **mean_plot_options)

# Render the figure; scroll-wheel zoom is enabled in the notebook output.
fig_mean = mapper_plot.plot(title='digit (mean)', width=600, height=600)
fig_mean.show(renderer='notebook_connected', config={'scrollZoom': True})

Plot Mapper graph with standard deviation

[4]:

# Re-color the existing Mapper plot for the "standard deviation" view.
std_color_options = dict(
    colors=y,          # still colored from the digit values
    cmap='viridis',    # Viridis colormap, used for ranges
    agg=np.nanstd,     # aggregate colors on graph nodes by their std
)
fig_std = mapper_plot.with_colors(**std_color_options).plot(
    title='digit (std)',
    width=600,
    height=600,
)
fig_std.show(renderer='notebook_connected', config={'scrollZoom': True})