Digits dataset
[1]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import PCA
from tdamapper.core import MapperAlgorithm
from tdamapper.cover import CubicalCover
from tdamapper.clustering import FailSafeClustering
from tdamapper.plot import MapperPlot
# Load the labelled digits dataset as a (samples, labels) pair.
X, y = load_digits(return_X_y=True)

# Lens values: the samples projected by a 2-component PCA.
projector = PCA(n_components=2)
lens = projector.fit_transform(X)
Build Mapper graph
[2]:
# Cubical cover configured with n_intervals=10 and overlap_frac=0.65.
cubical_cover = CubicalCover(n_intervals=10, overlap_frac=0.65)

# Agglomerative clustering (10 clusters) wrapped in FailSafeClustering,
# which is used here to guard against clustering failures.
safe_clustering = FailSafeClustering(
    clustering=AgglomerativeClustering(n_clusters=10),
    verbose=False,
)

mapper_algo = MapperAlgorithm(cover=cubical_cover, clustering=safe_clustering)

# Build the Mapper graph from the data and its lens values.
mapper_graph = mapper_algo.fit_transform(X, lens)
Plot Mapper graph with mean
[3]:
# Keyword options forwarded unchanged to MapperPlot
# (presumably layout dimension, iteration count and random seed -- confirm
# against the tda-mapper documentation).
layout_options = {'dim': 2, 'iterations': 400, 'seed': 42}

mapper_plot = MapperPlot(
    X,
    mapper_graph,
    colors=y,        # We color according to digit values
    cmap='jet',      # Jet colormap, used for classes
    agg=np.nanmean,  # We aggregate on graph nodes according to mean
    **layout_options,
)

# Render the mean-colored figure and display it with scroll zoom enabled.
fig_mean = mapper_plot.plot(title='digit (mean)', width=600, height=600)
fig_mean.show(renderer='notebook_connected', config={'scrollZoom': True})
Plot Mapper graph with standard deviation
[4]:
# Reuse the existing plot with a different coloring: the digit labels are
# aggregated on each node by standard deviation instead of mean.
fig_std = mapper_plot.with_colors(
    colors=y,        # We color according to digit values
    cmap='viridis',  # Viridis colormap, used for ranges
    agg=np.nanstd,   # We aggregate on graph nodes according to std
).plot(title='digit (std)', width=600, height=600)
fig_std.show(renderer='notebook_connected', config={'scrollZoom': True})