This notebook computes the Fermat distance between images in the MNIST data set. The data set is read from https://www.kaggle.com/oddrationale/mnist-in-csv#mnist_train.csv
Contents:
1- Preprocessing data
2- Computing Fermat distance
3- Visualization using t-SNE
import random
import numpy as np
from scipy.spatial import distance_matrix
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from fermat import Fermat
Read the MNIST data downloaded from https://www.kaggle.com/oddrationale/mnist-in-csv#mnist_train.csv
# Parse the MNIST CSV. In every row the first field is the digit label (kept
# as a string) and the remaining fields are pixel intensities, which are
# divided by 255 so that a 0-255 input ends up in [0, 1].
labels = []
data = []
with open('../data/mnist_train.csv', 'r') as csv_file:
    # Iterate the file lazily rather than materialising it with readlines().
    for line_no, line in enumerate(csv_file):
        # Strip only the line terminator: slicing off the last character
        # would eat a digit when the final line has no trailing newline.
        row = line.rstrip('\n').split(',')
        if row == ['']:
            # Blank line: nothing to parse.
            continue
        try:
            pixels = [float(x) / 255 for x in row[1:]]
        except ValueError:
            # NOTE(review): the Kaggle export appears to start with a
            # "label,1x1,..." header row -- confirm against the local file.
            # Only the very first line is tolerated as a header; a malformed
            # row anywhere else is still reported.
            if line_no == 0:
                continue
            raise
        labels.append(row[0])
        data.append(pixels)
# Work on a sample only: the first `test_size` data points.
test_size = 1000
# Pairwise Euclidean distances between the sampled points
# (distance_matrix uses the 2-norm by default).
sample = data[:test_size]
distances = distance_matrix(sample, sample)
# Initialize the Fermat model.
# The distances are computed with the approximate method, using the Euclidean
# distances as input.
# NOTE(review): path_method='L' presumably selects the landmark-based solver,
# with `k` and `landmarks` as its tuning parameters -- confirm against the
# fermat package documentation.
f = Fermat(alpha=4, path_method='L', k=30, landmarks=30)
# Fit the Fermat model on the Euclidean distance matrix.
euclidean_matrix = np.matrix(distances)
f.fit(euclidean_matrix)
def plot_figure(data, S, s_labels, xlabel):
    """Scatter-plot the first ``S`` two-dimensional points, one series per digit.

    Args:
        data: Container indexed as ``data[i, 0]`` (x) and ``data[i, 1]`` (y).
        S: Number of leading points to plot; indices ``0 .. S-1`` are read
            from both ``data`` and ``s_labels``.
        s_labels: Label of each point. Labels are compared by their string
            form against ``'0'`` .. ``'9'``, so both ``'7'`` and ``7`` are
            assigned to the digit-7 series.
        xlabel: Text used as the x-axis label.
    """
    plt.figure(figsize=(20, 10))
    plt.xlabel(xlabel, fontsize='large')
    for digit in range(10):
        wanted = str(digit)
        # Select the matching indices once and reuse them for both axes.
        members = [i for i in range(S) if str(s_labels[i]) == wanted]
        xs = [data[i, 0] for i in members]
        ys = [data[i, 1] for i in members]
        plt.plot(xs, ys, 'o', label=wanted)
    plt.legend(numpoints=1)
# `distances` is already a pairwise distance matrix, so t-SNE has to be told
# to consume it as one (metric='precomputed'); with the default metric every
# row would be treated as a feature vector instead. A precomputed metric
# cannot be combined with PCA initialisation, hence init='random'.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` --
# confirm against the installed version.
tsne = TSNE(n_components=2, perplexity=60, n_iter=1000,
            metric='precomputed', init='random')
tsne_euclidean = tsne.fit_transform(distances)
plot_figure(tsne_euclidean, test_size, labels[:test_size], "TSNE on euclidean distance")