In this notebook, we study how homeostasis (cooperation) may be an essential ingredient for this algorithm, which otherwise works on a winner-take-all basis (competition). This extension was published as Perrinet, Neural Computation (2010) (see http://invibe.net/LaurentPerrinet/Publications/Perrinet10shl ). Compared to the previous post, we have integrated the faster code into https://github.com/bicv/SHL_scripts.

See also the other posts on unsupervised learning.

This is joint work with Victor Boutin.

Summary: using the functions from the module allows us to achieve homeostasis in the coding.

using fast Pcum functions from shl_scripts

In [1]:
%load_ext autoreload
%autoreload 2
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import numpy as np
np.set_printoptions(formatter = dict( float = lambda x: "%.3g" % x ), precision=3, suppress=True, threshold=np.inf)
import time
In [2]:
DEBUG = True
DEBUG = False
if not DEBUG:
    matname = '2017-05-31_Testing_COMPs'
    DEBUG_DOWNSCALE = 1
else:
    matname = '2017-05-31_Testing_COMPs-DEBUG'
    DEBUG_DOWNSCALE = 10

seed = 42
nb_quant = 512
C = 5.
do_sym = False

from shl_scripts.shl_experiments import SHL
shl = SHL(DEBUG_DOWNSCALE=DEBUG_DOWNSCALE, seed=seed, 
           eta=0.05, verbose=2, record_each=50, n_iter=1000, eta_homeo=0., alpha_homeo=1., 
          do_sym=do_sym, nb_quant=nb_quant, C=C)
data = shl.get_data(matname=matname)
loading the data called : data_cache/2017-05-31_Testing_COMPs_data
In [3]:
test_size = data.shape[0]//2
data_training = data[:test_size, :]
data_test = data[test_size:, :]
# DEBUG: overwrite the half/half split with a much smaller test set
test_size = data.shape[0]//20
data_training = data[:(data.shape[0]-test_size), :].copy()
data_test = data[:test_size, :].copy()

We start off with a short learning phase with no homeostasis, such that we end up with an unbalanced dictionary:

In [4]:
dico_partial_learning = shl.learn_dico(data=data_training, matname=matname)
loading the dico called : data_cache/2017-05-31_Testing_COMPs_dico.pkl
In [5]:
fig, ax = shl.show_dico(dico_partial_learning, data=data, title=matname)
fig.show()
fig, ax = shl.time_plot(dico_partial_learning, variable='prob_active');
fig.show()

classical unsupervised learning with Matching Pursuit
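Before looking at the coding of the test set, it is worth recalling what the baseline encoder does. The sketch below is a bare-bones Matching Pursuit with a normalized dictionary; it only spells out the winner-take-all loop, while the actual mp() in shl_scripts adds options on top of it (do_sym, the C rescaling, the P_cum modulation functions, the fast mode), so this is an illustration rather than the library's implementation.

def mp_sketch(data, dictionary, l0_sparseness):
    # Bare-bones Matching Pursuit: greedily pick the atom with the largest
    # correlation, record its coefficient, subtract its contribution, repeat.
    n_samples, n_pixels = data.shape
    n_dictionary = dictionary.shape[0]
    sparse_code = np.zeros((n_samples, n_dictionary))
    for i_sample in range(n_samples):
        residual = data[i_sample].copy()
        for _ in range(l0_sparseness):
            corr = dictionary @ residual              # correlation with each (normalized) atom
            ind = np.argmax(np.abs(corr))             # winner-take-all selection
            sparse_code[i_sample, ind] += corr[ind]   # store / accumulate the coefficient
            residual -= corr[ind] * dictionary[ind]   # explain away its contribution
    return sparse_code

# sparse_code_sketch = mp_sketch(data_test, dico_partial_learning.dictionary, l0_sparseness=shl.l0_sparseness)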

In [6]:
n_samples, n_pixels = data_test.shape
n_dictionary, n_pixels = dico_partial_learning.dictionary.shape
norm_each_filter = np.sqrt(np.sum(dico_partial_learning.dictionary**2, axis=1))
dico_partial_learning.dictionary /= norm_each_filter[:,np.newaxis]

sparse_code_mp = shl.code(data_test, dico_partial_learning, matname=matname)

from shl_scripts.shl_tools import plot_proba_histogram
fig, ax = plot_proba_histogram(sparse_code_mp)
loading the code called : data_cache/2017-05-31_Testing_COMPs_coding.npy
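As a sanity check, the quantity shown in this histogram can be recomputed directly from the sparse code as the fraction of patches in which each atom is active (that is, has a non-zero coefficient); this recomputes the statistic by hand, independently of plot_proba_histogram:

p_active = np.count_nonzero(sparse_code_mp, axis=0) / sparse_code_mp.shape[0]
print('activation probability: mean=%.3f, min=%.3f, max=%.3f'
      % (p_active.mean(), p_active.min(), p_active.max()))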
In [7]:
from shl_scripts.shl_learn import get_P_cum
P_cum = get_P_cum(sparse_code_mp, nb_quant=shl.nb_quant, C=C, do_sym=do_sym)
from shl_scripts.shl_tools import plot_P_cum
fig, ax = plot_P_cum(P_cum, verbose=False, alpha=.05)
ax.set_ylim(0.92, 1.01);
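To make the meaning of these modulation functions explicit, here is a naive re-implementation of the underlying idea: for each atom, accumulate a histogram of its rescaled non-zero coefficients and take the normalized cumulative sum. The rescaling and quantization details of the actual get_P_cum (its C and do_sym arguments in particular) may differ, so this is only an illustration.

def naive_P_cum(sparse_code, nb_quant=512):
    # For each dictionary element, the empirical cumulative distribution of
    # its non-zero coefficients, rescaled to [0, 1] by the global maximum.
    n_samples, n_dictionary = sparse_code.shape
    P_cum = np.zeros((n_dictionary, nb_quant))
    coeffs = np.abs(sparse_code)
    scale = coeffs.max() + 1e-16
    edges = np.linspace(0, 1, nb_quant + 1)
    for i in range(n_dictionary):
        c = coeffs[:, i]
        c = c[c > 0] / scale
        hist, _ = np.histogram(c, bins=edges)
        cumsum = np.cumsum(hist).astype(float)
        if cumsum[-1] > 0:
            P_cum[i, :] = cumsum / cumsum[-1]
        else:  # unused atom: fall back to a flat modulation function
            P_cum[i, :] = np.linspace(0, 1, nb_quant)
    return P_cum

# P_cum_naive = naive_P_cum(sparse_code_mp, nb_quant=shl.nb_quant)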

testing that COMP with fixed Pcum is equivalent to MP (slow mode)
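The intuition behind this test: if each row of P_cum is the identity mapping on [0, 1], then applying it to the rescaled coefficients leaves their ranking untouched, so the argmax selection of COMP picks the same atom as plain MP. A toy check of this invariance (assuming an interpolated lookup of the modulation function):

grid = np.linspace(0, 1, 512, endpoint=True)
flat = grid.copy()                       # flat modulation function = identity
c = np.random.rand(324)                  # toy rescaled correlations, one per atom
z = np.interp(c, grid, flat)             # modulated values
assert np.argmax(z) == np.argmax(c)      # same winner as plain Matching Pursuit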

In [8]:
n_samples, nb_filter = sparse_code_mp.shape
P_cum = np.linspace(0, 1, shl.nb_quant, endpoint=True)[np.newaxis, :] * np.ones((n_dictionary, 1))

from shl_scripts.shl_encode import mp
sparse_code_mp = mp(data_test, dico_partial_learning.dictionary, P_cum=None, verbose=1, do_sym=do_sym, C=C, do_fast=False)
sparse_code_comp = mp(data_test, dico_partial_learning.dictionary, P_cum=P_cum, verbose=1, do_sym=do_sym, C=C, do_fast=False)
print('Relative difference = ', np.sum((sparse_code_mp - sparse_code_comp)**2)/np.sum((sparse_code_mp)**2))
coding duration : 2.2479662895202637
coding duration : 151.48856401443481
Relative difference =  0.0
In [9]:
fig, ax = plot_proba_histogram(sparse_code_mp)
fig.show()
fig, ax = plot_proba_histogram(sparse_code_comp)
In [10]:
from shl_scripts.shl_tools import plot_scatter_MpVsTrue

fig, ax = plot_scatter_MpVsTrue(sparse_code_mp, sparse_code_comp, xlabel='MP', ylabel='flat-COMP')

testing that COMP with fixed Pcum is equivalent to MP (fast mode)

In [11]:
n_samples, nb_filter = sparse_code_mp.shape
P_cum = np.linspace(0, 1, shl.nb_quant, endpoint=True)[np.newaxis, :] * np.ones((n_dictionary, 1))

from shl_scripts.shl_encode import mp
sparse_code_mp = mp(data_test, dico_partial_learning.dictionary, P_cum=None, verbose=1, do_sym=do_sym, C=C)
sparse_code_comp = mp(data_test, dico_partial_learning.dictionary, P_cum=P_cum, verbose=1, do_sym=do_sym, C=C)
print('Relative difference = ', np.sum((sparse_code_mp - sparse_code_comp)**2)/np.sum((sparse_code_mp)**2))
coding duration : 2.0314648151397705
coding duration : 4.705987215042114
Relative difference =  0.0

This fast mode introduces some errors (at approximately the 1% level, although the difference is exactly zero in this flat-P_cum test) but yields roughly a 30-fold speed-up here (about 4.7 s versus 151 s for the slow mode above).
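As a rough illustration of where this trade-off could come from, the sketch below contrasts an interpolated lookup of a modulation function with a direct nearest-bin lookup; treating this as what the do_fast option does is an assumption on our side, not a copy of the shl_scripts implementation.

nb_quant = 512
grid = np.linspace(0, 1, nb_quant)
P_cum_row = grid**2                                # one atom's modulation function (toy, non-flat)
c = np.random.rand(100000)                         # rescaled coefficients in [0, 1]

# slow: piecewise-linear interpolation between quantization bins
z_slow = np.interp(c, grid, P_cum_row)
# fast: index the nearest lower bin directly, no interpolation
idx = np.minimum((c * nb_quant).astype(int), nb_quant - 1)
z_fast = P_cum_row[idx]

print('max lookup deviation:', np.abs(z_slow - z_fast).max())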

In [12]:
fig, ax = plot_proba_histogram(sparse_code_mp)
fig.show()
fig, ax = plot_proba_histogram(sparse_code_comp)
In [13]:
from shl_scripts.shl_tools import plot_scatter_MpVsTrue

fig, ax = plot_scatter_MpVsTrue(sparse_code_mp, sparse_code_comp, xlabel='MP', ylabel='flat-COMP')
# a random permutation of the dictionary indices and its inverse
shuffled = np.random.permutation(n_dictionary).astype(int)
deshuffled = np.arange(n_dictionary)
deshuffled[shuffled] = np.arange(n_dictionary)
shuffled, deshuffled

gradient descent

In [14]:
P_cum = np.linspace(0, 1, shl.nb_quant, endpoint=True)[np.newaxis, :] * np.ones((n_dictionary, 1))
print('Shape of modulation function', P_cum.shape)
from shl_scripts.shl_tools import plot_proba_histogram
from shl_scripts.shl_learn import update_P_cum

eta_homeo = .01
verbose = 1

for i in range(1000//DEBUG_DOWNSCALE):
    sparse_code = mp(data_test, dico_partial_learning.dictionary, l0_sparseness=shl.l0_sparseness, P_cum=P_cum, C=C, verbose=0, do_sym=do_sym)
    P_cum = update_P_cum(P_cum, sparse_code, eta_homeo=eta_homeo, C=C, nb_quant=shl.nb_quant, verbose=verbose, do_sym=do_sym)
    if i % (100//DEBUG_DOWNSCALE) == 0:
        print('Learning step', i)
        fig, ax = plot_proba_histogram(sparse_code)
        plt.show()
Shape of modulation function (324, 512)
Learning step 0
Learning step 100
Learning step 200
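For reference, the homeostatic update applied at each step above can be read as a relaxation of the modulation functions towards those estimated from the most recent sparse code. A minimal sketch, assuming update_P_cum follows this standard moving-average rule (the actual implementation may handle the quantization and do_sym details differently):

from shl_scripts.shl_learn import get_P_cum

def update_P_cum_sketch(P_cum, sparse_code, eta_homeo=.01, nb_quant=512, C=5., do_sym=False):
    # Relax the running modulation functions towards the cumulative
    # histograms of the current sparse code (learning rate eta_homeo).
    P_cum_new = get_P_cum(sparse_code, nb_quant=nb_quant, C=C, do_sym=do_sym)
    return (1 - eta_homeo) * P_cum + eta_homeo * P_cum_new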