# Copyright (c) Facebook, Inc. and its affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. """ This is the training code for the link and code. Especially the neighbors_kmeans function implements the EM-algorithm to find the appropriate weightings and cluster them. """ from __future__ import print_function import time import numpy as np import faiss #---------------------------------------------------------- # Utils #---------------------------------------------------------- def sanitize(x): return np.ascontiguousarray(x, dtype='float32') def train_kmeans(x, k, ngpu, max_points_per_centroid=256): "Runs kmeans on one or several GPUs" d = x.shape[1] clus = faiss.Clustering(d, k) clus.verbose = True clus.niter = 20 clus.max_points_per_centroid = max_points_per_centroid if ngpu == 0: index = faiss.IndexFlatL2(d) else: res = [faiss.StandardGpuResources() for i in range(ngpu)] flat_config = [] for i in range(ngpu): cfg = faiss.GpuIndexFlatConfig() cfg.useFloat16 = False cfg.device = i flat_config.append(cfg) if ngpu == 1: index = faiss.GpuIndexFlatL2(res[0], d, flat_config[0]) else: indexes = [faiss.GpuIndexFlatL2(res[i], d, flat_config[i]) for i in range(ngpu)] index = faiss.IndexReplicas() for sub_index in indexes: index.addIndex(sub_index) # perform the training clus.train(x, index) centroids = faiss.vector_float_to_array(clus.centroids) stats = clus.iteration_stats stats = [stats.at(i) for i in range(stats.size())] obj = np.array([st.obj for st in stats]) print("final objective: %.4g" % obj[-1]) return centroids.reshape(k, d) #---------------------------------------------------------- # Learning the codebook from neighbors #---------------------------------------------------------- # works with both a full Inn table and dynamically generated neighbors def get_Inn_shape(Inn): if type(Inn) != tuple: return Inn.shape return Inn[:2] def get_neighbor_table(x_coded, Inn, i): if type(Inn) != tuple: return x_coded[Inn[i,:],:] rfn = x_coded M, d = rfn.M, rfn.index.d out = np.zeros((M + 1, d), dtype='float32') int_i = int(i) rfn.get_neighbor_table(int_i, faiss.swig_ptr(out)) _, _, sq = Inn return out[:, sq * rfn.dsub : (sq + 1) * rfn.dsub] # Function that produces the best regression values from the vector # and its neighbors def regress_from_neighbors (x, x_coded, Inn): (N, knn) = get_Inn_shape(Inn) betas = np.zeros((N,knn)) t0 = time.time() for i in range (N): xi = x[i,:] NNi = get_neighbor_table(x_coded, Inn, i) betas[i,:] = np.linalg.lstsq(NNi.transpose(), xi, rcond=0.01)[0] if i % (N / 10) == 0: print ("[%d:%d] %6.3fs" % (i, i + N / 10, time.time() - t0)) return betas # find the best beta minimizing ||x-x_coded[Inn,:]*beta||^2 def regress_opt_beta (x, x_coded, Inn): (N, knn) = get_Inn_shape(Inn) d = x.shape[1] # construct the linear system to be solved X = np.zeros ((d*N)) Y = np.zeros ((d*N, knn)) for i in range (N): X[i*d:(i+1)*d] = x[i,:] neighbor_table = get_neighbor_table(x_coded, Inn, i) Y[i*d:(i+1)*d, :] = neighbor_table.transpose() beta_opt = np.linalg.lstsq(Y, X, rcond=0.01)[0] return beta_opt # Find the best encoding by minimizing the reconstruction error using # a set of pre-computed beta values def assign_beta (beta_centroids, x, x_coded, Inn, verbose=True): if type(Inn) == tuple: return assign_beta_2(beta_centroids, x, x_coded, Inn) (N, knn) = Inn.shape x_ibeta = np.zeros ((N), dtype='int32') t0= time.time() for i in range (N): NNi = x_coded[Inn[i,:]] # Consider all possible betas for the encoding and compute the # encoding error x_reg_all = np.dot (beta_centroids, NNi) err = ((x_reg_all - x[i,:]) ** 2).sum(axis=1) x_ibeta[i] = err.argmin() if verbose: if i % (N / 10) == 0: print ("[%d:%d] %6.3fs" % (i, i + N / 10, time.time() - t0)) return x_ibeta # Reconstruct a set of vectors using the beta_centroids, the # assignment, the encoded neighbors identified by the list Inn (which # includes the vector itself) def recons_from_neighbors (beta_centroids, x_ibeta, x_coded, Inn): (N, knn) = Inn.shape x_rec = np.zeros(x_coded.shape) t0= time.time() for i in range (N): NNi = x_coded[Inn[i,:]] x_rec[i, :] = np.dot (beta_centroids[x_ibeta[i]], NNi) if i % (N / 10) == 0: print ("[%d:%d] %6.3fs" % (i, i + N / 10, time.time() - t0)) return x_rec # Compute a EM-like algorithm trying at optimizing the beta such as they # minimize the reconstruction error from the neighbors def neighbors_kmeans (x, x_coded, Inn, K, ngpus=1, niter=5): # First compute centroids using a regular k-means algorithm betas = regress_from_neighbors (x, x_coded, Inn) beta_centroids = train_kmeans( sanitize(betas), K, ngpus, max_points_per_centroid=1000000) _, knn = get_Inn_shape(Inn) d = x.shape[1] rs = np.random.RandomState() for iter in range(niter): print('iter', iter) idx = assign_beta (beta_centroids, x, x_coded, Inn, verbose=False) hist = np.bincount(idx) for cl0 in np.where(hist == 0)[0]: print(" cluster %d empty, split" % cl0, end=' ') cl1 = idx[np.random.randint(idx.size)] pos = np.nonzero (idx == cl1)[0] pos = rs.choice(pos, pos.size / 2) print(" cl %d -> %d + %d" % (cl1, len(pos), hist[cl1] - len(pos))) idx[pos] = cl0 hist = np.bincount(idx) tot_err = 0 for k in range (K): pos = np.nonzero (idx == k)[0] npos = pos.shape[0] X = np.zeros (d*npos) Y = np.zeros ((d*npos, knn)) for i in range(npos): X[i*d:(i+1)*d] = x[pos[i],:] neighbor_table = get_neighbor_table(x_coded, Inn, pos[i]) Y[i*d:(i+1)*d, :] = neighbor_table.transpose() sol, residuals, _, _ = np.linalg.lstsq(Y, X, rcond=0.01) if residuals.size > 0: tot_err += residuals.sum() beta_centroids[k, :] = sol print(' err=%g' % tot_err) return beta_centroids # assign the betas in C++ def assign_beta_2(beta_centroids, x, rfn, Inn): _, _, sq = Inn if rfn.k == 1: return np.zeros(x.shape[0], dtype=int) # add dummy dimensions to beta_centroids and x all_beta_centroids = np.zeros( (rfn.nsq, rfn.k, rfn.M + 1), dtype='float32') all_beta_centroids[sq] = beta_centroids all_x = np.zeros((len(x), rfn.d), dtype='float32') all_x[:, sq * rfn.dsub : (sq + 1) * rfn.dsub] = x rfn.codes.clear() rfn.ntotal = 0 faiss.copy_array_to_vector( all_beta_centroids.ravel(), rfn.codebook) rfn.add_codes(len(x), faiss.swig_ptr(all_x)) codes = faiss.vector_to_array(rfn.codes) codes = codes.reshape(-1, rfn.nsq) return codes[:, sq] ####################################################### # For usage from bench_storages.py def train_beta_codebook(rfn, xb_full, niter=10): beta_centroids = [] for sq in range(rfn.nsq): d0, d1 = sq * rfn.dsub, (sq + 1) * rfn.dsub print("training subquantizer %d/%d on dimensions %d:%d" % ( sq, rfn.nsq, d0, d1)) beta_centroids_i = neighbors_kmeans( xb_full[:, d0:d1], rfn, (xb_full.shape[0], rfn.M + 1, sq), rfn.k, ngpus=0, niter=niter) beta_centroids.append(beta_centroids_i) rfn.ntotal = 0 rfn.codes.clear() rfn.codebook.clear() return np.stack(beta_centroids)