MultiIndex
|
#include <bitset>
#include <fstream>
#include <ios>
#include <iostream>
#include <map>
#include <set>
#include <vector>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include "mkl_cblas.h"
#include "multitable.hpp"
Go to the source code of this file.
Classes | |
struct | MultiIndex< Record > |
struct | IndexConfig |
struct | RerankADC8 |
struct | RerankADC16 |
Typedefs | |
typedef float | Coord |
typedef float | Distance |
typedef int | Dimensions |
typedef int | PointId |
typedef int | ClusterId |
typedef vector< Coord > | Point |
typedef vector< Point > | Points |
typedef vector< PointId > | PointIds |
typedef set< PointId > | SetPoints |
typedef vector< ClusterId > | ClusterIds |
typedef vector< ClusterId > | CoarseQuantization |
typedef unsigned char | FineClusterId |
typedef vector< FineClusterId > | FineQuantization |
typedef vector< SetPoints > | ClustersToPoints |
typedef std::vector< ClusterId > | PointsToClusters |
typedef std::vector< Point > | Centroids |
Enumerations | |
enum | PointType { FVEC, BVEC } |
enum | RerankMode { USE_RESIDUALS, USE_INIT_POINTS } |
Functions | |
Distance | Eucldistance (const Point &x, const Point &y) |
Distance | Eucldistance (const Point &x, const Point &y, Dimensions start, Dimensions finish) |
template<class T , class U > | |
U | Round (T number) |
template<class T , class U > | |
void | ReadPoints (const string &filename, vector< vector< U > > *points, int count) |
template<class T , class U > | |
void | ReadVector (ifstream &input, vector< U > *v) |
template<class T > | |
void | ReadVocabulary (ifstream &input, Dimensions dimension, int vocabulary_size, Centroids *centroids) |
template<class T > | |
void | ReadVocabularies (const string &filename, Dimensions space_dimension, vector< Centroids > *centroids) |
template<class T > | |
void | ReadFineVocabs (const string &fine_vocabs_filename, vector< Centroids > *fine_vocabs) |
void | GetSubpoints (const Points &points, const Dimensions start_dim, const Dimensions final_dim, Points *subpoints) |
ClusterId | GetNearestClusterId (const Point &point, const Centroids &centroids, const Dimensions start_dim, const Dimensions final_dim) |
void | GetResidual (const Point &point, const CoarseQuantization &coarse_quantizations, const vector< Centroids > &centroids, Point *residual) |
void | GetResidual (const Point &point, const CoarseQuantization &coarse_quantizations, const vector< Centroids > &centroids, Coord *residual) |
void | GetNearestClusterIdsForPointSubset (const Points &points, const Centroids &centroids, const PointId start_pid, const PointId final_pid, vector< ClusterId > *nearest) |
void | GetNearestClusterIdsForSubpoints (const Points &points, const Centroids &centroids, const Dimensions start_dim, const Dimensions final_dim, int threads_count, vector< ClusterId > *nearest) |
void | GetPointsCoarseQuaintizations (const Points &points, const vector< Centroids > &centroids, const int threads_count, vector< CoarseQuantization > *coarse_quantizations) |
typedef int ClusterId |
typedef vector<ClusterId> ClusterIds |
typedef vector<SetPoints> ClustersToPoints |
typedef vector<ClusterId> CoarseQuantization |
typedef float Coord |
typedef int Dimensions |
typedef float Distance |
typedef unsigned char FineClusterId |
typedef vector<FineClusterId> FineQuantization |
typedef int PointId |
typedef std::vector<ClusterId> PointsToClusters |
enum PointType |
enum RerankMode |
Distance Eucldistance | ( | const Point & | x, |
const Point & | y | ||
) |
Function calculates the squared Euclidean distance between two points (the points must have the same dimensionality).
x | first point |
y | second point |
Distance Eucldistance | ( | const Point & | x, |
const Point & | y, | ||
Dimensions | start, | ||
Dimensions | finish | ||
) |
Function calculates the squared Euclidean distance between a point of small dimensionality and a subpoint of a point of larger dimensionality.
x | first point |
y | second point |
start | first dimension of subpoint |
finish | dimension after the last dimension of subpoint |
ClusterId GetNearestClusterId | ( | const Point & | point, |
const Centroids & | centroids, | ||
const Dimensions | start_dim, | ||
const Dimensions | final_dim | ||
) |
This function returns the identifier of the cluster whose centroid is nearest to the subpoint limited by start_dim and final_dim.
point | full point |
centroids | all centroids (function finds the nearest one) |
start_dim | first dimension of subpoint |
final_dim | dimension after the last dimension of subpoint |
void GetNearestClusterIdsForPointSubset | ( | const Points & | points, |
const Centroids & | centroids, | ||
const PointId | start_pid, | ||
const PointId | final_pid, | ||
vector< ClusterId > * | nearest | ||
) |
This function finds nearest cluster identifiers for points from start_pid to final_pid. We need this function for multi-threading
points | all points |
centroids | centroids of clusters |
start_pid | first point for which the function finds the nearest cluster |
final_pid | point after the last point for which the function finds the nearest cluster |
void GetNearestClusterIdsForSubpoints | ( | const Points & | points, |
const Centroids & | centroids, | ||
const Dimensions | start_dim, | ||
const Dimensions | final_dim, | ||
int | threads_count, | ||
vector< ClusterId > * | nearest | ||
) |
This function finds the cluster identifiers nearest to subpoints for a number of points. Subpoints are limited by start_dim and final_dim.
points | all points |
centroids | centroids of clusters |
start_dim | first dimension of subpoint |
final_dim | dimension after the last dimension of subpoint |
threads_count | number of threads |
nearest | result |
void GetPointsCoarseQuaintizations | ( | const Points & | points, |
const vector< Centroids > & | centroids, | ||
const int | threads_count, | ||
vector< CoarseQuantization > * | coarse_quantizations | ||
) |
This function calculates the coarse product quantizations of the points.
points | all points |
centroids | centroids of clusters |
threads_count | number of threads |
coarse_quantizations | result quantizations |
void GetResidual | ( | const Point & | point, |
const CoarseQuantization & | coarse_quantizations, | ||
const vector< Centroids > & | centroids, | ||
Point * | residual | ||
) |
This function calculates quantization residual.
point | initial point |
coarse_quantizations | point coarse quantization |
centroids | lists of centroids |
residual | result residual |
void GetResidual | ( | const Point & | point, |
const CoarseQuantization & | coarse_quantizations, | ||
const vector< Centroids > & | centroids, | ||
Coord * | residual | ||
) |
This function calculates quantization residual.
point | initial point |
coarse_quantizations | point coarse quantization |
centroids | lists of centroids |
residual | pointer to start of residual |
void GetSubpoints | ( | const Points & | points, |
const Dimensions | start_dim, | ||
const Dimensions | final_dim, | ||
Points * | subpoints | ||
) |
This function returns subpoints limited by start_dim and final_dim for every point in points
points | all points |
start_dim | first dimension of subpoint |
final_dim | dimension after the last dimension of subpoint |
subpoints | result subpoints |
void ReadFineVocabs | ( | const string & | fine_vocabs_filename, |
vector< Centroids > * | fine_vocabs | ||
) |
This function reads fine vocabs of centroids
fine_vocabs_filename | file with vocabularies |
fine_vocabs | fine centroids lists |
void ReadPoints | ( | const string & | filename, |
vector< vector< U > > * | points, | ||
int | count | ||
) |
Function reads points written in .fvecs or .bvecs format. Input points have coordinates of type T. Result points have coordinates of type U.
filename | .fvecs or .bvecs file name |
count | how many points to read |
points | result list of read points |
void ReadVector | ( | ifstream & | input, |
vector< U > * | v | ||
) |
Function reads one vector of coordinates of type T. Function assumes that the first int32-number in input stream is vector dimensionality. Result vector will have coordinates of type U.
input | input stream |
v | result vector |
void ReadVocabularies | ( | const string & | filename, |
Dimensions | space_dimension, | ||
vector< Centroids > * | centroids | ||
) |
Function reads vocabularies of centroids produced by matlab script. Function assumes that the first int32 in input is dimensionality of centroids and the second is the number of centroids in each vocabulary
filename | file with vocabularies |
space_dimension | dimensionality of the space (presumably of the full points; confirm against the implementation) |
centroids | result lists of centroids, one per vocabulary |
void ReadVocabulary | ( | ifstream & | input, |
Dimensions | dimension, | ||
int | vocabulary_size, | ||
Centroids * | centroids | ||
) |
Function reads vocabulary of centroids produced by matlab script.
input | input stream |
dimension | one centroid dimensionality |
vocabulary_size | centroids count |
centroids | result centroids |
U Round | ( | T | number | ) | [inline] |
This simple function rounds a number of type T to the nearest value and converts it to type U.