MultiIndex
|
#include <indexer.h>
Public Member Functions | |
MultiIndexer (const int multiplicity=2) | |
void | BuildMultiIndex (const string &points_filename, const string &metainfo_filename, const int points_count, const vector< Centroids > &coarse_vocabs, const vector< Centroids > &fine_vocabs, const RerankMode &mode, const bool build_coarse_quantization, const string &files_prefix, const string &coarse_quantization_filename="") |
Private Member Functions | |
void | PrepareCoarseQuantization (const string &points_filename, const int points_count, const vector< Centroids > &coarse_vocabs) |
void | GetCoarseQuantizationsForSubset (const string &points_filename, const int start_pid, const int subset_size, const vector< Centroids > &coarse_vocabs, vector< vector< ClusterId > > *transposed_coarse_quantizations) |
void | SerializeCoarseQuantizations (const vector< vector< ClusterId > > &transposed_coarse_quantizations, const string &filename) |
void | SerializeMultiIndexFiles () |
void | ConvertPointsInCellsCountToCellEdges () |
void | FillMultiIndex (const string &points_filename, const int points_count, const vector< Centroids > &coarse_vocabs, const vector< Centroids > &fine_vocabs, const RerankMode &mode) |
void | FillMultiIndexForSubset (const string &points_filename, const PointId start_pid, const int points_count, const vector< Centroids > &coarse_vocabs, const vector< Centroids > &fine_vocabs, const RerankMode &mode, Multitable< int > *points_written_in_index) |
void | GetPointCoarseQuantization (const PointId pid, const string &filename, vector< ClusterId > *coarse_quantization) |
void | FillPointRerankInfo (const Point &point, const PointId pid, const vector< Centroids > &fine_vocabs) |
void | RestorePointsInCellsCountFromCourseQuantization (const string &points_filename, const int points_count, const vector< Centroids > &coarse_vocabs) |
int | GetInputCoordSizeof () |
void | ReadPoint (ifstream &input, Point *point) |
void | InitBlasStructures (const vector< Centroids > &coarse_vocabs) |
Private Attributes | |
string | files_prefix_ |
string | coarse_quantization_filename_ |
int | multiplicity_ |
Multitable< int > | point_in_cells_count_ |
MultiIndex< Record > | multiindex_ |
boost::mutex | cell_counts_mutex_ |
vector< float * > | coarse_vocabs_matrices_ |
vector< vector< float > > | coarse_centroids_norms_ |
This is the main class for creating a multiindex for a set of points in a multidimensional space. Clustering and vocabulary learning happen outside of this class; MultiIndexer receives prepared vocabs as input.
MultiIndexer< Record >::MultiIndexer | ( | const int | multiplicity = 2 | ) |
This is the simple MultiIndexer constructor
multiplicity | how many parts input points will be divided into |
void MultiIndexer< Record >::BuildMultiIndex | ( | const string & | points_filename, |
const string & | metainfo_filename, | ||
const int | points_count, | ||
const vector< Centroids > & | coarse_vocabs, | ||
const vector< Centroids > & | fine_vocabs, | ||
const RerankMode & | mode, | ||
const bool | build_coarse_quantization, | ||
const string & | files_prefix, | ||
const string & | coarse_quantization_filename = "" |
||
) |
This is the main function of MultiIndexer
points_filename | file with points in .fvecs or .bvecs format |
points_count | how many points should we index |
coarse_vocabs | vocabularies for coarse quantization |
fine_vocabs | vocabularies for fine quantization for reranking |
mode | determines how rerank info is calculated |
build_coarse_quantization | should we get coarse quantization or not |
files_prefix | all index filenames will have this prefix |
coarse_quantization_filename | file with coarse quantization (if exists) |
void MultiIndexer< Record >::ConvertPointsInCellsCountToCellEdges | ( | ) | [private] |
This function converts counts of points in cells to cell edges
void MultiIndexer< Record >::FillMultiIndex | ( | const string & | points_filename, |
const int | points_count, | ||
const vector< Centroids > & | coarse_vocabs, | ||
const vector< Centroids > & | fine_vocabs, | ||
const RerankMode & | mode | ||
) | [private] |
This function fills multiindex data structures.
points_filename | file with points in .fvecs or .bvecs format |
points_count | how many points should we index |
coarse_vocabs | vocabularies for coarse quantization |
fine_vocabs | vocabularies for fine quantization for reranking |
mode | determines how rerank info is calculated |
void MultiIndexer< Record >::FillMultiIndexForSubset | ( | const string & | points_filename, |
const PointId | start_pid, | ||
const int | points_count, | ||
const vector< Centroids > & | coarse_vocabs, | ||
const vector< Centroids > & | fine_vocabs, | ||
const RerankMode & | mode, | ||
Multitable< int > * | points_written_in_index | ||
) | [private] |
This function fills multiindex data structures for a subset of points.
points_filename | file with points in .fvecs or .bvecs format |
start_pid | identifier of the first point in subset |
points_count | points count in subset |
coarse_vocabs | vocabularies for coarse quantization |
fine_vocabs | vocabularies for fine quantization for reranking |
mode | determines how rerank info is calculated |
points_written_in_index | auxiliary structure for correct index filling |
void MultiIndexer< Record >::FillPointRerankInfo | ( | const Point & | point, |
const PointId | pid, | ||
const vector< Centroids > & | fine_vocabs | ||
) | [private] |
This function calculates rerank info for point
point | target point |
pid | identifier of target point |
fine_vocabs | vocabularies for rerank info calculation |
void MultiIndexer< Record >::GetCoarseQuantizationsForSubset | ( | const string & | points_filename, |
const int | start_pid, | ||
const int | subset_size, | ||
const vector< Centroids > & | coarse_vocabs, | ||
vector< vector< ClusterId > > * | transposed_coarse_quantizations | ||
) | [private] |
This function prepares the coarse quantization for each point in the subset
points_filename | file with points in .fvecs or .bvecs format |
start_pid | identifier of the first point in subset |
subset_size | points count in subset |
coarse_vocabs | vocabularies for coarse quantization |
transposed_coarse_quantizations | result |
int MultiIndexer< Record >::GetInputCoordSizeof | ( | ) | [private] |
This simple function returns size of one coordinate of input point
void MultiIndexer< Record >::GetPointCoarseQuantization | ( | const PointId | pid, |
const string & | filename, | ||
vector< ClusterId > * | coarse_quantization | ||
) | [private] |
This function reads point coarse quantization from file
pid | identifier of target point |
filename | file with coarse quantizations |
coarse_quantization | result |
void MultiIndexer< Record >::InitBlasStructures | ( | const vector< Centroids > & | coarse_vocabs | ) | [private] |
Initialize all structures for BLAS operations
coarse_vocabs | coarse vocabularies |
void MultiIndexer< Record >::PrepareCoarseQuantization | ( | const string & | points_filename, |
const int | points_count, | ||
const vector< Centroids > & | coarse_vocabs | ||
) | [private] |
This function prepares the coarse quantization for each point
points_filename | file with points in .fvecs or .bvecs format |
points_count | how many points should we handle |
coarse_vocabs | vocabularies for coarse quantization |
void MultiIndexer< Record >::ReadPoint | ( | ifstream & | input, |
Point * | point | ||
) | [private] |
This simple function reads one point from input stream
input | input stream |
point | result point |
void MultiIndexer< Record >::RestorePointsInCellsCountFromCourseQuantization | ( | const string & | points_filename, |
const int | points_count, | ||
const vector< Centroids > & | coarse_vocabs | ||
) | [private] |
This function restores counts of points from coarse quantizations
points_filename | file with points in .fvecs or .bvecs format |
points_count | how many points should we index |
coarse_vocabs | vocabularies for coarse quantization. We need them to initialize the counts table correctly. |
void MultiIndexer< Record >::SerializeCoarseQuantizations | ( | const vector< vector< ClusterId > > & | transposed_coarse_quantizations, |
const string & | filename | ||
) | [private] |
This function serializes prepared coarse quantizations to file
transposed_coarse_quantizations | quantizations to serialize. They are transposed for efficient memory usage |
filename | file we should serialize to |
void MultiIndexer< Record >::SerializeMultiIndexFiles | ( | ) | [private] |
This function saves the index to files. All filenames start with the common files prefix
boost::mutex MultiIndexer< Record >::cell_counts_mutex_ [private] |
Mutex for critical section in filling index stage
vector<vector<float> > MultiIndexer< Record >::coarse_centroids_norms_ [private] |
Struct for BLAS
string MultiIndexer< Record >::coarse_quantization_filename_ [private] |
Filename of file with coarse quantizations
vector<float*> MultiIndexer< Record >::coarse_vocabs_matrices_ [private] |
Struct for BLAS
string MultiIndexer< Record >::files_prefix_ [private] |
All index filenames will start with this prefix
MultiIndex<Record> MultiIndexer< Record >::multiindex_ [private] |
Multiindex
int MultiIndexer< Record >::multiplicity_ [private] |
Multiplicity (how many parts point space is divided into)
Multitable<int> MultiIndexer< Record >::point_in_cells_count_ [private] |
Table with number of points in each cell