MultiIndex
Classes | Typedefs | Enumerations | Functions
data_util.h File Reference
#include <bitset>
#include <fstream>
#include <ios>
#include <iostream>
#include <map>
#include <set>
#include <vector>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include "mkl_cblas.h"
#include "multitable.hpp"
Include dependency graph for data_util.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  MultiIndex< Record >
struct  IndexConfig
struct  RerankADC8
struct  RerankADC16

Typedefs

typedef float Coord
typedef float Distance
typedef int Dimensions
typedef int PointId
typedef int ClusterId
typedef vector< Coord > Point
typedef vector< Point > Points
typedef vector< PointId > PointIds
typedef set< PointId > SetPoints
typedef vector< ClusterId > ClusterIds
typedef vector< ClusterId > CoarseQuantization
typedef unsigned char FineClusterId
typedef vector< FineClusterId > FineQuantization
typedef vector< SetPoints > ClustersToPoints
typedef std::vector< ClusterId > PointsToClusters
typedef std::vector< Point > Centroids

Enumerations

enum  PointType { FVEC, BVEC }
enum  RerankMode { USE_RESIDUALS, USE_INIT_POINTS }

Functions

Distance Eucldistance (const Point &x, const Point &y)
Distance Eucldistance (const Point &x, const Point &y, Dimensions start, Dimensions finish)
template<class T , class U >
U Round (T number)
template<class T , class U >
void ReadPoints (const string &filename, vector< vector< U > > *points, int count)
template<class T , class U >
void ReadVector (ifstream &input, vector< U > *v)
template<class T >
void ReadVocabulary (ifstream &input, Dimensions dimension, int vocabulary_size, Centroids *centroids)
template<class T >
void ReadVocabularies (const string &filename, Dimensions space_dimension, vector< Centroids > *centroids)
template<class T >
void ReadFineVocabs (const string &fine_vocabs_filename, vector< Centroids > *fine_vocabs)
void GetSubpoints (const Points &points, const Dimensions start_dim, const Dimensions final_dim, Points *subpoints)
ClusterId GetNearestClusterId (const Point &point, const Centroids &centroids, const Dimensions start_dim, const Dimensions final_dim)
void GetResidual (const Point &point, const CoarseQuantization &coarse_quantizations, const vector< Centroids > &centroids, Point *residual)
void GetResidual (const Point &point, const CoarseQuantization &coarse_quantizations, const vector< Centroids > &centroids, Coord *residual)
void GetNearestClusterIdsForPointSubset (const Points &points, const Centroids &centroids, const PointId start_pid, const PointId final_pid, vector< ClusterId > *nearest)
void GetNearestClusterIdsForSubpoints (const Points &points, const Centroids &centroids, const Dimensions start_dim, const Dimensions final_dim, int threads_count, vector< ClusterId > *nearest)
void GetPointsCoarseQuaintizations (const Points &points, const vector< Centroids > &centroids, const int threads_count, vector< CoarseQuantization > *coarse_quantizations)

Detailed Description


Typedef Documentation

typedef std::vector<Point> Centroids
typedef int ClusterId
typedef vector<ClusterId> ClusterIds
typedef vector<SetPoints> ClustersToPoints
typedef vector<ClusterId> CoarseQuantization
typedef float Coord
typedef int Dimensions
typedef float Distance
typedef unsigned char FineClusterId
typedef vector<Coord> Point
typedef int PointId
typedef vector<PointId> PointIds
typedef vector<Point> Points
typedef std::vector<ClusterId> PointsToClusters
typedef set<PointId> SetPoints

Enumeration Type Documentation

enum PointType
Enumerator:
FVEC 
BVEC 
enum RerankMode
Enumerator:
USE_RESIDUALS 
USE_INIT_POINTS 

Function Documentation

Distance Eucldistance ( const Point & x,
const Point & y 
)

Function calculates the squared Euclidean distance between two points (the points must have the same dimensionality).

Parameters:
x: first point
y: second point
Distance Eucldistance ( const Point & x,
const Point & y,
Dimensions  start,
Dimensions  finish 
)

Function calculates the squared Euclidean distance between a point of smaller dimensionality and a subpoint of a point of larger dimensionality.

Parameters:
x: first point
y: second point
start: first dimension of the subpoint
finish: dimension after the last dimension of the subpoint
ClusterId GetNearestClusterId ( const Point & point,
const Centroids & centroids,
const Dimensions  start_dim,
const Dimensions  final_dim 
)

This function returns the identifier of the cluster whose centroid is nearest to the subpoint bounded by start_dim and final_dim.

Parameters:
point: full point
centroids: all centroids (the function finds the nearest one)
start_dim: first dimension of the subpoint
final_dim: dimension after the last dimension of the subpoint
void GetNearestClusterIdsForPointSubset ( const Points & points,
const Centroids & centroids,
const PointId  start_pid,
const PointId  final_pid,
vector< ClusterId > *  nearest 
)

This function finds the nearest cluster identifiers for the points from start_pid to final_pid. This function is needed for multi-threading.

Parameters:
points: all points
centroids: centroids of clusters
start_pid: first point for which the function finds the nearest cluster
final_pid: point after the last point for which the function finds the nearest cluster
void GetNearestClusterIdsForSubpoints ( const Points & points,
const Centroids & centroids,
const Dimensions  start_dim,
const Dimensions  final_dim,
int  threads_count,
vector< ClusterId > *  nearest 
)

This function finds the cluster identifiers nearest to the subpoints of a number of points. Subpoints are bounded by start_dim and final_dim.

Parameters:
points: all points
centroids: centroids of clusters
start_dim: first dimension of the subpoint
final_dim: dimension after the last dimension of the subpoint
threads_count: number of threads
nearest: result
void GetPointsCoarseQuaintizations ( const Points & points,
const vector< Centroids > &  centroids,
const int  threads_count,
vector< CoarseQuantization > *  coarse_quantizations 
)

This function calculates the coarse product quantizations of the points.

Parameters:
points: all points
centroids: centroids of clusters
threads_count: number of threads
coarse_quantizations: result quantizations
void GetResidual ( const Point & point,
const CoarseQuantization & coarse_quantizations,
const vector< Centroids > &  centroids,
Point * residual 
)

This function calculates the quantization residual.

Parameters:
point: initial point
coarse_quantizations: coarse quantization of the point
centroids: lists of centroids
residual: result residual
void GetResidual ( const Point & point,
const CoarseQuantization & coarse_quantizations,
const vector< Centroids > &  centroids,
Coord * residual 
)

This function calculates the quantization residual.

Parameters:
point: initial point
coarse_quantizations: coarse quantization of the point
centroids: lists of centroids
residual: pointer to the start of the residual
void GetSubpoints ( const Points & points,
const Dimensions  start_dim,
const Dimensions  final_dim,
Points * subpoints 
)

This function returns the subpoints bounded by start_dim and final_dim for every point in points.

Parameters:
points: all points
start_dim: first dimension of the subpoint
final_dim: dimension after the last dimension of the subpoint
subpoints: result subpoints
template<class T >
void ReadFineVocabs ( const string &  fine_vocabs_filename,
vector< Centroids > *  fine_vocabs 
)

This function reads the fine vocabularies of centroids.

Parameters:
fine_vocabs_filename: file with vocabularies
fine_vocabs: lists of fine centroids
template<class T , class U >
void ReadPoints ( const string &  filename,
vector< vector< U > > *  points,
int  count 
)

Function reads points written in .fvecs or .bvecs format. Input points have coordinates of type T; result points have coordinates of type U.

Parameters:
filename: .fvecs or .bvecs file name
count: how many points to read
points: result list of read points
template<class T , class U >
void ReadVector ( ifstream &  input,
vector< U > *  v 
)

Function reads one vector of coordinates of type T. Function assumes that the first int32 number in the input stream is the vector dimensionality. The result vector will have coordinates of type U.

Parameters:
input: input stream
v: result vector
template<class T >
void ReadVocabularies ( const string &  filename,
Dimensions  space_dimension,
vector< Centroids > *  centroids 
)

Function reads vocabularies of centroids produced by a MATLAB script. Function assumes that the first int32 in the input is the dimensionality of the centroids and the second is the number of centroids in each vocabulary.

Parameters:
filename: file with vocabularies
space_dimension: dimensionality of the space (presumably of the full points; confirm against the implementation)
centroids: result lists of centroids, one per vocabulary
template<class T >
void ReadVocabulary ( ifstream &  input,
Dimensions  dimension,
int  vocabulary_size,
Centroids * centroids 
)

Function reads a vocabulary of centroids produced by a MATLAB script.

Parameters:
input: input stream
dimension: dimensionality of one centroid
vocabulary_size: number of centroids
centroids: result centroids
template<class T , class U >
U Round ( T number ) [inline]

This simple function casts a number of type T to the nearest number of type U.

 All Classes Files Functions Variables Typedefs Enumerations Enumerator