Combined "sampling table" and "performing k-means" phases

This commit is contained in:
Andrew Kane
2022-12-23 08:07:09 -08:00
parent b09e14ce14
commit 0f69cc789a
3 changed files with 5 additions and 9 deletions

View File

@@ -98,8 +98,6 @@ SampleRows(IvfflatBuildState * buildstate)
int targsamples = buildstate->samples->maxlen;
BlockNumber totalblocks = RelationGetNumberOfBlocks(buildstate->heap);
UpdateProgress(PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_IVFFLAT_PHASE_SAMPLE);
buildstate->rowstoskip = -1;
BlockSampler_Init(&buildstate->bs, totalblocks, targsamples, RandomInt());
@@ -364,6 +362,8 @@ ComputeCenters(IvfflatBuildState * buildstate)
{
int numSamples;
UpdateProgress(PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_IVFFLAT_PHASE_KMEANS);
/* Target 50 samples per list, with at least 10000 samples */
/* The number of samples has a large effect on index build time */
numSamples = buildstate->lists * 50;
@@ -381,7 +381,6 @@ ComputeCenters(IvfflatBuildState * buildstate)
SampleRows(buildstate);
/* Calculate centers */
UpdateProgress(PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_IVFFLAT_PHASE_KMEANS);
IvfflatBench("k-means", IvfflatKmeans(buildstate->index, buildstate->samples, buildstate->centers));
/* Free samples before we allocate more memory */

View File

@@ -45,8 +45,6 @@ ivfflatbuildphasename(int64 phasenum)
{
case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE:
return "initializing";
case PROGRESS_IVFFLAT_PHASE_SAMPLE:
return "sampling table";
case PROGRESS_IVFFLAT_PHASE_KMEANS:
return "performing k-means";
case PROGRESS_IVFFLAT_PHASE_SORT:

View File

@@ -42,10 +42,9 @@
/* Build phases */
/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */
#define PROGRESS_IVFFLAT_PHASE_SAMPLE 2
#define PROGRESS_IVFFLAT_PHASE_KMEANS 3
#define PROGRESS_IVFFLAT_PHASE_SORT 4
#define PROGRESS_IVFFLAT_PHASE_LOAD 5
#define PROGRESS_IVFFLAT_PHASE_KMEANS 2
#define PROGRESS_IVFFLAT_PHASE_SORT 3
#define PROGRESS_IVFFLAT_PHASE_LOAD 4
#define IVFFLAT_LIST_SIZE(_dim) (offsetof(IvfflatListData, center) + VECTOR_SIZE(_dim))