mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Merge branch 'master' into hqann2
This commit is contained in:
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@@ -49,7 +49,7 @@ jobs:
|
||||
- postgres: 16
|
||||
os: macos-14
|
||||
- postgres: 14
|
||||
os: macos-12
|
||||
os: macos-13
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ankane/setup-postgres@v1
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
## 0.8.0 (unreleased)
|
||||
|
||||
- Added support for inline filtering with HNSW
|
||||
- Added support for iterative index scans
|
||||
- Added casts for arrays to `sparsevec`
|
||||
- Improved cost estimation
|
||||
- Improved performance of HNSW inserts and on-disk index builds
|
||||
|
||||
18
src/hnsw.c
18
src/hnsw.c
@@ -18,7 +18,16 @@
|
||||
#define MarkGUCPrefixReserved(x) EmitWarningsOnPlaceholders(x)
|
||||
#endif
|
||||
|
||||
/*
 * Settings accepted by the hnsw.iterative_search GUC, which HnswInit
 * registers with DefineCustomEnumVariable using this table. Each entry
 * pairs a setting name with an HnswIterativeSearchType value; the
 * {NULL, 0, false} entry marks the end of the table.
 */
static const struct config_enum_entry hnsw_iterative_search_options[] = {
|
||||
{"off", HNSW_ITERATIVE_SEARCH_OFF, false},
|
||||
{"on", HNSW_ITERATIVE_SEARCH_RELAXED, false},	/* "on" selects the relaxed mode */
|
||||
{"strict", HNSW_ITERATIVE_SEARCH_STRICT, false},
|
||||
{NULL, 0, false}	/* terminator */
|
||||
};
|
||||
|
||||
int hnsw_ef_search;
|
||||
int hnsw_iterative_search_max_tuples;
|
||||
int hnsw_iterative_search;
|
||||
int hnsw_lock_tranche_id;
|
||||
static relopt_kind hnsw_relopt_kind;
|
||||
|
||||
@@ -69,6 +78,15 @@ HnswInit(void)
|
||||
"Valid range is 1..1000.", &hnsw_ef_search,
|
||||
HNSW_DEFAULT_EF_SEARCH, HNSW_MIN_EF_SEARCH, HNSW_MAX_EF_SEARCH, PGC_USERSET, 0, NULL, NULL, NULL);
|
||||
|
||||
DefineCustomEnumVariable("hnsw.iterative_search", "Sets iterative search",
|
||||
NULL, &hnsw_iterative_search,
|
||||
HNSW_ITERATIVE_SEARCH_OFF, hnsw_iterative_search_options, PGC_USERSET, 0, NULL, NULL, NULL);
|
||||
|
||||
/* TODO Ensure ivfflat.iterative_search_max_probes uses same value for "all" */
|
||||
DefineCustomIntVariable("hnsw.iterative_search_max_tuples", "Sets the max number of candidates to visit for iterative search",
|
||||
"-1 means all", &hnsw_iterative_search_max_tuples,
|
||||
-1, -1, INT_MAX, PGC_USERSET, 0, NULL, NULL, NULL);
|
||||
|
||||
MarkGUCPrefixReserved("hnsw");
|
||||
}
|
||||
|
||||
|
||||
40
src/hnsw.h
40
src/hnsw.h
@@ -89,6 +89,9 @@
|
||||
/* Ensure fits on page and in uint8 */
|
||||
#define HnswGetMaxLevel(m) Min(((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - offsetof(HnswNeighborTupleData, indextids) - sizeof(ItemIdData)) / (sizeof(ItemPointerData)) / (m)) - 2, 255)
|
||||
|
||||
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
|
||||
#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr)
|
||||
|
||||
#define HnswGetValue(base, element) PointerGetDatum(HnswPtrAccess(base, (element)->value))
|
||||
|
||||
#if PG_VERSION_NUM < 140005
|
||||
@@ -109,8 +112,17 @@
|
||||
|
||||
/* Variables */
|
||||
extern int hnsw_ef_search;
|
||||
extern int hnsw_iterative_search;
|
||||
extern int hnsw_iterative_search_max_tuples;
|
||||
extern int hnsw_lock_tranche_id;
|
||||
|
||||
/*
 * Values stored in hnsw_iterative_search (set through the
 * hnsw.iterative_search GUC).
 */
typedef enum HnswIterativeSearchType
|
||||
{
|
||||
HNSW_ITERATIVE_SEARCH_OFF,	/* hnswgettuple stops once the current result list is empty */
|
||||
HNSW_ITERATIVE_SEARCH_RELAXED,	/* scan resumes from discarded candidates when the result list is empty */
|
||||
HNSW_ITERATIVE_SEARCH_STRICT	/* as relaxed, but hnswgettuple also skips results closer than the last one returned */
|
||||
} HnswIterativeSearchType;
|
||||
|
||||
typedef struct HnswElementData HnswElementData;
|
||||
typedef struct HnswNeighborArray HnswNeighborArray;
|
||||
|
||||
@@ -133,6 +145,7 @@ struct HnswElementData
|
||||
uint8 heaptidsLength;
|
||||
uint8 level;
|
||||
uint8 deleted;
|
||||
uint8 version;
|
||||
uint32 hash;
|
||||
HnswNeighborsPtr neighbors;
|
||||
BlockNumber blkno;
|
||||
@@ -326,10 +339,10 @@ typedef struct HnswElementTupleData
|
||||
uint8 type;
|
||||
uint8 level;
|
||||
uint8 deleted;
|
||||
uint8 unused;
|
||||
uint8 version;
|
||||
ItemPointerData heaptids[HNSW_HEAPTIDS];
|
||||
ItemPointerData neighbortid;
|
||||
uint16 unused2;
|
||||
uint16 unused;
|
||||
Vector data;
|
||||
} HnswElementTupleData;
|
||||
|
||||
@@ -338,18 +351,37 @@ typedef HnswElementTupleData * HnswElementTuple;
|
||||
typedef struct HnswNeighborTupleData
|
||||
{
|
||||
uint8 type;
|
||||
uint8 unused;
|
||||
uint8 version;
|
||||
uint16 count;
|
||||
ItemPointerData indextids[FLEXIBLE_ARRAY_MEMBER];
|
||||
} HnswNeighborTupleData;
|
||||
|
||||
typedef HnswNeighborTupleData * HnswNeighborTuple;
|
||||
|
||||
/*
 * Handle to the set of visited elements passed to HnswSearchLayer. Only one
 * member is meaningful at a time; the index scan code initializes and resets
 * the tids member (so->v.tids).
 * NOTE(review): which member is used is presumably decided in InitVisited
 * from the inMemory flag and base pointer -- confirm there.
 */
typedef union
|
||||
{
|
||||
struct pointerhash_hash *pointers;
|
||||
struct offsethash_hash *offsets;
|
||||
struct tidhash_hash *tids;
|
||||
} visited_hash;
|
||||
|
||||
/*
 * One not-yet-visited neighbor collected by HnswSearchLayer through
 * HnswLoadUnvisitedFromMemory or HnswLoadUnvisitedFromDisk.
 * NOTE(review): presumably the in-memory path fills element and the on-disk
 * path fills indextid -- confirm against those two loaders.
 */
typedef union
|
||||
{
|
||||
HnswElement element;	/* element handle */
|
||||
ItemPointerData indextid;	/* item pointer identifying an index tuple */
|
||||
} HnswUnvisited;
|
||||
|
||||
typedef struct HnswScanOpaqueData
|
||||
{
|
||||
const HnswTypeInfo *typeInfo;
|
||||
bool first;
|
||||
List *w;
|
||||
visited_hash v;
|
||||
pairingheap *discarded;
|
||||
HnswQuery q;
|
||||
int m;
|
||||
int64 tuples;
|
||||
double previousDistance;
|
||||
MemoryContext tmpCtx;
|
||||
|
||||
/* Support functions */
|
||||
@@ -393,7 +425,7 @@ bool HnswCheckNorm(HnswSupport * support, Datum value);
|
||||
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
|
||||
void HnswInitPage(Buffer buf, Page page);
|
||||
void HnswInit(void);
|
||||
List *HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement, bool inMemory);
|
||||
List *HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement, bool inMemory, visited_hash * v, pairingheap **discarded, bool initVisited, int64 *tuples);
|
||||
HnswElement HnswGetEntryPoint(Relation index);
|
||||
void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint);
|
||||
void *HnswAlloc(HnswAllocator * allocator, Size size);
|
||||
|
||||
@@ -36,7 +36,7 @@ GetInsertPage(Relation index)
|
||||
* Check for a free offset
|
||||
*/
|
||||
static bool
|
||||
HnswFreeOffset(Relation index, Buffer buf, Page page, HnswElement element, Size etupSize, Size ntupSize, Buffer *nbuf, Page *npage, OffsetNumber *freeOffno, OffsetNumber *freeNeighborOffno, BlockNumber *newInsertPage)
|
||||
HnswFreeOffset(Relation index, Buffer buf, Page page, HnswElement element, Size etupSize, Size ntupSize, Buffer *nbuf, Page *npage, OffsetNumber *freeOffno, OffsetNumber *freeNeighborOffno, BlockNumber *newInsertPage, uint8 *tupleVersion)
|
||||
{
|
||||
OffsetNumber offno;
|
||||
OffsetNumber maxoffno = PageGetMaxOffsetNumber(page);
|
||||
@@ -98,6 +98,7 @@ HnswFreeOffset(Relation index, Buffer buf, Page page, HnswElement element, Size
|
||||
{
|
||||
*freeOffno = offno;
|
||||
*freeNeighborOffno = neighborOffno;
|
||||
*tupleVersion = etup->version;
|
||||
return true;
|
||||
}
|
||||
else if (*nbuf != buf)
|
||||
@@ -153,6 +154,7 @@ AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, B
|
||||
OffsetNumber freeOffno = InvalidOffsetNumber;
|
||||
OffsetNumber freeNeighborOffno = InvalidOffsetNumber;
|
||||
BlockNumber newInsertPage = InvalidBlockNumber;
|
||||
uint8 tupleVersion;
|
||||
char *base = NULL;
|
||||
bool useIndexTuple = HnswUseIndexTuple(index);
|
||||
|
||||
@@ -203,7 +205,7 @@ AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, B
|
||||
}
|
||||
|
||||
/* Next, try space from a deleted element */
|
||||
if (HnswFreeOffset(index, buf, page, e, etupSize, ntupSize, &nbuf, &npage, &freeOffno, &freeNeighborOffno, &newInsertPage))
|
||||
if (HnswFreeOffset(index, buf, page, e, etupSize, ntupSize, &nbuf, &npage, &freeOffno, &freeNeighborOffno, &newInsertPage, &tupleVersion))
|
||||
{
|
||||
if (nbuf != buf)
|
||||
{
|
||||
@@ -213,6 +215,10 @@ AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, B
|
||||
npage = GenericXLogRegisterBuffer(state, nbuf, 0);
|
||||
}
|
||||
|
||||
/* Set tuple version */
|
||||
etup->version = tupleVersion;
|
||||
ntup->version = tupleVersion;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
142
src/hnswscan.c
142
src/hnswscan.c
@@ -5,6 +5,7 @@
|
||||
#include "pgstat.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/float.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
/*
|
||||
@@ -22,27 +23,60 @@ GetScanItems(IndexScanDesc scan, Datum value)
|
||||
HnswElement entryPoint;
|
||||
char *base = NULL;
|
||||
bool inMemory = false;
|
||||
HnswQuery q;
|
||||
HnswQuery *q = &so->q;
|
||||
|
||||
q.value = value;
|
||||
q.itup = NULL;
|
||||
q.keyData = scan->keyData;
|
||||
q->value = value;
|
||||
q->itup = NULL;
|
||||
q->keyData = scan->keyData;
|
||||
|
||||
/* Get m and entry point */
|
||||
HnswGetMetaPageInfo(index, &m, &entryPoint);
|
||||
|
||||
so->m = m;
|
||||
|
||||
if (entryPoint == NULL)
|
||||
return NIL;
|
||||
|
||||
ep = list_make1(HnswEntryCandidate(base, entryPoint, &q, index, support, false, inMemory));
|
||||
ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, support, false, inMemory));
|
||||
|
||||
for (int lc = entryPoint->level; lc >= 1; lc--)
|
||||
{
|
||||
w = HnswSearchLayer(base, &q, ep, 1, lc, index, support, m, false, NULL, inMemory);
|
||||
w = HnswSearchLayer(base, q, ep, 1, lc, index, support, m, false, NULL, inMemory, NULL, NULL, true, NULL);
|
||||
ep = w;
|
||||
}
|
||||
|
||||
return HnswSearchLayer(base, &q, ep, hnsw_ef_search, 0, index, support, m, false, NULL, inMemory);
|
||||
return HnswSearchLayer(base, q, ep, hnsw_ef_search, 0, index, support, m, false, NULL, inMemory, &so->v, hnsw_iterative_search != HNSW_ITERATIVE_SEARCH_OFF ? &so->discarded : NULL, true, &so->tuples);
|
||||
}
|
||||
|
||||
/*
|
||||
* Resume scan at ground level with discarded candidates
|
||||
*/
|
||||
static List *
|
||||
ResumeScanItems(IndexScanDesc scan)
|
||||
{
|
||||
HnswScanOpaque so = (HnswScanOpaque) scan->opaque;
|
||||
Relation index = scan->indexRelation;
|
||||
List *ep = NIL;
|
||||
char *base = NULL;
|
||||
int batch_size = hnsw_ef_search;
|
||||
|
||||
if (pairingheap_is_empty(so->discarded))
|
||||
return NIL;
|
||||
|
||||
/* Get next batch of candidates */
|
||||
for (int i = 0; i < batch_size; i++)
|
||||
{
|
||||
HnswSearchCandidate *sc;
|
||||
|
||||
if (pairingheap_is_empty(so->discarded))
|
||||
break;
|
||||
|
||||
sc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded));
|
||||
|
||||
ep = lappend(ep, sc);
|
||||
}
|
||||
|
||||
return HnswSearchLayer(base, &so->q, ep, batch_size, 0, index, &so->support, so->m, false, NULL, false, &so->v, &so->discarded, false, &so->tuples);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -86,6 +120,8 @@ hnswbeginscan(Relation index, int nkeys, int norderbys)
|
||||
so = (HnswScanOpaque) palloc(sizeof(HnswScanOpaqueData));
|
||||
so->typeInfo = HnswGetTypeInfo(index);
|
||||
so->first = true;
|
||||
so->v.tids = NULL;
|
||||
so->discarded = NULL;
|
||||
so->tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"Hnsw scan temporary context",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
@@ -106,7 +142,15 @@ hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int no
|
||||
{
|
||||
HnswScanOpaque so = (HnswScanOpaque) scan->opaque;
|
||||
|
||||
if (so->v.tids != NULL)
|
||||
tidhash_reset(so->v.tids);
|
||||
|
||||
if (so->discarded != NULL)
|
||||
pairingheap_reset(so->discarded);
|
||||
|
||||
so->first = true;
|
||||
so->tuples = 0;
|
||||
so->previousDistance = -get_float8_infinity();
|
||||
MemoryContextReset(so->tmpCtx);
|
||||
|
||||
if (keys && scan->numberOfKeys > 0)
|
||||
@@ -168,22 +212,100 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
|
||||
#endif
|
||||
}
|
||||
|
||||
while (list_length(so->w) > 0)
|
||||
for (;;)
|
||||
{
|
||||
char *base = NULL;
|
||||
HnswSearchCandidate *sc = llast(so->w);
|
||||
HnswElement element = HnswPtrAccess(base, sc->element);
|
||||
HnswSearchCandidate *sc;
|
||||
HnswElement element;
|
||||
ItemPointer heaptid;
|
||||
|
||||
if (list_length(so->w) == 0)
|
||||
{
|
||||
if (hnsw_iterative_search == HNSW_ITERATIVE_SEARCH_OFF)
|
||||
break;
|
||||
|
||||
/* Empty index */
|
||||
if (so->discarded == NULL)
|
||||
break;
|
||||
|
||||
/* Reached max number of additional tuples */
|
||||
if (hnsw_iterative_search_max_tuples != -1 && so->tuples >= hnsw_iterative_search_max_tuples)
|
||||
{
|
||||
if (pairingheap_is_empty(so->discarded))
|
||||
break;
|
||||
|
||||
/* Return remaining tuples */
|
||||
so->w = lappend(so->w, HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded)));
|
||||
}
|
||||
/* Prevent scans from consuming too much memory */
|
||||
else if (MemoryContextMemAllocated(so->tmpCtx, false) > (Size) work_mem * 1024L)
|
||||
{
|
||||
if (pairingheap_is_empty(so->discarded))
|
||||
{
|
||||
ereport(DEBUG1,
|
||||
(errmsg("hnsw index scan exceeded work_mem after " INT64_FORMAT " tuples", so->tuples),
|
||||
errhint("Increase work_mem to scan more tuples.")));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/* Return remaining tuples */
|
||||
so->w = lappend(so->w, HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded)));
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Locking ensures when neighbors are read, the elements they
|
||||
* reference will not be deleted (and replaced) during the
|
||||
* iteration.
|
||||
*
|
||||
* Elements loaded into memory on previous iterations may have
|
||||
* been deleted (and replaced), so when reading neighbors, the
|
||||
* element version must be checked.
|
||||
*/
|
||||
LockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock);
|
||||
|
||||
so->w = ResumeScanItems(scan);
|
||||
|
||||
UnlockPage(scan->indexRelation, HNSW_SCAN_LOCK, ShareLock);
|
||||
|
||||
#if defined(HNSW_MEMORY)
|
||||
elog(INFO, "memory: %zu KB", MemoryContextMemAllocated(so->tmpCtx, false) / 1024);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (list_length(so->w) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
sc = llast(so->w);
|
||||
element = HnswPtrAccess(base, sc->element);
|
||||
|
||||
/* Move to next element if no valid heap TIDs */
|
||||
if (!sc->matches || element->heaptidsLength == 0)
|
||||
{
|
||||
so->w = list_delete_last(so->w);
|
||||
|
||||
/* Mark memory as free for next iteration */
|
||||
if (hnsw_iterative_search != HNSW_ITERATIVE_SEARCH_OFF)
|
||||
{
|
||||
pfree(element);
|
||||
pfree(sc);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
heaptid = &element->heaptids[--element->heaptidsLength];
|
||||
|
||||
if (hnsw_iterative_search == HNSW_ITERATIVE_SEARCH_STRICT)
|
||||
{
|
||||
if (sc->distance < so->previousDistance)
|
||||
continue;
|
||||
|
||||
so->previousDistance = sc->distance;
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
scan->xs_heaptid = *heaptid;
|
||||
|
||||
104
src/hnswutils.c
104
src/hnswutils.c
@@ -100,19 +100,6 @@ hash_offset(Size offset)
|
||||
#define SH_DEFINE
|
||||
#include "lib/simplehash.h"
|
||||
|
||||
typedef union
|
||||
{
|
||||
pointerhash_hash *pointers;
|
||||
offsethash_hash *offsets;
|
||||
tidhash_hash *tids;
|
||||
} visited_hash;
|
||||
|
||||
typedef union
|
||||
{
|
||||
HnswElement element;
|
||||
ItemPointerData indextid;
|
||||
} HnswUnvisited;
|
||||
|
||||
/*
|
||||
* Get the max number of connections in an upper layer for each element in the index
|
||||
*/
|
||||
@@ -323,6 +310,8 @@ HnswInitElement(char *base, ItemPointer heaptid, int m, double ml, int maxLevel,
|
||||
|
||||
element->level = level;
|
||||
element->deleted = 0;
|
||||
/* Start at one to make it easier to find issues */
|
||||
element->version = 1;
|
||||
|
||||
HnswInitNeighbors(base, element, m, allocator);
|
||||
|
||||
@@ -508,6 +497,7 @@ HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element, bool
|
||||
etup->type = HNSW_ELEMENT_TUPLE_TYPE;
|
||||
etup->level = element->level;
|
||||
etup->deleted = 0;
|
||||
etup->version = element->version;
|
||||
for (int i = 0; i < HNSW_HEAPTIDS; i++)
|
||||
{
|
||||
if (i < element->heaptidsLength)
|
||||
@@ -562,6 +552,7 @@ HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m)
|
||||
}
|
||||
|
||||
ntup->count = idx;
|
||||
ntup->version = e->version;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -572,6 +563,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe
|
||||
{
|
||||
element->level = etup->level;
|
||||
element->deleted = etup->deleted;
|
||||
element->version = etup->version;
|
||||
element->neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid);
|
||||
element->neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid);
|
||||
element->heaptidsLength = 0;
|
||||
@@ -798,9 +790,6 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, HnswQuery * q, Relation i
|
||||
return sc;
|
||||
}
|
||||
|
||||
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
|
||||
#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr)
|
||||
|
||||
/*
|
||||
* Compare candidate distances
|
||||
*/
|
||||
@@ -816,6 +805,21 @@ CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, v
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare discarded candidate distances
|
||||
*/
|
||||
static int
|
||||
CompareNearestDiscardedCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg)
|
||||
{
|
||||
if (HnswGetSearchCandidateConst(w_node, a)->distance < HnswGetSearchCandidateConst(w_node, b)->distance)
|
||||
return 1;
|
||||
|
||||
if (HnswGetSearchCandidateConst(w_node, a)->distance > HnswGetSearchCandidateConst(w_node, b)->distance)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare candidate distances
|
||||
*/
|
||||
@@ -936,8 +940,11 @@ HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation i
|
||||
|
||||
ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno));
|
||||
|
||||
/* Ensure expected neighbors */
|
||||
if (ntup->count != (element->level + 2) * m)
|
||||
/*
|
||||
* Ensure the neighbor tuple has not been deleted or replaced between
|
||||
* index scan iterations
|
||||
*/
|
||||
if (ntup->version != element->version || ntup->count != (element->level + 2) * m)
|
||||
{
|
||||
UnlockReleaseBuffer(buf);
|
||||
return false;
|
||||
@@ -983,13 +990,13 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *u
|
||||
* Algorithm 2 from paper
|
||||
*/
|
||||
List *
|
||||
HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement, bool inMemory)
|
||||
HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement, bool inMemory, visited_hash * v, pairingheap **discarded, bool initVisited, int64 *tuples)
|
||||
{
|
||||
List *w = NIL;
|
||||
pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL);
|
||||
pairingheap *W = pairingheap_allocate(CompareFurthestCandidates, NULL);
|
||||
int wlen = 0;
|
||||
visited_hash v;
|
||||
visited_hash vh;
|
||||
ListCell *lc2;
|
||||
HnswNeighborArray *localNeighborhood = NULL;
|
||||
Size neighborhoodSize = 0;
|
||||
@@ -999,7 +1006,19 @@ HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation in
|
||||
uint64 additional = 0;
|
||||
uint64 maxAdditional = q->keyData && lc == 0 ? 10000 : 0;
|
||||
|
||||
InitVisited(base, &v, inMemory, ef, m);
|
||||
if (v == NULL)
|
||||
{
|
||||
v = &vh;
|
||||
initVisited = true;
|
||||
}
|
||||
|
||||
if (initVisited)
|
||||
{
|
||||
InitVisited(base, v, inMemory, ef, m);
|
||||
|
||||
if (discarded != NULL)
|
||||
*discarded = pairingheap_allocate(CompareNearestDiscardedCandidates, NULL);
|
||||
}
|
||||
|
||||
/* Create local memory for neighborhood if needed */
|
||||
if (inMemory)
|
||||
@@ -1014,7 +1033,13 @@ HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation in
|
||||
HnswSearchCandidate *sc = (HnswSearchCandidate *) lfirst(lc2);
|
||||
bool found;
|
||||
|
||||
AddToVisited(base, &v, sc->element, inMemory, &found);
|
||||
if (initVisited)
|
||||
{
|
||||
AddToVisited(base, v, sc->element, inMemory, &found);
|
||||
|
||||
if (tuples != NULL)
|
||||
(*tuples)++;
|
||||
}
|
||||
|
||||
pairingheap_add(C, &sc->c_node);
|
||||
pairingheap_add(W, &sc->w_node);
|
||||
@@ -1044,9 +1069,12 @@ HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation in
|
||||
cElement = HnswPtrAccess(base, c->element);
|
||||
|
||||
if (inMemory)
|
||||
HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, &v, lc, localNeighborhood, neighborhoodSize);
|
||||
HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, v, lc, localNeighborhood, neighborhoodSize);
|
||||
else
|
||||
HnswLoadUnvisitedFromDisk(cElement, unvisited, &unvisitedLength, &v, index, m, lm, lc);
|
||||
HnswLoadUnvisitedFromDisk(cElement, unvisited, &unvisitedLength, v, index, m, lm, lc);
|
||||
|
||||
if (tuples != NULL)
|
||||
(*tuples) += unvisitedLength;
|
||||
|
||||
for (int i = 0; i < unvisitedLength; i++)
|
||||
{
|
||||
@@ -1071,16 +1099,25 @@ HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation in
|
||||
|
||||
/* Avoid any allocations if not adding */
|
||||
eElement = NULL;
|
||||
HnswLoadElementImpl(blkno, offno, &eDistance, &eMatches, q, index, support, inserting, alwaysAdd ? NULL : &f->distance, &eElement);
|
||||
HnswLoadElementImpl(blkno, offno, &eDistance, &eMatches, q, index, support, inserting, alwaysAdd || discarded != NULL ? NULL : &f->distance, &eElement);
|
||||
|
||||
if (eElement == NULL)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(eDistance < f->distance || alwaysAdd))
|
||||
continue;
|
||||
if (eElement == NULL || !(eDistance < f->distance || alwaysAdd))
|
||||
{
|
||||
if (discarded != NULL)
|
||||
{
|
||||
/* Create a new candidate */
|
||||
e = palloc(sizeof(HnswSearchCandidate));
|
||||
HnswPtrStore(base, e->element, eElement);
|
||||
e->distance = eDistance;
|
||||
pairingheap_add(*discarded, &e->w_node);
|
||||
}
|
||||
|
||||
Assert(!eElement->deleted);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Make robust to issues */
|
||||
if (eElement->level < lc)
|
||||
@@ -1109,7 +1146,12 @@ HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation in
|
||||
|
||||
/* No need to decrement wlen */
|
||||
if (wlen > ef)
|
||||
pairingheap_remove_first(W);
|
||||
{
|
||||
HnswSearchCandidate *d = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W));
|
||||
|
||||
if (discarded != NULL)
|
||||
pairingheap_add(*discarded, &d->w_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1451,7 +1493,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint
|
||||
/* 1st phase: greedy search to insert level */
|
||||
for (int lc = entryLevel; lc >= level + 1; lc--)
|
||||
{
|
||||
w = HnswSearchLayer(base, &q, ep, 1, lc, index, support, m, true, skipElement, inMemory);
|
||||
w = HnswSearchLayer(base, &q, ep, 1, lc, index, support, m, true, skipElement, inMemory, NULL, NULL, true, NULL);
|
||||
ep = w;
|
||||
}
|
||||
|
||||
@@ -1470,7 +1512,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint
|
||||
List *lw = NIL;
|
||||
ListCell *lc2;
|
||||
|
||||
w = HnswSearchLayer(base, &q, ep, efConstruction, lc, index, support, m, true, skipElement, inMemory);
|
||||
w = HnswSearchLayer(base, &q, ep, efConstruction, lc, index, support, m, true, skipElement, inMemory, NULL, NULL, true, NULL);
|
||||
|
||||
/* Convert search candidates to candidates */
|
||||
foreach(lc2, w)
|
||||
|
||||
@@ -535,6 +535,14 @@ MarkDeleted(HnswVacuumState * vacuumstate)
|
||||
for (int i = 0; i < ntup->count; i++)
|
||||
ItemPointerSetInvalid(&ntup->indextids[i]);
|
||||
|
||||
/* Increment version */
|
||||
/* This is used to avoid incorrect reads for iterative scans */
|
||||
/* Reserve some bits for future use */
|
||||
etup->version++;
|
||||
if (etup->version > 15)
|
||||
etup->version = 1;
|
||||
ntup->version = etup->version;
|
||||
|
||||
/*
|
||||
* We modified the tuples in place, no need to call
|
||||
* PageIndexTupleOverwrite
|
||||
|
||||
@@ -138,7 +138,7 @@ SampleRows(IvfflatBuildState * buildstate)
|
||||
* Add tuple to sort
|
||||
*/
|
||||
static void
|
||||
AddTupleToSort(Relation index, ItemPointer tid, Datum *values, IvfflatBuildState * buildstate)
|
||||
AddTupleToSort(Relation index, ItemPointer tid, Datum *values, bool *isnull, IvfflatBuildState * buildstate)
|
||||
{
|
||||
double distance;
|
||||
double minDistance = DBL_MAX;
|
||||
@@ -184,6 +184,11 @@ AddTupleToSort(Relation index, ItemPointer tid, Datum *values, IvfflatBuildState
|
||||
slot->tts_isnull[1] = false;
|
||||
slot->tts_values[2] = value;
|
||||
slot->tts_isnull[2] = false;
|
||||
for (int i = 1; i < buildstate->tupdesc->natts; i++)
|
||||
{
|
||||
slot->tts_values[2 + i] = values[i];
|
||||
slot->tts_isnull[2 + i] = isnull[i];
|
||||
}
|
||||
ExecStoreVirtualTuple(slot);
|
||||
|
||||
/*
|
||||
@@ -215,7 +220,7 @@ BuildCallback(Relation index, ItemPointer tid, Datum *values,
|
||||
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
|
||||
|
||||
/* Add tuple to sort */
|
||||
AddTupleToSort(index, tid, values, buildstate);
|
||||
AddTupleToSort(index, tid, values, isnull, buildstate);
|
||||
|
||||
/* Reset memory context */
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
@@ -226,19 +231,20 @@ BuildCallback(Relation index, ItemPointer tid, Datum *values,
|
||||
* Get index tuple from sort state
|
||||
*/
|
||||
static inline void
|
||||
GetNextTuple(Tuplesortstate *sortstate, TupleDesc tupdesc, TupleTableSlot *slot, IndexTuple *itup, int *list)
|
||||
GetNextTuple(Tuplesortstate *sortstate, TupleDesc tupdesc, TupleTableSlot *slot, Datum *values, bool *isnull, IndexTuple *itup, int *list)
|
||||
{
|
||||
Datum value;
|
||||
bool isnull;
|
||||
|
||||
if (tuplesort_gettupleslot(sortstate, true, false, slot, NULL))
|
||||
{
|
||||
*list = DatumGetInt32(slot_getattr(slot, 1, &isnull));
|
||||
value = slot_getattr(slot, 3, &isnull);
|
||||
bool unused;
|
||||
|
||||
*list = DatumGetInt32(slot_getattr(slot, 1, &unused));
|
||||
|
||||
for (int i = 0; i < tupdesc->natts; i++)
|
||||
values[i] = slot_getattr(slot, 3 + i, &isnull[i]);
|
||||
|
||||
/* Form the index tuple */
|
||||
*itup = index_form_tuple(tupdesc, &value, &isnull);
|
||||
(*itup)->t_tid = *((ItemPointer) DatumGetPointer(slot_getattr(slot, 2, &isnull)));
|
||||
*itup = index_form_tuple(tupdesc, values, isnull);
|
||||
(*itup)->t_tid = *((ItemPointer) DatumGetPointer(slot_getattr(slot, 2, &unused)));
|
||||
}
|
||||
else
|
||||
*list = -1;
|
||||
@@ -254,14 +260,16 @@ InsertTuples(Relation index, IvfflatBuildState * buildstate, ForkNumber forkNum)
|
||||
IndexTuple itup = NULL; /* silence compiler warning */
|
||||
int64 inserted = 0;
|
||||
|
||||
TupleTableSlot *slot = MakeSingleTupleTableSlot(buildstate->tupdesc, &TTSOpsMinimalTuple);
|
||||
TupleDesc tupdesc = RelationGetDescr(index);
|
||||
TupleTableSlot *slot = MakeSingleTupleTableSlot(buildstate->sortdesc, &TTSOpsMinimalTuple);
|
||||
TupleDesc tupdesc = buildstate->tupdesc;
|
||||
Datum *values = palloc(tupdesc->natts * sizeof(Datum));
|
||||
bool *isnull = palloc(tupdesc->natts * sizeof(bool));
|
||||
|
||||
pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, PROGRESS_IVFFLAT_PHASE_LOAD);
|
||||
|
||||
pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL, buildstate->indtuples);
|
||||
|
||||
GetNextTuple(buildstate->sortstate, tupdesc, slot, &itup, &list);
|
||||
GetNextTuple(buildstate->sortstate, tupdesc, slot, values, isnull, &itup, &list);
|
||||
|
||||
for (int i = 0; i < buildstate->centers->length; i++)
|
||||
{
|
||||
@@ -297,7 +305,7 @@ InsertTuples(Relation index, IvfflatBuildState * buildstate, ForkNumber forkNum)
|
||||
|
||||
pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, ++inserted);
|
||||
|
||||
GetNextTuple(buildstate->sortstate, tupdesc, slot, &itup, &list);
|
||||
GetNextTuple(buildstate->sortstate, tupdesc, slot, values, isnull, &itup, &list);
|
||||
}
|
||||
|
||||
insertPage = BufferGetBlockNumber(buf);
|
||||
@@ -307,6 +315,9 @@ InsertTuples(Relation index, IvfflatBuildState * buildstate, ForkNumber forkNum)
|
||||
/* Set the start and insert pages */
|
||||
IvfflatUpdateList(index, buildstate->listInfo[i], insertPage, InvalidBlockNumber, startPage, forkNum);
|
||||
}
|
||||
|
||||
pfree(values);
|
||||
pfree(isnull);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -319,6 +330,7 @@ InitBuildState(IvfflatBuildState * buildstate, Relation heap, Relation index, In
|
||||
buildstate->index = index;
|
||||
buildstate->indexInfo = indexInfo;
|
||||
buildstate->typeInfo = IvfflatGetTypeInfo(index);
|
||||
buildstate->tupdesc = RelationGetDescr(index);
|
||||
|
||||
buildstate->lists = IvfflatGetLists(index);
|
||||
buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod;
|
||||
@@ -356,12 +368,13 @@ InitBuildState(IvfflatBuildState * buildstate, Relation heap, Relation index, In
|
||||
errmsg("dimensions must be greater than one for this opclass")));
|
||||
|
||||
/* Create tuple description for sorting */
|
||||
buildstate->tupdesc = CreateTemplateTupleDesc(3);
|
||||
TupleDescInitEntry(buildstate->tupdesc, (AttrNumber) 1, "list", INT4OID, -1, 0);
|
||||
TupleDescInitEntry(buildstate->tupdesc, (AttrNumber) 2, "tid", TIDOID, -1, 0);
|
||||
TupleDescInitEntry(buildstate->tupdesc, (AttrNumber) 3, "vector", RelationGetDescr(index)->attrs[0].atttypid, -1, 0);
|
||||
buildstate->sortdesc = CreateTemplateTupleDesc(2 + buildstate->tupdesc->natts);
|
||||
TupleDescInitEntry(buildstate->sortdesc, (AttrNumber) 1, "list", INT4OID, -1, 0);
|
||||
TupleDescInitEntry(buildstate->sortdesc, (AttrNumber) 2, "tid", TIDOID, -1, 0);
|
||||
for (int i = 0; i < buildstate->tupdesc->natts; i++)
|
||||
TupleDescInitEntry(buildstate->sortdesc, (AttrNumber) (3 + i), NULL, buildstate->tupdesc->attrs[i].atttypid, -1, 0);
|
||||
|
||||
buildstate->slot = MakeSingleTupleTableSlot(buildstate->tupdesc, &TTSOpsVirtual);
|
||||
buildstate->slot = MakeSingleTupleTableSlot(buildstate->sortdesc, &TTSOpsVirtual);
|
||||
|
||||
buildstate->centers = VectorArrayInit(buildstate->lists, buildstate->dimensions, buildstate->typeInfo->itemSize(buildstate->dimensions));
|
||||
buildstate->listInfo = palloc(sizeof(ListInfo) * buildstate->lists);
|
||||
@@ -633,7 +646,7 @@ IvfflatParallelScanAndSort(IvfflatSpool * ivfspool, IvfflatShared * ivfshared, S
|
||||
InitBuildState(&buildstate, ivfspool->heap, ivfspool->index, indexInfo);
|
||||
memcpy(buildstate.centers->items, ivfcenters, buildstate.centers->itemsize * buildstate.centers->maxlen);
|
||||
buildstate.centers->length = buildstate.centers->maxlen;
|
||||
ivfspool->sortstate = InitBuildSortState(buildstate.tupdesc, sortmem, coordinate);
|
||||
ivfspool->sortstate = InitBuildSortState(buildstate.sortdesc, sortmem, coordinate);
|
||||
buildstate.sortstate = ivfspool->sortstate;
|
||||
scan = table_beginscan_parallel(ivfspool->heap,
|
||||
ParallelTableScanFromIvfflatShared(ivfshared));
|
||||
@@ -950,7 +963,7 @@ AssignTuples(IvfflatBuildState * buildstate)
|
||||
}
|
||||
|
||||
/* Begin serial/leader tuplesort */
|
||||
buildstate->sortstate = InitBuildSortState(buildstate->tupdesc, maintenance_work_mem, coordinate);
|
||||
buildstate->sortstate = InitBuildSortState(buildstate->sortdesc, maintenance_work_mem, coordinate);
|
||||
|
||||
/* Add tuples to sort */
|
||||
if (buildstate->heap != NULL)
|
||||
|
||||
@@ -17,8 +17,16 @@
|
||||
#endif
|
||||
|
||||
int ivfflat_probes;
|
||||
int ivfflat_iterative_search;
|
||||
int ivfflat_iterative_search_max_probes;
|
||||
static relopt_kind ivfflat_relopt_kind;
|
||||
|
||||
static const struct config_enum_entry ivfflat_iterative_search_options[] = {
|
||||
{"off", IVFFLAT_ITERATIVE_SEARCH_OFF, false},
|
||||
{"on", IVFFLAT_ITERATIVE_SEARCH_RELAXED, false},
|
||||
{NULL, 0, false}
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize index options and variables
|
||||
*/
|
||||
@@ -33,6 +41,14 @@ IvfflatInit(void)
|
||||
"Valid range is 1..lists.", &ivfflat_probes,
|
||||
IVFFLAT_DEFAULT_PROBES, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL);
|
||||
|
||||
DefineCustomEnumVariable("ivfflat.iterative_search", "Sets whether to use iterative search",
|
||||
NULL, &ivfflat_iterative_search,
|
||||
IVFFLAT_ITERATIVE_SEARCH_OFF, ivfflat_iterative_search_options, PGC_USERSET, 0, NULL, NULL, NULL);
|
||||
|
||||
DefineCustomIntVariable("ivfflat.iterative_search_max_probes", "Sets the max number of probes for iterative search",
|
||||
"Zero sets to the number of lists", &ivfflat_iterative_search_max_probes,
|
||||
0, 0, IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL);
|
||||
|
||||
MarkGUCPrefixReserved("ivfflat");
|
||||
}
|
||||
|
||||
|
||||
@@ -80,6 +80,14 @@
|
||||
|
||||
/* Variables */
|
||||
extern int ivfflat_probes;
|
||||
extern int ivfflat_iterative_search;
|
||||
extern int ivfflat_iterative_search_max_probes;
|
||||
|
||||
typedef enum IvfflatIterativeSearchType
|
||||
{
|
||||
IVFFLAT_ITERATIVE_SEARCH_OFF,
|
||||
IVFFLAT_ITERATIVE_SEARCH_RELAXED
|
||||
} IvfflatIterativeSearchType;
|
||||
|
||||
typedef struct VectorArrayData
|
||||
{
|
||||
@@ -165,6 +173,7 @@ typedef struct IvfflatBuildState
|
||||
Relation index;
|
||||
IndexInfo *indexInfo;
|
||||
const IvfflatTypeInfo *typeInfo;
|
||||
TupleDesc tupdesc;
|
||||
|
||||
/* Settings */
|
||||
int dimensions;
|
||||
@@ -198,7 +207,7 @@ typedef struct IvfflatBuildState
|
||||
|
||||
/* Sorting */
|
||||
Tuplesortstate *sortstate;
|
||||
TupleDesc tupdesc;
|
||||
TupleDesc sortdesc;
|
||||
TupleTableSlot *slot;
|
||||
|
||||
/* Memory */
|
||||
@@ -247,8 +256,10 @@ typedef struct IvfflatScanOpaqueData
|
||||
{
|
||||
const IvfflatTypeInfo *typeInfo;
|
||||
int probes;
|
||||
int maxProbes;
|
||||
int dimensions;
|
||||
bool first;
|
||||
Datum value;
|
||||
|
||||
/* Sorting */
|
||||
Tuplesortstate *sortstate;
|
||||
@@ -265,6 +276,8 @@ typedef struct IvfflatScanOpaqueData
|
||||
|
||||
/* Lists */
|
||||
pairingheap *listQueue;
|
||||
BlockNumber *listPages;
|
||||
int listIndex;
|
||||
IvfflatScanList lists[FLEXIBLE_ARRAY_MEMBER]; /* must come last */
|
||||
} IvfflatScanOpaqueData;
|
||||
|
||||
|
||||
@@ -78,6 +78,8 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, R
|
||||
BlockNumber insertPage = InvalidBlockNumber;
|
||||
ListInfo listInfo;
|
||||
BlockNumber originalInsertPage;
|
||||
TupleDesc tupdesc = RelationGetDescr(index);
|
||||
Datum *newValues = palloc(tupdesc->natts * sizeof(Datum));
|
||||
|
||||
/* Detoast once for all calls */
|
||||
value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
||||
@@ -98,12 +100,16 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, R
|
||||
IvfflatGetMetaPageInfo(index, NULL, NULL);
|
||||
|
||||
/* Find the insert page - sets the page and list info */
|
||||
FindInsertPage(index, values, &insertPage, &listInfo);
|
||||
FindInsertPage(index, &value, &insertPage, &listInfo);
|
||||
Assert(BlockNumberIsValid(insertPage));
|
||||
originalInsertPage = insertPage;
|
||||
|
||||
newValues[0] = value;
|
||||
for (int i = 1; i < tupdesc->natts; i++)
|
||||
newValues[i] = values[i];
|
||||
|
||||
/* Form tuple */
|
||||
itup = index_form_tuple(RelationGetDescr(index), &value, isnull);
|
||||
itup = index_form_tuple(tupdesc, newValues, isnull);
|
||||
itup->t_tid = *heap_tid;
|
||||
|
||||
/* Get tuple size */
|
||||
|
||||
@@ -65,7 +65,7 @@ GetScanLists(IndexScanDesc scan, Datum value)
|
||||
/* Use procinfo from the index instead of scan key for performance */
|
||||
distance = DatumGetFloat8(so->distfunc(so->procinfo, so->collation, PointerGetDatum(&list->center), value));
|
||||
|
||||
if (listCount < so->probes)
|
||||
if (listCount < so->maxProbes)
|
||||
{
|
||||
IvfflatScanList *scanlist;
|
||||
|
||||
@@ -78,7 +78,7 @@ GetScanLists(IndexScanDesc scan, Datum value)
|
||||
pairingheap_add(so->listQueue, &scanlist->ph_node);
|
||||
|
||||
/* Calculate max distance */
|
||||
if (listCount == so->probes)
|
||||
if (listCount == so->maxProbes)
|
||||
maxDistance = GetScanList(pairingheap_first(so->listQueue))->distance;
|
||||
}
|
||||
else if (distance < maxDistance)
|
||||
@@ -102,6 +102,11 @@ GetScanLists(IndexScanDesc scan, Datum value)
|
||||
|
||||
UnlockReleaseBuffer(cbuf);
|
||||
}
|
||||
|
||||
for (int i = listCount - 1; i >= 0; i--)
|
||||
so->listPages[i] = GetScanList(pairingheap_remove_first(so->listQueue))->startPage;
|
||||
|
||||
Assert(pairingheap_is_empty(so->listQueue));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -114,11 +119,14 @@ GetScanItems(IndexScanDesc scan, Datum value)
|
||||
TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
|
||||
double tuples = 0;
|
||||
TupleTableSlot *slot = so->vslot;
|
||||
int batchProbes = 0;
|
||||
|
||||
tuplesort_reset(so->sortstate);
|
||||
|
||||
/* Search closest probes lists */
|
||||
while (!pairingheap_is_empty(so->listQueue))
|
||||
while (so->listIndex < so->maxProbes && (++batchProbes) <= so->probes)
|
||||
{
|
||||
BlockNumber searchPage = GetScanList(pairingheap_remove_first(so->listQueue))->startPage;
|
||||
BlockNumber searchPage = so->listPages[so->listIndex++];
|
||||
|
||||
/* Search all entry pages for list */
|
||||
while (BlockNumberIsValid(searchPage))
|
||||
@@ -166,13 +174,17 @@ GetScanItems(IndexScanDesc scan, Datum value)
|
||||
}
|
||||
}
|
||||
|
||||
if (tuples < 100)
|
||||
if (tuples < 100 && ivfflat_iterative_search == IVFFLAT_ITERATIVE_SEARCH_OFF)
|
||||
ereport(DEBUG1,
|
||||
(errmsg("index scan found few tuples"),
|
||||
errdetail("Index may have been created with little data."),
|
||||
errhint("Recreate the index and possibly decrease lists.")));
|
||||
|
||||
tuplesort_performsort(so->sortstate);
|
||||
|
||||
#if defined(IVFFLAT_MEMORY)
|
||||
elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(CurrentMemoryContext, true) / (1024 * 1024));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -240,6 +252,7 @@ ivfflatbeginscan(Relation index, int nkeys, int norderbys)
|
||||
int lists;
|
||||
int dimensions;
|
||||
int probes = ivfflat_probes;
|
||||
int maxProbes;
|
||||
|
||||
scan = RelationGetIndexScan(index, nkeys, norderbys);
|
||||
|
||||
@@ -249,10 +262,21 @@ ivfflatbeginscan(Relation index, int nkeys, int norderbys)
|
||||
if (probes > lists)
|
||||
probes = lists;
|
||||
|
||||
so = (IvfflatScanOpaque) palloc(offsetof(IvfflatScanOpaqueData, lists) + probes * sizeof(IvfflatScanList));
|
||||
if (ivfflat_iterative_search != IVFFLAT_ITERATIVE_SEARCH_OFF)
|
||||
{
|
||||
if (ivfflat_iterative_search_max_probes == 0)
|
||||
maxProbes = lists;
|
||||
else
|
||||
maxProbes = Min(ivfflat_iterative_search_max_probes, lists);
|
||||
}
|
||||
else
|
||||
maxProbes = probes;
|
||||
|
||||
so = (IvfflatScanOpaque) palloc(offsetof(IvfflatScanOpaqueData, lists) + maxProbes * sizeof(IvfflatScanList));
|
||||
so->typeInfo = IvfflatGetTypeInfo(index);
|
||||
so->first = true;
|
||||
so->probes = probes;
|
||||
so->maxProbes = maxProbes;
|
||||
so->dimensions = dimensions;
|
||||
|
||||
/* Set support functions */
|
||||
@@ -280,6 +304,8 @@ ivfflatbeginscan(Relation index, int nkeys, int norderbys)
|
||||
so->bas = GetAccessStrategy(BAS_BULKREAD);
|
||||
|
||||
so->listQueue = pairingheap_allocate(CompareLists, scan);
|
||||
so->listPages = palloc(maxProbes * sizeof(BlockNumber));
|
||||
so->listIndex = 0;
|
||||
|
||||
scan->opaque = so;
|
||||
|
||||
@@ -294,11 +320,9 @@ ivfflatrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int
|
||||
{
|
||||
IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
|
||||
|
||||
if (!so->first)
|
||||
tuplesort_reset(so->sortstate);
|
||||
|
||||
so->first = true;
|
||||
pairingheap_reset(so->listQueue);
|
||||
so->listIndex = 0;
|
||||
|
||||
if (keys && scan->numberOfKeys > 0)
|
||||
memmove(scan->keyData, keys, scan->numberOfKeys * sizeof(ScanKeyData));
|
||||
@@ -314,6 +338,8 @@ bool
|
||||
ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
|
||||
{
|
||||
IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
|
||||
ItemPointer heaptid;
|
||||
bool isnull;
|
||||
|
||||
/*
|
||||
* Index can be used to scan backward, but Postgres doesn't support
|
||||
@@ -341,28 +367,25 @@ ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
|
||||
IvfflatBench("GetScanLists", GetScanLists(scan, value));
|
||||
IvfflatBench("GetScanItems", GetScanItems(scan, value));
|
||||
so->first = false;
|
||||
so->value = value;
|
||||
|
||||
#if defined(IVFFLAT_MEMORY)
|
||||
elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(CurrentMemoryContext, true) / (1024 * 1024));
|
||||
#endif
|
||||
|
||||
/* Clean up if we allocated a new value */
|
||||
if (value != scan->orderByData->sk_argument)
|
||||
pfree(DatumGetPointer(value));
|
||||
/* TODO clean up if we allocated a new value */
|
||||
}
|
||||
|
||||
if (tuplesort_gettupleslot(so->sortstate, true, false, so->mslot, NULL))
|
||||
while (!tuplesort_gettupleslot(so->sortstate, true, false, so->mslot, NULL))
|
||||
{
|
||||
bool isnull;
|
||||
ItemPointer heaptid = (ItemPointer) DatumGetPointer(slot_getattr(so->mslot, 2, &isnull));
|
||||
if (so->listIndex == so->maxProbes)
|
||||
return false;
|
||||
|
||||
IvfflatBench("GetScanItems", GetScanItems(scan, so->value));
|
||||
}
|
||||
|
||||
heaptid = (ItemPointer) DatumGetPointer(slot_getattr(so->mslot, 2, &isnull));
|
||||
|
||||
scan->xs_heaptid = *heaptid;
|
||||
scan->xs_recheck = false;
|
||||
scan->xs_recheckorderby = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -374,6 +397,7 @@ ivfflatendscan(IndexScanDesc scan)
|
||||
IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
|
||||
|
||||
pairingheap_free(so->listQueue);
|
||||
pfree(so->listPages);
|
||||
tuplesort_end(so->sortstate);
|
||||
FreeAccessStrategy(so->bas);
|
||||
FreeTupleDesc(so->tupdesc);
|
||||
|
||||
@@ -26,7 +26,7 @@ ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||
Page cpage;
|
||||
OffsetNumber coffno;
|
||||
OffsetNumber cmaxoffno;
|
||||
BlockNumber startPages[MaxOffsetNumber];
|
||||
BlockNumber listPages[MaxOffsetNumber];
|
||||
ListInfo listInfo;
|
||||
|
||||
cbuf = ReadBuffer(index, blkno);
|
||||
@@ -40,7 +40,7 @@ ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||
{
|
||||
IvfflatList list = (IvfflatList) PageGetItem(cpage, PageGetItemId(cpage, coffno));
|
||||
|
||||
startPages[coffno - FirstOffsetNumber] = list->startPage;
|
||||
listPages[coffno - FirstOffsetNumber] = list->startPage;
|
||||
}
|
||||
|
||||
listInfo.blkno = blkno;
|
||||
@@ -50,7 +50,7 @@ ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||
|
||||
for (coffno = FirstOffsetNumber; coffno <= cmaxoffno; coffno = OffsetNumberNext(coffno))
|
||||
{
|
||||
BlockNumber searchPage = startPages[coffno - FirstOffsetNumber];
|
||||
BlockNumber searchPage = listPages[coffno - FirstOffsetNumber];
|
||||
BlockNumber insertPage = InvalidBlockNumber;
|
||||
|
||||
/* Iterate over entry pages */
|
||||
|
||||
@@ -6,13 +6,7 @@ use Test::More;
|
||||
|
||||
my $dim = 3;
|
||||
|
||||
my @r = ();
|
||||
for (1 .. $dim)
|
||||
{
|
||||
my $v = int(rand(1000)) + 1;
|
||||
push(@r, "i % $v");
|
||||
}
|
||||
my $array_sql = join(", ", @r);
|
||||
my $array_sql = join(",", ('random()') x $dim);
|
||||
|
||||
# Initialize node
|
||||
my $node = PostgreSQL::Test::Cluster->new('node');
|
||||
@@ -23,19 +17,20 @@ $node->start;
|
||||
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
|
||||
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));");
|
||||
$node->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 100000) i;"
|
||||
"INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 100000) i;"
|
||||
);
|
||||
$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops);");
|
||||
|
||||
# Get size
|
||||
my $size = $node->safe_psql("postgres", "SELECT pg_total_relation_size('tst_v_idx');");
|
||||
|
||||
# Store values
|
||||
$node->safe_psql("postgres", "CREATE TABLE tmp AS SELECT * FROM tst;");
|
||||
|
||||
# Delete all, vacuum, and insert same data
|
||||
$node->safe_psql("postgres", "DELETE FROM tst;");
|
||||
$node->safe_psql("postgres", "VACUUM tst;");
|
||||
$node->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 100000) i;"
|
||||
);
|
||||
$node->safe_psql("postgres", "INSERT INTO tst SELECT * FROM tmp;");
|
||||
|
||||
# Check size
|
||||
my $new_size = $node->safe_psql("postgres", "SELECT pg_total_relation_size('tst_v_idx');");
|
||||
|
||||
54
test/t/041_ivfflat_iterative_search.pl
Normal file
54
test/t/041_ivfflat_iterative_search.pl
Normal file
@@ -0,0 +1,54 @@
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
|
||||
# Vector width used for the table column and the generated data
my $dim = 3;

# One random() call per dimension, comma-separated, for the ARRAY[...] constructor
my $array_sql = join(",", map { 'random()' } 1 .. $dim);

# Bring up a fresh single-node cluster
my $node = PostgreSQL::Test::Cluster->new('node');
$node->init;
$node->start;

# Load 100000 random vectors keyed by i, then build the IVFFlat index
for my $setup_sql (
	"CREATE EXTENSION vector;",
	"CREATE TABLE tst (i int4 PRIMARY KEY, v vector($dim));",
	"INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 100000) i;",
	"CREATE INDEX ON tst USING ivfflat (v vector_l2_ops);")
{
	$node->safe_psql("postgres", $setup_sql);
}

# Only 10 of the 100000 rows satisfy i % 10000 = 0. With iterative search on
# and no explicit probe cap, all 10 are expected back even though LIMIT is 11.
my $unbounded = $node->safe_psql("postgres", qq(
	SET enable_seqscan = off;
	SET ivfflat.probes = 10;
	SET ivfflat.iterative_search = on;
	SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 11) t;
));
is($unbounded, 10);

# Capping the probes should cap how many of the 10 matching rows are reached.
# NOTE(review): the $max_probes / 10 expectation presumably relies on the
# index having 100 lists (the default) - confirm.
for my $max_probes (30, 50, 70)
{
	my $expected = $max_probes / 10;
	my $sum = 0;

	# Average over 20 different query vectors to smooth out randomness
	for my $i (1 .. 20)
	{
		$sum += $node->safe_psql("postgres", qq(
			SET enable_seqscan = off;
			SET ivfflat.probes = 10;
			SET ivfflat.iterative_search = on;
			SET ivfflat.iterative_search_max_probes = $max_probes;
			SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst WHERE i = $i) LIMIT 11) t;
		));
	}

	# The average must land strictly within 2 of the expectation
	my $avg = $sum / 20;
	cmp_ok($avg, '>', $expected - 2);
	cmp_ok($avg, '<', $expected + 2);
}

done_testing();
|
||||
125
test/t/042_ivfflat_iterative_search_recall.pl
Normal file
125
test/t/042_ivfflat_iterative_search_recall.pl
Normal file
@@ -0,0 +1,125 @@
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
|
||||
# Number of rows requested by every recall query (the LIMIT)
my $limit = 20;

# Filter moduli to exercise: queries keep rows where i % c = 0
my @cs = (100, 1000);

# Cluster handle, assigned once the node is created further down
my $node;

# Query vectors as '[x,y,z]' literals, and the exact result for each of them
my @queries;
my @expected;
|
||||
|
||||
# Measure recall of filtered, iterative IVFFlat index scans for one setting.
#
# Arguments:
#   $c        - filter modulus; queries keep rows where i % $c = 0
#   $probes   - value assigned to ivfflat.probes
#   $min      - lowest acceptable recall, as a fraction
#   $operator - distance operator used in ORDER BY; doubles as the test name
#
# Reads the file-level $node, @queries, @expected and $limit. Emits one
# like() check on the query plan and one cmp_ok() check on the recall.
sub test_recall
{
	my ($c, $probes, $min, $operator) = @_;

	# The first query must be planned as a scan of the index named idx
	my $plan = $node->safe_psql("postgres", qq(
		SET enable_seqscan = off;
		SET ivfflat.probes = $probes;
		SET ivfflat.iterative_search = on;
		EXPLAIN ANALYZE SELECT i FROM tst WHERE i % $c = 0 ORDER BY v $operator '$queries[0]' LIMIT $limit;
	));
	like($plan, qr/Index Scan using idx on tst/);

	my $hits = 0;

	for my $q (0 .. $#queries)
	{
		my $rows = $node->safe_psql("postgres", qq(
			SET enable_seqscan = off;
			SET ivfflat.probes = $probes;
			SET ivfflat.iterative_search = on;
			SELECT i FROM tst WHERE i % $c = 0 ORDER BY v $operator '$queries[$q]' LIMIT $limit;
		));

		# Ids of the exact answer for this query, one per output line
		my %is_expected;
		@is_expected{ split(/\n/, $expected[$q]) } = ();

		# Count the returned ids that also appear in the exact answer
		$hits += grep { exists $is_expected{$_} } split(/\n/, $rows);
	}

	# Each query is scored out of $limit rows
	my $possible = $limit * @queries;

	cmp_ok($hits / $possible, ">=", $min, $operator);
}
|
||||
|
||||
# Bring up a fresh single-node cluster
$node = PostgreSQL::Test::Cluster->new('node');
$node->init;
$node->start;

# Load 100000 random 3-dimensional vectors
for my $setup_sql (
	"CREATE EXTENSION vector;",
	"CREATE TABLE tst (i int4, v vector(3));",
	"INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 100000) i;")
{
	$node->safe_psql("postgres", $setup_sql);
}

# Build 20 random query vectors as '[x,y,z]' literals
for (1 .. 20)
{
	push(@queries, "[" . join(",", rand(), rand(), rand()) . "]");
}

# Run the recall checks once per distance operator / operator class pair
for my $pair (["<->", "vector_l2_ops"], ["<=>", "vector_cosine_ops"])
{
	my ($operator, $opclass) = @$pair;

	$node->safe_psql("postgres", "CREATE INDEX idx ON tst USING ivfflat (v $opclass);");

	for my $c (@cs)
	{
		# Exact answers without the index: every row at least as close as
		# the farthest of the top $limit rows that pass the filter
		@expected = ();
		for my $query (@queries)
		{
			my $exact = $node->safe_psql("postgres", qq(
				SET enable_indexscan = off;
				WITH top AS (
					SELECT v $operator '$query' AS distance FROM tst WHERE i % $c = 0 ORDER BY distance LIMIT $limit
				)
				SELECT i FROM tst WHERE (v $operator '$query') <= (SELECT MAX(distance) FROM top)
			));
			push(@expected, $exact);
		}

		if ($c == 100)
		{
			test_recall($c, 1, 0.58, $operator);
			test_recall($c, 10, 0.98, $operator);
		}
		else
		{
			# The minimum recall differs by operator for the sparser filter
			my $min = ($operator eq "<->") ? 0.80 : 0.88;
			test_recall($c, 1, $min, $operator);
		}
	}

	# Drop the index so the next operator class can reuse the name idx
	$node->safe_psql("postgres", "DROP INDEX idx;");
}

done_testing();
|
||||
67
test/t/043_hnsw_iterative_search.pl
Normal file
67
test/t/043_hnsw_iterative_search.pl
Normal file
@@ -0,0 +1,67 @@
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
|
||||
# Vector width used for the table column and the generated data
my $dim = 3;

# One random() call per dimension, comma-separated, for the ARRAY[...] constructor
my $array_sql = join(",", map { 'random()' } 1 .. $dim);

# Bring up a fresh single-node cluster
my $node = PostgreSQL::Test::Cluster->new('node');
$node->init;
$node->start;

# Load 100000 random vectors keyed by i
for my $setup_sql (
	"CREATE EXTENSION vector;",
	"CREATE TABLE tst (i int4 PRIMARY KEY, v vector($dim));",
	"INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 100000) i;")
{
	$node->safe_psql("postgres", $setup_sql);
}

# Build the HNSW index with extra build memory and parallel workers
$node->safe_psql("postgres", qq(
	SET maintenance_work_mem = '128MB';
	SET max_parallel_maintenance_workers = 2;
	CREATE INDEX ON tst USING hnsw (v vector_l2_ops)
));

# Only 10 of the 100000 rows satisfy i % 10000 = 0. With iterative search on
# and no explicit tuple cap, all 10 are expected back even though LIMIT is 11.
my $unbounded = $node->safe_psql("postgres", qq(
	SET enable_seqscan = off;
	SET hnsw.iterative_search = on;
	SET work_mem = '8MB';
	SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 11) t;
));
is($unbounded, 10);

# Capping the visited tuples should cap how many matching rows are reached:
# one row in every 10000 passes the filter, hence $max_tuples / 10000.
for my $max_tuples (30000, 50000, 70000)
{
	my $expected = $max_tuples / 10000;
	my $sum = 0;

	# Average over 20 different query vectors to smooth out randomness
	for my $i (1 .. 20)
	{
		$sum += $node->safe_psql("postgres", qq(
			SET enable_seqscan = off;
			SET hnsw.iterative_search = on;
			SET hnsw.iterative_search_max_tuples = $max_tuples;
			SET work_mem = '8MB';
			SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst WHERE i = $i) LIMIT 11) t;
		));
	}

	# The average must land strictly within 2 of the expectation
	my $avg = $sum / 20;
	cmp_ok($avg, '>', $expected - 2);
	cmp_ok($avg, '<', $expected + 2);
}

# With a small work_mem the scan should log, at debug1, that it exceeded
# work_mem. psql() is used instead of safe_psql() so stderr can be inspected.
my ($ret, $stdout, $stderr) = $node->psql("postgres", qq(
	SET enable_seqscan = off;
	SET hnsw.iterative_search = on;
	SET client_min_messages = debug1;
	SET work_mem = '2MB';
	SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 11) t;
));
like($stderr, qr/hnsw index scan exceeded work_mem after \d+ tuples/);

done_testing();
|
||||
131
test/t/044_hnsw_iterative_search_recall.pl
Normal file
131
test/t/044_hnsw_iterative_search_recall.pl
Normal file
@@ -0,0 +1,131 @@
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
|
||||
# Number of rows requested by every recall query (the LIMIT)
my $limit = 20;

# Vector width, and one random() call per dimension for the ARRAY[...] constructor
my $dim = 3;
my $array_sql = join(",", map { 'random()' } 1 .. $dim);

# Filter moduli to exercise: queries keep rows where i % c = 0
my @cs = (100, 1000);

# Cluster handle, assigned once the node is created further down
my $node;

# Query vectors as '[...]' literals, and the exact result for each of them
my @queries;
my @expected;
|
||||
|
||||
# Measure recall of filtered, iterative HNSW index scans for one setting.
#
# Arguments:
#   $c         - filter modulus; queries keep rows where i % $c = 0
#   $ef_search - value assigned to hnsw.ef_search
#   $min       - lowest acceptable recall, as a fraction
#   $operator  - distance operator used in ORDER BY; doubles as the test name
#
# Reads the file-level $node, @queries, @expected and $limit. Emits one
# like() check on the query plan and one cmp_ok() check on the recall.
sub test_recall
{
	my ($c, $ef_search, $min, $operator) = @_;

	# The first query must be planned as a scan of the index named idx
	my $plan = $node->safe_psql("postgres", qq(
		SET enable_seqscan = off;
		SET hnsw.ef_search = $ef_search;
		SET hnsw.iterative_search = on;
		EXPLAIN ANALYZE SELECT i FROM tst WHERE i % $c = 0 ORDER BY v $operator '$queries[0]' LIMIT $limit;
	));
	like($plan, qr/Index Scan using idx on tst/);

	my $hits = 0;

	for my $q (0 .. $#queries)
	{
		my $rows = $node->safe_psql("postgres", qq(
			SET enable_seqscan = off;
			SET hnsw.ef_search = $ef_search;
			SET hnsw.iterative_search = on;
			SELECT i FROM tst WHERE i % $c = 0 ORDER BY v $operator '$queries[$q]' LIMIT $limit;
		));

		# Ids of the exact answer for this query, one per output line
		my %is_expected;
		@is_expected{ split(/\n/, $expected[$q]) } = ();

		# Count the returned ids that also appear in the exact answer
		$hits += grep { exists $is_expected{$_} } split(/\n/, $rows);
	}

	# Each query is scored out of $limit rows
	my $possible = $limit * @queries;

	cmp_ok($hits / $possible, ">=", $min, $operator);
}
|
||||
|
||||
# Bring up a fresh single-node cluster
$node = PostgreSQL::Test::Cluster->new('node');
$node->init;
$node->start;

# Load 100000 random vectors
for my $setup_sql (
	"CREATE EXTENSION vector;",
	"CREATE TABLE tst (i int4, v vector($dim));",
	"INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 100000) i;")
{
	$node->safe_psql("postgres", $setup_sql);
}

# Build 20 random query vectors as '[...]' literals with $dim components
for (1 .. 20)
{
	my @components = map { rand() } 1 .. $dim;
	push(@queries, "[" . join(",", @components) . "]");
}

# Run the recall checks once per distance operator / operator class pair
for my $pair (["<->", "vector_l2_ops"], ["<=>", "vector_cosine_ops"])
{
	my ($operator, $opclass) = @$pair;

	$node->safe_psql("postgres", qq(
		SET maintenance_work_mem = '128MB';
		CREATE INDEX idx ON tst USING hnsw (v $opclass);
	));

	for my $c (@cs)
	{
		# Exact answers without the index: every row at least as close as
		# the farthest of the top $limit rows that pass the filter
		@expected = ();
		for my $query (@queries)
		{
			my $exact = $node->safe_psql("postgres", qq(
				SET enable_indexscan = off;
				WITH top AS (
					SELECT v $operator '$query' AS distance FROM tst WHERE i % $c = 0 ORDER BY distance LIMIT $limit
				)
				SELECT i FROM tst WHERE (v $operator '$query') <= (SELECT MAX(distance) FROM top)
			));
			push(@expected, $exact);
		}

		# The same ef_search and minimum recall apply to every filter
		# modulus and operator
		test_recall($c, 40, 0.99, $operator);
	}

	# Drop the index so the next operator class can reuse the name idx
	$node->safe_psql("postgres", "DROP INDEX idx;");
}

done_testing();
|
||||
Reference in New Issue
Block a user