Replace dynahash hash table in HNSW with simplehash for speed - #378

Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi>
This commit is contained in:
Andrew Kane
2023-12-17 11:24:13 -05:00
parent a59aa02dd9
commit 921427ee03
4 changed files with 140 additions and 30 deletions

View File

@@ -1,5 +1,6 @@
## 0.5.2 (unreleased)
- Improved performance of HNSW
- Added support for on-disk parallel index builds for HNSW
- Fixed `invalid memory alloc request size` error with HNSW index build

View File

@@ -104,6 +104,7 @@ typedef struct HnswElementData
List *heaptids;
uint8 level;
uint8 deleted;
uint32 hash;
HnswNeighborArray *neighbors;
BlockNumber blkno;
OffsetNumber offno;
@@ -303,7 +304,7 @@ typedef struct HnswVacuumState
Oid collation;
/* Variables */
HTAB *deleted;
struct tidhash_hash *deleted;
BufferAccessStrategy bas;
HnswNeighborTuple ntup;
HnswElementData highestPoint;
@@ -360,4 +361,31 @@ void hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys,
bool hnswgettuple(IndexScanDesc scan, ScanDirection dir);
void hnswendscan(IndexScanDesc scan);
/* Hash tables */
/*
 * Element type of the "tidhash" simplehash table: a set of item pointers.
 * simplehash.h requires every element type to carry a "status" member, which
 * the template itself maintains.
 */
typedef struct TidHashEntry
{
/* Hash key (named as SH_KEY where the table is defined) */
ItemPointerData tid;
/* Per-entry state used by simplehash.h */
char status;
} TidHashEntry;
/* SH_DECLARE: emit only the tidhash types and extern prototypes here */
#define SH_PREFIX tidhash
#define SH_ELEMENT_TYPE TidHashEntry
#define SH_KEY_TYPE ItemPointerData
#define SH_SCOPE extern
#define SH_DECLARE
#include "lib/simplehash.h"
/*
 * Element type of the "pointerhash" simplehash table: a set of addresses
 * stored as uintptr_t. As with any simplehash element, the "status" member is
 * required and maintained by the template.
 */
typedef struct PointerHashEntry
{
/* Hash key (named as SH_KEY where the table is defined) */
uintptr_t ptr;
/* Per-entry state used by simplehash.h */
char status;
} PointerHashEntry;
/* SH_DECLARE: emit only the pointerhash types and extern prototypes here */
#define SH_PREFIX pointerhash
#define SH_ELEMENT_TYPE PointerHashEntry
#define SH_KEY_TYPE uintptr_t
#define SH_SCOPE extern
#define SH_DECLARE
#include "lib/simplehash.h"
#endif

View File

@@ -7,6 +7,89 @@
#include "utils/datum.h"
#include "vector.h"
#if PG_VERSION_NUM >= 130000
#include "common/hashfn.h"
#else
#include "utils/hashutils.h"
#endif
#if PG_VERSION_NUM < 170000
/*
 * Mix all bits of a 64-bit value.
 *
 * Three xor-with-self-shifted-right-by-33 steps are interleaved with two
 * multiplications by fixed 64-bit constants. An input of 0 yields 0.
 */
static inline uint64
murmurhash64(uint64 data)
{
	uint64		mixed = data ^ (data >> 33);

	mixed *= 0xff51afd7ed558ccd;
	mixed ^= mixed >> 33;
	mixed *= 0xc4ceb9fe1a85ec53;

	return mixed ^ (mixed >> 33);
}
#endif
/* TID hash table */
/*
 * Hash an item pointer for the tidhash table.
 *
 * The TID is overlaid on a 64-bit integer so the whole word can be handed to
 * murmurhash64(); the 64-bit result is narrowed to the uint32 return type.
 */
static uint32
hash_tid(ItemPointerData tid)
{
	union
	{
		uint64		bits;
		ItemPointerData key;
	}			overlay;

	/* Zero the full word first so bytes the TID does not cover are initialized */
	overlay.bits = 0;
	overlay.key = tid;

	return (uint32) murmurhash64(overlay.bits);
}
/*
 * SH_DEFINE: generate the tidhash function bodies. Entries are keyed on their
 * "tid" member, hashed with hash_tid() and compared with ItemPointerEquals().
 */
#define SH_PREFIX tidhash
#define SH_ELEMENT_TYPE TidHashEntry
#define SH_KEY_TYPE ItemPointerData
#define SH_KEY tid
#define SH_HASH_KEY(tb, key) hash_tid(key)
#define SH_EQUAL(tb, a, b) ItemPointerEquals(&a, &b)
#define SH_SCOPE extern
#define SH_DEFINE
#include "lib/simplehash.h"
/*
 * Needed to include simplehash.h twice: before PostgreSQL 12, drop the
 * SH_EQUAL left over from the first instantiation and give the sh_log2 /
 * sh_pow2 helpers distinct names for the second one.
 *
 * NOTE(review): presumably the older simplehash.h neither undefined SH_EQUAL
 * nor prefixed these helpers - confirm against the PostgreSQL 11 header.
 */
#if PG_VERSION_NUM < 120000
#undef SH_EQUAL
#define sh_log2 pointerhash_sh_log2
#define sh_pow2 pointerhash_sh_pow2
#endif
/* Pointer hash table */
static uint32
hash_pointer(uintptr_t ptr)
{
#if SIZEOF_VOID_P == 8
return murmurhash64((uint64) ptr);
#else
return murmurhash32((uint32) ptr);
#endif
}
/*
 * SH_DEFINE: generate the pointerhash function bodies. Entries are keyed on
 * their "ptr" member, hashed with hash_pointer() and compared with ==.
 */
#define SH_PREFIX pointerhash
#define SH_ELEMENT_TYPE PointerHashEntry
#define SH_KEY_TYPE uintptr_t
#define SH_KEY ptr
#define SH_HASH_KEY(tb, key) hash_pointer(key)
#define SH_EQUAL(tb, a, b) (a == b)
#define SH_SCOPE extern
#define SH_DEFINE
#include "lib/simplehash.h"
/*
 * Visited set used while searching a layer. Exactly one member is in use for
 * a given search: "pointers" when no index relation is supplied, "tids"
 * otherwise.
 */
typedef union
{
/* Keyed on element addresses */
pointerhash_hash *pointers;
/* Keyed on index TIDs */
tidhash_hash *tids;
} visited_hash;
/*
* Get the max number of connections in an upper layer for each element in the index
*/
@@ -553,16 +636,22 @@ CreatePairingHeapNode(HnswCandidate * c)
* Add to visited
*/
static inline void
AddToVisited(HTAB *v, HnswCandidate * hc, Relation index, bool *found)
AddToVisited(visited_hash v, HnswCandidate * hc, Relation index, bool *found)
{
/* No index relation: the visited set is keyed on the element's address */
if (index == NULL)
hash_search(v, &hc->element, HASH_ENTER, found);
{
#if PG_VERSION_NUM >= 130000
/* Supply the hash cached in the element instead of hashing the pointer again */
pointerhash_insert_hash(v.pointers, (uintptr_t) hc->element, hc->element->hash, found);
#else
pointerhash_insert(v.pointers, (uintptr_t) hc->element, found);
#endif
}
else
{
ItemPointerData indextid;
/* With an index relation, key on the element's (blkno, offno) position */
ItemPointerSet(&indextid, hc->element->blkno, hc->element->offno);
hash_search(v, &indextid, HASH_ENTER, found);
tidhash_insert(v.tids, indextid, found);
}
}
@@ -578,30 +667,21 @@ HnswSearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *pro
pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL);
pairingheap *W = pairingheap_allocate(CompareFurthestCandidates, NULL);
int wlen = 0;
HASHCTL hash_ctl;
HTAB *v;
visited_hash v;
/* Create hash table */
if (index == NULL)
{
hash_ctl.keysize = sizeof(HnswElement *);
hash_ctl.entrysize = sizeof(HnswElement *);
}
v.pointers = pointerhash_create(CurrentMemoryContext, ef * m * 2, NULL);
else
{
hash_ctl.keysize = sizeof(ItemPointerData);
hash_ctl.entrysize = sizeof(ItemPointerData);
}
hash_ctl.hcxt = CurrentMemoryContext;
v = hash_create("hnsw visited", 256, &hash_ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
v.tids = tidhash_create(CurrentMemoryContext, ef * m * 2, NULL);
/* Add entry points to v, C, and W */
foreach(lc2, ep)
{
HnswCandidate *hc = (HnswCandidate *) lfirst(lc2);
bool found;
AddToVisited(v, hc, index, NULL);
AddToVisited(v, hc, index, &found);
pairingheap_add(C, &(CreatePairingHeapNode(hc)->ph_node));
pairingheap_add(W, &(CreatePairingHeapNode(hc)->ph_node));
@@ -1021,6 +1101,12 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
Datum q = element->value;
HnswElement skipElement = existing ? element : NULL;
#if PG_VERSION_NUM >= 130000
/* Precompute hash */
if (index == NULL)
element->hash = hash_pointer((uintptr_t) element);
#endif
/* No neighbors if no entry point */
if (entryPoint == NULL)
return;

View File

@@ -12,12 +12,9 @@
* Check if deleted list contains an index TID
*/
static bool
DeletedContains(HTAB *deleted, ItemPointer indextid)
DeletedContains(tidhash_hash * deleted, ItemPointer indextid)
{
bool found;
hash_search(deleted, indextid, HASH_FIND, &found);
return found;
/* Membership test: the lookup yields a non-NULL entry only for a TID already in the set */
return tidhash_lookup(deleted, *indextid) != NULL;
}
/*
@@ -110,11 +107,13 @@ RemoveHeapTids(HnswVacuumState * vacuumstate)
if (!ItemPointerIsValid(&etup->heaptids[0]))
{
ItemPointerData ip;
bool found;
/* Add to deleted list */
ItemPointerSet(&ip, blkno, offno);
(void) hash_search(vacuumstate->deleted, &ip, HASH_ENTER, NULL);
tidhash_insert(vacuumstate->deleted, ip, &found);
Assert(!found);
}
else if (etup->level > highestLevel && !(entryPoint != NULL && blkno == entryPoint->blkno && offno == entryPoint->offno))
{
@@ -575,7 +574,6 @@ static void
InitVacuumState(HnswVacuumState * vacuumstate, IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state)
{
Relation index = info->index;
HASHCTL hash_ctl;
if (stats == NULL)
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
@@ -597,10 +595,7 @@ InitVacuumState(HnswVacuumState * vacuumstate, IndexVacuumInfo *info, IndexBulkD
HnswGetMetaPageInfo(index, &vacuumstate->m, NULL);
/* Create hash table */
hash_ctl.keysize = sizeof(ItemPointerData);
hash_ctl.entrysize = sizeof(ItemPointerData);
hash_ctl.hcxt = CurrentMemoryContext;
vacuumstate->deleted = hash_create("hnswbulkdelete indextids", 256, &hash_ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
vacuumstate->deleted = tidhash_create(CurrentMemoryContext, 256, NULL);
}
/*
@@ -609,7 +604,7 @@ InitVacuumState(HnswVacuumState * vacuumstate, IndexVacuumInfo *info, IndexBulkD
static void
FreeVacuumState(HnswVacuumState * vacuumstate)
{
hash_destroy(vacuumstate->deleted);
tidhash_destroy(vacuumstate->deleted);
FreeAccessStrategy(vacuumstate->bas);
pfree(vacuumstate->ntup);
MemoryContextDelete(vacuumstate->tmpCtx);