Merge branch 'master' into hnsw-streaming

This commit is contained in:
Andrew Kane
2024-09-20 13:57:23 -07:00
19 changed files with 317 additions and 172 deletions

View File

@@ -13,14 +13,12 @@ jobs:
- postgres: 17
os: ubuntu-24.04
- postgres: 16
os: ubuntu-24.04
os: ubuntu-22.04
- postgres: 15
os: ubuntu-22.04
- postgres: 14
os: ubuntu-22.04
- postgres: 13
os: ubuntu-20.04
- postgres: 12
- postgres: 13
os: ubuntu-20.04
steps:
- uses: actions/checkout@v4

View File

@@ -1,6 +1,8 @@
## 0.7.5 (unreleased)
## 0.8.0 (unreleased)
- Added casts for arrays to `sparsevec`
- Reduced memory usage for HNSW index scans
- Dropped support for Postgres 12
## 0.7.4 (2024-08-05)

View File

@@ -0,0 +1,26 @@
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION vector UPDATE TO '0.8.0'" to load this file. \quit
-- Conversion functions from plain arrays to sparsevec, one overload per
-- supported element type. All overloads are C-language functions loaded
-- from the extension's shared library.
CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean)
	RETURNS sparsevec
	LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
	AS 'MODULE_PATHNAME';

CREATE FUNCTION array_to_sparsevec(real[], integer, boolean)
	RETURNS sparsevec
	LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
	AS 'MODULE_PATHNAME';

CREATE FUNCTION array_to_sparsevec(double precision[], integer, boolean)
	RETURNS sparsevec
	LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
	AS 'MODULE_PATHNAME';

CREATE FUNCTION array_to_sparsevec(numeric[], integer, boolean)
	RETURNS sparsevec
	LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
	AS 'MODULE_PATHNAME';

-- Assignment-level casts: applied when storing an array into a sparsevec
-- column or on an explicit cast, but not implicitly inside expressions.
CREATE CAST (integer[] AS sparsevec)
	WITH FUNCTION array_to_sparsevec(integer[], integer, boolean)
	AS ASSIGNMENT;

CREATE CAST (real[] AS sparsevec)
	WITH FUNCTION array_to_sparsevec(real[], integer, boolean)
	AS ASSIGNMENT;

CREATE CAST (double precision[] AS sparsevec)
	WITH FUNCTION array_to_sparsevec(double precision[], integer, boolean)
	AS ASSIGNMENT;

CREATE CAST (numeric[] AS sparsevec)
	WITH FUNCTION array_to_sparsevec(numeric[], integer, boolean)
	AS ASSIGNMENT;

View File

@@ -782,6 +782,18 @@ CREATE FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) RETURNS sparseve
CREATE FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) RETURNS halfvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- array to sparsevec conversion functions (one overload per element type)
CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean) RETURNS sparsevec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION array_to_sparsevec(real[], integer, boolean) RETURNS sparsevec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION array_to_sparsevec(double precision[], integer, boolean) RETURNS sparsevec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION array_to_sparsevec(numeric[], integer, boolean) RETURNS sparsevec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- sparsevec casts
CREATE CAST (sparsevec AS sparsevec)
@@ -799,6 +811,18 @@ CREATE CAST (sparsevec AS halfvec)
CREATE CAST (halfvec AS sparsevec)
WITH FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) AS IMPLICIT;
-- array to sparsevec casts (assignment-level, so never applied implicitly in expressions)
CREATE CAST (integer[] AS sparsevec)
WITH FUNCTION array_to_sparsevec(integer[], integer, boolean) AS ASSIGNMENT;
CREATE CAST (real[] AS sparsevec)
WITH FUNCTION array_to_sparsevec(real[], integer, boolean) AS ASSIGNMENT;
CREATE CAST (double precision[] AS sparsevec)
WITH FUNCTION array_to_sparsevec(double precision[], integer, boolean) AS ASSIGNMENT;
CREATE CAST (numeric[] AS sparsevec)
WITH FUNCTION array_to_sparsevec(numeric[], integer, boolean) AS ASSIGNMENT;
-- sparsevec operators
CREATE OPERATOR <-> (

View File

@@ -4,8 +4,8 @@
#include "postgres.h"
/* Check version in first header */
#if PG_VERSION_NUM < 120000
#error "Requires PostgreSQL 12+"
#if PG_VERSION_NUM < 130000
#error "Requires PostgreSQL 13+"
#endif
extern uint64 (*BitHammingDistance) (uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 distance);

View File

@@ -19,11 +19,6 @@
#include "utils/numeric.h"
#include "vector.h"
#if PG_VERSION_NUM < 130000
#define TYPALIGN_DOUBLE 'd'
#define TYPALIGN_INT 'i'
#endif
#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1)
#define CreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1))

View File

@@ -9,6 +9,7 @@
#include "commands/vacuum.h"
#include "hnsw.h"
#include "miscadmin.h"
#include "utils/float.h"
#include "utils/guc.h"
#include "utils/selfuncs.h"
@@ -60,17 +61,9 @@ HnswInit(void)
hnsw_relopt_kind = add_reloption_kind();
add_int_reloption(hnsw_relopt_kind, "m", "Max number of connections",
HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M
#if PG_VERSION_NUM >= 130000
,AccessExclusiveLock
#endif
);
HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M, AccessExclusiveLock);
add_int_reloption(hnsw_relopt_kind, "ef_construction", "Size of the dynamic candidate list for construction",
HNSW_DEFAULT_EF_CONSTRUCTION, HNSW_MIN_EF_CONSTRUCTION, HNSW_MAX_EF_CONSTRUCTION
#if PG_VERSION_NUM >= 130000
,AccessExclusiveLock
#endif
);
HNSW_DEFAULT_EF_CONSTRUCTION, HNSW_MIN_EF_CONSTRUCTION, HNSW_MAX_EF_CONSTRUCTION, AccessExclusiveLock);
DefineCustomIntVariable("hnsw.ef_search", "Sets the size of the dynamic candidate list for search",
"Valid range is 1..1000.", &hnsw_ef_search,
@@ -117,8 +110,8 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
/* Never use index without order */
if (path->indexorderbys == NULL)
{
*indexStartupCost = DBL_MAX;
*indexTotalCost = DBL_MAX;
*indexStartupCost = get_float8_infinity();
*indexTotalCost = get_float8_infinity();
*indexSelectivity = 0;
*indexCorrelation = 0;
*indexPages = 0;
@@ -159,23 +152,10 @@ hnswoptions(Datum reloptions, bool validate)
{"ef_construction", RELOPT_TYPE_INT, offsetof(HnswOptions, efConstruction)},
};
#if PG_VERSION_NUM >= 130000
return (bytea *) build_reloptions(reloptions, validate,
hnsw_relopt_kind,
sizeof(HnswOptions),
tab, lengthof(tab));
#else
relopt_value *options;
int numoptions;
HnswOptions *rdopts;
options = parseRelOptions(reloptions, validate, hnsw_relopt_kind, &numoptions);
rdopts = allocateReloptStruct(sizeof(HnswOptions), options, numoptions);
fillRelOptions((void *) rdopts, sizeof(HnswOptions), options, numoptions,
validate, tab, lengthof(tab));
return (bytea *) rdopts;
#endif
}
/*
@@ -200,9 +180,7 @@ hnswhandler(PG_FUNCTION_ARGS)
amroutine->amstrategies = 0;
amroutine->amsupport = 3;
#if PG_VERSION_NUM >= 130000
amroutine->amoptsprocnum = 0;
#endif
amroutine->amcanorder = false;
amroutine->amcanorderbyop = true;
amroutine->amcanbackward = false; /* can change direction mid-scan */
@@ -219,15 +197,11 @@ hnswhandler(PG_FUNCTION_ARGS)
amroutine->amcanbuildparallel = true;
#endif
amroutine->amcaninclude = false;
#if PG_VERSION_NUM >= 130000
amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */
#endif
#if PG_VERSION_NUM >= 160000
amroutine->amsummarizing = false;
#endif
#if PG_VERSION_NUM >= 130000
amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL;
#endif
amroutine->amkeytype = InvalidOid;
/* Interface functions */

View File

@@ -77,11 +77,6 @@
#define SeedRandom(seed) srandom(seed)
#endif
#if PG_VERSION_NUM < 130000
#define list_delete_last(list) list_truncate(list, list_length(list) - 1)
#define list_sort(list, cmp) ((list) = list_qsort(list, cmp))
#endif
#define HnswIsElementTuple(tup) ((tup)->type == HNSW_ELEMENT_TUPLE_TYPE)
#define HnswIsNeighborTuple(tup) ((tup)->type == HNSW_NEIGHBOR_TUPLE_TYPE)

View File

@@ -60,12 +60,6 @@
#include "pgstat.h"
#endif
#if PG_VERSION_NUM >= 130000
#define CALLBACK_ITEM_POINTER ItemPointer tid
#else
#define CALLBACK_ITEM_POINTER HeapTuple hup
#endif
#if PG_VERSION_NUM >= 140000
#include "utils/backend_status.h"
#include "utils/wait_event.h"
@@ -75,10 +69,6 @@
#define PARALLEL_KEY_HNSW_AREA UINT64CONST(0xA000000000000002)
#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000003)
#if PG_VERSION_NUM < 130000
#define GENERATIONCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P * 2)
#endif
/*
* Create the metapage
*/
@@ -192,7 +182,9 @@ CreateGraphPages(HnswBuildState * buildstate)
/* Initial size check */
if (etupSize > HNSW_TUPLE_ALLOC_SIZE)
elog(ERROR, "index tuple too large");
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("index tuple too large")));
HnswSetElementTuple(base, etup, element);
@@ -583,17 +575,13 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn
* Callback for table_index_build_scan
*/
static void
BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
BuildCallback(Relation index, ItemPointer tid, Datum *values,
bool *isnull, bool tupleIsAlive, void *state)
{
HnswBuildState *buildstate = (HnswBuildState *) state;
HnswGraph *graph = buildstate->graph;
MemoryContext oldCtx;
#if PG_VERSION_NUM < 130000
ItemPointer tid = &hup->t_self;
#endif
/* Skip nulls */
if (isnull[0])
return;
@@ -656,11 +644,7 @@ HnswMemoryContextAlloc(Size size, void *state)
HnswBuildState *buildstate = (HnswBuildState *) state;
void *chunk = MemoryContextAlloc(buildstate->graphCtx, size);
#if PG_VERSION_NUM >= 130000
buildstate->graphData.memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false);
#else
buildstate->graphData.memoryUsed += MAXALIGN(size);
#endif
return chunk;
}
@@ -696,17 +680,25 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index
/* Disallow varbit since require fixed dimensions */
if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID)
elog(ERROR, "type not supported for hnsw index");
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("type not supported for hnsw index")));
/* Require column to have dimensions to be indexed */
if (buildstate->dimensions < 0)
elog(ERROR, "column does not have dimensions");
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("column does not have dimensions")));
if (buildstate->dimensions > buildstate->typeInfo->maxDimensions)
elog(ERROR, "column cannot have more than %d dimensions for hnsw index", buildstate->typeInfo->maxDimensions);
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("column cannot have more than %d dimensions for hnsw index", buildstate->typeInfo->maxDimensions)));
if (buildstate->efConstruction < 2 * buildstate->m)
elog(ERROR, "ef_construction must be greater than or equal to 2 * m");
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ef_construction must be greater than or equal to 2 * m")));
buildstate->reltuples = 0;
buildstate->indtuples = 0;

View File

@@ -187,7 +187,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
so->first = false;
#if defined(HNSW_MEMORY) && PG_VERSION_NUM >= 130000
#if defined(HNSW_MEMORY)
elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(so->tmpCtx, false) / (1024 * 1024));
#endif
}

View File

@@ -5,6 +5,7 @@
#include "access/generic_xlog.h"
#include "catalog/pg_type.h"
#include "catalog/pg_type_d.h"
#include "common/hashfn.h"
#include "fmgr.h"
#include "hnsw.h"
#include "lib/pairingheap.h"
@@ -14,12 +15,6 @@
#include "utils/memdebug.h"
#include "utils/rel.h"
#if PG_VERSION_NUM >= 130000
#include "common/hashfn.h"
#else
#include "utils/hashutils.h"
#endif
#if PG_VERSION_NUM < 170000
static inline uint64
murmurhash64(uint64 data)
@@ -694,23 +689,15 @@ AddToVisited(char *base, visited_hash * v, HnswElementPtr elementPtr, Relation i
}
else if (base != NULL)
{
#if PG_VERSION_NUM >= 130000
HnswElement element = HnswPtrAccess(base, elementPtr);
offsethash_insert_hash(v->offsets, HnswPtrOffset(elementPtr), element->hash, found);
#else
offsethash_insert(v->offsets, HnswPtrOffset(elementPtr), found);
#endif
}
else
{
#if PG_VERSION_NUM >= 130000
HnswElement element = HnswPtrAccess(base, elementPtr);
pointerhash_insert_hash(v->pointers, (uintptr_t) HnswPtrPointer(elementPtr), element->hash, found);
#else
pointerhash_insert(v->pointers, (uintptr_t) HnswPtrPointer(elementPtr), found);
#endif
}
}
@@ -726,6 +713,9 @@ CountElement(HnswElement skipElement, HnswElement e)
/* Ensure does not access heaptidsLength during in-memory build */
pg_memory_barrier();
/* Keep scan-build happy on Mac x86-64 */
Assert(e);
return e->heaptidsLength != 0;
}
@@ -733,21 +723,21 @@ CountElement(HnswElement skipElement, HnswElement e)
* Load unvisited neighbors from memory
*/
static void
HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unvisited, int *unvisitedLength, visited_hash * v, int lc, HnswNeighborArray * neighborhoodData, Size neighborhoodSize)
HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unvisited, int *unvisitedLength, visited_hash * v, int lc, HnswNeighborArray * localNeighborhood, Size neighborhoodSize)
{
/* Get the neighborhood at layer lc */
HnswNeighborArray *neighborhood = HnswGetNeighbors(base, element, lc);
/* Copy neighborhood to local memory */
LWLockAcquire(&element->lock, LW_SHARED);
memcpy(neighborhoodData, neighborhood, neighborhoodSize);
memcpy(localNeighborhood, neighborhood, neighborhoodSize);
LWLockRelease(&element->lock);
*unvisitedLength = 0;
for (int i = 0; i < neighborhoodData->length; i++)
for (int i = 0; i < localNeighborhood->length; i++)
{
HnswCandidate *hc = &neighborhoodData->items[i];
HnswCandidate *hc = &localNeighborhood->items[i];
bool found;
AddToVisited(base, v, hc->element, NULL, &found);
@@ -810,7 +800,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
int wlen = 0;
visited_hash v2;
ListCell *lc2;
HnswNeighborArray *neighborhoodData = NULL;
HnswNeighborArray *localNeighborhood = NULL;
Size neighborhoodSize = 0;
int lm = HnswGetLayerM(m, lc);
HnswUnvisited *unvisited = palloc(lm * sizeof(HnswUnvisited));
@@ -826,7 +816,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
if (index == NULL)
{
neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(lm);
neighborhoodData = palloc(neighborhoodSize);
localNeighborhood = palloc(neighborhoodSize);
}
/* Add entry points to v, C, and W */
@@ -863,7 +853,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
cElement = HnswPtrAccess(base, c->element);
if (index == NULL)
HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, v, lc, neighborhoodData, neighborhoodSize);
HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, v, lc, localNeighborhood, neighborhoodSize);
else
HnswLoadUnvisitedFromDisk(cElement, unvisited, &unvisitedLength, v, index, m, lm, lc);
@@ -969,17 +959,10 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
* Compare candidate distances with pointer tie-breaker
*/
static int
#if PG_VERSION_NUM >= 130000
CompareCandidateDistances(const ListCell *a, const ListCell *b)
{
HnswCandidate *hca = lfirst(a);
HnswCandidate *hcb = lfirst(b);
#else
CompareCandidateDistances(const void *a, const void *b)
{
HnswCandidate *hca = lfirst(*(ListCell **) a);
HnswCandidate *hcb = lfirst(*(ListCell **) b);
#endif
if (hca->distance < hcb->distance)
return 1;
@@ -1000,17 +983,10 @@ CompareCandidateDistances(const void *a, const void *b)
* Compare candidate distances with offset tie-breaker
*/
static int
#if PG_VERSION_NUM >= 130000
CompareCandidateDistancesOffset(const ListCell *a, const ListCell *b)
{
HnswCandidate *hca = lfirst(a);
HnswCandidate *hcb = lfirst(b);
#else
CompareCandidateDistancesOffset(const void *a, const void *b)
{
HnswCandidate *hca = lfirst(*(ListCell **) a);
HnswCandidate *hcb = lfirst(*(ListCell **) b);
#endif
if (hca->distance < hcb->distance)
return 1;
@@ -1292,7 +1268,6 @@ RemoveElements(char *base, List *w, HnswElement skipElement)
return w2;
}
#if PG_VERSION_NUM >= 130000
/*
* Precompute hash
*/
@@ -1308,7 +1283,6 @@ PrecomputeHash(char *base, HnswElement element)
else
element->hash = hash_offset(HnswPtrOffset(ptr));
}
#endif
/*
* Algorithm 1 from paper
@@ -1323,11 +1297,9 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint
Datum q = HnswGetValue(base, element);
HnswElement skipElement = existing ? element : NULL;
#if PG_VERSION_NUM >= 130000
/* Precompute hash */
if (index == NULL)
PrecomputeHash(base, element);
#endif
/* No neighbors if no entry point */
if (entryPoint == NULL)
@@ -1390,7 +1362,9 @@ SparsevecCheckValue(Pointer v)
SparseVector *vec = (SparseVector *) v;
if (vec->nnz > HNSW_MAX_NNZ)
elog(ERROR, "sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ);
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ)));
}
/*

View File

@@ -26,12 +26,6 @@
#include "pgstat.h"
#endif
#if PG_VERSION_NUM >= 130000
#define CALLBACK_ITEM_POINTER ItemPointer tid
#else
#define CALLBACK_ITEM_POINTER HeapTuple hup
#endif
#if PG_VERSION_NUM >= 140000
#include "utils/backend_status.h"
#include "utils/wait_event.h"
@@ -96,7 +90,7 @@ AddSample(Datum *values, IvfflatBuildState * buildstate)
* Callback for sampling
*/
static void
SampleCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
SampleCallback(Relation index, ItemPointer tid, Datum *values,
bool *isnull, bool tupleIsAlive, void *state)
{
IvfflatBuildState *buildstate = (IvfflatBuildState *) state;
@@ -207,16 +201,12 @@ AddTupleToSort(Relation index, ItemPointer tid, Datum *values, IvfflatBuildState
* Callback for table_index_build_scan
*/
static void
BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
BuildCallback(Relation index, ItemPointer tid, Datum *values,
bool *isnull, bool tupleIsAlive, void *state)
{
IvfflatBuildState *buildstate = (IvfflatBuildState *) state;
MemoryContext oldCtx;
#if PG_VERSION_NUM < 130000
ItemPointer tid = &hup->t_self;
#endif
/* Skip nulls */
if (isnull[0])
return;
@@ -335,14 +325,20 @@ InitBuildState(IvfflatBuildState * buildstate, Relation heap, Relation index, In
/* Disallow varbit since require fixed dimensions */
if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID)
elog(ERROR, "type not supported for ivfflat index");
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("type not supported for ivfflat index")));
/* Require column to have dimensions to be indexed */
if (buildstate->dimensions < 0)
elog(ERROR, "column does not have dimensions");
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("column does not have dimensions")));
if (buildstate->dimensions > buildstate->typeInfo->maxDimensions)
elog(ERROR, "column cannot have more than %d dimensions for ivfflat index", buildstate->typeInfo->maxDimensions);
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("column cannot have more than %d dimensions for ivfflat index", buildstate->typeInfo->maxDimensions)));
buildstate->reltuples = 0;
buildstate->indtuples = 0;
@@ -355,7 +351,9 @@ InitBuildState(IvfflatBuildState * buildstate, Relation heap, Relation index, In
/* Require more than one dimension for spherical k-means */
if (buildstate->kmeansnormprocinfo != NULL && buildstate->dimensions == 1)
elog(ERROR, "dimensions must be greater than one for this opclass");
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("dimensions must be greater than one for this opclass")));
/* Create tuple description for sorting */
buildstate->tupdesc = CreateTemplateTupleDesc(3);

View File

@@ -7,6 +7,7 @@
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "ivfflat.h"
#include "utils/float.h"
#include "utils/guc.h"
#include "utils/selfuncs.h"
#include "utils/spccache.h"
@@ -26,11 +27,7 @@ IvfflatInit(void)
{
ivfflat_relopt_kind = add_reloption_kind();
add_int_reloption(ivfflat_relopt_kind, "lists", "Number of inverted lists",
IVFFLAT_DEFAULT_LISTS, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS
#if PG_VERSION_NUM >= 130000
,AccessExclusiveLock
#endif
);
IVFFLAT_DEFAULT_LISTS, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS, AccessExclusiveLock);
DefineCustomIntVariable("ivfflat.probes", "Sets the number of probes",
"Valid range is 1..lists.", &ivfflat_probes,
@@ -78,8 +75,8 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
/* Never use index without order */
if (path->indexorderbys == NULL)
{
*indexStartupCost = DBL_MAX;
*indexTotalCost = DBL_MAX;
*indexStartupCost = get_float8_infinity();
*indexTotalCost = get_float8_infinity();
*indexSelectivity = 0;
*indexCorrelation = 0;
*indexPages = 0;
@@ -148,23 +145,10 @@ ivfflatoptions(Datum reloptions, bool validate)
{"lists", RELOPT_TYPE_INT, offsetof(IvfflatOptions, lists)},
};
#if PG_VERSION_NUM >= 130000
return (bytea *) build_reloptions(reloptions, validate,
ivfflat_relopt_kind,
sizeof(IvfflatOptions),
tab, lengthof(tab));
#else
relopt_value *options;
int numoptions;
IvfflatOptions *rdopts;
options = parseRelOptions(reloptions, validate, ivfflat_relopt_kind, &numoptions);
rdopts = allocateReloptStruct(sizeof(IvfflatOptions), options, numoptions);
fillRelOptions((void *) rdopts, sizeof(IvfflatOptions), options, numoptions,
validate, tab, lengthof(tab));
return (bytea *) rdopts;
#endif
}
/*
@@ -189,9 +173,7 @@ ivfflathandler(PG_FUNCTION_ARGS)
amroutine->amstrategies = 0;
amroutine->amsupport = 5;
#if PG_VERSION_NUM >= 130000
amroutine->amoptsprocnum = 0;
#endif
amroutine->amcanorder = false;
amroutine->amcanorderbyop = true;
amroutine->amcanbackward = false; /* can change direction mid-scan */
@@ -208,15 +190,11 @@ ivfflathandler(PG_FUNCTION_ARGS)
amroutine->amcanbuildparallel = true;
#endif
amroutine->amcaninclude = false;
#if PG_VERSION_NUM >= 130000
amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */
#endif
#if PG_VERSION_NUM >= 160000
amroutine->amsummarizing = false;
#endif
#if PG_VERSION_NUM >= 130000
amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL;
#endif
amroutine->amkeytype = InvalidOid;
/* Interface functions */

View File

@@ -151,12 +151,8 @@ RandomCenters(Relation index, VectorArray centers, const IvfflatTypeInfo * typeI
static void
ShowMemoryUsage(MemoryContext context, Size estimatedSize)
{
#if PG_VERSION_NUM >= 130000
elog(INFO, "total memory: %zu MB",
MemoryContextMemAllocated(context, true) / (1024 * 1024));
#else
MemoryContextStats(context);
#endif
elog(INFO, "estimated memory: %zu MB", estimatedSize / (1024 * 1024));
}
#endif

View File

@@ -292,14 +292,7 @@ ivfflatrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int
IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
if (!so->first)
{
#if PG_VERSION_NUM >= 130000
tuplesort_reset(so->sortstate);
#else
tuplesort_end(so->sortstate);
so->sortstate = InitScanSortState(so->tupdesc);
#endif
}
so->first = true;
pairingheap_reset(so->listQueue);
@@ -346,7 +339,7 @@ ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
IvfflatBench("GetScanItems", GetScanItems(scan, value));
so->first = false;
#if defined(IVFFLAT_MEMORY) && PG_VERSION_NUM >= 130000
#if defined(IVFFLAT_MEMORY)
elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(CurrentMemoryContext, true) / (1024 * 1024));
#endif

View File

@@ -3,6 +3,7 @@
#include <limits.h>
#include <math.h>
#include "catalog/pg_type.h"
#include "common/string.h"
#include "fmgr.h"
#include "halfutils.h"
@@ -11,6 +12,7 @@
#include "sparsevec.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "vector.h"
#if PG_VERSION_NUM >= 120000
@@ -670,6 +672,137 @@ halfvec_to_sparsevec(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(result);
}
/*
 * Convert array to sparse vector
 *
 * Arguments (fmgr):
 *   0 - one-dimensional int4[], float4[], float8[] or numeric[] array
 *       without NULL elements
 *   1 - typmod of the target sparsevec, validated with CheckExpectedDim
 *   2 - boolean supplied by the cast machinery; not read here
 *
 * Every element is converted to float. Elements that compare equal to zero
 * after the conversion are dropped; the rest are stored with their
 * zero-based array position as the index.
 *
 * The array is walked twice: once to count the non-zero elements so the
 * result can be allocated with the exact size, and once to fill it. Both
 * passes must decode each element the same way, otherwise the count and the
 * number of stored entries diverge and the final consistency check fails.
 *
 * Errors are raised for arrays with more than one dimension, arrays that
 * contain NULLs, unsupported element types, and for whatever CheckDim,
 * CheckExpectedDim and CheckElement reject (defined elsewhere).
 */
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(array_to_sparsevec);
Datum
array_to_sparsevec(PG_FUNCTION_ARGS)
{
	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
	int32		typmod = PG_GETARG_INT32(1);
	SparseVector *result;
	int16		typlen;
	bool		typbyval;
	char		typalign;
	Datum	   *elemsp;
	int			nelemsp;
	int			nnz = 0;
	float	   *values;
	int			j = 0;

	if (ARR_NDIM(array) > 1)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_EXCEPTION),
				 errmsg("array must be 1-D")));

	if (ARR_HASNULL(array) && array_contains_nulls(array))
		ereport(ERROR,
				(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
				 errmsg("array must not contain nulls")));

	get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign);
	deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp);

	CheckDim(nelemsp);
	CheckExpectedDim(typmod, nelemsp);

#ifdef _MSC_VER
	/* /fp:fast may not propagate +/-Infinity or NaN */
#define IS_NOT_ZERO(v) (isnan((float) (v)) || isinf((float) (v)) || ((float) (v)) != 0)
#else
#define IS_NOT_ZERO(v) (((float) (v)) != 0)
#endif

	/* First pass: count the non-zero elements */
	if (ARR_ELEMTYPE(array) == INT4OID)
	{
		for (int i = 0; i < nelemsp; i++)
			nnz += IS_NOT_ZERO(DatumGetInt32(elemsp[i]));
	}
	else if (ARR_ELEMTYPE(array) == FLOAT8OID)
	{
		for (int i = 0; i < nelemsp; i++)
			nnz += IS_NOT_ZERO(DatumGetFloat8(elemsp[i]));
	}
	else if (ARR_ELEMTYPE(array) == FLOAT4OID)
	{
		for (int i = 0; i < nelemsp; i++)
			nnz += IS_NOT_ZERO(DatumGetFloat4(elemsp[i]));
	}
	else if (ARR_ELEMTYPE(array) == NUMERICOID)
	{
		/*
		 * Decode the Datum returned by numeric_float4 before testing it.
		 * Testing the raw Datum would compare the float's bit pattern as an
		 * integer and could disagree with the second pass.
		 */
		for (int i = 0; i < nelemsp; i++)
			nnz += IS_NOT_ZERO(DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i])));
	}
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_DATA_EXCEPTION),
				 errmsg("unsupported array type")));
	}

	result = InitSparseVector(nelemsp, nnz);
	values = SPARSEVEC_VALUES(result);

	/* Stores elem at position j if it is non-zero; uses i, j, result, values */
#define PROCESS_ARRAY_ELEM(elem) \
	do { \
		float	v = (float) (elem); \
		if (IS_NOT_ZERO(v)) { \
			/* Safety check */ \
			if (j >= result->nnz) \
				elog(ERROR, "safety check failed"); \
			result->indices[j] = i; \
			values[j] = v; \
			j++; \
		} \
	} while (0)

	/* Second pass: fill indices and values */
	if (ARR_ELEMTYPE(array) == INT4OID)
	{
		for (int i = 0; i < nelemsp; i++)
			PROCESS_ARRAY_ELEM(DatumGetInt32(elemsp[i]));
	}
	else if (ARR_ELEMTYPE(array) == FLOAT8OID)
	{
		for (int i = 0; i < nelemsp; i++)
			PROCESS_ARRAY_ELEM(DatumGetFloat8(elemsp[i]));
	}
	else if (ARR_ELEMTYPE(array) == FLOAT4OID)
	{
		for (int i = 0; i < nelemsp; i++)
			PROCESS_ARRAY_ELEM(DatumGetFloat4(elemsp[i]));
	}
	else if (ARR_ELEMTYPE(array) == NUMERICOID)
	{
		for (int i = 0; i < nelemsp; i++)
			PROCESS_ARRAY_ELEM(DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i])));
	}
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_DATA_EXCEPTION),
				 errmsg("unsupported array type")));
	}

#undef PROCESS_ARRAY_ELEM
#undef IS_NOT_ZERO

	/*
	 * Free allocation from deconstruct_array. Do not free individual elements
	 * when pass-by-reference since they point to original array.
	 */
	pfree(elemsp);

	/* Both passes must have agreed on the number of non-zero elements */
	if (j != result->nnz)
		elog(ERROR, "correctness check failed");

	/* Check elements */
	for (int i = 0; i < result->nnz; i++)
		CheckElement(values[i]);

	PG_RETURN_POINTER(result);
}
/*
* Get the L2 squared distance between sparse vectors
*/

View File

@@ -26,11 +26,6 @@
#include "varatt.h"
#endif
#if PG_VERSION_NUM < 130000
#define TYPALIGN_DOUBLE 'd'
#define TYPALIGN_INT 'i'
#endif
#define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1)
#define CreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1))

View File

@@ -208,6 +208,62 @@ SELECT '{1:1e-8}/1'::sparsevec::halfvec;
[0]
(1 row)
SELECT ARRAY[1,0,2,0,3,0]::sparsevec;
array
-----------------
{1:1,3:2,5:3}/6
(1 row)
SELECT ARRAY[1.0,0.0,2.0,0.0,3.0,0.0]::sparsevec;
array
-----------------
{1:1,3:2,5:3}/6
(1 row)
SELECT ARRAY[1,0,2,0,3,0]::float4[]::sparsevec;
array
-----------------
{1:1,3:2,5:3}/6
(1 row)
SELECT ARRAY[1,0,2,0,3,0]::float8[]::sparsevec;
array
-----------------
{1:1,3:2,5:3}/6
(1 row)
SELECT ARRAY[1,0,2,0,3,0]::numeric[]::sparsevec;
array
-----------------
{1:1,3:2,5:3}/6
(1 row)
SELECT '{1,0,2,0,3,0}'::real[]::sparsevec;
sparsevec
-----------------
{1:1,3:2,5:3}/6
(1 row)
SELECT '{1,0,2,0,3,0}'::real[]::sparsevec(6);
sparsevec
-----------------
{1:1,3:2,5:3}/6
(1 row)
SELECT '{1,0,2,0,3,0}'::real[]::sparsevec(5);
ERROR: expected 5 dimensions, not 6
SELECT '{NULL}'::real[]::sparsevec;
ERROR: array must not contain nulls
SELECT '{NaN}'::real[]::sparsevec;
ERROR: NaN not allowed in sparsevec
SELECT '{Infinity}'::real[]::sparsevec;
ERROR: infinite value not allowed in sparsevec
SELECT '{-Infinity}'::real[]::sparsevec;
ERROR: infinite value not allowed in sparsevec
SELECT '{}'::real[]::sparsevec;
ERROR: sparsevec must have at least 1 dimension
SELECT '{{1}}'::real[]::sparsevec;
ERROR: array must be 1-D
SELECT array_agg(n)::vector FROM generate_series(1, 16001) n;
ERROR: vector cannot have more than 16000 dimensions
SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n;

View File

@@ -58,6 +58,22 @@ SELECT '{}/16001'::sparsevec::halfvec;
SELECT '{1:65520}/1'::sparsevec::halfvec;
SELECT '{1:1e-8}/1'::sparsevec::halfvec;
SELECT ARRAY[1,0,2,0,3,0]::sparsevec;
SELECT ARRAY[1.0,0.0,2.0,0.0,3.0,0.0]::sparsevec;
SELECT ARRAY[1,0,2,0,3,0]::float4[]::sparsevec;
SELECT ARRAY[1,0,2,0,3,0]::float8[]::sparsevec;
SELECT ARRAY[1,0,2,0,3,0]::numeric[]::sparsevec;
SELECT '{1,0,2,0,3,0}'::real[]::sparsevec;
SELECT '{1,0,2,0,3,0}'::real[]::sparsevec(6);
SELECT '{1,0,2,0,3,0}'::real[]::sparsevec(5);
SELECT '{NULL}'::real[]::sparsevec;
SELECT '{NaN}'::real[]::sparsevec;
SELECT '{Infinity}'::real[]::sparsevec;
SELECT '{-Infinity}'::real[]::sparsevec;
SELECT '{}'::real[]::sparsevec;
SELECT '{{1}}'::real[]::sparsevec;
SELECT array_agg(n)::vector FROM generate_series(1, 16001) n;
SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n;