diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c9ec6cb..a4682e7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,14 +13,12 @@ jobs: - postgres: 17 os: ubuntu-24.04 - postgres: 16 - os: ubuntu-24.04 + os: ubuntu-22.04 - postgres: 15 os: ubuntu-22.04 - postgres: 14 - os: ubuntu-22.04 - - postgres: 13 os: ubuntu-20.04 - - postgres: 12 + - postgres: 13 os: ubuntu-20.04 steps: - uses: actions/checkout@v4 diff --git a/CHANGELOG.md b/CHANGELOG.md index 246bba0..db6798c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ -## 0.7.5 (unreleased) +## 0.8.0 (unreleased) +- Added casts for arrays to `sparsevec` - Reduced memory usage for HNSW index scans +- Dropped support for Postgres 12 ## 0.7.4 (2024-08-05) diff --git a/sql/vector--0.7.4--0.8.0.sql b/sql/vector--0.7.4--0.8.0.sql new file mode 100644 index 0000000..e00348d --- /dev/null +++ b/sql/vector--0.7.4--0.8.0.sql @@ -0,0 +1,26 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.8.0'" to load this file. 
\quit + +CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(real[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(double precision[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(numeric[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE CAST (integer[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(numeric[], integer, boolean) AS ASSIGNMENT; diff --git a/sql/vector.sql b/sql/vector.sql index 32eb834..7fc3671 100644 --- a/sql/vector.sql +++ b/sql/vector.sql @@ -782,6 +782,18 @@ CREATE FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) RETURNS sparseve CREATE FUNCTION sparsevec_to_halfvec(sparsevec, integer, boolean) RETURNS halfvec AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; +CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(real[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(double precision[], integer, boolean) RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + +CREATE FUNCTION array_to_sparsevec(numeric[], integer, boolean) 
RETURNS sparsevec + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + -- sparsevec casts CREATE CAST (sparsevec AS sparsevec) @@ -799,6 +811,18 @@ CREATE CAST (sparsevec AS halfvec) CREATE CAST (halfvec AS sparsevec) WITH FUNCTION halfvec_to_sparsevec(halfvec, integer, boolean) AS IMPLICIT; +CREATE CAST (integer[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(integer[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (real[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(real[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (double precision[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(double precision[], integer, boolean) AS ASSIGNMENT; + +CREATE CAST (numeric[] AS sparsevec) + WITH FUNCTION array_to_sparsevec(numeric[], integer, boolean) AS ASSIGNMENT; + -- sparsevec operators CREATE OPERATOR <-> ( diff --git a/src/bitutils.h b/src/bitutils.h index b834190..b4d85bb 100644 --- a/src/bitutils.h +++ b/src/bitutils.h @@ -4,8 +4,8 @@ #include "postgres.h" /* Check version in first header */ -#if PG_VERSION_NUM < 120000 -#error "Requires PostgreSQL 12+" +#if PG_VERSION_NUM < 130000 +#error "Requires PostgreSQL 13+" #endif extern uint64 (*BitHammingDistance) (uint32 bytes, unsigned char *ax, unsigned char *bx, uint64 distance); diff --git a/src/halfvec.c b/src/halfvec.c index 2cb1127..9cd3de6 100644 --- a/src/halfvec.c +++ b/src/halfvec.c @@ -19,11 +19,6 @@ #include "utils/numeric.h" #include "vector.h" -#if PG_VERSION_NUM < 130000 -#define TYPALIGN_DOUBLE 'd' -#define TYPALIGN_INT 'i' -#endif - #define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1) #define CreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1)) diff --git a/src/hnsw.c b/src/hnsw.c index 3e08eb1..f4b2b2b 100644 --- a/src/hnsw.c +++ b/src/hnsw.c @@ -9,6 +9,7 @@ #include "commands/vacuum.h" #include "hnsw.h" #include "miscadmin.h" +#include "utils/float.h" #include "utils/guc.h" #include "utils/selfuncs.h" @@ -60,17 +61,9 @@ HnswInit(void) hnsw_relopt_kind = add_reloption_kind(); 
add_int_reloption(hnsw_relopt_kind, "m", "Max number of connections", - HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M -#if PG_VERSION_NUM >= 130000 - ,AccessExclusiveLock -#endif - ); + HNSW_DEFAULT_M, HNSW_MIN_M, HNSW_MAX_M, AccessExclusiveLock); add_int_reloption(hnsw_relopt_kind, "ef_construction", "Size of the dynamic candidate list for construction", - HNSW_DEFAULT_EF_CONSTRUCTION, HNSW_MIN_EF_CONSTRUCTION, HNSW_MAX_EF_CONSTRUCTION -#if PG_VERSION_NUM >= 130000 - ,AccessExclusiveLock -#endif - ); + HNSW_DEFAULT_EF_CONSTRUCTION, HNSW_MIN_EF_CONSTRUCTION, HNSW_MAX_EF_CONSTRUCTION, AccessExclusiveLock); DefineCustomIntVariable("hnsw.ef_search", "Sets the size of the dynamic candidate list for search", "Valid range is 1..1000.", &hnsw_ef_search, @@ -117,8 +110,8 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, /* Never use index without order */ if (path->indexorderbys == NULL) { - *indexStartupCost = DBL_MAX; - *indexTotalCost = DBL_MAX; + *indexStartupCost = get_float8_infinity(); + *indexTotalCost = get_float8_infinity(); *indexSelectivity = 0; *indexCorrelation = 0; *indexPages = 0; @@ -159,23 +152,10 @@ hnswoptions(Datum reloptions, bool validate) {"ef_construction", RELOPT_TYPE_INT, offsetof(HnswOptions, efConstruction)}, }; -#if PG_VERSION_NUM >= 130000 return (bytea *) build_reloptions(reloptions, validate, hnsw_relopt_kind, sizeof(HnswOptions), tab, lengthof(tab)); -#else - relopt_value *options; - int numoptions; - HnswOptions *rdopts; - - options = parseRelOptions(reloptions, validate, hnsw_relopt_kind, &numoptions); - rdopts = allocateReloptStruct(sizeof(HnswOptions), options, numoptions); - fillRelOptions((void *) rdopts, sizeof(HnswOptions), options, numoptions, - validate, tab, lengthof(tab)); - - return (bytea *) rdopts; -#endif } /* @@ -200,9 +180,7 @@ hnswhandler(PG_FUNCTION_ARGS) amroutine->amstrategies = 0; amroutine->amsupport = 3; -#if PG_VERSION_NUM >= 130000 amroutine->amoptsprocnum = 0; -#endif amroutine->amcanorder = 
false; amroutine->amcanorderbyop = true; amroutine->amcanbackward = false; /* can change direction mid-scan */ @@ -219,15 +197,11 @@ hnswhandler(PG_FUNCTION_ARGS) amroutine->amcanbuildparallel = true; #endif amroutine->amcaninclude = false; -#if PG_VERSION_NUM >= 130000 amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */ -#endif #if PG_VERSION_NUM >= 160000 amroutine->amsummarizing = false; #endif -#if PG_VERSION_NUM >= 130000 amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL; -#endif amroutine->amkeytype = InvalidOid; /* Interface functions */ diff --git a/src/hnsw.h b/src/hnsw.h index feca91c..f958222 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -77,11 +77,6 @@ #define SeedRandom(seed) srandom(seed) #endif -#if PG_VERSION_NUM < 130000 -#define list_delete_last(list) list_truncate(list, list_length(list) - 1) -#define list_sort(list, cmp) ((list) = list_qsort(list, cmp)) -#endif - #define HnswIsElementTuple(tup) ((tup)->type == HNSW_ELEMENT_TUPLE_TYPE) #define HnswIsNeighborTuple(tup) ((tup)->type == HNSW_NEIGHBOR_TUPLE_TYPE) diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 727eec3..498b5d9 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -60,12 +60,6 @@ #include "pgstat.h" #endif -#if PG_VERSION_NUM >= 130000 -#define CALLBACK_ITEM_POINTER ItemPointer tid -#else -#define CALLBACK_ITEM_POINTER HeapTuple hup -#endif - #if PG_VERSION_NUM >= 140000 #include "utils/backend_status.h" #include "utils/wait_event.h" @@ -75,10 +69,6 @@ #define PARALLEL_KEY_HNSW_AREA UINT64CONST(0xA000000000000002) #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xA000000000000003) -#if PG_VERSION_NUM < 130000 -#define GENERATIONCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P * 2) -#endif - /* * Create the metapage */ @@ -192,7 +182,9 @@ CreateGraphPages(HnswBuildState * buildstate) /* Initial size check */ if (etupSize > HNSW_TUPLE_ALLOC_SIZE) - elog(ERROR, "index tuple too large"); + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + 
errmsg("index tuple too large"))); HnswSetElementTuple(base, etup, element); @@ -583,17 +575,13 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn * Callback for table_index_build_scan */ static void -BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, +BuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state) { HnswBuildState *buildstate = (HnswBuildState *) state; HnswGraph *graph = buildstate->graph; MemoryContext oldCtx; -#if PG_VERSION_NUM < 130000 - ItemPointer tid = &hup->t_self; -#endif - /* Skip nulls */ if (isnull[0]) return; @@ -656,11 +644,7 @@ HnswMemoryContextAlloc(Size size, void *state) HnswBuildState *buildstate = (HnswBuildState *) state; void *chunk = MemoryContextAlloc(buildstate->graphCtx, size); -#if PG_VERSION_NUM >= 130000 buildstate->graphData.memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false); -#else - buildstate->graphData.memoryUsed += MAXALIGN(size); -#endif return chunk; } @@ -696,17 +680,25 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index /* Disallow varbit since require fixed dimensions */ if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID) - elog(ERROR, "type not supported for hnsw index"); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("type not supported for hnsw index"))); /* Require column to have dimensions to be indexed */ if (buildstate->dimensions < 0) - elog(ERROR, "column does not have dimensions"); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("column does not have dimensions"))); if (buildstate->dimensions > buildstate->typeInfo->maxDimensions) - elog(ERROR, "column cannot have more than %d dimensions for hnsw index", buildstate->typeInfo->maxDimensions); + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("column cannot have more than %d dimensions for hnsw index", 
buildstate->typeInfo->maxDimensions))); if (buildstate->efConstruction < 2 * buildstate->m) - elog(ERROR, "ef_construction must be greater than or equal to 2 * m"); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ef_construction must be greater than or equal to 2 * m"))); buildstate->reltuples = 0; buildstate->indtuples = 0; diff --git a/src/hnswscan.c b/src/hnswscan.c index 9b8c9a3..44f1e65 100644 --- a/src/hnswscan.c +++ b/src/hnswscan.c @@ -187,7 +187,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir) so->first = false; -#if defined(HNSW_MEMORY) && PG_VERSION_NUM >= 130000 +#if defined(HNSW_MEMORY) elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(so->tmpCtx, false) / (1024 * 1024)); #endif } diff --git a/src/hnswutils.c b/src/hnswutils.c index 047e6a0..ed56ed7 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -5,6 +5,7 @@ #include "access/generic_xlog.h" #include "catalog/pg_type.h" #include "catalog/pg_type_d.h" +#include "common/hashfn.h" #include "fmgr.h" #include "hnsw.h" #include "lib/pairingheap.h" @@ -14,12 +15,6 @@ #include "utils/memdebug.h" #include "utils/rel.h" -#if PG_VERSION_NUM >= 130000 -#include "common/hashfn.h" -#else -#include "utils/hashutils.h" -#endif - #if PG_VERSION_NUM < 170000 static inline uint64 murmurhash64(uint64 data) @@ -694,23 +689,15 @@ AddToVisited(char *base, visited_hash * v, HnswElementPtr elementPtr, Relation i } else if (base != NULL) { -#if PG_VERSION_NUM >= 130000 HnswElement element = HnswPtrAccess(base, elementPtr); offsethash_insert_hash(v->offsets, HnswPtrOffset(elementPtr), element->hash, found); -#else - offsethash_insert(v->offsets, HnswPtrOffset(elementPtr), found); -#endif } else { -#if PG_VERSION_NUM >= 130000 HnswElement element = HnswPtrAccess(base, elementPtr); pointerhash_insert_hash(v->pointers, (uintptr_t) HnswPtrPointer(elementPtr), element->hash, found); -#else - pointerhash_insert(v->pointers, (uintptr_t) HnswPtrPointer(elementPtr), found); -#endif } } @@ -726,6 
+713,9 @@ CountElement(HnswElement skipElement, HnswElement e) /* Ensure does not access heaptidsLength during in-memory build */ pg_memory_barrier(); + /* Keep scan-build happy on Mac x86-64 */ + Assert(e); + return e->heaptidsLength != 0; } @@ -733,21 +723,21 @@ CountElement(HnswElement skipElement, HnswElement e) * Load unvisited neighbors from memory */ static void -HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unvisited, int *unvisitedLength, visited_hash * v, int lc, HnswNeighborArray * neighborhoodData, Size neighborhoodSize) +HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unvisited, int *unvisitedLength, visited_hash * v, int lc, HnswNeighborArray * localNeighborhood, Size neighborhoodSize) { /* Get the neighborhood at layer lc */ HnswNeighborArray *neighborhood = HnswGetNeighbors(base, element, lc); /* Copy neighborhood to local memory */ LWLockAcquire(&element->lock, LW_SHARED); - memcpy(neighborhoodData, neighborhood, neighborhoodSize); + memcpy(localNeighborhood, neighborhood, neighborhoodSize); LWLockRelease(&element->lock); *unvisitedLength = 0; - for (int i = 0; i < neighborhoodData->length; i++) + for (int i = 0; i < localNeighborhood->length; i++) { - HnswCandidate *hc = &neighborhoodData->items[i]; + HnswCandidate *hc = &localNeighborhood->items[i]; bool found; AddToVisited(base, v, hc->element, NULL, &found); @@ -810,7 +800,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F int wlen = 0; visited_hash v2; ListCell *lc2; - HnswNeighborArray *neighborhoodData = NULL; + HnswNeighborArray *localNeighborhood = NULL; Size neighborhoodSize = 0; int lm = HnswGetLayerM(m, lc); HnswUnvisited *unvisited = palloc(lm * sizeof(HnswUnvisited)); @@ -826,7 +816,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F if (index == NULL) { neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(lm); - neighborhoodData = palloc(neighborhoodSize); + 
localNeighborhood = palloc(neighborhoodSize); } /* Add entry points to v, C, and W */ @@ -863,7 +853,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F cElement = HnswPtrAccess(base, c->element); if (index == NULL) - HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, v, lc, neighborhoodData, neighborhoodSize); + HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, v, lc, localNeighborhood, neighborhoodSize); else HnswLoadUnvisitedFromDisk(cElement, unvisited, &unvisitedLength, v, index, m, lm, lc); @@ -969,17 +959,10 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F * Compare candidate distances with pointer tie-breaker */ static int -#if PG_VERSION_NUM >= 130000 CompareCandidateDistances(const ListCell *a, const ListCell *b) { HnswCandidate *hca = lfirst(a); HnswCandidate *hcb = lfirst(b); -#else -CompareCandidateDistances(const void *a, const void *b) -{ - HnswCandidate *hca = lfirst(*(ListCell **) a); - HnswCandidate *hcb = lfirst(*(ListCell **) b); -#endif if (hca->distance < hcb->distance) return 1; @@ -1000,17 +983,10 @@ CompareCandidateDistances(const void *a, const void *b) * Compare candidate distances with offset tie-breaker */ static int -#if PG_VERSION_NUM >= 130000 CompareCandidateDistancesOffset(const ListCell *a, const ListCell *b) { HnswCandidate *hca = lfirst(a); HnswCandidate *hcb = lfirst(b); -#else -CompareCandidateDistancesOffset(const void *a, const void *b) -{ - HnswCandidate *hca = lfirst(*(ListCell **) a); - HnswCandidate *hcb = lfirst(*(ListCell **) b); -#endif if (hca->distance < hcb->distance) return 1; @@ -1292,7 +1268,6 @@ RemoveElements(char *base, List *w, HnswElement skipElement) return w2; } -#if PG_VERSION_NUM >= 130000 /* * Precompute hash */ @@ -1308,7 +1283,6 @@ PrecomputeHash(char *base, HnswElement element) else element->hash = hash_offset(HnswPtrOffset(ptr)); } -#endif /* * Algorithm 1 from paper @@ -1323,11 +1297,9 
@@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint Datum q = HnswGetValue(base, element); HnswElement skipElement = existing ? element : NULL; -#if PG_VERSION_NUM >= 130000 /* Precompute hash */ if (index == NULL) PrecomputeHash(base, element); -#endif /* No neighbors if no entry point */ if (entryPoint == NULL) @@ -1390,7 +1362,9 @@ SparsevecCheckValue(Pointer v) SparseVector *vec = (SparseVector *) v; if (vec->nnz > HNSW_MAX_NNZ) - elog(ERROR, "sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ); + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ))); } /* diff --git a/src/ivfbuild.c b/src/ivfbuild.c index 3c52278..85a247f 100644 --- a/src/ivfbuild.c +++ b/src/ivfbuild.c @@ -26,12 +26,6 @@ #include "pgstat.h" #endif -#if PG_VERSION_NUM >= 130000 -#define CALLBACK_ITEM_POINTER ItemPointer tid -#else -#define CALLBACK_ITEM_POINTER HeapTuple hup -#endif - #if PG_VERSION_NUM >= 140000 #include "utils/backend_status.h" #include "utils/wait_event.h" @@ -96,7 +90,7 @@ AddSample(Datum *values, IvfflatBuildState * buildstate) * Callback for sampling */ static void -SampleCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, +SampleCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state) { IvfflatBuildState *buildstate = (IvfflatBuildState *) state; @@ -207,16 +201,12 @@ AddTupleToSort(Relation index, ItemPointer tid, Datum *values, IvfflatBuildState * Callback for table_index_build_scan */ static void -BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values, +BuildCallback(Relation index, ItemPointer tid, Datum *values, bool *isnull, bool tupleIsAlive, void *state) { IvfflatBuildState *buildstate = (IvfflatBuildState *) state; MemoryContext oldCtx; -#if PG_VERSION_NUM < 130000 - ItemPointer tid = &hup->t_self; -#endif - /* Skip nulls */ if 
(isnull[0]) return; @@ -335,14 +325,20 @@ InitBuildState(IvfflatBuildState * buildstate, Relation heap, Relation index, In /* Disallow varbit since require fixed dimensions */ if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID) - elog(ERROR, "type not supported for ivfflat index"); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("type not supported for ivfflat index"))); /* Require column to have dimensions to be indexed */ if (buildstate->dimensions < 0) - elog(ERROR, "column does not have dimensions"); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("column does not have dimensions"))); if (buildstate->dimensions > buildstate->typeInfo->maxDimensions) - elog(ERROR, "column cannot have more than %d dimensions for ivfflat index", buildstate->typeInfo->maxDimensions); + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("column cannot have more than %d dimensions for ivfflat index", buildstate->typeInfo->maxDimensions))); buildstate->reltuples = 0; buildstate->indtuples = 0; @@ -355,7 +351,9 @@ InitBuildState(IvfflatBuildState * buildstate, Relation heap, Relation index, In /* Require more than one dimension for spherical k-means */ if (buildstate->kmeansnormprocinfo != NULL && buildstate->dimensions == 1) - elog(ERROR, "dimensions must be greater than one for this opclass"); + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("dimensions must be greater than one for this opclass"))); /* Create tuple description for sorting */ buildstate->tupdesc = CreateTemplateTupleDesc(3); diff --git a/src/ivfflat.c b/src/ivfflat.c index 9aca936..4e9b9a4 100644 --- a/src/ivfflat.c +++ b/src/ivfflat.c @@ -7,6 +7,7 @@ #include "commands/progress.h" #include "commands/vacuum.h" #include "ivfflat.h" +#include "utils/float.h" #include "utils/guc.h" #include "utils/selfuncs.h" #include "utils/spccache.h" @@ -26,11 +27,7 @@ IvfflatInit(void) { ivfflat_relopt_kind = add_reloption_kind(); 
add_int_reloption(ivfflat_relopt_kind, "lists", "Number of inverted lists", - IVFFLAT_DEFAULT_LISTS, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS -#if PG_VERSION_NUM >= 130000 - ,AccessExclusiveLock -#endif - ); + IVFFLAT_DEFAULT_LISTS, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS, AccessExclusiveLock); DefineCustomIntVariable("ivfflat.probes", "Sets the number of probes", "Valid range is 1..lists.", &ivfflat_probes, @@ -78,8 +75,8 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, /* Never use index without order */ if (path->indexorderbys == NULL) { - *indexStartupCost = DBL_MAX; - *indexTotalCost = DBL_MAX; + *indexStartupCost = get_float8_infinity(); + *indexTotalCost = get_float8_infinity(); *indexSelectivity = 0; *indexCorrelation = 0; *indexPages = 0; @@ -148,23 +145,10 @@ ivfflatoptions(Datum reloptions, bool validate) {"lists", RELOPT_TYPE_INT, offsetof(IvfflatOptions, lists)}, }; -#if PG_VERSION_NUM >= 130000 return (bytea *) build_reloptions(reloptions, validate, ivfflat_relopt_kind, sizeof(IvfflatOptions), tab, lengthof(tab)); -#else - relopt_value *options; - int numoptions; - IvfflatOptions *rdopts; - - options = parseRelOptions(reloptions, validate, ivfflat_relopt_kind, &numoptions); - rdopts = allocateReloptStruct(sizeof(IvfflatOptions), options, numoptions); - fillRelOptions((void *) rdopts, sizeof(IvfflatOptions), options, numoptions, - validate, tab, lengthof(tab)); - - return (bytea *) rdopts; -#endif } /* @@ -189,9 +173,7 @@ ivfflathandler(PG_FUNCTION_ARGS) amroutine->amstrategies = 0; amroutine->amsupport = 5; -#if PG_VERSION_NUM >= 130000 amroutine->amoptsprocnum = 0; -#endif amroutine->amcanorder = false; amroutine->amcanorderbyop = true; amroutine->amcanbackward = false; /* can change direction mid-scan */ @@ -208,15 +190,11 @@ ivfflathandler(PG_FUNCTION_ARGS) amroutine->amcanbuildparallel = true; #endif amroutine->amcaninclude = false; -#if PG_VERSION_NUM >= 130000 amroutine->amusemaintenanceworkmem = false; /* not used during 
VACUUM */ -#endif #if PG_VERSION_NUM >= 160000 amroutine->amsummarizing = false; #endif -#if PG_VERSION_NUM >= 130000 amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL; -#endif amroutine->amkeytype = InvalidOid; /* Interface functions */ diff --git a/src/ivfkmeans.c b/src/ivfkmeans.c index 003d55d..4b6d14f 100644 --- a/src/ivfkmeans.c +++ b/src/ivfkmeans.c @@ -151,12 +151,8 @@ RandomCenters(Relation index, VectorArray centers, const IvfflatTypeInfo * typeI static void ShowMemoryUsage(MemoryContext context, Size estimatedSize) { -#if PG_VERSION_NUM >= 130000 elog(INFO, "total memory: %zu MB", MemoryContextMemAllocated(context, true) / (1024 * 1024)); -#else - MemoryContextStats(context); -#endif elog(INFO, "estimated memory: %zu MB", estimatedSize / (1024 * 1024)); } #endif diff --git a/src/ivfscan.c b/src/ivfscan.c index 78f949e..1e95cd6 100644 --- a/src/ivfscan.c +++ b/src/ivfscan.c @@ -292,14 +292,7 @@ ivfflatrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque; if (!so->first) - { -#if PG_VERSION_NUM >= 130000 tuplesort_reset(so->sortstate); -#else - tuplesort_end(so->sortstate); - so->sortstate = InitScanSortState(so->tupdesc); -#endif - } so->first = true; pairingheap_reset(so->listQueue); @@ -346,7 +339,7 @@ ivfflatgettuple(IndexScanDesc scan, ScanDirection dir) IvfflatBench("GetScanItems", GetScanItems(scan, value)); so->first = false; -#if defined(IVFFLAT_MEMORY) && PG_VERSION_NUM >= 130000 +#if defined(IVFFLAT_MEMORY) elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(CurrentMemoryContext, true) / (1024 * 1024)); #endif diff --git a/src/sparsevec.c b/src/sparsevec.c index 4211fd8..44d765b 100644 --- a/src/sparsevec.c +++ b/src/sparsevec.c @@ -3,6 +3,7 @@ #include #include +#include "catalog/pg_type.h" #include "common/string.h" #include "fmgr.h" #include "halfutils.h" @@ -11,6 +12,7 @@ #include "sparsevec.h" #include "utils/array.h" #include 
"utils/builtins.h" +#include "utils/lsyscache.h" #include "vector.h" #if PG_VERSION_NUM >= 120000 @@ -670,6 +672,137 @@ halfvec_to_sparsevec(PG_FUNCTION_ARGS) PG_RETURN_POINTER(result); } +/* + * Convert array to sparse vector + */ +FUNCTION_PREFIX PG_FUNCTION_INFO_V1(array_to_sparsevec); +Datum +array_to_sparsevec(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + int32 typmod = PG_GETARG_INT32(1); + SparseVector *result; + int16 typlen; + bool typbyval; + char typalign; + Datum *elemsp; + int nelemsp; + int nnz = 0; + float *values; + int j = 0; + + if (ARR_NDIM(array) > 1) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("array must be 1-D"))); + + if (ARR_HASNULL(array) && array_contains_nulls(array)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("array must not contain nulls"))); + + get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign); + deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp); + + CheckDim(nelemsp); + CheckExpectedDim(typmod, nelemsp); + +#ifdef _MSC_VER +/* /fp:fast may not propagate +/-Infinity or NaN */ +#define IS_NOT_ZERO(v) (isnan((float) (v)) || isinf((float) (v)) || ((float) (v)) != 0) +#else +#define IS_NOT_ZERO(v) (((float) (v)) != 0) +#endif + + if (ARR_ELEMTYPE(array) == INT4OID) + { + for (int i = 0; i < nelemsp; i++) + nnz += IS_NOT_ZERO(DatumGetInt32(elemsp[i])); + } + else if (ARR_ELEMTYPE(array) == FLOAT8OID) + { + for (int i = 0; i < nelemsp; i++) + nnz += IS_NOT_ZERO(DatumGetFloat8(elemsp[i])); + } + else if (ARR_ELEMTYPE(array) == FLOAT4OID) + { + for (int i = 0; i < nelemsp; i++) + nnz += IS_NOT_ZERO(DatumGetFloat4(elemsp[i])); + } + else if (ARR_ELEMTYPE(array) == NUMERICOID) + { + for (int i = 0; i < nelemsp; i++) + nnz += IS_NOT_ZERO(DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i]))); /* decode the float4 Datum; casting the raw Datum would test its bit pattern, not its value */ + } + else + { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("unsupported array type"))); + } + + result = 
InitSparseVector(nelemsp, nnz); + values = SPARSEVEC_VALUES(result); + +#define PROCESS_ARRAY_ELEM(elem) \ + do { \ + float v = (float) (elem); \ + if (IS_NOT_ZERO(v)) { \ + /* Safety check */ \ + if (j >= result->nnz) \ + elog(ERROR, "safety check failed"); \ + result->indices[j] = i; \ + values[j] = v; \ + j++; \ + } \ + } while (0) + + if (ARR_ELEMTYPE(array) == INT4OID) + { + for (int i = 0; i < nelemsp; i++) + PROCESS_ARRAY_ELEM(DatumGetInt32(elemsp[i])); + } + else if (ARR_ELEMTYPE(array) == FLOAT8OID) + { + for (int i = 0; i < nelemsp; i++) + PROCESS_ARRAY_ELEM(DatumGetFloat8(elemsp[i])); + } + else if (ARR_ELEMTYPE(array) == FLOAT4OID) + { + for (int i = 0; i < nelemsp; i++) + PROCESS_ARRAY_ELEM(DatumGetFloat4(elemsp[i])); + } + else if (ARR_ELEMTYPE(array) == NUMERICOID) + { + for (int i = 0; i < nelemsp; i++) + PROCESS_ARRAY_ELEM(DatumGetFloat4(DirectFunctionCall1(numeric_float4, elemsp[i]))); + } + else + { + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("unsupported array type"))); + } + +#undef PROCESS_ARRAY_ELEM +#undef IS_NOT_ZERO + + /* + * Free allocation from deconstruct_array. Do not free individual elements + * when pass-by-reference since they point to original array. 
+ */ + pfree(elemsp); + + if (j != result->nnz) + elog(ERROR, "correctness check failed"); + + /* Check elements */ + for (int i = 0; i < result->nnz; i++) + CheckElement(values[i]); + + PG_RETURN_POINTER(result); +} + /* * Get the L2 squared distance between sparse vectors */ diff --git a/src/vector.c b/src/vector.c index a0e2c3f..facc07e 100644 --- a/src/vector.c +++ b/src/vector.c @@ -26,11 +26,6 @@ #include "varatt.h" #endif -#if PG_VERSION_NUM < 130000 -#define TYPALIGN_DOUBLE 'd' -#define TYPALIGN_INT 'i' -#endif - #define STATE_DIMS(x) (ARR_DIMS(x)[0] - 1) #define CreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1)) diff --git a/test/expected/cast.out b/test/expected/cast.out index 1aba43c..c180fe6 100644 --- a/test/expected/cast.out +++ b/test/expected/cast.out @@ -208,6 +208,62 @@ SELECT '{1:1e-8}/1'::sparsevec::halfvec; [0] (1 row) +SELECT ARRAY[1,0,2,0,3,0]::sparsevec; + array +----------------- + {1:1,3:2,5:3}/6 +(1 row) + +SELECT ARRAY[1.0,0.0,2.0,0.0,3.0,0.0]::sparsevec; + array +----------------- + {1:1,3:2,5:3}/6 +(1 row) + +SELECT ARRAY[1,0,2,0,3,0]::float4[]::sparsevec; + array +----------------- + {1:1,3:2,5:3}/6 +(1 row) + +SELECT ARRAY[1,0,2,0,3,0]::float8[]::sparsevec; + array +----------------- + {1:1,3:2,5:3}/6 +(1 row) + +SELECT ARRAY[1,0,2,0,3,0]::numeric[]::sparsevec; + array +----------------- + {1:1,3:2,5:3}/6 +(1 row) + +SELECT '{1,0,2,0,3,0}'::real[]::sparsevec; + sparsevec +----------------- + {1:1,3:2,5:3}/6 +(1 row) + +SELECT '{1,0,2,0,3,0}'::real[]::sparsevec(6); + sparsevec +----------------- + {1:1,3:2,5:3}/6 +(1 row) + +SELECT '{1,0,2,0,3,0}'::real[]::sparsevec(5); +ERROR: expected 5 dimensions, not 6 +SELECT '{NULL}'::real[]::sparsevec; +ERROR: array must not contain nulls +SELECT '{NaN}'::real[]::sparsevec; +ERROR: NaN not allowed in sparsevec +SELECT '{Infinity}'::real[]::sparsevec; +ERROR: infinite value not allowed in sparsevec +SELECT '{-Infinity}'::real[]::sparsevec; +ERROR: infinite value not allowed in sparsevec 
+SELECT '{}'::real[]::sparsevec; +ERROR: sparsevec must have at least 1 dimension +SELECT '{{1}}'::real[]::sparsevec; +ERROR: array must be 1-D SELECT array_agg(n)::vector FROM generate_series(1, 16001) n; ERROR: vector cannot have more than 16000 dimensions SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n; diff --git a/test/sql/cast.sql b/test/sql/cast.sql index cd2eb3c..fe83931 100644 --- a/test/sql/cast.sql +++ b/test/sql/cast.sql @@ -58,6 +58,22 @@ SELECT '{}/16001'::sparsevec::halfvec; SELECT '{1:65520}/1'::sparsevec::halfvec; SELECT '{1:1e-8}/1'::sparsevec::halfvec; +SELECT ARRAY[1,0,2,0,3,0]::sparsevec; +SELECT ARRAY[1.0,0.0,2.0,0.0,3.0,0.0]::sparsevec; +SELECT ARRAY[1,0,2,0,3,0]::float4[]::sparsevec; +SELECT ARRAY[1,0,2,0,3,0]::float8[]::sparsevec; +SELECT ARRAY[1,0,2,0,3,0]::numeric[]::sparsevec; + +SELECT '{1,0,2,0,3,0}'::real[]::sparsevec; +SELECT '{1,0,2,0,3,0}'::real[]::sparsevec(6); +SELECT '{1,0,2,0,3,0}'::real[]::sparsevec(5); +SELECT '{NULL}'::real[]::sparsevec; +SELECT '{NaN}'::real[]::sparsevec; +SELECT '{Infinity}'::real[]::sparsevec; +SELECT '{-Infinity}'::real[]::sparsevec; +SELECT '{}'::real[]::sparsevec; +SELECT '{{1}}'::real[]::sparsevec; + SELECT array_agg(n)::vector FROM generate_series(1, 16001) n; SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n;