From 44d8d28b40ddaca6b79ecf598c42996f5ad71ac5 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 27 Sep 2024 13:39:54 -0700 Subject: [PATCH 01/12] Added note about postgresql@17 formula [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7439964..c832e9e 100644 --- a/README.md +++ b/README.md @@ -1064,7 +1064,7 @@ With Homebrew Postgres, you can use: brew install pgvector ``` -Note: This only adds it to the `postgresql@14` formula +Note: This only adds it to the `postgresql@17` and `postgresql@14` formulas ### PGXN From 74020a90dac83024aafc9593985f5beacce813de Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 27 Sep 2024 13:49:43 -0700 Subject: [PATCH 02/12] Updated package versions in readme [skip ci] --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c832e9e..6e3af41 100644 --- a/README.md +++ b/README.md @@ -1079,29 +1079,29 @@ pgxn install vector Debian and Ubuntu packages are available from the [PostgreSQL APT Repository](https://wiki.postgresql.org/wiki/Apt). Follow the [setup instructions](https://wiki.postgresql.org/wiki/Apt#Quickstart) and run: ```sh -sudo apt install postgresql-16-pgvector +sudo apt install postgresql-17-pgvector ``` -Note: Replace `16` with your Postgres server version +Note: Replace `17` with your Postgres server version ### Yum RPM packages are available from the [PostgreSQL Yum Repository](https://yum.postgresql.org/). Follow the [setup instructions](https://www.postgresql.org/download/linux/redhat/) for your distribution and run: ```sh -sudo yum install pgvector_16 +sudo yum install pgvector_17 # or -sudo dnf install pgvector_16 +sudo dnf install pgvector_17 ``` -Note: Replace `16` with your Postgres server version +Note: Replace `17` with your Postgres server version ### pkg Install the FreeBSD package with: ```sh -pkg install postgresql15-pgvector +pkg install postgresql16-pgvector ``` or the port with: From 2bca4e406b19108d5525537f30e92b2e781b078b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 27 Sep 2024 13:50:57 -0700 Subject: [PATCH 03/12] Restored quarterly package version for FreeBSD in readme [skip ci] --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e3af41..14ab90a 100644 --- a/README.md +++ b/README.md @@ -1101,7 +1101,7 @@ Note: Replace `17` with your Postgres server version Install the FreeBSD package with: ```sh -pkg install postgresql16-pgvector +pkg install postgresql15-pgvector ``` or the port with: From daf9c5c7431d85667151622a26bba6889d03fb83 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 27 Sep 2024 13:52:11 -0700 Subject: [PATCH 04/12] Updated package versions in readme [skip ci] --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 14ab90a..29b7662 100644 --- a/README.md +++ b/README.md @@ -983,7 +983,7 @@ l2_normalize(sparsevec) → sparsevec | Normalize with Euclidean norm | 0.7.0 If your machine has multiple Postgres installations, specify the path to [pg_config](https://www.postgresql.org/docs/current/app-pgconfig.html) with: ```sh -export PG_CONFIG=/Library/PostgreSQL/16/bin/pg_config +export PG_CONFIG=/Library/PostgreSQL/17/bin/pg_config ``` Then re-run the installation instructions (run `make clean` before `make` if needed). If `sudo` is needed for `make install`, use: @@ -994,11 +994,11 @@ sudo --preserve-env=PG_CONFIG make install A few common paths on Mac are: -- EDB installer - `/Library/PostgreSQL/16/bin/pg_config` -- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@16/bin/pg_config` -- Homebrew (x86-64) - `/usr/local/opt/postgresql@16/bin/pg_config` +- EDB installer - `/Library/PostgreSQL/17/bin/pg_config` +- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@17/bin/pg_config` +- Homebrew (x86-64) - `/usr/local/opt/postgresql@17/bin/pg_config` -Note: Replace `16` with your Postgres server version +Note: Replace `17` with your Postgres server version ### Missing Header @@ -1007,10 +1007,10 @@ If compilation fails with `fatal error: postgres.h: No such file or directory`, For Ubuntu and Debian, use: ```sh -sudo apt install postgresql-server-dev-16 +sudo apt install postgresql-server-dev-17 ``` -Note: Replace `16` with your Postgres server version +Note: Replace `17` with your Postgres server version ### Missing SDK From 8a2eebd6a4a3c5ede931931006d602441e4f1422 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 27 Sep 2024 14:05:36 -0700 Subject: [PATCH 05/12] Added note about Postgres 17 on Windows - #669 [skip ci] --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 29b7662..f4f1d33 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,8 @@ nmake /F Makefile.win nmake /F Makefile.win install ``` +Note: Postgres 17 is not supported yet due to an upstream issue + See the [installation notes](#installation-notes---windows) if you run into issues You can also install it with [Docker](#docker) or [conda-forge](#conda-forge). From cf419f448bc4b28c905c244220427bcad01bbb21 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 27 Sep 2024 16:19:06 -0700 Subject: [PATCH 06/12] Updated Postgres version for Docker [skip ci] --- Makefile | 2 +- README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index e7ae85e..2c85e10 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ dist: git archive --format zip --prefix=$(EXTENSION)-$(EXTVERSION)/ --output dist/$(EXTENSION)-$(EXTVERSION).zip master # for Docker -PG_MAJOR ?= 16 +PG_MAJOR ?= 17 .PHONY: docker diff --git a/README.md b/README.md index f4f1d33..d6a1346 100644 --- a/README.md +++ b/README.md @@ -1045,17 +1045,17 @@ If installation fails with `Access is denied`, re-run the installation instructi Get the [Docker image](https://hub.docker.com/r/pgvector/pgvector) with: ```sh -docker pull pgvector/pgvector:pg16 +docker pull pgvector/pgvector:pg17 ``` -This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `16` with your Postgres server version, and run it the same way). +This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `17` with your Postgres server version, and run it the same way). You can also build the image manually: ```sh git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git cd pgvector -docker build --pull --build-arg PG_MAJOR=16 -t myuser/pgvector . +docker build --pull --build-arg PG_MAJOR=17 -t myuser/pgvector . ``` ### Homebrew From 54f8d9733d505086739748e93cc268bcbedc0676 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 27 Sep 2024 16:19:57 -0700 Subject: [PATCH 07/12] Updated default Postgres version in Dockerfile [skip ci] --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index eef0c8f..9364409 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG PG_MAJOR=16 +ARG PG_MAJOR=17 FROM postgres:$PG_MAJOR ARG PG_MAJOR From 5ee0471ead6c72815fa538da3248e5fa225f8143 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 28 Sep 2024 09:23:41 -0700 Subject: [PATCH 08/12] Updated readme [skip ci] --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index d6a1346..6cdca4f 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,8 @@ Or add a vector column to an existing table ALTER TABLE items ADD COLUMN embedding vector(3); ``` +Also supports [half-precision](#half-precision-vectors), [binary](#binary-vectors), and [sparse](#sparse-vectors) vectors + Insert vectors ```sql @@ -147,6 +149,8 @@ Supported distance functions are: - `<#>` - (negative) inner product - `<=>` - cosine distance - `<+>` - L1 distance (added in 0.7.0) +- `<~>` - Hamming distance (binary vectors, added in 0.7.0) +- `<%>` - Jaccard distance (binary vectors, added in 0.7.0) Get the nearest neighbors to a row From 158d9340bca5796d8f98182e5a65356bac676b74 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 28 Sep 2024 14:50:23 -0700 Subject: [PATCH 09/12] Added distance filters to cost tests [skip ci] --- test/t/039_hnsw_cost.pl | 7 ++++++- test/t/040_ivfflat_cost.pl | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/test/t/039_hnsw_cost.pl b/test/t/039_hnsw_cost.pl index dad50cc..a26c09a 100644 --- a/test/t/039_hnsw_cost.pl +++ b/test/t/039_hnsw_cost.pl @@ -17,7 +17,7 @@ $node->safe_psql("postgres", "CREATE EXTENSION vector;"); for my $dim (@dims) { my $array_sql = join(",", ('random()') x $dim); - my $n = $dim == 384 ? 2000 : 1000; + my $n = $dim == 384 ? 3000 : 1000; # Create table and index $node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));"); @@ -40,6 +40,11 @@ for my $dim (@dims) )); like($explain, qr/Index Scan using idx/); + $explain = $node->safe_psql("postgres", qq( + EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit; + )); + like($explain, qr/Index Scan using idx/); + $node->safe_psql("postgres", "DROP TABLE tst;"); } diff --git a/test/t/040_ivfflat_cost.pl b/test/t/040_ivfflat_cost.pl index 2f6fbf9..1c311a3 100644 --- a/test/t/040_ivfflat_cost.pl +++ b/test/t/040_ivfflat_cost.pl @@ -39,6 +39,11 @@ for my $dim (@dims) )); like($explain, qr/Index Scan using idx/); + $explain = $node->safe_psql("postgres", qq( + EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit; + )); + like($explain, qr/Index Scan using idx/); + $node->safe_psql("postgres", "DROP TABLE tst;"); } From dc23752618e7719317b2a852cdd92e51e83b1af5 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 28 Sep 2024 19:18:52 -0700 Subject: [PATCH 10/12] Fixed uninitialized variable [skip ci] --- src/hnswutils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index d807248..c76c6bb 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -754,6 +754,8 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *u int start; ItemPointerData indextids[HNSW_MAX_M * 2]; + *unvisitedLength = 0; + buf = ReadBuffer(index, element->neighborPage); LockBuffer(buf, BUFFER_LOCK_SHARE); page = BufferGetPage(buf); @@ -773,8 +775,6 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *u UnlockReleaseBuffer(buf); - *unvisitedLength = 0; - for (int i = 0; i < lm; i++) { ItemPointer indextid = &indextids[i]; From cae3458329518d30ce4e7513ba26eb9359dabcb6 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 15:06:50 -0700 Subject: [PATCH 11/12] Updated distance to use double --- src/hnsw.h | 4 ++-- src/hnswutils.c | 17 +++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 9fb650a..b57e9f8 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -160,7 +160,7 @@ typedef struct HnswSearchCandidate pairingheap_node c_node; pairingheap_node w_node; HnswElementPtr element; - float distance; + double distance; } HnswSearchCandidate; /* HNSW index options */ @@ -390,7 +390,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building); void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building); void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); -void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance); +void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); void HnswLoadNeighbors(HnswElement element, Relation index, int m); diff --git a/src/hnswutils.c b/src/hnswutils.c index c76c6bb..0a8d847 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -549,7 +549,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe * Load an element and optionally get its distance from q */ static void -HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance, HnswElement * element) +HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance, HnswElement * element) { Buffer buf; Page page; @@ -570,7 +570,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datu if (DatumGetPointer(*q) == NULL) *distance = 0; else - *distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data))); + *distance = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data))); } /* Load element */ @@ -589,7 +589,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datu * Load an element and optionally get its distance from q */ void -HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance) +HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance) { HnswLoadElementImpl(element->blkno, element->offno, distance, q, index, procinfo, collation, loadVec, maxDistance, &element); } @@ -597,7 +597,7 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, /* * Get the distance for an element */ -static float +static double GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, Oid collation) { Datum value = HnswGetValue(base, element); @@ -857,7 +857,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F { HnswElement eElement; HnswSearchCandidate *e; - float eDistance; + double eDistance; bool alwaysAdd = wlen < ef; f = HnswGetSearchCandidate(w_node, pairingheap_first(W)); @@ -1163,7 +1163,12 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm HnswElement hc3Element = HnswPtrAccess(base, hc3->element); if (HnswPtrIsNull(base, hc3Element->value)) - HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL); + { + double distance; + + HnswLoadElement(hc3Element, &distance, &q, index, procinfo, collation, true, NULL); + hc3->distance = distance; + } else hc3->distance = GetElementDistance(base, hc3Element, q, procinfo, collation); From f2afd1125752d45777cf738856f3985e8fe9b2e9 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 15:09:54 -0700 Subject: [PATCH 12/12] Use sc for search candidates [skip ci] --- src/hnswutils.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 0a8d847..79bc086 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -611,14 +611,14 @@ GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, HnswSearchCandidate * HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec) { - HnswSearchCandidate *hc = palloc(sizeof(HnswSearchCandidate)); + HnswSearchCandidate *sc = palloc(sizeof(HnswSearchCandidate)); - HnswPtrStore(base, hc->element, entryPoint); + HnswPtrStore(base, sc->element, entryPoint); if (index == NULL) - hc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation); + sc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation); else - HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL); - return hc; + HnswLoadElement(entryPoint, &sc->distance, &q, index, procinfo, collation, loadVec, NULL); + return sc; } #define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr) @@ -820,20 +820,20 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Add entry points to v, C, and W */ foreach(lc2, ep) { - HnswSearchCandidate *hc = (HnswSearchCandidate *) lfirst(lc2); + HnswSearchCandidate *sc = (HnswSearchCandidate *) lfirst(lc2); bool found; - AddToVisited(base, &v, hc->element, index, &found); + AddToVisited(base, &v, sc->element, index, &found); - pairingheap_add(C, &hc->c_node); - pairingheap_add(W, &hc->w_node); + pairingheap_add(C, &sc->c_node); + pairingheap_add(W, &sc->w_node); /* * Do not count elements being deleted towards ef when vacuuming. It * would be ideal to do this for inserts as well, but this could * affect insert performance. */ - if (CountElement(skipElement, HnswPtrAccess(base, hc->element))) + if (CountElement(skipElement, HnswPtrAccess(base, sc->element))) wlen++; } @@ -916,9 +916,9 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Add each element of W to w */ while (!pairingheap_is_empty(W)) { - HnswSearchCandidate *hc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W)); + HnswSearchCandidate *sc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W)); - w = lappend(w, hc); + w = lappend(w, sc); } return w;