diff --git a/Dockerfile b/Dockerfile index eef0c8f..9364409 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG PG_MAJOR=16 +ARG PG_MAJOR=17 FROM postgres:$PG_MAJOR ARG PG_MAJOR diff --git a/Makefile b/Makefile index e7ae85e..2c85e10 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ dist: git archive --format zip --prefix=$(EXTENSION)-$(EXTVERSION)/ --output dist/$(EXTENSION)-$(EXTVERSION).zip master # for Docker -PG_MAJOR ?= 16 +PG_MAJOR ?= 17 .PHONY: docker diff --git a/README.md b/README.md index 6ae1d04..5268be6 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,8 @@ nmake /F Makefile.win nmake /F Makefile.win install ``` +Note: Postgres 17 is not supported yet due to an upstream issue + See the [installation notes](#installation-notes---windows) if you run into issues You can also install it with [Docker](#docker) or [conda-forge](#conda-forge). @@ -100,6 +102,8 @@ Or add a vector column to an existing table ALTER TABLE items ADD COLUMN embedding vector(3); ``` +Also supports [half-precision](#half-precision-vectors), [binary](#binary-vectors), and [sparse](#sparse-vectors) vectors + Insert vectors ```sql @@ -145,6 +149,8 @@ Supported distance functions are: - `<#>` - (negative) inner product - `<=>` - cosine distance - `<+>` - L1 distance (added in 0.7.0) +- `<~>` - Hamming distance (binary vectors, added in 0.7.0) +- `<%>` - Jaccard distance (binary vectors, added in 0.7.0) Get the nearest neighbors to a row @@ -1050,7 +1056,7 @@ l2_normalize(sparsevec) → sparsevec | Normalize with Euclidean norm | 0.7.0 If your machine has multiple Postgres installations, specify the path to [pg_config](https://www.postgresql.org/docs/current/app-pgconfig.html) with: ```sh -export PG_CONFIG=/Library/PostgreSQL/16/bin/pg_config +export PG_CONFIG=/Library/PostgreSQL/17/bin/pg_config ``` Then re-run the installation instructions (run `make clean` before `make` if needed). If `sudo` is needed for `make install`, use: @@ -1061,11 +1067,11 @@ sudo --preserve-env=PG_CONFIG make install A few common paths on Mac are: -- EDB installer - `/Library/PostgreSQL/16/bin/pg_config` -- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@16/bin/pg_config` -- Homebrew (x86-64) - `/usr/local/opt/postgresql@16/bin/pg_config` +- EDB installer - `/Library/PostgreSQL/17/bin/pg_config` +- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@17/bin/pg_config` +- Homebrew (x86-64) - `/usr/local/opt/postgresql@17/bin/pg_config` -Note: Replace `16` with your Postgres server version +Note: Replace `17` with your Postgres server version ### Missing Header @@ -1074,10 +1080,10 @@ If compilation fails with `fatal error: postgres.h: No such file or directory`, For Ubuntu and Debian, use: ```sh -sudo apt install postgresql-server-dev-16 +sudo apt install postgresql-server-dev-17 ``` -Note: Replace `16` with your Postgres server version +Note: Replace `17` with your Postgres server version ### Missing SDK @@ -1110,17 +1116,17 @@ If installation fails with `Access is denied`, re-run the installation instructi Get the [Docker image](https://hub.docker.com/r/pgvector/pgvector) with: ```sh -docker pull pgvector/pgvector:pg16 +docker pull pgvector/pgvector:pg17 ``` -This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `16` with your Postgres server version, and run it the same way). +This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `17` with your Postgres server version, and run it the same way). You can also build the image manually: ```sh git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git cd pgvector -docker build --pull --build-arg PG_MAJOR=16 -t myuser/pgvector . +docker build --pull --build-arg PG_MAJOR=17 -t myuser/pgvector . ``` ### Homebrew @@ -1131,7 +1137,7 @@ With Homebrew Postgres, you can use: brew install pgvector ``` -Note: This only adds it to the `postgresql@14` formula +Note: This only adds it to the `postgresql@17` and `postgresql@14` formulas ### PGXN @@ -1146,22 +1152,22 @@ pgxn install vector Debian and Ubuntu packages are available from the [PostgreSQL APT Repository](https://wiki.postgresql.org/wiki/Apt). Follow the [setup instructions](https://wiki.postgresql.org/wiki/Apt#Quickstart) and run: ```sh -sudo apt install postgresql-16-pgvector +sudo apt install postgresql-17-pgvector ``` -Note: Replace `16` with your Postgres server version +Note: Replace `17` with your Postgres server version ### Yum RPM packages are available from the [PostgreSQL Yum Repository](https://yum.postgresql.org/). Follow the [setup instructions](https://www.postgresql.org/download/linux/redhat/) for your distribution and run: ```sh -sudo yum install pgvector_16 +sudo yum install pgvector_17 # or -sudo dnf install pgvector_16 +sudo dnf install pgvector_17 ``` -Note: Replace `16` with your Postgres server version +Note: Replace `17` with your Postgres server version ### pkg diff --git a/src/hnsw.h b/src/hnsw.h index b2424be..e290e1b 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -192,7 +192,7 @@ typedef struct HnswSearchCandidate pairingheap_node c_node; pairingheap_node w_node; HnswElementPtr element; - float distance; + double distance; } HnswSearchCandidate; #define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr) @@ -438,7 +438,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building); void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building); void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); -void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance); +void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); void HnswLoadNeighbors(HnswElement element, Relation index, int m); diff --git a/src/hnswutils.c b/src/hnswutils.c index 066264c..2b731b5 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -547,7 +547,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe * Load an element and optionally get its distance from q */ static void -HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance, HnswElement * element) +HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance, HnswElement * element) { Buffer buf; Page page; @@ -568,7 +568,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datu if (DatumGetPointer(*q) == NULL) *distance = 0; else - *distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data))); + *distance = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data))); } /* Load element */ @@ -587,7 +587,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datu * Load an element and optionally get its distance from q */ void -HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance) +HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance) { HnswLoadElementImpl(element->blkno, element->offno, distance, q, index, procinfo, collation, loadVec, maxDistance, &element); } @@ -595,7 +595,7 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, /* * Get the distance for an element */ -static float +static double GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, Oid collation) { Datum value = HnswGetValue(base, element); @@ -609,14 +609,14 @@ GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, HnswSearchCandidate * HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec) { - HnswSearchCandidate *hc = palloc(sizeof(HnswSearchCandidate)); + HnswSearchCandidate *sc = palloc(sizeof(HnswSearchCandidate)); - HnswPtrStore(base, hc->element, entryPoint); + HnswPtrStore(base, sc->element, entryPoint); if (index == NULL) - hc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation); + sc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation); else - HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL); - return hc; + HnswLoadElement(entryPoint, &sc->distance, &q, index, procinfo, collation, loadVec, NULL); + return sc; } /* @@ -845,26 +845,26 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Add entry points to v, C, and W */ foreach(lc2, ep) { - HnswSearchCandidate *hc = (HnswSearchCandidate *) lfirst(lc2); + HnswSearchCandidate *sc = (HnswSearchCandidate *) lfirst(lc2); bool found; if (initVisited) { - AddToVisited(base, v, hc->element, index, &found); + AddToVisited(base, v, sc->element, index, &found); if (tuples != NULL) (*tuples)++; } - pairingheap_add(C, &hc->c_node); - pairingheap_add(W, &hc->w_node); + pairingheap_add(C, &sc->c_node); + pairingheap_add(W, &sc->w_node); /* * Do not count elements being deleted towards ef when vacuuming. It * would be ideal to do this for inserts as well, but this could * affect insert performance. */ - if (CountElement(skipElement, HnswPtrAccess(base, hc->element))) + if (CountElement(skipElement, HnswPtrAccess(base, sc->element))) wlen++; } @@ -891,7 +891,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F { HnswElement eElement; HnswSearchCandidate *e; - float eDistance; + double eDistance; bool alwaysAdd = wlen < ef; f = HnswGetSearchCandidate(w_node, pairingheap_first(W)); @@ -961,9 +961,9 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Add each element of W to w */ while (!pairingheap_is_empty(W)) { - HnswSearchCandidate *hc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W)); + HnswSearchCandidate *sc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W)); - w = lappend(w, hc); + w = lappend(w, sc); } return w; @@ -1208,7 +1208,12 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm HnswElement hc3Element = HnswPtrAccess(base, hc3->element); if (HnswPtrIsNull(base, hc3Element->value)) - HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL); + { + double distance; + + HnswLoadElement(hc3Element, &distance, &q, index, procinfo, collation, true, NULL); + hc3->distance = distance; + } else hc3->distance = GetElementDistance(base, hc3Element, q, procinfo, collation); diff --git a/test/t/039_hnsw_cost.pl b/test/t/039_hnsw_cost.pl index 7c56422..763e374 100644 --- a/test/t/039_hnsw_cost.pl +++ b/test/t/039_hnsw_cost.pl @@ -17,7 +17,7 @@ $node->safe_psql("postgres", "CREATE EXTENSION vector;"); for my $dim (@dims) { my $array_sql = join(",", ('random()') x $dim); - my $n = 2000; + my $n = 6000; # Create table and index $node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));"); @@ -40,6 +40,11 @@ for my $dim (@dims) )); like($explain, qr/Index Scan using idx/); + $explain = $node->safe_psql("postgres", qq( + EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit; + )); + like($explain, qr/Index Scan using idx/); + $node->safe_psql("postgres", "DROP TABLE tst;"); } diff --git a/test/t/040_ivfflat_cost.pl b/test/t/040_ivfflat_cost.pl index 2f6fbf9..1c311a3 100644 --- a/test/t/040_ivfflat_cost.pl +++ b/test/t/040_ivfflat_cost.pl @@ -39,6 +39,11 @@ for my $dim (@dims) )); like($explain, qr/Index Scan using idx/); + $explain = $node->safe_psql("postgres", qq( + EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit; + )); + like($explain, qr/Index Scan using idx/); + $node->safe_psql("postgres", "DROP TABLE tst;"); }