Merge branch 'master' into hnsw-streaming

This commit is contained in:
Andrew Kane
2024-09-29 15:13:51 -07:00
7 changed files with 60 additions and 39 deletions

View File

@@ -1,4 +1,4 @@
ARG PG_MAJOR=16
ARG PG_MAJOR=17
FROM postgres:$PG_MAJOR
ARG PG_MAJOR

View File

@@ -66,7 +66,7 @@ dist:
git archive --format zip --prefix=$(EXTENSION)-$(EXTVERSION)/ --output dist/$(EXTENSION)-$(EXTVERSION).zip master
# for Docker
PG_MAJOR ?= 16
PG_MAJOR ?= 17
.PHONY: docker

View File

@@ -52,6 +52,8 @@ nmake /F Makefile.win
nmake /F Makefile.win install
```
Note: Postgres 17 is not supported on Windows yet due to an upstream issue
See the [installation notes](#installation-notes---windows) if you run into issues
You can also install it with [Docker](#docker) or [conda-forge](#conda-forge).
@@ -100,6 +102,8 @@ Or add a vector column to an existing table
ALTER TABLE items ADD COLUMN embedding vector(3);
```
Also supports [half-precision](#half-precision-vectors), [binary](#binary-vectors), and [sparse](#sparse-vectors) vectors
Insert vectors
```sql
@@ -145,6 +149,8 @@ Supported distance functions are:
- `<#>` - (negative) inner product
- `<=>` - cosine distance
- `<+>` - L1 distance (added in 0.7.0)
- `<~>` - Hamming distance (binary vectors, added in 0.7.0)
- `<%>` - Jaccard distance (binary vectors, added in 0.7.0)
Get the nearest neighbors to a row
@@ -1050,7 +1056,7 @@ l2_normalize(sparsevec) → sparsevec | Normalize with Euclidean norm | 0.7.0
If your machine has multiple Postgres installations, specify the path to [pg_config](https://www.postgresql.org/docs/current/app-pgconfig.html) with:
```sh
export PG_CONFIG=/Library/PostgreSQL/16/bin/pg_config
export PG_CONFIG=/Library/PostgreSQL/17/bin/pg_config
```
Then re-run the installation instructions (run `make clean` before `make` if needed). If `sudo` is needed for `make install`, use:
@@ -1061,11 +1067,11 @@ sudo --preserve-env=PG_CONFIG make install
A few common paths on Mac are:
- EDB installer - `/Library/PostgreSQL/16/bin/pg_config`
- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@16/bin/pg_config`
- Homebrew (x86-64) - `/usr/local/opt/postgresql@16/bin/pg_config`
- EDB installer - `/Library/PostgreSQL/17/bin/pg_config`
- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@17/bin/pg_config`
- Homebrew (x86-64) - `/usr/local/opt/postgresql@17/bin/pg_config`
Note: Replace `16` with your Postgres server version
Note: Replace `17` with your Postgres server version
### Missing Header
@@ -1074,10 +1080,10 @@ If compilation fails with `fatal error: postgres.h: No such file or directory`,
For Ubuntu and Debian, use:
```sh
sudo apt install postgresql-server-dev-16
sudo apt install postgresql-server-dev-17
```
Note: Replace `16` with your Postgres server version
Note: Replace `17` with your Postgres server version
### Missing SDK
@@ -1110,17 +1116,17 @@ If installation fails with `Access is denied`, re-run the installation instructi
Get the [Docker image](https://hub.docker.com/r/pgvector/pgvector) with:
```sh
docker pull pgvector/pgvector:pg16
docker pull pgvector/pgvector:pg17
```
This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `16` with your Postgres server version, and run it the same way).
This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `17` with your Postgres server version, and run it the same way).
You can also build the image manually:
```sh
git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git
cd pgvector
docker build --pull --build-arg PG_MAJOR=16 -t myuser/pgvector .
docker build --pull --build-arg PG_MAJOR=17 -t myuser/pgvector .
```
### Homebrew
@@ -1131,7 +1137,7 @@ With Homebrew Postgres, you can use:
brew install pgvector
```
Note: This only adds it to the `postgresql@14` formula
Note: This only adds it to the `postgresql@17` and `postgresql@14` formulas
### PGXN
@@ -1146,22 +1152,22 @@ pgxn install vector
Debian and Ubuntu packages are available from the [PostgreSQL APT Repository](https://wiki.postgresql.org/wiki/Apt). Follow the [setup instructions](https://wiki.postgresql.org/wiki/Apt#Quickstart) and run:
```sh
sudo apt install postgresql-16-pgvector
sudo apt install postgresql-17-pgvector
```
Note: Replace `16` with your Postgres server version
Note: Replace `17` with your Postgres server version
### Yum
RPM packages are available from the [PostgreSQL Yum Repository](https://yum.postgresql.org/). Follow the [setup instructions](https://www.postgresql.org/download/linux/redhat/) for your distribution and run:
```sh
sudo yum install pgvector_16
sudo yum install pgvector_17
# or
sudo dnf install pgvector_16
sudo dnf install pgvector_17
```
Note: Replace `16` with your Postgres server version
Note: Replace `17` with your Postgres server version
### pkg

View File

@@ -192,7 +192,7 @@ typedef struct HnswSearchCandidate
pairingheap_node c_node;
pairingheap_node w_node;
HnswElementPtr element;
float distance;
double distance;
} HnswSearchCandidate;
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
@@ -438,7 +438,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *
bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building);
void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building);
void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec);
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance);
void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance);
void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element);
void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation);
void HnswLoadNeighbors(HnswElement element, Relation index, int m);

View File

@@ -547,7 +547,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe
* Load an element and optionally get its distance from q
*/
static void
HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance, HnswElement * element)
HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance, HnswElement * element)
{
Buffer buf;
Page page;
@@ -568,7 +568,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datu
if (DatumGetPointer(*q) == NULL)
*distance = 0;
else
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data)));
*distance = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data)));
}
/* Load element */
@@ -587,7 +587,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datu
* Load an element and optionally get its distance from q
*/
void
HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance)
HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance)
{
HnswLoadElementImpl(element->blkno, element->offno, distance, q, index, procinfo, collation, loadVec, maxDistance, &element);
}
@@ -595,7 +595,7 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
/*
* Get the distance for an element
*/
static float
static double
GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, Oid collation)
{
Datum value = HnswGetValue(base, element);
@@ -609,14 +609,14 @@ GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo,
HnswSearchCandidate *
HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec)
{
HnswSearchCandidate *hc = palloc(sizeof(HnswSearchCandidate));
HnswSearchCandidate *sc = palloc(sizeof(HnswSearchCandidate));
HnswPtrStore(base, hc->element, entryPoint);
HnswPtrStore(base, sc->element, entryPoint);
if (index == NULL)
hc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation);
sc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation);
else
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL);
return hc;
HnswLoadElement(entryPoint, &sc->distance, &q, index, procinfo, collation, loadVec, NULL);
return sc;
}
/*
@@ -845,26 +845,26 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
/* Add entry points to v, C, and W */
foreach(lc2, ep)
{
HnswSearchCandidate *hc = (HnswSearchCandidate *) lfirst(lc2);
HnswSearchCandidate *sc = (HnswSearchCandidate *) lfirst(lc2);
bool found;
if (initVisited)
{
AddToVisited(base, v, hc->element, index, &found);
AddToVisited(base, v, sc->element, index, &found);
if (tuples != NULL)
(*tuples)++;
}
pairingheap_add(C, &hc->c_node);
pairingheap_add(W, &hc->w_node);
pairingheap_add(C, &sc->c_node);
pairingheap_add(W, &sc->w_node);
/*
* Do not count elements being deleted towards ef when vacuuming. It
* would be ideal to do this for inserts as well, but this could
* affect insert performance.
*/
if (CountElement(skipElement, HnswPtrAccess(base, hc->element)))
if (CountElement(skipElement, HnswPtrAccess(base, sc->element)))
wlen++;
}
@@ -891,7 +891,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
{
HnswElement eElement;
HnswSearchCandidate *e;
float eDistance;
double eDistance;
bool alwaysAdd = wlen < ef;
f = HnswGetSearchCandidate(w_node, pairingheap_first(W));
@@ -961,9 +961,9 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
/* Add each element of W to w */
while (!pairingheap_is_empty(W))
{
HnswSearchCandidate *hc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W));
HnswSearchCandidate *sc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W));
w = lappend(w, hc);
w = lappend(w, sc);
}
return w;
@@ -1208,7 +1208,12 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm
HnswElement hc3Element = HnswPtrAccess(base, hc3->element);
if (HnswPtrIsNull(base, hc3Element->value))
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL);
{
double distance;
HnswLoadElement(hc3Element, &distance, &q, index, procinfo, collation, true, NULL);
hc3->distance = distance;
}
else
hc3->distance = GetElementDistance(base, hc3Element, q, procinfo, collation);

View File

@@ -17,7 +17,7 @@ $node->safe_psql("postgres", "CREATE EXTENSION vector;");
for my $dim (@dims)
{
my $array_sql = join(",", ('random()') x $dim);
my $n = 2000;
my $n = 6000;
# Create table and index
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));");
@@ -40,6 +40,11 @@ for my $dim (@dims)
));
like($explain, qr/Index Scan using idx/);
$explain = $node->safe_psql("postgres", qq(
EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit;
));
like($explain, qr/Index Scan using idx/);
$node->safe_psql("postgres", "DROP TABLE tst;");
}

View File

@@ -39,6 +39,11 @@ for my $dim (@dims)
));
like($explain, qr/Index Scan using idx/);
$explain = $node->safe_psql("postgres", qq(
EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit;
));
like($explain, qr/Index Scan using idx/);
$node->safe_psql("postgres", "DROP TABLE tst;");
}