mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Merge branch 'master' into hnsw-streaming
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
ARG PG_MAJOR=16
|
||||
ARG PG_MAJOR=17
|
||||
FROM postgres:$PG_MAJOR
|
||||
ARG PG_MAJOR
|
||||
|
||||
|
||||
2
Makefile
2
Makefile
@@ -66,7 +66,7 @@ dist:
|
||||
git archive --format zip --prefix=$(EXTENSION)-$(EXTVERSION)/ --output dist/$(EXTENSION)-$(EXTVERSION).zip master
|
||||
|
||||
# for Docker
|
||||
PG_MAJOR ?= 16
|
||||
PG_MAJOR ?= 17
|
||||
|
||||
.PHONY: docker
|
||||
|
||||
|
||||
38
README.md
38
README.md
@@ -52,6 +52,8 @@ nmake /F Makefile.win
|
||||
nmake /F Makefile.win install
|
||||
```
|
||||
|
||||
Note: Postgres 17 is not supported yet due to an upstream issue
|
||||
|
||||
See the [installation notes](#installation-notes---windows) if you run into issues
|
||||
|
||||
You can also install it with [Docker](#docker) or [conda-forge](#conda-forge).
|
||||
@@ -100,6 +102,8 @@ Or add a vector column to an existing table
|
||||
ALTER TABLE items ADD COLUMN embedding vector(3);
|
||||
```
|
||||
|
||||
Also supports [half-precision](#half-precision-vectors), [binary](#binary-vectors), and [sparse](#sparse-vectors) vectors
|
||||
|
||||
Insert vectors
|
||||
|
||||
```sql
|
||||
@@ -145,6 +149,8 @@ Supported distance functions are:
|
||||
- `<#>` - (negative) inner product
|
||||
- `<=>` - cosine distance
|
||||
- `<+>` - L1 distance (added in 0.7.0)
|
||||
- `<~>` - Hamming distance (binary vectors, added in 0.7.0)
|
||||
- `<%>` - Jaccard distance (binary vectors, added in 0.7.0)
|
||||
|
||||
Get the nearest neighbors to a row
|
||||
|
||||
@@ -1050,7 +1056,7 @@ l2_normalize(sparsevec) → sparsevec | Normalize with Euclidean norm | 0.7.0
|
||||
If your machine has multiple Postgres installations, specify the path to [pg_config](https://www.postgresql.org/docs/current/app-pgconfig.html) with:
|
||||
|
||||
```sh
|
||||
export PG_CONFIG=/Library/PostgreSQL/16/bin/pg_config
|
||||
export PG_CONFIG=/Library/PostgreSQL/17/bin/pg_config
|
||||
```
|
||||
|
||||
Then re-run the installation instructions (run `make clean` before `make` if needed). If `sudo` is needed for `make install`, use:
|
||||
@@ -1061,11 +1067,11 @@ sudo --preserve-env=PG_CONFIG make install
|
||||
|
||||
A few common paths on Mac are:
|
||||
|
||||
- EDB installer - `/Library/PostgreSQL/16/bin/pg_config`
|
||||
- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@16/bin/pg_config`
|
||||
- Homebrew (x86-64) - `/usr/local/opt/postgresql@16/bin/pg_config`
|
||||
- EDB installer - `/Library/PostgreSQL/17/bin/pg_config`
|
||||
- Homebrew (arm64) - `/opt/homebrew/opt/postgresql@17/bin/pg_config`
|
||||
- Homebrew (x86-64) - `/usr/local/opt/postgresql@17/bin/pg_config`
|
||||
|
||||
Note: Replace `16` with your Postgres server version
|
||||
Note: Replace `17` with your Postgres server version
|
||||
|
||||
### Missing Header
|
||||
|
||||
@@ -1074,10 +1080,10 @@ If compilation fails with `fatal error: postgres.h: No such file or directory`,
|
||||
For Ubuntu and Debian, use:
|
||||
|
||||
```sh
|
||||
sudo apt install postgresql-server-dev-16
|
||||
sudo apt install postgresql-server-dev-17
|
||||
```
|
||||
|
||||
Note: Replace `16` with your Postgres server version
|
||||
Note: Replace `17` with your Postgres server version
|
||||
|
||||
### Missing SDK
|
||||
|
||||
@@ -1110,17 +1116,17 @@ If installation fails with `Access is denied`, re-run the installation instructi
|
||||
Get the [Docker image](https://hub.docker.com/r/pgvector/pgvector) with:
|
||||
|
||||
```sh
|
||||
docker pull pgvector/pgvector:pg16
|
||||
docker pull pgvector/pgvector:pg17
|
||||
```
|
||||
|
||||
This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `16` with your Postgres server version, and run it the same way).
|
||||
This adds pgvector to the [Postgres image](https://hub.docker.com/_/postgres) (replace `17` with your Postgres server version, and run it the same way).
|
||||
|
||||
You can also build the image manually:
|
||||
|
||||
```sh
|
||||
git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git
|
||||
cd pgvector
|
||||
docker build --pull --build-arg PG_MAJOR=16 -t myuser/pgvector .
|
||||
docker build --pull --build-arg PG_MAJOR=17 -t myuser/pgvector .
|
||||
```
|
||||
|
||||
### Homebrew
|
||||
@@ -1131,7 +1137,7 @@ With Homebrew Postgres, you can use:
|
||||
brew install pgvector
|
||||
```
|
||||
|
||||
Note: This only adds it to the `postgresql@14` formula
|
||||
Note: This only adds it to the `postgresql@17` and `postgresql@14` formulas
|
||||
|
||||
### PGXN
|
||||
|
||||
@@ -1146,22 +1152,22 @@ pgxn install vector
|
||||
Debian and Ubuntu packages are available from the [PostgreSQL APT Repository](https://wiki.postgresql.org/wiki/Apt). Follow the [setup instructions](https://wiki.postgresql.org/wiki/Apt#Quickstart) and run:
|
||||
|
||||
```sh
|
||||
sudo apt install postgresql-16-pgvector
|
||||
sudo apt install postgresql-17-pgvector
|
||||
```
|
||||
|
||||
Note: Replace `16` with your Postgres server version
|
||||
Note: Replace `17` with your Postgres server version
|
||||
|
||||
### Yum
|
||||
|
||||
RPM packages are available from the [PostgreSQL Yum Repository](https://yum.postgresql.org/). Follow the [setup instructions](https://www.postgresql.org/download/linux/redhat/) for your distribution and run:
|
||||
|
||||
```sh
|
||||
sudo yum install pgvector_16
|
||||
sudo yum install pgvector_17
|
||||
# or
|
||||
sudo dnf install pgvector_16
|
||||
sudo dnf install pgvector_17
|
||||
```
|
||||
|
||||
Note: Replace `16` with your Postgres server version
|
||||
Note: Replace `17` with your Postgres server version
|
||||
|
||||
### pkg
|
||||
|
||||
|
||||
@@ -192,7 +192,7 @@ typedef struct HnswSearchCandidate
|
||||
pairingheap_node c_node;
|
||||
pairingheap_node w_node;
|
||||
HnswElementPtr element;
|
||||
float distance;
|
||||
double distance;
|
||||
} HnswSearchCandidate;
|
||||
|
||||
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
|
||||
@@ -438,7 +438,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *
|
||||
bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building);
|
||||
void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building);
|
||||
void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec);
|
||||
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance);
|
||||
void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance);
|
||||
void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element);
|
||||
void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation);
|
||||
void HnswLoadNeighbors(HnswElement element, Relation index, int m);
|
||||
|
||||
@@ -547,7 +547,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe
|
||||
* Load an element and optionally get its distance from q
|
||||
*/
|
||||
static void
|
||||
HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance, HnswElement * element)
|
||||
HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance, HnswElement * element)
|
||||
{
|
||||
Buffer buf;
|
||||
Page page;
|
||||
@@ -568,7 +568,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datu
|
||||
if (DatumGetPointer(*q) == NULL)
|
||||
*distance = 0;
|
||||
else
|
||||
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data)));
|
||||
*distance = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data)));
|
||||
}
|
||||
|
||||
/* Load element */
|
||||
@@ -587,7 +587,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datu
|
||||
* Load an element and optionally get its distance from q
|
||||
*/
|
||||
void
|
||||
HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance)
|
||||
HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance)
|
||||
{
|
||||
HnswLoadElementImpl(element->blkno, element->offno, distance, q, index, procinfo, collation, loadVec, maxDistance, &element);
|
||||
}
|
||||
@@ -595,7 +595,7 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
|
||||
/*
|
||||
* Get the distance for an element
|
||||
*/
|
||||
static float
|
||||
static double
|
||||
GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, Oid collation)
|
||||
{
|
||||
Datum value = HnswGetValue(base, element);
|
||||
@@ -609,14 +609,14 @@ GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo,
|
||||
HnswSearchCandidate *
|
||||
HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec)
|
||||
{
|
||||
HnswSearchCandidate *hc = palloc(sizeof(HnswSearchCandidate));
|
||||
HnswSearchCandidate *sc = palloc(sizeof(HnswSearchCandidate));
|
||||
|
||||
HnswPtrStore(base, hc->element, entryPoint);
|
||||
HnswPtrStore(base, sc->element, entryPoint);
|
||||
if (index == NULL)
|
||||
hc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation);
|
||||
sc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation);
|
||||
else
|
||||
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL);
|
||||
return hc;
|
||||
HnswLoadElement(entryPoint, &sc->distance, &q, index, procinfo, collation, loadVec, NULL);
|
||||
return sc;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -845,26 +845,26 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
|
||||
/* Add entry points to v, C, and W */
|
||||
foreach(lc2, ep)
|
||||
{
|
||||
HnswSearchCandidate *hc = (HnswSearchCandidate *) lfirst(lc2);
|
||||
HnswSearchCandidate *sc = (HnswSearchCandidate *) lfirst(lc2);
|
||||
bool found;
|
||||
|
||||
if (initVisited)
|
||||
{
|
||||
AddToVisited(base, v, hc->element, index, &found);
|
||||
AddToVisited(base, v, sc->element, index, &found);
|
||||
|
||||
if (tuples != NULL)
|
||||
(*tuples)++;
|
||||
}
|
||||
|
||||
pairingheap_add(C, &hc->c_node);
|
||||
pairingheap_add(W, &hc->w_node);
|
||||
pairingheap_add(C, &sc->c_node);
|
||||
pairingheap_add(W, &sc->w_node);
|
||||
|
||||
/*
|
||||
* Do not count elements being deleted towards ef when vacuuming. It
|
||||
* would be ideal to do this for inserts as well, but this could
|
||||
* affect insert performance.
|
||||
*/
|
||||
if (CountElement(skipElement, HnswPtrAccess(base, hc->element)))
|
||||
if (CountElement(skipElement, HnswPtrAccess(base, sc->element)))
|
||||
wlen++;
|
||||
}
|
||||
|
||||
@@ -891,7 +891,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
|
||||
{
|
||||
HnswElement eElement;
|
||||
HnswSearchCandidate *e;
|
||||
float eDistance;
|
||||
double eDistance;
|
||||
bool alwaysAdd = wlen < ef;
|
||||
|
||||
f = HnswGetSearchCandidate(w_node, pairingheap_first(W));
|
||||
@@ -961,9 +961,9 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
|
||||
/* Add each element of W to w */
|
||||
while (!pairingheap_is_empty(W))
|
||||
{
|
||||
HnswSearchCandidate *hc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W));
|
||||
HnswSearchCandidate *sc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W));
|
||||
|
||||
w = lappend(w, hc);
|
||||
w = lappend(w, sc);
|
||||
}
|
||||
|
||||
return w;
|
||||
@@ -1208,7 +1208,12 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm
|
||||
HnswElement hc3Element = HnswPtrAccess(base, hc3->element);
|
||||
|
||||
if (HnswPtrIsNull(base, hc3Element->value))
|
||||
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL);
|
||||
{
|
||||
double distance;
|
||||
|
||||
HnswLoadElement(hc3Element, &distance, &q, index, procinfo, collation, true, NULL);
|
||||
hc3->distance = distance;
|
||||
}
|
||||
else
|
||||
hc3->distance = GetElementDistance(base, hc3Element, q, procinfo, collation);
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ $node->safe_psql("postgres", "CREATE EXTENSION vector;");
|
||||
for my $dim (@dims)
|
||||
{
|
||||
my $array_sql = join(",", ('random()') x $dim);
|
||||
my $n = 2000;
|
||||
my $n = 6000;
|
||||
|
||||
# Create table and index
|
||||
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));");
|
||||
@@ -40,6 +40,11 @@ for my $dim (@dims)
|
||||
));
|
||||
like($explain, qr/Index Scan using idx/);
|
||||
|
||||
$explain = $node->safe_psql("postgres", qq(
|
||||
EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit;
|
||||
));
|
||||
like($explain, qr/Index Scan using idx/);
|
||||
|
||||
$node->safe_psql("postgres", "DROP TABLE tst;");
|
||||
}
|
||||
|
||||
|
||||
@@ -39,6 +39,11 @@ for my $dim (@dims)
|
||||
));
|
||||
like($explain, qr/Index Scan using idx/);
|
||||
|
||||
$explain = $node->safe_psql("postgres", qq(
|
||||
EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit;
|
||||
));
|
||||
like($explain, qr/Index Scan using idx/);
|
||||
|
||||
$node->safe_psql("postgres", "DROP TABLE tst;");
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user