From 7e5df3c9fefbb85030ca2f467468c913ed75d838 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Wed, 3 Apr 2024 23:36:43 -0700
Subject: [PATCH] Updated binary vectors section [skip ci]

---
 README.md | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 77444ed..ffc6dd6 100644
--- a/README.md
+++ b/README.md
@@ -421,9 +421,7 @@ CREATE TABLE items (embedding vector(3), category_id int) PARTITION BY LIST(cate
 
 ## Binary Vectors
 
-*Unreleased*
-
-Use the `bit` type to store binary vectors
+Use the `bit` type to store binary vectors ([example](https://github.com/pgvector/pgvector-python/blob/master/examples/hash_image_search.py))
 
 ```sql
 CREATE TABLE items (id bigserial PRIMARY KEY, embedding bit(3));
@@ -432,6 +430,12 @@ INSERT INTO items (embedding) VALUES ('000'), ('111');
 
 Get the nearest neighbors by Hamming distance
 
+```sql
+SELECT * FROM items ORDER BY bit_count(embedding # '101') LIMIT 5;
+```
+
+Or (unreleased)
+
 ```sql
 SELECT * FROM items ORDER BY embedding <~> '101' LIMIT 5;
 ```
@@ -678,18 +682,6 @@ and query with:
 SELECT * FROM items ORDER BY embedding::vector(3) <-> '[3,1,2]' LIMIT 5;
 ```
 
-#### Are binary vectors supported?
-
-You can store binary vectors and perform exact nearest neighbor search by Hamming distance in Postgres without an extension ([example](https://github.com/pgvector/pgvector-python/blob/master/examples/hash_image_search.py)).
-
-```tsql
-CREATE TABLE items (id bigserial PRIMARY KEY, embedding bit(3));
-INSERT INTO items (embedding) VALUES ('000'), ('111');
-SELECT * FROM items ORDER BY bit_count(embedding # '101') LIMIT 5;
-```
-
-Indexing is not currently supported.
-
 #### Do indexes need to fit into memory?
 
 No, but like other index types, you’ll likely see better performance if they do. You can get the size of an index with: