From c421dc6483a0bd0b5574418371fdf189e3c313ee Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Wed, 3 Apr 2024 23:15:28 -0700
Subject: [PATCH] Added binary quantization section [skip ci]

---
 README.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/README.md b/README.md
index 34db282..605e8a7 100644
--- a/README.md
+++ b/README.md
@@ -419,6 +419,28 @@ Use [partitioning](https://www.postgresql.org/docs/current/ddl-partitioning.html
 CREATE TABLE items (embedding vector(3), category_id int) PARTITION BY LIST(category_id);
 ```
 
+## Binary Quantization [unreleased]
+
+Use expression indexing for binary quantization
+
+```sql
+CREATE INDEX ON items USING hnsw ((quantize_binary(embedding)::bit(3)) bit_hamming_ops);
+```
+
+Get the nearest neighbors by Hamming distance
+
+```sql
+SELECT * FROM items ORDER BY quantize_binary(embedding)::bit(3) <~> quantize_binary('[1,-2,3]') LIMIT 5;
+```
+
+Re-rank by the original vectors for better recall
+
+```sql
+SELECT * FROM (
+    SELECT * FROM items ORDER BY quantize_binary(embedding)::bit(3) <~> quantize_binary('[1,-2,3]') LIMIT 20
+) ORDER BY embedding <=> '[1,-2,3]' LIMIT 5;
+```
+
 ## Hybrid Search
 
 Use together with Postgres [full-text search](https://www.postgresql.org/docs/current/textsearch-intro.html) for hybrid search.