mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Added random_vector function
This commit is contained in:
@@ -1,3 +1,7 @@
|
||||
## 0.4.1 (unreleased)
|
||||
|
||||
- Added `random_vector` function
|
||||
|
||||
## 0.4.0 (2023-01-11)
|
||||
|
||||
If upgrading with Postgres < 13, see [this note](https://github.com/pgvector/pgvector#040).
|
||||
|
||||
@@ -184,6 +184,7 @@ inner_product(vector, vector) → double precision | inner product
|
||||
l2_distance(vector, vector) → double precision | Euclidean distance
|
||||
vector_dims(vector) → integer | number of dimensions
|
||||
vector_norm(vector) → double precision | Euclidean norm
|
||||
random_vector(integer) → vector | random vector [unreleased]
|
||||
|
||||
### Aggregate Functions
|
||||
|
||||
|
||||
5
sql/vector--0.4.0--0.4.1.sql
Normal file
5
sql/vector--0.4.0--0.4.1.sql
Normal file
@@ -0,0 +1,5 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION vector UPDATE TO '0.4.1'" to load this file. \quit
|
||||
|
||||
-- random_vector(integer): returns a vector with the requested number of
-- dimensions, each element filled with a random value by the C function of
-- the same name in the extension library.
-- NOTE(review): core random() is presumably labeled PARALLEL RESTRICTED
-- because it depends on backend-local seed state (setseed); confirm that
-- PARALLEL SAFE is really intended for this function.
CREATE FUNCTION random_vector(integer) RETURNS vector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
|
||||
@@ -52,6 +52,9 @@ CREATE FUNCTION vector_add(vector, vector) RETURNS vector
|
||||
CREATE FUNCTION vector_sub(vector, vector) RETURNS vector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- random_vector(integer): returns a vector with the requested number of
-- dimensions, each element filled with a random value. Declared VOLATILE
-- because every call produces a different result.
-- NOTE(review): core random() is presumably labeled PARALLEL RESTRICTED
-- because it depends on backend-local seed state (setseed); confirm that
-- PARALLEL SAFE is really intended for this function.
CREATE FUNCTION random_vector(integer) RETURNS vector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C VOLATILE STRICT PARALLEL SAFE;
|
||||
|
||||
-- private functions
|
||||
|
||||
CREATE FUNCTION vector_lt(vector, vector) RETURNS bool
|
||||
|
||||
@@ -10,15 +10,10 @@
|
||||
#include "access/generic_xlog.h"
|
||||
#include "access/reloptions.h"
|
||||
#include "nodes/execnodes.h"
|
||||
#include "port.h" /* for strtof() and random() */
|
||||
#include "utils/sampling.h"
|
||||
#include "utils/tuplesort.h"
|
||||
#include "vector.h"
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
#include "common/pg_prng.h"
|
||||
#endif
|
||||
|
||||
#ifdef IVFFLAT_BENCH
|
||||
#include "portability/instr_time.h"
|
||||
#endif
|
||||
@@ -68,14 +63,6 @@
|
||||
#define IvfflatBench(name, code) (code)
|
||||
#endif
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
#define RandomDouble() pg_prng_double(&pg_global_prng_state)
|
||||
#define RandomInt() pg_prng_uint32(&pg_global_prng_state)
|
||||
#else
|
||||
#define RandomDouble() (((double) random()) / MAX_RANDOM_VALUE)
|
||||
#define RandomInt() random()
|
||||
#endif
|
||||
|
||||
/* Variables */
|
||||
extern int ivfflat_probes;
|
||||
|
||||
|
||||
19
src/vector.c
19
src/vector.c
@@ -950,3 +950,22 @@ vector_avg(PG_FUNCTION_ARGS)
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a random vector
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(random_vector);
|
||||
Datum
|
||||
random_vector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int32 dim = PG_GETARG_INT32(0);
|
||||
Vector *result;
|
||||
|
||||
CheckDim(dim);
|
||||
|
||||
result = InitVector(dim);
|
||||
for (int i = 0; i < dim; i++)
|
||||
result->x[i] = RandomDouble();
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
14
src/vector.h
14
src/vector.h
@@ -3,6 +3,12 @@
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "port.h" /* for strtof() and random() */
|
||||
|
||||
#if PG_VERSION_NUM >= 150000
|
||||
#include "common/pg_prng.h"
|
||||
#endif
|
||||
|
||||
#define VECTOR_MAX_DIM 16000
|
||||
|
||||
#define VECTOR_SIZE(_dim) (offsetof(Vector, x) + sizeof(float)*(_dim))
|
||||
@@ -10,6 +16,14 @@
|
||||
#define PG_GETARG_VECTOR_P(x) DatumGetVector(PG_GETARG_DATUM(x))
|
||||
#define PG_RETURN_VECTOR_P(x) PG_RETURN_POINTER(x)
|
||||
|
||||
/*
 * Random number helpers shared by the vector and index code.
 *
 * RandomDouble() yields a double in the half-open range [0.0, 1.0) and
 * RandomInt() yields a random integer.
 *
 * Postgres 15 and later use the global pg_prng state from common/pg_prng.h;
 * earlier versions fall back to random() from port.h.
 */
#if PG_VERSION_NUM >= 150000
#define RandomDouble() pg_prng_double(&pg_global_prng_state)
#define RandomInt() pg_prng_uint32(&pg_global_prng_state)
#else
/*
 * Divide by MAX_RANDOM_VALUE + 1 (computed as double to avoid integer
 * overflow) so that 1.0 is never returned, matching pg_prng_double()
 */
#define RandomDouble() (((double) random()) / ((double) MAX_RANDOM_VALUE + 1))
#define RandomInt() random()
#endif
|
||||
|
||||
typedef struct Vector
|
||||
{
|
||||
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||
|
||||
@@ -44,11 +44,6 @@ sub test_index_replay
|
||||
return;
|
||||
}
|
||||
|
||||
# Use ARRAY[random(), random(), random(), ...] over
|
||||
# SELECT array_agg(random()) FROM generate_series(1, $dim)
|
||||
# to generate different values for each row
|
||||
my $array_sql = join(",", ('random()') x $dim);
|
||||
|
||||
# Initialize primary node
|
||||
$node_primary = get_new_node('primary');
|
||||
$node_primary->init(allows_streaming => 1);
|
||||
@@ -75,7 +70,7 @@ $node_replica->start;
|
||||
$node_primary->safe_psql("postgres", "CREATE EXTENSION vector;");
|
||||
$node_primary->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));");
|
||||
$node_primary->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 100000) i;"
|
||||
"INSERT INTO tst SELECT i % 10, random_vector($dim) FROM generate_series(1, 100000) i;"
|
||||
);
|
||||
$node_primary->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v);");
|
||||
|
||||
@@ -91,7 +86,7 @@ for my $i (1 .. 10)
|
||||
test_index_replay("vacuum $i");
|
||||
my ($start, $end) = (100001 + ($i - 1) * 10000, 100000 + $i * 10000);
|
||||
$node_primary->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series($start, $end) i;"
|
||||
"INSERT INTO tst SELECT i % 10, random_vector($dim) FROM generate_series($start, $end) i;"
|
||||
);
|
||||
test_index_replay("insert $i");
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ $node->start;
|
||||
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
|
||||
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector(3));");
|
||||
$node->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 100000) i;"
|
||||
"INSERT INTO tst SELECT i, random_vector(3) FROM generate_series(1, 100000) i;"
|
||||
);
|
||||
|
||||
# Generate queries
|
||||
|
||||
@@ -13,7 +13,7 @@ $node->start;
|
||||
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
|
||||
$node->safe_psql("postgres", "CREATE TABLE tst (i int4 primary key, v vector(3));");
|
||||
$node->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 100000) i;"
|
||||
"INSERT INTO tst SELECT i, random_vector(3) FROM generate_series(1, 100000) i;"
|
||||
);
|
||||
|
||||
# Check each index type
|
||||
|
||||
@@ -13,7 +13,7 @@ $node->start;
|
||||
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
|
||||
$node->safe_psql("postgres", "CREATE TABLE tst (v vector(3));");
|
||||
$node->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 100000) i;"
|
||||
"INSERT INTO tst SELECT random_vector(3) FROM generate_series(1, 100000) i;"
|
||||
);
|
||||
|
||||
$node->safe_psql("postgres", "CREATE INDEX lists50 ON tst USING ivfflat (v) WITH (lists = 50);");
|
||||
|
||||
@@ -6,8 +6,6 @@ use Test::More tests => 5;
|
||||
|
||||
my $dim = 768;
|
||||
|
||||
my $array_sql = join(",", ('random()') x $dim);
|
||||
|
||||
# Initialize node
|
||||
my $node = get_new_node('node');
|
||||
$node->init;
|
||||
@@ -17,7 +15,7 @@ $node->start;
|
||||
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
|
||||
$node->safe_psql("postgres", "CREATE TABLE tst (v vector($dim));");
|
||||
$node->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT ARRAY[$array_sql] FROM generate_series(1, 10000) i;"
|
||||
"INSERT INTO tst SELECT random_vector($dim) FROM generate_series(1, 10000) i;"
|
||||
);
|
||||
$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v);");
|
||||
|
||||
@@ -28,7 +26,7 @@ $node->pgbench(
|
||||
[qr{^$}],
|
||||
"concurrent INSERTs",
|
||||
{
|
||||
"007_inserts" => "INSERT INTO tst SELECT ARRAY[$array_sql] FROM generate_series(1, 10) i;"
|
||||
"007_inserts" => "INSERT INTO tst SELECT random_vector($dim) FROM generate_series(1, 10) i;"
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ $node->safe_psql("postgres", "CREATE TABLE tst (v1 vector(1024), v2 vector(1024)
|
||||
|
||||
# Test insert succeeds
|
||||
$node->safe_psql("postgres",
|
||||
"INSERT INTO tst SELECT array_agg(n), array_agg(n), array_agg(n) FROM generate_series(1, $dim) n"
|
||||
"INSERT INTO tst SELECT random_vector($dim), random_vector($dim), random_vector($dim)"
|
||||
);
|
||||
|
||||
# Change storage to PLAIN
|
||||
@@ -27,6 +27,6 @@ $node->safe_psql("postgres", "ALTER TABLE tst ALTER COLUMN v3 SET STORAGE PLAIN"
|
||||
|
||||
# Test insert fails
|
||||
my ($ret, $stdout, $stderr) = $node->psql("postgres",
|
||||
"INSERT INTO tst SELECT array_agg(n), array_agg(n), array_agg(n) FROM generate_series(1, $dim) n"
|
||||
"INSERT INTO tst SELECT random_vector($dim), random_vector($dim), random_vector($dim)"
|
||||
);
|
||||
like($stderr, qr/row is too big/);
|
||||
|
||||
Reference in New Issue
Block a user