diff --git a/CHANGELOG.md b/CHANGELOG.md index cf5ba23..802b356 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.1.1 (unreleased) + +- Added binary representation for `vector` type + ## 0.1.0 (2021-04-20) - First release diff --git a/Makefile b/Makefile index d4d1422..a5323a8 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,11 @@ EXTENSION = vector -DATA = vector--0.1.0.sql +DATA = vector--0.1.1.sql vector--0.1.0-0.1.1.sql MODULE_big = vector OBJS = src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o TESTS = $(wildcard sql/*.sql) -REGRESS = btree cast functions ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_unlogged vector +REGRESS = btree cast copy functions ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_unlogged vector PG_CONFIG ?= pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/expected/copy.out b/expected/copy.out new file mode 100644 index 0000000..4c55fa3 --- /dev/null +++ b/expected/copy.out @@ -0,0 +1,18 @@ +SET client_min_messages = warning; +CREATE EXTENSION IF NOT EXISTS vector; +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); +CREATE TABLE t2 (val vector(3)); +\copy t TO '/tmp/data.bin' WITH (FORMAT binary) +\copy t2 FROM '/tmp/data.bin' WITH (FORMAT binary) +SELECT * FROM t2 ORDER BY val; + val +--------- + [0,0,0] + [1,1,1] + [1,2,3] + +(4 rows) + +DROP TABLE t; +DROP TABLE t2; diff --git a/sql/copy.sql b/sql/copy.sql new file mode 100644 index 0000000..c884fa5 --- /dev/null +++ b/sql/copy.sql @@ -0,0 +1,15 @@ +SET client_min_messages = warning; +CREATE EXTENSION IF NOT EXISTS vector; + +CREATE TABLE t (val vector(3)); +INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL); + +CREATE TABLE t2 (val vector(3)); + +\copy t TO '/tmp/data.bin' WITH (FORMAT binary) +\copy t2 FROM '/tmp/data.bin' WITH (FORMAT binary) + +SELECT * FROM t2 ORDER BY val; + +DROP TABLE t; +DROP TABLE t2; diff 
--git a/src/vector.c b/src/vector.c index fcdfcc3..50434f5 100644 --- a/src/vector.c +++ b/src/vector.c @@ -6,6 +6,7 @@ #include "fmgr.h" #include "catalog/pg_type.h" #include "lib/stringinfo.h" +#include "libpq/pqformat.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -40,6 +41,24 @@ CheckExpectedDim(int32 typmod, int dim) errmsg("expected %d dimensions, not %d", typmod, dim))); } + +/* + * Ensure the dimension count is between 1 and VECTOR_MAX_DIM + */ +static inline void +CheckDim(int dim) +{ + if (dim < 1) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("vector must have at least 1 dimension"))); + + if (dim > VECTOR_MAX_DIM) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM))); +} + /* * Ensure finite elements */ @@ -217,6 +236,63 @@ vector_typmod_in(PG_FUNCTION_ARGS) PG_RETURN_INT32(*tl); } +/* + * Convert external binary representation to internal representation + * + * Reads an int16 dimension count, an int16 unused field, then one float4 + * per dimension, in the order vector_send writes them. + */ +PG_FUNCTION_INFO_V1(vector_recv); +Datum +vector_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + int32 typmod = PG_GETARG_INT32(2); + Vector *result; + int16 dim; + int16 unused; + int i; + + dim = pq_getmsgint(buf, sizeof(int16)); + unused = pq_getmsgint(buf, sizeof(int16)); + + CheckDim(dim); + CheckExpectedDim(typmod, dim); + + /* Reject a non-zero unused field instead of silently discarding it */ + if (unused != 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("expected unused to be 0, not %d", unused))); + + /* NOTE(review): elements are stored as received; confirm whether NaN/Inf should be rejected here */ + result = InitVector(dim); + for (i = 0; i < dim; i++) + result->x[i] = pq_getmsgfloat4(buf); + + PG_RETURN_POINTER(result); +} + +/* + * Convert internal representation to the external binary representation + */ +PG_FUNCTION_INFO_V1(vector_send); +Datum +vector_send(PG_FUNCTION_ARGS) +{ + Vector *vec = PG_GETARG_VECTOR_P(0); + StringInfoData buf; + int i; + + pq_begintypsend(&buf); + pq_sendint16(&buf, vec->dim); + pq_sendint16(&buf, vec->unused); + for (i = 0; i < vec->dim; i++) + pq_sendfloat4(&buf, vec->x[i]); + + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + /* * Convert vector to vector */ @@ -259,17 +335,7 @@ array_to_vector(PG_FUNCTION_ARGS) deconstruct_array(array, 
ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, &nullsp, &nelemsp); if (typmod == -1) - { - if (nelemsp < 1) - ereport(ERROR, - (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("vector must have at least 1 dimension"))); - - if (nelemsp > VECTOR_MAX_DIM) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("vector cannot have more than %d dimensions", VECTOR_MAX_DIM))); - } + CheckDim(nelemsp); else CheckExpectedDim(typmod, nelemsp); diff --git a/vector--0.1.0-0.1.1.sql b/vector--0.1.0-0.1.1.sql new file mode 100644 index 0000000..79e0074 --- /dev/null +++ b/vector--0.1.0-0.1.1.sql @@ -0,0 +1,10 @@ +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "ALTER EXTENSION vector UPDATE TO '0.1.1'" to load this file. \quit + +CREATE FUNCTION vector_recv(internal, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION vector_send(vector) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; + +ALTER TYPE vector SET ( RECEIVE = vector_recv, SEND = vector_send ); diff --git a/vector--0.1.0.sql b/vector--0.1.1.sql similarity index 95% rename from vector--0.1.0.sql rename to vector--0.1.1.sql index 8e7a064..9ae9887 100644 --- a/vector--0.1.0.sql +++ b/vector--0.1.1.sql @@ -14,10 +14,18 @@ CREATE FUNCTION vector_out(vector) RETURNS cstring CREATE FUNCTION vector_typmod_in(cstring[]) RETURNS integer AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION vector_recv(internal, oid, integer) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION vector_send(vector) RETURNS bytea + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; + CREATE TYPE vector ( INPUT = vector_in, OUTPUT = vector_out, - TYPMOD_IN = vector_typmod_in + TYPMOD_IN = vector_typmod_in, + RECEIVE = vector_recv, + SEND = vector_send ); -- functions diff --git a/vector.control b/vector.control index 588b75c..bf0bd4f 100644 --- a/vector.control +++ b/vector.control 
@@ -1,4 +1,4 @@ comment = 'vector data type and ivfflat access method' -default_version = '0.1.0' +default_version = '0.1.1' module_pathname = '$libdir/vector' relocatable = true