pgvector examples for R
Follow the instructions for your database library, DBI or dbx. The DBI instructions come first, followed by the same steps for dbx.
Enable the extension
# `db` is presumably an open DBI connection to PostgreSQL created beforehand — confirm.
# IF NOT EXISTS makes this statement safe to run more than once.
dbExecute(db, "CREATE EXTENSION IF NOT EXISTS vector")
Create a table
# The embedding column is declared as vector(3), so it holds 3-dimensional vectors.
dbExecute(db, "CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))")
Insert vectors
# Convert a numeric vector into pgvector's text form, e.g. c(1, 2, 3) -> "[1,2,3]".
#
# v: a non-empty numeric vector with only finite values.
# Returns a single character string.
# Stops with an error if `v` is not numeric, is empty, or contains NA, NaN, or
# infinite values, so bad input fails here instead of inside the database.
pgvector.serialize <- function(v) {
  # is.finite() is FALSE for NA, NaN, Inf and -Inf, so one check covers them all
  stopifnot(is.numeric(v), length(v) > 0, all(is.finite(v)))
  paste0("[", paste(v, collapse = ","), "]")
}
# Three example embeddings, one per matrix row
embeddings <- rbind(
  c(1, 1, 1),
  c(2, 2, 2),
  c(1, 1, 2)
)
# Serialize each row to its text form and append the rows to the "items" table
items <- data.frame(
  embedding = vapply(
    seq_len(nrow(embeddings)),
    function(i) pgvector.serialize(embeddings[i, ]),
    character(1)
  )
)
dbAppendTable(db, "items", items)
Get the nearest neighbors
# Bind the query vector as a parameter instead of pasting it into the SQL.
# DBI documents `params` as a list with one element per placeholder ($1 here).
params <- list(pgvector.serialize(c(1, 2, 3)))
# Return up to 5 rows ordered by the `<->` distance to the query vector
dbGetQuery(
  db,
  "SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 5",
  params = params
)
Add an approximate index
# The two statements below are alternatives: pick one index type.
# Option 1: an HNSW index using the vector_l2_ops operator class
dbExecute(db, "CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)")
# Option 2: an IVFFlat index with 100 lists
dbExecute(db, "CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)")
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example in DBI/example.R
Enable the extension
# `db` is presumably an open dbx connection to PostgreSQL created beforehand — confirm.
# IF NOT EXISTS makes this statement safe to run more than once.
dbxExecute(db, "CREATE EXTENSION IF NOT EXISTS vector")
Create a table
# The embedding column is declared as vector(3), so it holds 3-dimensional vectors.
dbxExecute(db, "CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))")
Insert vectors
# Convert a numeric vector into pgvector's text form, e.g. c(1, 2, 3) -> "[1,2,3]".
#
# v: a non-empty numeric vector with only finite values.
# Returns a single character string.
# Stops with an error if `v` is not numeric, is empty, or contains NA, NaN, or
# infinite values, so bad input fails here instead of inside the database.
pgvector.serialize <- function(v) {
  # is.finite() is FALSE for NA, NaN, Inf and -Inf, so one check covers them all
  stopifnot(is.numeric(v), length(v) > 0, all(is.finite(v)))
  paste0("[", paste(v, collapse = ","), "]")
}
# Three example embeddings, one per matrix row
embeddings <- rbind(
  c(1, 1, 1),
  c(2, 2, 2),
  c(1, 1, 2)
)
# Serialize each row to its text form and insert the rows into the "items" table
items <- data.frame(
  embedding = vapply(
    seq_len(nrow(embeddings)),
    function(i) pgvector.serialize(embeddings[i, ]),
    character(1)
  )
)
dbxInsert(db, "items", items)
Get the nearest neighbors
# Bind the query vector as a parameter instead of pasting it into the SQL.
# dbx documents `params` as a list with one element per `?` placeholder.
params <- list(pgvector.serialize(c(1, 2, 3)))
# Return up to 5 rows ordered by the `<->` distance to the query vector
dbxSelect(
  db,
  "SELECT * FROM items ORDER BY embedding <-> ? LIMIT 5",
  params = params
)
Add an approximate index
# The two statements below are alternatives: pick one index type.
# Option 1: an HNSW index using the vector_l2_ops operator class
dbxExecute(db, "CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)")
# Option 2: an IVFFlat index with 100 lists
dbxExecute(db, "CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)")
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example in dbx/example.R
Everyone is encouraged to help improve this project. Here are a few ways you can help:
- Report bugs
- Fix bugs and submit pull requests
- Write, clarify, or fix documentation
- Suggest or add new features
To get started with development:
# Fetch the source and enter the project directory
git clone https://github.com/pgvector/pgvector-r.git
cd pgvector-r
# Create the pgvector_r_test database (presumably the one the example scripts connect to — confirm)
createdb pgvector_r_test
In R, do:
# Install the remotes package, then use it to install the dependencies
# declared by the package in the current directory
install.packages("remotes")
remotes::install_deps(dependencies=TRUE)
And run:
# Run the DBI example script, then the dbx example script
Rscript DBI/example.R
Rscript dbx/example.R