pgvector support for Java, Kotlin, Groovy, and Scala
Supports JDBC, Spring JDBC, Groovy SQL, and Slick
For Maven, add to `pom.xml` under `<dependencies>`:
<dependency>
<groupId>com.pgvector</groupId>
<artifactId>pgvector</artifactId>
<version>0.1.6</version>
</dependency>
For sbt, add to `build.sbt`:
libraryDependencies += "com.pgvector" % "pgvector" % "0.1.6"
For other build tools, see this page.
And follow the instructions for your database library:
- Java - JDBC, Spring JDBC, Hibernate, R2DBC
- Kotlin - JDBC
- Groovy - JDBC, Groovy SQL
- Scala - JDBC, Slick
Or check out some examples:
- Embeddings with OpenAI
- Binary embeddings with Cohere
- Sentence embeddings with Deep Java Library
- Hybrid search with Deep Java Library (Reciprocal Rank Fusion)
- Extended-connectivity fingerprints with the Chemistry Development Kit
- Horizontal scaling with Citus
- Bulk loading with `COPY`
For Java with JDBC, import the `PGvector` class
import com.pgvector.PGvector;
Enable the extension
// Statement is AutoCloseable: try-with-resources releases it even if the DDL fails
try (Statement setupStmt = conn.createStatement()) {
    setupStmt.executeUpdate("CREATE EXTENSION IF NOT EXISTS vector");
}
Register the vector type with your connection
PGvector.registerTypes(conn);
Create a table
// Close the statement as soon as the table has been created
try (Statement createStmt = conn.createStatement()) {
    createStmt.executeUpdate("CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))");
}
Insert a vector
// The prepared statement is closed automatically once the insert has run
try (PreparedStatement insertStmt = conn.prepareStatement("INSERT INTO items (embedding) VALUES (?)")) {
    // Bind the vector as the single positional parameter
    insertStmt.setObject(1, new PGvector(new float[] {1, 1, 1}));
    insertStmt.executeUpdate();
}
Get the nearest neighbors
// Both the prepared statement and its result set are closed automatically
try (PreparedStatement neighborStmt = conn.prepareStatement("SELECT * FROM items ORDER BY embedding <-> ? LIMIT 5")) {
    // Bind the query vector as the single positional parameter
    neighborStmt.setObject(1, new PGvector(new float[] {1, 1, 1}));
    try (ResultSet rs = neighborStmt.executeQuery()) {
        while (rs.next()) {
            System.out.println((PGvector) rs.getObject("embedding"));
        }
    }
}
Add an approximate index
// Close the statement once the index has been created
try (Statement indexStmt = conn.createStatement()) {
    indexStmt.executeUpdate("CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)");
    // or
    indexStmt.executeUpdate("CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)");
}
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example
For Java with Spring JDBC, import the `PGvector` class
import com.pgvector.PGvector;
Enable the extension
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS vector");
Create a table
jdbcTemplate.execute("CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))");
Insert a vector
Object[] insertParams = new Object[] { new PGvector(new float[] {1, 1, 1}) };
jdbcTemplate.update("INSERT INTO items (embedding) VALUES (?)", insertParams);
Get the nearest neighbors
// Bind the query vector as the single positional parameter
Object[] neighborParams = new Object[] { new PGvector(new float[] {1, 1, 1}) };
List<Map<String, Object>> rows = jdbcTemplate.queryForList("SELECT * FROM items ORDER BY embedding <-> ? LIMIT 5", neighborParams);
// Iterate with the parameterized Map type (not a raw Map) so the loop stays type-safe
for (Map<String, Object> row : rows) {
    System.out.println(row.get("embedding"));
}
Add an approximate index
jdbcTemplate.execute("CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)");
// or
jdbcTemplate.execute("CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)");
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example
Hibernate 6.4+ has a vector module (use this instead of `com.pgvector.pgvector`).
For Maven, add to `pom.xml` under `<dependencies>`:
<dependency>
<groupId>org.hibernate.orm</groupId>
<artifactId>hibernate-vector</artifactId>
<version>6.4.0.Final</version>
</dependency>
Define an entity
import jakarta.persistence.*;
import org.hibernate.annotations.Array;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
/**
 * JPA entity with a generated identifier and a single vector embedding.
 */
@Entity
class Item {
    /** Primary key; the value is generated rather than assigned by the caller. */
    @Id
    @GeneratedValue
    private Long id;

    /** Embedding stored through the VECTOR JDBC type code. */
    @Column
    @JdbcTypeCode(SqlTypes.VECTOR)
    @Array(length = 3) // dimensions
    private float[] embedding;

    /**
     * Returns the generated identifier.
     *
     * @return the id, or {@code null} if none has been assigned yet
     */
    public Long getId() {
        return id;
    }

    /**
     * Returns the embedding so that query results can be inspected.
     *
     * @return the embedding array, or {@code null} if none has been set
     */
    public float[] getEmbedding() {
        return embedding;
    }

    /**
     * Sets the embedding.
     *
     * @param embedding the vector components; the array is stored as given, not copied
     */
    public void setEmbedding(float[] embedding) {
        this.embedding = embedding;
    }
}
Insert a vector
Item item = new Item();
item.setEmbedding(new float[] {1, 1, 1});
entityManager.persist(item);
Get the nearest neighbors
List<Item> items = entityManager
.createQuery("FROM Item ORDER BY l2_distance(embedding, :embedding) LIMIT 5", Item.class)
.setParameter("embedding", new float[] {1, 1, 1})
.getResultList();
See a full example
R2DBC PostgreSQL 1.0.3+ supports the vector type (use this instead of `com.pgvector.pgvector`).
For Maven, add to `pom.xml` under `<dependencies>`:
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>r2dbc-postgresql</artifactId>
<version>1.0.3.RELEASE</version>
</dependency>
For Kotlin with JDBC, import the `PGvector` class
import com.pgvector.PGvector
Enable the extension
val setupStmt = conn.createStatement()
setupStmt.executeUpdate("CREATE EXTENSION IF NOT EXISTS vector")
Register the vector type with your connection
PGvector.registerTypes(conn)
Create a table
val createStmt = conn.createStatement()
createStmt.executeUpdate("CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))")
Insert a vector
val insertStmt = conn.prepareStatement("INSERT INTO items (embedding) VALUES (?)")
insertStmt.setObject(1, PGvector(floatArrayOf(1.0f, 1.0f, 1.0f)))
insertStmt.executeUpdate()
Get the nearest neighbors
val neighborStmt = conn.prepareStatement("SELECT * FROM items ORDER BY embedding <-> ? LIMIT 5")
neighborStmt.setObject(1, PGvector(floatArrayOf(1.0f, 1.0f, 1.0f)))
val rs = neighborStmt.executeQuery()
while (rs.next()) {
println(rs.getObject("embedding") as PGvector?)
}
Add an approximate index
val indexStmt = conn.createStatement()
indexStmt.executeUpdate("CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)")
// or
indexStmt.executeUpdate("CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)")
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example
For Groovy with JDBC, import the `PGvector` class
import com.pgvector.PGvector
Enable the extension
def setupStmt = conn.createStatement()
setupStmt.executeUpdate("CREATE EXTENSION IF NOT EXISTS vector")
Register the vector type with your connection
PGvector.registerTypes(conn)
Create a table
def createStmt = conn.createStatement()
createStmt.executeUpdate("CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))")
Insert a vector
def insertStmt = conn.prepareStatement("INSERT INTO items (embedding) VALUES (?)")
insertStmt.setObject(1, new PGvector([1, 1, 1] as float[]))
insertStmt.executeUpdate()
Get the nearest neighbors
def neighborStmt = conn.prepareStatement("SELECT * FROM items ORDER BY embedding <-> ? LIMIT 5")
neighborStmt.setObject(1, new PGvector([1, 1, 1] as float[]))
def rs = neighborStmt.executeQuery()
while (rs.next()) {
println((PGvector) rs.getObject("embedding"))
}
Add an approximate index
def indexStmt = conn.createStatement()
indexStmt.executeUpdate("CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)")
// or
indexStmt.executeUpdate("CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)")
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example
For Groovy with Groovy SQL, import the `PGvector` class
import com.pgvector.PGvector
Enable the extension
sql.execute "CREATE EXTENSION IF NOT EXISTS vector"
Create a table
sql.execute "CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))"
Insert a vector
def params = [new PGvector([1, 1, 1] as float[])]
sql.executeInsert "INSERT INTO items (embedding) VALUES (?)", params
Get the nearest neighbors
def params = [new PGvector([1, 1, 1] as float[])]
sql.eachRow("SELECT * FROM items ORDER BY embedding <-> ? LIMIT 5", params) { row ->
println row.embedding
}
Add an approximate index
sql.execute "CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)"
// or
sql.execute "CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)"
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example
For Scala with JDBC, import the `PGvector` class
import com.pgvector.PGvector
Enable the extension
val setupStmt = conn.createStatement()
setupStmt.executeUpdate("CREATE EXTENSION IF NOT EXISTS vector")
Register the vector type with your connection
PGvector.registerTypes(conn)
Create a table
val createStmt = conn.createStatement()
createStmt.executeUpdate("CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))")
Insert a vector
val insertStmt = conn.prepareStatement("INSERT INTO items (embedding) VALUES (?)")
insertStmt.setObject(1, new PGvector(Array[Float](1, 1, 1)))
insertStmt.executeUpdate()
Get the nearest neighbors
val neighborStmt = conn.prepareStatement("SELECT * FROM items ORDER BY embedding <-> ? LIMIT 5")
neighborStmt.setObject(1, new PGvector(Array[Float](1, 1, 1)))
val rs = neighborStmt.executeQuery()
while (rs.next()) {
println(rs.getObject("embedding").asInstanceOf[PGvector])
}
Add an approximate index
val indexStmt = conn.createStatement()
indexStmt.executeUpdate("CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)")
// or
indexStmt.executeUpdate("CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)")
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example
For Scala with Slick, import the `PGvector` class
import com.pgvector.PGvector
Enable the extension
db.run(sqlu"CREATE EXTENSION IF NOT EXISTS vector")
Add a vector column
// Slick table mapping for "items"; only the embedding column is projected.
class Items(tag: Tag) extends Table[String](tag, "items") {
  // Declared as vector(3) in SQL and handled as a String on the Scala side
  def embedding = column[String]("embedding", O.SqlType("vector(3)"))

  // Default projection: the single embedding column
  def * = embedding
}
Insert a vector
val embedding = new PGvector(Array[Float](1, 1, 1)).toString
db.run(sqlu"INSERT INTO items (embedding) VALUES ($embedding::vector)")
Get the nearest neighbors
val embedding = new PGvector(Array[Float](1, 1, 1)).toString
db.run(sql"SELECT * FROM items ORDER BY embedding <-> $embedding::vector LIMIT 5".as[(String)])
Add an approximate index
db.run(sqlu"CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)")
// or
db.run(sqlu"CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)")
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
See a full example
View the changelog
Everyone is encouraged to help improve this project. Here are a few ways you can help:
- Report bugs
- Fix bugs and submit pull requests
- Write, clarify, or fix documentation
- Suggest or add new features
To get started with development:
git clone https://github.com/pgvector/pgvector-java.git
cd pgvector-java
createdb pgvector_java_test
mvn test
To run an example:
cd examples/loading
createdb pgvector_example
mvn package
java -jar target/example-jar-with-dependencies.jar