-
Notifications
You must be signed in to change notification settings - Fork 1
/
example.rb
38 lines (32 loc) · 1.09 KB
/
example.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
require "pg"
require "pgvector"
require "set"
require "tomoto"
# Connect to the pgvector_example database.
conn = PG.connect(dbname: "pgvector_example")

# Schema setup, run in order: enable the vector extension, then rebuild the
# documents table from scratch so every run starts with an empty table.
[
  "CREATE EXTENSION IF NOT EXISTS vector",
  "DROP TABLE IF EXISTS documents",
  "CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(20))"
].each { |statement| conn.exec(statement) }
# Trains an LDA topic model on the given texts and returns one embedding per
# text, built from the values of that text's document topics.
#
# @param input [Enumerable<String>] texts to embed; each one is lowercased and
#   split on whitespace before being added to the model
# @param topics [Integer] number of LDA topics (the default of 20 matches the
#   vector(20) column this script creates)
# @param iterations [Integer] number of training iterations
# @param stop_words [Enumerable<String>] lowercase tokens removed from every
#   text before training
# @return [Array<Array>] one array of topic values per input text, in input order
def generate_embeddings(input, topics: 20, iterations: 100, stop_words: %w[the is])
  model = Tomoto::LDA.new(k: topics)
  ignored = Set.new(stop_words)

  input.each do |text|
    model.add_doc(text.downcase.split.reject { |word| ignored.include?(word) })
  end
  model.train(iterations)

  # Fetch the document list once instead of once per input text.
  docs = model.docs
  input.each_with_index.map { |_, index| docs[index].topics.values }
end
# Sample corpus: one short sentence per document.
input = [
  "The dog is barking",
  "The cat is purring",
  "The bear is growling"
]

# One embedding per sentence, in the same order as input.
embeddings = generate_embeddings(input)

# Store every sentence next to its embedding.
insert_sql = "INSERT INTO documents (content, embedding) VALUES ($1, $2)"
input.zip(embeddings).each do |content_and_embedding|
  conn.exec_params(insert_sql, content_and_embedding)
end

# List up to five other documents, ordered by pgvector's <=> distance
# (cosine distance) to the embedding of the document with this id.
document_id = 1
nearest_sql = "SELECT content FROM documents WHERE id != $1 ORDER BY embedding <=> (SELECT embedding FROM documents WHERE id = $1) LIMIT 5"
result = conn.exec_params(nearest_sql, [document_id])
result.each { |row| puts row["content"] }