Skip to content

Commit

Permalink
Merge pull request #2883 from redpanda-data/clicky
Browse files Browse the repository at this point in the history
  • Loading branch information
rockwotj authored Sep 24, 2024
2 parents 8e56f2b + 4313713 commit 7f0faa8
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,43 @@ aws_bedrock_embeddings:
This processor sends text to your chosen large language model (LLM) and computes vector embeddings, using the AWS Bedrock API.
For more information, see the https://docs.aws.amazon.com/bedrock/latest/userguide[AWS Bedrock documentation^].
== Examples
[tabs]
======
Store embedding vectors in Clickhouse::
+
--
Compute embeddings for some generated data and store it within https://clickhouse.com/[Clickhouse^]
```yaml
input:
generate:
interval: 1s
mapping: |
root = {"text": fake("paragraph")}
pipeline:
processors:
- branch:
request_map: |
root = this.text
processors:
- aws_bedrock_embeddings:
model: amazon.titan-embed-text-v1
result_map: |
root.embeddings = this
output:
sql_insert:
driver: clickhouse
dsn: "clickhouse://localhost:9000"
table: searchable_text
columns: ["id", "text", "vector"]
args_mapping: "root = [uuid_v4(), this.text, this.embeddings]"
```
--
======
== Fields
=== `region`
Expand Down
33 changes: 32 additions & 1 deletion docs/modules/components/pages/processors/ollama_embeddings.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,38 @@ output:
grpc_host: localhost:6334
collection_name: "example_collection"
id: "root = uuid_v4()"
vector_mapping: "root = this"```
vector_mapping: "root = this"
```
--
Store embedding vectors in Clickhouse::
+
--
Compute embeddings for some generated data and store it within https://clickhouse.com/[Clickhouse^]
```yaml
input:
generate:
interval: 1s
mapping: |
root = {"text": fake("paragraph")}
pipeline:
processors:
- branch:
processors:
- ollama_embeddings:
model: snowflake-arctic-embed
text: "${!this.text}"
result_map: |
root.embeddings = this
output:
sql_insert:
driver: clickhouse
dsn: "clickhouse://localhost:9000"
table: searchable_text
columns: ["id", "text", "vector"]
args_mapping: "root = [uuid_v4(), this.text, this.embeddings]"
```
--
======
Expand Down
28 changes: 27 additions & 1 deletion internal/impl/aws/enterprise/processor_bedrock_embeddings.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,33 @@ For more information, see the https://docs.aws.amazon.com/bedrock/latest/usergui
Description("The model ID to use. For a full list see the https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[AWS Bedrock documentation^].")).
Field(service.NewStringField(bedepFieldText).
Description("The text you want to generate vector embeddings for. By default, the processor submits the entire payload as a string.").
Optional())
Optional()).
Example(
"Store embedding vectors in Clickhouse",
"Compute embeddings for some generated data and store it within https://clickhouse.com/[Clickhouse^]",
`input:
generate:
interval: 1s
mapping: |
root = {"text": fake("paragraph")}
pipeline:
processors:
- branch:
request_map: |
root = this.text
processors:
- aws_bedrock_embeddings:
model: amazon.titan-embed-text-v1
result_map: |
root.embeddings = this
output:
sql_insert:
driver: clickhouse
dsn: "clickhouse://localhost:9000"
table: searchable_text
columns: ["id", "text", "vector"]
args_mapping: "root = [uuid_v4(), this.text, this.embeddings]"
`)
}

func newBedrockEmbeddingsProcessor(conf *service.ParsedConfig, mgr *service.Resources) (service.Processor, error) {
Expand Down
28 changes: 27 additions & 1 deletion internal/impl/ollama/embeddings_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,33 @@ output:
grpc_host: localhost:6334
collection_name: "example_collection"
id: "root = uuid_v4()"
vector_mapping: "root = this"`)
vector_mapping: "root = this"
`).
Example(
"Store embedding vectors in Clickhouse",
"Compute embeddings for some generated data and store it within https://clickhouse.com/[Clickhouse^]",
`input:
generate:
interval: 1s
mapping: |
root = {"text": fake("paragraph")}
pipeline:
processors:
- branch:
processors:
- ollama_embeddings:
model: snowflake-arctic-embed
text: "${!this.text}"
result_map: |
root.embeddings = this
output:
sql_insert:
driver: clickhouse
dsn: "clickhouse://localhost:9000"
table: searchable_text
columns: ["id", "text", "vector"]
args_mapping: "root = [uuid_v4(), this.text, this.embeddings]"
`)
}

func makeOllamaEmbeddingProcessor(conf *service.ParsedConfig, mgr *service.Resources) (service.Processor, error) {
Expand Down

0 comments on commit 7f0faa8

Please sign in to comment.