-
-
Notifications
You must be signed in to change notification settings - Fork 632
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
28 changed files
with
841 additions
and
91 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,3 +17,4 @@ cover.cov | |
vendor/* | ||
|
||
embeddings/cybertron/models/* | ||
examples/cybertron-embedding-example/models/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
140 changes: 140 additions & 0 deletions
140
examples/cybertron-embedding-example/cybertron-embedding.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"log" | ||
"os" | ||
"strings" | ||
|
||
"github.com/chewxy/math32" | ||
"github.com/google/uuid" | ||
"github.com/tmc/langchaingo/embeddings" | ||
"github.com/tmc/langchaingo/embeddings/cybertron" | ||
"github.com/tmc/langchaingo/schema" | ||
"github.com/tmc/langchaingo/vectorstores" | ||
"github.com/tmc/langchaingo/vectorstores/weaviate" | ||
) | ||
|
||
func cosineSimilarity(x, y []float32) float32 { | ||
if len(x) != len(y) { | ||
log.Fatal("x and y have different lengths") | ||
} | ||
|
||
var dot, nx, ny float32 | ||
|
||
for i := range x { | ||
nx += x[i] * x[i] | ||
ny += y[i] * y[i] | ||
dot += x[i] * y[i] | ||
} | ||
|
||
return dot / (math32.Sqrt(nx) * math32.Sqrt(ny)) | ||
} | ||
|
||
func randomIndexName() string { | ||
return "Test" + strings.ReplaceAll(uuid.New().String(), "-", "") | ||
} | ||
|
||
func exampleInMemory(ctx context.Context, emb embeddings.Embedder) { | ||
// We're going to create embeddings for the following strings, then calculate the similarity | ||
// between them using cosine-simularity. | ||
docs := []string{ | ||
"tokyo", | ||
"japan", | ||
"potato", | ||
} | ||
|
||
vecs, err := emb.EmbedDocuments(ctx, docs) | ||
if err != nil { | ||
log.Fatal("embed query", err) | ||
} | ||
|
||
fmt.Println("Similarities:") | ||
|
||
for i := range docs { | ||
for j := range docs { | ||
fmt.Printf("%6s ~ %6s = %0.2f\n", docs[i], docs[j], cosineSimilarity(vecs[i], vecs[j])) | ||
} | ||
} | ||
} | ||
|
||
func exampleWeaviate(ctx context.Context, emb embeddings.Embedder) { | ||
scheme := os.Getenv("WEAVIATE_SCHEME") | ||
host := os.Getenv("WEAVIATE_HOST") | ||
|
||
if scheme == "" || host == "" { | ||
log.Print("Set WEAVIATE_HOST and WEAVIATE_SCHEME to run the weaviate example") | ||
|
||
return | ||
} | ||
|
||
// Create a new Weaviate vector store with the Cybertron Embedder to generate embeddings. | ||
store, err := weaviate.New( | ||
weaviate.WithEmbedder(emb), | ||
weaviate.WithScheme(scheme), | ||
weaviate.WithHost(host), | ||
weaviate.WithIndexName(randomIndexName()), | ||
) | ||
if err != nil { | ||
log.Fatal("create weaviate store", err) | ||
} | ||
|
||
// Add some documents to the vector store. This will use the Cybertron Embedder to create | ||
// embeddings for the documents. | ||
_, err = store.AddDocuments(ctx, []schema.Document{ | ||
{PageContent: "tokyo"}, | ||
{PageContent: "japan"}, | ||
{PageContent: "potato"}, | ||
}) | ||
if err != nil { | ||
log.Fatal("add documents", err) | ||
} | ||
|
||
// Perform a similarity search, returning at most three results with similarity scores of | ||
// at least 0.8. This again uses the Cybertron Embedder to create an embedding for the | ||
// search query. | ||
matches, err := store.SimilaritySearch(ctx, "japan", 3, | ||
vectorstores.WithScoreThreshold(0.8), | ||
) | ||
if err != nil { | ||
log.Fatal("similarity search", err) | ||
} | ||
|
||
fmt.Println("Matches:") | ||
for _, match := range matches { | ||
fmt.Printf(" japan ~ %6s = %0.2f\n", match.PageContent, match.Score) | ||
} | ||
} | ||
|
||
func main() { | ||
ctx := context.Background() | ||
|
||
// Create an embedder client that uses the "BAAI/bge-small-en-v1.5" model and caches it in | ||
// the "models" directory. Cybertron will automatically download the model from HuggingFace | ||
// and convert it when needed. | ||
// | ||
// Note that not all models are supported and that Cybertron executes the model locally on | ||
// the CPU, so larger models will be quite slow! | ||
emc, err := cybertron.NewCybertron( | ||
cybertron.WithModelsDir("models"), | ||
cybertron.WithModel("BAAI/bge-small-en-v1.5"), | ||
) | ||
if err != nil { | ||
log.Fatal("create embedder client", err) | ||
} | ||
|
||
// Create an embedder from the previously created client. | ||
emb, err := embeddings.NewEmbedder(emc, | ||
embeddings.WithStripNewLines(false), | ||
) | ||
if err != nil { | ||
log.Fatal("create embedder", err) | ||
} | ||
|
||
// Example: use the Embedder to do an in-memory comparison between some documents. | ||
exampleInMemory(ctx, emb) | ||
|
||
// Example: use the Embedder together with a Vector Store. | ||
exampleWeaviate(ctx, emb) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
module github.com/tmc/langchaingo/examples/cybertron-embedding-example | ||
|
||
go 1.21 | ||
|
||
require ( | ||
github.com/chewxy/math32 v1.10.1 | ||
github.com/google/uuid v1.4.0 | ||
github.com/tmc/langchaingo v0.1.4-0.20240123022810-6f20ee5cf8df | ||
) | ||
|
||
require ( | ||
github.com/PuerkitoBio/purell v1.1.1 // indirect | ||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect | ||
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d // indirect | ||
github.com/dlclark/regexp2 v1.8.1 // indirect | ||
github.com/go-openapi/analysis v0.21.2 // indirect | ||
github.com/go-openapi/errors v0.20.3 // indirect | ||
github.com/go-openapi/jsonpointer v0.19.5 // indirect | ||
github.com/go-openapi/jsonreference v0.19.6 // indirect | ||
github.com/go-openapi/loads v0.21.1 // indirect | ||
github.com/go-openapi/spec v0.20.4 // indirect | ||
github.com/go-openapi/strfmt v0.21.3 // indirect | ||
github.com/go-openapi/swag v0.22.3 // indirect | ||
github.com/go-openapi/validate v0.21.0 // indirect | ||
github.com/golang/protobuf v1.5.3 // indirect | ||
github.com/google/flatbuffers v23.5.26+incompatible // indirect | ||
github.com/josharian/intern v1.0.0 // indirect | ||
github.com/mailru/easyjson v0.7.7 // indirect | ||
github.com/mattn/go-colorable v0.1.13 // indirect | ||
github.com/mattn/go-isatty v0.0.19 // indirect | ||
github.com/mitchellh/mapstructure v1.5.0 // indirect | ||
github.com/nlpodyssey/cybertron v0.2.1 // indirect | ||
github.com/nlpodyssey/gopickle v0.2.0 // indirect | ||
github.com/nlpodyssey/gotokenizers v0.2.0 // indirect | ||
github.com/nlpodyssey/spago v1.1.0 // indirect | ||
github.com/oklog/ulid v1.3.1 // indirect | ||
github.com/pkoukk/tiktoken-go v0.1.2 // indirect | ||
github.com/rs/zerolog v1.31.0 // indirect | ||
github.com/weaviate/weaviate v1.19.13 // indirect | ||
github.com/weaviate/weaviate-go-client/v4 v4.8.1 // indirect | ||
go.mongodb.org/mongo-driver v1.11.3 // indirect | ||
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect | ||
golang.org/x/net v0.19.0 // indirect | ||
golang.org/x/oauth2 v0.15.0 // indirect | ||
golang.org/x/sync v0.5.0 // indirect | ||
golang.org/x/sys v0.15.0 // indirect | ||
golang.org/x/text v0.14.0 // indirect | ||
google.golang.org/appengine v1.6.8 // indirect | ||
google.golang.org/protobuf v1.31.0 // indirect | ||
gopkg.in/yaml.v3 v3.0.1 // indirect | ||
) |
Oops, something went wrong.