From b7ec06436f2284d44574a3a4fd3119d1c3468396 Mon Sep 17 00:00:00 2001 From: "Alexandre E. Souza" Date: Wed, 6 Mar 2024 23:12:30 -0300 Subject: [PATCH 1/2] Example spliters example about use spliter --- .../text_splitters/examples/index.mdx | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/docs/modules/data_connection/text_splitters/examples/index.mdx b/docs/docs/modules/data_connection/text_splitters/examples/index.mdx index 972e6dd4f..d7989fbe2 100644 --- a/docs/docs/modules/data_connection/text_splitters/examples/index.mdx +++ b/docs/docs/modules/data_connection/text_splitters/examples/index.mdx @@ -6,4 +6,32 @@ import DocCardList from "@theme/DocCardList"; # Text Splitters: Examples +Splitters are components or tools used to divide texts into smaller, more manageable parts or specific segments. This division can be necessary for various reasons, such as improving the processing, analysis, or understanding of large or complex texts. Splitters can be simple, like dividing a text into sentences or paragraphs, or more complex, such as splitting based on themes, topics, or specific grammatical structures. 
+ +For create spliters can use PDF, Text or HTML + +```go +func main(){ +func textToSplit() []schema.Document { + + f, err := os.Open("./spliters/docs/transcript.txt") + if err != nil { + fmt.Println("Error opening file: ", err) + } + + p := documentloaders.NewText(f) + + split := textsplitter.NewRecursiveCharacter() + split.ChunkSize = 300 // size of the chunk is number of characters + split.ChunkOverlap = 30 // overlap is the number of characters that the chunks overlap + docs, err := p.LoadAndSplit(context.Background(), split) + + if err != nil { + fmt.Println("Error loading document: ", err) + } + + log.Println("Document loaded: ", len(docs)) +} + + From b9283c7da5c8e219ed5856a72c045ce991a6f7ce Mon Sep 17 00:00:00 2001 From: Alexandre E Souza Date: Fri, 8 Mar 2024 09:45:25 -0300 Subject: [PATCH 2/2] fix: update names --- .../modules/data_connection/text_splitters/examples/index.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/modules/data_connection/text_splitters/examples/index.mdx b/docs/docs/modules/data_connection/text_splitters/examples/index.mdx index d7989fbe2..285d5ae1b 100644 --- a/docs/docs/modules/data_connection/text_splitters/examples/index.mdx +++ b/docs/docs/modules/data_connection/text_splitters/examples/index.mdx @@ -8,13 +8,13 @@ import DocCardList from "@theme/DocCardList"; Splitters are components or tools used to divide texts into smaller, more manageable parts or specific segments. This division can be necessary for various reasons, such as improving the processing, analysis, or understanding of large or complex texts. Splitters can be simple, like dividing a text into sentences or paragraphs, or more complex, such as splitting based on themes, topics, or specific grammatical structures. 
-For create spliters can use PDF, Text or HTML +To create splitters, you can use PDF, text, or HTML documents. ```go func main(){ func textToSplit() []schema.Document { - f, err := os.Open("./spliters/docs/transcript.txt") + f, err := os.Open("./splitters/docs/transcript.txt") if err != nil { fmt.Println("Error opening file: ", err) }