From 1f96ed4760dc2a5b7b46f886b4aca4838f7ca4aa Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 13:07:34 -0700 Subject: [PATCH 01/25] modified: todo.md --- todo.md | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/todo.md b/todo.md index 316611d..845b2ef 100644 --- a/todo.md +++ b/todo.md @@ -1,12 +1,12 @@ -### Todo list +### Todo list [README] -- Add intro +- Add intro - Clearly define: [Utilikit, Pluggable/Components, multi-shot, zero-shot,] - - create summarization of prompt reusability, and component extendability + - create summarization of prompt reusability, and component extendability - Then, clearly state the intention of the repository. : Provide Reasoning, I want this to be a nexus of information to empower my LLMs moving forward. By continually updating this repository as a codebase and conglomeration of documentation, it may serve as a `git clone`able neuron for machine learning models. - - Finally, provide one to two brief statements to close out and resummarize + - Finally, provide one to two brief statements to close out and resummarize --- @@ -24,9 +24,9 @@ [LangChain] -- langchain_conv_agent.py - - Lacks single execution runnability - - Fix by removing argparsing and implement default settings, with a configuration file +- ~~langchain_conv_agent.py~~ + - ~~Lacks single execution runnability~~ + - ~~Fix by removing argparsing and implement default settings, with a configuration file~~ - Config file settings: - Embedding Engine: [OpenAI, HuggingFace, etc.] - ***Lacks .env var loading(API keys, model names[OpenAI, HuggingFace])*** @@ -37,14 +37,17 @@ - `DocumentRetriever` queries them locally (HF model is cached after first download. 
Therefore, all runs after the first, are entirely local since we're using ChromaDB) +- qa_local_docs.py + - Doesn't automatically collect and generate embeddings for the data folder + - To ensure automation, create a first-run / boot-up process --- [OpenAI] -- Auto-Embedder - - Requires testing - - test.py requires updates -- [Task]:Update test.py and run +- ~~Auto-Embedder~~ + - ~~Requires testing~~ + - ~~test.py requires updates~~ +- ~~[Task]:Update test.py and run~~ --- From c1ea6c9889603166f338ed91c772f747a7e5f645 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 16:54:35 -0700 Subject: [PATCH 02/25] Tried adding more checks for stability --- LangChain/Retrieval-Agents/qa_local_docs.py | 65 ++++++++++----------- 1 file changed, 30 insertions(+), 35 deletions(-) diff --git a/LangChain/Retrieval-Agents/qa_local_docs.py b/LangChain/Retrieval-Agents/qa_local_docs.py index 33729f9..2252a9a 100644 --- a/LangChain/Retrieval-Agents/qa_local_docs.py +++ b/LangChain/Retrieval-Agents/qa_local_docs.py @@ -1,24 +1,15 @@ import os import glob -from typing import Generator, List, Tuple +from typing import Dict, List, Union from dotenv import load_dotenv from retrying import retry from langchain.document_loaders import PyPDFLoader from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.llms import OpenAI as OpenAILLM +from langchain.embeddings.tensorflow import UniversalSentenceEncoder +from langchain.llms import TensorFlow as TensorFlowLLM from langchain.chains.question_answering import load_qa_chain from langchain.vectorstores import cosine_similarity -# Define the retrying decorator for specific functions -def retry_if_value_error(exception: Exception) -> bool: - """Return True if we should retry (in this case when it's a ValueError), False otherwise""" - return isinstance(exception, ValueError) - -def 
retry_if_file_not_found_error(exception: Exception) -> bool: - """Return True if we should retry (in this case when it's a FileNotFoundError), False otherwise""" - return isinstance(exception, FileNotFoundError) - class PDFProcessor: """ A class to handle PDF document processing, similarity search, and question answering. @@ -27,9 +18,9 @@ class PDFProcessor: ---------- OPENAI_API_KEY : str OpenAI API Key for authentication. - embeddings : OpenAIEmbeddings - Object for OpenAI embeddings. - llm : OpenAILLM + embeddings : UniversalSentenceEncoder + Object for Universal Sentence Encoder embeddings. + llm : TensorFlowLLM Language model for generating embeddings. Methods @@ -40,7 +31,7 @@ class PDFProcessor: Load PDFs from a specified directory. _load_and_split_document(file_path: str, chunk_size: int = 2000, chunk_overlap: int = 0) -> List[str]: Load and split a single document. - perform_similarity_search(documents: List[List[str]], query: str, num_results: int = 10) -> List[Tuple[float, str]]: + perform_similarity_search(documents: List[List[str]], query: str, threshold: float = 0.5) -> List[Dict[str, Union[float, str]]]: Perform similarity search on documents. """ @@ -63,8 +54,8 @@ def _load_env_vars(self): def _initialize_reusable_objects(self): """Initialize reusable objects like embeddings and language models.""" - self.embeddings = OpenAIEmbeddings(openai_api_key=self.OPENAI_API_KEY) - self.llm = OpenAILLM(temperature=0, openai_api_key=self.OPENAI_API_KEY) + self.embeddings = UniversalSentenceEncoder() + self.llm = TensorFlowLLM(temperature=0) @staticmethod def get_user_query(prompt: str = "Please enter your query: ") -> str: @@ -79,7 +70,6 @@ def get_user_query(prompt: str = "Please enter your query: ") -> str: """ return input(prompt) - @retry(retry_on_exception=retry_if_file_not_found_error, stop_max_attempt_number=3) def load_pdfs_from_directory(self, directory_path: str = 'data/') -> List[List[str]]: """ Load all PDF files from a given directory. 
@@ -92,11 +82,11 @@ def load_pdfs_from_directory(self, directory_path: str = 'data/') -> List[List[s """ try: if not os.path.exists(directory_path): - raise FileNotFoundError(f"The directory {directory_path} does not exist.") + return [] pdf_files = glob.glob(f"{directory_path}/*.pdf") if not pdf_files: - raise FileNotFoundError(f"No PDF files found in the directory {directory_path}.") + return [] texts = [] for pdf_file in pdf_files: @@ -104,9 +94,9 @@ def load_pdfs_from_directory(self, directory_path: str = 'data/') -> List[List[s return texts except FileNotFoundError as fe: print(f"FileNotFoundError encountered: {fe}") - raise + return [] - def _load_and_split_document(self, file_path: str, chunk_size: int = 2000, chunk_overlap: int = 0) -> List[str]: + def _load_and_split_document(self, file_path: str, chunk_size: int = 500, chunk_overlap: int = 0) -> List[str]: """ Load and split a PDF document into text chunks. @@ -125,30 +115,37 @@ def _load_and_split_document(self, file_path: str, chunk_size: int = 2000, chunk text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) return text_splitter.split_documents(data) - def perform_similarity_search(self, documents: List[List[str]], query: str, num_results: int = 10) -> List[Tuple[float, str]]: + def perform_similarity_search(self, documents: List[List[str]], query: str, threshold: float = 0.7) -> List[Dict[str, Union[float, str]]]: """ Perform similarity search on documents based on a query. Parameters: documents (List[List[str]]): List of documents to search. query (str): User query for similarity search. - num_results (int): Number of results to return. + threshold (float): Minimum similarity score to return. Returns: - List[Tuple[float, str]]: List of tuples containing similarity score and document or chunk. + List[Dict[str, Union[float, str]]]: List of dictionaries containing similarity score, document or chunk, and any other relevant metadata. 
""" try: if not query: - raise ValueError("Query should not be empty.") + query = self.get_user_query("Please enter a valid query: ") results = [] + query_embedding = self.embeddings.embed(query) for document in documents: - similarity_score = cosine_similarity(document, query) - results.append((similarity_score, document)) - results = sorted(results, key=lambda x: x[0], reverse=True)[:num_results] + document_embedding = self.embeddings.embed(document) + similarity_score = cosine_similarity(document_embedding, query_embedding) + if similarity_score >= threshold: + result = { + "similarity_score": similarity_score, + "document": document, + "metadata": {} + } + results.append(result) return results except Exception as e: print(f"An error occurred: {e}") - raise + return [] if __name__ == "__main__": try: @@ -160,14 +157,12 @@ def perform_similarity_search(self, documents: List[List[str]], query: str, num_ num_docs = len(texts) print(f'Loaded {num_docs} document(s).') - # Get user query for similarity search - query = pdf_processor.get_user_query() - # Perform similarity search based on the query + query = pdf_processor.get_user_query() results = pdf_processor.perform_similarity_search(texts, query) # Print the results for i, result in enumerate(results): - print(f"{i+1}. Similarity score: {result[0]}, Document: {result[1]}") + print(f"{i+1}. Similarity score: {result['similarity_score']}, Document: {result['document']}") except Exception as e: print(f"An error occurred: {e}") \ No newline at end of file From f2e7bc8527c608a7479f7c18b4e47cdd854720f5 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 16:54:53 -0700 Subject: [PATCH 03/25] Upgraded qa_local_docs. See changes below: - Generated by GitHub Copilot I made several changes to the code. First, I added a PDFProcessor class to handle PDF document processing, similarity search, and question answering. 
The class has several methods to perform these tasks, including load_pdfs_from_directory, perform_similarity_search, and answer_question. I also added a get_user_query method to get user input for a query or question. In the load_pdfs_from_directory method, I modified the code to use the DirectoryLoader class from langchain.document_loaders to load all PDF files from a given directory. I then split the loaded documents into chunks, stored them in a vectorstore, and initialized the RAG model for question answering. In the perform_similarity_search method, I modified the code to take a list of documents and a query as input, and return a list of dictionaries containing the similarity score, document or chunk, and any other relevant metadata. In the answer_question method, I modified the code to use the RAG model to answer the input question. Finally, in the __main__ block, I created an instance of the PDFProcessor class, loaded PDFs from a directory, performed similarity search based on a query, and answered a question using the RAG model. 
modified: LangChain/Retrieval-Agents/qa_local_docs.py --- LangChain/Retrieval-Agents/qa_local_docs.py | 71 +++++++++++++-------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/LangChain/Retrieval-Agents/qa_local_docs.py b/LangChain/Retrieval-Agents/qa_local_docs.py index 2252a9a..444a941 100644 --- a/LangChain/Retrieval-Agents/qa_local_docs.py +++ b/LangChain/Retrieval-Agents/qa_local_docs.py @@ -9,6 +9,11 @@ from langchain.llms import TensorFlow as TensorFlowLLM from langchain.chains.question_answering import load_qa_chain from langchain.vectorstores import cosine_similarity +from langchain.vectorstores import Chroma +from langchain.embeddings import OpenAIEmbeddings +from langchain.chains import RetrievalQA +from langchain.chat_models import ChatOpenAI +from langchain.document_loaders import DirectoryLoader class PDFProcessor: """ @@ -22,6 +27,10 @@ class PDFProcessor: Object for Universal Sentence Encoder embeddings. llm : TensorFlowLLM Language model for generating embeddings. + vectorstore : Chroma + Vectorstore for storing document embeddings. + qa_chain : RetrievalQA + Question answering chain for answering questions. Methods ------- @@ -33,6 +42,8 @@ class PDFProcessor: Load and split a single document. perform_similarity_search(documents: List[List[str]], query: str, threshold: float = 0.5) -> List[Dict[str, Union[float, str]]]: Perform similarity search on documents. + answer_question(question: str) -> str: + Answer a question using the Retrieval Augmented Generation (RAG) model. 
""" def __init__(self): @@ -55,7 +66,9 @@ def _load_env_vars(self): def _initialize_reusable_objects(self): """Initialize reusable objects like embeddings and language models.""" self.embeddings = UniversalSentenceEncoder() - self.llm = TensorFlowLLM(temperature=0) + self.llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) + self.vectorstore = None + self.qa_chain = None @staticmethod def get_user_query(prompt: str = "Please enter your query: ") -> str: @@ -84,37 +97,21 @@ def load_pdfs_from_directory(self, directory_path: str = 'data/') -> List[List[s if not os.path.exists(directory_path): return [] - pdf_files = glob.glob(f"{directory_path}/*.pdf") - if not pdf_files: - return [] - - texts = [] - for pdf_file in pdf_files: - texts.extend(self._load_and_split_document(pdf_file)) - return texts + loader = DirectoryLoader(directory_path) + data = loader.load() + text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) + all_splits = text_splitter.split_documents(data) + self.vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings()) + self.qa_chain = RetrievalQA.from_chain_type( + self.llm, + retriever=self.vectorstore.as_retriever(), + chain_type_kwargs={"prompt": hub.pull("rlm/rag-prompt")} + ) + return all_splits except FileNotFoundError as fe: print(f"FileNotFoundError encountered: {fe}") return [] - def _load_and_split_document(self, file_path: str, chunk_size: int = 500, chunk_overlap: int = 0) -> List[str]: - """ - Load and split a PDF document into text chunks. - - Parameters: - file_path (str): Path to the PDF file. - chunk_size (int): Size of each text chunk. - chunk_overlap (int): Overlapping characters between chunks. - - Returns: - List[str]: List of text chunks. 
- """ - if not os.path.exists(file_path): - raise FileNotFoundError(f"The file {file_path} does not exist.") - loader = PyPDFLoader(file_path) - data = loader.load() - text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) - return text_splitter.split_documents(data) - def perform_similarity_search(self, documents: List[List[str]], query: str, threshold: float = 0.7) -> List[Dict[str, Union[float, str]]]: """ Perform similarity search on documents based on a query. @@ -147,6 +144,19 @@ def perform_similarity_search(self, documents: List[List[str]], query: str, thre print(f"An error occurred: {e}") return [] + def answer_question(self, question: str) -> str: + """ + Answer a question using the Retrieval Augmented Generation (RAG) model. + + Parameters: + question (str): The question to answer. + + Returns: + str: The answer to the question. + """ + result = self.qa_chain({"query": question}) + return result["result"] + if __name__ == "__main__": try: # Initialize PDFProcessor class @@ -164,5 +174,10 @@ def perform_similarity_search(self, documents: List[List[str]], query: str, thre # Print the results for i, result in enumerate(results): print(f"{i+1}. 
Similarity score: {result['similarity_score']}, Document: {result['document']}") + + # Answer a question using the RAG model + question = pdf_processor.get_user_query("Please enter a question: ") + answer = pdf_processor.answer_question(question) + print(f"Answer: {answer}") except Exception as e: print(f"An error occurred: {e}") \ No newline at end of file From 20c747792463f04eb1a0591974be21a4facdf332 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 17:22:47 -0700 Subject: [PATCH 04/25] Enhanced logic of `qa_local_docs.py` - Added updates to TODO for qa_local_docs.py --- LangChain/Retrieval-Agents/qa_local_docs.py | 25 +++++++++------------ todo.md | 17 ++++++++++++-- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/LangChain/Retrieval-Agents/qa_local_docs.py b/LangChain/Retrieval-Agents/qa_local_docs.py index 444a941..19e7e75 100644 --- a/LangChain/Retrieval-Agents/qa_local_docs.py +++ b/LangChain/Retrieval-Agents/qa_local_docs.py @@ -1,19 +1,15 @@ import os -import glob from typing import Dict, List, Union from dotenv import load_dotenv from retrying import retry -from langchain.document_loaders import PyPDFLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings.tensorflow import UniversalSentenceEncoder -from langchain.llms import TensorFlow as TensorFlowLLM -from langchain.chains.question_answering import load_qa_chain -from langchain.vectorstores import cosine_similarity from langchain.vectorstores import Chroma from langchain.embeddings import OpenAIEmbeddings from langchain.chains import RetrievalQA -from langchain.chat_models import ChatOpenAI from langchain.document_loaders import DirectoryLoader +from langchain.chat_models import ChatOpenAI + class PDFProcessor: """ @@ -25,7 +21,6 @@ class PDFProcessor: OpenAI API Key for authentication. embeddings : UniversalSentenceEncoder Object for Universal Sentence Encoder embeddings. 
- llm : TensorFlowLLM Language model for generating embeddings. vectorstore : Chroma Vectorstore for storing document embeddings. @@ -38,10 +33,8 @@ class PDFProcessor: Get query from the user. load_pdfs_from_directory(directory_path: str = 'data/') -> List[List[str]]: Load PDFs from a specified directory. - _load_and_split_document(file_path: str, chunk_size: int = 2000, chunk_overlap: int = 0) -> List[str]: - Load and split a single document. - perform_similarity_search(documents: List[List[str]], query: str, threshold: float = 0.5) -> List[Dict[str, Union[float, str]]]: - Perform similarity search on documents. + perform_similarity_search(documents: List[List[str]], query: str, threshold: float = 0.7) -> List[Dict[str, Union[float, str]]]]: + Perform similarity search on documents. Higher threshold means more similar results. answer_question(question: str) -> str: Answer a question using the Retrieval Augmented Generation (RAG) model. """ @@ -105,6 +98,8 @@ def load_pdfs_from_directory(self, directory_path: str = 'data/') -> List[List[s self.qa_chain = RetrievalQA.from_chain_type( self.llm, retriever=self.vectorstore.as_retriever(), + # Pull premade RAG prompt from + # https://smith.langchain.com/hub/rlm/rag-prompt chain_type_kwargs={"prompt": hub.pull("rlm/rag-prompt")} ) return all_splits @@ -173,11 +168,13 @@ def answer_question(self, question: str) -> str: # Print the results for i, result in enumerate(results): - print(f"{i+1}. Similarity score: {result['similarity_score']}, Document: {result['document']}") + print(f"{i+1}. Similarity score: {result['similarity_score']}, \nDocument: {result['document']}") # Answer a question using the RAG model - question = pdf_processor.get_user_query("Please enter a question: ") + question = pdf_processor.get_user_query("""Welcome! \ + \nYour document agent has been fully instantiated. 
\ + Please enter a clear and concise question: """) answer = pdf_processor.answer_question(question) - print(f"Answer: {answer}") + print(f"\nAnswer: {answer}") except Exception as e: print(f"An error occurred: {e}") \ No newline at end of file diff --git a/todo.md b/todo.md index 845b2ef..e241a52 100644 --- a/todo.md +++ b/todo.md @@ -25,6 +25,7 @@ [LangChain] - ~~langchain_conv_agent.py~~ + - ~~Lacks single execution runnability~~ - ~~Fix by removing argparsing and implement default settings, with a configuration file~~ - Config file settings: @@ -38,8 +39,20 @@ (HF model is cached after first download. Therefore, all runs after the first, are entirely local since we're using ChromaDB) - qa_local_docs.py - - Doesn't automatically collect and generate embeddings for the data folder - - To ensure automation, create a first-run / boot-up process + + - ~~Doesn't automatically collect and generate embeddings for the data folder~~ + - ~~To ensure automation, create a first-run / boot-up process~~ + + 1. Move the `PDFProcessor` class to a separate file to increase modularity and maintainability. + 2. Use dependency injection to pass in the necessary objects to the `PDFProcessor` class instead of initializing them in the constructor. This will increase modularity and make the class more testable. + 3. Use a logger instead of `print` statements to log errors and other messages. This will make the code more maintainable and scalable. + 4. Use constants or configuration files to store environment variables and other configuration settings. This will make the code more maintainable and scalable. + 5. Use type hints and docstrings to improve readability and maintainability of the code. + 6. Refactor the `perform_similarity_search` method to use a more efficient algorithm for similarity search, such as Locality-Sensitive Hashing (LSH) or Approximate Nearest Neighbors (ANN). This will increase scalability and reliance of the code. + 7. 
Refactor the `load_pdfs_from_directory` method to use a more efficient PDF parsing library, such as PyPDF2 or pdfminer. This will increase scalability and reliance of the code. + 8. Refactor the `answer_question` method to use a more advanced question answering model, such as BERT or T5. This will increase the accuracy and reliability of the answers. + 9. Use version control to track changes to the code and collaborate with other developers. This will increase maintainability and reliance of the code. + 10. Write unit tests to ensure that the code works as expected and to catch regressions. This will increase maintainability and reliance of the code. --- From 241cc2d277931d24ab9cda7e2502f79261281822 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 17:38:27 -0700 Subject: [PATCH 05/25] Corrected faulty GPT updates in TODO modified: todo.md --- todo.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/todo.md b/todo.md index e241a52..828b25b 100644 --- a/todo.md +++ b/todo.md @@ -46,13 +46,6 @@ 1. Move the `PDFProcessor` class to a separate file to increase modularity and maintainability. 2. Use dependency injection to pass in the necessary objects to the `PDFProcessor` class instead of initializing them in the constructor. This will increase modularity and make the class more testable. 3. Use a logger instead of `print` statements to log errors and other messages. This will make the code more maintainable and scalable. - 4. Use constants or configuration files to store environment variables and other configuration settings. This will make the code more maintainable and scalable. - 5. Use type hints and docstrings to improve readability and maintainability of the code. - 6. Refactor the `perform_similarity_search` method to use a more efficient algorithm for similarity search, such as Locality-Sensitive Hashing (LSH) or Approximate Nearest Neighbors (ANN). 
This will increase scalability and reliance of the code. - 7. Refactor the `load_pdfs_from_directory` method to use a more efficient PDF parsing library, such as PyPDF2 or pdfminer. This will increase scalability and reliance of the code. - 8. Refactor the `answer_question` method to use a more advanced question answering model, such as BERT or T5. This will increase the accuracy and reliability of the answers. - 9. Use version control to track changes to the code and collaborate with other developers. This will increase maintainability and reliance of the code. - 10. Write unit tests to ensure that the code works as expected and to catch regressions. This will increase maintainability and reliance of the code. --- From 830c60a073b1bc6a3d00d3535be56bfd5335806d Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:07:48 -0700 Subject: [PATCH 06/25] Refactored qa_local_docs and reorganized dirs renamed: LangChain/Retrieval-Agents/stateful_chatbot.py -> LangChain/Chatbots/stateful_chatbot.py renamed: LangChain/Retrieval-Agents/__init__.py -> LangChain/Retrieval-Augmented-Generation/__init__.py new file: LangChain/Retrieval-Augmented-Generation/main.py renamed: LangChain/Retrieval-Agents/qa_local_docs.py -> LangChain/Retrieval-Augmented-Generation/qa_local_docs.py modified: todo.md --- .../stateful_chatbot.py | 0 .../__init__.py | 0 .../Retrieval-Augmented-Generation/main.py | 49 +++++++++++++++++++ .../qa_local_docs.py | 11 ++++- todo.md | 23 ++++----- 5 files changed, 70 insertions(+), 13 deletions(-) rename LangChain/{Retrieval-Agents => Chatbots}/stateful_chatbot.py (100%) rename LangChain/{Retrieval-Agents => Retrieval-Augmented-Generation}/__init__.py (100%) create mode 100644 LangChain/Retrieval-Augmented-Generation/main.py rename LangChain/{Retrieval-Agents => Retrieval-Augmented-Generation}/qa_local_docs.py (93%) diff --git a/LangChain/Retrieval-Agents/stateful_chatbot.py 
b/LangChain/Chatbots/stateful_chatbot.py similarity index 100% rename from LangChain/Retrieval-Agents/stateful_chatbot.py rename to LangChain/Chatbots/stateful_chatbot.py diff --git a/LangChain/Retrieval-Agents/__init__.py b/LangChain/Retrieval-Augmented-Generation/__init__.py similarity index 100% rename from LangChain/Retrieval-Agents/__init__.py rename to LangChain/Retrieval-Augmented-Generation/__init__.py diff --git a/LangChain/Retrieval-Augmented-Generation/main.py b/LangChain/Retrieval-Augmented-Generation/main.py new file mode 100644 index 0000000..4820cd9 --- /dev/null +++ b/LangChain/Retrieval-Augmented-Generation/main.py @@ -0,0 +1,49 @@ +import logging +from qa_local_docs import PDFProcessor + +def setup_logging(): + """Set up logging configuration.""" + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + +if __name__ == "__main__": + # Set up logging + setup_logging() + + try: + # Initialize PDFProcessor class + pdf_processor = PDFProcessor() + + # Load PDFs from directory and count the number of loaded documents + texts = pdf_processor.load_pdfs_from_directory() + num_docs = len(texts) + logging.info(f'Loaded {num_docs} document(s) from directory.') + + # Perform similarity search based on the query + query = pdf_processor.get_user_query() + logging.debug(f'User query: {query}') + results = pdf_processor.perform_similarity_search(texts, query) + + # Log the results + if results: + logging.info(f'Found {len(results)} similar document(s) for query: {query}') + for i, result in enumerate(results): + logging.debug(f"{i+1}. Similarity score: {result['similarity_score']}, \nDocument: {result['document']}") + else: + logging.warning(f'No similar documents found for query: {query}') + + # Answer a question using the RAG model + question = pdf_processor.get_user_query("""Welcome! \ + \nYour document agent has been fully instantiated. 
\ + Please enter a clear and concise question: """) + logging.debug(f'User question: {question}') + answer = pdf_processor.answer_question(question) + logging.info(f"\nAnswer: {answer}") + except FileNotFoundError as fe: + logging.error(f"FileNotFoundError encountered: {fe}") + except ValueError as ve: + logging.error(f"ValueError encountered: {ve}") + except Exception as e: + logging.error(f"An error occurred: {e}") \ No newline at end of file diff --git a/LangChain/Retrieval-Agents/qa_local_docs.py b/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py similarity index 93% rename from LangChain/Retrieval-Agents/qa_local_docs.py rename to LangChain/Retrieval-Augmented-Generation/qa_local_docs.py index 19e7e75..36da47e 100644 --- a/LangChain/Retrieval-Agents/qa_local_docs.py +++ b/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py @@ -10,7 +10,6 @@ from langchain.document_loaders import DirectoryLoader from langchain.chat_models import ChatOpenAI - class PDFProcessor: """ A class to handle PDF document processing, similarity search, and question answering. 
@@ -92,8 +91,14 @@ def load_pdfs_from_directory(self, directory_path: str = 'data/') -> List[List[s loader = DirectoryLoader(directory_path) data = loader.load() + """ + Adjustable chunk size and overlap + - 500 characters is a safe starting point for chunk size + - We use 0 overlap to avoid duplicate chunks + """ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) all_splits = text_splitter.split_documents(data) + # Store document embeddings in a vectorstore self.vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings()) self.qa_chain = RetrievalQA.from_chain_type( self.llm, @@ -102,6 +107,7 @@ def load_pdfs_from_directory(self, directory_path: str = 'data/') -> List[List[s # https://smith.langchain.com/hub/rlm/rag-prompt chain_type_kwargs={"prompt": hub.pull("rlm/rag-prompt")} ) + # Return all text splits from PDFs return all_splits except FileNotFoundError as fe: print(f"FileNotFoundError encountered: {fe}") @@ -134,7 +140,8 @@ def perform_similarity_search(self, documents: List[List[str]], query: str, thre "metadata": {} } results.append(result) - return results + # Sort results by similarity score in reverse order because we want the highest similarity score first + return sorted(results, key=lambda k: k['similarity_score'], reverse=True) except Exception as e: print(f"An error occurred: {e}") return [] diff --git a/todo.md b/todo.md index 828b25b..284aef2 100644 --- a/todo.md +++ b/todo.md @@ -24,28 +24,29 @@ [LangChain] -- ~~langchain_conv_agent.py~~ +- stateful_chatbot.py - - ~~Lacks single execution runnability~~ - - ~~Fix by removing argparsing and implement default settings, with a configuration file~~ + - Lacks single execution runnability + - Fix by removing argparsing and implement default settings, with a configuration file - Config file settings: - Embedding Engine: [OpenAI, HuggingFace, etc.] 
- ***Lacks .env var loading(API keys, model names[OpenAI, HuggingFace])*** - - Ambiguity regarding (EmbeddingManager and DocumentRetriever) - - Needs comments and to load via .env file - - Differentiate EmbeddingManager and DocumentRetriever by explaining how they're implemented into the pipeline stream created by the module. - - One generates embeddings - - `DocumentRetriever` queries them locally + - ~~Ambiguity regarding (EmbeddingManager and DocumentRetriever)~~ + - (**AVOID SUGGESTIONS BELOW**) + - ~~Needs comments and to load via .env file~~ + - ~~Differentiate EmbeddingManager and DocumentRetriever by explaining how they're implemented into the pipeline stream created by the module.~~ + - ~~One generates embeddings~~ + - ~~`DocumentRetriever` queries them locally (HF model is cached after first download. Therefore, all runs after the first, - are entirely local since we're using ChromaDB) + are entirely local since we're using ChromaDB)~~ - qa_local_docs.py - ~~Doesn't automatically collect and generate embeddings for the data folder~~ - ~~To ensure automation, create a first-run / boot-up process~~ - 1. Move the `PDFProcessor` class to a separate file to increase modularity and maintainability. + 1. ~~Move the `PDFProcessor` class to a separate file to increase modularity and maintainability.~~ 2. Use dependency injection to pass in the necessary objects to the `PDFProcessor` class instead of initializing them in the constructor. This will increase modularity and make the class more testable. - 3. Use a logger instead of `print` statements to log errors and other messages. This will make the code more maintainable and scalable. + 3. ~~Use a logger instead of `print` statements to log errors and other messages. 
This will make the code more maintainable and scalable.~~ --- From e38f879e13a615173ceaa92aa5620a27e5f3403f Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:08:38 -0700 Subject: [PATCH 07/25] Removed useless initialization + print statements modified: LangChain/Retrieval-Augmented-Generation/qa_local_docs.py --- .../qa_local_docs.py | 43 +++---------------- 1 file changed, 6 insertions(+), 37 deletions(-) diff --git a/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py b/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py index 36da47e..b1fb007 100644 --- a/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py +++ b/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py @@ -38,10 +38,13 @@ class PDFProcessor: Answer a question using the Retrieval Augmented Generation (RAG) model. """ - def __init__(self): + def __init__(self, embeddings: UniversalSentenceEncoder, llm: ChatOpenAI, vectorstore: Chroma, qa_chain: RetrievalQA): """Initialize PDFProcessor with environment variables and reusable objects.""" self._load_env_vars() - self._initialize_reusable_objects() + self.embeddings = embeddings + self.llm = llm + self.vectorstore = vectorstore + self.qa_chain = qa_chain @retry(retry_on_exception=retry_if_value_error, stop_max_attempt_number=3) def _load_env_vars(self): @@ -55,13 +58,6 @@ def _load_env_vars(self): print(f"ValueError encountered: {ve}") raise - def _initialize_reusable_objects(self): - """Initialize reusable objects like embeddings and language models.""" - self.embeddings = UniversalSentenceEncoder() - self.llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) - self.vectorstore = None - self.qa_chain = None - @staticmethod def get_user_query(prompt: str = "Please enter your query: ") -> str: """ @@ -157,31 +153,4 @@ def answer_question(self, question: str) -> str: str: The answer to the question. 
""" result = self.qa_chain({"query": question}) - return result["result"] - -if __name__ == "__main__": - try: - # Initialize PDFProcessor class - pdf_processor = PDFProcessor() - - # Load PDFs from directory and count the number of loaded documents - texts = pdf_processor.load_pdfs_from_directory() - num_docs = len(texts) - print(f'Loaded {num_docs} document(s).') - - # Perform similarity search based on the query - query = pdf_processor.get_user_query() - results = pdf_processor.perform_similarity_search(texts, query) - - # Print the results - for i, result in enumerate(results): - print(f"{i+1}. Similarity score: {result['similarity_score']}, \nDocument: {result['document']}") - - # Answer a question using the RAG model - question = pdf_processor.get_user_query("""Welcome! \ - \nYour document agent has been fully instantiated. \ - Please enter a clear and concise question: """) - answer = pdf_processor.answer_question(question) - print(f"\nAnswer: {answer}") - except Exception as e: - print(f"An error occurred: {e}") \ No newline at end of file + return result["result"] \ No newline at end of file From 9034a6bf7c6096803d473f66dcfcaea482cec288 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:12:34 -0700 Subject: [PATCH 08/25] Add test module new file: LangChain/Retrieval-Augmented-Generation/test.py --- .../Retrieval-Augmented-Generation/test.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 LangChain/Retrieval-Augmented-Generation/test.py diff --git a/LangChain/Retrieval-Augmented-Generation/test.py b/LangChain/Retrieval-Augmented-Generation/test.py new file mode 100644 index 0000000..8f085a8 --- /dev/null +++ b/LangChain/Retrieval-Augmented-Generation/test.py @@ -0,0 +1,32 @@ +import unittest +from unittest.mock import patch, MagicMock +from qa_local_docs import PDFProcessor + +# Assumes that 'data/' directory contains PDFs +class TestPDFProcessor(unittest.TestCase): + 
def setUp(self): + self.pdf_processor = PDFProcessor() + + def test_load_pdfs_from_directory(self): + # Test that the method returns a non-empty list + result = self.pdf_processor.load_pdfs_from_directory() + self.assertTrue(isinstance(result, list)) + self.assertTrue(len(result) > 0) + + def test_perform_similarity_search(self): + # Test that the method returns a non-empty list + texts = self.pdf_processor.load_pdfs_from_directory() + result = self.pdf_processor.perform_similarity_search(texts, "test") + self.assertTrue(isinstance(result, list)) + self.assertTrue(len(result) > 0) + + @patch('qa_local_docs.ChatOpenAI') + @patch('qa_local_docs.Chroma') + @patch('qa_local_docs.UniversalSentenceEncoder') + def test_answer_question(self, mock_embeddings, mock_vectorstore, mock_llm): + # Test that the method returns a string + mock_result = MagicMock() + mock_result.__getitem__.return_value = {"result": "test answer"} + mock_llm.return_value = mock_result + result = self.pdf_processor.answer_question("test question") + self.assertTrue(isinstance(result, str)) \ No newline at end of file From d60d308ee622e13d6901fe9ba40d254c60435449 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:20:55 -0700 Subject: [PATCH 09/25] ++ .env & easy configuration for multiple variables new file: LangChain/Retrieval-Augmented-Generation/.env.template modified: LangChain/Retrieval-Augmented-Generation/qa_local_docs.py modified: LangChain/Retrieval-Augmented-Generation/test.py --- LangChain/Retrieval-Augmented-Generation/.env.template | 5 +++++ .../Retrieval-Augmented-Generation/qa_local_docs.py | 3 ++- LangChain/Retrieval-Augmented-Generation/test.py | 10 ++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 LangChain/Retrieval-Augmented-Generation/.env.template diff --git a/LangChain/Retrieval-Augmented-Generation/.env.template b/LangChain/Retrieval-Augmented-Generation/.env.template new file mode 100644 
index 0000000..b269983 --- /dev/null +++ b/LangChain/Retrieval-Augmented-Generation/.env.template @@ -0,0 +1,5 @@ +OPENAI_API_KEY= +SIMILARITY_THRESHOLD=0.7 +CHUNK_SIZE=500 +CHUNK_OVERLAP=0 +LLM_CHAIN_PROMPT_URL=https://smith.langchain.com/hub/rlm/rag-prompt \ No newline at end of file diff --git a/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py b/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py index b1fb007..12ecd13 100644 --- a/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py +++ b/LangChain/Retrieval-Augmented-Generation/qa_local_docs.py @@ -54,6 +54,7 @@ def _load_env_vars(self): self.OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'sk-') if not self.OPENAI_API_KEY: raise ValueError("OPENAI_API_KEY is missing. Please set the environment variable.") + self.LLM_CHAIN_PROMPT_URL = os.getenv('LLM_CHAIN_PROMPT_URL', 'https://smith.langchain.com/hub/rlm/rag-prompt') except ValueError as ve: print(f"ValueError encountered: {ve}") raise @@ -101,7 +102,7 @@ def load_pdfs_from_directory(self, directory_path: str = 'data/') -> List[List[s retriever=self.vectorstore.as_retriever(), # Pull premade RAG prompt from # https://smith.langchain.com/hub/rlm/rag-prompt - chain_type_kwargs={"prompt": hub.pull("rlm/rag-prompt")} + chain_type_kwargs={"prompt": hub.pull(self.LLM_CHAIN_PROMPT_URL)} ) # Return all text splits from PDFs return all_splits diff --git a/LangChain/Retrieval-Augmented-Generation/test.py b/LangChain/Retrieval-Augmented-Generation/test.py index 8f085a8..99ef6f5 100644 --- a/LangChain/Retrieval-Augmented-Generation/test.py +++ b/LangChain/Retrieval-Augmented-Generation/test.py @@ -1,11 +1,17 @@ import unittest from unittest.mock import patch, MagicMock -from qa_local_docs import PDFProcessor +from qa_local_docs import PDFProcessor, ChatOpenAI, Chroma, UniversalSentenceEncoder, RetrievalQA # Assumes that 'data/' directory contains PDFs class TestPDFProcessor(unittest.TestCase): + # Set up reusable objects def setUp(self): - 
self.pdf_processor = PDFProcessor() + embeddings = UniversalSentenceEncoder() + llm = ChatOpenAI() + vectorstore = Chroma() + qa_chain = RetrievalQA() + # Tie reusable objects together + self.pdf_processor = PDFProcessor(embeddings, llm, vectorstore, qa_chain) def test_load_pdfs_from_directory(self): # Test that the method returns a non-empty list From bb649239badb55bfa43ab8221017585f21dae6c1 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 18:22:12 -0700 Subject: [PATCH 10/25] new file: LangChain/Chatbots/__init__.py --- LangChain/Chatbots/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 LangChain/Chatbots/__init__.py diff --git a/LangChain/Chatbots/__init__.py b/LangChain/Chatbots/__init__.py new file mode 100644 index 0000000..e69de29 From 0a2d0d8e5d17e20bef299098658050dc9f69483a Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 21:01:55 -0700 Subject: [PATCH 11/25] Moved a dir, renamed a dir; Updated README renamed: OpenAI/GPT-Prompt-Examples/MS-6_Daethyra_Custom-Instruction_GPT4.md -> OpenAI/Prompts/MS-6_Daethyra_Custom-Instruction_GPT4.md renamed: OpenAI/GPT-Prompt-Examples/multi-shot/MS-1.MD -> OpenAI/Prompts/multi-shot/MS-1.MD renamed: OpenAI/GPT-Prompt-Examples/multi-shot/MS-2_Large-Template.txt -> OpenAI/Prompts/multi-shot/MS-2_Large-Template.txt renamed: OpenAI/GPT-Prompt-Examples/multi-shot/MS-5_No-Prose_Doc-Reader.txt -> OpenAI/Prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt renamed: OpenAI/GPT-Prompt-Examples/OUT-prompt-cheatsheet.md -> OpenAI/Prompts/prompt-cheatsheet.md renamed: OpenAI/GPT-Prompt-Examples/system-role/SR-1_List-o-Prompts.md -> OpenAI/Prompts/system-role/SR-1_List-o-Prompts.md renamed: OpenAI/GPT-Prompt-Examples/system-role/SR-2_package-migration.md -> OpenAI/Prompts/system-role/SR-2_package-migration.md renamed: OpenAI/GPT-Prompt-Examples/system-role/SR-3_thorough-programmer.md -> 
OpenAI/Prompts/system-role/SR-3_thorough-programmer.md renamed: OpenAI/GPT-Prompt-Examples/system-role/SR-4_online-searches.md -> OpenAI/Prompts/system-role/SR-4_online-searches.md renamed: OpenAI/GPT-Prompt-Examples/user-role/UR-1.MD -> OpenAI/Prompts/user-role/UR-1.MD renamed: OpenAI/GPT-Prompt-Examples/user-role/UR-2.md -> OpenAI/Prompts/user-role/UR-2.md modified: README.md --- .../MS-6_Daethyra_Custom-Instruction_GPT4.md | 0 OpenAI/{GPT-Prompt-Examples => Prompts}/multi-shot/MS-1.MD | 0 .../multi-shot/MS-2_Large-Template.txt | 0 .../multi-shot/MS-5_No-Prose_Doc-Reader.txt | 0 .../OUT-prompt-cheatsheet.md => Prompts/prompt-cheatsheet.md} | 0 .../system-role/SR-1_List-o-Prompts.md | 0 .../system-role/SR-2_package-migration.md | 0 .../system-role/SR-3_thorough-programmer.md | 0 .../system-role/SR-4_online-searches.md | 0 OpenAI/{GPT-Prompt-Examples => Prompts}/user-role/UR-1.MD | 0 OpenAI/{GPT-Prompt-Examples => Prompts}/user-role/UR-2.md | 0 README.md | 4 ++-- 12 files changed, 2 insertions(+), 2 deletions(-) rename OpenAI/{GPT-Prompt-Examples => Prompts}/MS-6_Daethyra_Custom-Instruction_GPT4.md (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/multi-shot/MS-1.MD (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/multi-shot/MS-2_Large-Template.txt (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/multi-shot/MS-5_No-Prose_Doc-Reader.txt (100%) rename OpenAI/{GPT-Prompt-Examples/OUT-prompt-cheatsheet.md => Prompts/prompt-cheatsheet.md} (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/system-role/SR-1_List-o-Prompts.md (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/system-role/SR-2_package-migration.md (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/system-role/SR-3_thorough-programmer.md (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/system-role/SR-4_online-searches.md (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/user-role/UR-1.MD (100%) rename OpenAI/{GPT-Prompt-Examples => Prompts}/user-role/UR-2.md (100%) diff --git 
a/OpenAI/GPT-Prompt-Examples/MS-6_Daethyra_Custom-Instruction_GPT4.md b/OpenAI/Prompts/MS-6_Daethyra_Custom-Instruction_GPT4.md similarity index 100% rename from OpenAI/GPT-Prompt-Examples/MS-6_Daethyra_Custom-Instruction_GPT4.md rename to OpenAI/Prompts/MS-6_Daethyra_Custom-Instruction_GPT4.md diff --git a/OpenAI/GPT-Prompt-Examples/multi-shot/MS-1.MD b/OpenAI/Prompts/multi-shot/MS-1.MD similarity index 100% rename from OpenAI/GPT-Prompt-Examples/multi-shot/MS-1.MD rename to OpenAI/Prompts/multi-shot/MS-1.MD diff --git a/OpenAI/GPT-Prompt-Examples/multi-shot/MS-2_Large-Template.txt b/OpenAI/Prompts/multi-shot/MS-2_Large-Template.txt similarity index 100% rename from OpenAI/GPT-Prompt-Examples/multi-shot/MS-2_Large-Template.txt rename to OpenAI/Prompts/multi-shot/MS-2_Large-Template.txt diff --git a/OpenAI/GPT-Prompt-Examples/multi-shot/MS-5_No-Prose_Doc-Reader.txt b/OpenAI/Prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt similarity index 100% rename from OpenAI/GPT-Prompt-Examples/multi-shot/MS-5_No-Prose_Doc-Reader.txt rename to OpenAI/Prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt diff --git a/OpenAI/GPT-Prompt-Examples/OUT-prompt-cheatsheet.md b/OpenAI/Prompts/prompt-cheatsheet.md similarity index 100% rename from OpenAI/GPT-Prompt-Examples/OUT-prompt-cheatsheet.md rename to OpenAI/Prompts/prompt-cheatsheet.md diff --git a/OpenAI/GPT-Prompt-Examples/system-role/SR-1_List-o-Prompts.md b/OpenAI/Prompts/system-role/SR-1_List-o-Prompts.md similarity index 100% rename from OpenAI/GPT-Prompt-Examples/system-role/SR-1_List-o-Prompts.md rename to OpenAI/Prompts/system-role/SR-1_List-o-Prompts.md diff --git a/OpenAI/GPT-Prompt-Examples/system-role/SR-2_package-migration.md b/OpenAI/Prompts/system-role/SR-2_package-migration.md similarity index 100% rename from OpenAI/GPT-Prompt-Examples/system-role/SR-2_package-migration.md rename to OpenAI/Prompts/system-role/SR-2_package-migration.md diff --git a/OpenAI/GPT-Prompt-Examples/system-role/SR-3_thorough-programmer.md 
b/OpenAI/Prompts/system-role/SR-3_thorough-programmer.md similarity index 100% rename from OpenAI/GPT-Prompt-Examples/system-role/SR-3_thorough-programmer.md rename to OpenAI/Prompts/system-role/SR-3_thorough-programmer.md diff --git a/OpenAI/GPT-Prompt-Examples/system-role/SR-4_online-searches.md b/OpenAI/Prompts/system-role/SR-4_online-searches.md similarity index 100% rename from OpenAI/GPT-Prompt-Examples/system-role/SR-4_online-searches.md rename to OpenAI/Prompts/system-role/SR-4_online-searches.md diff --git a/OpenAI/GPT-Prompt-Examples/user-role/UR-1.MD b/OpenAI/Prompts/user-role/UR-1.MD similarity index 100% rename from OpenAI/GPT-Prompt-Examples/user-role/UR-1.MD rename to OpenAI/Prompts/user-role/UR-1.MD diff --git a/OpenAI/GPT-Prompt-Examples/user-role/UR-2.md b/OpenAI/Prompts/user-role/UR-2.md similarity index 100% rename from OpenAI/GPT-Prompt-Examples/user-role/UR-2.md rename to OpenAI/Prompts/user-role/UR-2.md diff --git a/README.md b/README.md index e3d5f24..4aee853 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,13 @@ Provides an automated pipeline for retrieving embeddings from [OpenAIs `text-emb --- -B. **[GPT-Prompt-Examples](./OpenAI/GPT-Prompt-Examples)** +B. **[Prompts](./OpenAI/Prompts/)** There are three main prompt types, [multi-shot](./OpenAI/GPT-Prompt-Examples/multi-shot), [system-role](./OpenAI/GPT-Prompt-Examples/system-role), [user-role](./OpenAI/GPT-Prompt-Examples/user-role). Please also see the [OUT-prompt-cheatsheet](./OpenAI/GPT-Prompt-Examples/OUT-prompt-cheatsheet.md). -- **[Cheatsheet](./OpenAI/GPT-Prompt-Examples/OUT-prompt-cheatsheet.md)**: @Daethyra's go-to prompts. +- **[Cheatsheet](./OpenAI/GPT-Prompt-Examples/prompt-cheatsheet.md)**: @Daethyra's go-to prompts. - **[multi-shot](./OpenAI/GPT-Prompt-Examples/multi-shot)**: Prompts, with prompts inside them. It's kind of like a bundle of Matryoshka prompts! 
From e787961f671a022f5ae8141196ad3fb07e65f5ff Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 21:08:28 -0700 Subject: [PATCH 12/25] Small README changes --- README.md | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 4aee853..7225070 100644 --- a/README.md +++ b/README.md @@ -20,18 +20,18 @@ Provides an automated pipeline for retrieving embeddings from [OpenAIs `text-emb B. **[Prompts](./OpenAI/Prompts/)** -There are three main prompt types, [multi-shot](./OpenAI/GPT-Prompt-Examples/multi-shot), [system-role](./OpenAI/GPT-Prompt-Examples/system-role), [user-role](./OpenAI/GPT-Prompt-Examples/user-role). +There are three main prompt types, [multi-shot](./OpenAI/Prompts/multi-shot), [system-role](./OpenAI/Prompts/system-role), [user-role](./OpenAI/Prompts/user-role). -Please also see the [OUT-prompt-cheatsheet](./OpenAI/GPT-Prompt-Examples/OUT-prompt-cheatsheet.md). +Please also see the [OUT-prompt-cheatsheet](./OpenAI/Prompts/OUT-prompt-cheatsheet.md). -- **[Cheatsheet](./OpenAI/GPT-Prompt-Examples/prompt-cheatsheet.md)**: @Daethyra's go-to prompts. +- **[Cheatsheet](./OpenAI/Prompts/prompt-cheatsheet.md)**: @Daethyra's go-to prompts. -- **[multi-shot](./OpenAI/GPT-Prompt-Examples/multi-shot)**: Prompts, with prompts inside them. +- **[multi-shot](./OpenAI/Prompts/multi-shot)**: Prompts, with prompts inside them. It's kind of like a bundle of Matryoshka prompts! -- **[system-role](./OpenAI/GPT-Prompt-Examples/system-role)**: Steer your LLM by shifting the ground it stands on. +- **[system-role](./OpenAI/Prompts/system-role)**: Steer your LLM by shifting the ground it stands on. -- **[user-role](./OpenAI/GPT-Prompt-Examples/user-role)**: Markdown files for user-role prompts. +- **[user-role](./OpenAI/Prompts/user-role)**: Markdown files for user-role prompts. 
--- @@ -48,9 +48,7 @@ This module offers a set of functionalities for conversational agents in LangCha - Text splitting using `RecursiveCharacterTextSplitter` - Various embeddings options like `OpenAIEmbeddings`, `CacheBackedEmbeddings`, and `HuggingFaceEmbeddings` -**Potential Use Cases:** - -${MASK} +**Potential Use Cases:** For developing conversational agents with advanced features. --- @@ -64,9 +62,7 @@ This module focuses on querying local documents and employs the following featur - Vector storage options like `Chroma` - Embedding options via `OpenAIEmbeddings` -**Potential Use Cases:** - -${MASK} +**Potential Use Cases:** For querying large sets of documents efficiently. --- @@ -86,8 +82,7 @@ This module focuses on generating captions for images using Hugging Face's trans - Caption caching for improved efficiency - Device selection (CPU or GPU) based on availability -**Potential Use Cases:** -${MASK} +**Potential Use Cases:** For generating accurate and context-appropriate image captions. --- From dbb90c3d9733d714cdf8198a7a29be439f5787bc Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 11 Oct 2023 21:10:37 -0700 Subject: [PATCH 13/25] Added HuggingFace section to 'todo.md' --- todo.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/todo.md b/todo.md index 284aef2..70adb43 100644 --- a/todo.md +++ b/todo.md @@ -58,3 +58,8 @@ - ~~[Task]:Update test.py and run~~ --- + +[HuggingFace] + +- Test: `integrable_image_captioner.py` + - Deposit AI art images for batch tests \ No newline at end of file From 7ba795e9f7be484d89619551e1e1ebd9faa5c610 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Thu, 12 Oct 2023 15:39:34 -0700 Subject: [PATCH 14/25] Attempted fine-tuning of a sequence classification model using HuggingFace's Accelerate library. 
new file: HuggingFace/Accelerate/.env.template new file: HuggingFace/Accelerate/README.md new file: HuggingFace/Accelerate/fine_tune_sequence_classification_model.py --- HuggingFace/Accelerate/.env.template | 26 ++ HuggingFace/Accelerate/README.md | 49 ++++ ...fine_tune_sequence_classification_model.py | 246 ++++++++++++++++++ 3 files changed, 321 insertions(+) create mode 100644 HuggingFace/Accelerate/.env.template create mode 100644 HuggingFace/Accelerate/README.md create mode 100644 HuggingFace/Accelerate/fine_tune_sequence_classification_model.py diff --git a/HuggingFace/Accelerate/.env.template b/HuggingFace/Accelerate/.env.template new file mode 100644 index 0000000..e13396e --- /dev/null +++ b/HuggingFace/Accelerate/.env.template @@ -0,0 +1,26 @@ +# Checkpoint to use for the model +CHECKPOINT=distilbert-base-uncased + +# Number of epochs to train the model +NUM_EPOCHS=3 + +# Learning rate for the optimizer +LR=3e-5 + +# Path to the data directory +DATA_PATH=data_path + +# Tokenizer to use for the model +TOKENIZER=distilbert-base-uncased + +# Train, validation, and test split ratios +TRAIN_RATIO=0.8 +EVAL_RATIO=0.1 +VAL_RATIO=0.05 +TEST_RATIO=0.05 + +# Seed for reproducibility +SEED=42 + +# Batch size for training and evaluation +BATCH_SIZE=16 \ No newline at end of file diff --git a/HuggingFace/Accelerate/README.md b/HuggingFace/Accelerate/README.md new file mode 100644 index 0000000..ae8239c --- /dev/null +++ b/HuggingFace/Accelerate/README.md @@ -0,0 +1,49 @@ +# Getting Started with Sequence Classification + +Welcome to the Sequence Classification example! This guide will help you get started with training a sequence classification model using the Hugging Face Transformers library. 
+ +## Installation + +To install the required packages, you can use pip: + +`pip install torch transformers accelerate tqdm python-dotenv` + +## Usage + +To use the Sequence Classification example, you can run the `fine_tune_sequence_classification_model.py` script: + +`python fine_tune_sequence_classification_model.py` + +This will train a sequence classification model on a dataset and evaluate its performance on the validation and test sets. + +## Configuration + +The behavior of the Sequence Classification example can be configured using environment variables. Here are the available environment variables and their default values: + +- `CHECKPOINT`: The path or identifier of the pre-trained checkpoint to use. Default is `distilbert-base-uncased`. +- `NUM_EPOCHS`: The number of epochs to train for. Default is `3`. +- `LR`: The learning rate to use for the optimizer. Default is `3e-5`. +- `DATA_PATH`: The path to the dataset. This is a required environment variable. +- `TOKENIZER`: The path or identifier of the tokenizer to use. Default is `distilbert-base-uncased`. +- `TRAIN_RATIO`: The ratio of examples to use for training. Default is `0.8`. +- `EVAL_RATIO`: The ratio of examples to use for evaluation. Default is `0.1`. +- `VAL_RATIO`: The ratio of examples to use for validation. Default is `0.05`. +- `TEST_RATIO`: The ratio of examples to use for testing. Default is `0.05`. +- `SEED`: The random seed to use for shuffling the dataset. Default is `42`. +- `BATCH_SIZE`: The batch size to use for training, evaluation, validation, and testing. Default is `16`. + +You can set these environment variables using a `.env` file in the same directory as the `fine_tune_sequence_classification_model.py` script. Here's an example `.env` file: + +```DATA_PATH=data.csv TRAIN_RATIO=0.7 EVAL_RATIO=0.15 VAL_RATIO=0.05 TEST_RATIO=0.1``` + +--- + +# GPT Description + +This Python script defines a Trainer class that can be used to fine-tune a pre-trained sequence classification model using the Hugging Face Transformers library.
The Trainer class provides a `prepare` method that sets up the model, optimizer, and learning rate scheduler, and a `train` method that runs the training loop; it has no evaluation method of its own. The split_dataset function is also defined in the script, which can be used to split a dataset into training, evaluation, validation, and test subsets. + +The script includes an example usage section that demonstrates how to use the Trainer class and split_dataset function with a custom dataset. The example usage section shows how to load a tokenizer and dataset, split the dataset, build the data loaders, fine-tune the model, and compute accuracy on the validation and test sets. The example usage section does not yet save the fine-tuned model to disk; that step still has to be added. + +Finally, the script includes a unit test class TestFineTuneSequenceClassificationModel that tests the split_dataset function, the prepare and train methods of the Trainer class, and the validation accuracy loop. The unit test class provides a set of test cases that can be used to verify the correctness of the Trainer class implementation. The unit test class can be run using a testing framework such as unittest to ensure that the Trainer class is working as expected. + +To improve the readability of the code, it may be helpful to add comments to explain the purpose of each method and variable. Additionally, it may be helpful to break up the Trainer class into smaller, more focused classes or functions to improve the modularity of the code. Finally, it may be helpful to add more error handling and input validation to the code to make it more robust and prevent unexpected errors.
\ No newline at end of file diff --git a/HuggingFace/Accelerate/fine_tune_sequence_classification_model.py b/HuggingFace/Accelerate/fine_tune_sequence_classification_model.py new file mode 100644 index 0000000..75d2422 --- /dev/null +++ b/HuggingFace/Accelerate/fine_tune_sequence_classification_model.py @@ -0,0 +1,246 @@ +import os +import random +import torch +from accelerate import Accelerator +from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler, AutoTokenizer +from torch.utils.data import DataLoader, Subset +from tqdm import tqdm +from dotenv import load_dotenv +import unittest + +load_dotenv() + +class Trainer: + """ + A class for training a sequence classification model using the Hugging Face Transformers library. + + Args: + checkpoint (str): The path or identifier of the pre-trained checkpoint to use. + train_dataloader (DataLoader): The data loader for the training set. + eval_dataloader (DataLoader): The data loader for the evaluation set. + val_dataloader (DataLoader): The data loader for the validation set. + test_dataloader (DataLoader): The data loader for the test set. + num_epochs (int, optional): The number of epochs to train for. Defaults to 3. + lr (float, optional): The learning rate to use for the optimizer. Defaults to 3e-5. + """ + def __init__(self, checkpoint=None, train_dataloader=None, eval_dataloader=None, val_dataloader=None, test_dataloader=None, num_epochs=None, lr=None): + """ + Initializes a new instance of the Trainer class. + + Args: + checkpoint (str): The path or identifier of the pre-trained checkpoint to use. + train_dataloader (DataLoader): The data loader for the training set. + eval_dataloader (DataLoader): The data loader for the evaluation set. + val_dataloader (DataLoader): The data loader for the validation set. + test_dataloader (DataLoader): The data loader for the test set. + num_epochs (int, optional): The number of epochs to train for. Defaults to 3. 
+ lr (float, optional): The learning rate to use for the optimizer. Defaults to 3e-5. + """ + self.checkpoint = checkpoint or os.getenv("CHECKPOINT", "distilbert-base-uncased") + self.train_dataloader = train_dataloader + self.eval_dataloader = eval_dataloader + self.val_dataloader = val_dataloader + self.test_dataloader = test_dataloader + self.num_epochs = num_epochs or int(os.getenv("NUM_EPOCHS", 3)) + self.lr = lr or float(os.getenv("LR", 3e-5)) + self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + self.accelerator = Accelerator() + self.model = None + self.optimizer = None + self.lr_scheduler = None + self.progress_bar = None + + def prepare(self): + """ + Initializes the model, optimizer, and learning rate scheduler. + """ + if self.train_dataloader is None or self.eval_dataloader is None or self.val_dataloader is None or self.test_dataloader is None: + raise ValueError("Data loaders not defined. Cannot prepare trainer.") + self.model = AutoModelForSequenceClassification.from_pretrained(self.checkpoint, num_labels=2) + self.optimizer = AdamW(self.model.parameters(), lr=self.lr) + self.model.to(self.device) + self.train_dataloader, self.eval_dataloader, self.val_dataloader, self.test_dataloader, self.model, self.optimizer = self.accelerator.prepare( + self.train_dataloader, self.eval_dataloader, self.val_dataloader, self.test_dataloader, self.model, self.optimizer + ) + num_training_steps = self.num_epochs * len(self.train_dataloader) + self.lr_scheduler = get_scheduler( + "linear", + optimizer=self.optimizer, + num_warmup_steps=0, + num_training_steps=num_training_steps + ) + self.progress_bar = tqdm(range(num_training_steps)) + + def train(self): + """ + Trains the model for the specified number of epochs. + + Raises: + ValueError: If the model, optimizer, learning rate scheduler, or progress bar is not initialized. 
+ """ + if self.model is None or self.optimizer is None or self.lr_scheduler is None or self.progress_bar is None: + raise ValueError("Trainer not prepared. Call prepare() method first.") + self.model.train() + for epoch in range(self.num_epochs): + for batch in self.train_dataloader: + batch = {k: v.to(self.device) for k, v in batch.items()} + outputs = self.model(**batch) + loss = outputs.loss + loss.backward() + self.accelerator.backward(loss) + + self.optimizer.step() + self.lr_scheduler.step() + self.optimizer.zero_grad() + self.progress_bar.update(1) + +def split_dataset(dataset, train_ratio=0.8, eval_ratio=0.1, val_ratio=0.05, test_ratio=0.05, seed=42): + """ + Splits a dataset into training, evaluation, validation, and test subsets. + + Args: + dataset (Dataset): The dataset to split. + train_ratio (float, optional): The ratio of examples to use for training. Defaults to 0.8. + eval_ratio (float, optional): The ratio of examples to use for evaluation. Defaults to 0.1. + val_ratio (float, optional): The ratio of examples to use for validation. Defaults to 0.05. + test_ratio (float, optional): The ratio of examples to use for testing. Defaults to 0.05. + seed (int, optional): The random seed to use for shuffling the dataset. Defaults to 42. + + Returns: + Tuple[Subset]: A tuple of four subsets for training, evaluation, validation, and test. 
+ """ + num_examples = len(dataset) + indices = list(range(num_examples)) + random.seed(seed) + random.shuffle(indices) + train_size = int(train_ratio * num_examples) + eval_size = int(eval_ratio * num_examples) + val_size = int(val_ratio * num_examples) + test_size = int(test_ratio * num_examples) + train_indices = indices[:train_size] + eval_indices = indices[train_size:train_size+eval_size] + val_indices = indices[train_size+eval_size:train_size+eval_size+val_size] + test_indices = indices[train_size+eval_size+val_size:train_size+eval_size+val_size+test_size] + train_subset = Subset(dataset, train_indices) + eval_subset = Subset(dataset, eval_indices) + val_subset = Subset(dataset, val_indices) + test_subset = Subset(dataset, test_indices) + return train_subset, eval_subset, val_subset, test_subset + +# Example usage +if __name__ == "__main__": + from my_dataset import MyDataset + + # Load dataset + data_path = os.getenv("DATA_PATH") + tokenizer = AutoTokenizer.from_pretrained(os.getenv("TOKENIZER", "distilbert-base-uncased")) + dataset = MyDataset(data_path, tokenizer) + + # Split dataset + train_ratio = float(os.getenv("TRAIN_RATIO", 0.8)) + eval_ratio = float(os.getenv("EVAL_RATIO", 0.1)) + val_ratio = float(os.getenv("VAL_RATIO", 0.05)) + test_ratio = float(os.getenv("TEST_RATIO", 0.05)) + seed = int(os.getenv("SEED", 42)) + train_subset, eval_subset, val_subset, test_subset = split_dataset(dataset, train_ratio, eval_ratio, val_ratio, test_ratio, seed) + + # Create data loaders + batch_size = int(os.getenv("BATCH_SIZE", 16)) + train_dataloader = DataLoader(train_subset, batch_size=batch_size, shuffle=True) + eval_dataloader = DataLoader(eval_subset, batch_size=batch_size, shuffle=False) + val_dataloader = DataLoader(val_subset, batch_size=batch_size, shuffle=False) + test_dataloader = DataLoader(test_subset, batch_size=batch_size, shuffle=False) + + # Create trainer + trainer = Trainer(train_dataloader=train_dataloader, eval_dataloader=eval_dataloader, 
val_dataloader=val_dataloader, test_dataloader=test_dataloader) + + # Prepare trainer + trainer.prepare() + + # Train model + trainer.train() + + # Evaluate model on validation set + trainer.model.eval() + with torch.no_grad(): + total_correct = 0 + total_samples = 0 + for batch in val_dataloader: + batch = {k: v.to(trainer.device) for k, v in batch.items()} + outputs = trainer.model(**batch) + logits = outputs.logits + predictions = torch.argmax(logits, dim=1) + labels = batch["labels"] + total_correct += (predictions == labels).sum().item() + total_samples += len(labels) + accuracy = total_correct / total_samples + print(f"Validation accuracy: {accuracy:.4f}") + + # Evaluate model on test set + trainer.model.eval() + with torch.no_grad(): + total_correct = 0 + total_samples = 0 + for batch in test_dataloader: + batch = {k: v.to(trainer.device) for k, v in batch.items()} + outputs = trainer.model(**batch) + logits = outputs.logits + predictions = torch.argmax(logits, dim=1) + labels = batch["labels"] + total_correct += (predictions == labels).sum().item() + total_samples += len(labels) + accuracy = total_correct / total_samples + print(f"Test accuracy: {accuracy:.4f}") + +class TestFineTuneSequenceClassificationModel(unittest.TestCase): + def setUp(self): + self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") + self.dataset = MyDataset("data_path", self.tokenizer) + self.train_subset, self.eval_subset, self.val_subset, self.test_subset = split_dataset(self.dataset, train_ratio=0.8, eval_ratio=0.1, val_ratio=0.05, test_ratio=0.05, seed=42) + self.batch_size = 16 + self.train_dataloader = DataLoader(self.train_subset, batch_size=self.batch_size, shuffle=True) + self.eval_dataloader = DataLoader(self.eval_subset, batch_size=self.batch_size, shuffle=False) + self.val_dataloader = DataLoader(self.val_subset, batch_size=self.batch_size, shuffle=False) + self.test_dataloader = DataLoader(self.test_subset, batch_size=self.batch_size, shuffle=False) + 
self.trainer = Trainer(train_dataloader=self.train_dataloader, eval_dataloader=self.eval_dataloader, val_dataloader=self.val_dataloader, test_dataloader=self.test_dataloader) + + def test_split_dataset(self): + train_subset, eval_subset, val_subset, test_subset = split_dataset(self.dataset, train_ratio=0.8, eval_ratio=0.1, val_ratio=0.05, test_ratio=0.05, seed=42) + self.assertEqual(len(train_subset), 80) + self.assertEqual(len(eval_subset), 10) + self.assertEqual(len(val_subset), 5) + self.assertEqual(len(test_subset), 5) + + def test_prepare(self): + self.trainer.prepare() + self.assertIsNotNone(self.trainer.model) + self.assertIsNotNone(self.trainer.optimizer) + self.assertIsNotNone(self.trainer.lr_scheduler) + self.assertIsNotNone(self.trainer.progress_bar) + + def test_train(self): + self.trainer.prepare() + self.trainer.train() + self.assertIsNotNone(self.trainer.model) + + def test_evaluate(self): + self.trainer.prepare() + self.trainer.train() + self.trainer.model.eval() + with torch.no_grad(): + total_correct = 0 + total_samples = 0 + for batch in self.val_dataloader: + batch = {k: v.to(self.trainer.device) for k, v in batch.items()} + outputs = self.trainer.model(**batch) + logits = outputs.logits + predictions = torch.argmax(logits, dim=1) + labels = batch["labels"] + total_correct += (predictions == labels).sum().item() + total_samples += len(labels) + accuracy = total_correct / total_samples + self.assertGreaterEqual(accuracy, 0.0) + self.assertLessEqual(accuracy, 1.0) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From 826d1188d7536e43162a6826ceb1e362ea24a87b Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Thu, 12 Oct 2023 15:57:29 -0700 Subject: [PATCH 15/25] Update README.md A bunch of updates still required for conference with the repo's directory structure --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 
7225070..e7a5252 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,6 @@ A. **[Auto-Embedder](./OpenAI/Auto-Embedder)** Provides an automated pipeline for retrieving embeddings from [OpenAIs `text-embedding-ada-002`](https://platform.openai.com/docs/guides/embeddings) and upserting them to a [Pinecone index](https://docs.pinecone.io/docs/indexes). - **[`pinembed.py`](./OpenAI/Auto-Embedder/pinembed.py)**: A Python module to easily automate the retrieval of embeddings from OpenAI and storage in Pinecone. - - **[.env.template](./OpenAI/Auto-Embedder/.env.template)**: Template for environment variables. --- @@ -22,7 +21,7 @@ B. **[Prompts](./OpenAI/Prompts/)** There are three main prompt types, [multi-shot](./OpenAI/Prompts/multi-shot), [system-role](./OpenAI/Prompts/system-role), [user-role](./OpenAI/Prompts/user-role). -Please also see the [OUT-prompt-cheatsheet](./OpenAI/Prompts/OUT-prompt-cheatsheet.md). +Please also see the [prompt-cheatsheet](./OpenAI/Prompts/prompt-cheatsheet.md). - **[Cheatsheet](./OpenAI/Prompts/prompt-cheatsheet.md)**: @Daethyra's go-to prompts. 
From 5386e79f3b8b904bbda06031138c56daf3736fe5 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Thu, 12 Oct 2023 16:23:50 -0700 Subject: [PATCH 16/25] Removed stateful_chatbot, replaced w/ chroma_memory.py new file: LangChain/Chatbots/chroma_memory.py new file: LangChain/Chatbots/how-to_chroma-memory.md deleted: LangChain/Chatbots/stateful_chatbot.py modified: README.md --- LangChain/Chatbots/chroma_memory.py | 45 ++++ LangChain/Chatbots/how-to_chroma-memory.md | 38 ++++ LangChain/Chatbots/stateful_chatbot.py | 230 --------------------- README.md | 4 +- 4 files changed, 85 insertions(+), 232 deletions(-) create mode 100644 LangChain/Chatbots/chroma_memory.py create mode 100644 LangChain/Chatbots/how-to_chroma-memory.md delete mode 100644 LangChain/Chatbots/stateful_chatbot.py diff --git a/LangChain/Chatbots/chroma_memory.py b/LangChain/Chatbots/chroma_memory.py new file mode 100644 index 0000000..f459c24 --- /dev/null +++ b/LangChain/Chatbots/chroma_memory.py @@ -0,0 +1,45 @@ +import logging +from typing import List, Any, Dict +from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings, HuggingFaceEmbeddings +from langchain.filters import EmbeddingsRedundantFilter +from langchain.chat_models import ChatOpenAI +from langchain.chains.conversation.memory import ConversationBufferWindowMemory +from langchain.chains import RetrievalQA +import chromadb +from langchain.vectorstores import Chroma + +logging.basicConfig(level=logging.ERROR) + +class ChromaMemory: + def __init__(self, model_name: str, cache_dir: str, max_history_len: int, vectorstore: Chroma): + """ + Initialize the ChromaMemory with a model name, cache directory, maximum history length, and a vectorstore. + Args: + model_name (str): The name of the LLM model to use. + cache_dir (str): The path to the directory to cache embeddings. + vectorstore (Chroma): The vectorstore to use for similarity matching. 
+ chroma_memory = ChromaMemory(model_name, cache_dir, max_history_len, vectorstore) + max_history_len (int): The maximum length of the conversation history to remember. + + """ + try: + self.embeddings = CacheBackedEmbeddings( + OpenAIEmbeddings(model_name), + cache_dir + ) + self.filter = EmbeddingsRedundantFilter() + self.chat_model = ChatOpenAI( + self.embeddings, + self.filter + ) + self.memory = ConversationBufferWindowMemory( + max_history_len, + self.chat_model + ) + self.retrieval = RetrievalQA( + self.memory, + vectorstore + ) + except Exception as e: + logging.error(f"Error initializing ChromaMemory: {e}") + raise ValueError(f"Error initializing ChromaMemory: {e}") from e \ No newline at end of file diff --git a/LangChain/Chatbots/how-to_chroma-memory.md b/LangChain/Chatbots/how-to_chroma-memory.md new file mode 100644 index 0000000..a866ee1 --- /dev/null +++ b/LangChain/Chatbots/how-to_chroma-memory.md @@ -0,0 +1,38 @@ +# This is a basic guide on how to use the ChromaMemory component to store chat history and retrieve answers to questions from the conversation history. + +### 1. Import the ChromaMemory class from the chroma_memory module: + +`from chroma_memory import ChromaMemory` + +### 2. Create an instance of the ChromaMemory class, passing in the required parameters: + +``` +model_name = "text-embedding-ada-002" +cache_dir = "/opt/llm/vectorstore/chroma" +vectorstore = Chroma("/opt/llm/vectorstore/chroma") +max_history_len = 100 +chroma_memory = ChromaMemory(model_name, cache_dir, max_history_len, vectorstore) +``` + +The model_name parameter specifies the name of the LLM model to use, the cache_dir parameter specifies the path to the directory to cache embeddings, the max_history_len parameter specifies the maximum length of the conversation history to remember, and the vectorstore parameter specifies the vectorstore to use for similarity matching. + +### 3. 
To store a new chat message in the conversation history, call the add_message method of the ConversationBufferWindowMemory object: + +``` +message = "Hello, how are you?" +chroma_memory.memory.add_message(message) +``` + +### 4. This will add the message to the conversation history. + +To retrieve an answer to a question from the conversation history, call the retrieve method of the RetrievalQA object: + +``` +question = "What's your favorite color?" +answer = chroma_memory.retrieval.retrieve(question) +print(answer) +``` + +This will retrieve the answer to the most similar question in the conversation history to the input question. + +That's it! For more information, please see the official LangChain documentation. \ No newline at end of file diff --git a/LangChain/Chatbots/stateful_chatbot.py b/LangChain/Chatbots/stateful_chatbot.py deleted file mode 100644 index 8852499..0000000 --- a/LangChain/Chatbots/stateful_chatbot.py +++ /dev/null @@ -1,230 +0,0 @@ -import logging -from typing import List, Any, Dict -from langchain.document_loaders import PyPDFDirectoryLoader -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings, HuggingFaceEmbeddings -from langchain.filters import EmbeddingsRedundantFilter -from langchain.chat_models import ChatOpenAI -from langchain.chains.conversation.memory import ConversationBufferWindowMemory -from langchain.chains import RetrievalQA -import chromadb -from langchain.vectorstores import Chroma - -logging.basicConfig(level=logging.ERROR) - -# PDF Document Management -class PDFDocumentManager: - def __init__(self, directory: str): - """ - Initialize the PDFDocumentManager with a directory path. - Args: - directory (str): The path to the directory containing PDF files. 
- """ - try: - self.loader = PyPDFDirectoryLoader(directory) - except Exception as e: - logging.error(f"Error initializing PyPDFDirectoryLoader: {e}") - raise ValueError(f"Error initializing PyPDFDirectoryLoader: {e}") from e - - def load_documents(self) -> List[Any]: - """ - Load PDF documents from the specified directory. - Returns: - List[Any]: A list of loaded PDF documents. - """ - try: - return self.loader.load() - except Exception as e: - logging.error(f"Error loading documents: {e}") - raise ValueError(f"Error loading documents: {e}") from e - -# Text Splitting -class TextSplitManager: - def __init__(self, chunk_size: int, chunk_overlap: int, length_function=len, add_start_index=True): - """ - Initialize TextSplitManager with configuration for text splitting. - Args: - chunk_size (int): The maximum size for each chunk. - chunk_overlap (int): The overlap between adjacent chunks. - length_function (callable, optional): Function to compute the length of a chunk. Defaults to len. - add_start_index (bool, optional): Whether to include the start index of each chunk. Defaults to True. - """ - self.text_splitter = RecursiveCharacterTextSplitter( - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - length_function=length_function, - add_start_index=add_start_index - ) - - def create_documents(self, docs: List[Any]) -> List[Any]: - """ - Create document chunks based on the configuration. - Args: - docs (List[Any]): List of documents to be chunked. - Returns: - List[Any]: List of document chunks. - """ - try: - return self.text_splitter.create_documents(docs) - except Exception as e: - logging.error(f"Error in text splitting: {e}") - raise ValueError(f"Error in text splitting: {e}") from e - -# Embeddings and Filtering -class EmbeddingManager: - def __init__(self): - """ - Initialize EmbeddingManager for handling document embeddings. 
- """ - self.embedder = CacheBackedEmbeddings(OpenAIEmbeddings()) - - def embed_documents(self, docs: List[Any]) -> List[Any]: - """ - Embed the documents using the configured embedder. - Args: - docs (List[Any]): List of documents to be embedded. - Returns: - List[Any]: List of embedded documents. - """ - try: - return self.embedder.embed_documents(docs) - except Exception as e: - logging.error(f"Error in embedding documents: {e}") - raise ValueError(f"Error in embedding documents: {e}") from e - - def filter_redundant(self, embeddings: List[Any]) -> List[Any]: - """ - Filter redundant embeddings from the list. - Args: - embeddings (List[Any]): List of embeddings. - Returns: - List[Any]: List of non-redundant embeddings. - """ - try: - filter_instance = EmbeddingsRedundantFilter(embeddings) - return filter_instance() - except Exception as e: - logging.error(f"Error in filtering redundant embeddings: {e}") - raise ValueError(f"Error in filtering redundant embeddings: {e}") from e - -# Document Retrieval and Reordering -class DocumentRetriever: - def __init__(self, model_name: str, texts: List[str], search_kwargs: Dict[str, Any]): - """ - Initialize DocumentRetriever for document retrieval and reordering. - Args: - model_name (str): Name of the embedding model to use. - texts (List[str]): Texts for retriever training. - search_kwargs (Dict[str, Any]): Additional search parameters. - """ - self.embeddings = HuggingFaceEmbeddings(model_name=model_name) - self.retriever = Chroma.from_texts(texts, embedding=self.embeddings).as_retriever( - search_kwargs=search_kwargs - ) - - def get_relevant_documents(self, query: str) -> List[Any]: - """ - Retrieve relevant documents based on the query. - Args: - query (str): The query string. - Returns: - List[Any]: List of relevant documents. 
- """ - try: - return self.retriever.get_relevant_documents(query) - except Exception as e: - logging.error(f"Error retrieving relevant documents: {e}") - raise ValueError(f"Error retrieving relevant documents: {e}") from e - -# Chat and QA functionalities -class ChatQA: - def __init__(self, api_key: str, model_name: str, directory: str, chunk_size: int, chunk_overlap: int, search_k: int): - """ - Initialize ChatQA for chat and QA functionalities. - Args: - api_key (str): API key for OpenAI. - model_name (str): Name of the model for embeddings. - directory (str): The path to the directory containing PDF files. - chunk_size (int): The maximum size for each chunk. - chunk_overlap (int): The overlap between adjacent chunks. - search_k (int): Number of documents to retrieve. - """ - self.pdf_manager = PDFDocumentManager(directory) - self.text_split_manager = TextSplitManager(chunk_size, chunk_overlap) - self.embedding_manager = EmbeddingManager() - self.llm = ChatOpenAI( - openai_api_key=api_key, - model_name='gpt-3.5-turbo', - temperature=0.0 - ) - self.conversational_memory = ConversationBufferWindowMemory( - memory_key='chat_history', - k=5, - return_messages=True - ) - self.retriever = DocumentRetriever(model_name, [], {"k": search_k}) - self.qa = RetrievalQA.from_chain_type( - llm=self.llm, - chain_type="stuff", - retriever=self.retriever.retriever - ) - - def load_documents(self) -> List[Any]: - """ - Load PDF documents from the specified directory, split them into chunks, and embed them. - Returns: - List[Any]: List of embedded document chunks. 
- """ - try: - docs = self.pdf_manager.load_documents() - chunks = self.text_split_manager.create_documents(docs) - embeddings = self.embedding_manager.embed_documents(chunks) - return self.embedding_manager.filter_redundant(embeddings) - except Exception as e: - logging.error(f"Error loading and embedding documents: {e}") - raise ValueError(f"Error loading and embedding documents: {e}") from e - - def update_retriever(self, texts: List[str]): - """ - Update the retriever with new texts. - Args: - texts (List[str]): List of texts to update the retriever. - """ - try: - self.retriever = DocumentRetriever(self.retriever.embeddings.model_name, texts, self.retriever.search_kwargs) - self.qa = RetrievalQA.from_chain_type( - llm=self.llm, - chain_type="stuff", - retriever=self.retriever.retriever - ) - except Exception as e: - logging.error(f"Error updating retriever: {e}") - raise ValueError(f"Error updating retriever: {e}") from e - - def get_relevant_documents(self, query: str) -> List[Any]: - """ - Retrieve relevant documents based on the query. - Args: - query (str): The query string. - Returns: - List[Any]: List of relevant documents. - """ - try: - return self.retriever.get_relevant_documents(query) - except Exception as e: - logging.error(f"Error retrieving relevant documents: {e}") - raise ValueError(f"Error retrieving relevant documents: {e}") from e - - def ask_question(self, query: str) -> str: - """ - Ask a question based on the query. - Args: - query (str): The query string. - Returns: - str: The answer to the question. - """ - try: - return self.qa.ask_question(query) - except Exception as e: - logging.error(f"Error asking question: {e}") - raise ValueError(f"Error asking question: {e}") from e \ No newline at end of file diff --git a/README.md b/README.md index e7a5252..556a82d 100644 --- a/README.md +++ b/README.md @@ -38,12 +38,12 @@ It's kind of like a bundle of Matryoshka prompts! --- -A. 
**[`stateful_chatbot.py`](./LangChain/Retrieval-Agents/stateful_chatbot.py)** +A. **[`stateful_chatbot.py`](./LangChain/Retrieval-Augmented-Generation/qa_local_docs.py)** This module offers a set of functionalities for conversational agents in LangChain. Specifically, it provides: - Argument parsing for configuring the agent -- Document loading via `PyPDFDirectoryLoader` +- Document loading via `PDFProcessor` - Text splitting using `RecursiveCharacterTextSplitter` - Various embeddings options like `OpenAIEmbeddings`, `CacheBackedEmbeddings`, and `HuggingFaceEmbeddings` From 27840d1cdd22065c8cc76358c17d1c57e5d1e4b3 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 18 Oct 2023 19:27:44 -0700 Subject: [PATCH 17/25] Moved Prompts dir to root --- .../Prompts => Prompts}/MS-6_Daethyra_Custom-Instruction_GPT4.md | 0 {OpenAI/Prompts => Prompts}/multi-shot/MS-1.MD | 0 {OpenAI/Prompts => Prompts}/multi-shot/MS-2_Large-Template.txt | 0 .../Prompts => Prompts}/multi-shot/MS-5_No-Prose_Doc-Reader.txt | 0 {OpenAI/Prompts => Prompts}/prompt-cheatsheet.md | 0 {OpenAI/Prompts => Prompts}/system-role/SR-1_List-o-Prompts.md | 0 {OpenAI/Prompts => Prompts}/system-role/SR-2_package-migration.md | 0 .../Prompts => Prompts}/system-role/SR-3_thorough-programmer.md | 0 {OpenAI/Prompts => Prompts}/system-role/SR-4_online-searches.md | 0 {OpenAI/Prompts => Prompts}/user-role/UR-1.MD | 0 {OpenAI/Prompts => Prompts}/user-role/UR-2.md | 0 11 files changed, 0 insertions(+), 0 deletions(-) rename {OpenAI/Prompts => Prompts}/MS-6_Daethyra_Custom-Instruction_GPT4.md (100%) rename {OpenAI/Prompts => Prompts}/multi-shot/MS-1.MD (100%) rename {OpenAI/Prompts => Prompts}/multi-shot/MS-2_Large-Template.txt (100%) rename {OpenAI/Prompts => Prompts}/multi-shot/MS-5_No-Prose_Doc-Reader.txt (100%) rename {OpenAI/Prompts => Prompts}/prompt-cheatsheet.md (100%) rename {OpenAI/Prompts => Prompts}/system-role/SR-1_List-o-Prompts.md (100%) rename {OpenAI/Prompts => 
Prompts}/system-role/SR-2_package-migration.md (100%) rename {OpenAI/Prompts => Prompts}/system-role/SR-3_thorough-programmer.md (100%) rename {OpenAI/Prompts => Prompts}/system-role/SR-4_online-searches.md (100%) rename {OpenAI/Prompts => Prompts}/user-role/UR-1.MD (100%) rename {OpenAI/Prompts => Prompts}/user-role/UR-2.md (100%) diff --git a/OpenAI/Prompts/MS-6_Daethyra_Custom-Instruction_GPT4.md b/Prompts/MS-6_Daethyra_Custom-Instruction_GPT4.md similarity index 100% rename from OpenAI/Prompts/MS-6_Daethyra_Custom-Instruction_GPT4.md rename to Prompts/MS-6_Daethyra_Custom-Instruction_GPT4.md diff --git a/OpenAI/Prompts/multi-shot/MS-1.MD b/Prompts/multi-shot/MS-1.MD similarity index 100% rename from OpenAI/Prompts/multi-shot/MS-1.MD rename to Prompts/multi-shot/MS-1.MD diff --git a/OpenAI/Prompts/multi-shot/MS-2_Large-Template.txt b/Prompts/multi-shot/MS-2_Large-Template.txt similarity index 100% rename from OpenAI/Prompts/multi-shot/MS-2_Large-Template.txt rename to Prompts/multi-shot/MS-2_Large-Template.txt diff --git a/OpenAI/Prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt b/Prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt similarity index 100% rename from OpenAI/Prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt rename to Prompts/multi-shot/MS-5_No-Prose_Doc-Reader.txt diff --git a/OpenAI/Prompts/prompt-cheatsheet.md b/Prompts/prompt-cheatsheet.md similarity index 100% rename from OpenAI/Prompts/prompt-cheatsheet.md rename to Prompts/prompt-cheatsheet.md diff --git a/OpenAI/Prompts/system-role/SR-1_List-o-Prompts.md b/Prompts/system-role/SR-1_List-o-Prompts.md similarity index 100% rename from OpenAI/Prompts/system-role/SR-1_List-o-Prompts.md rename to Prompts/system-role/SR-1_List-o-Prompts.md diff --git a/OpenAI/Prompts/system-role/SR-2_package-migration.md b/Prompts/system-role/SR-2_package-migration.md similarity index 100% rename from OpenAI/Prompts/system-role/SR-2_package-migration.md rename to Prompts/system-role/SR-2_package-migration.md diff --git 
a/OpenAI/Prompts/system-role/SR-3_thorough-programmer.md b/Prompts/system-role/SR-3_thorough-programmer.md similarity index 100% rename from OpenAI/Prompts/system-role/SR-3_thorough-programmer.md rename to Prompts/system-role/SR-3_thorough-programmer.md diff --git a/OpenAI/Prompts/system-role/SR-4_online-searches.md b/Prompts/system-role/SR-4_online-searches.md similarity index 100% rename from OpenAI/Prompts/system-role/SR-4_online-searches.md rename to Prompts/system-role/SR-4_online-searches.md diff --git a/OpenAI/Prompts/user-role/UR-1.MD b/Prompts/user-role/UR-1.MD similarity index 100% rename from OpenAI/Prompts/user-role/UR-1.MD rename to Prompts/user-role/UR-1.MD diff --git a/OpenAI/Prompts/user-role/UR-2.md b/Prompts/user-role/UR-2.md similarity index 100% rename from OpenAI/Prompts/user-role/UR-2.md rename to Prompts/user-role/UR-2.md From 719778cfd6f649cafe8865f149a939858e100323 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 18 Oct 2023 19:36:12 -0700 Subject: [PATCH 18/25] Renamed to Embedding-Upsertion to better represent its contents also modified todo --- .../.env.template | 0 .../{Auto-Embedder => Embedding-Upsertion}/README.md | 0 .../{Auto-Embedder => Embedding-Upsertion}/__init__.py | 0 .../{Auto-Embedder => Embedding-Upsertion}/pinembed.py | 0 .../requirements.txt | 0 OpenAI/{Auto-Embedder => Embedding-Upsertion}/test.py | 0 README.md | 9 ++++++++- todo.md | 10 ---------- 8 files changed, 8 insertions(+), 11 deletions(-) rename OpenAI/{Auto-Embedder => Embedding-Upsertion}/.env.template (100%) rename OpenAI/{Auto-Embedder => Embedding-Upsertion}/README.md (100%) rename OpenAI/{Auto-Embedder => Embedding-Upsertion}/__init__.py (100%) rename OpenAI/{Auto-Embedder => Embedding-Upsertion}/pinembed.py (100%) rename OpenAI/{Auto-Embedder => Embedding-Upsertion}/requirements.txt (100%) rename OpenAI/{Auto-Embedder => Embedding-Upsertion}/test.py (100%) diff --git a/OpenAI/Auto-Embedder/.env.template 
b/OpenAI/Embedding-Upsertion/.env.template similarity index 100% rename from OpenAI/Auto-Embedder/.env.template rename to OpenAI/Embedding-Upsertion/.env.template diff --git a/OpenAI/Auto-Embedder/README.md b/OpenAI/Embedding-Upsertion/README.md similarity index 100% rename from OpenAI/Auto-Embedder/README.md rename to OpenAI/Embedding-Upsertion/README.md diff --git a/OpenAI/Auto-Embedder/__init__.py b/OpenAI/Embedding-Upsertion/__init__.py similarity index 100% rename from OpenAI/Auto-Embedder/__init__.py rename to OpenAI/Embedding-Upsertion/__init__.py diff --git a/OpenAI/Auto-Embedder/pinembed.py b/OpenAI/Embedding-Upsertion/pinembed.py similarity index 100% rename from OpenAI/Auto-Embedder/pinembed.py rename to OpenAI/Embedding-Upsertion/pinembed.py diff --git a/OpenAI/Auto-Embedder/requirements.txt b/OpenAI/Embedding-Upsertion/requirements.txt similarity index 100% rename from OpenAI/Auto-Embedder/requirements.txt rename to OpenAI/Embedding-Upsertion/requirements.txt diff --git a/OpenAI/Auto-Embedder/test.py b/OpenAI/Embedding-Upsertion/test.py similarity index 100% rename from OpenAI/Auto-Embedder/test.py rename to OpenAI/Embedding-Upsertion/test.py diff --git a/README.md b/README.md index 556a82d..aa314d3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,13 @@ # LLM Utilikit -${INTRO} +Welcome to LLM-Utilikit, your one-stop library of Python modules designed to supercharge your projects. Whether you're just getting started or looking to enhance an existing project, our toolkit offers a rich set of pluggable components and a treasure trove of large language model prompts and templates. But that's not all—we envision LLM-Utilikit as a communal canvas, inviting contributors from all walks of life to enrich this toolkit with their own prompts, templates, and Python modules. Join us in crafting a toolkit that's greater than the sum of its parts. 
+ +### Supported libraries: +- OpenAI +- LangChain +- HuggingFace +- Pinecone + ${SupportedLibraries} ${Intention : Reasoning} ${BriefResummary} diff --git a/todo.md b/todo.md index 70adb43..2d0e249 100644 --- a/todo.md +++ b/todo.md @@ -1,15 +1,5 @@ ### Todo list -[README] - -- Add intro - - Clearly define: [Utilikit, Pluggable/Components, multi-shot, zero-shot,] - - create summarization of prompt reusability, and component extendability - - Then, clearly state the intention of the repository. : Provide Reasoning, I want this to be a nexus of information to empower my LLMs moving forward. By continually updating this repository as a codebase and conglomeration of documentation, it may serve as a `git clone`able neuron for machine learning models. - - Finally, provide one to two brief statements to close out and resummarize - ---- - [GitHub] - Clean all of my Jupyter notebook Gists to create an agent From 4403ff647f103542728a1362b2c867833d4b8b64 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 18 Oct 2023 19:39:53 -0700 Subject: [PATCH 19/25] Filled in variables for readme --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index aa314d3..ab51d49 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # LLM Utilikit -Welcome to LLM-Utilikit, your one-stop library of Python modules designed to supercharge your projects. Whether you're just getting started or looking to enhance an existing project, our toolkit offers a rich set of pluggable components and a treasure trove of large language model prompts and templates. But that's not all—we envision LLM-Utilikit as a communal canvas, inviting contributors from all walks of life to enrich this toolkit with their own prompts, templates, and Python modules. Join us in crafting a toolkit that's greater than the sum of its parts. 
+Welcome to LLM-Utilikit, your one-stop library of Python modules designed to supercharge your projects. Whether you're just getting started or looking to enhance an existing project, our toolkit offers a rich set of pluggable components and a treasure trove of large language model prompts and templates. But that's not all — I envision the Utilikit as a communal canvas, inviting proompters from all industries and walks of life to enrich this toolkit with their own prompts, templates, and Python modules. Join us in crafting a toolkit that's greater than the sum of its parts. ### Supported libraries: - OpenAI @@ -8,9 +8,9 @@ Welcome to LLM-Utilikit, your one-stop library of Python modules designed to sup - HuggingFace - Pinecone -${SupportedLibraries} -${Intention : Reasoning} -${BriefResummary} +The genesis of LLM-Utilikit lies in the recognition of two key challenges faced by developers and data scientists alike: the need for a quick start and the desire for modular, reusable components. Our library addresses these challenges head-on by offering a curated set of Python modules that can either serve as a robust starting point for new projects or as plug-and-play components to elevate existing ones. Moreover, we believe in the collective wisdom of the community. That's why LLM-Utilikit is designed to be a collaborative platform, encouraging contributions that range from innovative prompts and templates to versatile Python modules. + +In summary, LLM-Utilikit is more than just a library—it's a community-driven platform designed to empower your projects. From versatile Python modules to a rich repository of large language model prompts and templates, we offer a comprehensive toolkit that caters to both beginners and seasoned developers. #### 1. 
**[OpenAI: Utilikit](./OpenAI/)** From 04292a00c8f5162366f9f889680cc3cb6034e046 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 18 Oct 2023 19:52:10 -0700 Subject: [PATCH 20/25] Finalizing readme edits, just need new mind map --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ab51d49..65a0708 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # LLM Utilikit -Welcome to LLM-Utilikit, your one-stop library of Python modules designed to supercharge your projects. Whether you're just getting started or looking to enhance an existing project, our toolkit offers a rich set of pluggable components and a treasure trove of large language model prompts and templates. But that's not all — I envision the Utilikit as a communal canvas, inviting proompters from all industries and walks of life to enrich this toolkit with their own prompts, templates, and Python modules. Join us in crafting a toolkit that's greater than the sum of its parts. +Welcome to the Utilikit, your one-stop library of Python modules designed to supercharge your projects. Whether you're just getting started or looking to enhance an existing project, this library offers a rich set of pluggable components and a treasure trove of large language model prompts and templates. But that's not all — I envision the Utilikit as a communal canvas, inviting proompters from all industries and walks of life to enrich this toolkit with their own prompts, templates, and Python modules. Join us in crafting a toolkit that's greater than the sum of its parts. ### Supported libraries: - OpenAI @@ -8,9 +8,7 @@ Welcome to LLM-Utilikit, your one-stop library of Python modules designed to sup - HuggingFace - Pinecone -The genesis of LLM-Utilikit lies in the recognition of two key challenges faced by developers and data scientists alike: the need for a quick start and the desire for modular, reusable components. 
Our library addresses these challenges head-on by offering a curated set of Python modules that can either serve as a robust starting point for new projects or as plug-and-play components to elevate existing ones. Moreover, we believe in the collective wisdom of the community. That's why LLM-Utilikit is designed to be a collaborative platform, encouraging contributions that range from innovative prompts and templates to versatile Python modules. - -In summary, LLM-Utilikit is more than just a library—it's a community-driven platform designed to empower your projects. From versatile Python modules to a rich repository of large language model prompts and templates, we offer a comprehensive toolkit that caters to both beginners and seasoned developers. +This project aims to solve two key challenges faced by developers and data scientists alike: the need for a quick start and the desire for modular, reusable components. This library addresses these challenges head-on by offering a curated set of Python modules that can either serve as a robust starting point for new projects or as plug-and-play components to elevate existing ones. #### 1. **[OpenAI: Utilikit](./OpenAI/)** From 3ab6108e6f6437cb0c141dc5188e43148efc9606 Mon Sep 17 00:00:00 2001 From: Daemon <109057945+Daethyra@users.noreply.github.com> Date: Wed, 18 Oct 2023 19:59:58 -0700 Subject: [PATCH 21/25] modified: README.md --- README.md | 50 +++++++++++++++++++------------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 65a0708..955718d 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Welcome to the Utilikit, your one-stop library of Python modules designed to supercharge your projects. Whether you're just getting started or looking to enhance an existing project, this library offers a rich set of pluggable components and a treasure trove of large language model prompts and templates. 
But that's not all — I envision the Utilikit as a communal canvas, inviting proompters from all industries and walks of life to enrich this toolkit with their own prompts, templates, and Python modules. Join us in crafting a toolkit that's greater than the sum of its parts. -### Supported libraries: +## Supported libraries: - OpenAI - LangChain - HuggingFace @@ -10,38 +10,30 @@ Welcome to the Utilikit, your one-stop library of Python modules designed to sup This project aims to solve two key challenges faced by developers and data scientists alike: the need for a quick start and the desire for modular, reusable components. This library addresses these challenges head-on by offering a curated set of Python modules that can either serve as a robust starting point for new projects or as plug-and-play components to elevate existing ones. -#### 1. **[OpenAI: Utilikit](./OpenAI/)** +## 0. **[Prompts](./Prompts/)** ---- - -A. **[Auto-Embedder](./OpenAI/Auto-Embedder)** +There are three main prompt types, [multi-shot](./Prompts/multi-shot), [system-role](./Prompts/system-role), [user-role](./Prompts/user-role). -Provides an automated pipeline for retrieving embeddings from [OpenAIs `text-embedding-ada-002`](https://platform.openai.com/docs/guides/embeddings) and upserting them to a [Pinecone index](https://docs.pinecone.io/docs/indexes). +Please also see the [prompt-cheatsheet](./Prompts/prompt-cheatsheet.md). -- **[`pinembed.py`](./OpenAI/Auto-Embedder/pinembed.py)**: A Python module to easily automate the retrieval of embeddings from OpenAI and storage in Pinecone. - ---- +- **[Cheatsheet](./Prompts/prompt-cheatsheet.md)**: @Daethyra's go-to prompts. -B. **[Prompts](./OpenAI/Prompts/)** - -There are three main prompt types, [multi-shot](./OpenAI/Prompts/multi-shot), [system-role](./OpenAI/Prompts/system-role), [user-role](./OpenAI/Prompts/user-role). - -Please also see the [prompt-cheatsheet](./OpenAI/Prompts/prompt-cheatsheet.md). 
+- **[multi-shot](./Prompts/multi-shot)**: Prompts, with prompts inside them. +It's kind of like a bundle of Matryoshka prompts! -- **[Cheatsheet](./OpenAI/Prompts/prompt-cheatsheet.md)**: @Daethyra's go-to prompts. +- **[system-role](./Prompts/system-role)**: Steer your LLM by shifting the ground it stands on. -- **[multi-shot](./OpenAI/Prompts/multi-shot)**: Prompts, with prompts inside them. -It's kind of like a bundle of Matryoshka prompts! +- **[user-role](./Prompts/user-role)**: Markdown files for user-role prompts. -- **[system-role](./OpenAI/Prompts/system-role)**: Steer your LLM by shifting the ground it stands on. +## 1. **[OpenAI](./OpenAI/)** -- **[user-role](./OpenAI/Prompts/user-role)**: Markdown files for user-role prompts. +A. **[Auto-Embedder](./OpenAI/Auto-Embedder)** ---- +Provides an automated pipeline for retrieving embeddings from [OpenAIs `text-embedding-ada-002`](https://platform.openai.com/docs/guides/embeddings) and upserting them to a [Pinecone index](https://docs.pinecone.io/docs/indexes). -#### 2. **[LangChain: Pluggable Components](./LangChain/)** +- **[`pinembed.py`](./OpenAI/Auto-Embedder/pinembed.py)**: A Python module to easily automate the retrieval of embeddings from OpenAI and storage in Pinecone. ---- +## 2. **[LangChain](./LangChain/)** A. **[`stateful_chatbot.py`](./LangChain/Retrieval-Augmented-Generation/qa_local_docs.py)** @@ -54,8 +46,6 @@ This module offers a set of functionalities for conversational agents in LangCha **Potential Use Cases:** For developing conversational agents with advanced features. ---- - B. **[`qa_local_docs.py`](./LangChain/Retrieval-Agents/qa_local_docs.py)** This module focuses on querying local documents and employs the following features: @@ -68,13 +58,7 @@ This module focuses on querying local documents and employs the following featur **Potential Use Cases:** For querying large sets of documents efficiently. 
---- - -These modules are designed to be extensible and can be easily integrated into your LangChain projects. - ---- - -#### 3. **[HuggingFace: Pluggable Components](./HuggingFace/)** +### 3. **[HuggingFace](./HuggingFace/)** A. **[`integrable_captioner.py`](./HuggingFace\image_captioner\integrable_image_captioner.py)** @@ -88,6 +72,10 @@ This module focuses on generating captions for images using Hugging Face's trans **Potential Use Cases:** For generating accurate and context-appropriate image captions. +## Installation + +Distribution as a package for easy installation and integration is planned; however, that is *not* currently in progress. + ---
~wIs@t*sp;U5lnmGmje@5j&_8OsA2$rKWKIA{lXuEor6rjvO-_LsM?J=^$Z+U zHuAsIW!IT|>}bWj{l)85N-Wj%;{|uq-j}9O&CuP>*PdVh=I9>bG;qC@7M}8}-Vr+o z?Vu!L3Mbf0NkV|(`~xD`+nX4+IUn*;j|_n_h_yO-2lLAT3!(8c^|_Fa(ch7{`x&H_ zTE^xL%&Ih!uSoy3jX(B;whGmHlHh=eFtxvv%r_zK5y56oBY;vNgu4tVfsP_r@^UAF zUs)#93WRpGj09;A=PabcvmT`I9^q}Mizrb+1lvz4GL{qOVE*k!Sxo!nh6Y9w1AO_1 zQ=Gd>VlK{LR&kJupFcXuUw|)t62U$g(Sa*av<_bhctjs`!`(t`H8^-rikv`_&`S6t zs3IAcER@dI_Pgdfea~dzfts4No28h?zMcC=fhq`dM}p2%qy%h}*3uSWoZXfd#-@vX zpV$w4t9;?{{DjJHqe9H?nqlG zfyW=XPM2T#+?xXMPiFtky}zp>mZ{^`bFt?GEx9bS^Y@yLw @^F?>$+<7M`CJm`zVYHpP8x^cYZ)V4dFZZ%oGz^6IKvU80( zYdTJqRtolc*26r)nyagF_`M_>iuEn_Tkx~7o)dHEarKI+MW@p}$A@J%hOAF;ST-A8 zUf+rOqz+xoRDEGg_Z$JaX+|6_>? zAP+Py&M!Ks7zLfn`QN-e%W0{|+SMgnUJ8#Q$ZOk09Hs0s#wAgd_(&WzIOfK>mH8<3 zo(ja4)%7Sb@*tSA_5Dlp!;sD|3RQw1r`B$SKgk~$3w2Y@emKao*5V&WEEYi?h`xCW zGPk*4DAaKFK7LzRXjmXlth~HI{zd+?uCUP1uwB=@#=Y7zWRITRWmCu4b*#Ea*0K6= zP<3kQ898An{&shZM)ve&j56qOFOaCpaXSzo(>oVnq?OVe>EYt+@h;(oq^!=&s!o%( zgTYKb0*(%1`#VTDnVKPj?{soBlJgZ1Zuuupe09DSQ+&!RzdGk$k+&8xkVn&e2RYDL z9+V6u#PS0LiNpiN;@ubN8ARpKb%pDMQ()6$s3iX#t@SM7HxcaIFqHtM6D0XDi_ TJ3ql$7lnnqKG0ra$wWc@m37=6Tr;JyLOR(;a{H_Y_WcH z**1K9DnDNLQbTK9;rYw?Xq+ZmvwPptH}#)}1|Md9BYh)TkSfIgb)OKx z-=ya_0i?W(P(d}>nN1VH42@SOxfksfp=m}?NbD6PG)F_knk8MZV-RBhJ41GhymBt< zP~PB?yI=2Fc}!;;2ga)X?ed$2*n_cDSJ&-z`(f6!al|K*aA(q{rEuu(9MKHy;7yKA zPA#pq#PB>*Qdg5-f;pUpOSj_!ATDHhMIFU@D15+ml!?|avdLNsuU$JWWyL+6`_6e& z6Ifq-ziEQE@){!8vK_|Or&5wpR%M^5-|o^NA7*uH)f;E?+Vu}Fs)_4RlvL<~N}hZ4 zrKQ01i?!`16cd*|sVDxV^%Kr*#6qO~yq4FDHpLW44tGy(gc1*z&X`sFtP@t_e_!lQ z<*mv~hbSX;y=qc67qca0E;0>Tw4nY9XNr>dX!=d3N(KAl5RvKELzZwc7V8t9@`m!f zV9|WjMA(g1AnoSw0;DU*RUz_!dV)ng%|jJUT$yh4nY1Q+Mwg+v9K+L?d5&wOc2HWQ zlV?IU?tW0}(K7hs=j6DRP||nvsF3M-`31yguooJDMW^Wx$_VO!Eb9hsIwR+>6KmZk zJ9In@)6pzro1Ejql^&*c4L7)Fv6qlcg%{U@XIB-reIUB7m}JneOpd{OZW3QrfHbJ` z1g~arj9&{sHA8;Sed@K^mHE(bu?DoV(bXm8{>$~lKEdj2<2W_TahK%pf@-OkKVZ2e z^* u|nfk7*Ai%{9S7 oLn6_AsvSAkX#4m1is6#VRL z73Yj5)uN=i?wr7cs4A_33jma!DuG`1`7?>1dwrqMWAD7D-NS<2rLL86apzfXupNKa z!M3HZKpjPRP&Wy5xbre@Oa3( 
p6jUD6t-YjJE;j% SM9=bO=0<-M_$R=BFU(G9}ri5${h z;pvur+=X=i2c3- $2QE?0RUv8N0IPWM5PHesPGR{cCB{mEbLCg)lV47{eLN z;wo?)+G&dTq_*S4p5`5CPl$ckZ3Ooz@0(J{R7?ZuR*MujlFe+}1@>^=r-gT?P5D++ zg7{UkB!0ff8`#|_Jy9B0-&v@}Hw$kKTaCGc30s& 6l;vd GDgb=P9O^-Fx zS`!q}Um3bk_G>*G0$W8eR?~FTZLfALR9TJ$D!Xpl7f+MNdVA0+4oRfvK2+ej(Xenk z9|xp#t9dzdgSZMyhTJRb*ilqm*wg|>qQ@kq5wQ)#b|(B>xt!6#TBHXK{l#g6*0|&N za+?A4%L8N6P(VOH(}&Q;gcuOVC1XJo?rYx7#A!;qe%u8BzBv6CVT*$}-x2hUfGDaX zBA9GN-A4k(j~mG5HIZt*-*mr?it~Gf^#;wmhQ7PG5C6 B!EpZ7M?Zf# zJJy)$d_3Y_JKvp!3(REN%N=_5Id@pXxz(obp$?Igt=_6MkaTjUt@GX>0&v91fI}S2 zqB#z5huXawx#DcnlJiu5mgjl~VF7nXT>(r3`h7t6r+A0X=Khq%*GL5MgU`01S|m{s zgkp2%2ZBqAc|CYLtaf}XcnK(T;ohk+yZmc*F#?K}Ww`-WY^hKZi_%U~XrO8_NsU@d z@c^y^+vr0p=kI6)_i`lKl!zY?s;lIbNjR&A-Neiwr d9U?we>3!@~m0i(KwXr%;A2$V2&WLKT)y87(IA z)|+XP!~)d%fq^&zU+_6h#BRPd8@`?34r25v>3sd0&HI5$a*GNFA&I{+a$03^lq40t zx;gqRfp(idRY=sY8(1aZ!OEdgcqB)B@eLcxQkaI61wx8SR6rRjD8-?UGBHpNx|gWK zw{9A23FBrsg3HsByBCO30P!_9&gb V= zn@AvP7DMQO^LueuFoFZM-XVEJaQ)Fd@sOaT&454> 1+s6Lsbb=Zro?t1d2n{qnw*Z6CW;ZZ zfOj`d2ryth0liPFlOp%MZ6`!(Q8Qt`sNzg!mK+;yJ4A3EBI*%b2860XW|)*HwYW~m zRGpeiHI|~uhHj)VvTWl0&Nl%4$p1v!dxkZ&w%ww-EQkn^-pNu_q^ooU@+wOZ5F#}c zB?}ReCL%3JNS4z3QUsJ15i!z?NK?QB5{iO?bO8mDsDOaXD8vaVYd?PH+ULi9-*12C z&-uYcfXqqeoKLy$agQ->_8_c5S&`&~%pp4wlRsI7E2_L>aG^b{OPmX8L}nDA>P0dQ zX*mShJmzEQCFN#&JcRC*pKZ2jE62;htB-<;^eB5~Weo>+z QFb`EU_zI^ul~n{&o0FDRW<9G}BhnX9tj@Pvx7j^Y{2F zD|@79Mb&TMwdOdaC+>VCShM^ =hnsc@ChA8|{9q z8Xvplmt%t|e1p1>0`9~PIp-Z+Q|I$XufFk*`umi+_MPa2Hc#o^iOpZDv1z+Db9EP? 
z6Kv`pBxc?IUYo{I;Z{(;@hBJ1ZueM ?u9Ir3ouDX>eHFw*erKpweIe5lB^pyR(PIj;xsm&?h z{AlI&jd>S$OoQ9k%?%rjpvt?>#OHlazLCj6iJ0lz8FrDU^y=Ey?592wNh18iwCve6 zTPrOh^CbsRT(tmZmMqU{6Q<2~qxmLOJ@8;o5y_RZd{97?6&XM+Onj!ncUZKI-X(!9 zAyMEAvfUFSU^R;dSR(p|*nc=22|W;+5QkJ?08ga+?4IQVtYz&%F8tUEtid)kU9d`% zWkD8(!Y&3AtHF3&Fek!Jw2dI4AMxJ~tq{s!dsa8qNwO$t25$RjOTMhI><$2D{_Vq| z*KJ4w5j0S*O#HJ&7xe~6RsWA8Wr*}l_# +_Xo6*;S3b({zk7riQfXs z9wnH6wwMZtl~uyW;3DrYf-Ke?+!FEz4lLSY!h^6Y8fX*1q8-hAsd*qreiS5 F_7SBeI0kuXt-_m$BN0_?OKu#z1mwcrK%-`30Bfz2|Eu4)`|AZOKQ2i zL;Uj%f!F?m8~$j4cJCBX48ljpGhf{@7`Bbh%DsEkH)K=4W3>_(uv8XC7;9%Ne6mt= zXD-`#?HV*>mB?I;%RwF^XWy ruQTtk%5N73|yoPk|+hbbqMc-wM zU=(UJ0h24~#r?(Smq%3{sx)gt*RCVC4`J(t;Mv_o5Ib!wPId%?_wYsL&icxJWKlAx z;_YXjRUCgsWbUGIGDDmH+<$k+F@x6PsInu&`Xg50xWsT;u-Mo~Hads&n}w;OUjSNy zNKarby0x{CR2XL=QsM(lif+~;#d*6&$4 &oQhNbdei;)ihaX8m})fdqp%~!>p$>!(roM`a3OJHZ3H4Z}1b& zzW8u4{JxRK9W5nY)QcerH8?O7AALXk<%5(z`F;D)4&Ch%r;%r1j8J@9kk;stLfi{a zbCWnF{Uoa?5Mul88Ct6%#;a=-hTrE~JRS2Byv{L;4T%ho3iAJMy F?{QxYM5>{BTmwrAp}IfdafD1&q8RB{w%`d7li zn?JodsI+##`P>OR&`A=8#qopbR=OPhbik!#_)3jMYv(spQu*;fQ`O!r4TTC1OKt z2N?A6+{+`Ys7tF; G|?xXMWBl}v*xIdO><%HwNXY(tkOga&gcppJ6{{&ACzxy3Qg3t?4Osg%p z$Zj=N!F{U9+w#juDT{+ZWj=tFvN+E9=?>S`3i~(x`I;$~KFsff% 1Sa)ie_G)s5peofR NLgJ7*Q zI)Ry+G0aDd5!6hU^LW1A{BUB%ICXaue5)E~U?U)fTkY`SU*qUi+{dG+&p4zE4QQV6 z7UKj_Qm(8tktAmpV+?u?;x3Uu0^tO2W96#gkiAwia0~M?n5n?BMh_P7eBi<^Uu_t{ z^TGwPuGBEGNx%wY+uTpiAcI#I$+XFvOF(xBrJ7!HnrL0Q+dl{@*y_RFH2O*)T`2nk zdemZ79tU5sh^=&FTB@_Ha(VWIL6G8`;xJq;M?cL74IehiqRl<9+S?@@ztcR)WlV zi2LFZexIP`mxY|k@-5lko9*&`Oky&@1oo>DGDM*Xo5D_G@4ys%Afh+HH^F{-X*FfG zUOpQYLYY)&VlxQa7StqR%egSLQ1l*;Na(Rr8WJADyK@S=LI47B;uk_5&U`s|Q)m@5 zE4?g~36kQU25IFDsywcctwIFl9tnuh#8TbHGD$WXl;CTpI*pN>hNx^wX3LIGzP-xD z{T7{oDxy*&ozr`1nW3KQgW|14V~BS zH?YuCb6*n6z-67 |fK<-vY?a;T DdNEzEb!^n4 zAA8)c(y =Jf6--xr#4gixf=jA(1TEA857!Ttf)%>cs z)WhXfd<}@U> hOQ#$dP!beT75F`W>kZz+O{;9Z=n~N 4e=Q(TAISR 7p2RnRj3WAnif-s3guJb8 zKdPaMRi47QXj(hv*-kL7EkNqO`QCm?>27VFxQh|w^48!S%Gf~mE_42ltJ^{^@5IHK 
zscU4t@Q|$65%2jUVkd7?H0&Q?b1H2`ZDoC?&r;A^%SNm0OGEX<#B&7UoWhlwa>Cmg zafnOdU4es_<|t{=%JBl%uGZd8^bX+|vI)67KPUd1Zahzo;lug4a=e_-heB9LLabMv zx{t~!=lGt(R;ocWsy7{KUN Q^68p5Vym*EYYV{BG6zrD%*EEoWMdQVt3)4S KGJG$4**13+_^^L~z=jR(s&roLTNw zfWHUQWOaP^)swoKiQ7?ujs}0g2|PEPMt7NXUMCW y`ZePU-%i=lt4)nAJH=5I%y|yY27PAIy7C}k)4k^ j_o(M olIouck{3}W~AU7wX--jrj LaWKMs1Tk<{)qg&toA zv~U0*bn5bLdB^#Gz#8T7&(2MgTmTyNZ}w(3(QXpu*Aehx?8+khJTBN}k#GoVxoLEl z=7&`FLQYyUvFZL0IxCujfS-ZWlpUYo8x~4^Ey@K;H!~Bpmr)J}{yan_W<3+i_D8(8 zSJAk8$*XL)r-=*I;AouxW;Z6Y`(%o`7+SB(qvmEsdZ)FRuh*3jG@@uu+o@9L{PE|H zS42H oZK-)^+V&LwqBP#OM1O;7=f zI831hh%o|T@Vct!h3`?{L7weDTV6BxaUhW$>WJZ{un z G;5^ zjm^Nn`0}b>&pGZG%xHJuhNS7>-|Kw)(lipbk2Yas)Ldk#Za1odJ`7#D_r&M+pGVCe zMdZ>0?p@hkT$9o3Uo*n+yX@+y7o1<+cO?7M=mpAKv%VuY?>x-A=X4~q B)=F2F^`27(=#@HHddw3{UDifgtXPM zzN5J36Dg7x+^PdS?Od|$PF{_8__cpt+qJ(Qm+t6azq`^)+m-wmw|epL6E6#QGy6rX zcSO@lm|jBVDWU>R5jEFLQ)(Ji43@$pAlqcT2(&pBpJwQTRkD2yGkTOtpPhTtJXO{f zA5s(KZCn}=eMz@6LE8RVYo^2VkYeJ>%9W$yV{s_H6_)!NxS-7LAwoH$sEvn|u_L4t ze2Q#Jd0G`~0JY70l dr!fg4sL%~sqLcufQd p365s^5VRTxUEN-o@ctfL~$Y`UM4T1(8Q#rCZ@zKF3$bi$F+v<89)~J zAP4&YD%+;4 Q>RoMG1gsvAi z6^_;dsRbp|7ZjX1(hS-T>UJEe7rBkPgYP_Sh2V$7HLTg}A_`*==?TARMC^iTJ*}y9 zWKslQfkRBCYT#^{v&sB8=2Bgx2gkMto3z=}R1+jTGHjvBq)F=KGhbTwcwqr=Xznht z9@9fp;D^Aeyc2wJU}SX)h=zcmBUgzZ2`+-hlFtyz!@+}{O13u|vA^MEIXkQ70>%g` zFp2>M#ad|$>qbq#Sii`bNW #;=rSdt%R4n)5I}4+P zScr{ !SLnqI$ir(ETf%AuV} zOm8Z7pfaT_p!gE5FcYZe%pe$FEUu%bE{axpnjP1Of}Ck`3$%vb4B{0K&~7nAi4(fC zp>Q{N1e#_UCvxi6X*s^?(5GiX;*b!LhT6t=hsey?M1m5kYf0DR_}kq8(uFrT@=nm# zb3wOp--)~5e+Vv0NrzT5X>&E#2t($&gY 6~5R4#XhSfpXq=P21P7;ORhE1OZ;u1v46fIdF043P%aBCl+0sHC< zN9)Eg90VFrFJHQlNlWERvh;7ZruKwS$#>HlKyCFez8%EwL_QO~V4*=hfj*d|rZhpR zU2Wt*ye{E(7&AAUYcfUoYpO?qs!dwld(K8nxKkEH*1X7*G?m=~O{pTtYo9oyZpM z#nG7Z-K*0g+y?lK(Mmj1Bm;O2O7~148$8n4t4V6wurfOykv6lHqt6(ES^!2AJ!fg_ zo!E2TDZ4JDkSGI+pxQzyo1rzHco5*umi1v{wcq0P`y jn6E{U6ldUQSqGp}>=H zY1JY@Wz6DyBqxMHI7VWOmubVv_xZ--{DTjI xl&yot%E<-f5pV4$l$jUKsY z;g_{JbIQT!%~8eFy)3rH_RB`4EbX} q4lF1h1>*uFT$cl){%MSd1#DMrFl)E 
zyiUKWhe4@DvzvkF+>s`$CtpuKjPU%r;lBHB+R4bA{Zt9u9yvd6H;%%&gV&9MnN{p^ z-dC__neb&QxH*FR?b_f6K#I%e9l{~!j27V6JU_w^wxPF2Uo@E3__DbUs^|fr`*xu$ z;a#EJxYZVTeJ*#B%o5C|QDtx>clZ)0RP%K?J`CYb2%FKQoUpA!sQdy7f_`=(WrD3_ z7Wm-s9Zj$qlQOlR%{)e$WG!`4k%Grjv`Iuy0fy8xn6yTk-Z{d`{+Y2NRGNj`I*Ik6 z;E|Ij_<@kmzd$y>2ozrJ{;?bkood($?65L`DKK$QG*NV+)p0gmSPp7tm+%_H?K{fB zB7sssT}GvN&nBXR#ACTNa3sfpPCWpJ*Wyos1PPeCVK5M>E5X{Sn1!MPQW#)Npu5__ zW%>UCBIrkW55QMfx2oeHy1rkYFxO3{B{u@r>Lz&H{wNrOlwP%ng?QY%f(pLsTq&{l zCsDRNnul7H!f(q1iwE^!NWoWtV8qlNs(}zL0ELQy(^<0ND!6I-F9c5;7U?LWWK)c? z3DfjuDLAw1>erdNGb8kjrgHt|-7X!>r<3^>R-)|4;y3BzJp1;-ZW5BP1GnRfaxYeiDh2C?nIu#J^*^$6 zM#U%Wmg8~3{UxQhcT+=+e@dI&YY9E;xR3tF=Xi=7@l_2goF~6d5}2Hu*a(^Un4m1o zHPu(8jrTqOcDVcMikf`Yt FfB95rJI$ub~5|iZ= ^lJyYR{2S~+*y}-cM4`>a?X!p^{rdPZ!mDu%~RsJ!? zL@VF^3 8x|?; i)4`#o>PIB29Z%hD>K!s*q;c>^v ;d&s6XmeQHbUcvor*sm}hMdmYiu`nXi}1eMY*6 z_X3my9XKcanEp1xYx6_J&7S<4SvkBfb)3Jj6G(R746hNl;E2oLU*GMDkB)zs*?$c< zOxGa3ubd*J7bHluZ|Q##fJr(US9f}9 4H5(1b7x?2c8a(mAkSrx0-FG2j?>pTXCj1ksg8y_|z$DtJ{2VDaSSm zHRT_IP0)2SA)CSmLHYHTvn _mOld z-0(4V-pT{o4Sg6-T!{vqJ*X wYad)~&Jc `uJApj;@8Q)ZhmQ0XM;_1v8}-pQu%#ww5lX*D_&+vY_9!>y`LL6-N3 z=a7-Ylg3i-x6i_w#Ez%kh<1 q)lWyJ!}_yIK{N_`Unmv8p{*UJ}=X?9W!+Jt-A& z<--sbgMV##FswLRcpzc$6v2b=5w*+8gs&|q1CZ}=uG@I@tUTWu+D(!U7VhEHP8FtK z;F#Quns#{6ZWqp+#v;H*Gn&`686_rHPqvnsYWNDh_q`m``o~m3rGI1)AAmUw_CH%f zC>x(4@O{S&fK%qv&(A0b5Edq!$V1)pc*V7h9Bss3J?%SD;#pA5vCc0F7Pbw^!A?{u zyuW5o&i*?epfR?lOWpEUx-_>@n_j|xo+)2Anyuq>mNl=2>oZ-jm%`XhVKJD9*ws z!^T?z?hv-4kZ* cD}jQ}vj>H^JH-F+S+F2r={7q?+eAg7I9`{K0(nq&L_6uP z_h8qw3Wajk{lb##f| _~IvbgU>cneT-ObEis61rH8?Quu($I^u_h>OJseAckx@mKY3GgJZ zeWzs3ZvF4RJZeM3qmNHpnm9JJjvA?+{5sP^r}xbGv`sHfzi<)rIey$z%I@2hCxN$3 z!&aEtpNlZ3*)}m-1lhcO!p>h*JTm32KSW|kdcn31tq$SRKnQTIP|5Z$zQytbo2D>3 z-6vPp-v;(yo{U0Dcm=mUbchIxB<@HlRu7g0Toe$a0`tdh+-N~&T`2#=!^IkoKHL1@ z)JOgWPE4s%@OZ?>p7GVdISQiqrL8?=Nt#rP54sXHsn>4z*gAn2R1E`e;F*GHbOYa@ z8;b-jA$CVUbq~?uWw;^hnID=PEh|O)R7U4ZF08l|xSt}6UUQPIAKT|5d-%`l;cGP# zdwl(@cbE8Z^RK&Pj-ggxy2;ml4s-KH$ 
y!_Nm+K<#j#(9wn?j)PWK+eM<2g{eEW#ZJ9j38RoFHZ*vstR5naw?sp zq*}>96O7s4;45!39hFbiHx8{8o9fP;h#=w?_I{XgRQZh!=8z(JRR4>V?^jHrJVIxM zNo =DWja_IhnwlZ9C%B-za ;>6lXDSf6jRrJRg^YG1T(c_PFihQh`0nIOW19GYn!!dR2|)kxy8v7LjGg zTyy#Dg~bFXhd&6j!Vh47JBLjn?e-g^suEt$zMqvRs4GzWBRR%_^0eI+hks}_I6klo zF?Zk~yl(EN?+Y*@B`tpoXnIkI;UFfX5eV9MGRGfWVApI;nc0}|0RJ54Q44!eIV6XC zcA}|2`F8lB{j-5jpEkDSh@(yqhbBG?+BCiu WAm9Uad%J-)H``=DdC5%Vj-@6e(k} z@@u~P!P?J17HDnCdOxoN?lShV>WTku!u?Ob$1aTVcH!nbJy7);%gKT9Y=oHC$7ur4 zTx`4lqN$a$GcP j`aH+*JoA@%WLy@T>^6qSr8wY&b@Mfs6Wd9;R@*5;N%&B+ho zKn4JU@D7y=R;t5MD~MAgVa%Z{B|_fADQiEn*U5n4Pk(oZ1cvXCymugO_kF$PNq_if zmeo$EB`59@`c4E|k|QGF_uV2Pv{ww9`u$CtL0x)X8bjb6RWI{H<;rfEDjO0_airwS zd_{(LS%FUxrVm&g+GJ}D)XE&F9=K|!8F{EgU7K0}3M00&uqOcs7R%WnFNIweX#8SZ zz7A>&@f~Ll*ehyAVTF#AA9q1OGcTw^EwsQCA^$RwUsWMgCUHS@|6rCy<8i>Y{{0bc zE7;uQ;S=Dbwp~Cn3H$%Eg)~DOo|yP&i%&g(E4L+q;AEY);rvVd2tDP4_gnWSf|r#; z#0}%zfBPWV`J*6x802{qhr&5n0sIbVEM0B+Ca9Gl(4>miP5jBW1`H-gBnM60g<4pE z0V0_OSad&l6Pw ?KE3BYO+0Sx5Nw!fsPbOt0MK&T NkM6aFM# z37>jY8yVSSujmp| T#u2*(<6^_q6OC8Rbw zb6ra|UXl-PykIN)-144y`H>Ek|I=ul|Fid%_9h73Z^{T~s~~+=jj$gnV?=A _^3agVXLewQCNOlmzz?{D0LX$I!ZxCiF?$~qBc2!Fb* zVRs=m=k&*mIj1LYv7`3g8X8T(cx=tTbMxVEyY^F+yK=U^ws4wzvzlnIK&=1rK2XW@ z@yTOd0r!&SSFS3SNf(WG{aJrB* Xh>2SqxFIp#g()r+qKF8v9zagLHO!NGk3zfG!3X==o=z4>OiB+lle z(ssw=JRjSJXDNdWy957hp@P}tfPghy60K0ezSfm_Fa(j8336~L&j?2tM`g|&<`7!K z$kr_VZByOc1Tj|k*ckuTy#v(Kg@14&9~Qdg%F_#P>pp;6PL9lOtb&`~112ebRvchJ zfLJ@tbrlp)6&BSD;aFna9;KFEbQ%|vd0_KVZuFk<>l1X^ZieMUu5Msdi#nQPlJ-SE zT22vf!8N(2Gy96xFu#zx*2x}xSQg#kD}{@=8+h-~iJXeR;ij$^*~De6#CMfK8FqSq zWNoYzyIe)*hkQzn!*M2`tY4YUIy17>RnkE$zuhu4bU}cWo&SKcK12p(uzmJ*p61VY zAN3uPcc~tZs<07%ZL>IB=i+4OXIMgxmH~dadU|0i`-8rH`@K1tur?v}@Tfj-_;g=l z`PX=iY#ay0T+eKbRnr}EDA(!_kJ7(td)BdN;Fwj@#$qil^&DqM=Zo=mpB1MIu{+zI z-*!8>^WFY5lGa|4trz|{YtVbBzDD>AhvZs8D;Ba!03GQp^*2x)$*NaS6M22a4A9Hn zD+ir&LP*3AOz#N#o=y9Ae5;E`I6@?fAe+W+9dPtJBEyzhE7<>#tf`kxFKloo_zn}u z3!jj31eFg*Jo!GART@ZI& 3+=slR3dvOm9^+7fZo zTlB!T$4eP}N{w-0OM$fR+w8HorSyc?E0=E11bjjkoW7@WO~iWYrLVRiw|!e$=jcCM 
zocUN5pL`_i;=&W{*VlZ1?;I<_WSi$6K4;*Ybn#oIxAxvS(y?;a!UAXO9FD!ilId>g zTr3bjX#(qGbSUXj;kVqe>)I^DPQp9214CunXsIgDy5R4+tDpVH?)zTrH~sB+`$c@= zZj~FtcBM^_IuH&IS$L5TRh9ep#)ZC@I=rFL{P&a9Ltl4pFz>{AD3~5p{Oc{TfOZ=D zKt3C3^WjWLM30^AQRX?PeSY7+nK4`Q8}t;rJ&Fna;u#ck1^gv+&SP3?;#=shChMRQ z$+_%Uso5wtAb _G4s6kpt#pzj|`taO%tVC*teqgQ;~;T{O<#y_y&&qzop_5 z^UTy_T|U4ane#WEe0%h)i9^c4YcP)&PhZWB2nuKE6466nT5kkkvfhTPvK{u`QWblU z>Txbn@CA+qlq8-AF6569LuKq_BJ)b+&ceVQMZs$O-}R<7wGC|#QEVMuue;#$*7j_B zNmW67X8QZ~tnQO~dx>JR4J3YvMn^7a__ha7y521b_AkNc#q6$j>%2=UVo9%myqVTy zU^?e Fo(ui#3 $Y~6pMBI$A-$i2~)=DjT2 q`t|iC{3eakAfe6XAi i#D_mg>aHTedfF--6+%XHeP~JDN 8ZKY15}uh;``7`=64xBbm-S*D{a~F!PI2cnexGtb8eK12<=puWw|NH&m&~ZL zNw% ;<|A$dHoC{j7EbHu2=eoiN|30AzS>0je#CfFMv^=q0ElJW>ltbZ83+@vG zy=a#TU>;=Kft8SEkylPdLa{wDv%4)EI4Ev&=6N_yqXP+Gn6Nv~okHnC zpt?~AS^#OPf)$C|AD--;G==U8(a%ys01}x{lVj4cg6?cBeNyg63BoEyh5r@|;tQ*` zZBubY5>K0rFsiJQ!+&yQ8iabiL%o;+c?3p_wP(b*+``S>u6BP{!235IN}l(ID?DnC ztStfUpG|EErv7nxMdjTpzkRZqRXQ<>tf$q5%~dNwy^8nd0u-*12VIP=;lpfqMG_n6 z3P~R|du%7H fHbU7aEy_Sy#Xd7O!rC8&Z)>nV+y=po8cl`dl& zW|N5*SLC^w6L@sQ94#B;y|x*fX>|}6PWJ`+G$tu+GdAgpPy}w7TW-ash3Q7;L%r!n z6s3hCZuz6HMNMU;Oy_7k=@cI!s1)uc42kqS#UC5PV+HlllE?5`xH61pnb31HvD@+S z%-8EHhje2ZQy}eS%x0H~!13%G%t5KeYcD5KEPN2DpP`m?D@M(U@+JA7=6b+)=hcPp zp_>Y^MXc}q0XMClN7~XkHOW1uE_dqJj~EmfNW0ul(st{yIeMkiX5OVq;YGZ6hKPvK z>Vb{9&neDz@^9^H>+i)#8$YNTow`;gqMX`wpef(1`stU7!)~N`3SU;9t0*YuNAiw= zoLuP #E1b&nwohU;Z{s zpYaSFvFeTv2sA`>l@iM_d>L)wnQ$%zz`CH12H1O-`T%9Z8-(c8)VP7LqN^y$vu_%7 zV*DI*SxUH2s=^1yP|WDl;XdfvX0zgW$!FU=m1>v$=GZnRC7*ZMxF7nwwxKhxCF@qH zd3e;`;w{zvRU>9HP8YQo>~5RO|L^Yh|CpitZ&Q^2AHLsjZEbnhT=wnT#g^vg^!c(l ziTA7wBfW2*)^^_~xxNDd`bG#hozqqDZW^bJzFYCJr*LY-=1m sQ>4Ce}6dh z@b978-?PjHS4g(GI}BzY%4eR_X6D$LG4~a-0y739al_oFubzkKJ4lv%`H|Ro&|D#4 z`G{e){od|NxDpBR0i>skZ%oMDkv_<%K40EszcU%JvEhB)HMQdU>$RJATGjkG$Juf( zSWDTqHbYIS)@PuoY28O7Vc*Ejnb@51ug4$j8P(t(damxz`L^rIa_zV6_1 3YCj-Ru(}x`$W_rr%%nt=gLBka`lDLu>O}b>)mlsboZHx@|Oe0OT zo_~lY1UHc{Ke0DlbRokz28wCD7ql)!xT|_MsZPhAib|4umlTubkskN)7Z8Du1cK^+ 
z?T98II0a2}rM|kD(%dN@HbA#d65)(@ d>KD;@yy{^yMS3~ORjCGHq zL ?w{UD2?`1R@n6#)&VSzu~Em{(Hsn03l76VYZLf7I`LI=u4aNoDNRwEO-G z?@dK^(mrDrE7q=7t@&{xk&O4jP1TSbF@+>`)i^A>^wTr%{CNl2JO1?sjT*U3zBf+i zl$QfZGO+-AcPv}eUvLL?`ISL*@tnFhzP`(u&@U_dfSZ1(`czc1C$IlOp@lTQ#cWf1 zjm*UGr3|=yxK6jW0JLL4Zw->}|BSBVyN|6Aw|4vG$9U Tr+e70wG3UzW=&$dX<0tHd%N~vjc_=h#y|AuMZEd;sS`g-X7 zUVg-FZ|C st8V=9#x1a}YP2HHwbwrBV{=#h zL7CA #XD pDxF=2C1%LMJb8x+Sh)HM#vl!8$^6K5ab`stqMEt`!;UV3_yxZ5jru6o9hR) zF>^r-k!pOc>+iSWbKxSmc>1k#rxgB`d978B tAM*-bBP@IsSJwLiX^y>6{*|>zsAAbG# z!c%yU)wy~XQbUhhg-cEML>Eh|dR^x6iIZ-ZnSKf%&98;u<<#Guy8HUXz>%9bgSAX} z9xtb_tUBu4+L*1un+l#%l?mfmRpBwd0vy0I;-7`%IQ?|&)+kUCWCjX{NitSqxIua> zP&9?|LA%Mf@~Q2bJas _KM|dMynB3B zLbwf==mp2cbMiU?xU`;bbpV%nmI_tQuQi-XQ0JH!)!poM?ePm@8ECz)y`;VMu4~=; zbp_Eccy~SBw?Tt?*^0xx%t)Pt&{HbDVzORyHOzvYvd}uMVc|SOdikkosH%BT{2t45 z+pFy(1w!|<{-+xr8v0dASMCVPw71zu9hJ4D7;vkb6yA0FL2w4K2oSvVc^CNca3)U= ze-a*IQYI^yVJ*w6JXekGCMcJI$ON%IRd?_)-jX;u&zfaep2*F%z(DeIpRSX$C>G%y zX{Xi(2m+d0Lq~mjYxK$+q5gPu+@B-1{c0euxfv-OFKW(Z6H|?evKIP5P#&wdcW^qE zg-Tx|ZYR70Hq3<`;<25-rZg>@jR4H|X-` >Jc<){m)3p6hf>aAree6ziy_c@)N=Ajj;n2YoW 0vI5Thr zPw^wx7bf?b$jQ&{#qZ!$Fk^F&-6xmvW+6jNRX~y1+U{pq5MDP4-xN;RnLCb`s Q_ c@3>>5 2?C zyohDSiy@>R$Y-4V8%MFtb>ir@Hk{ZS-7 5O%n)`9^@*KZQ2_{r&gck;)I*e_?)myX;r=9j0)Xt2c#YuU{b_RN zmAf1RSRs$=9-i;C%?bOQulC373q6___%C1ph;A1Z)Zr0o @;RJC!NSfln7l^!!D|IUi8TQ$3B?IoWCYb4Y?+5pVvi~1R9)d~ zQe$sNx}Hck?FLP%T{0+)Wa2Z**L^(5jG+aEWq@^}g_>)zFxczS)4WN7FKB?F&a+ zsd1F)BZ>v*i V)U({IxE&EsZ*&e?~MXgsUsZ}m_@*Iu&H6D?p0u(C`+aJrE zat^lJG>QTmg_t}4ccH@nBxLxX|E_+-U8hUyvCbQ(-ySO-$m`g0&2B;ahjNeia@j$S z(~G*cD?TeHV-s(m^tPFCFVeGP>Q +7#`{#YGti? 
zFeJgngl0@C<$T$Xg-FSck7;3PMGK!^XpY1PzeW?G-yhK2s0RoPn&>ZYh8^c%Yo1t8 z**-WOMMop~=YUptTP>Wy^MFQal68&5DFZrod-SUEhsC0ow4^$`fkb8H4BmdiAF7@3 zm3-M$-`qItifNcQc=LP6Cb1r|ED6$ssb1l3MHXpN- ->)i1ef zZRNOFrg0*BAJY--Or;JzdbfqfNytrXoZUNURZ5<+B2|60>0VnRKLh2QI?z3L?wr2Z z1=D`%?4Eb8u2L147Uw8wW!UrMxg!3cJ?Ryt+N+}*OYU#Jx-i_buJt*W?T;=sHyr)y zmW8GFin-Xm9>_QCJ=TAXtkooIDu^_$^G(?;;nL_9?7 HC)qfi64= zCdSV(DyXw`S_;Jo6TSwcO&_IMv0d0H6P7KVC<6p K_j*xa871N1Lb?s`OEnlI?mdoKxO@y_o;>!(f2{y*BK4ud>}JW0Wya zw!dUv^nMva(&>p*w*56SM$s1Ooo{}tRI|q{)WD9#_K;NvrBM&(>`z%0yaDek;ytw& z@qwoZpx6mm4>!@mHv|!&gu=o-h^cGnR3%aeBAEy|{j)^{cM_2~4FB|bTIepQB}iPJ zmBi!93^M2Dm;2cc+1B>s?bcNvVTZXcHRTsisgU4XE;y;|e5!Sx1IMH47Dt(jg!h2~ za|6T{cyt2P&m3OzRRsUjM)(HXB+tlI GJed>k0T?>NAb#dcT~vwP37kq*5s#g1Gm%5_NF{g$IJHC6$iV zr6pTfVmq&EZ~Aa(e5t_>!U3QZaD#1`sU6{|-}%>|iB2qh(6}BFI|oow2Ah_PmwkL* zI6O7;GMr=0)+0a0`#~+8Jkp*;tR!Ch5hc4l`~m(1qzrE8mE@pm-=Nm!yhn>lWi|2z zLa B810S&pvZA4SC%j~` H2hzby?H#8 zZ67z>RaZ%t2_eK>rIIX@eVfePMu^G2o8*!$A<4*?sbt@-63RALiLp 5qJnwSf_xpLD_m4gwW;ExV$8ny=`8$5!Ek%n_*q;cJP@?gNdFOXB zR`Ig^UM!>KQ#emB9Vu%HRq}oOcb?2Cya=4CF}1|?h(Z;jVr~EVUd}C4S1h!M<2cD0 zRPNJ1QZM(+^Y!z%J%92DaeXJyDjzf;pXjou^BOHo-yOdj&x^t`=MC_dnR6EE74F~d zTgE{8 FCLaJ*;B2W_dSiuKV77LSg_oa zw_T?B7XS+X>h8XL`rJQ%q@EM_%k+=2truYKPdA^$>diQ=z_ztPi#aQX2WJLQq;Wa} z 3y^xi#H?AEX$3= !yNx+ zfkCy|;g!nSLAdpxLZ@vbKbpdL4rap%l}vf#xZ* WAc>dCr0uu<@89E&+mVK=&V`4GZNrjizUmm)=a8y fn8ij zv#V3|FYG;?YuT!xGAsGIx_8R$$(^^TRQ(`5>bw)~uC!LFv}A0VDqWR% (|Al-y+}X!elP!(PnMFgq3W&gcTfkoq-C7El$UA(v#mIbb=DqYG!1VFA)g# z?a&P9(34{guS{&V6BMvUErx1MXzdAda+5^?HUNEj!2Nm3bajM}AWN^sQI&T2s?&8! 
z3?>Q1wkJ-G0ib1~VuLUtKvRGX-nyPDInXRcCGm7(Cv>(YdaITdBx;A6>`@c*ZzMj; zfrO*!x}LJ_{Z|YvKQ}}`D(%K6fm$WZAdD;tMZ0ozs|{aTmSN-UDWkOpPtHz?yY84r z=9Eb4>Cz%DyZ7DK4B$KIM=m($k8#8FNy%@T-np9>Nh`LnLR~7=`-Zlrb)DS}?Nuk9 z7R0s>C1*9RUjG_3g-k7|F7pukMv~TBjJSQAw|ZiU5CWGXFaWRbHKMJiF?NzwSHQ7# zsgA06fjh~rD`V;9UcYjdlu+x 2uU8Q+3fBZrOUEEceON&*1wke`X(wn!iLT|@y{KJ3B`^MqY?g}3*@P{ z;R`RAC(XKS;Sc)W62*iph(^6jZ4dLaLfX7FdQNrc4b2uNIA!?wy{mcExHx-QA7`EC zI34PT89b6WZMD4|y5U13vYpo^dpbN+Kk7{OCuAzfu)n5ME^g8KA0U5YR!FyPsgw8S z7pl#6q!Y^f{K~N%0Ycj{scG8M3W^Q&u9wv+n?G#5w2VRl@SOSs;M{3jN(y3Nds*EH z=T(#2vQ;*5aWPoi*fWEzUyw>|<98KqiQZWZo;EX%u$`NII*-}o@M?a|{QJ|~SCh8` z^YivoVbZeN#jQr}k+L@L>qf7p^J`II6ebVso=kTn()NR{fb&cm-x&aKu=$;rsD)fC z*a;u#KSKESfpWl?96o*xwO~diK*_ Q-oRr!N-kB>p9ntJDD*(-yE+?)uZ zOCbRfzw1d2O@1~##DVCo{g7%905=ldji={Xy%@ cO >hV?_@{)m4C#pu63bG0F#MPMtmLA z;g~pN25m6=e_#5>zLS d9o=6YagdX1i*_8?XAGg(?zW$VZ+-6W6kodkp zW>c+J>HIHbcms}p8&I7rE0ml*Hd()NS^DLe`OC`bn$MdN?VQQ8n9IWz&-0}tT_+80 zsgnOteRHwz&>ItHcfo%=F{g09iYGk210`?c0Hpx|xnVOU=ef?BsNZc!sD1EE?&mT~ z7Us9lP>|a@8WmY&)cAv$)thPX@`+4QDw+|>BzGW^;QKc2jsa=EIq(*V@*M8eeZe3e z3NV(-pF8PZA;KIagACVw9 0JC^ z#rPQ3rd!_175DBq2-yMz+3Nu#)5yj?mq53a_>-|=!WB@y=ei|FB}PN57l)kbmMk6G zT ORV26KwV^gA(lN^ zM>!jV%DweHeHYIpE1dp29)Y_6d6lArt1rERn#atcG^Pdi{L9*m
|4ZMq)E8$PO% zy@vy}-+evtHfFVA&FA50K>UN~d&s{`7{-Hgx8^pNcs;6Hm!Y9neVzyU`TSudx5zfR zeHJQ*i(Ol|VcahfU7@ObOuYBC j z5Z>a>5?W1B3zI<<_8jgoAg8BbrX>#%+UKAgu+073wHqwIjq#|h6C3 2=8KQ`mQlk_y<+YkhL zSO0m6raS`>i)a7{9+OeveJuv)b0Gcs>|)^Q|6fKDRmUNGUxNV^C)k =GnS@oNETUkqGq~PH|M-ep0M+;HZIE+LopxgUmakBCNK3 zjCb;_Z-Jf|eqdT5Ryk2QQ{M;j=2*`s99O*EsWs`^RJ`BO_t=f!JD!0V6Ka0S3+cc` zQgLc|f4ZqyAU<&3v^rE|FspI8n#qN+6*d_zVHyz~#?jVgR><|)xLd)ulYOilvKQKJ zd`kS3Xjf|cvcSfzqD{-j#YlU~Oby7hjeP-zvSxh$-wB%kJHOBIyl;->ALQ7-eCRPx z&HlV!z~ 5qRepzQ`>>Z4;*Y)&iyeqCy-d fpk(C+?!ZS0Z1i%tBA!PddI(n(&ytjM$Edv;t(0@?Xv%50mh{ zc%7Z{=EMBYg4-u}*g;ScqT(=OC~rc%rc3>53LDHlW4ac#(hqOTgKujf_oJ>SA>{lX z16?9rSgLbXjpJiY5CIi@V0agB9M{MCZ$3d!j&VaFP}7Kbu_)t{Tu*3*fl9{I5~%Qa zlu&J&0?%fr5 dRgtu+rw#R1-Ff*!;6ldrGtU|=1fq*) z6I!~<*|nvH&ms(KmCb7%@=VnUekW|78^K=_WC@Jd7)iY5C;mq*_&yIDk`qn~wB=fG z(%Sl^czRG$F=YN7Jc$7L6g`uN0f3VjPgI)#^d)E(A{nm&kvXok?83S93M|@|{ee6a z5GPw7KU0;AX7VL=8fQ3^Hew=SA0dz;g7e!cMH+ClW)Kg+e3&&KUBuLcLVLLPYoQ7T zI&NaDEy{4-6)NLgV4AL;@SK4QOyi73CojHbb88t7%E!?!q1!VTIkz%Mi<{ZTB|pY5 z-I?X@$2vdte$)LXU!3ZV4NvT zR8k^rpJvA1&;!?iu}v{@3qPRKKg)z5cBH z7 bdmMturTkA@@Q$_dA$O1sN|_+FVZaXT;W}U xCnD5V$MD5q8`P%;hG`L^{0JI`>2V((MZ^=mUN>1twV{*L?@t zM!P_?vYkFL$=9}ZMnC)*AZI9bUVCA2_EB&|tn>+HZnU5(Rk*|}#I`#a+v|-pG0YnE z_}HiMvE4J|D!IpX&&G*|PyMW|TL*hI-re%d@_b#Bf%1-PorRhT)boY*njCEN^keeL z=RqgQWxW)?K;}s+vBn 2)c(_(YUK;)J7GmAff4S4;#*46 <@LbhppbPX$=ECQFVdwE#<}tRDZyn^A16ZZJ{0(^w2p< zxKjIw^4re6u{|LXmre(#$01Lb*UKlgp(SNh?Oj5n%RE90&fDz-%n%azV%ScW35A7u zh!RDHvI|;V+E)pF3_?6gQadd I-Y1j4E_7u3Qcn*qZdO7+{J#d>?Ey>3pD-t`dhaAqX?l395Hq`7Eqx0;r&Ao|IaX} z|99RKY#me_4{s2<9^rd1TnlZ=jw*$W056fDblS8$FzA-*WGotMS%v{0&}FVO+p3;P zzYjmOfqDoUx=mqBv?R{~c$JG|1={Fh>y!k7z@O~dv845`xq+0CUpV{kiv_C(f_N8- zTw@!OhEa^?#BZMPH)A_b`oLi<5X(!b1cD$Qj=MI*Q!C_M0tGJbxPyWa6vF_H>@v~Y zj;@2KYFs3H-o1{4h}NO&T^O$EBBQEm%Pb=4EAP)+RLEI> c3uO#Wb-z0HIjh~F z 8Ww_Pf$^7o?L~yw1Eee^X)}Tw$(VrBh;;=OS*uXd+rDG?FaAC*URd zlbFMNQEd1nV98NoIwN080+r{495?SBSe^JEF@h+66r>F63<9bYX86i@5fHPbwG(47 
z^2Ts{Ki_~8)R`8aZ@__dP$c|a$dBNT&}=#6%A`;!GK;b?Yc!0Q-)pQ^iz9xFjp07n zc}ws`Oi;K=BRcK@mOJ^{J3ic?o$@)TiyaF1#LL?ku;G?aMkw2d`(!7-Nvw=##U@dS z@hF*Q7;q3+<~gu$)fPfYj1xd3iuny&-%j6CgBHj3n{(u|Xm6X`P7DBlFZ@wFl~@hx zW?;+EY5WJ 5N|w5?zpT34HGb0^f;YpPKjA@)I~>I^^-m z?enM@1^ppoV;tcl|CFDD;peTWvG{e21kY)}SZNV#4UA6$&(eAqtBkN2v@pWk;sm%QJ3cv_Bj1UE;d+tO@OjU;Tv z(kf!xy}mtTop^Q~!)@&L=j=P-`MrEd-l}p$h3SsGWN0@pb6qj@>9TpgyhV=K^XpaX z#kMhr2V)N3wrlewC8z10PHYQh+Mjkxwvw%%C3j-d2q51!?Nj>9W(PdJL2somlAY8> ztl_?6M_NH Nn$qe)*as_+wZ!;1CePm}l)*kY@|}U~wx*qcYHI zX>(*IktH$ydVDDk7V={@CywzCIB%O`=V!DtDF=)tpd<|RC$ToOaa*;A&7b5-?9?`S zRXAhAsT6S?^h4tlJB4IIAY9bUYx_7Uh9@>NDbz0qk(tV`6C+Wv(+Z>Ok!#5fi1vM1 z=7T6(1j-wI9{>gWk)~{^wHAC(8v&Tfx1+>)=a^{79C?E{p`gogq&1RLv$?f9jqv#N z_Do_BR1z7+BCbrTU7s|o{Z6P-Emz~|KVB4k?6(ca2@c^kKjW|icK}u=&l%ZFtVD*D zE*eWi-#N=|;7}X%BOcgDzE}W|ra k!U)!mJ9^Vy_?AUtzNk!USW@?b=fX8ltHU>M!Q+D0&wOeKEmLvqxS-lO z)Oo|{9>t@lWQ5vty0o-Y1o?)8%~f-W=%IehI1MEa>9l)`4v)fxGVPs9yMyh|fOMro z^Y?-dU1g5#gDHOBXWtaWN90S0CA>(0l0dxgD#wTAoidtM`5E|pO0r-NFuU+d0ra}K zRAdN1Oyr@)7R&)-kn >;=%&hA<$x=1%rLGV2kC5RaFE z#kvsUHY{BG;V#%xSSD Ti#eSuhe QmRxBBgriFV}_%-k! zGwDJQUS7cCqmGD@Gn-E;bb*pCHoxY`)|F3N5g7TzH(tK{KL$^-VM&!FjvGa24H)V4 z&{HvRMDvN|>0=OL#@!Vxg~G`2Ql4`YL B%UDtT_Z2R7`Cc6+(te^c{s%(m&9m!S1XNIy`q>~wUH;HS;kc)+{M?R2Z3RG z6tduAeKYh&C~5v7NT7Xda`N*e*709#B^o`!cn_P>Mvu3~CbdUfv3c3^)HP{{bn)Zd z$j!=)$sNOP>{s~rIIb(0$@|#EA;zr=Mv5Ny5g;*n0Fs>Jb^yPRvWMr+$pUVj19&AY z8mJkB2mzP`kA01Cw0j<8_I!t)p;cZ5)*crM5>o18Yo2dtHY+rMLaeI{duz=s`-8j% zH-`mlBmc&;t7FGxrQI DlcmU-PP9Ur@0I$YIJ!+O; z&hE&VNsIRo8u4b+L60XM#jeHuc9K|(m80S18b9)^8Ho765Egk4cJq9WC5Ilpp|W!o z8`_@jT~|@k8vf;rbG;xVco~-c=Z{##vP`CRLWf;gFt&J*I;)>sEn%bYVp9-ZrhEg? z0lYUr72rx809hl@>5Qehx5@I#Hug&AE5(as0ZHg~VPYQg%Ao*zQbGr_bhc*3-oV;^ z;mdlNg<-2*G&L$x^GnP1*Prfb``rlPEdz{!QGhvQERDMfWwBFdXoPCw$_@ho!w-8L zf#iL*ZE*xPIY|fD*3*S*EPh4lwg9lA<{;0dx(3+qMp`s-FzsMR8tu%6-w!k3EAh?% z@%llh2|%+UOUPM>M0Vjp>! 
zDV(ZfZ$>vi*eL`Cqp=tvq=4rP;-SYLcGN2uC0gWKDzM%8gBJp>v=shl%$xee(XV5S zytUTF=3{2tPCT6vzB0}-x;=>bOj{n_A%R`2MTkH|s>4L@#BzPYhBil@9LSbqD76uO zCr6`vf+o^C#^{F$H(|-BH-rv?Ft+p?Q2YScA9rhuzVc3*_M!w!WabfRY2ub%k*rlk zruqvGps0%6K_}+x6R8vhV?|$%GnRVQB9gqCWC>f6#7j8u+gE9w-gH+t7XF)gxrXyy zjH66S=a*dRoKB-YkKL(=GaVQ9fUSW*{@egLk(6oO;#tfpVfw_E @^dF|0 zJB6dwND0Yj({q&vS;BjfJz}SylFU3uJu?kvpT0;a<_v#F_U}<`t}xW`PHyEbb3oLZ z4-i@jkKzA-EIN%vp}-l;LF_y;XeEnd!3~28HA{jtCt(WgkDUtRGa$dgG7vb412^HU zgl0va+0IKuv-mltMUZq~I2+Rj7ur}$S_=$g%7d1JXr)dW;oyRH4Y2KouaYaFbmmx_ z5N01% >?icshVCi{pS o&-aCi%H8(D*k#xl6m9FMXYN^Ha3lrT?LfNPpCl+)6zGa7_+Wn$v_vgyF+`X%&4 z4dL>dbJH+3?xmaG3h-L=W5e1?7<&}bIf}F_x)5L00~gFTrf_Vz_kl;eHn!Rn%86oc z+d$5`6uo3)b>3ADC9yU~n=MK%hVJ#ZZGEjOfh^F8GpiH$n84Btd)M8x-^SfP8e3jS zefG{v$)SOy6w_jH;5FOnbc8ok)v=BgPyN`&I+~bqI`Qz<^Gp@VRHr8QBT|YYAF2*} zn=d7XP20QVK3%ZM(4Vix+F;G*?7kGjxy~GSOyy8dsdYYV?|o`podir~)Y(J&`yML1 z#%DE)vphpwi6r?FBtbE%If~QZ(D?`UZlicNhXV0pJUfmw6_w;dS?MQJ9$`8R!yf|Y z6F-W;l&>Kn{?6IsOCATo=;JPM#6j9b;)e2uEg9 j-FDD;G)>rOpsNFDVjO4@gxW_&QI zJ^wTm@;5ffGP(^f#Zhe#A=1FSky^9nF*_n-KX-Y;#v|Je@z1>w4{(-$yDbpq09$Lo z1)k8fhm$npLq)+C2-5g7?g5*`L%0hsab{2$N1}~>P&-Pg^EO0u0UY9eATBY0A#eVH zMMo3c=BSo~tL2-37b4Hw$9AMm0$q{#pz%>Cl(v Ed8~A7jHghynx%fQxN;9^XzHR(;$>6#i?OJfyn|o^#Cp_v9a?vHRo%S?_ccL?K zmiU0L3o_3ZIR$*dq-_VDB?r+)foy<$P>!1gkpMX#PQ^SRinE|<2I7GKFQe;r%W$eo zP;&DhdL88WOv+)Lwe&+;UJ W`2c>d`n`tD|V=cT)bT(Wmi5mt%$ zA-ZUxE^Bbo*CG2)Az7soT+mJqABG>$^^mHENX$NuHCwhqZ3qjo3mR+nFoL21p*iio zPT7v`%%u@BxT->#@I;&l=R&J^z=ZtN^aGX?PmQ&D5+)AezpkAhV-4R)dcN`{H%Od4 za)qu!W4ty<5teezxbYmn4{q1JINa@>Os3i2-l}19GA_>_Zpw`KfKG0kdz*X1J%yao zz3fnS*s<(_WLyW}om^?N^R{_ZC0??k0YpK`B}4c&j}Q4GW`nIw_J;OvwnlteDi(sV zwBZg<+fJJz`WIk}Q9nE13Lq Lf);h%C9&HfD>3iBu+gyhP~_y1QUb@%?kd8^bQqy!nq+GlQB?gaW61t2axC> z=GyAy_FW#}YlEH1!!7A(V{x7^6ivp#cPjCU*tAZ90>K;X=!4D;@-RNzx;SS@k3ran zrMD2O=iA{4l;4`wAU*IMWtwz24kFjEk)6{@O`|xWk_%zNAUbZOGM>~ T(P4=Glb40beb|J$Wi+47P-A{hoshg^3t}c C9)BG5SB>oxpd|I>O;=Rp;=A+non(TaXEkXi>KN>5H%yZDmWPnpiIpyg)`?<`< 
z=7dH1gjU=-#;7-WZysT6B>8-pls&vP>KW7O7;|%)A~L6s+ZXCOITM17%@QANxn3os zd$QRxvvaRW(P=U_(`!` et)W|={pJcA58!Tkda>0Csdwvp3H&3mJ5 zOuI{JMdcF8yDs6__Kk`XI=#O>iZ(nbKF ()6wDz9)9%Cg^DXY2cyIJBRziIuf0u^L&f^rg`4pxI2fry*D; zlJcn!8a6piU!J;Hq095XMY)gj?pa@w l{l2461Q3<{mQDZW_A zkBik0lA}47-!9*K<*aOFird8{)~~RY!GfM%dNs-6cKTE1{*E)@w=Lh&3=p*p(Y<}R zi!523LHby$XNYDSE2LJ#;L-905mQ*qRqhjfTgts^5K5nC7#yP)VQFA~&?c84>Pub@ z 9?GxX61vfEk_VompFX4&YIhR@COqRbU{qg<{4L{_Nj6>LK{c zp9a+0N?_?eXcSLa2uzQH*7$$Ipr@6fuz#5z15j?&$)CIOnkhp=KNMe5D*7Q$`nVQ9 z-ti$}t>XnSUR) z1N={XD)g_~+?78L9{_lgH&sTkcD*SdSx%IQnaLe5rB?e_R&^-0XdW {f1^x4ldeuGS*_Z9`*zpQ@l%DnnJME?Frw|0fpr@NfZ&kxIedY zdxFZRe;=5sj&|i|Z5_wv(ehysvn@A_df4R|Pj_G86#=PP{=fir?#R*ShIc}9Puv>~ zj`3~Nt{QjEe&XCAgvcS;XwM3)1%u80yZOXtV{JdA6g1oI65H)N$L3=NPLnJbQ#wYI zPc9D3;k(Ws%=7 5>q&mb>9eKC z9IcLg1-3)V*_fCQl+sU&DP{hfyL&DU%)Eb>I#qkt=jvY(x`tt-=X0Mw_x2pIF4KBb ztcqNr>vew CNM5GEScWXAOx!v-5KBu4PbVXD^@Vt6h)!}l>i1Qh;cC4JI^T#tM zYf?@Pl*N}>#T+&-^dCwwv}2K#KLna+STqJAdN+yVdR$kCxNQcJU9(#7Qp9#cG_}H* z-rjsdNL9D(i`+k+L+cj%-ew+N(+T(?oMlFO_0U;+TWIs@VEj?yvTIUgl10L*IwN~@ zY1A+9YhdLu>@BsgNAITkD#c!MK9;R4S&*qL>7nP~aJ40opZ;^#pRn l-OTIm%Z z@F+{WJC=X) uVB|)Y>oK#je-Jxjix~_8Tx2T?i9$ z4SOB*gS~Zk&Y*tKG{rXrePE_+xC?89AU_GFz*xC{)}0Kccx?II(Ae8woNkWklVxw+ z;lMKM_6TJ#NyxJ|k2F{9c&1}b6T999iP)t0j`aIemksizvqh5&Pu#=aRQvPEJk=K8 z_r2S$Y$Qy{>JE!V;(Z-(r?(Sg^L2p5K+c)~w06Gf<3^6s?T6S`+X|!`)n>vjMQQh} zuMMx46(ypp=`Pjny-zOk-P^S;zy(3lo{~s>D@(;w + zCU@9JkXEMObutaOh!o1E7rNvw+LM$HJYBlf7W~vb*IVZJkh?ALmUGzW`R{AOzx^(- zwFmS~v`}B#P~PP26WJJkCT46(hh5K;(BfZF