diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py
index 5369200b0..ea57f2c0d 100644
--- a/private_gpt/server/chat/chat_service.py
+++ b/private_gpt/server/chat/chat_service.py
@@ -8,6 +8,9 @@
 from llama_index.core.indices import VectorStoreIndex
 from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
 from llama_index.core.llms import ChatMessage, MessageRole
+from llama_index.core.postprocessor import (
+    SimilarityPostprocessor,
+)
 from llama_index.core.storage import StorageContext
 from llama_index.core.types import TokenGen
 from pydantic import BaseModel
@@ -20,6 +23,7 @@
 )
 from private_gpt.open_ai.extensions.context_filter import ContextFilter
 from private_gpt.server.chunks.chunks_service import Chunk
+from private_gpt.settings.settings import Settings
 
 
 class Completion(BaseModel):
@@ -68,14 +72,18 @@ def from_messages(cls, messages: list[ChatMessage]) -> "ChatEngineInput":
 
 @singleton
 class ChatService:
+    settings: Settings
+
     @inject
     def __init__(
         self,
+        settings: Settings,
         llm_component: LLMComponent,
         vector_store_component: VectorStoreComponent,
         embedding_component: EmbeddingComponent,
         node_store_component: NodeStoreComponent,
     ) -> None:
+        self.settings = settings
         self.llm_component = llm_component
         self.embedding_component = embedding_component
         self.vector_store_component = vector_store_component
@@ -98,9 +106,12 @@ def _chat_engine(
         use_context: bool = False,
         context_filter: ContextFilter | None = None,
     ) -> BaseChatEngine:
+        settings = self.settings
         if use_context:
             vector_index_retriever = self.vector_store_component.get_retriever(
-                index=self.index, context_filter=context_filter
+                index=self.index,
+                context_filter=context_filter,
+                similarity_top_k=self.settings.rag.similarity_top_k,
             )
             return ContextChatEngine.from_defaults(
                 system_prompt=system_prompt,
@@ -108,6 +119,9 @@
                 llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=[
                     MetadataReplacementPostProcessor(target_metadata_key="window"),
+                    SimilarityPostprocessor(
+                        similarity_cutoff=settings.rag.similarity_value
+                    ),
                 ],
             )
         else:
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 0fe1747c5..5896f00d6 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -284,6 +284,17 @@ class UISettings(BaseModel):
     )
 
 
+class RagSettings(BaseModel):
+    similarity_top_k: int = Field(
+        2,
+        description="This value controls the number of documents returned by the RAG pipeline",
+    )
+    similarity_value: float = Field(
+        None,
+        description="If set, any documents retrieved from the RAG must meet a certain match score. Acceptable values are between 0 and 1.",
+    )
+
+
 class PostgresSettings(BaseModel):
     host: str = Field(
         "localhost",
@@ -379,6 +390,7 @@ class Settings(BaseModel):
     azopenai: AzureOpenAISettings
     vectorstore: VectorstoreSettings
     nodestore: NodeStoreSettings
+    rag: RagSettings
     qdrant: QdrantSettings | None = None
     postgres: PostgresSettings | None = None
 
diff --git a/settings.yaml b/settings.yaml
index dd0f5a057..87a63ef4f 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -42,6 +42,12 @@ llm:
   tokenizer: mistralai/Mistral-7B-Instruct-v0.2
   temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
+rag:
+  similarity_top_k: 2
+  # This value controls how many "top" documents the RAG returns to use in the context.
+  #similarity_value: 0.45
+  # This value is disabled by default. If you enable this setting, the RAG will only use documents whose match score meets this cutoff (between 0 and 1).
+
 llamacpp:
   prompt_style: "mistral"
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
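
For reviewers, a rough illustrative sketch (not part of the patch) of how the two new settings interact at query time: the retriever returns at most similarity_top_k nodes, and the SimilarityPostprocessor then drops any node whose score falls below similarity_value. The chunk texts and scores below are invented for illustration; the llama_index classes are the same ones the patch wires in.

from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.schema import NodeWithScore, TextNode

# Hypothetical retrieval result: two chunks with illustrative scores.
retrieved = [
    NodeWithScore(node=TextNode(text="relevant chunk"), score=0.72),
    NodeWithScore(node=TextNode(text="loosely related chunk"), score=0.31),
]

# similarity_value: 0.45 in settings.yaml would become this cutoff.
cutoff = SimilarityPostprocessor(similarity_cutoff=0.45)
kept = cutoff.postprocess_nodes(retrieved)

print([n.score for n in kept])  # -> [0.72]; the 0.31 chunk is filtered out

Because SimilarityPostprocessor skips filtering when its cutoff is None, passing the unset default for similarity_value straight through (as the patch does) should leave retrieval behaviour unchanged until a user opts in.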