Skip to content

Commit

Permalink
Merge pull request #24 from Daethyra/working
Browse files (browse the repository at this point in the history)
Added lazy loading of PDFs
  • Loading branch information
Daethyra authored Oct 6, 2023
2 parents 9e4a824 + de27d60 commit 63cdda6
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions LangChain/query_local_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,26 +80,26 @@ def get_user_query(prompt="Please enter your query: "):
return input(prompt)

@retry(retry_on_exception=retry_if_file_not_found_error, stop_max_attempt_number=3)
def load_pdfs_from_directory(self, directory_path='data/'):
def load_pdfs_from_directory(self, directory_path='data/') -> Generator: # <--- Configure directory path HERE <---
"""
Load all PDF files from a given directory.
Load all PDF files from a given directory lazily using a generator.
Parameters:
directory_path (str): Directory path to load PDFs from.
Returns:
list: List of text chunks from all loaded PDFs.
Yields:
list: List of text chunks from a loaded PDF.
"""
try:
if not os.path.exists(directory_path):
raise FileNotFoundError(f"The directory {directory_path} does not exist.")

pdf_files = glob.glob(f"{directory_path}/*.pdf")
if not pdf_files:
raise FileNotFoundError(f"No PDF files found in the directory {directory_path}.")
all_texts = []

for pdf_file in pdf_files:
all_texts.extend(self._load_and_split_document(pdf_file))
return all_texts
yield self._load_and_split_document(pdf_file)
except FileNotFoundError as fe:
print(f"FileNotFoundError encountered: {fe}")
raise
Expand Down

0 comments on commit 63cdda6

Please sign in to comment.