Skip to content

Commit

Permalink
add 'reg' data index option
Browse files: browse the repository at this point in the history
  • Loading branch information
rchan26 committed Jun 11, 2024
1 parent f422c39 commit 47f649f
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
19 changes: 17 additions & 2 deletions reginald/models/models/llama_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def __init__(
Path to the data directory.
which_index : str
Which index to construct (if force_new_index is True) or use.
- Options are "handbook", "wikis", "public", or "all_data".
+ Options are "handbook", "wikis", "public", "reg" or "all_data".
settings : _Settings
llama_index.core.settings._Settings object to use to create the index.
"""
Expand Down Expand Up @@ -225,6 +225,21 @@ def prep_documents(self) -> None:
self._load_rds_course(gh_token)
self._load_turing_way(gh_token)

+ elif self.which_index == "reg":
+ logging.info("Regenerating index for REG. Will take a long time...")
+
+ # load in scraped turing.ac.uk website
+ self._load_turing_ac_uk()
+
+ # load public data from repos
+ self._load_handbook(gh_token)
+
+ # load hut23 data
+ self._load_hut23(gh_token)
+
+ # load wikis
+ self._load_wikis(gh_token)
+
elif self.which_index == "all_data":
logging.info("Regenerating index for ALL DATA. Will take a long time...")

Expand Down Expand Up @@ -556,7 +571,7 @@ def __init__(
Path to the data directory.
which_index : str
Which index to construct (if force_new_index is True) or use.
- Options are "handbook", "wikis", "public", or "all_data".
+ Options are "handbook", "wikis", "public", "reg" or "all_data".
mode : Optional[str], optional
The type of engine to use when interacting with the data, options of "chat" or "query".
Default is "chat".
Expand Down
2 changes: 1 addition & 1 deletion reginald/models/setup_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"model": "hello",
"mode": "chat",
"data_dir": pathlib.Path(__file__).parent.parent.parent / "data",
- "which_index": "all_data",
+ "which_index": "reg",
"force_new_index": False,
"max_input_size": 4096,
"k": 3,
Expand Down
2 changes: 1 addition & 1 deletion reginald/parser_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def __init__(self, create_index_only: bool = False, *args, **kwargs):
"Default is 'all_data'."
),
default=lambda: get_env_var("LLAMA_INDEX_WHICH_INDEX", secret_value=False),
- choices=["handbook", "wikis", "public", "all_data"],
+ choices=["handbook", "wikis", "public", "reg", "all_data"],
)
self.add_argument(
"--max-input-size",
Expand Down

0 comments on commit 47f649f

Please sign in to comment.