From 104293f411cd517babf19ecb7d80031b9e6df5f6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 21:31:39 -0300 Subject: [PATCH 1/7] Add LoRA support --- css/main.css | 11 ++++++++++- download-model.py | 17 +++++++++++------ modules/models.py | 2 ++ modules/shared.py | 3 ++- requirements.txt | 1 + server.py | 25 +++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 8 deletions(-) diff --git a/css/main.css b/css/main.css index f5ccfe9446..87c3bded2c 100644 --- a/css/main.css +++ b/css/main.css @@ -1,12 +1,15 @@ .tabs.svelte-710i53 { margin-top: 0 } + .py-6 { padding-top: 2.5rem } + .dark #refresh-button { background-color: #ffffff1f; } + #refresh-button { flex: none; margin: 0; @@ -17,22 +20,28 @@ border-radius: 10px; background-color: #0000000d; } + #download-label, #upload-label { min-height: 0 } + #accordion { } + .dark svg { fill: white; } + svg { display: unset !important; vertical-align: middle !important; margin: 5px; } + ol li p, ul li p { display: inline-block; } -#main, #parameters, #chat-settings, #interface-mode { + +#main, #parameters, #chat-settings, #interface-mode, #lora { border: 0; } diff --git a/download-model.py b/download-model.py index 8be398c4e0..808b9fc239 100644 --- a/download-model.py +++ b/download-model.py @@ -101,6 +101,7 @@ def get_download_links_from_huggingface(model, branch): classifications = [] has_pytorch = False has_safetensors = False + is_lora = False while True: content = requests.get(f"{base}{page}{cursor.decode()}").content @@ -110,8 +111,10 @@ def get_download_links_from_huggingface(model, branch): for i in range(len(dict)): fname = dict[i]['path'] + if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')): + is_lora = True - is_pytorch = re.match("pytorch_model.*\.bin", fname) + is_pytorch = re.match("(pytorch|adapter)_model.*\.bin", fname) is_safetensors = re.match("model.*\.safetensors", fname) is_tokenizer = re.match("tokenizer.*\.model", fname) is_text = re.match(".*\.(txt|json)", fname) or is_tokenizer @@ -130,6 +133,7 @@ def get_download_links_from_huggingface(model, branch): has_pytorch = True classifications.append('pytorch') + cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50' cursor = base64.b64encode(cursor) cursor = cursor.replace(b'=', b'%3D') @@ -140,7 +144,7 @@ def get_download_links_from_huggingface(model, branch): if classifications[i] == 'pytorch': links.pop(i) - return links + return links, is_lora if __name__ == '__main__': model = args.MODEL @@ -159,15 +163,16 @@ def get_download_links_from_huggingface(model, branch): except ValueError as err_branch: print(f"Error: {err_branch}") sys.exit() + + links, is_lora = get_download_links_from_huggingface(model, branch) + base_folder = 'models' if not is_lora else 'loras' if branch != 'main': - output_folder = Path("models") / (model.split('/')[-1] + f'_{branch}') + output_folder = Path(base_folder) / (model.split('/')[-1] + f'_{branch}') else: - output_folder = Path("models") / model.split('/')[-1] + output_folder = Path(base_folder) / model.split('/')[-1] if not output_folder.exists(): output_folder.mkdir() - links = get_download_links_from_huggingface(model, branch) - # Downloading the files print(f"Downloading the model to {output_folder}") pool = multiprocessing.Pool(processes=args.threads) diff --git a/modules/models.py b/modules/models.py index 63060d4324..6df67d3c91 100644 --- a/modules/models.py +++ b/modules/models.py @@ -11,6 +11,8 @@ from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig) +from peft import PeftModel + import modules.shared as shared transformers.logging.set_verbosity_error() diff --git a/modules/shared.py b/modules/shared.py index da5efbd3a7..908455e1e6 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -2,7 +2,8 @@ model = None tokenizer = None -model_name = "" +model_name = "None" +lora_name = "None" soft_prompt_tensor = None soft_prompt = False is_RWKV = False diff --git a/requirements.txt b/requirements.txt index b9a9b38590..fcf000a930 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ flexgen==0.1.7 gradio==3.18.0 markdown numpy +peft==0.2.0 requests rwkv==0.4.2 safetensors==0.3.0 diff --git a/server.py b/server.py index 2024fd421f..dd35d9aa9e 100644 --- a/server.py +++ b/server.py @@ -17,6 +17,7 @@ from modules.html_generator import generate_chat_html from modules.models import load_model, load_soft_prompt from modules.text_generation import generate_reply +from modules.LoRA import add_lora_to_model # Loading custom settings settings_file = None @@ -48,6 +49,9 @@ def get_available_extensions(): def get_available_softprompts(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('softprompts').glob('*.zip'))), key=str.lower) +def get_available_loras(): + return ['None'] + sorted([item.name for item in list(Path('loras/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) + def load_model_wrapper(selected_model): if selected_model != shared.model_name: shared.model_name = selected_model @@ -59,6 +63,13 @@ def load_model_wrapper(selected_model): return selected_model +def load_lora_wrapper(selected_lora): + if not shared.args.cpu: + gc.collect() + torch.cuda.empty_cache() + add_lora_to_model(selected_lora) + return selected_lora + def load_preset_values(preset_menu, return_dict=False): generate_params = { 'do_sample': True, @@ -181,6 +192,7 @@ def set_interface_arguments(interface_mode, extensions, cmd_active): available_presets = get_available_presets() available_characters = get_available_characters() available_softprompts = get_available_softprompts() +available_loras = get_available_loras() # Default extensions extensions_module.available_extensions = get_available_extensions() @@ -401,6 +413,19 @@ def create_interface(): shared.gradio['Stop'].click(None, None, None, cancels=gen_events) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") + with gr.Tab("LoRA", elem_id="lora"): + with gr.Row(): + with gr.Column(): + gr.Markdown("Load") + with gr.Row(): + shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') + ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') + with gr.Column(): + gr.Markdown("Train (TODO)") + gr.Button("Practice your button clicking skills") + + shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu']], show_progress=True) + with gr.Tab("Interface mode", elem_id="interface-mode"): modes = ["default", "notebook", "chat", "cai_chat"] current_mode = "default" From 0cecfc684c6f5fa2531980d856c5ea56bc6f97ee Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 16 Mar 2023 21:35:53 -0300 Subject: [PATCH 2/7] Add files --- loras/place-your-loras-here.txt | 0 modules/LoRA.py | 15 +++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 loras/place-your-loras-here.txt create mode 100644 modules/LoRA.py diff --git a/loras/place-your-loras-here.txt b/loras/place-your-loras-here.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/modules/LoRA.py b/modules/LoRA.py new file mode 100644 index 0000000000..84e128fb78 --- /dev/null +++ b/modules/LoRA.py @@ -0,0 +1,15 @@ +from pathlib import Path + +from peft import PeftModel + +import modules.shared as shared +from modules.models import load_model + + +def add_lora_to_model(lora_name): + + # Is there a more efficient way of returning to the base model? + if lora_name == "None": + shared.model, shared.tokenizer = load_model(shared.model_name) + else: + shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}")) From 214dc6868ecb07c0c7c974a2b2afa48ce766e8ce Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:24:52 -0300 Subject: [PATCH 3/7] Several QoL changes related to LoRA --- modules/shared.py | 5 +++ server.py | 8 ++++- settings-template.json | 69 ++++++++++++++++++++++-------------------- 3 files changed, 48 insertions(+), 34 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 908455e1e6..9d4484c4cf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -53,6 +53,10 @@ '^(gpt4chan|gpt-4chan|4chan)': '-----\n--- 865467536\nInput text\n--- 865467537\n', '(rosey|chip|joi)_.*_instruct.*': 'User: \n', 'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>' + }, + 'lora_prompts': { + 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', + 'alpaca-lora-7b': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a Python script that generates text using the transformers library.\n### Response:\n" } } @@ -68,6 +72,7 @@ def str2bool(v): parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog,max_help_position=54)) parser.add_argument('--model', type=str, help='Name of the model to load by default.') +parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.') parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.') parser.add_argument('--chat', action='store_true', help='Launch the web UI in chat mode.') parser.add_argument('--cai-chat', action='store_true', help='Launch the web UI in chat mode with a style similar to Character.AI\'s. If the file img_bot.png or img_bot.jpg exists in the same folder as server.py, this image will be used as the bot\'s profile picture. Similarly, img_me.png or img_me.jpg will be used as your profile picture.') diff --git a/server.py b/server.py index dd35d9aa9e..8dacc13292 100644 --- a/server.py +++ b/server.py @@ -225,10 +225,16 @@ def set_interface_arguments(interface_mode, extensions, cmd_active): print() shared.model_name = available_models[i] shared.model, shared.tokenizer = load_model(shared.model_name) +if shared.args.lora: + shared.lora_name = shared.args.lora + print(f"Adding the LoRA {shared.lora_name} to the model...") + add_lora_to_model(shared.lora_name) # Default UI settings default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')] -default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] +default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] +if default_text == '': + default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] title ='Text generation web UI' description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' suffix = '_pygmalion' if 'pygmalion' in shared.model_name.lower() else '' diff --git a/settings-template.json b/settings-template.json index 9da4397012..3d1129ad13 100644 --- a/settings-template.json +++ b/settings-template.json @@ -1,35 +1,38 @@ { - "max_new_tokens": 200, - "max_new_tokens_min": 1, - "max_new_tokens_max": 2000, - "name1": "Person 1", - "name2": "Person 2", - "context": "This is a conversation between two people.", - "stop_at_newline": true, - "chat_prompt_size": 2048, - "chat_prompt_size_min": 0, - "chat_prompt_size_max": 2048, - "chat_generation_attempts": 1, - "chat_generation_attempts_min": 1, - "chat_generation_attempts_max": 5, - "name1_pygmalion": "You", - "name2_pygmalion": "Kawaii", - "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", - "stop_at_newline_pygmalion": false, - "default_extensions": [], - "chat_default_extensions": [ - "gallery" - ], - "presets": { - "default": "NovelAI-Sphinx Moth", - "pygmalion-*": "Pygmalion", - "RWKV-*": "Naive", - "(rosey|chip|joi)_.*_instruct.*": "Instruct Joi (Contrastive Search)" - }, - "prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", - "(rosey|chip|joi)_.*_instruct.*": "User: \n", - "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" - } + "max_new_tokens": 200, + "max_new_tokens_min": 1, + "max_new_tokens_max": 2000, + "name1": "Person 1", + "name2": "Person 2", + "context": "This is a conversation between two people.", + "stop_at_newline": true, + "chat_prompt_size": 2048, + "chat_prompt_size_min": 0, + "chat_prompt_size_max": 2048, + "chat_generation_attempts": 1, + "chat_generation_attempts_min": 1, + "chat_generation_attempts_max": 5, + "name1_pygmalion": "You", + "name2_pygmalion": "Kawaii", + "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", + "stop_at_newline_pygmalion": false, + "default_extensions": [], + "chat_default_extensions": [ + "gallery" + ], + "presets": { + "default": "NovelAI-Sphinx Moth", + "pygmalion-*": "Pygmalion", + "RWKV-*": "Naive" + }, + "prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", + "(rosey|chip|joi)_.*_instruct.*": "User: \n", + "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" + }, + "lora_prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "alpaca-lora-7b": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a Python script that generates text using the transformers library.\n### Response:\n" + } } From 29fe7b1c74c9dc583c60c9865bb93854a04a8e4c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:39:48 -0300 Subject: [PATCH 4/7] Remove LoRA tab, move it into the Parameters menu --- modules/LoRA.py | 2 ++ modules/shared.py | 2 +- server.py | 26 +++++++++++--------------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index 84e128fb78..c95da6ee89 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -10,6 +10,8 @@ def add_lora_to_model(lora_name): # Is there a more efficient way of returning to the base model? if lora_name == "None": + print(f"Reloading the model to remove the LoRA...") shared.model, shared.tokenizer = load_model(shared.model_name) else: + print(f"Adding the LoRA {lora_name} to the model...") shared.model = PeftModel.from_pretrained(shared.model, Path(f"loras/{lora_name}")) diff --git a/modules/shared.py b/modules/shared.py index 9d4484c4cf..488a1e967b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -56,7 +56,7 @@ }, 'lora_prompts': { 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', - 'alpaca-lora-7b': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a Python script that generates text using the transformers library.\n### Response:\n" + 'alpaca-lora-7b': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" } } diff --git a/server.py b/server.py index 8dacc13292..7d5ecc7463 100644 --- a/server.py +++ b/server.py @@ -64,11 +64,15 @@ def load_model_wrapper(selected_model): return selected_model def load_lora_wrapper(selected_lora): + shared.lora_name = selected_lora + default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] + if not shared.args.cpu: gc.collect() torch.cuda.empty_cache() add_lora_to_model(selected_lora) - return selected_lora + + return selected_lora, default_text def load_preset_values(preset_menu, return_dict=False): generate_params = { @@ -156,6 +160,10 @@ def create_settings_menus(default_preset): shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') + with gr.Row(): + shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') + ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') + with gr.Accordion('Soft prompt', open=False): with gr.Row(): shared.gradio['softprompts_menu'] = gr.Dropdown(choices=available_softprompts, value='None', label='Soft prompt') @@ -167,6 +175,7 @@ def create_settings_menus(default_preset): shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio['do_sample'], shared.gradio['temperature'], shared.gradio['top_p'], shared.gradio['typical_p'], shared.gradio['repetition_penalty'], shared.gradio['encoder_repetition_penalty'], shared.gradio['top_k'], shared.gradio['min_length'], shared.gradio['no_repeat_ngram_size'], shared.gradio['num_beams'], shared.gradio['penalty_alpha'], shared.gradio['length_penalty'], shared.gradio['early_stopping']]) + shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) @@ -226,8 +235,8 @@ def set_interface_arguments(interface_mode, extensions, cmd_active): shared.model_name = available_models[i] shared.model, shared.tokenizer = load_model(shared.model_name) if shared.args.lora: + print(shared.args.lora) shared.lora_name = shared.args.lora - print(f"Adding the LoRA {shared.lora_name} to the model...") add_lora_to_model(shared.lora_name) # Default UI settings @@ -419,19 +428,6 @@ def create_interface(): shared.gradio['Stop'].click(None, None, None, cancels=gen_events) shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}") - with gr.Tab("LoRA", elem_id="lora"): - with gr.Row(): - with gr.Column(): - gr.Markdown("Load") - with gr.Row(): - shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') - ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') - with gr.Column(): - gr.Markdown("Train (TODO)") - gr.Button("Practice your button clicking skills") - - shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu']], show_progress=True) - with gr.Tab("Interface mode", elem_id="interface-mode"): modes = ["default", "notebook", "chat", "cai_chat"] current_mode = "default" From 7d97287e691edf48012f193828562a42f0b41674 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:41:12 -0300 Subject: [PATCH 5/7] Update settings-template.json --- settings-template.json | 72 +++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/settings-template.json b/settings-template.json index 3d1129ad13..df7403d600 100644 --- a/settings-template.json +++ b/settings-template.json @@ -1,38 +1,38 @@ { - "max_new_tokens": 200, - "max_new_tokens_min": 1, - "max_new_tokens_max": 2000, - "name1": "Person 1", - "name2": "Person 2", - "context": "This is a conversation between two people.", - "stop_at_newline": true, - "chat_prompt_size": 2048, - "chat_prompt_size_min": 0, - "chat_prompt_size_max": 2048, - "chat_generation_attempts": 1, - "chat_generation_attempts_min": 1, - "chat_generation_attempts_max": 5, - "name1_pygmalion": "You", - "name2_pygmalion": "Kawaii", - "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", - "stop_at_newline_pygmalion": false, - "default_extensions": [], - "chat_default_extensions": [ - "gallery" - ], - "presets": { - "default": "NovelAI-Sphinx Moth", - "pygmalion-*": "Pygmalion", - "RWKV-*": "Naive" - }, - "prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", - "(rosey|chip|joi)_.*_instruct.*": "User: \n", - "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" - }, - "lora_prompts": { - "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", - "alpaca-lora-7b": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a Python script that generates text using the transformers library.\n### Response:\n" - } + "max_new_tokens": 200, + "max_new_tokens_min": 1, + "max_new_tokens_max": 2000, + "name1": "Person 1", + "name2": "Person 2", + "context": "This is a conversation between two people.", + "stop_at_newline": true, + "chat_prompt_size": 2048, + "chat_prompt_size_min": 0, + "chat_prompt_size_max": 2048, + "chat_generation_attempts": 1, + "chat_generation_attempts_min": 1, + "chat_generation_attempts_max": 5, + "name1_pygmalion": "You", + "name2_pygmalion": "Kawaii", + "context_pygmalion": "Kawaii's persona: Kawaii is a cheerful person who loves to make others smile. She is an optimist who loves to spread happiness and positivity wherever she goes.\n", + "stop_at_newline_pygmalion": false, + "default_extensions": [], + "chat_default_extensions": [ + "gallery" + ], + "presets": { + "default": "NovelAI-Sphinx Moth", + "pygmalion-*": "Pygmalion", + "RWKV-*": "Naive" + }, + "prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "^(gpt4chan|gpt-4chan|4chan)": "-----\n--- 865467536\nInput text\n--- 865467537\n", + "(rosey|chip|joi)_.*_instruct.*": "User: \n", + "oasst-*": "<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>" + }, + "lora_prompts": { + "default": "Common sense questions and answers\n\nQuestion: \nFactual answer:", + "alpaca-lora-7b": "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n" + } } From a717fd709d4ef5ab1a5bf97b9e59593ea7e36569 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:42:25 -0300 Subject: [PATCH 6/7] Sort the imports --- modules/callbacks.py | 1 + modules/chat.py | 3 ++- modules/models.py | 3 +-- server.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/callbacks.py b/modules/callbacks.py index faa4a5e999..12a90cc353 100644 --- a/modules/callbacks.py +++ b/modules/callbacks.py @@ -7,6 +7,7 @@ import modules.shared as shared + # Copied from https://github.com/PygmalionAI/gradio-ui/ class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria): diff --git a/modules/chat.py b/modules/chat.py index d7202bee58..3f313db2e6 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -12,7 +12,8 @@ import modules.shared as shared from modules.extensions import apply_extensions from modules.html_generator import generate_chat_html -from modules.text_generation import encode, generate_reply, get_max_prompt_length +from modules.text_generation import (encode, generate_reply, + get_max_prompt_length) # This gets the new line characters right. diff --git a/modules/models.py b/modules/models.py index 6df67d3c91..e4507e57e2 100644 --- a/modules/models.py +++ b/modules/models.py @@ -8,11 +8,10 @@ import torch import transformers from accelerate import infer_auto_device_map, init_empty_weights +from peft import PeftModel from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig) -from peft import PeftModel - import modules.shared as shared transformers.logging.set_verbosity_error() diff --git a/server.py b/server.py index 7d5ecc7463..5c21f4cd72 100644 --- a/server.py +++ b/server.py @@ -15,9 +15,9 @@ import modules.shared as shared import modules.ui as ui from modules.html_generator import generate_chat_html +from modules.LoRA import add_lora_to_model from modules.models import load_model, load_soft_prompt from modules.text_generation import generate_reply -from modules.LoRA import add_lora_to_model # Loading custom settings settings_file = None From 614dad007530574e6c4680362d0497c20a9da07d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 17 Mar 2023 11:43:11 -0300 Subject: [PATCH 7/7] Remove unused import --- modules/LoRA.py | 2 +- modules/models.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index c95da6ee89..74030c25c7 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -10,7 +10,7 @@ def add_lora_to_model(lora_name): # Is there a more efficient way of returning to the base model? if lora_name == "None": - print(f"Reloading the model to remove the LoRA...") + print("Reloading the model to remove the LoRA...") shared.model, shared.tokenizer = load_model(shared.model_name) else: print(f"Adding the LoRA {lora_name} to the model...") diff --git a/modules/models.py b/modules/models.py index e4507e57e2..63060d4324 100644 --- a/modules/models.py +++ b/modules/models.py @@ -8,7 +8,6 @@ import torch import transformers from accelerate import infer_auto_device_map, init_empty_weights -from peft import PeftModel from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig)