add support for LM Studio (#1663)

### What problem does this PR solve?

#1602

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <[email protected]>
infiniflow · Jul 24, 2024 · d96348e · lazydao · Aug 6, 2024 · d96348e
1 parent 100b316
commit d96348e
Show file tree

Hide file tree

Showing 10 changed files with 9,791 additions and 26 deletions.
diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
@@ -21,7 +21,7 @@
 from api.db.db_models import TenantLLM
 from api.utils.api_utils import get_json_result
 from rag.llm import EmbeddingModel, ChatModel, RerankModel,CvModel
-
+import requests
 
 @manager.route('/factories', methods=['GET'])
 @login_required
@@ -189,9 +189,13 @@ def add_llm():
                 "ons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/256"
                 "0px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
             )
-            m, tc = mdl.describe(img_url)
-            if not tc:
-                raise Exception(m)
+            res = requests.get(img_url)
+            if res.status_code == 200:
+                m, tc = mdl.describe(res.content)
+                if not tc:
+                    raise Exception(m)
+            else:
+                raise ConnectionError("fail to download the test picture")
         except Exception as e:
             msg += f"\nFail to access model({llm['llm_name']})." + str(e)
     else:

diff --git a/conf/llm_factories.json b/conf/llm_factories.json
@@ -2208,6 +2208,13 @@
                     "model_type": "image2text"
                 }
             ]
+        },
+        {
+            "name": "LM-Studio",
+            "logo": "",
+            "tags": "LLM,TEXT EMBEDDING,IMAGE2TEXT",
+            "status": "1",
+            "llm": []
         }
     ]
 }
diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
@@ -34,8 +34,9 @@
     "BAAI": DefaultEmbedding,
     "Mistral": MistralEmbed,
     "Bedrock": BedrockEmbed,
-    "Gemini":GeminiEmbed,
-    "NVIDIA":NvidiaEmbed
+    "Gemini": GeminiEmbed,
+    "NVIDIA": NvidiaEmbed,
+    "LM-Studio": LmStudioEmbed
 }
 
 
@@ -47,10 +48,11 @@
     "Tongyi-Qianwen": QWenCV,
     "ZHIPU-AI": Zhipu4V,
     "Moonshot": LocalCV,
-    'Gemini':GeminiCV,
-    'OpenRouter':OpenRouterCV,
-    "LocalAI":LocalAICV,
-    "NVIDIA":NvidiaCV
+    "Gemini": GeminiCV,
+    "OpenRouter": OpenRouterCV,
+    "LocalAI": LocalAICV,
+    "NVIDIA": NvidiaCV,
+    "LM-Studio": LmStudioCV
 }
 
 
@@ -69,12 +71,13 @@
     "MiniMax": MiniMaxChat,
     "Minimax": MiniMaxChat,
     "Mistral": MistralChat,
-    'Gemini' : GeminiChat,
+    "Gemini": GeminiChat,
     "Bedrock": BedrockChat,
     "Groq": GroqChat,
-    'OpenRouter':OpenRouterChat,
-    "StepFun":StepFunChat,
-    "NVIDIA":NvidiaChat
+    "OpenRouter": OpenRouterChat,
+    "StepFun": StepFunChat,
+    "NVIDIA": NvidiaChat,
+    "LM-Studio": LmStudioChat
 }
 
 
@@ -83,7 +86,8 @@
     "Jina": JinaRerank,
     "Youdao": YoudaoRerank,
     "Xinference": XInferenceRerank,
-    "NVIDIA":NvidiaRerank
+    "NVIDIA": NvidiaRerank,
+    "LM-Studio": LmStudioRerank
 }
 
 

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
@@ -976,3 +976,15 @@ def chat_streamly(self, system, history, gen_conf):
             yield ans + "\n**ERROR**: " + str(e)
 
         yield total_tokens
+
+
+class LmStudioChat(Base):
+    def __init__(self, key, model_name, base_url):
+        from os.path import join
+
+        if not base_url:
+            raise ValueError("Local llm url cannot be None")
+        if base_url.split("/")[-1] != "v1":
+            self.base_url = join(base_url, "v1")
+        self.client = OpenAI(api_key="lm-studio", base_url=self.base_url)
+        self.model_name = model_name
diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py
@@ -440,15 +440,8 @@ def __init__(self, key, model_name, base_url, lang="Chinese"):
         self.lang = lang
 
     def describe(self, image, max_tokens=300):
-        if not isinstance(image, bytes) and not isinstance(
-            image, BytesIO
-        ):  # if url string
-            prompt = self.prompt(image)
-            for i in range(len(prompt)):
-                prompt[i]["content"]["image_url"]["url"] = image
-        else:
-            b64 = self.image2base64(image)
-            prompt = self.prompt(b64)
+        b64 = self.image2base64(image)
+        prompt = self.prompt(b64)
         for i in range(len(prompt)):
             for c in prompt[i]["content"]:
                 if "text" in c:
@@ -680,3 +673,14 @@ def chat_prompt(self, text, b64):
                 "content": text + f' <img src="data:image/jpeg;base64,{b64}"/>',
             }
         ]
+
+
+class LmStudioCV(LocalAICV):
+    def __init__(self, key, model_name, base_url, lang="Chinese"):
+        if not base_url:
+            raise ValueError("Local llm url cannot be None")
+        if base_url.split('/')[-1] != 'v1':
+            self.base_url = os.path.join(base_url,'v1')
+        self.client = OpenAI(api_key="lm-studio", base_url=self.base_url)
+        self.model_name = model_name
+        self.lang = lang
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
@@ -500,3 +500,24 @@ def encode(self, texts: list, batch_size=None):
     def encode_queries(self, text):
         embds, cnt = self.encode([text])
         return np.array(embds[0]), cnt
+
+
+class LmStudioEmbed(Base):
+    def __init__(self, key, model_name, base_url):
+        if not base_url:
+            raise ValueError("Local llm url cannot be None")
+        if base_url.split("/")[-1] != "v1":
+            self.base_url = os.path.join(base_url, "v1")
+        self.client = OpenAI(api_key="lm-studio", base_url=self.base_url)
+        self.model_name = model_name
+
+    def encode(self, texts: list, batch_size=32):
+        res = self.client.embeddings.create(input=texts, model=self.model_name)
+        return (
+            np.array([d.embedding for d in res.data]),
+            1024,
+        )  # local embedding for LmStudio donot count tokens
+
+    def encode_queries(self, text):
+        res = self.client.embeddings.create(text, model=self.model_name)
+        return np.array(res.data[0].embedding), 1024
diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py
@@ -202,3 +202,11 @@ def similarity(self, query: str, texts: list):
         }
         res = requests.post(self.base_url, headers=self.headers, json=data).json()
         return (np.array([d["logit"] for d in res["rankings"]]), token_count)
+
+
+class LmStudioRerank(Base):
+    def __init__(self, key, model_name, base_url):
+        pass
+
+    def similarity(self, query: str, texts: list):
+        raise NotImplementedError("The LmStudioRerank has not been implement")