From ef8063310df5a85c0efb7b69bda91c7347fc9ed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E8=85=BE?= <101850389+hangters@users.noreply.github.com> Date: Thu, 11 Jul 2024 15:41:00 +0800 Subject: [PATCH] add support for Gemini (#1465) ### What problem does this PR solve? #1036 ### Type of change - [x] New Feature (non-breaking change which adds functionality) Co-authored-by: Zhedong Cen --- api/db/init_data.py | 37 +++++- rag/llm/chat_model.py | 61 ++++++++++ rag/llm/cv_model.py | 23 ++++ rag/llm/embedding_model.py | 26 +++- requirements.txt | 1 + requirements_arm.txt | 1 + requirements_dev.txt | 1 + web/src/assets/svg/llm/gemini.svg | 114 ++++++++++++++++++ .../user-setting/setting-model/index.tsx | 1 + 9 files changed, 263 insertions(+), 2 deletions(-) create mode 100644 web/src/assets/svg/llm/gemini.svg diff --git a/api/db/init_data.py b/api/db/init_data.py index 5405fc3e732..c649e0e7bc6 100644 --- a/api/db/init_data.py +++ b/api/db/init_data.py @@ -175,6 +175,11 @@ def init_superuser(): "logo": "", "tags": "LLM,TEXT EMBEDDING", "status": "1", +},{ + "name": "Gemini", + "logo": "", + "tags": "LLM,TEXT EMBEDDING,IMAGE2TEXT", + "status": "1", } # { # "name": "文心一言", @@ -898,7 +903,37 @@ def init_llm_factory(): "tags": "TEXT EMBEDDING", "max_tokens": 2048, "model_type": LLMType.EMBEDDING.value - }, + }, { + "fid": factory_infos[17]["name"], + "llm_name": "gemini-1.5-pro-latest", + "tags": "LLM,CHAT,1024K", + "max_tokens": 1024*1024, + "model_type": LLMType.CHAT.value + }, { + "fid": factory_infos[17]["name"], + "llm_name": "gemini-1.5-flash-latest", + "tags": "LLM,CHAT,1024K", + "max_tokens": 1024*1024, + "model_type": LLMType.CHAT.value + }, { + "fid": factory_infos[17]["name"], + "llm_name": "gemini-1.0-pro", + "tags": "LLM,CHAT,30K", + "max_tokens": 30*1024, + "model_type": LLMType.CHAT.value + }, { + "fid": factory_infos[17]["name"], + "llm_name": "gemini-1.0-pro-vision-latest", + "tags": "LLM,IMAGE2TEXT,12K", + "max_tokens": 12*1024, + "model_type": LLMType.IMAGE2TEXT.value + }, { + "fid": factory_infos[17]["name"], + "llm_name": "text-embedding-004", + "tags": "TEXT EMBEDDING", + "max_tokens": 2048, + "model_type": LLMType.EMBEDDING.value + } ] for info in factory_infos: try: diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index f7f137b6fc9..f0fcb39bcf7 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -621,3 +621,64 @@ def chat_streamly(self, system, history, gen_conf): yield ans + f"ERROR: Can't invoke '{self.model_name}'. 
Reason: {e}" yield num_tokens_from_string(ans) + +class GeminiChat(Base): + + def __init__(self, key, model_name,base_url=None): + from google.generativeai import client,GenerativeModel + + client.configure(api_key=key) + _client = client.get_default_generative_client() + self.model_name = 'models/' + model_name + self.model = GenerativeModel(model_name=self.model_name) + self.model._client = _client + + def chat(self,system,history,gen_conf): + if system: + history.insert(0, {"role": "user", "parts": system}) + if 'max_tokens' in gen_conf: + gen_conf['max_output_tokens'] = gen_conf['max_tokens'] + for k in list(gen_conf.keys()): + if k not in ["temperature", "top_p", "max_output_tokens"]: + del gen_conf[k] + for item in history: + if 'role' in item and item['role'] == 'assistant': + item['role'] = 'model' + if 'content' in item : + item['parts'] = item.pop('content') + + try: + response = self.model.generate_content( + history, + generation_config=gen_conf) + ans = response.text + return ans, response.usage_metadata.total_token_count + except Exception as e: + return "**ERROR**: " + str(e), 0 + + def chat_streamly(self, system, history, gen_conf): + if system: + history.insert(0, {"role": "user", "parts": system}) + if 'max_tokens' in gen_conf: + gen_conf['max_output_tokens'] = gen_conf['max_tokens'] + for k in list(gen_conf.keys()): + if k not in ["temperature", "top_p", "max_output_tokens"]: + del gen_conf[k] + for item in history: + if 'role' in item and item['role'] == 'assistant': + item['role'] = 'model' + if 'content' in item : + item['parts'] = item.pop('content') + ans = "" + try: + response = self.model.generate_content( + history, + generation_config=gen_conf,stream=True) + for resp in response: + ans += resp.text + yield ans + + except Exception as e: + yield ans + "\n**ERROR**: " + str(e) + + yield response._chunks[-1].usage_metadata.total_token_count \ No newline at end of file diff --git a/rag/llm/cv_model.py b/rag/llm/cv_model.py index 9c25ffd87f2..19843a352bd 100644 --- a/rag/llm/cv_model.py +++ b/rag/llm/cv_model.py @@ -203,6 +203,29 @@ def describe(self, image, max_tokens=300): ) return res.choices[0].message.content.strip(), res.usage.total_tokens +class GeminiCV(Base): + def __init__(self, key, model_name="gemini-1.0-pro-vision-latest", lang="Chinese", **kwargs): + from google.generativeai import client,GenerativeModel + client.configure(api_key=key) + _client = client.get_default_generative_client() + self.model_name = model_name + self.model = GenerativeModel(model_name=self.model_name) + self.model._client = _client + self.lang = lang + + def describe(self, image, max_tokens=2048): + from PIL.Image import open + gen_config = {'max_output_tokens':max_tokens} + prompt = "请用中文详细描述一下图中的内容,比如时间,地点,人物,事情,人物心情等,如果有数据请提取出数据。" if self.lang.lower() == "chinese" else \ + "Please describe the content of this picture, like where, when, who, what happen. If it has number data, please extract them out." 
diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
index 48081e0124d..06cc56975d6 100644
--- a/rag/llm/embedding_model.py
+++ b/rag/llm/embedding_model.py
@@ -31,7 +31,7 @@
 import asyncio
 from api.utils.file_utils import get_home_cache_dir
 from rag.utils import num_tokens_from_string, truncate
-
+import google.generativeai as genai
 
 class Base(ABC):
     def __init__(self, key, model_name):
@@ -419,3 +419,27 @@ def encode_queries(self, text):
         return np.array(embeddings), token_count
 
 
+class GeminiEmbed(Base):
+    def __init__(self, key, model_name='models/text-embedding-004',
+                 **kwargs):
+        genai.configure(api_key=key)
+        self.model_name = model_name if model_name.startswith('models/') else 'models/' + model_name
+
+    def encode(self, texts: list, batch_size=32):
+        texts = [truncate(t, 2048) for t in texts]
+        token_count = sum(num_tokens_from_string(text) for text in texts)
+        result = genai.embed_content(
+            model=self.model_name,
+            content=texts,
+            task_type="retrieval_document",
+            title="Embedding of list of strings")
+        return np.array(result['embedding']), token_count
+
+    def encode_queries(self, text):
+        # queries use task_type "retrieval_query"; 'title' applies only to documents
+        result = genai.embed_content(
+            model=self.model_name,
+            content=truncate(text, 2048),
+            task_type="retrieval_query")
+        token_count = num_tokens_from_string(text)
+        return np.array(result['embedding']), token_count
diff --git a/requirements.txt b/requirements.txt
index 048761dabf2..497c946832d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -147,3 +147,4 @@ markdown==3.6
 mistralai==0.4.2
 boto3==1.34.140
 duckduckgo_search==6.1.9
+google-generativeai==0.7.2
diff --git a/requirements_arm.txt b/requirements_arm.txt
index 448b093619b..a5cbf5d7009 100644
--- a/requirements_arm.txt
+++ b/requirements_arm.txt
@@ -148,3 +148,4 @@ markdown==3.6
 mistralai==0.4.2
 boto3==1.34.140
 duckduckgo_search==6.1.9
+google-generativeai==0.7.2
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 8bc49f07ef0..f6b6799b56f 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -133,3 +133,4 @@ markdown==3.6
 mistralai==0.4.2
 boto3==1.34.140
 duckduckgo_search==6.1.9
+google-generativeai==0.7.2
diff --git a/web/src/assets/svg/llm/gemini.svg b/web/src/assets/svg/llm/gemini.svg
new file mode 100644
index 00000000000..3f06bf3b5a1
--- /dev/null
+++ b/web/src/assets/svg/llm/gemini.svg
@@ -0,0 +1,114 @@
+[SVG markup for the Gemini logo icon (114 lines) omitted]
diff --git a/web/src/pages/user-setting/setting-model/index.tsx b/web/src/pages/user-setting/setting-model/index.tsx
index dd927e5cbc0..42f58a61e57 100644
--- a/web/src/pages/user-setting/setting-model/index.tsx
+++ b/web/src/pages/user-setting/setting-model/index.tsx
@@ -61,6 +61,7 @@ const IconMap = {
   Mistral: 'mistral',
   'Azure-OpenAI': 'azure',
   Bedrock: 'bedrock',
+  Gemini: 'gemini',
 };
 
 const LlmIcon = ({ name }: { name: string }) => {
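A similarly hedged sketch of the chat and embedding paths added above, with placeholder key and prompts. Note that `GeminiChat.chat` rewrites the `history` entries in place (OpenAI-style `content` becomes Gemini-style `parts`), so pass a copy if the original list must be reused.

```python
from rag.llm.chat_model import GeminiChat
from rag.llm.embedding_model import GeminiEmbed

chat_mdl = GeminiChat(key="YOUR_GEMINI_API_KEY",
                      model_name="gemini-1.5-flash-latest")
answer, used_tokens = chat_mdl.chat(
    system="You are a concise assistant.",
    history=[{"role": "user", "content": "Summarize RAG in one sentence."}],
    gen_conf={"temperature": 0.3, "max_tokens": 256},
)

embd_mdl = GeminiEmbed(key="YOUR_GEMINI_API_KEY",
                       model_name="text-embedding-004")
vectors, token_count = embd_mdl.encode(["retrieval-augmented generation"])
```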