"""
optimum-cli export openvino --model NuExtract-1.5-tiny --task text-generation-with-past --trust-remote-code --weight-format int8 ov_NuExtract-1.5-tiny
Followed official tutorial
https://docs.openvino.ai/2024/notebooks/llm-question-answering-with-output.html
"""
import streamlit as st
import warnings
warnings.filterwarnings(action='ignore')
import datetime
import random
import string
from time import sleep
import tiktoken
import json
import openvino_genai as ov_genai
# https://docs.openvino.ai/2024/openvino-workflow/model-optimization-guide/weight-compression.html
# https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide/genai-guide.html
# example from https://github.com/openvinotoolkit/openvino.genai/blob/master/samples/python/chat_sample/chat_sample.py
# API documentation: https://docs.openvino.ai/2024/api/genai_api/_autosummary/openvino_genai.html#module-openvino_genai
# for counting the tokens in the prompt and in the result
encoding = tiktoken.get_encoding("cl100k_base")
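# NOTE: cl100k_base is OpenAI's tokenizer, not the one shipped with NuExtract-1.5-tiny,
# so the token counts (and the derived t/s speed) are only an approximation.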
# GLOBALS
modelname = "NuExtract1.5-Tiny"
modelfile = 'ov_NuExtract-1.5-tiny'
# Set the webpage title
st.set_page_config(
    page_title=f"Your LocalGPT ✨ with {modelname}",
    page_icon="🌟",
    layout="wide")
# SET Session States
if "hf_model" not in st.session_state:
st.session_state.hf_model = modelname
# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []
if "repeat" not in st.session_state:
st.session_state.repeat = 1.35
if "temperature" not in st.session_state:
st.session_state.temperature = 0.1
if "maxlength" not in st.session_state:
st.session_state.maxlength = 500
if "speed" not in st.session_state:
st.session_state.speed = 0.0
if "time" not in st.session_state:
st.session_state.time = ''
if "firstrun" not in st.session_state:
st.session_state.firstrun = 0
# Defining internal functions
def writehistory(filename, text):
    """Append a line of text to the log file."""
    with open(filename, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')
def genRANstring(n):
    """
    n = int, number of random characters to generate
    """
    res = ''.join(random.choices(string.ascii_uppercase + string.digits, k=n))
    return res
# CACHED RESOURCES
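# st.cache_resource loads the OpenVINO pipeline only once and reuses it across Streamlit reruns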
@st.cache_resource
def create_chat():
    # Load the exported OpenVINO model into an LLMPipeline
    # device = 'CPU'  # GPU can be used as well
    # pipe = ov_genai.LLMPipeline(model_dir, device)
    start = datetime.datetime.now()
    model_dir = 'ov_NuExtract-1.5-tiny'
    pipe = ov_genai.LLMPipeline(model_dir, 'CPU')
    delta = datetime.datetime.now() - start
    print(f'loaded {modelfile} with the pure OpenVINO-GenAI pipeline in {delta}...')
    return pipe
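# Possible extension (sketch, not used in this app): the chat_sample.py linked above
# shows that LLMPipeline.generate() also accepts a streamer callback for
# token-by-token output, roughly:
#   def streamer(subword):
#       print(subword, end='', flush=True)
#       return False  # returning False keeps generation going
#   pipe.generate(prompt, max_new_tokens=500, streamer=streamer)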
# create the log file
if "logfilename" not in st.session_state:
logfile = f'{genRANstring(5)}_log.txt'
st.session_state.logfilename = logfile
#Write in the history the first 2 sessions
writehistory(st.session_state.logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT JSON extractor with 🌀 {modelname}\n---\n🧠🫡: You are a helpful assistant.')
writehistory(st.session_state.logfilename,f'🌀: How may I help you today?')
# INSTANTIATE THE LOCAL LLM PIPELINE
llm = create_chat()
### START STREAMLIT UI
# Create a header element
mytitle = f'## Extract data with {modelname}'
st.markdown(mytitle, unsafe_allow_html=True)
# CREATE THE SIDEBAR
with st.sidebar:
    st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.1, step=0.01)
    st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=2000,
                                           value=500, step=50)
    st.session_state.presence = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.11, step=0.02)
    st.markdown(f"**Logfile**: {st.session_state.logfilename}")
    statspeed = st.markdown(f'💫 speed: {st.session_state.speed} t/s')
    gentime = st.markdown(f'⏱️ gen time: {st.session_state.time} seconds')
    btnClear = st.button("Load example", type="primary", use_container_width=True)
    st.image('logo.png', use_container_width=True)
# MAIN WINDOW
st.session_state.jsonformat = st.text_area('JSON Schema to be applied', value="", height=150,
                                           placeholder='paste your schema here', disabled=False, label_visibility="visible")
st.session_state.origintext = st.text_area('Source Document', value="", height=150,
                                           placeholder='paste your source text here', disabled=False, label_visibility="visible")
extract_btn = st.button("Extract Data", type="primary", use_container_width=False)
st.markdown('---')
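# Placeholders that will hold the model output once the extraction has run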
st.session_state.extractedJSON = st.empty()
st.session_state.onlyJSON = st.empty()
def create_example():
    jsontemplate = """{
    "Model": {
        "Name": "",
        "Number of parameters": "",
        "Number of max token": "",
        "Architecture": []
    },
    "Usage": {
        "Use case": [],
        "Licence": ""
    }
}"""
    text = """We introduce Mistral 7B, a 7-billion-parameter language model engineered for
superior performance and efficiency. Mistral 7B outperforms the best open 13B
model (Llama 2) across all evaluated benchmarks, and the best released 34B
model (Llama 1) in reasoning, mathematics, and code generation. Our model
leverages grouped-query attention (GQA) for faster inference, coupled with sliding
window attention (SWA) to effectively handle sequences of arbitrary length with a
reduced inference cost. We also provide a model fine-tuned to follow instructions,
Mistral 7B – Instruct, that surpasses Llama 2 13B – chat model both on human and
automated benchmarks. Our models are released under the Apache 2.0 license.
Code: <https://github.com/mistralai/mistral-src>
Webpage: <https://mistral.ai/news/announcing-mistral-7b/>"""
    st.session_state.jsonformat = jsontemplate
    st.session_state.origintext = text
# ACTIONS
# if btnClear:
#     create_example()
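# NuExtract expects a plain-text prompt that wraps the JSON template and the source
# document between <|input|> and <|output|> markers, as built below.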
if extract_btn:
    prompt = f"""<|input|>\n### Template:
{st.session_state.jsonformat}
### Text:
{st.session_state.origintext}
<|output|>
"""
    print(prompt)
with st.spinner("Thinking..."):
start = datetime.datetime.now()
# https://platform.openai.com/docs/api-reference/completions/create
output = llm.generate(prompt, temperature=st.session_state.temperature,
do_sample=True,
max_new_tokens=st.session_state.maxlength,
repetition_penalty=st.session_state.presence,
eos_token_id = 151643)
delta = datetime.datetime.now() -start
print(output)
result = output
st.write(result)
#adapter = result #.replace("'",'"')
#final = json.loads(adapter)
totalTokens = len(encoding.encode(prompt))+len(encoding.encode(result))
totalseconds = delta.total_seconds()
st.session_state.time = totalseconds
st.session_state.speed = totalTokens/totalseconds
statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s')
gentime.markdown(f'⏱️ gen time: {st.session_state.time:.2f} seconds')
totalstring = f"""GENERATED STRING
{result}
---
Generated in {delta}
---
JSON FORMAT:
"""
#WRITE THE OUTPUT AND THE LOGS
st.session_state.onlyJSON.json(result)
writehistory(st.session_state.logfilename,f'✨: {prompt}')
writehistory(st.session_state.logfilename,f'🌀: {result}')
writehistory(st.session_state.logfilename,f'---\n\n')