-
Notifications
You must be signed in to change notification settings - Fork 0
/
cluster_app.py
64 lines (43 loc) · 2.77 KB
/
cluster_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# In the original notebook, a cell magic saved the content of this cell into a file named "cluster_app.py"
import streamlit as st # We list every library that we must import
import seaborn as sns
import pandas as pd
import pickle
import matplotlib.pyplot as plt
# Training data: used for the slider defaults and as the background points of the scatterplot.
dataset = pd.read_csv('data.csv')

# Load the trained model. The context manager closes the file handle as soon
# as unpickling finishes instead of leaving it open until garbage collection.
# NOTE(review): unpickling can execute arbitrary code, so 'model.pkl' must
# come from a trusted source.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Main title of the page.
st.write("""
# Let's cluster some numbers 😎
""")

# Title of the sidebar that holds the input sliders.
st.sidebar.header("User input's")
def user_input_features():
    """Read the x and y values chosen on the sidebar sliders.

    Both sliders span 1-1000 and start at the mean of the matching column of
    the training ``dataset``, clamped into that range.

    Returns:
        A one-row pandas DataFrame (index 0) with the columns ``y``, ``x`` and
        ``data_type``; ``data_type`` is always ``'prediction'``.
    """
    lower, upper = 1, 1000

    def initial_value(column):
        # Streamlit rejects a default value outside [min_value, max_value],
        # which would happen if the column mean fell below 1 or above 1000,
        # so the truncated mean is clamped into the slider range.
        return min(max(int(dataset[column].mean()), lower), upper)

    x_ = st.sidebar.slider('x value', lower, upper, initial_value('x'))
    y_ = st.sidebar.slider('y value', lower, upper, initial_value('y'))
    data = {'y': y_,
            'x': x_,
            'data_type': 'prediction'}
    return pd.DataFrame(data, index=[0])
# Retrieve the user input as a one-row DataFrame.
df = user_input_features()

# Echo the selected values so they stay visible if the user closes the sidebar.
st.subheader('Your x & y values:')
st.write(df)

# Predicted cluster label for the user input alone; it is displayed below.
cluster_n = model.predict(df[['x', 'y']])

# Stack the training points and the user input into one frame.
# ignore_index=True gives every row a unique label; without it the user row
# would reuse label 0, which the first training row may already carry.
temp_df = pd.concat([dataset[['x', 'y']], df], axis=0, ignore_index=True)

# Cluster label for every row (training points and user input alike).
temp_df['cluster'] = model.predict(temp_df[['x', 'y']])

# Training rows have no "data_type" after the concat; mark them as 'train'.
# Only that column is filled so a missing numeric value can never be
# silently replaced by the string 'train'.
temp_df['data_type'] = temp_df['data_type'].fillna('train')

st.subheader('# of cluster')
st.write(cluster_n)  # Predicted cluster of the user input stored above
st.write(temp_df)  # Full table: training data plus the user input
st.title("Scatterplot")  # Title shown above the scatterplot

# Create the figure explicitly and hand its axes to seaborn, so the plot is
# guaranteed to be drawn on the figure rendered below instead of relying on
# pyplot's implicit "current axes" state.
fig, ax = plt.subplots()
sns.scatterplot(data=temp_df,
                x="x",
                y="y",
                hue="cluster",  # Color by cluster
                style="data_type",  # Draw a different marker shape per data type
                size='data_type',  # Also vary the marker size per data type
                sizes={'prediction': 100,  # Sizes are given as a dictionary keyed by data type
                       'train': 20},
                ax=ax)

# Move the legend outside the plot area.
ax.legend(bbox_to_anchor=(1, 1),
          borderaxespad=1)

st.pyplot(fig)  # Render the scatterplot in Streamlit