-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuserFiltering.py
66 lines (50 loc) · 2.22 KB
/
userFiltering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import warnings
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
# Ignore every warning category from this point on
warnings.simplefilter("ignore")

# Lookup table: single-letter learner code -> name returned to the caller
res = dict(A="Auditory", V="Visual", K="Kinesthetic")
# Function to load and preprocess data
def load_and_preprocess_data(file_path):
    """Load the learner dataset and build the scaled training split.

    The CSV must contain a ``Learner`` column (the target) and a ``Gender``
    column; both are label-encoded. Every column other than ``Learner`` is
    used as a feature.

    Args:
        file_path: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A tuple ``(X_train, y_train, scaler, label_encoder)``:

        * ``X_train`` -- standardized features of the training rows.
        * ``y_train`` -- encoded ``Learner`` values of the same rows.
        * ``scaler`` -- the ``StandardScaler`` fitted on the training rows.
        * ``label_encoder`` -- the ``LabelEncoder`` fitted on ``Learner``;
          use ``inverse_transform`` to recover the original labels.

    Note:
        The encoder fitted on ``Gender`` is not returned, so callers have to
        supply that feature already encoded.
    """
    data = pd.read_csv(file_path)

    # Encode the target column (Learner type); this encoder is returned so
    # predictions can be mapped back to the original labels.
    label_encoder = LabelEncoder()
    data['Learner'] = label_encoder.fit_transform(data['Learner'])

    # Encode the categorical Gender feature.
    data['Gender'] = LabelEncoder().fit_transform(data['Gender'])

    # Separate features and target.
    X = data.drop(columns=['Learner'])
    y = data['Learner']

    # Keep the same 80% training rows as before (fixed random_state). The
    # held-out 20% is never used by this module, so it is discarded here
    # instead of being scaled and thrown away.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the features; the fitted scaler is reused for new inputs.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)

    return X_train, y_train, scaler, label_encoder
# Function to predict the learner type for a new user
def predict_learner_type(new_user_input, file_path="./extras/SL_csv.csv", k=5):
    """Predict a new user's learner type from the k most similar training users.

    The input is scaled with the scaler fitted on the training data, compared
    against every training row by cosine similarity, and the most frequent
    learner type among the ``k`` most similar rows is returned.

    Args:
        new_user_input: One user's feature values, in the same order as the
            feature columns of the CSV (every column except ``Learner``).
            Values must already be numeric; ``Gender`` is not encoded here.
        file_path: Path of the training CSV.
        k: Number of nearest training users that vote. Must be at least 1.
            If it exceeds the number of training rows, all rows vote.

    Returns:
        The learner type name looked up in ``res`` (for example ``"Visual"``).

    Raises:
        ValueError: If ``k`` is smaller than 1.
        KeyError: If the predicted label from the CSV has no entry in ``res``.
    """
    # A slice of [-0:] selects every row and a negative k slices from the
    # wrong end, so reject those values instead of voting over the wrong set.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # NOTE: the dataset is read and the scaler re-fitted on every call.
    X_train, y_train, scaler, label_encoder = load_and_preprocess_data(file_path)

    # Scale the input using the same scaler as the training data.
    new_user_scaled = scaler.transform([new_user_input])

    # Cosine similarity between the new user and each training row.
    new_user_similarity = cosine_similarity(new_user_scaled, X_train).flatten()

    # argsort is ascending, so the last k positions are the most similar rows.
    top_k_indices = np.argsort(new_user_similarity)[-k:]
    top_k_types = y_train.iloc[top_k_indices]

    # Majority vote; on a tie the first mode reported by pandas is used.
    predicted_label = top_k_types.mode()[0]
    predicted_type = label_encoder.inverse_transform([predicted_label])[0]

    return res[predicted_type]