-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit.py
23 lines (18 loc) · 975 Bytes
/
split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the raw table from disk.
data = pd.read_csv('^NSEI.csv')

# 'Close' is the prediction target; the features are every remaining column
# except 'Date', which is discarded rather than used as an input.
y = data['Close']
X = data.drop(columns=['Close', 'Date'])

# Two-stage split: hold out 40% of the rows first, then cut that hold-out in
# half, giving 60% train / 20% validation / 20% test overall. The fixed
# random_state makes both stages reproducible from run to run.
# NOTE(review): train_test_split shuffles rows by default; if the rows are in
# chronological order, confirm that a random (non-chronological) split is
# what is wanted here.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

# Write each split as a features file followed by its target file. The row
# index is omitted from every file, and each target file gets an explicit
# 'Close' header.
for _features_path, _features, _target_path, _target in (
    ('X_train.csv', X_train, 'y_train.csv', y_train),
    ('X_val.csv', X_val, 'y_val.csv', y_val),
    ('X_test.csv', X_test, 'y_test.csv', y_test),
):
    _features.to_csv(_features_path, index=False)
    _target.to_csv(_target_path, index=False, header=['Close'])

print("Datasets saved successfully.")