-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprocessing_helper.py
55 lines (47 loc) · 1.63 KB
/
processing_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from datetime import datetime
# Columns read from the rides CSV when the caller does not supply its own
# list (this is the default for the `columns` argument of `extract_data`).
COLUMNS = ['uid', 'rental_place', 'return_place']
# Values of the rental/return place columns whose rides are discarded.
# NOTE(review): these look like service/placeholder entries rather than
# physical stations -- confirm against the data source. The English glosses
# below are literal translations of the Polish labels.
nodes_to_remove = [
    ".GOTOWE DO REZERWACJI",             # "ready for reservation"
    "Poza stacją",                       # "outside a station"
    ".RELOKACYJNA",                      # "relocation"
    ".RELOKACYJNA A1-4",                 # "relocation A1-4"
    "# Rowery skradzione Wrocław 2014",  # "stolen bikes Wrocław 2014"
    "#Rowery zapasowe Warszawa",         # "spare bikes Warsaw"
]
def trim_and_remove_slash(s):
    """Return a cleaned-up copy of the string *s*.

    Leading and trailing whitespace is stripped first; then every '/' becomes
    '-', every double quote is removed, and every ',' becomes ' -'.
    """
    # Replacements are applied in this order, one after another.
    substitutions = (
        ('/', '-'),
        ('"', ''),
        (',', ' -'),
    )
    cleaned = s.strip()
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned
def extract_data(train_file_path, columns=COLUMNS):
    """Load ride records from a CSV file and clean the station columns.

    Processing steps, in order:
      1. Read only `columns` from the CSV at `train_file_path`.
      2. Drop every row that has a missing value in any loaded column.
      3. Drop rides whose 'rental_place' or 'return_place' is listed in
         `nodes_to_remove`, and rides that start and end at the same place.
      4. Normalise both place columns with `trim_and_remove_slash`.

    Args:
        train_file_path: Path (or anything `pandas.read_csv` accepts) of the
            CSV file to load.
        columns: Columns to read; must include 'rental_place' and
            'return_place'. Defaults to `COLUMNS`.

    Returns:
        A tuple `(all_data, stations)`: the cleaned DataFrame and an array of
        the unique normalised 'rental_place' values present in it.
    """
    all_data = pd.read_csv(train_file_path, usecols=columns)

    # Incomplete rows must go first: a missing station name is a float NaN,
    # which passes the `!=` filters below and would then crash inside
    # trim_and_remove_slash (float has no .strip()).
    all_data = all_data.dropna()

    rental = all_data['rental_place']
    returned = all_data['return_place']
    keep = (
        ~rental.isin(nodes_to_remove)
        & ~returned.isin(nodes_to_remove)
        & (rental != returned)
    )
    # Explicit copy so the column assignments below write to an independent
    # frame rather than to a slice of the one read from disk.
    all_data = all_data[keep].copy()

    all_data['rental_place'] = all_data['rental_place'].apply(trim_and_remove_slash)
    all_data['return_place'] = all_data['return_place'].apply(trim_and_remove_slash)

    # Computed after all filtering, so it lists exactly the rental stations
    # that occur in the returned frame.
    stations = all_data['rental_place'].unique()
    return all_data, stations
#
# data, stations = extract_data('data/historia_przejazdow_2019-03.csv')
#
# print(data.head())
# print()
# print(len(data[data['return_place'] == 1][data['rental_place'] == 0]))
# print()
# grouped_data = data.groupby(['rental_place', 'return_place']).size().reset_index()
# grouped_data = grouped_data.rename(columns={
# 'rental_place': 'Source',
# 'return_place': 'Target',
# 0: 'Weight'
# })
# grouped_data['Type'] = 'Directed'
# print(grouped_data)
# grouped_data.to_csv('out.csv', index=False)