-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMenu.py
173 lines (157 loc) · 5.47 KB
/
Menu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import glob
import os
from pyspark.sql import SparkSession
import pyspark.pandas as ps
from pyspark.sql.functions import col
from ML_Part import *
# Initialize a SparkSession -- entry point for PySpark
spark = SparkSession.builder.getOrCreate()
# Set up the data frame and return the data frame
def setup():
print("\nHello!\n")
# Current file path to the csv files
# Should change if on another machine
# file_path = "C:/Users/zacla/OneDrive/Desktop/Capstone/Spark/csv_files"
file_path = "/Users/cade/School/2023_Spring/Capstone/capstone/Data/processed"
os.chdir(file_path)
# Get only csv files
my_files = glob.glob('*.csv')
count = 1
# Display current files in csv_files folder
for i in my_files:
print(str(count) + ") " + i)
count += 1
# Have a option to combine all csv files together
print(str(count) + ") Combine all")
# Take the user input
choose_csv = input("Please choose an option from the list to begin: ")
# If the user chooses the combine all option
if int(choose_csv) > len(my_files):
# Place all paths into an array
path = ['csv_files/asheville_airport_2020_daily.csv',
'csv_files/asheville_airport_2021_daily.csv',
'csv_files/asheville_airport_2022_daily.csv']
sdf = spark.read.options(header = True).csv(path)
else:
# take the chosen csv file and read into data frame
choice = int(choose_csv) - 1
hold = my_files[int(choice)]
full_Path = file_path + "/" + hold
df = ps.read_csv(full_Path)
sdf = df.to_spark()
return sdf
# Define select, for now just print columns
def select(columns, sdf, spark):
check = False
if ', ' in columns:
check = True
elif ' ' in columns:
columns = columns.replace(' ', ', ')
check = True
else:
check = True
if check == True:
sdf.createOrReplaceTempView("tempTable")
sql_input = "SELECT " + columns + " from tempTable"
spark.sql(sql_input).show()
menu(sdf)
else:
print("Something went wrong")
# Create SQL statement
def second_option(ui, sdf, spark):
sdf.createOrReplaceTempView(ui)
statement = input("Create your own SQL statement: ")
spark.sql(statement).show()
menu(sdf)
# Set up a SQL between option -- Does not work
def between(first, second, column, sdf, spark):
sdf.createOrReplaceTempView("temp")
sql_input = "SELECT * FROM temp WHERE " + column + " BETWEEN " + first + " AND " + second
print(sql_input)
spark.sql(sql_input).show()
menu(sdf)
# Displays the current data frame
def display(sdf):
sdf.show()
menu(sdf)
# Displays the column headers and there data types
def display_schema(sdf):
sdf.printSchema()
menu(sdf)
# Calls machine menu in ML_Part
def machine(sdf):
machine_menu(sdf)
menu(sdf)
# Shows the min and max for both precipitation and snowfall
def min_max(sdf):
options = "1) Max on Daily Precipitation\n2) Min on Daily Precipitation\n"
options += "3) Max on Daily Snowfall\n4) Min on Daily Snowfall\n"
print(options)
pick = input("Choose an option: ")
sdf.createOrReplaceTempView("temp")
if pick == "1":
sql_input = "SELECT MAX(DailyPrecipitation) FROM temp"
spark.sql(sql_input).show()
menu(sdf)
elif pick == "2":
sql_input = "SELECT MIN(DailyPrecipitation) FROM temp"
spark.sql(sql_input).show()
menu(sdf)
elif pick == "3":
sql_input = "SELECT MAX(DailySnowfall) FROM temp"
spark.sql(sql_input).show()
menu(sdf)
elif pick == "4":
sql_input = "SELECT MIN(DailySnowfall) FROM temp"
spark.sql(sql_input).show()
menu(sdf)
else:
print("Wrong input, try again")
# Main menu displays options to manipulate the current data frame
def menu(sdf):
header = "\n Main Menu "
line = "---------------------"
options = "\n1) Simple SELECT statement\n2) Create your own SQL statement\n3) SQL between\n"
options += "4) Display Table\n5) Print Schema\n6) Min and Max\n"
options += "7) Machine Learning\n0) exit\n"
menu = header + "\n" + line + options +line
print(menu)
# Pick a option
user_input = input("Pick a option: ")
work(user_input, sdf)
# Take user input
def work(ui, sdf):
# Switch case
if ui == "1":
columns = input("Provide column names you would wish to see: ")
select(columns, sdf, spark)
elif ui == "2":
# Create your own SQL statement
temp_table = input("Provide a temporary table label to hold result: ")
second_option(temp_table, sdf, spark)
elif ui == "3":
column = input("Provide column name to show between: ")
first = input("Enter first value: ")
second = input("Enter second value: ")
between(first, second, column, sdf, spark)
elif ui == "4":
display(sdf)
elif ui == "5":
display_schema(sdf)
elif ui == "6":
min_max(sdf)
elif ui == "7":
machine(sdf)
elif ui == "0":
print("Goodbye")
quit()
else:
print("Not an option, PLease try again.")
menu(sdf)
# Call setup for start passes user selected options to work
def main():
sdf = setup()
result = menu(sdf)
work(result, sdf)
if __name__ == "__main__":
main()