-
Notifications
You must be signed in to change notification settings - Fork 0
/
sql_queries_etl_stage.py
44 lines (36 loc) · 1.04 KB
/
sql_queries_etl_stage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import configparser
# CONFIG
# Name of the settings file; being a relative path, it is resolved against the
# current working directory of the running process.
_CONFIG_FILE = 'dwh_035_access.cfg'

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. An empty result therefore means the settings
# file is missing or unreadable; fail here with a clear message instead of a
# confusing NoSectionError on the first lookup below.
if not config.read(_CONFIG_FILE):
    raise FileNotFoundError(
        f"Configuration file {_CONFIG_FILE!r} was not found or could not be read"
    )

# Values interpolated into the COPY statements defined in this module.
AWS_REGION = config.get('AWS', 'REGION')          # used in the REGION clause
LOG_DATA = config.get('S3', 'LOG_DATA')           # FROM source for staging_events
LOG_JSONPATH = config.get('S3', 'LOG_JSONPATH')   # FORMAT AS JSON argument for staging_events
SONG_DATA = config.get('S3', 'SONG_DATA')         # FROM source for staging_songs
IAM_ROLE = config.get('IAM_ROLE', 'ARN')          # used as aws_iam_role in CREDENTIALS
# STAGING TABLES
# Reference: https://knowledge.udacity.com/questions/784957
# COPY statement that fills the staging_events table from the JSON data at
# LOG_DATA, using LOG_JSONPATH as the argument of FORMAT AS JSON.
# One clause per tuple entry; the empty first and last entries reproduce the
# newline that opens and closes the statement text.
staging_events_copy = "\n".join((
    "",
    "COPY staging_events",
    f"FROM '{LOG_DATA}'",
    f"CREDENTIALS 'aws_iam_role={IAM_ROLE}'",
    f"FORMAT AS JSON '{LOG_JSONPATH}'",
    "TIMEFORMAT AS 'epochmillisecs'",
    "TRUNCATECOLUMNS EMPTYASNULL BLANKSASNULL",
    "COMPUPDATE OFF",
    f"REGION '{AWS_REGION}'",
    ";",
    "",
))
# Reference: https://knowledge.udacity.com/questions/784957
# COPY statement that fills the staging_songs table from the JSON data at
# SONG_DATA, with FORMAT AS JSON 'auto' instead of an explicit JSONPaths file.
# One clause per tuple entry; the empty first and last entries reproduce the
# newline that opens and closes the statement text.
staging_songs_copy = "\n".join((
    "",
    "COPY staging_songs",
    f"FROM '{SONG_DATA}'",
    f"CREDENTIALS 'aws_iam_role={IAM_ROLE}'",
    "COMPUPDATE OFF",
    f"REGION '{AWS_REGION}'",
    "FORMAT AS JSON 'auto'",
    "TRUNCATECOLUMNS EMPTYASNULL BLANKSASNULL",
    ";",
    "",
))
# QUERY LISTS
# Both staging COPY statements gathered into one list: events first, then songs.
copy_table_queries = [
    staging_events_copy,
    staging_songs_copy,
]