forked from mauro-eb/muzooka-artist-importer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
xml_parser.py
62 lines (47 loc) · 1.65 KB
/
xml_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
try:
    import xml.etree.cElementTree as etree
except ImportError:  # cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as etree
def strip_tag_name(t):
    """Return tag ``t`` without a leading qualifier such as ``{namespace}``.

    Everything up to and including the last ``}`` is dropped; a tag that
    contains no ``}`` is returned unchanged.
    """
    # rpartition gives ('', '', t) when '}' is absent, so the tail is then
    # the whole tag.
    _, _, local_name = t.rpartition('}')
    return local_name
def calculate_level(event, tname, level, node):
    """Return the nesting depth after applying one parse event.

    Depth is counted from the most recently opened ``node`` element, which
    itself sits at depth 1.

    Args:
        event: Parse event name, e.g. ``'start'`` or ``'end'``.
        tname: Tag name of the element the event belongs to.
        level: Depth before this event.
        node: Tag name that resets the depth to 1 when it opens.

    Returns:
        ``1`` when ``node`` opens, ``level + 1`` for any other opening tag,
        ``level - 1`` for a closing tag, and ``level`` unchanged for any
        other event.
    """
    if event == 'start':
        return 1 if tname == node else level + 1
    if event == 'end':
        return level - 1
    # Any other event neither opens nor closes an element. Returning the
    # depth unchanged (instead of falling through to None) keeps the
    # caller's counter usable.
    return level
# Module-level switch; no code visible in this file reads it.
# NOTE(review): presumably consulted by other modules of the importer --
# confirm it is still needed.
USE_EXTRA_ARTISTS = False
def parse_artists(pathXML):
    """Lazily yield one dict per ``<artist>`` element found in an XML file.

    The file is read incrementally with ``iterparse`` and every element is
    cleared as soon as its closing tag has been handled. Any ``<artist>``
    opening tag starts a new record.

    Args:
        pathXML: Filename or file object, passed straight to ``iterparse``.

    Yields:
        dict: ``id``, ``name``, ``profile`` and ``spotify_id`` hold the text
        of the same-named direct children of the artist (``None`` when
        absent); ``social_media_links`` lists the text of every ``<url>``
        grandchild; ``biography`` is always ``None``.

    Raises:
        xml.etree.ElementTree.ParseError: If the document is not
            well-formed. The error is raised by the ``iterparse`` iterator
            while the loop advances, so it cannot be handled per element
            inside the loop body; records yielded before the error remain
            valid.
    """
    direct_fields = ('id', 'name', 'spotify_id', 'profile')
    artist = None
    level = 0
    for event, elem in etree.iterparse(pathXML, events=('start', 'end')):
        tname = strip_tag_name(elem.tag)
        # On 'end' calculate_level steps back out of the element, so
        # remember the depth the element itself sits at before updating.
        depth = level
        level = calculate_level(event, tname, level, 'artist')

        if event == 'start':
            if tname == 'artist':
                artist = {
                    'id': None,
                    # NOTE(review): nothing in this parser fills
                    # 'biography' -- confirm whether it should mirror
                    # 'profile'.
                    'biography': None,
                    'profile': None,
                    'spotify_id': None,
                    'social_media_links': [],
                    # Always present so consumers can index it without a
                    # KeyError when the artist has no <name>.
                    'name': None,
                }
            # Text is only guaranteed to be complete at the 'end' event, so
            # neither read nor clear the element here.
            continue

        # Ignore elements outside an open <artist> so stray tags can neither
        # crash on a missing record nor alter one that was already yielded.
        if artist is not None:
            if tname == 'artist':
                yield artist
                artist = None
            elif depth == 2 and tname in direct_fields:
                artist[tname] = elem.text
            elif depth == 3 and tname == 'url':
                artist['social_media_links'].append(elem.text)
        # The element is complete and consumed; release its contents.
        elem.clear()