-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathrfc2bib.py
48 lines (42 loc) · 1.62 KB
/
rfc2bib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# URL of the RFC index XML document that this script downloads and parses.
# The FTP variant below is an alternative location kept for reference.
#URL_RFC_XML = 'ftp://ftp.rfc-editor.org/in-notes/rfc-index.xml'
URL_RFC_XML = 'http://www.rfc-editor.org/in-notes/rfc-index.xml'
from lxml import etree
import requests
# Template for one BibTeX @techreport entry.  It is filled in with the %
# operator from a dict providing the keys: key, title, author, number, month,
# year and url.  The title is wrapped in double braces; month and year are
# inserted without braces.
BIB_ENTRY_FORMAT = """\
@techreport{%(key)s,
title = {{%(title)s}},
author = {%(author)s},
type = {RFC},
number = {%(number)s},
institution = {IETF},
month = %(month)s,
year = %(year)s,
url = {%(url)s},
}
"""
def tag_prefix(s):
    """Return tag name *s* qualified with the rfc-index XML namespace.

    The result has the ``{namespace}tag`` form, e.g.
    ``{http://www.rfc-editor.org/rfc-index}title``.
    """
    namespace = '{http://www.rfc-editor.org/rfc-index}'
    return namespace + s
def normalize_authors(authors):
    """Convert a list of author names into a BibTeX ``author`` field value.

    Each name is split on whitespace; the last token is treated as the
    surname and moved to the front, so ``"J. Postel"`` becomes
    ``"Postel, J."``.  A single-token name (e.g. ``"IAB"``) is emitted
    unchanged rather than with a dangling ``", "``.  Blank, whitespace-only
    and ``None`` entries are skipped.  The resulting names are joined with
    ``" and "``.

    Reference:
    https://github.com/hupili/utility/blob/master/latex/tex-bib-author.py

    INPUT: an iterable of author name strings
    OUTPUT: a single string; empty when no usable name is present
    """
    normalized = []
    for author in authors:
        # A None or empty entry yields no tokens and is dropped.
        tokens = author.split() if author else []
        if not tokens:
            continue
        if len(tokens) == 1:
            # Nothing to reorder; avoid producing "Name, ".
            normalized.append(tokens[0])
        else:
            normalized.append(tokens[-1] + ', ' + ' '.join(tokens[:-1]))
    return ' and '.join(normalized)
# Download the RFC index and print one BibTeX @techreport entry per
# <rfc-entry> element.  The timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() stops an HTTP error page from
# being handed to the XML parser.
response = requests.get(URL_RFC_XML, timeout=60)
response.raise_for_status()
index = etree.fromstring(response.content)
rfcs = index.findall(tag_prefix('rfc-entry'))
# For a quick trial run, loop over a slice such as rfcs[0:30] instead.
for r in rfcs:
    d = {}
    # The doc-id text doubles as the BibTeX citation key.
    d['key'] = r.find(tag_prefix('doc-id')).text
    # Assumes the doc-id is three prefix characters followed by digits
    # (e.g. "RFC0791"); int() also drops any leading zeros.
    d['number'] = int(d['key'][3:])
    d['title'] = r.find(tag_prefix('title')).text
    _a = [a.find(tag_prefix('name')).text
          for a in r.findall(tag_prefix('author'))]
    d['author'] = normalize_authors(_a)
    date = r.find(tag_prefix('date'))
    d['year'] = date.find(tag_prefix('year')).text
    # Keep the first three letters, lower-cased (e.g. "April" -> "apr");
    # the template inserts this value without braces.
    d['month'] = date.find(tag_prefix('month')).text[:3].lower()
    d['url'] = 'http://tools.ietf.org/rfc/%s.txt' % d['key'].lower()
    # NOTE: "obsoletes" relations are not exported.
    print(BIB_ENTRY_FORMAT % d)