diff --git a/data.yml b/data.yml index 47229559..e1db190b 100644 --- a/data.yml +++ b/data.yml @@ -198,6 +198,31 @@ Pulmonary Arterial Hypertension: company: Actelion generic: epoprostenol systemic regex: '[A-Z][A-Z][A-Z]-[0-9][0-9][0-9][0-9][0-9]' +Respiratory - Chronic Obstructive Pulmonary Disease: + bevespi.com: + dates: + 20170430: + code: 2047004-3340900 + drug: + company: AstraZeneca + generic: formoterol and glycopyrrolate + regex: '[0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9][0-9]' + mysymbicort.com: + dates: + 20170228: + code: 1945203-3326734 + drug: + company: AstraZeneca + generic: budesonide and formoterol + regex: '[0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9][0-9]' + startwithanoro.com: + dates: + 20170228: + code: 783658R0 + drug: + company: GSK + generic: umeclidinium and vilanterol + regex: '[0-9][0-9][0-9][0-9][0-9][0-9][A-Z][0-9]' Rheumatoid Arthritis: arava.com: dates: @@ -252,6 +277,6 @@ Rheumatoid Arthritis: 20170528: code: 0026VMV02 drug: - company: Horizon + company: Horizon Pharma generic: esomeprazole / naproxen systemic regex: '[0-9][0-9][0-9][0-9][A-Z][A-Z][A-Z][0-9][0-9]' diff --git a/moai.py b/moai.py index 71288a65..f4b8c7e4 100644 --- a/moai.py +++ b/moai.py @@ -29,7 +29,7 @@ def dict_constructor(loader, node): # find regulatory code changes for indication in data: - + # what indication? print indication @@ -41,8 +41,9 @@ def dict_constructor(loader, node): # get the html request = urllib2.Request('http://' + website, headers={'User-Agent' : "Moai"}) html_content = urllib2.urlopen(request).read() + # search for the code using the regex defined per website - live_matches = re.findall(data[indication][website]['regex'], html_content); + live_matches = re.findall(data[indication][website]['regex'], re.sub('<[^<]+?>', '', html_content)); # get the most recent date most_recent_date = data[indication][website]['dates'].keys()[-1] @@ -109,7 +110,7 @@ def dict_constructor(loader, node): plt.savefig('data/' + website.replace("/","-") + '.png', bbox_inches='tight') plt.close('all') - + content += '\n| [{0}](http://{0}) | {1} | {2} | ![{3}](data/{3}.png) |'.format(website, data[indication][website]['drug']['company'], data[indication][website]['drug']['generic'], website.replace("/","-"))