User:PotatoBot/Code/5

#!/usr/bin/python
# -*- coding: utf-8 -*-

import wikipedia as w

import codecs, catlib, pagegenerators, time, mysave, re

from datetime import date

# PotatoBot Task 5: Creates redirects from trade names to drug articles

# Brand-name strings that are never collected as trade names: the loader
# drops any (suffix-stripped) brand name found in this tuple. The empty
# string is listed so that blank names are dropped as well.
excludes = (
    '',
    u'Adiuretin',
    u'Amicacin',
    u'Apirelina',
    u'Bayer Brand of Azlocillin‎',
    u'Benzchinamide',
    u'BZQ',
    u'Brevital sodium',
    u'Cephaloglycine',
    u'Cephaoglycin acid',
    u'Chlorazepate',
    u'Chlorazepic acid',
    u'Cialis/Tadalafil',
    u'Cialis/Taladafil Hcl',
    u'Citrovorum factor',
    u'Clorazepic acid',
    u'Co-Vidarabine',
    u'Covidarabine',
    u'Dapropterin',
    u'Deacetyllanatoside C',
    u'Dextrin, Caloreen',
    u'Diovan HCT',
    u'Dl-Tranylcypromine',
    u'Dolasteron',
    u'Fish berry',
    u'Fondaparinux sodium',
    u'FR-008-III',
    u'Fujimycin',
    u'Gamatran citrate',
    u'Genzyme)',
    u'Glycocoll',
    u'Guanabenz(USAN)',
    u'Indian berry',
    u'IRINOTECAN, CPT-11',
    u'Karnitin',
    u'Kitasamycin A3',
    u'Kyselina jantarova',
    u'Lrbesartan',
    u'Lyophilized Cytoxan',
    u'Lysuride',
    u'Metamfetamine',
    u'Methenamide',
    u'Methoxamedrine',
    u'Metossamina',
    u'Metoxamina',
    u'Micardis HCT',
    u'Naphcillin',
    u'Navelbine Base',
    u'Oriental berry',
    u'Polymyxin E. Sulfate',
    u'Polymyxin E sulfate',
    u'Propilniazida',
    u'Pyridium Plus',
    u'Quetiapin hemifumarate',
    u'Sapropterin',
    u'Secremax, SecreFlo',
    u'Sodium sulamyd',
    u'Spongoadenosine',
    u'SU-11248',
    u'THIORPHAN',
    u'Trifluopromazine',
    u'Turimycin A5',
    u'Wellcome U3B',
    u'Xiaflextm',
)

# Redirect-tagging templates, keyed by whether the trade name is only a
# spelling variant of the generic name (True) or a real trade name (False).
_R_TEMPLATES = {False: '{{R from trade name}}', True: '{{R from alternative name}}'}

# Compiled once instead of once per trade name.
_BRAND_TEMPLATE_RE = re.compile(r'\{\{\s*[Rr] from brand name\s*\}\}')
_ALTERNATIVE_TEMPLATE_RE = re.compile(r'\{\{\s*[Rr] from alternative name\s*\}\}')
_TRADE_TEMPLATE_RE = re.compile(r'\{\{\s*[Rr] from trade name\s*\}\}')


def _strip_brand_suffix(name):
    """Cut a brand name at the first ' (', ' [' or ' Roche' marker, if any.

    The markers are tried in that order; the first one present decides where
    the name is cut. A name without any marker is returned unchanged.
    """
    for marker in (' (', ' [', ' Roche'):
        pos = name.find(marker)
        if pos != -1:
            return name[:pos].strip()
    return name


def _end_tag_number(line):
    """Return the card number carried by an '#END_DRUGCARD' line, or None.

    The BEGIN tag is read as int(line[18:23]) for a 15-character tag name;
    the END tag name is two characters shorter, so the same five digits are
    expected at line[16:21]. None is returned when no digits are found there,
    in which case the caller skips the begin/end comparison.
    """
    digits = line[16:21]
    if digits.isdigit():
        return int(digits)
    return None


def _load_drugcards(path):
    """Parse the DrugBank flat file at `path`.

    Returns a tuple (db, log):
      db  -- dict mapping generic name -> [list of brand names, card number]
      log -- wiki-list lines ('# ...\\n') describing malformed cards

    Only text inside a BEGIN/END card pair is read. Brand names that are
    'Not Available', contain 'hydrochloride', are 25+ characters long, or are
    listed in `excludes` are dropped.
    """
    db = {}
    log = ''
    n = 0              # number of the card currently open, 0 when none is
    field = ''         # name of the '# Field:' header most recently seen
    generic = ''
    card = [[], 0]     # [brand names, card number] of the card being read

    with codecs.open(path, 'r', 'utf-8') as f:
        for raw in f:
            line = raw.strip()
            if line[:15] == '#BEGIN_DRUGCARD':
                n = int(line[18:23])
                card = [[], n]
                generic = ''
            elif line[:13] == '#END_DRUGCARD':
                end_n = _end_tag_number(line)
                if generic == '':
                    w.output(' \03{red}ERROR IN DRUGCARD %d: no generic name\03{default}' % n)
                    # The log format has two placeholders; the original passed
                    # only `n`, which raised TypeError.
                    log += '# %s: Error in Drugcard %d (no generic name found)\n' % ('(unnamed)', n)
                elif n != card[1] or (end_n is not None and end_n != card[1]):
                    # `n != card[1]` catches an END tag with no open card;
                    # the second test compares the ID written on the END tag.
                    w.output(' \03{red}ERROR IN DRUGCARD %d: IDs do not match\03{default}' % n)
                    log += '# %s: Error in Drugcard %d (IDs of start and end tag do not match)\n' % (generic, n)
                elif generic in db:
                    w.output(' \03{red}ERROR IN DRUGCARDS %d, %d: generic name found twice\03{default}' % (n, db[generic][1]))
                    log += '# %s: Error in Drugcards %d, %d (generic name found twice)\n' % (generic, n, db[generic][1])
                elif len(card[0]) > 8:
                    # NOTE(review): only cards with more than 8 usable brand
                    # names are kept; confirm this threshold is intended.
                    db[generic] = card
                n = 0
            elif line[:2] == '# ':
                # Field header: drop the leading '# ' and the last character.
                field = line[2:-1]
            elif line != '' and n != 0:
                if field == 'Generic_Name':
                    generic = line
                elif (field == 'Brand_Names' and line != 'Not Available'
                        and 'hydrochloride' not in line.lower() and len(line) < 25):
                    name = _strip_brand_suffix(line)
                    if name not in excludes:
                        card[0].append(name)
    return db, log


def _swap_template(text, match, template):
    """Return `text` with the span covered by `match` replaced by `template`."""
    return text[:match.start()] + template + text[match.end():]


def _update_existing_redirect(tradepage, drugpage, alt):
    """Check an existing redirect and correct its R-template if needed.

    Returns the text to append to the log: a complaint when the redirect
    points somewhere other than `drugpage`, otherwise whatever
    mysave.savepage returns (presumably a log line -- verify against mysave),
    or '' when the page already carries the right template.
    """
    template = _R_TEMPLATES[alt]
    other = _R_TEMPLATES[not alt]
    text = tradepage.get(get_redirect=True)
    brand = _BRAND_TEMPLATE_RE.search(text)
    alternative = _ALTERNATIVE_TEMPLATE_RE.search(text)
    trade = _TRADE_TEMPLATE_RE.search(text)

    if tradepage.getRedirectTarget() != drugpage:
        w.output(" \03{yellow}%s doesn't redirect to the right page (%s)?\03{default}"
                 % (tradepage.title(), drugpage.title()))
        return '# %s: redirects to %s instead of %s\n' \
            % (tradepage.aslink(), tradepage.getRedirectTarget().aslink(), drugpage.aslink())
    if brand:
        return mysave.savepage(tradepage, _swap_template(text, brand, template),
                               'Replace {{R from brand name}} with ' + template, minor = True)
    if alternative and not alt:
        return mysave.savepage(tradepage, _swap_template(text, alternative, template),
                               'Replace ' + other + ' with ' + template, minor = True)
    if trade and alt:
        # The original summary named the two templates in the wrong order,
        # describing the opposite of the edit being made.
        return mysave.savepage(tradepage, _swap_template(text, trade, template),
                               'Replace ' + other + ' with ' + template, minor = True)
    if not trade and not alternative:
        # Insert the template where the category-stripped text ends; this
        # assumes category links sit at the end of the page -- TODO confirm.
        minus_cats = w.removeCategoryLinks(text, w.getSite())
        return mysave.savepage(tradepage, minus_cats + ' ' + template + text[len(minus_cats):],
                               'Add ' + template, minor = True)
    return ''


def main():
    """Create or retag redirects from DrugBank trade names to drug articles.

    Reads 'data/drugcards.txt', then for every generic name visits each
    collected trade name: existing redirects are checked and retagged, missing
    ones are created via mysave.makeredir when the drug article exists, and
    everything else is noted. The accumulated notes are appended to
    'User:PotatoBot/Lists/Trade names log'.
    """
    # Prepare log
    listout = '\n'

    # Load DrugBank data
    db, load_log = _load_drugcards('data/drugcards.txt')
    listout += load_log
    w.output('DrugBank entries loaded: %d' % len(db))

    # Create redirects
    site = w.getSite()
    for drug in db:
        w.output('* %s (#%d)' % (drug, db[drug][1]))
        drugpage = mysave.resolveredir(w.Page(site, drug))
        if not drugpage.exists():
            drugpage = mysave.resolveredir(w.Page(site, drug.capitalize()))

        # A trade name equal to the article title with its final 'e' dropped
        # and optionally 'a', 'e', 'o' or 'um' appended is treated as an
        # alternative name rather than a trade name.
        title = drugpage.title()
        drug_e = title[:-1] if title.endswith('e') else title
        variants = (drug_e, drug_e + 'a', drug_e + 'e', drug_e + 'o', drug_e + 'um')
        prefix = drug.lower() + ' '

        notfound = []
        for tradename in db[drug][0]:
            # Skip names that are just the generic name plus further words.
            if tradename.lower().startswith(prefix):
                continue
            tradepage = w.Page(site, tradename)
            alt = tradename in variants
            if tradepage.exists() and tradepage.isRedirectPage():
                listout += _update_existing_redirect(tradepage, drugpage, alt)
            elif drugpage.exists():
                # NOTE(review): also reached when tradepage exists but is not
                # a redirect; presumably mysave.makeredir copes -- verify.
                listout += mysave.makeredir(tradepage, drugpage, _R_TEMPLATES[alt])
            else:
                notfound.append(tradepage.aslink())
        if notfound:
            listout += '# %s: target %s not found\n' % (', '.join(notfound), drugpage.aslink())

    # Output log
    listout += '\nTrade names from DrugBank completely included.'
    w.output('')
    logpage = w.Page(site, 'User:PotatoBot/Lists/Trade names log')
    # The edit summary says the page is being created, so it may not exist
    # yet; only read the old text when it does.
    previous = logpage.get() if logpage.exists() else ''
    mysave.savepage(logpage, previous + listout, 'Creating trade names log')

# Script entry point: run the task only when executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether or not main() raised, so the framework's shutdown
        # hook is always called.
        w.stopme()

{{User:PotatoBot/Code/Save}}