User:Psiĥedelisto/VisualEditor ref namer.py

The VisualEditor (very annoyingly!) doesn't give meaningful names to references added by users, and instead gives them names like :0, :1, etc. This script fixes that automatically. It might be buggy; it has only ever been tested on osteogenesis imperfecta and furry fandom.

Requires [https://github.com/earwig/mwparserfromhell mwparserfromhell]. Input filename is first and only argument. Outputs completed wiki page to stdout, and some info on what changed to stderr.

#!/usr/bin/env python3

import mwparserfromhell

from mwparserfromhell.wikicode import Tag, Wikicode, Wikilink

import re

import sys

# Exactly one argument is expected: the path of the wikitext file to process.
if len(sys.argv) != 2:
    sys.exit("usage: {} INPUT_FILE".format(sys.argv[0]))
input_filename = sys.argv[1]

# Wikitext is UTF-8; do not depend on the platform's default encoding.
with open(input_filename, encoding="utf-8") as f:
    inp = f.read()

parsed = mwparserfromhell.parse(inp)


def get_all_links(parsed):
    """Return a lazy iterator over every wikilink inside *parsed* (recursive)."""
    return parsed.ifilter(forcetype=Wikilink, recursive=True)


def get_all_tags():
    """Return a lazy iterator over the <ref> tags of the parsed page."""
    # Anchored at the start and closed with \b so that only tags whose own
    # name is "ref" match; an unanchored search also hits <references> and
    # any other tag that merely contains a <ref> somewhere inside it.
    return parsed.ifilter(forcetype=Tag, matches=r"^<\s*ref\b", recursive=True)


# Snapshot the tags once; positions in this list are the indices used later
# to address unnamed tags.
all_tags = list(get_all_tags())

# Tags that already carry a name attribute.
tags = [t for t in all_tags if t.has("name")]

# Unnamed tags and their positions, collected in one pass. Matching them up
# afterwards by equality is unreliable: mwparserfromhell nodes compare by
# their text, so two identical unnamed refs would each match twice.
tags_noname_idxs = [i for i, t in enumerate(all_tags) if not t.has("name")]
tags_noname = [all_tags[i] for i in tags_noname_idxs]

# Named refs that still have a VisualEditor placeholder name (":0", ":1", ...)
# and are not self-closing reuses such as <ref name=":0" />.
refs = [
    t for t in tags
    if re.search(r"^:\d+$", str(t.get("name").value))
    and not re.search(r"/>$", str(t))
]

def find_date(template):
    """Return the first four-digit year found in the template's date fields.

    The parameters ``date``, ``year`` and ``airdate`` are examined in that
    order. A parameter that is present but holds no run of four digits
    (for example an empty ``date=``) no longer hides a later candidate.

    Args:
        template: Object exposing ``has(name)`` and ``get(name).value``;
            the script passes the first node of a ``<ref>`` tag.

    Returns:
        The year as a string of four digits, or ``None`` when none of the
        candidate parameters is present or none contains four digits.
    """
    for name in ("date", "year", "airdate"):
        if not template.has(name):
            continue
        match = re.search(r"\d{4}", str(template.get(name).value))
        if match:
            return match.group(0)
    return None

def by_work(v, template):
    """Build a reference name from a work-like value and the citation year.

    Args:
        v: Raw wikitext of the parameter value (may contain wikilinks).
        template: Citation template, handed to ``find_date`` for the year.

    Returns:
        The value with each wikilink replaced by its target title and all
        whitespace removed, followed by the year. ``None`` when no year is
        found or when nothing but whitespace is left of the value.
    """
    parsed_v = mwparserfromhell.parse(v)
    # Snapshot the links before editing: the iterator is lazy and the tree
    # is modified by every replace() call.
    for link in list(get_all_links(parsed_v)):
        parsed_v.replace(link, str(link.title))

    date = find_date(template)
    if date is None:
        return None

    work = re.sub(r"\s", "", str(parsed_v))
    if not work:
        return None
    return "{}{}".format(work, date)

def by_surname(v, template):
    """Build a reference name from an author value and the citation year.

    The surname is taken to be everything before the first comma, or, when
    there is no comma, everything before the first space. Surrounding
    whitespace is removed first, so a value written as ``| last = Smith``
    is no longer split at its leading space (which produced an empty
    surname) and no trailing space or newline ends up in the name.

    Args:
        v: Text of the author parameter.
        template: Citation template, handed to ``find_date`` for the year.

    Returns:
        Surname followed by the year, or ``None`` when the surname is empty
        or no year is found.
    """
    v = v.strip()
    if "," in v:
        last = v.partition(",")[0]
    else:
        # partition() returns the whole string when there is no space.
        last = v.partition(" ")[0]
    last = last.strip()
    if not last:
        return None

    date = find_date(template)
    if date is None:
        return None
    return "{}{}".format(last, date)

def build_refs(refs):
    """Work out a descriptive name for each reference tag in *refs*.

    Only the first node of a tag's contents is looked at, and only if it
    exposes ``has`` (as template nodes do). An author-like parameter
    (``vauthors``, ``authors``, ``last``) is preferred; otherwise a
    work-like parameter (``work``, ``website``, ``publisher``,
    ``series-link``, ``series``) is used. Tags for which no name of more
    than one character can be built are left out of the result.

    Args:
        refs: Sequence of ``<ref>`` tag nodes.

    Returns:
        dict mapping, for a tag that has a name, its current name to the
        new name; for a tag without a name, ``tags_noname_idxs[i]`` (``i``
        being the tag's position in *refs*) to the new name. The second
        form reads the module-level ``tags_noname_idxs`` list.
    """
    pretty = {}
    for i, ref in enumerate(refs):
        contents = ref.contents
        # An empty or content-less tag has no first node; skip it rather
        # than let get(0) raise.
        if contents is None or not contents.nodes:
            continue
        template = contents.get(0)
        if not getattr(template, "has", False):
            continue

        last_key = next(
            (k for k in ("vauthors", "authors", "last") if template.has(k)),
            None,
        )
        if last_key is not None:
            v = by_surname(str(template.get(last_key).value), template)
        else:
            work_key = next(
                (k for k in ("work", "website", "publisher", "series-link", "series")
                 if template.has(k)),
                None,
            )
            if work_key is None:
                continue
            v = by_work(str(template.get(work_key).value), template)

        # Reject missing names and names too short to be meaningful.
        if v is None or len(v.strip()) <= 1:
            continue

        if ref.has("name"):
            pretty[str(ref.get("name").value)] = v
        else:
            pretty[tags_noname_idxs[i]] = v
    return pretty

# Old name -> new name for refs with a placeholder name, and tag position ->
# new name for refs that have no name at all.
pretty = build_refs(refs)
pretty_noname = build_refs(tags_noname)

for i, tag in enumerate(get_all_tags()):
    if tag.has("name"):
        k = str(tag.get("name").value)
        if k in pretty:
            # Update the name attribute itself; it is not necessarily the
            # first attribute (e.g. <ref group="n" name=":0">).
            tag.get("name").value = pretty[k]
    elif i in pretty_noname:
        tag.add("name", value=pretty_noname[i])

# Templates whose name is written with a lower-case first letter; every other
# template gets an upper-case first letter.
_LOWERCASE_TEMPLATES = {"rp", "ill", "lang", "respell", "abbr"}

for template in parsed.ifilter_templates():
    tn = template.name.strip()
    if not tn:
        # Nothing to re-case; tn[0] would raise on an empty name.
        continue
    lowered = tn.lower()
    if lowered in _LOWERCASE_TEMPLATES or lowered.startswith("lang-"):
        template.name = tn[0].lower() + tn[1:]
    else:
        template.name = tn[0].upper() + tn[1:]
    print(tn, "⇒", template.name, file=sys.stderr)

# The finished page is the only thing written to stdout.
print(parsed)

for k, v in pretty.items():
    print(k, "⇒", v, file=sys.stderr)

for i, v in pretty_noname.items():
    print("NONAME", i, "⇒", v, file=sys.stderr)

total = len(pretty)
uniq = len(set(pretty.values()))

if uniq == total:
    print("All replacements unique", file=sys.stderr)
else:
    # Diagnostics belong on stderr so they never end up in the page output.
    print("Some replacements not unique: {}/{}!".format(total - uniq, total), file=sys.stderr)