User:Psiĥedelisto/VisualEditor ref namer.py
The VisualEditor (very annoyingly!) doesn't name references added by users, and instead gives them names like :0, :1, etc. This script fixes that automatically. It might be buggy; it has only ever been tested on osteogenesis imperfecta and furry fandom.
Requires [https://github.com/earwig/mwparserfromhell mwparserfromhell]. Input filename is first and only argument. Outputs completed wiki page to stdout, and some info on what changed to stderr.
#!/usr/bin/env python3
import mwparserfromhell
from mwparserfromhell.wikicode import Tag, Wikicode, Wikilink
import re
import sys
# --- Setup: read the wiki page named on the command line and index its <ref> tags.

if len(sys.argv) != 2:
    sys.exit("usage: {} INPUT_FILE".format(sys.argv[0]))
_, input_filename = sys.argv

# Read as UTF-8 explicitly rather than relying on the platform's default
# encoding.
with open(input_filename, encoding="utf-8") as f:
    inp = f.read()

parsed = mwparserfromhell.parse(inp)


def get_all_links(parsed):
    """Return a lazy iterator over every wikilink in *parsed*, recursively."""
    return parsed.ifilter(forcetype=Wikilink, recursive=True)


def get_all_tags():
    """Return a lazy iterator over every <ref> tag of the parsed page.

    The pattern is anchored to the start of the tag text: the regular
    expression is searched within the whole tag string, so an unanchored
    pattern would also select <references/> and any other tag that merely
    contains a <ref>.
    """
    return parsed.ifilter(forcetype=Tag, matches=r"^<\s*ref\b", recursive=True)


# Take one snapshot so that the lists below share the same ordering.
_all_ref_tags = list(get_all_tags())

# Ref tags that already carry a name attribute.
tags = [t for t in _all_ref_tags if t.has("name")]

# Ref tags without a name, and the position of each one within the
# get_all_tags() sequence (tags_noname[j] sits at index tags_noname_idxs[j]).
# Positions come from a single pass instead of comparing tags for equality,
# so two textually identical refs still get one index each.
tags_noname = [t for t in _all_ref_tags if not t.has("name")]
tags_noname_idxs = [i for i, t in enumerate(_all_ref_tags) if not t.has("name")]

# Named refs whose name has the form ":<digits>" and that are not
# self-closing, i.e. the tags that hold the actual citation.
refs = [
    t for t in tags
    if re.search(r"^:\d+$", str(t.get("name").value)) and not str(t).endswith("/>")
]
def find_date(template):
    """Return the first four-digit year found in the template's date fields.

    The parameters "date", "year" and "airdate" are tried in that order. A
    parameter that is present but holds no four-digit number (for example an
    empty ``date=``) is skipped so that a later parameter can still supply
    the year.

    :param template: object offering ``has(name)`` and ``get(name).value``.
    :returns: the year as a four-character string, or None if none is found.
    """
    for param in ("date", "year", "airdate"):
        if not template.has(param):
            continue
        match = re.search(r"\d{4}", str(template.get(param).value))
        if match:
            return match.group(0)
    return None
def by_work(v, template):
    """Build a reference name from a work/publisher value plus a year.

    Wikilinks inside *v* are replaced by their link target, all whitespace is
    removed, and the year reported by find_date(template) is appended.

    :param v: wikitext of the work-like template parameter.
    :param template: the citation template, used to look up the year.
    :returns: "<work><year>", or None when either part is missing.
    """
    parsed_v = mwparserfromhell.parse(v)
    # get_all_links() is a lazy filter over parsed_v; snapshot it before
    # editing so the tree is not modified while it is still being walked.
    for link in list(get_all_links(parsed_v)):
        parsed_v.replace(link, str(link.title))

    work = re.sub(r"\s", "", str(parsed_v))
    if not work:
        return None

    date = find_date(template)
    if date is None:
        return None
    return "{}{}".format(work, date)
def by_surname(v, template):
    """Build a reference name from an author value plus a year.

    The surname is the text before the first comma if *v* contains one,
    otherwise the first whitespace-separated word. Surrounding whitespace is
    ignored, so a value written as ``| last = Smith`` still yields "Smith"
    instead of an empty surname.

    :param v: text of the author-like template parameter.
    :param template: the citation template, used to look up the year.
    :returns: "<surname><year>", or None when either part is missing.
    """
    v = v.strip()
    if "," in v:
        last = v.partition(",")[0].strip()
    else:
        words = v.split(None, 1)
        last = words[0] if words else ""
    if not last:
        return None

    date = find_date(template)
    if date is None:
        return None
    return "{}{}".format(last, date)
# Template parameters consulted for a name, in order of preference.
_AUTHOR_PARAMS = ("vauthors", "authors", "last")
_WORK_PARAMS = ("work", "website", "publisher", "series-link", "series")


def _suggest_name(template):
    """Return the first usable name derived from *template*, or None.

    Author parameters are tried before work parameters. A parameter that is
    present but yields nothing usable (e.g. it is empty) does not stop the
    search; the next candidate is tried instead.
    """
    for params, namer in ((_AUTHOR_PARAMS, by_surname), (_WORK_PARAMS, by_work)):
        for param in params:
            if not template.has(param):
                continue
            name = namer(str(template.get(param).value), template)
            # Names of one character or less are not considered usable.
            if name is not None and len(name.strip()) > 1:
                return name
    return None


def build_refs(refs):
    """Map each ref tag to a suggested descriptive name.

    Only refs whose first content node offers ``has`` (as templates do) are
    considered. Refs for which no name can be derived are left out.

    :param refs: sequence of ref tags. For tags without a name attribute the
        position in this sequence is used to index ``tags_noname_idxs``, so
        such tags must be passed in the same order as ``tags_noname``.
    :returns: dict keyed by the current ref name (named tags) or by the tag's
        index from ``tags_noname_idxs`` (unnamed tags), with the suggested
        name as value.
    """
    pretty = {}
    for i, ref in enumerate(refs):
        contents = ref.contents
        # Nothing to inspect when the tag has no contents at all
        # (presumably self-closing or empty refs -- TODO confirm).
        if contents is None or not contents.nodes:
            continue
        template = contents.get(0)
        if not hasattr(template, "has"):
            continue

        name = _suggest_name(template)
        if name is None:
            continue

        if ref.has("name"):
            pretty[str(ref.get("name").value)] = name
        else:
            pretty[tags_noname_idxs[i]] = name
    return pretty
# --- Main: rename the refs, normalise template capitalisation, emit results.

# Templates whose name is written with a lowercase first letter ("lang-xx"
# templates are handled by prefix below); every other template name gets an
# uppercase first letter.
_LOWERCASE_TEMPLATES = {"rp", "ill", "lang", "respell", "abbr"}

pretty = build_refs(refs)
pretty_noname = build_refs(tags_noname)

for i, tag in enumerate(get_all_tags()):
    if tag.has("name"):
        old_name = str(tag.get("name").value)
        if old_name in pretty:
            # Assign through the name attribute itself: it is not guaranteed
            # to be the tag's first attribute (e.g. when "group" precedes it).
            tag.get("name").value = pretty[old_name]
    elif i in pretty_noname:
        tag.add("name", value=pretty_noname[i])

for template in parsed.ifilter_templates():
    tn = template.name.strip()
    if not tn:
        # No first character to change case on.
        continue
    lowered = tn.lower()
    if lowered in _LOWERCASE_TEMPLATES or lowered.startswith("lang-"):
        template.name = tn[0].lower() + tn[1:]
    else:
        template.name = tn[0].upper() + tn[1:]
    print(tn, "⇒", template.name, file=sys.stderr)

# The finished page is the only thing written to stdout.
print(parsed)

for old_name, new_name in pretty.items():
    print(old_name, "⇒", new_name, file=sys.stderr)
for i, new_name in pretty_noname.items():
    print("NONAME", i, "⇒", new_name, file=sys.stderr)

# Report whether distinct ":N" refs were given the same new name. Colliding
# names are NOT resolved here; the user has to fix them up by hand.
new_names = list(pretty.values())
duplicates = len(new_names) - len(set(new_names))
if duplicates == 0:
    print("All replacements unique", file=sys.stderr)
else:
    # Diagnostics go to stderr so they never end up inside the page output.
    print(
        "Some replacements not unique: {}/{}!".format(duplicates, len(new_names)),
        file=sys.stderr,
    )