User:Monkbot/task 13: remove replace deprecated subscription registration parameters

Task 13 is a single-use task that removes or replaces deprecated {{para|subscription}} and {{para|registration}} parameters in existing cs1|2 templates.

description

cs1|2 has deprecated {{para|subscription}} and {{para|registration}} at this RFC (aspect B3). This task:

  1. applies only to canonically named templates; redirects and template wrappers are not acknowledged
  2. ignores cs1|2 templates that have {{para|subscription}} and {{para|registration}} parameters that are not assigned one of the three allowed values (yes, y, true)
  3. does nothing when the citation template holds:
     * any of these url parameters (with assigned values):
       ** {{para|url}}, {{para|article-url}}, {{para|chapter-url}}, {{para|entry-url}}, {{para|section-url}} (the url list)
     * AND holds any of these identifier parameters (with assigned values):
       ** {{para|doi}}, {{para|DOI}}, {{para|jstor}}, {{para|JSTOR}}, {{para|bibcode}}, {{para|hdl}}, {{para|HDL}}, {{para|ol}}, {{para|OL}}, {{para|osti}}, {{para|OSTI}} (the identifier list)
     * this is because the task cannot know which of the url parameter or the identifier parameter the original editor intended to be 'marked' by the deprecated parameters (could be one, the other, or both)
  4. does nothing when the citation template holds more than one of the url list parameters; again, could be one, the other, or both
  5. removes {{para|subscription}} and {{para|registration}} parameters when the citation template does not have any of the url list parameters; cs1|2 identifier parameters are presumed to lie behind a paywall or registration barrier; cs1|2 does not highlight the norm so {{para|subscription}} and {{para|registration}} are superfluous in these citation templates
  6. replaces {{para|subscription}} and {{para|registration}} with the appropriate {{para|<{{var|xxx-}}>url-access}} parameter when the citation template holds only one of the url list parameters

Task 13 skips pages that include {{tlx|bots|deny{{=}}Monkbot13}}.

ancillary tasks

Empty {{para|subscription}} and {{para|registration}} parameters are deleted. This task does not do awb general fixes.

script

// this script removes / replaces deprecated |subscription= and |registration= parameters from cs1|2 templates

//

// to make a list for awb use category: CS1 errors: deprecated parameters

// Regex fragment that recognizes the name of a cs1|2 template (the text that follows the opening "{{").
// It accepts three forms:
//   - "cite"/"Cite" plus optional whitespace, when a lookahead finds one of the listed template types
//     (book, journal, news, web, ...); the type name itself is only looked at, not consumed
//   - "citation"/"Citation"
//   - bare "cite"/"Cite" when a lookahead finds optional whitespace and then a pipe
// "news" is rejected when it is followed by "group" or "paper".
string IS_CS1 = @"(?:[Cc]ite\s*(?=(?:AV media(?: notes)?)|[Aa][Vv] media|[Aa][Vv] media notes|article|ar[Xx]iv|biorxiv|book|conference|document|encyclopa?edia|episode|interview|journal|magazine|mailing ?list|manual|(?:news(?!group|paper))|paper|podcast|press release|report|serial|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";

// Per-page state shared between ProcessArticle and sup_reg_common: each flag stays true until
// sup_reg_common changes a template for the matching parameter; ProcessArticle reads both flags to
// decide whether to skip the page and to build the edit summary, then resets them to true.
bool gSkip_subscription = true; // presume that we will skip this page

bool gSkip_registration = true;

// The url list: the url-holding parameters to which a deprecated access parameter might apply.
// sup_reg_common appends "-access" to the single one found to build the replacement parameter name.
string[] url_params = { "url", "article-url", "chapter-url", "entry-url", "section-url"};

//---------------------------< P R O C E S S A R T I C L E >--------------------------------------------------

// Processes one page: deletes empty |subscription= / |registration= parameters, hides everything that
// could confuse the cs1|2 template matcher, lets sup_reg_common remove or replace the deprecated
// parameters, restores the hidden markup, and reports the result.
//
//   ArticleText    wikitext of the page; the modified text is returned
//   ArticleTitle   not used by this task
//   wikiNamespace  not used by this task
//   Summary        (out) edit summary naming the parameter(s) that were fixed
//   Skip           (out) true when sup_reg_common changed no template, so the page need not be saved
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    // debug aid: set gSkip_subscription / gSkip_registration to false here to force every page to be saved

    //---------------------------< E M P T I E S >------------------------------------------------------------

    // a parameter with no value is dropped; the delimiter that followed it ('|' or '}') is kept
    ArticleText = Regex.Replace(ArticleText, @"\| *subscription *=\s*([\|\}])", "$1");
    ArticleText = Regex.Replace(ArticleText, @"\| *registration *=\s*([\|\}])", "$1");

    //---------------------------< H I D E >------------------------------------------------------------------

    // Innermost templates that are not cs1|2 get their braces swapped for placeholder tokens.
    // Repeating until nothing matches works outward through nested templates.
    Regex non_cs1_template = new Regex(@"\{\{(?!\s*" + IS_CS1 + @")([^\{\}]*)\}\}");
    while (non_cs1_template.IsMatch(ArticleText))
    {
        ArticleText = non_cs1_template.Replace(ArticleText, "__0P3N__$1__CL0S3__");
    }

    // piped wikilinks whose target carries a parenthetical disambiguator: [[target (disambiguator)|label]]
    string disambiguated_link = @"\[\[([^\|\]]+) +\(([^\)\|]+)\)\|([^\]]+)\]\]";
    ArticleText = Regex.Replace(ArticleText, disambiguated_link, "__WL_0P3N__$1__D4B_O__$2__D4B_C____P1P3__$3__WL_CL0S3__");

    // remaining piped wikilinks: [[target|label]]
    string piped_link = @"\[\[([^\|\]]+)\|([^\]]+)\]\]";
    ArticleText = Regex.Replace(ArticleText, piped_link, "__WL_0P3N__$1__P1P3__$2__WL_CL0S3__");

    //---------------------------< S U B S C R I P T I O N   /   R E G I S T R A T I O N >---------------------

    ArticleText = sup_reg_common (ArticleText, "subscription");
    ArticleText = sup_reg_common (ArticleText, "registration");

    //---------------------------< U N H I D E >--------------------------------------------------------------

    // the placeholder tokens are plain literals, so ordinary string replacement restores the markup
    ArticleText = ArticleText.Replace("__WL_0P3N__", "[[");
    ArticleText = ArticleText.Replace("__D4B_O__", " (");       // restores the space that precedes the '('
    ArticleText = ArticleText.Replace("__D4B_C__", ")");
    ArticleText = ArticleText.Replace("__P1P3__", "|");
    ArticleText = ArticleText.Replace("__WL_CL0S3__", "]]");
    ArticleText = ArticleText.Replace("__0P3N__", "{{");
    ArticleText = ArticleText.Replace("__CL0S3__", "}}");

    //---------------------------< R E P O R T >--------------------------------------------------------------

    // the page is skipped only when neither parameter was fixed in any template
    Skip = gSkip_subscription && gSkip_registration;

    // alternative prefixes: "Task 13: (developmental testing): " while developing, "Task 13: " for production
    string summary_prefix = "Task 13: (BRFA testing): ";

    string summary_detail;
    if (gSkip_subscription)
    {
        // |subscription= was not touched (this branch is also taken when the page is skipped)
        summary_detail = "Fix deprecated |registration= in cs1|2 templates;";
    }
    else if (gSkip_registration)
    {
        summary_detail = "Fix deprecated |subscription= in cs1|2 templates;";
    }
    else
    {
        summary_detail = "Fix deprecated |subscription= and |registration= in cs1|2 templates;";
    }
    Summary = summary_prefix + summary_detail;

    // return the per-page flags to their presumed-skip state for the next page
    gSkip_subscription = true;
    gSkip_registration = true;

    return ArticleText;
}

//---------------------------< S U P _ R E G _ C O M M O N >--------------------------------------------------

// Removes or replaces one deprecated access parameter in every cs1|2 template of the article text.
//
//   ArticleText  wikitext in which non-cs1|2 templates and piped wikilinks have already been hidden
//                by the caller (ProcessArticle does this before calling)
//   sr_param     name of the deprecated parameter to handle: "subscription" or "registration"
//   returns      the wikitext with the qualifying templates rewritten
//
// Only parameters whose whole value is yes, true or y are considered.  For each such template:
//   - more than one populated url-list parameter ............ left unchanged (target is ambiguous)
//   - one populated url-list parameter plus an identifier .... left unchanged (target is ambiguous)
//   - no populated url-list parameter ........................ the deprecated parameter is deleted
//   - one populated url-list parameter, no identifier ........ replaced with |<url param>-access=<sr_param>
// Whenever a template is changed the matching gSkip_* flag is cleared, which tells ProcessArticle that
// the page must be saved and which parameter to name in the edit summary.
string sup_reg_common (string ArticleText, string sr_param)
{
    // group 1: the template from "{{" up to the deprecated parameter; group 2: the rest of the template.
    // The lookahead after the value requires the value to end at the next '|' or '}' so that values which
    // merely begin with an accepted keyword ("yesterday", "yup", ...) are not treated as yes / y.
    string template_pattern = @"(\{\{\s*" + IS_CS1 + @"[^\}]*)\|\s*" + sr_param + @"\s*=\s*(?:yes|true|y)(?=\s*[\|\}])([^\}]*)";

    // identifiers to which the access parameters might apply.  The final class excludes whitespace:
    // without that, backtracking lets a blank after '=' count as a value and an empty parameter
    // is mistaken for a populated one.
    string identifier_pattern = @"\|\s*(?:doi|DOI|jstor|JSTOR|bibcode|hdl|HDL|ol|OL|osti|OSTI)\s*=\s*[^\|\}\s]";

    return Regex.Replace(ArticleText, template_pattern,
        delegate(Match match)
        {
            string raw_capture = match.Groups[0].Value;     // the whole captured citation template
            string raw_prefix = match.Groups[1].Value;      // template text before the deprecated parameter
            string raw_postfix = match.Groups[2].Value;     // template text after the deprecated parameter

            int url_count = 0;                              // number of populated url-list parameters
            string url_param = @"";                         // name of the last populated url-list parameter found

            foreach (string param in url_params)
            {
                // populated means: at least one non-whitespace character before the next '|' or '}'
                if (Regex.IsMatch (raw_capture, @"\|\s*" + param + @"\s*=\s*[^\|\}\s]"))
                {
                    url_count++;
                    url_param = param;
                }
            }

            if (1 < url_count)                              // cannot know which url the parameter was meant for
            {
                return raw_capture;
            }

            if (1 == url_count && Regex.IsMatch (raw_capture, identifier_pattern))
            {
                return raw_capture;                         // url plus identifier; cannot know which one was meant
            }

            // every remaining case changes the template, so record that for Skip and the edit summary
            if ("subscription" == sr_param)
            {
                gSkip_subscription = false;
            }
            else
            {
                gSkip_registration = false;
            }

            if (0 == url_count)
            {
                return raw_prefix + raw_postfix;            // nothing for the parameter to apply to; remove it
            }

            // exactly one url-list parameter and no identifier: convert to the matching -access parameter
            return raw_prefix + @"|" + url_param + @"-access=" + sr_param + @" " + raw_postfix;
        });
}