# User:Polbot/source/Judges.pl

use strict;

use Perlwikipedia;

use LWP::UserAgent;

# Command-line arguments, consumed from @ARGV in order: the surname prefix
# sent to the FJC search, then the name to start at.
my ($firstletter, $startat) = splice(@ARGV, 0, 2);

# When true, the per-judge loop writes its output to judges.txt and stops
# instead of editing the wiki.
my $test = 0;

# Earliest time (epoch seconds) at which the next wiki edit may be made.
my $soonest_next_op = time;

print "\nStarting polbot\n";

my $pw = Perlwikipedia->new();

# $pw->{debug} = 1;

# Identify the bot to the server before any request is made.
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "Logging in\n";

# The script treats a login status of 0 as success; anything else aborts.
my $login_status = $pw->login('Polbot', '(bot password)');
if ($login_status ne 0) {
    die "I can't log in.";
}

# Get exceptions (to skip)

# Names already handled, read from the project's "finished" page.
my @exceptions = ();

print "Getting list of completed judges to skip.\n";

my $todo_list = $pw->get_text('Wikipedia:WikiProject Law/United States federal judges - finished');
my @lines = split(/\n/, $todo_list);

# Only bulleted lines that begin with a wiki link count; the link target is
# what gets remembered. Every other line contributes nothing.
push @exceptions, map { /^\*\s*\[\[([^]]*)\]\]/ ? $1 : () } @lines;

# Pull from FJC

# Fetch the FJC search results for the requested surname prefix and collect
# the ids of the judges that still need processing into @judge_ids.

print "Getting list of all judges starting with $firstletter\n";

my @judge_ids = ();

my $url = 'http://www.fjc.gov/servlet/tAsearch?lname=' . $firstletter;

print " $url\n";

my $ua = LWP::UserAgent->new;

$ua->agent("Mozilla/6.0");

my $res = $ua->get($url);

die "could not connect" unless ($res->is_success);

my $html = $res->content;

# Walk the result page one entry at a time. The body below expects $1 to be
# the judge id and $2 the judge's name.
# NOTE(review): the pattern on the next line has no closing delimiter, only
# one capture group and no opening brace, so it looks truncated (presumably
# HTML markup was stripped from it) - restore it from the original script.

while ($html =~ m/([^<]*)

my $thisjid = $1;

my $thisname = $2;

# Keep the id only when the name sorts at or after $startat (string
# comparison) and is not on the skip list.

if ($thisname ge $startat) {

unless (grep {$_ eq $thisname} @exceptions) {

push @judge_ids, $thisjid;

}

}

}

print scalar(@judge_ids) . " judges found.\n\n";

# Main loop: for every collected judge id, download the FJC detail page,
# pull the biographical fields out of the HTML, assemble wiki text in
# $wiki_out and either write it to a file (test mode) or save it to the wiki.
#
# NOTE(review): several patterns and string literals in this loop span lines
# oddly or replace text with itself. They look as if HTML / wiki markup was
# stripped out of them; each is flagged below and should be checked against
# the original script before the code is run.

foreach my $jid (@judge_ids) {

my $url = "http:\/\/www.fjc.gov\/servlet\/tGetInfo\?jid=$jid";

print "\n$jid: ";

$res = $ua->get($url);

die "could not connect" unless ($res->is_success);

$html = $res->content;

# Backticks in the page are turned into apostrophes.

$html =~ s/\`/'/g;

# Per-judge working state; everything is reset on each iteration.

# Education sentences, judgeship paragraphs, career sentences, categories.

my @eds = ();

my @jus = ();

my @pcs = ();

my @jcats = ();

# Name as captured from the page, expected in "Last, First" form.

my $rev_name;

# "First Last[ suffix]" form built from $rev_name.

my $name;

my $last_name;

# Page title that will actually be written.

my $art_name;

my $persondata_name;

my $birth_date;

my $birth_year;

my $birth_loc;

my $death_date;

my $death_year;

my $death_loc;

# Defaults to "He"; switched to "She" when the gender check below matches.

my $pronoun = "He";

# Set to 1 when a judgeship entry does not mention "Service terminated".

my $active = 0;

my $wiki_out;

# initial change
# NOTE(review): the substitution below contains a literal line break in its
# replacement; presumably markup was lost here - confirm the intended form.

$html =~ s/(\d)\-\/$1-the present
/;

# extract name
# NOTE(review): the start of this pattern ("\\") looks damaged - confirm.

$html =~ m/\\([^\n]*?) *\<\/B\>\<\/FONT\>/m;

$rev_name = $1;

# Collapse runs of spaces and drop square brackets from the captured name.

$rev_name =~ s/ +/ /g;

$rev_name =~ s/\[//g;

$rev_name =~ s/\]//g;

# Split "Last, First[ suffix]"; only Jr., II, III and IV are recognised as
# suffixes.

$rev_name =~ m/^(.*?)\, (.*?)( Jr\.| II| III| IV)?$/;

$last_name = $1;

$name = "$2 $last_name$3";

# If a page titled $name already has content (any word character), write to
# a subpage under User:Polbot/fjc/ instead of the article title itself.

if ($pw->get_text("$name") =~ /\w/) {

$art_name = "User:Polbot/fjc/" . $name;

} else {

$art_name = $name;

}

print "==$rev_name at $art_name==\n";

# extract gender
# NOTE(review): pattern begins with a literal line break - confirm.

if ($html =~ m/
Gender:<\/B> Female/) { $pronoun = "She"; }

# extract birth and death info
# Two forms are tried for each event: "Month Day, Year[, in Place]" first,
# then a bare "Year[ in Place]". The leading ", in " / " in " is trimmed from
# the captured place.
# NOTE(review): these four patterns contain literal line breaks - confirm.

if ($html =~ m/

Born +(\w+) +(\d+), +(\d+)(, +in +[^<]*)?
/) {

$birth_date = "$1 $2";

$birth_year = $3;

$birth_loc = $4;

$birth_loc =~ s/^, +in +//;

} elsif ($html =~ m/

Born +(\d+)( +in +[^<]*)?
/) {

$birth_year = $1;

$birth_loc = $2;

$birth_loc =~ s/^ +in +//;

}

if ($html =~ m/
Died +(\w+) +(\d+), +(\d+)(, +in +[^<]*)?
/) {

$death_date = "$1 $2";

$death_year = $3;

$death_loc = $4;

$death_loc =~ s/^, +in +//;

} elsif ($html =~ m/
Died +(\d+)( +in +[^<]*)?
/) {

$death_year = $1;

$death_loc = $2;

$death_loc =~ s/^ +in +//;

}

# Spell out state abbreviations in both places (see Expand_states).

$birth_loc = Expand_states($birth_loc);

$death_loc = Expand_states($death_loc);

#print "birth: '$birth_date', '$birth_year', '$birth_loc'\n";

#print "death: '$death_date', '$death_year', '$death_loc'\n";

# Extract education
# The captured section is split on double <br> tags and each entry is
# rewritten in place as a sentence fragment; entries matching none of the
# three shapes are left as they are.
# NOTE(review): pattern spans several lines - confirm the original markup.

if ($html =~ m/
\s*
Education:<\/B>
(.*?)
\s*
/i) {

my $ed_string = $1;

@eds = split(/<[Bb][Rr]><[Bb][Rr]>/, $ed_string);

foreach my $ed (@eds) {

if ($ed =~ m/^(.*), (.*), (\d+)$/) {

$ed = "$pronoun received a $2 from $1 in $3";

} elsif ($ed =~ m/^Read law, (\d+)$/) {

$ed = "$pronoun read law in $1";

} elsif ($ed =~ m/^(.*), (\d+)$/) {

$ed = "$pronoun graduated from $1 in $2";

}

#print " ED: $ed\n";

}

}

# Extract Professional Career
# Entries are split on single <br> tags. The specific shapes (private
# practice, judge, U.S. Army/Navy) are tested before the generic
# "<role>, <years>" fallbacks, so the order of the branches matters.
# NOTE(review): pattern spans several lines - confirm the original markup.

if ($html =~ m/Professional Career:<\/B>
(.*?)
\s*
/i) {

my $pc_string = $1;

@pcs = split(/ *<[Bb][Rr]> */, $pc_string);

foreach my $pc (@pcs) {

if ($pc =~ m/^Private practice, (.*?), (\d+)\-(\d+|the present)$/) {

$pc = "$pronoun was in private practice of law in $1 from $2 to $3";

} elsif ($pc =~ m/^Private practice, (.*?), (\d+)$/) {

$pc = "$pronoun was in private practice of law in $1 in $2";

} elsif ($pc =~ m/^Judge, (.*?), (\d+)\-(\d+|the present)$/) {

$pc = "$pronoun was a judge to the $1 from $2 to $3";

} elsif ($pc =~ m/^Judge, (.*?), (\d+)$/) {

$pc = "$pronoun was a judge to the $1 in $2";

} elsif ($pc =~ m/^U.S. (Army|Navy)(.*?), (\d+)\-(\d+|the present)$/) {

$pc = "$pronoun was in the United States $1$2 from $3 to $4";

} elsif ($pc =~ m/^U.S. (Army|Navy)(.*?), (\d+)$/) {

$pc = "$pronoun was in the United States $1$2 in $3";

} elsif ($pc =~ m/^(.*), (\d+)\-(\d+|the present)$/) {

$pc = "$pronoun was a $1 from $2 to $3";

} elsif ($pc =~ m/^(.*), (\d+)$/) {

$pc = "$pronoun was a $1 in $2";

}

#print "PC: $pc\n";

}

}

# Extract judgeships
# Each judgeship (split on double <br>) has its court heading rewritten into
# a sentence, and a matching category is recorded in @jcats. The D.C. Circuit
# test comes before the generic Court of Appeals test.
# NOTE(review): pattern spans several lines - confirm the original markup.

if ($html =~ m/Federal Judicial Service:<\/B>
(.*?)
\s*
\s*/si) {

my $ju_string = $1;

@jus = split(/ *<[Bb][Rr]><[Bb][Rr]> */, $ju_string);

foreach my $ju (@jus) {

if ($ju =~ s/Judge, U\. S\. District Court, ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the United States District Court for the $1. /) {

push @jcats, "Judges of the United States District Court for the $1";

}

if ($ju =~ s/Justice, U\. S\. District Court for the District of Columbia \[Supreme Court of the District of Columbia\]\s*<[Bb][Rr]>/$last_name was a federal judge to the United States District Court for the District of Columbia. /) {

push @jcats, "Judges of the United States District Court for the District of Columbia";

}

if ($ju =~ s/Judge, U\. S\. Circuit Courts ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the United States circuit court $1. /) {

push @jcats, "Judges of the United States circuit courts";

}

if ($ju =~ s/Judge, U\. S\. Court of Appeals for District of Columbia Circuit<[Bb][Rr]>/$last_name was a federal judge to the United States Court of Appeals for the D.C. Circuit. /) {

push @jcats, "Judges of the United States Court of Appeals for the D.C. Circuit";

}

if ($ju =~ s/Judge, U\. S\. Court of Appeals ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the United States Court of Appeals $1. /) {

push @jcats, "Judges of the United States Court of Appeals $1";

}

# Any judgeship without a termination note marks the judge as active.

if ($ju !~ m/Service terminated/i) {

$active = 1;

}

}

}

# Mash together.
# Lead sentence: the name, then a parenthesised life span whose form depends
# on which of the birth/death date and year values were found.

$wiki_out = "{{Cleanup FJC Bio}}\n$name ";

if ($birth_date) {

if ($death_date) {

$wiki_out .= "($birth_date, $birth_year \&ndash\; $death_date, $death_year) ";

} elsif ($death_year) {

$wiki_out .= "($birth_date, $birth_year \&ndash\; $death_year) ";

} else {

$wiki_out .= "(born $birth_date, $birth_year) ";

}

} elsif ($birth_year) {

if ($death_date) {

$wiki_out .= "($birth_year \&ndash\; $death_date, $death_year) ";

} elsif ($death_year) {

$wiki_out .= "($birth_year\&ndash\;$death_year) ";

} else {

$wiki_out .= "(born $birth_year) ";

}

} else {

if ($death_date) {

$wiki_out .= "(died $death_date, $death_year) ";

} elsif ($death_year) {

$wiki_out .= "(died $death_year) ";

}

}

# Tense: past when a death year is known, otherwise present, with "former"
# added when no judgeship was found to be still running.

if ($death_year) {

$wiki_out .= "was a ";

} else {

if ($active) {

$wiki_out .= "is a ";

} else {

$wiki_out .= "is a former ";

}

}

$wiki_out .= "United States federal judge.\n\n";

if ($birth_loc) {

$wiki_out .= "$last_name was born in $birth_loc. ";

}

# Body: education and career sentences in one paragraph, then one paragraph
# per judgeship.

foreach my $ed (@eds) {

$wiki_out .= "$ed. ";

}

foreach my $pc (@pcs) {

$wiki_out .= "$pc. ";

}

$wiki_out .= "\n\n";

foreach my $ju (@jus) {

$wiki_out .= "$ju\n\n";

}

if ($death_loc) {

$wiki_out .= "$pronoun died in $death_loc.\n\n";

}

# Persondata name: apostrophes removed, then every word lower-cased with an
# upper-case first letter.

$persondata_name = $rev_name;

$persondata_name =~ s/\'//g;

$persondata_name =~ s/\b(\w+)\b/\u\L$1/g;

$wiki_out .= "==External links==\n* {{FJC Bio|$jid}}\n\n";

# NOTE(review): the empty '' literal below contributes nothing as written;
# presumably it once held markup that was stripped - confirm.

$wiki_out .= '' . "\n{{Persondata\n|NAME=$persondata_name\n";

$wiki_out .= "|ALTERNATIVE NAMES=\n|SHORT DESCRIPTION=United States federal judge\n|DATE OF BIRTH=";

if ($birth_date) {

$wiki_out .= "$birth_date, $birth_year\n";

} else {

$wiki_out .= "$birth_year\n";

}

$wiki_out .= "|PLACE OF BIRTH=";

if ($birth_loc) {

$wiki_out .= "$birth_loc";

}

$wiki_out .= "\n|DATE OF DEATH=";

if ($death_date) {

$wiki_out .= "$death_date, $death_year\n";

} else {

$wiki_out .= "$death_year\n";

}

$wiki_out .= "|PLACE OF DEATH=";

if ($death_loc) {

$wiki_out .= "$death_loc";

}

$wiki_out .= "\n}}\n{{DEFAULTSORT:$rev_name}}\n";

# Categories. A judge with no death year is categorised as a living person.
# NOTE(review): the subpage branch further down rewrites "[[Category", but
# the lines here emit no square brackets - presumably the link markup was
# lost; confirm.

if ($birth_year) {

$wiki_out .= "Category:$birth_year births\n";

}

if ($death_year) {

$wiki_out .= "Category:$death_year deaths\n";

} else {

$wiki_out .= "Category:Living people\n";

}

foreach my $jcat (@jcats) {

$wiki_out .= "Category:$jcat\n";

}

# Final substitutions - multiple
# These run over the whole assembled text and apply to every occurrence (/g).
# They are order-dependent: later rules act on the output of earlier ones.

$wiki_out =~ s/Nominated by /$last_name was nominated by /g;

$wiki_out =~ s/Received a recess appointment from /$last_name received a recess appointment from /g;

$wiki_out =~ s/Confirmed by the Senate/$pronoun was confirmed by the United States Senate/g;

$wiki_out =~ s/vacated by (.*?);/vacated by $1./g;

$wiki_out =~ s/Reassigned /$pronoun was reassigned on /g;

$wiki_out =~ s/Service terminated on /$last_name's service was terminated on /g;

$wiki_out =~ s/He was a State attorney general, ([^\.\;]*?) from/$pronoun was the state attorney general of $1 from/g;

$wiki_out =~ s/ was a Member of the faculty, / was a member of the faculty of /g;

$wiki_out =~ s/ was a Faculty, / was a member of the faculty of /g;

$wiki_out =~ s/on (\w+ \d+, \d+), and received commission on \1/on $1, and received commission the same day/g;

$wiki_out =~ s/(attorney|general|treasurer|secretary|senator), /$1 of /g;

# " a " before a capital A, E, I or O becomes " an ".

$wiki_out =~ s/ a ([AEIO])/ an $1/g;

$wiki_out =~ s/, (\d+)\-(\d+) from / from $1 to $2 and from /g;

$wiki_out =~ s/, (\d+) from / in $1 and from /g;

# Final substitutions - single
# No /g here: only the first occurrence of each phrase is touched.
# NOTE(review): several of these replace a phrase with itself and so do
# nothing as written; presumably the replacements once carried wiki-link
# markup - confirm.

$wiki_out =~ s/recess appointment/recess appointment/;

$wiki_out =~ s/senior status/senior status/;

$wiki_out =~ s/U.S. Attorney(,| from)/United States Attorney$1/;

$wiki_out =~ s/United States Senate/United States Senate/;

$wiki_out =~ s/\[\[J\.D\.\]\]/J.D./;

$wiki_out =~ s/Law clerk/law clerk/;

# Presidents
# The first "from <name>" or "by <name>" for each listed president gets the
# title "President" inserted before the name.

$wiki_out =~ s/(from|by) (George W. Bush)/$1 President $2/;

$wiki_out =~ s/(from|by) (William J. Clinton)/$1 President $2/;

$wiki_out =~ s/(from|by) (George H.W. Bush)/$1 President $2/;

$wiki_out =~ s/(from|by) (Ronald Reagan)/$1 President $2/;

$wiki_out =~ s/(from|by) (Jimmy Carter)/$1 President $2/;

$wiki_out =~ s/(from|by) (Gerald Ford)/$1 President $2/;

$wiki_out =~ s/(from|by) (Richard M. Nixon)/$1 President $2/;

$wiki_out =~ s/(from|by) (Lyndon B. Johnson)/$1 President $2/;

$wiki_out =~ s/(from|by) (John F. Kennedy)/$1 President $2/;

$wiki_out =~ s/(from|by) (Dwight D. Eisenhower)/$1 President $2/;

$wiki_out =~ s/(from|by) (Harry S Truman)/$1 President $2/;

$wiki_out =~ s/(from|by) (Franklin D. Roosevelt)/$1 President $2/;

$wiki_out =~ s/(from|by) (Herbert Hoover)/$1 President $2/;

$wiki_out =~ s/(from|by) (Calvin Coolidge)/$1 President $2/;

$wiki_out =~ s/(from|by) (Warren G. Harding)/$1 President $2/;

$wiki_out =~ s/(from|by) (Woodrow Wilson)/$1 President $2/;

$wiki_out =~ s/(from|by) (William H. Taft)/$1 President $2/;

$wiki_out =~ s/(from|by) (Theodore Roosevelt)/$1 President $2/;

$wiki_out =~ s/(from|by) (William McKinley)/$1 President $2/;

$wiki_out =~ s/(from|by) (Benjamin Harrison)/$1 President $2/;

$wiki_out =~ s/(from|by) (Grover Cleveland)/$1 President $2/;

$wiki_out =~ s/(from|by) (Chester A. Arthur)/$1 President $2/;

$wiki_out =~ s/(from|by) (James A. Garfield)/$1 President $2/;

$wiki_out =~ s/(from|by) (Rutherford B. Hayes)/$1 President $2/;

$wiki_out =~ s/(from|by) (Ulysses Grant)/$1 President $2/;

$wiki_out =~ s/(from|by) (Andrew Johnson)/$1 President $2/;

$wiki_out =~ s/(from|by) (Abraham Lincoln)/$1 President $2/;

$wiki_out =~ s/(from|by) (James Buchanan)/$1 President $2/;

$wiki_out =~ s/(from|by) (Franklin Pierce)/$1 President $2/;

$wiki_out =~ s/(from|by) (Millard Fillmore)/$1 President $2/;

$wiki_out =~ s/(from|by) (Zachary Taylor)/$1 President $2/;

$wiki_out =~ s/(from|by) (James K. Polk)/$1 President $2/;

$wiki_out =~ s/(from|by) (John Tyler)/$1 President $2/;

$wiki_out =~ s/(from|by) (Martin Van Buren)/$1 President $2/;

$wiki_out =~ s/(from|by) (Andrew Jackson)/$1 President $2/;

$wiki_out =~ s/(from|by) (John Quincy Adams)/$1 President $2/;

$wiki_out =~ s/(from|by) (James Monroe)/$1 President $2/;

$wiki_out =~ s/(from|by) (James Madison)/$1 President $2/;

$wiki_out =~ s/(from|by) (Thomas Jefferson)/$1 President $2/;

$wiki_out =~ s/(from|by) (John Adams)/$1 President $2/;

$wiki_out =~ s/(from|by) (George Washington)/$1 President $2/;

# Write
# Test mode: dump the first generated article to judges.txt and stop the
# whole script via die, so no wiki edit is made.
# NOTE(review): the result of open is not checked and the handle is never
# closed explicitly.

if ($test) {

print "Output to file\n";

open(outfile, ">judges.txt");

print outfile $wiki_out;

die;

}

# Throttle: turn on output autoflush so the message appears immediately,
# then spin (without sleeping) until the earliest permitted time, and
# schedule the next slot 9 seconds from now.

$|=1;

print "Waiting " . ($soonest_next_op - time) . " secs... ";

$|=1;

while (time < $soonest_next_op) {};

$soonest_next_op = time + 9;

# New-article path: the title was free, so create the article, give its talk
# page a WPBiography banner and append a "yes" row to User:Polbot/fjc.

if ($name eq $art_name) {

$pw->edit($art_name, $wiki_out, "Auto-generating new article based on $url");

my $talkmessage = "{{WPBiography\n|living=";

if ($death_year) {

$talkmessage .= 'no';

} else {

$talkmessage .= 'yes';

}

$talkmessage .= "\n|class=start\n|priority=low\n|needs-infobox=yes\n|politician-work-group=yes\n}}\n\nThis article was automatically created by a perl script. It could use a human's loving touch. ~~~~";

$pw->edit("Talk:$art_name", $talkmessage, "Auto-adding WPbiography template");

my $listsofar = $pw->get_text("User:Polbot/fjc");

$listsofar .= "|-\n| $rev_name || yes || $art_name\n";

$pw->edit("User:Polbot/fjc", $listsofar, "Adding $art_name");

} else {

# Subpage path: the title was taken. Category links are rewritten to the
# "[[:Category" form before the text is saved to the user subpage.

$wiki_out =~ s/\[\[Category/[[:Category/g;

$pw->edit($art_name, $wiki_out, "Auto-generating subpage based on $url");

# If the existing page is a redirect, the note goes on the talk page of the
# redirect target instead.

my $otherpage = $pw->get_text("$name");

if ($otherpage =~ m/\#\s*Redirect\s*\[\[\s*(.*?)\s*\]\]/is) {

$name = $1;

}

# Append a notice about the subpage to the existing talk page text.

my $talksofar = $pw->get_text("Talk:$name");

$talksofar .= "\n==Bot-created subpage==\n\nA temporary subpage at $art_name was automatically created by a perl script, based on [$url this article] at the Biographical Directory of Federal Judges. The subpage should either be merged into this article, or moved and disambiguated. ~~~~\n";

$pw->edit("Talk:$name", $talksofar, "Auto-adding link to subpage at $art_name");

# Record the subpage with a "no" row in User:Polbot/fjc.

my $listsofar = $pw->get_text("User:Polbot/fjc");

$listsofar .= "|-\n| $rev_name || no || $art_name\n";

$pw->edit("User:Polbot/fjc", $listsofar, "Adding $art_name");

}

print "Article created.\n";

}

# Expand_states($place)
#
# Replaces two-letter upper-case postal abbreviations (the 50 states, DC and
# PR) in $place with the full name, e.g. "Mobile, AL" -> "Mobile, Alabama".
#
# Parameters:
#   $place - a location string; may be undef or empty.
# Returns:
#   The expanded string. Undef or empty input is returned unchanged, so
#   callers can keep testing the result for truth.
#
# An abbreviation is only expanded when it stands alone as a word. Matching
# it anywhere would also rewrite letters inside longer upper-case runs
# (the "AL" in "McALLEN", for instance).
sub Expand_states {

    my $place = shift;

    return $place unless defined $place && length $place;

    # Kept inside the sub on purpose: this sub sits at the end of the file
    # and is called from code above it, so a file-level lexical table would
    # still be empty when the first call happens.
    my %name_for = (
        AL => 'Alabama',        AK => 'Alaska',
        AZ => 'Arizona',        AR => 'Arkansas',
        CA => 'California',     CO => 'Colorado',
        CT => 'Connecticut',    DE => 'Delaware',
        DC => 'District of Columbia',
        FL => 'Florida',        GA => 'Georgia',
        HI => 'Hawaii',         ID => 'Idaho',
        IL => 'Illinois',       IN => 'Indiana',
        IA => 'Iowa',           KS => 'Kansas',
        KY => 'Kentucky',       LA => 'Louisiana',
        ME => 'Maine',          MD => 'Maryland',
        MA => 'Massachusetts',  MI => 'Michigan',
        MN => 'Minnesota',      MS => 'Mississippi',
        MO => 'Missouri',       MT => 'Montana',
        NE => 'Nebraska',       NV => 'Nevada',
        NH => 'New Hampshire',  NJ => 'New Jersey',
        NM => 'New Mexico',     NY => 'New York',
        NC => 'North Carolina', ND => 'North Dakota',
        OH => 'Ohio',           OK => 'Oklahoma',
        OR => 'Oregon',         PA => 'Pennsylvania',
        PR => 'Puerto Rico',    RI => 'Rhode Island',
        SC => 'South Carolina', SD => 'South Dakota',
        TN => 'Tennessee',      TX => 'Texas',
        UT => 'Utah',           VT => 'Vermont',
        VA => 'Virginia',       WA => 'Washington',
        WV => 'West Virginia',  WI => 'Wisconsin',
        WY => 'Wyoming',
    );

    # One pass over the string. The keys are plain letters, so they can be
    # joined into an alternation without quoting. Replacement text is not
    # rescanned, so one expansion can never trigger another.
    my $abbrev = join '|', sort keys %name_for;
    $place =~ s/\b($abbrev)\b/$name_for{$1}/g;

    return $place;

}