if(defined($image_url))
{
my $filename;
my $image_data;
notelog("Fetching image $image_url\n");
($filename) = $image_url =~ /(\/[^\/]+)$/;
$filename = $image_path . $filename;
if(! -e $filename)
{
if($test_only)
{
notelog("Would save to $filename...");
}
else
{
$image_url = Pearle::urlDecode($image_url);
$image_data = Pearle::getURL($image_url);
notelog("Saving to $filename...");
if(defined($filename) and $filename)
{
open OUTFILE, ">", $filename;
print OUTFILE $image_data;
close OUTFILE;
notelog("Image saved\n");
Pearle::myLog("Image $image saved as $filename\n");
}
else
{
notelog("Failed\n");
}
}
}
else
{
notelog("File already exists\n");
}
}
}
sub RemoveImageFromPage
{
my $image = shift;
my $page = shift;
my $image_regex = shift;
my $removal_prefix = shift;
my $removal_comment = shift;
my ($text, $editTime, $startTime, $token);
my ($match1, $match2);
my $old_length;
my $new_length;
my $change_len;
my $match_len;
# Fetch an article page
($text, $editTime, $startTime, $token) = Pearle::getPage($page);
if(!defined($text))
{
Pearle::myLog("Error: Bad edit page $page\n");
userwarnlog(FixupLinks("*Error: Bad edit page $page\n"));
sleep(300);
return 0;
}
if($text =~ /^\s*$/)
{
# Might be protected instead of empty
Pearle::myLog("Error: Empty page $page\n");
userwarnlog(FixupLinks("*Error: Empty page $page\n"));
sleep(300);
return 0;
}
if($text =~ /^#redirect/i)
{
Pearle::myLog("Redirect found for page $page (image :$image)\n");
userwarnlog(FixupLinks("*Redirect found for page $page (image :$image)\n"));
return 0;
}
# Remove the image
my $regex3 = "(\\[\\[${image_regex}.*?(\\[\\[.*?\\]\\].*?|)+\\]\\][ \\t]*)"; # Regex to match images
my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w"; # Regex to try to spot inline images
my $regex3c = ""; # Regex to spot images in comments
my $regex3g = "(${image_regex}.*)"; # Regex to match gallery images
my $regex3gc = ""; # Regex to spot gallery images in comments
my ($raw_image) = $image =~ /Image:(.*)/;
my $regex4a = "([Cc]over\\s*=\\s*)" . MakeWikiRegex($raw_image);
my $regex4b = "(image_skyline\\s*=\\s*)" . MakeWikiRegex($raw_image);
my $regex4i = "(image\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "image = " sections in infoboxes
my $regex4p = "(picture\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "picture = " sections in infoboxes
my $regex4m = "\\[\\([^*)\\]\\]"; # Regex to match inline Media: links
my $regex4g = "(img\\s*=\\s*)" . MakeWikiRegex($raw_image); # Regex to match "img = " sections in infoboxes
Pearle::myLog("Regex 3: $regex3\n");
notelog("Regex 3: $regex3\n");
notelog("Regex 3 extended: $regex3ex\n");
notelog("Regex 3 gallery: $regex3g\n");
Pearle::myLog("Raw regex: $raw_image\n");
notelog("Regex 4 Album: $regex4a\n");
notelog("Regex 4 City: $regex4b\n");
notelog("Regex 4 Image: $regex4i\n");
notelog("Regex 4 Media: $regex4m\n");
notelog("Regex 4 Picture: $regex4p\n");
notelog("Regex 4 Img: $regex4g\n");
if($text =~ /$regex3ex/)
{
Pearle::myLog("Possible inline image in $page\n");
userwarnlog(FixupLinks("*Possible inline image :$image in $page\n"));
return 0; # Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox
}
if($text =~ /$regex3c/ or $text =~ /$regex3gc/)
{
Pearle::myLog("Image in comment in $page\n");
- userwarnlog(FixupLinks("*Image in comment in $page\n"));
return 0; # Can't do gallery matching because that also matches regular images
}
$text =~ /$regex3/;
$match_len = length($1);
$match2 = $text =~ s/$regex3//g;
$new_length = length($text);
print "Num: $match2 Len: $match_len\n";
if($match2)
{
# If a whole lot of text was removed, log a warning
if($match_len > (500 + length($image)))
{
userwarnlog(FixupLinks("*Long caption of $match_len bytes replaced in $page\n"));
if($match_len > (1000 + length($image)))
{
notelog("Unusually long caption found. Exiting.\n");
Pearle::myLog("Unusually long caption of $match_len found in $page ($match2 matches).\n");
exit;
}
}
if($match_len < (4 + length($image)))
{
notelog("*Short replacement of $match_len bytes in $page\n");
Pearle::myLog("Short replacement of $match_len bytes (min " . (length($image) + 4) . ") in $page ($match2 matches). Exiting.\n");
Pearle::myLog("Text:\n$text\n");
exit;
}
# If many matches, log a warning
if($match2 > 2)
{
Pearle::myLog("More than one match ($match2) in page $page\n");
- userwarnlog(FixupLinks("*More than one match ($match2) in page $page\n"));
}
if($match2 > 100)
{
Pearle::myLog("Too many matches ($match2) in page $page. Skipping.\n");
userwarnlog("Too many matches ($match2) in page $page. Skipping.\n");
return 0;
}
# If there might be a reference, log a warning
- if($text =~ /(?:see (?:image|picture|graph|diagram|right|left)|\(left\)|\(right\)|\(below\)|\(above\))/)
- {
- Pearle::myLog("Possible image reference in page $page\n");
- userwarnlog("*Possible image reference in page $page\n");
- }
if($text =~ /-->\]/)
{
Pearle::myLog("Possible bracket mixup in page $page\n");
userwarnlog(FixupLinks("*Possible bracket mixup in page $page\n"));
}
- if($text =~ /\[\[(?: |)/)