// User:Proteins/articlestructure.js

//

// Analyze the article's structure

// With thanks to Dr. pda, whose excellent prosesizebytes.js script was the inspiration.

//

// To use this script, add the line "importScript('User:Proteins/articlestructure.js');"

// to the monobook.js subpage of your user page; see User:Proteins/monobook.js for an example.

function articleStructure() {

var alert_string = "";

var diagnostic_string = "";

var read_entire_article = true;

var show_lead_diagnostics = true;

var show_section_diagnostics = false;

var display_individual_words = false;

var using_Internet_Explorer = false;

var spaced_text = "";

var untagged_text = "";

var stripped_text = "";

var unescaped_text = "";

var anchors;

var temp_anchor;

var section_name = "";

var temp_anchor_name = "";

var num_anchors = 0;

var anchor_index = 0;

var anchor_level = 0;

var prev_anchor_level = 0;

var num_H2_anchors = 0;

var H2_anchor_index = 0;

var cutoff_anchor_index = 0;

var cutoff_H2_anchor_index = 0;

var cutoff_child_node_index = 0;

var last_P_child_node_index = 0;

var cutoff_element_node_index = 0;

var num_sections = 0;

var section_index = 0;

var element_node;

var num_element_nodes = 0;

var element_node_index = 0;

var temp_node_name = "";

var parent_node;

var grandparent_node;

var greatgrandparent_node;

var sibling_node;

var next_sibling_node;

var child_node;

var child_nodes;

var prev_child_node;

var num_child_nodes = 0;

var child_node_index = 0;

var child_node_name = "";

var num_prose_counted_nodes = 0;

var grandchild_node;

var grandchild_nodes;

var num_grandchild_nodes = 0;

var grandchild_node_index = 0;

var path_names;

var file_name = "";

var num_characters = 0;

var del_num_characters = 0;

var temp_num_characters = 0;

var temp_word = "";

var num_words = 0;

var word_count = 0;

var word_index = 0;

var nonempty_word_index = 0;

var tentative_num_words = 0;

var num_spaces = 0;

var paragraph_count = 0;

var list_item_count = 0;

var prose_size_bytes = 0;

var total_word_count = 0;

var total_paragraph_count = 0;

var total_list_item_count = 0;

var total_prose_size_bytes = 0;

var section_word_count = new Array();

var section_paragraph_count = new Array();

var section_list_item_count = new Array();

var section_prose_size_bytes = new Array();

var word_count_string = "";

var paragraph_count_string = "";

var list_item_count_string = "";

var prose_size_bytes_string = "";

var temp_paragraph;

var text_paragraphs;

var num_paragraphs = 0;

var paragraph_index = 0;

var temp_list_item;

var text_list_items;

var num_list_items = 0;

var list_item_index = 0;

var temp_image;

var num_pixels = 0;

var image_index = 0;

var image_counter = 0;

var num_raw_images = 0;

var num_nonicon_images = 0;

var num_anchors = 0;

var num_raw_links = 0;

var num_raw_tables = 0;

var num_raw_references = 0;

// check for Internet Explorer browser

using_Internet_Explorer = false;

if (navigator.userAgent.indexOf("MSIE") > -1) {

using_Internet_Explorer = true;

// alert_string = "This script works correctly in every browser — except Internet Explorer. Please be patient!"

// window.alert(alert_string);

}

// Find the cutoff H2 anchor index, where we stop counting things

alert_string = "";

num_H2_anchors = 0;

section_name = "lead section";

prev_anchor_level = 1; //begin at the H1 heading

read_entire_article = true;

anchors = document.anchors;

num_anchors = anchors.length;

for (anchor_index=1; anchor_index

temp_anchor = anchors[anchor_index];

parent_node = temp_anchor.parentNode;

if (!parent_node) { continue; }

sibling_node = parent_node.nextSibling;

if (!sibling_node) { continue; }

// Check headings for jumps upwards in heading level

anchor_level = 0;

if (sibling_node.nodeName == "H1") {

alert_string += " WARNING: Illegal H1 heading in this section\n";

} else if (sibling_node.nodeName == "H2") {

anchor_level = 2;

} else if (sibling_node.nodeName == "H3") {

anchor_level = 3;

} else if (sibling_node.nodeName == "H4") {

anchor_level = 4;

} else if (sibling_node.nodeName == "H5") {

anchor_level = 5;

} else {

next_sibling_node = sibling_node.nextSibling;

if (!next_sibling_node) { continue; }

// Check headings for jumps upwards in heading level

if (next_sibling_node.nodeName == "H1") {

alert_string += " WARNING: Illegal H1 heading in this section\n";

} else if (next_sibling_node.nodeName == "H2") {

anchor_level = 2;

} else if (next_sibling_node.nodeName == "H3") {

anchor_level = 3;

} else if (next_sibling_node.nodeName == "H4") {

anchor_level = 4;

} else if (next_sibling_node.nodeName == "H5") {

anchor_level = 5;

}

} // closes assignment of the anchor level, if any

if (((anchor_level - prev_anchor_level) > 1) && (prev_anchor_level != 0)) {

if (num_H2_anchors == 0) {

alert_string += " WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in the lead\n";

} else {

alert_string += " WARNING: H" + prev_anchor_level + " to H" + anchor_level + " jump in \"" + section_name.replace(/(_+)/ig, " ") + "\"\n";

}

}

if (anchor_level > 0) { prev_anchor_level = anchor_level; }

//Check major section headings for closing sections

if (anchor_level == 2) {

num_H2_anchors++;

section_name = temp_anchor.name;

temp_anchor_name = temp_anchor.name;

alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + "\n";

// alert_string += "Section " + num_H2_anchors + " : " + section_name.replace(/(_+)/ig, " ") + " " + temp_anchor.parentNode.nodeName + " " + sibling_node.nodeName + "\n";

temp_anchor_name = temp_anchor_name.replace(/:$/ig,""); // eliminate colons at end

temp_anchor_name = temp_anchor_name.replace(/s$/ig,""); // eliminate plurals at end

temp_anchor_name = temp_anchor_name.replace(/See_also/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Related_topic/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Related_article/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Further_reading/ig,"");

temp_anchor_name = temp_anchor_name.replace(/External_link/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Footnote/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Note/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Reference/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Citation/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Source/ig,"");

temp_anchor_name = temp_anchor_name.replace(/Link/ig,"");

temp_anchor_name = temp_anchor_name.replace(/s([_\s]+)and([_\s]+)/ig,"");

temp_anchor_name = temp_anchor_name.replace(/([_\s]+)and([_\s]+)/ig,"");

temp_anchor_name = temp_anchor_name.replace(/([_\s]+)/ig,"");

if (temp_anchor_name == "") { break; }

// diagnostic_string = "Section " + num_H2_anchors + " : " + temp_anchor_name + " L: " + temp_anchor_name.length;

// window.alert(diagnostic_string);

} // closes check for H2 anchor

} // closes loop over the anchors

cutoff_anchor_index = anchor_index;

cutoff_H2_anchor_index = num_H2_anchors;

if (cutoff_anchor_index < num_anchors) {

read_entire_article = false;

alert_string += "\nProse counting will stop before the \"" + temp_anchor.name.replace(/(_+)/ig, " ") + "\" section.\n";

} else {

read_entire_article = true;

alert_string += "\nProse counting will cover the entire article.\n";

}

window.alert(alert_string);

// Count child and element nodes

alert_string = "";

num_element_nodes = 0;

child_nodes = document.getElementById("bodyContent").childNodes;

num_child_nodes = child_nodes.length;

// if (num_child_nodes > 40) { num_child_nodes = 40;} // truncate loop for testing

for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) {

child_node = child_nodes[child_node_index];

if (child_node.nodeType != 1) {

// alert_string += "Child node " + child_node_index + " : " + child_node.nodeName + "\n";

continue;

} // examine only Element nodes

num_element_nodes++;

// alert_string += "Element node " + num_element_nodes + " : " + child_node.nodeName + "\n";

} // closes loop counting the element nodes

// window.alert(alert_string);

// Determine the corresponding childNode index cutoff

alert_string = "";

if (read_entire_article == true) {

cutoff_child_node_index = num_child_nodes;

cutoff_element_node_index = num_element_nodes;

} else {

H2_anchor_index = 0;

element_node_index = 0;

last_P_child_node_index = -1;

last_P_element_node_index = -1;

for (child_node_index=0; child_node_index < num_child_nodes; child_node_index++) {

child_node = child_nodes[child_node_index];

if (child_node.nodeType != 1) { continue; } // examine only Element nodes

element_node_index++;

if (child_node.nodeName == "P") {

last_P_child_node_index = child_node_index;

last_P_element_node_index = num_element_nodes;

} else if (child_node.nodeName == "H2") {

H2_anchor_index++;

if (H2_anchor_index == cutoff_H2_anchor_index) {

cutoff_child_node_index = last_P_child_node_index;

cutoff_element_node_index = last_P_element_node_index;

break;

}

}

// alert_string += "Section " + H2_anchor_index + ", Element node " + num_element_nodes + " : " + child_node.nodeName + " " + child_node.childNodes.length + "\n";

// if (num_element_nodes > 45) { break; } // for debugging

} // closes loop over the childNodes of the Document

if (last_P_child_node_index < 0) { // if no cutoff was discovered; should never happen

cutoff_child_node_index = num_child_nodes;

cutoff_element_node_index = num_element_nodes;

}

} // closes check whether to read entire article

alert_string = "\nThe child_node_index and element_node_index cutoffs are " + cutoff_child_node_index + " and " + cutoff_element_node_index + ", respectively.\n";

// window.alert(alert_string);

// Count the words, paragraphs and prose size bytes by section

word_count = 0;

paragraph_count = 0;

list_item_count = 0;

prose_size_bytes = 0;

num_prose_counted_nodes = 0;

H2_anchor_index = 0;

for (child_node_index=0; child_node_index < cutoff_child_node_index; child_node_index++) {

child_node = child_nodes[child_node_index];

if (child_node.nodeType != 1) { continue; } // examine only Element nodes

element_node_index++;

if (child_node.nodeName == "H2") {

section_word_count.push(word_count);

section_paragraph_count.push(paragraph_count);

section_list_item_count.push(list_item_count);

section_prose_size_bytes.push(prose_size_bytes);

H2_anchor_index++;

word_count = 0;

paragraph_count = 0;

list_item_count = 0;

prose_size_bytes = 0;

}

// if the child node meets the criteria, add to the prose size, word and paragraph counts

if ((child_node.nodeName == "P") || (child_node.nodeName == "PRE")) {

untagged_text = child_node.innerHTML;

untagged_text = untagged_text.replace(//ig,""); // keep simple superscript text

untagged_text = untagged_text.replace(/(]+)>)(.*?<\/sup>)/ig,""); // remove superscript text

untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags

untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >

untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <

untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &

untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

// spaced_text = filterStringForProseSizeCounting(untagged_text);

words = spaced_text.split(' ');

tentative_num_words = words.length;

if (tentative_num_words > 0) { // verify that the paragraph contributes text

num_words = 0;

num_characters = 0;

for (word_index=0; word_index

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

num_words++;

num_characters += del_num_characters;

}

}

if (num_words > 0) {

paragraph_count++;

num_prose_counted_nodes++;

word_count += num_words;

prose_size_bytes += num_characters;

num_spaces = num_words - 1;

prose_size_bytes += num_spaces; // add spaces to character count

child_node.style.cssText = "background-color:yellow";

// Code for testing output

if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {

continue;

}

diagnostic_string = "";

nonempty_word_index = 0;

temp_num_characters = 0;

for (word_index=0; word_index

if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {

window.alert(diagnostic_string);

diagnostic_string = "Continued from previous screen:\n\n";

}

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

nonempty_word_index++;

temp_num_characters += del_num_characters;

diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + " " + temp_num_characters + "\n";

}

}

temp_num_characters += num_spaces;

diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";

if (display_individual_words) {

diagnostic_string += "\nEND of paragraph " + paragraph_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";

window.alert(diagnostic_string);

}

} // closes check for non-empty paragraph

} // tentative check for words

} else if ((child_node.nodeName == "UL") || (child_node.nodeName == "OL")) { // unordered and ordered lists

grandchild_nodes = child_node.childNodes; // not all LI elements because of possible nesting

num_grandchild_nodes = grandchild_nodes.length;

for (grandchild_node_index=0; grandchild_node_index

grandchild_node = grandchild_nodes[grandchild_node_index];

if (grandchild_node.nodeName == "LI") {

untagged_text = grandchild_node.innerHTML;

untagged_text = untagged_text.replace(//ig,""); // keep simple superscript text

untagged_text = untagged_text.replace(/(]+)>)(.*?<\/sup>)/ig,""); // remove superscript text

untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags

untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >

untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <

untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &

untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' ');

tentative_num_words = words.length;

if (tentative_num_words > 0) { // verify that the list item contributes text

num_words = 0;

num_characters = 0;

for (word_index=0; word_index

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

num_words++;

num_characters += del_num_characters;

}

}

if (num_words > 0) {

list_item_count++;

num_prose_counted_nodes++;

word_count += num_words;

prose_size_bytes += num_characters;

num_spaces = num_words - 1;

prose_size_bytes += num_spaces; // add spaces to character count

child_node.style.cssText = "background-color:yellow";

// Code for testing output

if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {

continue;

}

diagnostic_string = "";

nonempty_word_index = 0;

temp_num_characters = 0;

for (word_index=0; word_index

if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {

window.alert(diagnostic_string);

diagnostic_string = "Continued from previous screen:\n\n";

}

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

nonempty_word_index++;

temp_num_characters += del_num_characters;

diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";

}

}

temp_num_characters += num_spaces;

diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";

if (display_individual_words) {

diagnostic_string += "\nEND of list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";

window.alert(diagnostic_string);

}

} // closes check for non-empty list item

} // tentative check for words

} // closes check for a list item (LI) node

} // closes loop over grandchild nodes of an ordered (OL) or unordered (UL) list

} else if (child_node.nodeName == "DL") { // discursive lists

grandchild_nodes = child_node.childNodes;

num_grandchild_nodes = grandchild_nodes.length;

for (grandchild_node_index=0; grandchild_node_index

grandchild_node = grandchild_nodes[grandchild_node_index];

if ((grandchild_node.nodeName == "DT") || (grandchild_node.nodeName == "DD")) {

// Exceptions that shouldn't be counted

if (grandchild_node.childNodes.length > 0) {

temp_node_name = grandchild_node.childNodes[0].nodeName;

if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; }

}

if (grandchild_node.childNodes.length > 1) {

temp_node_name = grandchild_node.childNodes[1].nodeName;

if ((temp_node_name == "DIV") || (temp_node_name == "SPAN")) { continue; }

}

untagged_text = grandchild_node.innerHTML;

untagged_text = untagged_text.replace(//ig,""); // keep simple superscript text

untagged_text = untagged_text.replace(/(]+)>)(.*?<\/sup>)/ig,""); // remove superscript text

untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags

untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >

untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <

untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &

untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' ');

tentative_num_words = words.length;

if (tentative_num_words > 0) { // verify that the list item contributes text

num_words = 0;

num_characters = 0;

for (word_index=0; word_index

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

num_words++;

num_characters += del_num_characters;

}

}

if (num_words > 0) {

list_item_count++;

num_prose_counted_nodes++;

word_count += num_words;

prose_size_bytes += num_characters;

num_spaces = num_words - 1;

prose_size_bytes += num_spaces; // add spaces to character count

child_node.style.cssText = "background-color:yellow";

// Code for testing output

if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {

continue;

}

diagnostic_string = "";

nonempty_word_index = 0;

temp_num_characters = 0;

for (word_index=0; word_index

if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {

window.alert(diagnostic_string);

diagnostic_string = "Continued from previous screen:\n\n";

}

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

nonempty_word_index++;

temp_num_characters += del_num_characters;

diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", List item " + list_item_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n"; }

}

temp_num_characters += num_spaces;

diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";

if (display_individual_words) {

diagnostic_string += "\nEND of discursive list item " + list_item_count + " of Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";

window.alert(diagnostic_string);

}

} // closes check for non-empty list item

} // tentative check for words

} // closes check for a discursive list item (DT or DD) node

} // closes loop over grandchild nodes of a discursive list DL

} else if (child_node.nodeName == "BLOCKQUOTE") {

grandchild_nodes = child_node.getElementsByTagName("P");

num_grandchild_nodes = grandchild_nodes.length;

for (grandchild_node_index=0; grandchild_node_index

grandchild_node = grandchild_nodes[grandchild_node_index];

if (grandchild_node.nodeName == "P") {

untagged_text = grandchild_node.innerHTML;

untagged_text = untagged_text.replace(//ig,""); // keep simple superscript text

untagged_text = untagged_text.replace(/(]+)>)(.*?<\/sup>)/ig,""); // remove superscript text

untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags

untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >

untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <

untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &

untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' ');

tentative_num_words = words.length;

if (tentative_num_words > 0) { // verify that the list item contributes text

num_words = 0;

num_characters = 0;

for (word_index=0; word_index

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

num_words++;

num_characters += del_num_characters;

}

}

if (num_words > 0) {

// don't count blockquotes, for now

num_prose_counted_nodes++;

word_count += num_words;

prose_size_bytes += num_characters;

num_spaces = num_words - 1;

prose_size_bytes += num_spaces; // add spaces to character count

child_node.style.cssText = "background-color:yellow";

// Code for testing output

if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {

continue;

}

diagnostic_string = "";

nonempty_word_index = 0;

temp_num_characters = 0;

for (word_index=0; word_index

if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {

window.alert(diagnostic_string);

diagnostic_string = "Continued from previous screen:\n\n";

}

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

nonempty_word_index++;

temp_num_characters += del_num_characters;

diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";

}

}

temp_num_characters += num_spaces;

diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";

if (display_individual_words) {

diagnostic_string += "\nEND of BLOCKQUOTE in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";

window.alert(diagnostic_string);

}

} // closes check for non-empty list item

} // tentative check for words

} // closes check for a paragraph (P) node in a BLOCKQUOTE

} // closes loop over grandchild nodes in a BLOCKQUOTE

} else if (child_node.nodeName == "TABLE") {

if (child_node.className != "cquote") { continue; } // count only tables that are cquotes

grandchild_nodes = child_node.getElementsByTagName("TD");

num_grandchild_nodes = grandchild_nodes.length;

for (grandchild_node_index=0; grandchild_node_index

grandchild_node = grandchild_nodes[grandchild_node_index];

if (grandchild_node.nodeName == "TD") {

untagged_text = grandchild_node.innerHTML;

untagged_text = untagged_text.replace(//ig,""); // keep simple superscript text

untagged_text = untagged_text.replace(/(]+)>)(.*?<\/sup>)/ig,""); // remove superscript text

untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags

untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >

untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <

untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &

untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' ');

tentative_num_words = words.length;

if (tentative_num_words > 0) { // verify that the list item contributes text

num_words = 0;

num_characters = 0;

for (word_index=0; word_index

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

num_words++;

num_characters += del_num_characters;

}

}

if (num_words > 0) {

// don't count cquotes, for now

num_prose_counted_nodes++;

word_count += num_words;

prose_size_bytes += num_characters;

num_spaces = num_words - 1;

prose_size_bytes += num_spaces; // add spaces to character count

child_node.style.cssText = "background-color:yellow";

// Code for testing output

if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {

continue;

}

diagnostic_string = "";

nonempty_word_index = 0;

temp_num_characters = 0;

for (word_index=0; word_index

if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {

window.alert(diagnostic_string);

diagnostic_string = "Continued from previous screen:\n\n";

}

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

nonempty_word_index++;

temp_num_characters += del_num_characters;

diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";

}

}

temp_num_characters += num_spaces;

diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";

if (display_individual_words) {

diagnostic_string += "\nEND of CQUOTE paragraph in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";

window.alert(diagnostic_string);

}

} // closes check for non-empty list item

} // tentative check for words

} // closes check for a paragraph (P) node in a CQUOTE

} // closes loop over grandchild nodes in a CQUOTE

} else if (child_node.nodeName == "DIV") { // Poems

if (child_node.className != "poem") { continue; } // allow only poem DIV's

grandchild_nodes = child_node.getElementsByTagName("P");

num_grandchild_nodes = grandchild_nodes.length;

for (grandchild_node_index=0; grandchild_node_index

grandchild_node = grandchild_nodes[grandchild_node_index];

if (grandchild_node.nodeName == "P") {

untagged_text = grandchild_node.innerHTML;

untagged_text = untagged_text.replace(//ig,""); // keep simple superscript text

untagged_text = untagged_text.replace(/(]+)>)(.*?<\/sup>)/ig,""); // remove superscript text

untagged_text = untagged_text.replace(/(<([^>]+)>)/ig,""); // remove remaining tags

untagged_text = untagged_text.replace(/>/ig, ">"); // convert > to a single character >

untagged_text = untagged_text.replace(/</ig, "<"); // convert < to a single character <

untagged_text = untagged_text.replace(/&/ig, "&"); // convert & to a single character &

untagged_text = untagged_text.replace(/—/ig, ", "); // replace em-dashes with comma+space

spaced_text = untagged_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/ /ig, " "); // convert non-breaking spaces to spaces

spaced_text = spaced_text.replace(/\s+/ig, " "); // convert all whitespace to a single space

words = spaced_text.split(' ');

tentative_num_words = words.length;

if (tentative_num_words > 0) { // verify that the list item contributes text

num_words = 0;

num_characters = 0;

for (word_index=0; word_index

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

num_words++;

num_characters += del_num_characters;

}

}

if (num_words > 0) {

// don't count blockquotes, for now

num_prose_counted_nodes++;

word_count += num_words;

prose_size_bytes += num_characters;

num_spaces = num_words - 1;

prose_size_bytes += num_spaces; // add spaces to character count

child_node.style.cssText = "background-color:yellow";

// Code for testing output

if ((!show_section_diagnostics) && ((!show_lead_diagnostics) || (H2_anchor_index != 0))) {

continue;

}

diagnostic_string = "";

nonempty_word_index = 0;

temp_num_characters = 0;

for (word_index=0; word_index

if ((word_index%45 == 1) && (word_index>45) && (display_individual_words)) {

window.alert(diagnostic_string);

diagnostic_string = "Continued from previous screen:\n\n";

}

temp_word = words[word_index];

del_num_characters = temp_word.length;

if (del_num_characters > 0) {

nonempty_word_index++;

temp_num_characters += del_num_characters;

diagnostic_string += "Section " + H2_anchor_index + ", Paragraph " + paragraph_count + ", Word " + nonempty_word_index + " : " + temp_word + " " + del_num_characters + "\n";

}

}

temp_num_characters += num_spaces;

diagnostic_string += "Added " + num_spaces + " spaces to the byte count.\n\n";

if (display_individual_words) {

diagnostic_string += "\nEND of in Section " + H2_anchor_index + ": character count = " + temp_num_characters + " total= " + prose_size_bytes + "\n";

window.alert(diagnostic_string);

}

} // closes check for non-empty list item

} // tentative check for words

} // closes check for a paragraph (P) node in a poem

} // closes loop over grandchild nodes in a poem

} // closes check for appropriate elements

} // closes loop over the child nodes

section_word_count.push(word_count);

section_paragraph_count.push(paragraph_count);

section_list_item_count.push(list_item_count);

section_prose_size_bytes.push(prose_size_bytes);

// Output the various counts

word_count_string = " word";

paragraph_count_string = " paragraph";

list_item_count_string = " list item";

prose_size_bytes_string = " byte";

if (section_word_count[0] != 1) { word_count_string += "s";}

if (section_paragraph_count[0] != 1) { paragraph_count_string += "s";}

if (section_list_item_count[0] != 1) { list_item_count_string += "s";}

if (section_prose_size_bytes[0] != 1) { prose_size_bytes_string += "s";}

alert_string = "Lead section: " + section_paragraph_count[0] + paragraph_count_string + ", " + section_list_item_count[0] + list_item_count_string + ", " + section_word_count[0] + word_count_string + ", " + section_prose_size_bytes[0] + prose_size_bytes_string + "\n\n";

total_word_count = section_word_count[0];

total_paragraph_count = section_paragraph_count[0];

total_list_item_count = section_list_item_count[0];

total_prose_size_bytes = section_prose_size_bytes[0];

num_sections = section_word_count.length;

for (section_index=1; section_index

total_word_count += section_word_count[section_index];

total_paragraph_count += section_paragraph_count[section_index];

total_list_item_count += section_list_item_count[section_index];

total_prose_size_bytes += section_prose_size_bytes[section_index];

word_count_string = " word";

paragraph_count_string = " paragraph";

list_item_count_string = " list item";

prose_size_bytes_string = " byte";

if (section_word_count[section_index] != 1) { word_count_string += "s";}

if (section_paragraph_count[section_index] != 1) { paragraph_count_string += "s";}

if (section_list_item_count[section_index] != 1) { list_item_count_string += "s";}

if (section_prose_size_bytes[section_index] != 1) { prose_size_bytes_string += "s";}

alert_string += "Section " + section_index + " : " + section_paragraph_count[section_index] + paragraph_count_string + ", " + section_list_item_count[section_index] + list_item_count_string + ", " + section_word_count[section_index] + word_count_string + ", " + section_prose_size_bytes[section_index] + prose_size_bytes_string + "\n";

}

if (num_sections>1) {alert_string += "\n";} // Make space for the totals

word_count_string = " word";

paragraph_count_string = " paragraph";

list_item_count_string = " list item";

prose_size_bytes_string = " byte";

if (total_word_count != 1) { word_count_string += "s";}

if (total_paragraph_count != 1) { paragraph_count_string += "s";}

if (total_list_item_count != 1) { list_item_count_string += "s";}

if (total_prose_size_bytes != 1) { prose_size_bytes_string += "s";}

alert_string += "Totals: " + total_paragraph_count + paragraph_count_string + ", " + total_list_item_count + list_item_count_string + ", " + total_word_count + word_count_string + ", " + total_prose_size_bytes + prose_size_bytes_string + "\n";

window.alert(alert_string);

// Count the article images

num_nonicon_images = 0;

num_raw_images = document.images.length;

alert_string = "This document has " + num_raw_images + " images.\n";

for (image_index=0; image_index

temp_image = document.images[image_index];

parent_node = temp_image.parentNode;

grandparent_node = parent_node.parentNode;

greatgrandparent_node = grandparent_node.parentNode;

num_pixels = temp_image.width * temp_image.height;

if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }

if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }

if (num_pixels > 5000) { num_nonicon_images++; }

}

if (num_nonicon_images == 1) {

alert_string = "This document has 1 image with more than 5000 pixels.\n\n";

} else {

alert_string = "This document has " + num_nonicon_images + " images with more than 5000 pixels.\n\n";

}

image_counter = 0;

for (image_index=0; image_index

temp_image = document.images[image_index];

parent_node = temp_image.parentNode;

grandparent_node = parent_node.parentNode;

greatgrandparent_node = grandparent_node.parentNode;

num_pixels = temp_image.width * temp_image.height;

if (temp_image.src.match(/Replace_this_image_male\.svg/)) { continue; }

if (temp_image.src.match(/Replace_this_image_female\.svg/)) { continue; }

if (num_pixels < 5001) { continue; }

image_counter++;

alert_string += image_counter + " " + temp_image.width + "x" + temp_image.height + " " + num_pixels + " ";

path_names = temp_image.src.split("/");

file_name = path_names.pop();

file_name = file_name.replace(/^(\d+)px-/, "");

alert_string += file_name + "\n";

}

window.alert(alert_string);

return;

// Count the article tables and check for infoboxes and navigation templates

num_raw_tables = document.getElementsByTagName("table").length;

// Check for className = "infobox vcard" or "navbox-group"

alert_string = "This document has " + num_raw_tables + " tables.\n";

window.alert(alert_string);

// Count the article references

num_raw_references = document.getElementsByTagName("li").length;

// Count the article interwikis

num_raw_interwikis = document.getElementsByTagName("li").length;

// Count the article categories

num_raw_categories = document.getElementsByTagName("table").length;

// Count the article anchors; for each anchor...

alert_string = "This document has " + document.anchors.length + " anchors:\n";

for (anchor_index=0; anchor_index

temp_anchor = document.anchors[anchor_index];

alert_string += "Name " + anchor_index + ": " + temp_anchor.name + "\n";

}

window.alert(alert_string);

} // closes function articleStructure()

// Register a "structure" tab in the page-actions portlet ('p-cactions').
// The link's href is a javascript: URL, so clicking the tab calls the global
// articleStructure() function defined above.
//
// Why not addOnloadHook(): it is a deprecated legacy (wikibits) helper, and
// mw.util lives in the 'mediawiki.util' ResourceLoader module, which is not
// guaranteed to be loaded by the time a user script executes. Wait for both
// the module and DOM-ready before calling mw.util.addPortletLink().
$.when(mw.loader.using('mediawiki.util'), $.ready).then(function () {
    // Arguments: portlet id, href, link text, link id, tooltip, access key,
    // next-node selector (empty string: append at the end of the portlet).
    mw.util.addPortletLink('p-cactions', 'javascript:articleStructure()', 'structure', 'ca-structure', 'Structure of the article', 'g', '');
});

//