-- Module:Str find word/sandbox

-- 2023-04-17 STABLE wrt basics, quotes "" '' * with base sep; working on result string & report

-- todo: report options, more options

-- todo: options count, pattern, out-table, out-htmllist, keepinputordersource

require('strict')

local p = {}

local mArgs = require('Module:Arguments')

local str = require('Module:String')

local yesno = require('Module:Yesno')

local tTools = require('Module:TableTools')

local strDeEnCode = require('Module:DecodeEncode')

-- Upper bound on the number of separator-delimited words read from one word list
-- (used as the loop limit when a word list is parsed).
local iMaxWords = 12 -- alpha status, Apr 2023; can be raised once the module is stable

-- Parsed arguments shared by the helper functions in this module;
-- replaced by the result of parseArgs() at the start of p._main().
local tArgs = {}

-- The report module; stays nil until initReport() loads it.
local report = nil -- initiated when explain=T

-- Classify the |explain= input and store the resulting on/off switch.
-- tArgs: argument table; tArgs.explain is read and then overwritten with a boolean
-- returns the report type:
--   true-like input          => 'preview'    (tArgs.explain = true)
--   'testcases' or 'doc'     => same word    (tArgs.explain = true)
--   nil or false-like input  => false        (tArgs.explain = false)
--   any other word           => that word    (tArgs.explain = false)
local function parseReportType( tArgs )
	-- yesno() maps boolean-like input to true/false and hands back any other word untouched
	local sType = yesno( tArgs.explain, tArgs.explain ) or false
	local bReport

	if sType == true then
		sType = 'preview'
		bReport = true
	elseif sType == 'testcases' or sType == 'doc' then
		bReport = true
	else
		-- false, or an unknown word: no report
		bReport = false
	end

	tArgs.explain = bReport
	return sType
end

-- Load the report submodule into the module-level `report` variable and
-- call its xpCheckExplain() once (marked "dummy" by the author).
-- tArgs: accepted but not used here
local function initReport( tArgs )
	local mReport = require('Module:Str find word/report')
	report = mReport
	mReport.xpCheckExplain() -- dummy
end

-- Ask Module:If preview whether the page is being previewed.
-- Returns whatever ifPreview.main( true, false ) returns.
-- NOTE(review): main() is called with two booleans; presumably it expects a
-- frame object - verify against Module:If preview before relying on this.
-- Open question from the author: should this live here or in the report module?
local function isPreview( )
	return require('Module:If preview').main( true, false )
end

-- Decode a string through Module:DecodeEncode (presumably turning HTML
-- entities into plain characters - confirm against that module), collapse
-- every run of whitespace into one single space, and trim the result.
-- str: the string to clean; nil is returned as nil
local function sDecodeTrim( str )
	if str == nil then
		return nil
	end
	local sDecoded = strDeEnCode._decode( str )
	-- gsub also returns a replacement count; only the string is kept
	local sSingleSpaced = mw.ustring.gsub( sDecoded, '%s+', ' ' )
	return mw.text.trim( sSingleSpaced )
end

-- %-escape a word (any character string) so that it can be used literally
-- inside a Lua string pattern.
-- word: the string to escape
-- returns: the result of str._escapePattern() from Module:String
-- Per the original note, the characters that get a % prefix are the 12
-- pattern-magic characters ().%+-*?[^$]  (pattern "([%(%)%.%%%+%-%*%?%[%^%$%]])").
local function escape_word( word )

return str._escapePattern( word )

end

-- Remove one outer pair of quotes ('..' or "..") from a string, together
-- with the whitespace outside that pair.
-- s: the string to unquote; nil is returned as nil
-- bTrimAfter: when exactly true, the result is trimmed once more, so the
--   inner string of a quoted " abc " comes out as "abc"
-- returns: the unquoted string. A string that does not start with a quote
--   (after optional leading whitespace) is returned unchanged, apart from
--   the optional final trim.
local function removeOuterQuotes( s, bTrimAfter )
	if s == nil then return nil end

	if mw.ustring.match( s, "^%s*'" ) ~= nil then
		-- single-quoted: strip the pair and the whitespace around it
		s = mw.ustring.gsub( s, "^%s*'(.*)'%s*$", '%1' )
	elseif mw.ustring.match( s, '^%s*"' ) ~= nil then
		-- match() yields nil (never '') when nothing is found, so the test
		-- has to be against nil; otherwise this branch runs for every string
		s = mw.ustring.gsub( mw.text.trim( s ), '^"(.*)"$', '%1' )
	end

	if bTrimAfter == true then
		s = mw.text.trim( s )
	end

	return s
end

-- Determine the separator used to split the input word lists.
-- sSep: the separator as entered by the user (may be nil)
-- sDefaultSep: fallback separator
-- returns: sSep with all alphanumeric (%w) and whitespace (%s) characters
--   removed; when sSep is nil or nothing is left after that removal, the
--   decoded and trimmed default separator is returned instead.
-- todo: check characters '" _ {}(); & accept?
local function setSepIn( sSep, sDefaultSep )
	if sSep == nil then return sDecodeTrim( sDefaultSep ) end

	-- letters, digits and whitespace cannot act as separator: strip them.
	-- (`or ''` also cuts gsub's second return value, the replacement count)
	sSep = mw.ustring.gsub( sDecodeTrim( sSep ), '[%w%s]+', '' ) or ''

	if sSep == '' then
		return sDecodeTrim( sDefaultSep )
	end
	return sSep
end

-- Determine the separator that is put between the words of the output list.
-- sSep: the separator as entered (may be nil); one outer pair of quotes is
--   removed without trimming the inner string, so a quoted " - " keeps its spaces
-- sDefaultSep: returned as is when sSep is nil or ends up as the empty string
local function setSepOut( sSep, sDefaultSep )
	local sCleaned = sDecodeTrim( sSep )
	if not sCleaned then
		return sDefaultSep
	end

	local sUnquoted = removeOuterQuotes( sCleaned, false )
	if sUnquoted ~= '' then
		return sUnquoted
	end
	return sDefaultSep
end

-- Look up a single word in a word table (plain array of words).
-- tSource: array to search, walked in ipairs order
-- word: the word to look for (compared with ==, so case matters)
-- returns: the word itself when it is present, nil otherwise
local function findWordInTable( tSource, word )
	local sFound = nil
	for _, sCandidate in ipairs( tSource ) do
		if sCandidate == word then
			sFound = word
			break
		end
	end
	return sFound
end

-- Helper for buildWordTable: pull every quoted word out of a word list.
-- sWordlist: the list, already wrapped in leading and trailing separators
-- sQuote: the quote character, ' or "
-- sPlaceholder: code word that takes the place of each quoted word
-- returns: the list with each quoted word replaced by sPlaceholder, and an
--   array with the unquoted, trimmed words in order of appearance
local function extractQuotedWords( sWordlist, sQuote, sPlaceholder )
	local tHits = {}
	-- a balanced quote pair (%b) that fills the whole space between two separators
	local sPattern = '%f[^' .. tArgs.sep_pattern .. ']%s*%b' .. sQuote .. sQuote
		.. '%s*%f[' .. tArgs.sep_pattern .. ']'

	local hitWord = sDecodeTrim( mw.ustring.match( sWordlist, sPattern ) ) or ''
	while hitWord ~= '' do
		table.insert( tHits, removeOuterQuotes( hitWord, true ) )
		-- replace the current first hit only; the placeholder has no quotes, so the loop ends
		sWordlist = mw.ustring.gsub( sWordlist, sPattern, sPlaceholder, 1 )
		hitWord = sDecodeTrim( mw.ustring.match( sWordlist, sPattern ) ) or ''
	end

	return sWordlist, tHits
end

-- Reads and parses a word list and returns a table with words (simple array).
-- The word list can be: source, AND-words to check, OR-words to check, synonyms.
-- step 1: basic preparation of the wordstring (decode, single-space, trim)
-- step 2: when case-insensitive, turn the string into lowercase
-- step 3: read (parse) quoted '..'
-- step 4: read (parse) quoted ".."
-- step 5: read (parse) separator-delimited words (at most iMaxWords)
-- step 6: put the quoted words back in their original positions
-- step 7: when booleans=T, change boolean words into 'true'/'false' (Module:Yesno rules)
-- step 8: replace synonyms (by input such as "|_nov=November, 11")
-- step 9: remove duplicates from the word table (later occurrences are removed)
-- All words returned are trimmed. Blank unquoted words are skipped; a blank
-- quoted word (as in .., " ", ..) is kept as ''.
-- sWordlist: the word list string; nil or blank gives an empty table
-- returns: the table (a straight array)
local function buildWordTable( sWordlist )
	local wordTable = {}
	local cQ1 = '_Q0027_' -- placeholder for a '..' word; U+0027 = \'
	local cQ2 = '_Q0022_' -- placeholder for a ".." word; U+0022 = \"
	local tQ1hits, tQ2hits -- quoted words, reused to restore order

	-- Step 1: prepare sWordlist (the cleaned string must be assigned back)
	sWordlist = sDecodeTrim( sWordlist )
	if sWordlist == nil or sWordlist == '' then return wordTable end
	sWordlist = tArgs.sep .. sWordlist .. tArgs.sep

	-- Step 2: case-insensitive: compare in lowercase (Unicode-aware)
	if yesno( tArgs.case, true ) == false then
		sWordlist = mw.ustring.lower( sWordlist )
	end

	-- Step 3 and 4: take out the quoted words, single quotes first
	sWordlist, tQ1hits = extractQuotedWords( sWordlist, "'", cQ1 )
	sWordlist, tQ2hits = extractQuotedWords( sWordlist, '"', cQ2 )

	-- Step 5: parse plain separator-delimited words
	local sPattern = '%f[^' .. tArgs.sep_pattern .. '][^' .. tArgs.sep_pattern .. ']+%f[' .. tArgs.sep_pattern .. ']'
	-- str._match hands back the separator when there is no n-th match; a real
	-- hit never contains the separator, so it works as end-of-list marker
	local sNoMoreWords = sDecodeTrim( tArgs.sep )
	local hitCount = 0
	while hitCount < iMaxWords do
		local hitWord = sDecodeTrim( str._match( sWordlist, sPattern, 1, hitCount + 1, false, tArgs.sep ) ) or ''
		if hitWord == sNoMoreWords then
			break
		end
		hitCount = hitCount + 1
		if hitWord ~= '' then -- blank unquoted words are skipped
			table.insert( wordTable, hitWord )
		end
	end
	-- the report module is only loaded when explain is on: do not index it blindly
	if hitCount >= iMaxWords and report ~= nil then
		report.xpMessage( 'ERR701 wordcount ' .. hitCount .. ' > maxwords' .. iMaxWords )
	end

	-- Step 6: merge quoted words & wordtable, keep order:
	-- the n-th quoted word goes into the first placeholder that is still left
	for _, tQ in ipairs( { { tQ1hits, cQ1 }, { tQ2hits, cQ2 } } ) do
		for _, sQW in ipairs( tQ[1] ) do
			for iW, sW in ipairs( wordTable ) do
				if sW == tQ[2] then
					wordTable[iW] = sQW
					break
				end
			end
		end
	end

	-- Step 7: when read as booleans, convert boolean words to 'true'/'false'
	if tArgs.booleans then
		for i, v in ipairs( wordTable ) do
			local bValue = yesno( v )
			if bValue ~= nil then
				wordTable[i] = tostring( bValue )
			end
		end
	end

	-- Step 8: replace synonyms by their target word.
	-- Only string keys are target words; numeric keys are skipped, otherwise
	-- a word would be replaced by a number.
	for aka1, tAkas in pairs( tArgs['synonymsTables'] or {} ) do
		if type( aka1 ) == 'string' then
			for iW, w in ipairs( wordTable ) do
				if findWordInTable( tAkas, w ) ~= nil then
					wordTable[iW] = aka1
				end
			end
		end
	end

	-- Step 9: remove duplicates from the list
	return tTools.removeDuplicates( wordTable )
end

-- AND-logic: ALL words of the AND-list must be found in the source words.
-- tWorkf: work table with the arrays tWorkf.ANDwords and tWorkf.SOURCEwords
-- returns two values: T/F, hittable
--   T when *all* AND-words are found
--   hittable: array with all AND-words that were found
-- note 1: when F, the hittable still contains the words that were found
-- note 2: empty AND-wordlist => True by logic (because: not falsified)
local function checkANDwords( tWorkf )
	local bANDchk = true -- main conclusion; stays true until falsified
	local tHits = {} -- hit table

	for _, word in ipairs( tWorkf.ANDwords ) do
		local sHit = findWordInTable( tWorkf.SOURCEwords, word )
		if sHit == nil then
			-- Falsified! Logically we could stop here, but the loop goes on
			-- to complete the hit table (feature)
			bANDchk = false
		else
			table.insert( tHits, sHit )
		end
	end

	-- tHits is filled by table.insert only, so it has no gaps to compress
	return bANDchk, tHits
end

-- OR-logic: at least one word of the OR-list must be found in the source words.
-- tWork: work table with the arrays tWork.ORwords and tWork.SOURCEwords
-- returns two values: T/F, hittable
--   True when at least one OR-word is found
--   hittable: array with all OR-words that were found
-- note 1: empty OR-wordlist => True by logic (because: not falsified)
-- note 2: while just one hitword is a True result, the hittable contains all words found
local function checkORwords( tWork )
	local tHits = {}

	-- no early exit on the first hit: the hit table must be complete
	for _, word in ipairs( tWork.ORwords ) do
		local sHit = findWordInTable( tWork.SOURCEwords, word )
		if sHit ~= nil then
			table.insert( tHits, sHit )
		end
	end

	-- tHits is filled by table.insert only, so it has no gaps to compress
	local bORchk = ( #tWork.ORwords == 0 ) or ( #tHits > 0 )
	return bORchk, tHits
end

-- Determine the requested return value (a string).
-- Applies the tArgs.out_true / tArgs.out_false settings to the logical result.
-- tResults: result table; reads tResults.resultALL (boolean) and, for a
--   True result, tResults.tTRUE (array with the hit words)
-- returns:
--   result not True: tArgs.out_false, or '' when that is not set
--   result True, tArgs.out_true == nil (parameter absent): the hit words,
--     joined with tArgs.out_sep
--   result True, tArgs.out_true set: that value as entered; '' is legal and
--     gives a blank return value
--- todo add pref, suff
local function yesnoReturnstring( tResults )
	if tResults.resultALL ~= true then -- result False (or not determined)
		return tArgs.out_false or ''
	end

	if tArgs.out_true == nil then
		return table.concat( tResults.tTRUE or {}, tArgs.out_sep )
	end

	-- the entered value is returned untouched, without any debug marker
	return tArgs.out_true
end

-- Combine the OR-hits and the AND-hits into one new array.
-- tResult: result table; reads tResult.tORhits and tResult.tANDhits (each
--   an array or nil); neither input table is modified
-- returns: a new array with the OR-hits first, followed by the AND-hits.
--   Never nil: when both hit tables are missing the result is {} (and
--   ERR921 is reported when the report module is loaded).
-- note: the result is not sorted and is not put back into source order
local function tCombinedSourceorderedTRUEtables( tResult )
	local tOut = {}

	if tResult.tANDhits == nil and tResult.tORhits == nil then
		-- the report module is only loaded when explain is on
		if report ~= nil then
			report.xpMessage( 'ERR921 BUG tOut is nil??? - tCombinedSourceorderedTRUEtables' )
		end
		return tOut
	end

	-- copy instead of inserting into tANDhits, so the caller's hit tables stay as they are
	for _, v in ipairs( tResult.tORhits or {} ) do
		tOut[#tOut + 1] = v
	end
	for _, v in ipairs( tResult.tANDhits or {} ) do
		tOut[#tOut + 1] = v
	end

	return tOut
end

-- Join two word list strings into one list string.
-- s1, s2: the list strings; each may be nil and is then left out
-- sSep: (optional) separator to put between the two lists; defaults to
--   tArgs.sep, and to ',' when that is not set either. The explicit
--   parameter matters while the arguments are still being parsed: at that
--   moment the shared tArgs does not hold the separator yet, and joining
--   without one would glue the last word of s1 to the first word of s2.
-- returns: the joined string; '' when both s1 and s2 are nil
local function concatAndLists( s1, s2, sSep )
	local sJoin = sSep or tArgs.sep or ','

	if s1 ~= nil and s2 ~= nil then
		return table.concat( { s1, s2 }, sJoin )
	end
	return s1 or s2 or ''
end

-- ===== ===== ===== ===== ===== ===== ===== ===== =====
-- PARSE arguments
-- origArgs: the argument table as entered (parameter name => value)
-- returns: a new table with all settings and the raw word lists:
--   sep, sep_pattern   separator for the input lists, plain and %-escaped
--   out_sep            separator for the output list (|out-sep=, else |sep=, else ', ')
--   case, booleans     booleans, default false
--   out_true           nil when absent (= return the hit words); out_false defaults to ''
--   prefix, suffix     read but not applied yet
--   explain, explain_type, test, out_format
--   source, sANDlist, sORlist   the word lists, still as strings
--   synonyms           target word => raw synonym list (plus a list of the target words)
--   synonymsTables     empty here, to be populated later
-- NOTE(review): |s= is read both as alias of |source= and as alias of
--   |suffix= - confirm which one is intended.
-- NOTE(review): comments elsewhere in this module mention |out-true= (with a
--   hyphen), while the keys read here are out_true / out_false - confirm.
local function parseArgs( origArgs )
	local tDefault = {
		sep = ',',
		case = false,
		booleans = false,
		out_sep = ', ',
	}
	local tNewArgs = {}

	tNewArgs.sep = setSepIn( origArgs['sep'], tDefault['sep'] )
	tNewArgs.sep_pattern = escape_word( tNewArgs.sep )
	tNewArgs.out_sep = setSepOut( origArgs['out-sep'] or origArgs['sep'], tDefault['out_sep'] )

	tNewArgs.case = yesno( origArgs['case'] or origArgs['casesensitive'] ) or tDefault['case']
	tNewArgs.booleans = yesno( origArgs['bool'] or origArgs['booleans'] ) or tDefault['booleans']

	tNewArgs.out_true = sDecodeTrim( origArgs.out_true ) -- nil = default, so return the hit words; keep '' as legal input & return value
	tNewArgs.out_false = sDecodeTrim( origArgs.out_false ) or ''
	tNewArgs.prefix = sDecodeTrim( origArgs.prefix or origArgs.p ) or ''
	tNewArgs.suffix = sDecodeTrim( origArgs.suffix or origArgs.s ) or ''
	tNewArgs.out_format = 'default' -- todo: table, default, htmllisttype, flatlist, first

	tNewArgs.explain = false -- TEST17Apr origArgs.explain
	tNewArgs.explain_type = parseReportType( tNewArgs ) or nil
	tNewArgs.test = origArgs.test

	-- the wordlists:
	tNewArgs['source'] = origArgs['source'] or origArgs['s'] or ''

	-- |word= and |andwords= together form the AND-list. They are joined here
	-- with the separator parsed above: while this function runs, the
	-- module-level tArgs has not been assigned yet, so a helper reading
	-- tArgs.sep would join the two lists without any separator.
	local sWord = origArgs['word'] or origArgs['w']
	local sAndWords = origArgs['andwords'] or origArgs['andw']
	if sWord ~= nil and sAndWords ~= nil then
		tNewArgs['sANDlist'] = sWord .. tNewArgs.sep .. sAndWords
	else
		tNewArgs['sANDlist'] = sWord or sAndWords or ''
	end

	tNewArgs['sORlist'] = origArgs['orwords'] or origArgs['orw'] or ''

	-- synonyms: every parameter whose name starts with '_' followed by a
	-- non-space, as in |_nov=November, 11
	tNewArgs['synonyms'] = {}
	tNewArgs['synonymsTables'] = {} -- to be populated later
	for k, v in pairs( origArgs ) do
		if type( k ) == 'string' and mw.ustring.match( k, '^_%S' ) ~= nil then
			local syn1 = mw.ustring.gsub( k, '^_', '', 1 )
			-- stored twice, as before: appended to the list part and as
			-- key => value; consumers of this table may rely on either form
			table.insert( tNewArgs['synonyms'], syn1 )
			tNewArgs['synonyms'][syn1] = v
		end
	end

	if tNewArgs.explain == true then
		initReport( tNewArgs.explain )
		-- tostring(): either value can be nil or a boolean, and `..` binds
		-- tighter than `or`, so the fallback needs its own parentheses
		report.xpMessage( 'EXPLAIN: ' .. tostring( origArgs.explain ) .. '=>' .. tostring( tNewArgs.explain_type or 'unk' ) )
		report.xpReportSynonyms( tNewArgs )
	end

	return tNewArgs
end

-- ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== ===== =====
-- _main function: check for presence of words in source string
-- origArgs: table with the arguments as entered (parameter name => value)
-- Checks and returns:
--   when T: the string of all hitwords ( default ), or the out_true input
--   when F: empty string '' ( default ), or the out_false input
-- steps:
--   1. input word strings are prepared ( parsed into an array of words )
--   2. words checks are made ( applying AND-logic, OR-logic )
--   3. final conclusion drawn ( T/F )
--   4. optionally, the preview report is prepared ( debug, feedback ) - todo
--   5. based on T or F status, the return value ( string ) is established and returned
-- note 1: each return value can be '' ( nullstring )
-- note 2: the result is F when there are no source words, or no words to check at all
function p._main( origArgs )
	local tWork = {}
	local tResults = {}

	tArgs = parseArgs( origArgs )

	-- make synonyms into tables
	-- 'aka1' = target synonym (= the synonym that remains)
	for aka1, sAkalist in pairs( tArgs['synonyms'] ) do
		tArgs['synonymsTables'][aka1] = buildWordTable( sAkalist )
	end

	-- build the worktables
	tWork['SOURCEwords'] = buildWordTable( tArgs.source )
	tWork['ANDwords'] = buildWordTable( tArgs.sANDlist )
	tWork['ORwords'] = buildWordTable( tArgs.sORlist )

	-- apply logic & conclude
	tResults.resultALL = nil -- best be set explicitly
	if ( #tWork.SOURCEwords == 0 ) or ( #tWork.ANDwords + #tWork.ORwords == 0 ) then
		-- No words to check
		tResults.resultALL = false
		-- the report module is only loaded when explain is on
		if tArgs.explain and report ~= nil then
			report.xpMessage( 'ERR201 No words to check' )
		end
	else
		tResults['bAND'], tResults['tANDhits'] = checkANDwords( tWork )
		tResults['bOR'], tResults['tORhits'] = checkORwords( tWork )
		tResults.resultALL = ( tResults.bAND ) and ( tResults.bOR )
	end

	if tResults.resultALL == true then
		tResults.tTRUE = tCombinedSourceorderedTRUEtables( tResults ) or {}
	end

	-- todo: when tArgs.explain is set, add report.xpPresent( tArgs, tWork, tResults )

	-- return the result string only, as described above (no T/F debug marker in front)
	tResults.sRESULTstring = yesnoReturnstring( tResults )
	return tResults.sRESULTstring
end

-- Entry point for #invoke: collect the arguments from the frame with
-- Module:Arguments and hand them to p._main.
-- frame: the frame object
-- returns: whatever p._main returns
function p.main( frame )
	local tInvokeArgs = mArgs.getArgs( frame )
	return p._main( tInvokeArgs )
end

return p