1, //previous input values will be shown in form fields 'size' => '30', 'button' => FmtPageName(' $[Search] ', $pagename), 'searchlabel' => FmtPageName('$[Search for ]', $pagename), 'pageslabel' => FmtPageName('$[On pages]', $pagename), 'caselabel' => FmtPageName('$[Match case]', $pagename), 'phraselabel' => FmtPageName('$[Match phrase]', $pagename), 'wordlabel' => FmtPageName('$[Match whole word]', $pagename), 'regexlabel' => FmtPageName('$[Regular expression]', $pagename), )); // defaults array SDVA($TextExtractOpt, array ( 'markup' => 'cut', //code, text, source, on 'unit' => 'dsent', //page, para, line, sent, dline, dsent 'highlight'=> 'yellow', //background color, 'bold', 'none' 'linenum-color' => 'green', 'matchnum-color' => 'green', 'pagenum-color' => 'green', 'title' => 'Search results for ', //XL('Text Extract'), 'header' => 'full', 'phead' => 'link', 'linewrap' => 1, 'rowspacing'=> '0.5em', 'case' => 0, 'phrase' => 0, 'regex' => 0, 'strict' => 0, 'acc-restore' => 1, 'error' => 1, 'timer' => 0, 'pagenum' => 0, 'matchnum' => 0, 'linenum' => 0, 'textlinks' => 0, 'linktext' => 'blue', 'shorten' => 0, 'lwords' => 5, 'rwords' => 10, 'ellipsis' => '…', 'linebreaks'=> 1, )); //list of inline markup which needs to be removed for the search SDVA($TERemoveMarkupPatterns, array( "/'''(.*?)'''/" => "$1", //'''strong''' (bold) "/''(.*?)''/" => "$1", //''emphasis'' (italic) "/'\\-(.*?)\\-'/" => "$1", //'-smaller-' "/\\[(([-+])+)(.*?)\\1\\]/" => "$1", //[+big+], [-small-] "/'\\^(.*?)\\^/" => "$1", //'^super script^' "/'_(.*?)_'/" => "$1", //'_sub script_' "/\\{\\+(.*?)\\+\\}/" => "$1", //{+ins+} (underline) "/\\{-(.*?)-\\}/" => "$1", //{-del-} (strike through) "/(%.*?%)/" => "", //%wiki styles% %% "/^-+[<>]\\s*/" => "", //->indents, - "$2", //* unordered list bullets "/^(\\#+)(.*?)$/m" => "$2", //# ordered list bullets "/^(:+)(?=(\s*)([^:]+):)/m" => " ", //: definition : list "/ +/" => " ", //multiple spaces to single space )); //list of replacement patterns for Latin1 (ISO-8859-1) accented characters $ISOLatin1ReplPatterns = array( 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'Ae', 'Å' => 'Ao', 'Æ' => 'Ae', 'Ç' => 'C', 'Œ' => 'Oe', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ú' => 'U', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'Oe', 'Ø' => 'Oe', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'Ue', 'Ý' => 'Y', 'Þ' => 'Th', 'ß' => 'ss', 'œ' => 'oe', 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'ae', 'å' => 'ao', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'oe', 'ø' => 'oe', 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'ue', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y' ); //restore patterns for German $DeAccRestorePatterns = array('ae'=>'ä', 'oe'=>'ö', 'ue'=>'ü', 'Ae'=>'Ä', 'Oe'=>'Ö', 'Ue'=>'Ü', 'ss' => 'ß'); //default array for accent replace //to disable accent replacement patterns set option strict=1, or $TERemoveMarkupPatterns['strict'] = 1; //so additional search terms will not be created in a search SDV($TEAccReplacePatterns, $ISOLatin1ReplPatterns); //default array for accent restore, using German language restore patterns //you may want to set up your own custom patterns array, //or set $TERemoveMarkupPatterns['acc-restore'] = 0; SDV($TEAccRestorePatterns, $DeAccRestorePatterns); //set css styles into HTML header function TESetStyles($opt) { global $HTMLStylesFmt; //header, footer, pagelink prefix styles if ($opt['header']=='full') $opt['footer'] = 1; if ($opt['header']) { SDV($HTMLStylesFmt['teheader'], " .te-header {margin-top:0.5em; padding:0.3em; border-top:1px solid #ccc; border-bottom:1px solid #ccc; background:#f7f7f7;} "); } if ($opt['footer']) { SDV($HTMLStylesFmt['tefooter'], " .te-footer {margin-top:0.5em; padding:0.3em; border-top:1px solid #ccc; border-bottom:1px solid #ccc; background:#f7f7f7;} "); } if ($opt['phead']) { SDV($HTMLStylesFmt['teprefix'], " .te-pageheader {margin:.8em 0 .5em 0; padding:.2em .2em 0 .2em;} .te-pageheader {border-top:1px solid #ccc; border-bottom:1px solid #ccc; background:#f7f7f7;} .te-pheadlink {font-size:larger; font-weight:600;} "); } //number color defaults for css styling foreach(array('line','match','page') as $c) { if (isset($opt[$c.'num'])) $HTMLStylesFmt[$c.'num'] = " .{$c}num { color: {$opt[$c.'num-color']} ;} "; } SDV($HTMLStylesFmt['telinktext'], " .te-linktext {color: {$opt['linktext']} } "); SDV($HTMLStylesFmt['teimages'], " .image {max-width:10em; } "); // wrap lines of preformatted text and code if ($opt['linewrap']==1) { # whitespace wrap (perhaps copy styles to css stylesheet) $HTMLStylesFmt['prewrap'] = " code, div.te-results pre, div.te-results code, code.escaped, pre.escaped { white-space: pre-wrap; padding-left: 1em; } "; } if ($opt['rowspacing']!=0) { SDV($HTMLStylesFmt['rowspacing'], " .spacer { min-height: {$opt['rowspacing']};}"); } } // main function for text extract processing function TextExtract($pagename, $list, $opt = NULL) { global $TextExtractOpt, $TEModeDefaults, $TextExtract, $TextExtractExclude, $FmtV, $HTMLPNewline, $HTMLStylesFmt, $KeepToken, $KPV, $PageListArgPattern, $StrFoldFunction, $StrRestoreFunction; ##DEBUG echo "
LIST "; print_r($list); echo "
"; foreach($opt as $k => $v) { if (is_array($v)) foreach($v as $kk =>$vv) $opt[$k][$kk] = stripmagic($vv); else $opt[$k] = stripmagic($v); } //internal arg array $par = array(); //start time StopWatch('TextExtract start'); if ($opt['stime']) $par['stime'] = $opt['stime']; else $par['stime'] = strtok(microtime(), ' ') + strtok(''); $opt = array_merge($TextExtractOpt, $opt); $HTMLPNewline = ($opt['linebreaks']==1) ? '
' : ''; switch ($opt['unit']) { case 'sentence': $opt['unit'] = 'sent'; break; case 'paragraph': $opt['unit'] = 'para'; break; case 'dline': $opt['unit'] = 'line'; $opt['double'] = 1; break; case 'dsent': $opt['unit'] = 'sent'; $opt['double'] = 1; break; } if($opt['markup']=='text') $opt['textlinks'] = 1; //for 'text' mode links shown as text //check if we got utf8, for setting /u modifier for search pattern if (function_exists('utf8string')) { $opt['utf8'] = 1; $par['umod'] = 'u'; $umod = 'u'; } else { $opt['utf8'] = 0; $par['umod'] = ''; $umod = ''; } ##DEBUG echo "
OPT "; print_r($opt); echo "
"; //input parameter check if (!in_array($opt['unit'], array('line','para','page','sent')) OR !in_array($opt['markup'], array('code','cut','source','text','on'))) return "%red%$[Error: check input parameters!]"; foreach((array)@$opt['+'] as $i) $opt[''][] = $i; if (!isset($opt['']) && !isset($opt['pattern'])) return '%red%$[Error: search term missing!]'; //determine the search pattern //if term is regular expression if ($opt['regex']==1) { $par['pat'] = $pat = $par['terms'] = $opt[''][0] = $opt['pattern']; //exclude various input patterns SDVA($TextExtractExclude, array("*","?","+","(",")","[","]","^","$","|","??","\\")); foreach($TextExtractExclude as $v) if($pat==$v) return '%red%$[Error: disallowed character input!]'; } //else term to be parsed and preg charcters escaped else { $terms = implode(" + ", $opt['']); if (preg_match('/\\".*\\"/', $terms)) $opt['phrase'] = 1; if (isset($opt['phrase']) && $opt['phrase']==1) $par['terms'] = $terms = implode(" ", $opt['']); if (isset($opt['-'])) $terms .= " -".implode(" -", $opt['-']); //display terms in results header $par['terms'] = $terms; //create additional search terms with accent charcters replaced by ascii if ($opt['utf8']==1 && $opt['strict']==0 && isset($opt[''])) { foreach($opt[''] as $v) { if (preg_match('~[\\x80-\\xff]~', $v)) $opt[''][] = TE_alias_replace($v); else if($opt['acc-restore']==1) $opt[''][] = TE_alias_restore($v); } $opt[''] = array_unique($opt['']); } //escape regex special characters $pregchars = array('.','?','!','*','|','$','(',')','[',']','{','}',); foreach ($pregchars as $v) { $opt[''] = str_replace($v,'\\'.$v, $opt['']); if (isset($opt['-'])) $opt['-'] = str_replace($v,'\\'.$v, $opt['-']); } //set search regex pattern //set word boundaries on each term, seperate with | (OR) if (isset($opt['word']) && $opt['word']==1) { $pat = ''; foreach($opt[''] as $w) $pat .= "\b".$w."\b|"; $pat = trim($pat,'|'); } else $pat = implode("|", $opt['']); //set case in-sensitive search var for providing /i modifier or (?i) for regex if (isset($opt['case']) && $opt['case']==0) { $pat = "(?i)".$pat; $par['imod'] = 'i'; //used in function TEMarkupCleaner } else $par['imod'] = ''; $par['pat'] = $pat; //$par['pat'] for other functions, always same as $pat //add excludes from 'cut' and '-' options to make pattern 'xcut' to cut/dismiss rows if (isset($opt['cut']) && $opt['cut']!='') { $xar = explode(" ",$opt['cut']); if (isset($opt['-'])) $xar = array_unique(array_merge($opt['-'], $xar)); } else if (isset($opt['-'])) $xar = $opt['-']; if (isset($xar)) { if (isset($opt['word']) && $opt['word']==1) { foreach($xar as $i => $w) $xar[$i] ="\b".$w."\b"; } $xcut = implode("|", $xar); } if (isset($xcut) && $xcut!='') { $par['xcut'] = $xcut; if (isset($opt['case']) && $opt['case']==0) $par['xcut'] = "(?i)".$par['xcut']; } } ##DEBUG echo "
pat= ".($pat)." --terms= ".$par['terms']; if(isset($par['xcut'])) echo " Excludes --par['xcut']= ".$par['xcut']; //always wrap lines when displaying preformatted 'source' code if ($opt['markup']=='source') $opt['linewrap'] = 1; //set css styling TESetStyles($opt); //setting keep values here, and keeptokens directly in TEHighLight() //instead of calling Keep again and again switch ($opt['highlight']) { case 'none': $KPV['01-TE'] = $KPV['02-TE'] = ""; break; case 'bold': $KPV['01-TE'] = ""; $KPV['02-TE'] = ""; break; case '1': default: $KPV['01-TE'] = ""; $KPV['02-TE'] = ""; $HTMLStylesFmt['te-hilight'] = " .te-hilight { background-color: {$opt['highlight']}; } "; } $par['hitoklen'] = 2* (5 + 2 * strlen($KeepToken)); // 2* ( KeepToken-length + KPV-key-length + KeepToken-length ) $KPV['03-TE'] = "
"; $par['br-tag'] = $KeepToken."03-TE".$KeepToken; $KPV['04-TE'] = "
"; $par['vspace'] = $KeepToken."04-TE".$KeepToken; //inits $par['sorcnt']=$par['matchnum']=$par['matchcnt']=$par['rowcnt']=$par['pagecnt']=0; $par['pagenum']= 1; $par['title'] = $opt['title']; //count of pages with matches $par['listcnt'] = ($FmtV['$MatchSearched']) ? $FmtV['$MatchSearched'] : count($list); $new = array(); $j = 0; //process each page from list of pagelist matches in turn foreach($list as $pn) { $par['source'] = $pn; //full name $par['pname'] = PageVar($pn,'$Name'); //substr(strstr($pn, '.'),1); $par['ptitle'] = PageVar($pn,'$Title'); $par['ptitlespaced'] = PageVar($pn,'$Titlespaced'); $par['pgroup'] = PageVar($pn,'$Group'); $par['pmatchnum'] = 0; $par['prevpmnum'] = 0; $par['hit'] = 0; //get rows from source page $rows = TETextRows($pagename, $pn, $opt, $par); ##DEBUG echo "
".$pn; show($rows,'rows'); if (!$rows) continue; //next page $j++; $originals = array(); //processing lines (rows) foreach ($rows as $k => $row) { $par['linenum'] = $k+1; //skip pages which don't match #if ($opt['unit']=='page') if(!preg_match("($pat)".$umod, $row)) continue; //preserve empty rows for 'all including' pattern if (($opt['unit']=='line'|| $opt['unit']=='sent') && $row=="" && $pat==".") { $new[$j]['rows'][] = $row; continue; } //use row 'as is' if markup=on or whole page or paragraph, no futher row processing if ($opt['markup']=='on' && ($pat=="." || $opt['unit']=='page' || $opt['unit']=='para')) { if ($opt['unit']=='para' && !preg_match("($pat)".$umod, $row)) continue; $new[$j]['phead'] = TEPageHeader($pagename, $pn, $opt, $par); $new[$j]['rows'][] = $row; $par['rowcnt']++; continue; //start with next source row } //change some markup into code or 'defuse', so it will not get rendered, or cut it $row = TEMarkupCleaner($row, $opt, $par); //skip rows which don't match, but for 'double' set 'hit' flag if ($opt['unit']=='line' || $opt['unit']=='sent' || $opt['unit']=='para' || $opt['unit']=='page') { if (preg_match("($pat)sm".$umod, $row)) { $par['hit'] = 1; //match } else if (isset($opt['double']) && $opt['double']==1 && $par['hit']==1) { $par['hit']=0; } else continue; } //exclude lines containing matches with xcut pattern if (isset($par['xcut']) && $par['xcut']!='') if (preg_match("({$par['xcut']})".$umod, $row)) { continue; } //count matches in row $par['rowmatchcnt'] = preg_match_all("($pat)".$umod, $row, $mr); //snip out anything from row if(isset($opt['snip'])) $row = preg_replace("({$opt['snip']})".$umod, '', $row); $row = ltrim($row); //empty row if ($row=='') continue; //highlight matches if(isset($opt['highlight']) && $pat!='.') { $row = TEHighlight($opt, $par, $row); } //numbering $par['pagenum'] = $j; //$par['pagecnt']+1; //from prev version $par['rowcnt']++; //show($par['pagenum'],'par pagenum'); $new[$j]['rowcnt'] = $par['rowcnt']; $new[$j]['pmatchcnt'] = $par['rowmatchcnt'] ; $par['prevmnum'] = $par['matchnum']; $par['matchcnt'] = $par['matchnum'] += $par['rowmatchcnt']; $par['prevpmnum'] = $par['pmatchnum']; $par['pmatchnum'] += $par['rowmatchcnt']; $rownum = ($opt['linenum']==1 || $opt['matchnum']==1 || $opt['pagenum']==1) ? TERowNumbers($opt, $par) : ''; if(!isset($rownum)) continue; //add new result row $rc = $new[$j]['rowcnt']; if($par['hit']==1) { $new[$j]['rows'][$rc] = $rownum.$row; } else { //hit=0 #$new[$j]['rows'][$rc-1] = ''; $new[$j]['rows'][$rc-1] = trim($new[$j]['rows'][$rc-1],"\t\n\r\0\x0B")." ".trim($row); } //add vertical spacing to para and double if (($opt['unit']=='para') && $opt['markup']!='source') $new[$j]['rows'][] = "\n"; } //end of page rows processing if (isset($new[$j]) && is_countable($new[$j]['rows']) && count($new[$j]['rows'])>0) { //add pagelink (prefix) row if($opt['phead']) $new[$j]['phead'] = TEPageHeader($pagename, $pn, $opt, $par); $par['sorcnt']++; if (isset($opt['pfoot'])) $new[$j]['pfoot'] = TEPageFooter($pagename, $pn, $opt, $par); $new[$j]['name'] = $pn; } } //end of source pages processing //slice list if we got #section if (@$opt['section'] && @$opt['count']) TESliceList($new, $opt); $par['pagecnt'] = count($new); //sort list by results per page, subsort by name if (isset($opt['order']) && $opt['order']=='results') TESort($new); ## DEBUG echo "
NEW "; print_r($new); echo "
"; //output text from array of rows, adding page prefix header (and footer) $out = ''; foreach ($new as $i => $ar) { //markup pageheader if($opt['phead']) $out .= MarkupToHTML($pagename, $new[$i]['phead']); //add vspace foreach($new[$i]['rows'] as $k => $r) { if(isset($new[$i]['rows'][$k])) { $new[$i]['rows'][$k] = TEVSpace($r, $par, $opt); //add vertical spacing } } //markup rows $rnew = implode("\n", $new[$i]['rows']); global $LinkFunctions; if ($opt['textlinks']==1) { $lf = $LinkFunctions; foreach($LinkFunctions as $k => $v) $LinkFunctions[$k] = 'TELinkText'; } $out .= ($opt['markup']=='source') ? "".$rnew."" : MarkupToHTML($pagename, $rnew); if ($opt['textlinks']==1) $LinkFunctions = $lf; //markup pagefooter if (isset($opt['pfoot'])) $out .= MarkupToHTML($pagename, $new[$i]['pfoot']); } //stop timer TEStopwatch($par); //make header and footer $header = TEHeader($opt, $par); $header = MarkupToHTML($pagename, $header); $footer = TEFooter($opt, $par); $footer = MarkupToHTML($pagename, $footer); $out = $header."
".$out."
".$footer; StopWatch('TextExtract end'); return Keep($out); } //}}} //make rows array from source page function TETextRows($pagename, $source, $opt, &$par ) { if ($source==$pagename) return ''; $page = ReadPage($source); if (!$page) return ''; $text = $page['text']; //use pagename#section if present if(isset($opt['section'])) $text = TextSection($text, $source.$opt['section']); //remove inline markup from text if ($opt['phrase']==1 || $opt['markup']=='text' ||(isset($opt['text']) && $opt['text']==1)) $text = TERemoveInlineMarkup($text); //skip page if it has an exclude match /* not needed, pagelist already dropped page on excludes, so not on list! if (isset($opt['pat']['-']) && $opt['pat']['-']!='') foreach ($opt['-'] as $pat) { if (preg_match("($pat)", $text)) echo $pat; return; } */ $text = rtrim(Qualify($source, $text)); $rows = explode("\n", rtrim($text)); //make text lines into rows array //use range of lines if(isset($opt['lines'])) { $ol = $opt['lines']; $cnt = count($rows); if(strstr($ol,'..')) { preg_match_all("/\d*/", $ol, $k); $a=$k[0][0]; $b=$k[0][3]; $c=$k[0][2]; if($a && $b) $rows = array_slice($rows, $a-1, $b-$a+1); else if($a) $rows = array_slice($rows, $a-1); else if($c) $rows = array_slice($rows, 0, $c); } else if($ol[0]=='-') $rows = array_slice($rows, $ol); else $rows = array_slice($rows, 0, $ol); } switch ($opt['unit']) { //unit=line - already got line rows default: break; //unit=sent (sentence) - split lines into sentences case 'sent': $re = '/# Split sentences on whitespace between them. (?<=[.!?]|[.!?][\'"])(? $r) { if($r=='') $r = ' '; //continue; $nr = array_merge($nr, preg_split($re, $r, -1, PREG_SPLIT_NO_EMPTY)); }; $rows = $nr; break; //unit=para: - combine rows to paragraph rows case 'para': $paras = array(); $j=0; $paras[0] = ''; foreach($rows as $r) { $r = rtrim($r); if ($r=='') { $j++; $paras[$j] = ''; continue; } $paras[$j] .= $r."\n"; } $rows = $paras; break; //unit=page: - combine rows into one row case 'page': $part = implode("\n",$rows); unset($rows); $rows[0] = $part; break; } return $rows; } //}}} function TERemoveInlineMarkup($text) { global $TERemoveMarkupPatterns; foreach($TERemoveMarkupPatterns as $pat => $rep) $text = preg_replace($pat, $rep, $text); return $text; } //}}} //cleanup of markup function TEMarkupCleaner($row, $opt, $par) { global $KeepToken,$TEPermittedDirectives; if ($opt['markup']=='source') { //clean <>"tag" characters $row = str_replace("<","<", $row); $row = str_replace(">",">", $row); //that's all for 'source' processing return $row; } $new = array(); //fix orphaned @],[@,=],[= foreach(array("@","=") as $x) { $a = strpos($row,'['.$x); $b = strpos($row,$x.']'); if ($b!=0 && ($a===false || $a>$b)) $row = '['.$x.$row; else if ($a!=0 && ($b===false || $a>$b)) $row .= $x.']'; } //keep escaped text using tokens $keep = array(); if (preg_match_all("/\\[([=@])(.*?)\\1\\]/s".$par['imod'].$par['umod'], $row, $m)) { foreach ($m[0] as $i => $v) { $keep[$i][0] = $v; $keep[$i][1] = $m[1][$i]; $row = str_replace( $v, "<__TOK__".$i."__>", $row); } } //directives (: ... :) possibly multi-line. Preserve permitted directives if ($opt['markup']=='cut' || $opt['markup']=='text') { if (preg_match_all("/\\(:(\\w+)\\b.*?:\\)/s".$par['imod'].$par['umod'], $row, $m)) { foreach($m[1] as $x) { if (isset($TEPermittedDirectives) && in_array($x,$TEPermittedDirectives)) continue; $row = preg_replace("/\\(:($x\\b.*?):\\)/s", "", $row); } } } $lines = explode("\n", $row); foreach ($lines as $k => $row) { //extra spaces $row = preg_replace("/\\n\\s+/", "\n", $row); //directives (: ... :) encoding if ($opt['markup']=='code') { $row = preg_replace("/\\(:(comment)\\s+(.*?)\\s*:\\)/", "[@(:$1:@] $2 :)", $row); $row = preg_replace("/\\(:(\\w+\\b.*?):\\)/", "[@(:$1:)@]", $row); } //fixing double and empty [@ and [= $row = preg_replace("/\\[([@=])\\s*\\[\\1/","[\\1",$row); $row = preg_replace("/([@=])\\]\\s*\\1\\]/","\\1]",$row); $row = preg_replace("/\\[([@=])\\s*\\1\\]/","",$row); //whitespace $row = preg_replace("/^\\s+/", "", $row); //A: Q: $row = preg_replace("/^[AQ]:\\s+/", "", $row); //code and cut treat some markup differently if ($opt['textlinks']==1) { //variable link global $WikiWordPattern; $row = preg_replace("/\\$($WikiWordPattern)\\b/", "$$1", $row); } switch($opt['markup']) { case 'text': $row = TERemoveInlineMarkup($row); //follow on with 'cut' case 'cut': //divs >>...<< : remove $row = preg_replace("/>>(.*?)<]\\s*/", "", $row); //unordered list items: bullets to * $row = preg_replace("/^(\\*+)(.*?)$/", "*$2", $row); //ordered list items: numerals to # $row = preg_replace("/^(\\#+)(.*?)$/", "#$2", $row); //definition list items: to : $row = preg_replace("/^(:+)(?=(\s*)([^:]+):)/", ": ", $row); //divs >>...<< : escape $row = preg_replace("/>>(.*?)<>$1<<@]", $row); //anchors: escape $row = preg_replace("/(\\[\\[#[A-Za-z][-.:\\w]*\\]\\])/","[@$1@]",$row); //wiki styles %...% : escape $row = preg_replace("/(%.*?%)/", "[@$1@]", $row); //tables || || || @ escape $row = preg_replace("/^\\|\\|(.*)$/", "[@||$1 @]", $row); break; } //change all headings to large and bold text $row = preg_replace("/^(!{1,6})(.*)/","[+''' $2 '''+]" , $row); //markup expression encoding $row = preg_replace("/\\{\\((\\w+\\b.*?)\\)\\}/", "[@{($1)}@]", $row); $row = trim($row); if ($row=='') continue; $new[$k] = $row; } $row = implode("\n", $new); //re-inserting code strings via tokens foreach ($keep as $i => $v) $row = str_replace("<__TOK__".$i."__>", $keep[$i][0], $row); return $row; } //}}} //insert markup for highlighting matches function TEHighlight($opt, &$par, $row) { global $LinkPattern, $UrlExcludeChars, $ImgExtPattern, $KeepToken, $KPV; $pat = $par['pat']; //for source view we don't want whole links highlight: if ($opt['markup']=='source') $linkpat = $urlpat = ''; else { //matches in links: highlight entire link, and other matches $linkpat = "\\[\\[\\s*(.*?)\\]\\]"; $urlpat = "($LinkPattern)\\/\\/([^\\s$UrlExcludeChars]*[^\\s.,?!$UrlExcludeChars])"; } if (preg_match_all("/($linkpat)|($urlpat)|($pat)/".$par['umod'], $row, $m, PREG_OFFSET_CAPTURE)) { ## DEBUG echo "
PATTERN: ".$pat; echo "
Matches "; print_r($m[0]); echo "
"; $k = 0; $mpos = array(); foreach($m[0] as $i => $v) { if (!preg_match("($pat)".$par['umod'], $v[0])) continue; if (isset($m[4]) && preg_match("/$LinkPattern/",$m[4][$i][0])) $item = $v[0]." "; else $item = $v[0]; $pos = $v[1] + $k * $par['hitoklen']; $row = substr_replace($row, $KeepToken."01-TE".$KeepToken.$item.$KeepToken."02-TE".$KeepToken, $pos, strlen($item)); $row = rtrim($row,'% '); $k++; $mpos[] = $pos; } if ($opt['shorten']>0 && $opt['markup']!='source') $row = TEShortenRow($row, $par, $opt); } return $row; } //}}} function TEVSpace($row, $par, $opt) { global $HTMLPNewline; if ($opt['markup']=='source') return trim($row); if($HTMLPNewline !='') return $row; if($opt['shorten']>0) { $HTMLPNewline = ''; return $row.$par['vspace']; } else return $row.$par['br-tag']; } //}}} //shorten row function TEShortenRow($row, $par, $opt) { global $KeepToken; //number of words left and right of highlight $a = ($opt['shorten']>1) ? $opt['shorten'] : $opt['lwords']; $b = ($opt['shorten']>1) ? 2*$opt['shorten'] : $opt['rwords']; $hi = $new = array(); $words = explode(' ', $row); foreach ($words as $i => $wd) if (strpos($wd, $KeepToken)!==false) $hi[] = $i; for ($i=0; $i < count($words); $i++) { foreach ($hi as $k => $n) { if (($n-$a) > $i) { if (($n-$a) == $i+1) if (!isset($new[$i])) $new[$i] = $opt['ellipsis']; if (isset($new[$i-1]) && $new[$i-1]!=$opt['ellipsis']) $new[$i] = $opt['ellipsis']; continue 2; } if ($i == end($hi)+$b+1) $new[$i] = $opt['ellipsis']; if ($i > $n+$b) continue; if(isset($hi[$k+1]) && $i==($hi[$k+1]-$a)) continue; if (isset($new[$i])) continue 2; $new[$i] = $words[$i]; continue 2; } } $row = implode(' ', $new); return $row; } //}}} //make header function TEHeader(&$opt, $par) { $cnt = $par['matchnum']; $out = ""; if ($opt['header']) $out .= "(:div001 class='te-header':)\n"; switch($opt['header']) { default: $out .= TEVarReplace($opt['header'], $par); break; case 'count': case 'counter': $out .= "'''$[Results:] $cnt'''"; break; case 'all': case 'full': $time = ($opt['timer']) ? 'in '.$par['time'] : ''; $pgs = ($par['listcnt']>1) ? '$[pages]' : '$[page]'; $from = "$[from] {$par['listcnt']} $pgs $[searched]"; if ($par['pagecnt']>1) $from = "$[on] {$par['pagecnt']} $[pages] ".$from; $out .= "[[#extracttop]]%lfloat%[+ '''{$opt['title']}   %green%{$par['terms']}%%''' +] %right%''{$cnt} $[results] {$from} {$time}''"; $opt['footer'] = "%center% '''$[End of] {$opt['title']}'''    [[#extracttop|$[(start)]]]"; break; } if ($opt['header']) $out .= "\n(:div001end:)"; return $out; } //}}} //make footer function TEFooter($opt, $par) { $out = ''; if ($opt['footer'] && $par['pagecnt']>0) { $out .= "\n(:div002 class='te-footer':)".TEVarReplace($opt['footer'], $par)."\n(:div002end:)"; } if($opt['error']==1) { $error = ($par['pagecnt']==0) ? "\n%red%$[Found no matches!]%%" : ''; $error = ($par['listcnt']==0) ? "\n%red%$[Error: no pages to be searched!]%%" : ''; $out .= $error; } return $out; } //}}} //make page header function TEPageHeader($pagename, $source, $opt, &$par) { $pnum = ($opt['pagenum']==1) ? ($par['pagenum']).". " : ''; $out = "\n>>te-pageheader<<\n"; if ($opt['phead']=='link') { if($opt['pagenum']==1 && $opt['pagenum-color']!='') $out .= "%te-pheadlink color={$opt['pagenum-color']}%{$pnum}%% %te-pheadlink%[[$source]]%%"; else $out .= "%te-pheadlink%[[$source ]]%%"; } elseif ($opt['phead']=='linkmod' ) { $lmod = PageVar($source,'$LastModified'); $lmby = PageVar($source,'$LastModifiedBy'); $out .= "%rfloat%''$[last modified by] [[~{$lmby}]] $[on] {$lmod}'' %left%'''%color={$opt['pagenum-color']}%{$pnum}%%[+ [[$source]] +]'''"; } elseif ($opt['phead']=='linktitle') $out .= "%te-pheadlink% [[$source| {$par['ptitlespaced']} ]]%%"; elseif ($opt['phead']=='linkgrouptitle') $out .= "%te-pheadlink% [[$source|{$par['pgroup']}: {$par['ptitlespaced']} ]] %%"; else $out .= TEVarReplace($opt['phead'], $par); $out .= "\n>><<\n"; return $out; } //}} //make page footer function TEPageFooter($pagename, $source, $opt, &$par) { $out = "\n".$opt['pfoot']; return $out; } //}} //make results (line) numbers function TERowNumbers($opt, $par) { #show($par,'PAR'); $new = ''; if ($opt['linenum']==1) { if ($opt['pagenum']==1) { $new = Keep("{$par['pagenum']}.{$par['linenum']}. ",'T'); } else $new = Keep("{$par['linenum']}. ",'T'); } else if ($opt['matchnum']==1 && $par['pat']!=".") { if ($opt['pagenum']==1) { if ($par['rowmatchcnt']>1) $num = ($par['prevpmnum']+1)."-".$par['pmatchnum']; else $num = $par['pmatchnum']; $new = Keep("{$par['pagenum']}.$num. ",'T'); } else { if ($par['rowmatchcnt']>1) $num = ($par['prevmnum']+1)."-".$par['matchnum']; else $num = $par['matchnum']; $new = Keep("$num. ",'T'); } } return $new; } //}}} //substitution of pseudo template variables function TEVarReplace ($text, $par) { foreach($par as $k => $v) { if (is_array($v)) continue; $text = str_replace('{$$'.$k.'}' , $v, $text); } return $text; } //}}} //Link function to suppress links function TELinkText($pagename,$imap,$path,$title,$txt,$fmt=NULL) { return "".$txt."".$title.""; } //}}} //timer function TEStopwatch(&$par) { $wtime = strtok(microtime(), ' ') + strtok('') - $par['stime']; $xtime = sprintf("%04.2f %s", $wtime, ''); //time in secs $par['time'] = $xtime." $[seconds]"; } //}}} // markup (:extract ....:) search form Markup('extractform', 'directives','/\\(:extract\\s*(.*?)\\s*:\\)/', "TEFormMarkup"); // extractor search form function TEFormMarkup($m) { global $ExtractFormOpt, $InputValues, $EnablePathInfo,$ExtractFormInputType; extract($GLOBALS['MarkupToHTML']); $opt = ParseArgs($m[1]); if (isset($opt['page'])) $hiddenpagefield = 1; $opt = array_merge((array)$ExtractFormOpt, (array)$opt); $opt['action'] = 'search'; $opt['fmt'] = 'extract'; $target = (isset($opt['target'])) ? MakePageName($pagename, $opt['target']) : $pagename; $opt['n'] = IsEnabled($EnablePathInfo, 0) ? '' : $target; if ($opt['retain-input']==1) { foreach ($_GET as $k=>$v) $InputValues[$k] = htmlspecialchars($v); } foreach ($opt as $k => $v) { if ($v == '' || is_array($v)) continue; $v = str_replace("'", "'", $v); $opt[$k] = $v; if (!isset($InputValues[$k])) $InputValues[$k] = $v; } if(!isset($InputValues['q'])) $InputValues['q'] = ''; if (!isset($InputValues['q']) && isset($opt['pattern'])) $InputValues['q'] = $opt['pattern']; if (!isset($InputValues['name']) && isset($opt['defaultpage'])) $InputValues['name'] = $opt['defaultpage']; else $InputValues['name'] = ''; $checkword = (isset($InputValues['word']))? "checked=1" : ''; $checkcase = (isset($InputValues['case']))? "checked=1" : ''; $checkphrase = (isset($InputValues['phrase']))? "checked=1" : ''; $checkregex = (isset($InputValues['regex']))? "checked=1" : ''; SDV($ExtractFormInputType, 'text'); //form $out = FmtPageName("
", $target); $out .= "\n"; if (isset($opt['pattern'])) $out .= " \n"; else $out .= " \n"; if (!isset($hiddenpagefield)) $out .= " \n"; if (!isset($opt['pattern'])) { if (!isset($opt['case'])) $out .= ""; if (!isset($opt['phrase'])) $out .= ""; if (!isset($opt['word'])) $out .= ""; } if (isset($opt['regex'])) $out .= ""; $out .= "
{$opt['searchlabel']}
{$opt['pageslabel']}
{$opt['caselabel']}
{$opt['phraselabel']}
{$opt['wordlabel']}
{$opt['regexlabel']}
    
\n"; //set other optional parameters as hidden fields foreach ($opt as $k => $v) { if ($v == '' || is_array($v)) continue; if (in_array($k, array('pattern','name','defaultpage','q','label','value','size','searchlabel','pageslabel','wordlabel','caselabel','regexlabel','regex'))) continue; $k = str_replace("'", "'", $k); $v = str_replace("'", "'", $v); $out.= "\n"; } $out .= "
"; return Keep($out); } //}}} ## (extract ......) same as PowerTools (pagelist.... fmt=extract) [all pagelist parameters allowed] $MarkupExpr['extract'] = 'MxTextExtract($pagename, $argp, $args)'; function MxTextExtract($pagename, $argp, $args) { StopWatch('extract start'); unset($argp['#']); $opt['fmt'] = 'extract'; foreach($argp as $k => $v) $opt[$k] = $v; foreach($args as $k => $v) $opt['q'] .= ' "'.$v.'"'; $out = FmtPageList('$MatchList', $pagename, $opt, 0); $out = preg_replace("/[\n]+/s","\n",$out); StopWatch('extract end'); return $out; } //}}} //fmt=extract for (:extract:) and (:pagelist:) and (:searchbox:) SDV($FPLFormatOpt['extract'], array('fn' => 'FPLTextExtract')); function FPLTextExtract($pagename, &$matches, $opt) { ##DEBUG echo "
OPT "; print_r($opt); echo "
"; global $FmtV, $EnableStopWatch, $KeepToken, $KPV, $PageListFilters; $PageListFilters['PageListTermsTargets'] = -10; //not used $PageListFilters['TEListTermsTargets'] = 160; //used as alternative $EnableStopWatch = 1; StopWatch('TextExtract pagelist begin'); $opt['stime'] = strtok(microtime(), ' ') + strtok(''); $opt['q'] = ltrim($opt['q']); //if search term contains terms in double quotes switch on 'text' option to remove all inline markup when searching if (preg_match('/\\".*\\"/',$opt['q'])) $opt['text'] = 1; if (@$opt['']) foreach ($opt[''] as $k => $v) $opt[''][$k] = htmlspecialchars_decode($v); //treat single . search term as request for regex 'all characters' if(isset($opt[''][0]) && $opt[''][0]=='.') $opt['regex'] = 1; if(isset($opt['pattern']) && $opt['pattern']=='.') $opt['regex'] = 1; //MakePageList() does not evaluate terms as regular expressions, so we save them for later if (@$opt['regex']==1) { $opt['pattern'] = implode(' ', $opt['']); unset($opt['']); } if (!isset($opt['name']) && isset($opt['page'])) $opt['name'] = $opt['page']; elseif (isset($opt['name']) && isset($opt['page'])) $opt['name'] .= ",".$opt['page']; if (isset($opt['name'])) unset($opt['page']); //allow search of anchor sections if (isset($opt['name'])) { if($sa=strpos($opt['name'],'#')) { $opt['section'] = strstr($opt['name'],'#'); $opt['name'] = substr($opt['name'],0,$sa); } } //unless unit=page temp unset excludes for page matching // (for unit=page MakePageList needs excludes to reduce page matches) if (isset($opt['unit']) && $opt['unit']!='page' && isset($opt['-'])) { $excl = $opt['-']; unset($opt['-']); } //create page list by searching pages for search terms $list = MakePageList($pagename, $opt, 0); if (isset($excl)) $opt['-'] = $excl; #DEBUG echo "
list after MakePageList "; print_r($list); echo "
"; //extract page subset according to 'count=' parameter if (@$opt['count'] && !$opt['section']) TESliceList($list, $opt); return TextExtract($pagename, $list, $opt); } //}}} //alternative for PageListTermsTargets with hook to TERemoveInlineMarkup for option 'text' //this allows page matches to a search phrase even if part of the phrase is enclosed with inline markup function TEListTermsTargets(&$list, &$opt, $pn, &$page) { global $FmtV, $StrFoldFunction; static $reindex = array(); $fold = $StrFoldFunction; switch ($opt['=phase']) { case PAGELIST_PRE: $FmtV['$MatchSearched'] = count($list); $incl = array(); $excl = array(); foreach((array)@$opt[''] as $i) { $incl[] = $fold($i); } foreach((array)@$opt['+'] as $i) { $incl[] = $fold($i); } foreach((array)@$opt['-'] as $i) { $excl[] = $fold($i); } $indexterms = PageIndexTerms($incl); foreach($incl as $i) { $delim = (!preg_match('/[^\\w\\x80-\\xff]/', $i)) ? '$' : '/'; $opt['=inclp'][] = $delim . preg_quote($i,$delim) . $delim . 'i'; } if ($excl) $opt['=exclp'][] = '$'.implode('|', array_map('preg_quote',$excl)).'$i'; if (@$opt['link']) { $link = MakePageName($pn, $opt['link']); $opt['=linkp'] = "/(^|,)$link(,|$)/i"; $indexterms[] = " $link "; } if (@$opt['=cached']) return 0; if ($indexterms) { StopWatch("PageListTermsTargets begin count=".count($list)); $xlist = PageIndexGrep($indexterms, true); $list = array_diff($list, $xlist); StopWatch("PageListTermsTargets end count=".count($list)); } if (@$opt['=inclp'] || @$opt['=exclp'] || @$opt['=linkp']) return PAGELIST_ITEM|PAGELIST_POST; return 0; case PAGELIST_ITEM: if (!$page) { $page = ReadPage($pn, READPAGE_CURRENT); $opt['=readc']++; } if (!$page) return 0; if (@$opt['=linkp'] && !preg_match($opt['=linkp'], @$page['targets'])) { $reindex[] = $pn; return 0; } if (@$opt['=inclp'] || @$opt['=exclp']) { $text = $fold($pn."\n".@$page['targets']."\n".@$page['text']); if (isset($opt['text']) && $opt['text']==1) $text = TERemoveInlineMarkup($text); foreach((array)@$opt['=exclp'] as $i) if (preg_match($i, $text)) return 0; foreach((array)@$opt['=inclp'] as $i) if (!preg_match($i, $text)) { if ($i[0] == '$') $reindex[] = $pn; return 0; } } return 1; case PAGELIST_POST: if ($reindex) PageIndexQueueUpdate($reindex); $reindex = array(); return 0; } } //}}} //slice list for count= option function TESliceList(&$list, $opt) { list($r0, $r1) = CalcRange($opt['count'], count($list)); if ($r1 < $r0) $list = array_reverse(array_slice($list, $r1-1, $r0-$r1+1)); else $list = array_slice($list, $r0-1, $r1-$r0+1); } //}}} //sort by match count and subsort by name function TESort(&$new) { usort($new,"TESortByMatchCnt"); $anew = $temp = array(); $cnt = count($new); for ($i=0; $i<$cnt; $i++) { $temp[] = $new[$i]; if (($new[$i]['pmatchcnt'] > $new[$i+1]['pmatchcnt']) || $i+1==$cnt) { if (count($temp)>1) usort($temp, "TESortByName"); $anew = array_merge($anew, $temp); unset($temp); } } $new = $anew; } //}}} //is_countable substitute for php versions <7.3 if (!function_exists('is_countable')) { function is_countable($c) { return is_array($c) || $c instanceof Countable; } } //sort helper functions function TESortByMatchCnt($a, $b) { return $b['pmatchcnt'] - $a['pmatchcnt']; } function TESortByName($a, $b) { return strnatcasecmp($a['name'], $b['name']); } //replace utf8 accent characters into ascii characters using custom array function TE_alias_replace($x) { if(is_null($x) || $x === '') return ''; global $TEAccReplacePatterns; static $source, $target; if (!@$source) { $source = array_keys($TEAccReplacePatterns); $target = array_values($TEAccReplacePatterns); } return str_replace($source, $target, $x); } function TE_alias_restore($x) { if(is_null($x) || $x === '') return ''; global $TEAccRestorePatterns; static $source, $target; if (!@$source) { $source = array_keys($TEAccRestorePatterns); $target = array_values($TEAccRestorePatterns); } return str_replace($source, $target, $x); } //EOF