'30', 'button' => FmtPageName(' $[Search] ', $pagename), 'searchlabel' => FmtPageName('$[Search for ]', $pagename), 'pageslabel' => FmtPageName('$[On pages]', $pagename), 'caselabel' => FmtPageName('$[Match case]', $pagename), 'phraselabel' => FmtPageName('$[Match phrase]', $pagename), 'wordlabel' => FmtPageName('$[Match whole word]', $pagename), 'regexlabel' => FmtPageName('$[Regular expression]', $pagename), )); // defaults array SDVA($TextExtractOpt, array ( 'markup' => 'cut', //code, text, source, on 'unit' => 'dsent', //page, para, line, sent, dline, dsent 'highlight'=> 'yellow', //background color, 'bold', 'none' 'linenum-color' => 'green', 'matchnum-color' => 'green', 'pagenum-color' => 'green', 'title' => 'Search results for ', //XL('Text Extract'), 'header' => 'full', 'phead' => 'link', 'linewrap' => 1, 'rowspacing' => '0.5em', 'case' => 0, 'phrase' => 0, 'regex' => 0, 'strict' => 1, 'serial' => 0, 'error' => 1, 'timer' => 0, 'pagenum' => 0, 'matchnum' => 0, 'linenum' => 0, 'textlinks' => 0, 'linktext' => 'blue', 'shorten' => 0, 'lwords' => 5, 'rwords' => 10, 'ellipsis' => '…', )); // main function for text extract processing function TextExtract($pagename, $list, $opt = NULL) { global $TextExtractOpt, $TEModeDefaults, $TextExtract, $TextExtractExclude, $FmtV, $HTMLStylesFmt, $KeepToken, $KPV, $PageListArgPattern; ##DEBUG echo "
LIST "; print_r($list); echo "
"; foreach($opt as $k => $v) { if (is_array($v)) foreach($v as $kk =>$vv) $opt[$k][$kk] = stripmagic($vv); else $opt[$k] = stripmagic($v); } //internal arg array $par = array(); //start time StopWatch('TextExtract start'); if ($opt['stime']) $par['stime'] = $opt['stime']; else $par['stime'] = strtok(microtime(), ' ') + strtok(''); /*/set default options foreach ($TEModeDefaults as $mode => $ar ) { foreach ($ar as $k => $val) if (isset($opt['markup']) && $opt['markup']==$mode && !$opt[$k]) $opt[$k] = $val; }*/ $opt = array_merge($TextExtractOpt, $opt); switch ($opt['unit']) { case 'sentence': $opt['unit'] = 'sent'; break; case 'paragraph': $opt['unit'] = 'para'; break; case 'dline': $opt['unit'] = 'line'; $opt['double'] = 1; break; case 'dsent': $opt['unit'] = 'sent'; $opt['double'] = 1; break; } if($opt['markup']=='text') $opt['textlinks'] = 1; //for 'text' mode linksshown as text ##DEBUG echo "
OPT "; print_r($opt); echo "
"; //input parameter check if (!in_array($opt['unit'], array('line','para','page','sent')) OR !in_array($opt['markup'], array('code','cut','source','text','on'))) return "%red%$[Error: check input parameters!]"; foreach((array)@$opt['+'] as $i) $opt[''][] = $i; if (!isset($opt['']) && !isset($opt['pattern'])) return '%red%$[Error: search term missing!]'; //term is regular expression if ($opt['regex']==1) { $par['pat'] = $pat = $par['pattern'] = $opt[''][0] = $opt['pattern']; //exclude various input patterns SDVA($TextExtractExclude, array("*","?","+","(",")","[","]","^","$","|","??","\\")); foreach($TextExtractExclude as $v) if($pat==$v) return '%red%$[Error: disallowed character input!]'; } //term is phrase else if ($opt['phrase']==1) { $par['pattern'] = $terms = implode(" ", $opt['']); #$pat = ($opt['word']==1)? '\\b'.$terms.'\\b' : $terms; $pat = ($opt['word']==1)? '(? $pt) #$opt[''][$i] = '\\b'.$pt.'\\b'; $opt[''][$i] = '(?pat: ".$pat; $HTMLStylesFmt['teimages'] = " .image {max-width:10em; } "; //always wrap lines when displaying preformatted 'source' code if ($opt['markup']=='source') $opt['linewrap'] = 1; // wrap lines of preformatted text and code if($opt['linewrap']==1) { # whitespace wrap (perhaps copy styles to css stylesheet) $HTMLStylesFmt['prewrap'] = " code, div.te-results pre, div.te-results code, code.escaped, pre.escaped { white-space: pre-wrap; padding-left: 1em; } "; } if($opt['rowspacing']!=0) { $HTMLStylesFmt['rowspacing'] = ".spacer { min-height: {$opt['rowspacing']};} p.vspace {height:0;}"; } //setting keep values here, and keeptokens directly in TEHighLight() //instead of calling Keep again and again switch ($opt['highlight']) { case 'none': $KPV['01-TE'] = $KPV['02-TE'] = ""; break; case 'bold': $KPV['01-TE'] = ""; $KPV['02-TE'] = ""; break; case '1': default: $KPV['01-TE'] = ""; $KPV['02-TE'] = ""; $HTMLStylesFmt['te-hilight'] = " .te-hilight { background-color: {$opt['highlight']}; } "; } $par['hitoklen'] = 2* (5 + 2 * 
strlen($KeepToken)); // 2* ( KeepToken-length + KPV-key-length + KeepToken-length ) $KPV['03-TE'] = "
"; $par['br-tag'] = $KeepToken."03-TE".$KeepToken; $KPV['04-TE'] = "
"; $par['vspace'] = $KeepToken."04-TE".$KeepToken; //header, footer, pagelink prefix styles if ($opt['header']=='full') $opt['footer'] = 1; if ($opt['phead']) { SDV($HTMLStylesFmt['teprefix'], " .te-pageheader { margin:.8em 0 .5em 0; padding:.2em .2em 0 .2em;} .te-pageheader { border-top:1px solid #ccc; border-bottom:1px solid #ccc; background:#f7f7f7;} "); } if ($opt['header']) { SDV($HTMLStylesFmt['teheader'], " .te-header {margin-top:0.5em; padding:0.3em; border-top:1px solid #ccc; border-bottom:1px solid #ccc; background:#f7f7f7;} "); } if ($opt['footer']) { SDV($HTMLStylesFmt['tefooter'], " .te-footer {margin-top:0.5em; padding:0.3em; border-top:1px solid #ccc; border-bottom:1px solid #ccc; background:#f7f7f7;} "); } //number color defaults for css styling foreach(array('line','match','page') as $c) { if (isset($opt[$c.'num'])) $HTMLStylesFmt[$c.'num'] = " .{$c}num { color: {$opt[$c.'num-color']} ;} "; } SDV($HTMLStylesFmt['telinktext'], " .te-linktext {color: {$opt['linktext']} } "); //case insensitive search $qi = $par['qi'] = (@$opt['case']==1) ? '' : 'i'; $par['listcnt'] = ($FmtV['$MatchSearched']) ? 
$FmtV['$MatchSearched'] : count($list); //inits $par['sorcnt']=$par['matchnum']=$par['matchcnt']=$par['rowcnt']=$par['pagecnt']=0; $par['pagenum']= 1; $par['title'] = $opt['title']; $new = array(); $j = 0; //process each source page in turn foreach($list as $i => $pn) { $par['source'] = $pn; $par['pname'] = substr(strstr($pn, '.'),1); $par['pmatchnum'] = 0; $par['prevpmnum'] = 0; $par['hit'] = 0; //get rows from source page $rows = TETextRows($pagename, $pn, $opt, $par); if (!$rows) continue; //next page $j++; $list[$j] = $pn; //processing lines (rows) foreach ($rows as $k => $row) { $par['linenum'] = $k+1; //skip pages which don't match if ($opt['unit']=='page') if(!preg_match("($pat)".$qi, $row)) continue; //preserve empty rows for 'all including' pattern if (($opt['unit']=='line'|| $opt['unit']=='sent') && $row=="" && $pat==".") { $new[$j]['rows'][] = $row; continue; } //use row 'as is' if markup=on or whole page, no futher row processing if ($opt['markup']=='on' && ($pat=="." || $opt['unit']=='page' || $opt['unit']=='para')) { if ($opt['unit']=='para' && !preg_match("($pat)".$qi, $row)) continue; $new[$j]['phead'] = TEPageHeader($pagename, $pn, $opt, $par); $new[$j]['rows'][] = $row; $par['rowcnt']++; continue; //start with next source row } //change some markup into code or 'defuse', so it will not get rendered, or cut it $row = TEMarkupCleaner($row, $opt, $par); //skip rows which don't match if ($opt['unit']=='line' || $opt['unit']=='para' || $opt['unit']=='sent') { if(preg_match("($pat)".$qi, $row)) $par['hit'] = 1; else { if(isset($opt['double']) && $opt['double']==1 && $par['hit']==1) $par['hit']=0; else continue; } } //exclude lines containing matches with cut pattern if (isset($opt['cut'])) if(preg_match("({$opt['cut']})".$qi, $row)) continue; //count matches in row $par['rowmatchcnt'] = preg_match_all("(".$par['pat'].")".$qi, $row, $mr); //check if textrow needs processing if(isset($opt['snip'])) $row = preg_replace("({$opt['snip']})", '', $row); $row = 
ltrim($row); //empty row if ($row=='') continue; //highlight matches if(isset($opt['highlight']) && $pat!='.') $row = TEHighlight($opt, $par, $row); //numbering $par['pagenum'] = $j; //$par['pagecnt']+1; //from prev version $par['rowcnt']++; //show($par['pagenum'],'par pagenum'); $new[$j]['rowcnt'] = $par['rowcnt']; $new[$j]['pmatchcnt'] = $par['rowmatchcnt'] ; $par['prevmnum'] = $par['matchnum']; $par['matchcnt'] = $par['matchnum'] += $par['rowmatchcnt']; $par['prevpmnum'] = $par['pmatchnum']; $par['pmatchnum'] += $par['rowmatchcnt']; $rownum = ($opt['linenum']==1 || $opt['matchnum']==1 || $opt['pagenum']==1) ? TERowNumbers($opt, $par) : ''; if(!isset($rownum)) continue; //add new result row $rc = $new[$j]['rowcnt']; if($par['hit']==1) { $new[$j]['rows'][$rc] = $rownum.$row; } else { //hit=0 #$new[$j]['rows'][$rc-1] = ''; $new[$j]['rows'][$rc-1] = trim($new[$j]['rows'][$rc-1],"\t\n\r\0\x0B")." ".trim($row); } //add vertical spacing to para and double if (($opt['unit']=='para') && $opt['markup']!='source') $new[$j]['rows'][] = "\n"; } //end of page rows processing if(!isset($new[$j])) $j--; if (isset($new[$j]) && is_countable($new[$j]['rows']) && count($new[$j]['rows'])>0) { //add pagelink (prefix) row if($opt['phead']) $new[$j]['phead'] = TEPageHeader($pagename, $pn, $opt, $par); $par['sorcnt']++; if (isset($opt['pfoot'])) $new[$j]['pfoot'] = TEPageFooter($pagename, $pn, $opt, $par); $new[$j]['name'] = $pn; } } //end of source pages processing //slice list if we got #section if (@$opt['section'] && @$opt['count']) TESliceList($new, $opt); $par['pagecnt'] = count($new); //sort list by results per page, subsort by name if (isset($opt['order']) && $opt['order']=='results') TESort($new); ## DEBUG echo "
NEW "; print_r($new); echo "
"; //output text from array of rows, adding page prefix header (and footer) $out = ''; foreach ($new as $i => $ar) { //markup pageheader if($opt['phead']) $out .= MarkupToHTML($pagename, $new[$i]['phead']); //add vspace foreach($new[$i]['rows'] as $k => $r) { if(isset($new[$i]['rows'][$k])) { $new[$i]['rows'][$k] = TEVSpace($r, $par, $opt); //add vertical spacing } } //markup rows $rnew = implode("\n", $new[$i]['rows']); global $LinkFunctions; if ($opt['textlinks']==1) { $lf = $LinkFunctions; foreach($LinkFunctions as $k => $v) $LinkFunctions[$k] = 'TELinkText'; } $out .= ($opt['markup']=='source') ? "".$rnew."" : MarkupToHTML($pagename, $rnew); if ($opt['textlinks']==1) $LinkFunctions = $lf; //markup pagefooter if (isset($opt['pfoot'])) $out .= MarkupToHTML($pagename, $new[$i]['pfoot']); } //stop timer TEStopwatch($par); //make header and footer $header = TEHeader($opt, $par); $header = MarkupToHTML($pagename, $header); $footer = TEFooter($opt, $par); $footer = MarkupToHTML($pagename, $footer); $out = $header."
".$out."
".$footer; StopWatch('TextExtract end'); return Keep($out); } //}}} //make rows array from source page function TETextRows($pagename, $source, $opt, &$par ) { if ($source==$pagename) return ''; $page = ReadPage($source); if (!$page) return ''; $text = $page['text']; //use pagename#section if present if(isset($opt['section'])) $text = TextSection($text, $source.$opt['section']); //remove inline markup from text if ($opt['phrase']==1 || $opt['markup']=='text') $text = TERemoveInlineMarkup($text, $par); //skip page if it has an exclude match if (isset($opt['pat']['-']) && $opt['pat']['-']!='') foreach ($opt['-'] as $pat) { if (preg_match("($pat)".$par['qi'], $text)) return; } //skip page if it has no match; all inclusive elements need to match (AND condition) foreach ($opt[''] as $pat) { if (!preg_match("($pat)".$par['qi'], $text)) return; } $text = rtrim(Qualify($source, $text)); $rows = explode("\n", rtrim($text)); //make text lines into rows array //use range of lines if(isset($opt['lines'])) { $ol = $opt['lines']; $cnt = count($rows); if(strstr($ol,'..')) { preg_match_all("/\d*/", $ol, $k); $a=$k[0][0]; $b=$k[0][3]; $c=$k[0][2]; if($a && $b) $rows = array_slice($rows, $a-1, $b-$a+1); else if($a) $rows = array_slice($rows, $a-1); else if($c) $rows = array_slice($rows, 0, $c); } else if($ol[0]=='-') $rows = array_slice($rows, $ol); else $rows = array_slice($rows, 0, $ol); } switch ($opt['unit']) { //unit=line - already got line rows default: break; //unit=sent (sentence) - split lines into sentences case 'sent': $re = '/# Split sentences on whitespace between them. (?<=[.!?]|[.!?][\'"])(? 
$r) { if($r=='') $r = ' '; //continue; $nr = array_merge($nr, preg_split($re, $r, -1, PREG_SPLIT_NO_EMPTY)); }; $rows = $nr; break; //unit=para: - combine rows to paragraph rows case 'para': $newpara = array(); $j=0; foreach($rows as $i => $row) { $row = rtrim($row); if ($row=='') { continue; } $j++; $newpara[$j] = ''; $newpara[$j] .= $row."\n"; } $rows = $newpara; break; //unit=page: - combine rows into one row case 'page': $part = implode("\n",$rows); unset($rows); $rows[0] = $part; break; } return $rows; } //}}} function TERemoveInlineMarkup($text, $par) { global $TERemoveMarkupPatterns; SDVA($TERemoveMarkupPatterns, array( "/'''(.*?)'''/" => "$1", //'''strong''' (bold) "/''(.*?)''/" => "$1", //''emphasis'' (italic) "/'\\-(.*?)\\-'/" => "$1", //'-smaller-' "/\\[(([-+])+)(.*?)\\1\\]/" => "$1", //[+big+], [-small-] "/'\\^(.*?)\\^/" => "$1", //'^super script^' "/'_(.*?)_'/" => "$1", //'_sub script_' "/\\{\\+(.*?)\\+\\}/" => "$1", //{+ins+} (underline) "/\\{-(.*?)-\\}/" => "$1", //{-del-} (strike through) "/(%.*?%)/" => "", //%wiki styles% %% "/^-+[<>]\\s*/" => "", //->indents, - "$2", //* unordered list bullets "/^(\\#+)(.*?)$/m" => "$2", //# ordered list bullets "/^(:+)(?=(\s*)([^:]+):)/m" => " ", //: definition : list "/ +/" => " ", //multiple spaces to single space )); foreach($TERemoveMarkupPatterns as $pat => $rep) $text = preg_replace($pat, $rep, $text); return $text; } //}}} //cleanup of markup function TEMarkupCleaner($row, $opt, $par) { global $KeepToken; if ($opt['markup']=='source') { //clean <>"tag" characters $row = str_replace("<","<", $row); $row = str_replace(">",">", $row); //that's all for 'source' processing return $row; } $new = array(); //fix orphaned @],[@,=],[= foreach(array("@","=") as $x) { $a = strpos($row,'['.$x); $b = strpos($row,$x.']'); if ($b!=0 && ($a===false || $a>$b)) $row = '['.$x.$row; else if ($a!=0 && ($b===false || $a>$b)) $row .= $x.']'; } //keep escaped text using tokens $keep = array(); if 
(preg_match_all("/\\[([=@])(.*?)\\1\\]/s".$par['qi'], $row, $m)) { foreach ($m[0] as $i => $v) { $keep[$i][0] = $v; $keep[$i][1] = $m[1][$i]; $row = str_replace( $v, "<__TOK__".$i."__>", $row); } } //directives (: ... :) possibly multi-line if ($opt['markup']=='cut' || $opt['markup']=='text') { $row = preg_replace("/\\(:(\\w+\\b.*?):\\)/s", "", $row); } $lines = explode("\n", $row); foreach ($lines as $k => $row) { //extra spaces $row = preg_replace("/\\n\\s+/", "\n", $row); //directives (: ... :) encoding if ($opt['markup']=='code') { $row = preg_replace("/\\(:(comment)\\s+(.*?)\\s*:\\)/", "[@(:$1:@] $2 :)", $row); $row = preg_replace("/\\(:(\\w+\\b.*?):\\)/", "[@(:$1:)@]", $row); } //fixing double and empty [@ and [= $row = preg_replace("/\\[([@=])\\s*\\[\\1/","[\\1",$row); $row = preg_replace("/([@=])\\]\\s*\\1\\]/","\\1]",$row); $row = preg_replace("/\\[([@=])\\s*\\1\\]/","",$row); //whitespace $row = preg_replace("/^\\s+/", "", $row); //A: Q: $row = preg_replace("/^[AQ]:\\s+/", "", $row); //code and cut treat some markup differently if ($opt['textlinks']==1) { //variable link global $WikiWordPattern; $row = preg_replace("/\\$($WikiWordPattern)\\b/", "$$1", $row); } switch($opt['markup']) { case 'text': $row = TERemoveInlineMarkup($row,$par); //follow on with 'cut' case 'cut': //divs >>...<< : remove $row = preg_replace("/>>(.*?)<]\\s*/", "", $row); //unordered list items: bullets to * $row = preg_replace("/^(\\*+)(.*?)$/", "*$2 {$par['br-tag']}", $row); //ordered list items: numerals to # $row = preg_replace("/^(\\#+)(.*?)$/", "#$2 {$par['br-tag']}", $row); //definition list items: to : $row = preg_replace("/^(:+)(?=(\s*)([^:]+):)/", ": ", $row); //divs >>...<< : escape $row = preg_replace("/>>(.*?)<>$1<<@]", $row); //anchors: escape $row = preg_replace("/(\\[\\[#[A-Za-z][-.:\\w]*\\]\\])/","[@$1@]",$row); //wiki styles %...% : escape $row = preg_replace("/(%.*?%)/", "[@$1@]", $row); //tables || || || @ escape $row = preg_replace("/^\\|\\|(.*)$/", "[@||$1 @] 
{$par['br-tag']}", $row); break; } //change all headings to large and bold text $row = preg_replace("/^(!{1,6})(.*)/","[+''' $2 '''+]" , $row); //markup expression encoding $row = preg_replace("/\\{\\((\\w+\\b.*?)\\)\\}/", "[@{($1)}@]", $row); $row = trim($row); if ($row=='') continue; $new[$k] = $row; } $row = implode("\n", $new); //re-inserting code strings via tokens foreach ($keep as $i => $v) $row = str_replace("<__TOK__".$i."__>", $keep[$i][0], $row); return $row; } //}}} //insert markup for highlighting matches function TEHighlight($opt, &$par, $row) { global $LinkPattern, $UrlExcludeChars, $ImgExtPattern, $KeepToken, $KPV; //for source view we don't want whole links highlight: if ($opt['markup']=='source') $linkpat = $urlpat = ''; else { //matches in links: highlight entire link, and other matches $linkpat = "\\[\\[\\s*(.*?)\\]\\]"; $urlpat = "($LinkPattern)\\/\\/([^\\s$UrlExcludeChars]*[^\\s.,?!$UrlExcludeChars])"; } if (preg_match_all("(($linkpat)|($urlpat)|({$par['pat']}))".$par['qi'], $row, $m, PREG_OFFSET_CAPTURE)) { ## DEBUG echo "
PATTERN: ".$par['pat']; echo "
OTHER "; print_r($m[0]); echo "
"; $k = 0; $mpos = array(); foreach($m[0] as $i => $v) { if (!preg_match("({$par['pat']})".$par['qi'], $v[0])) continue; if (isset($m[4]) && preg_match("/$LinkPattern/",$m[4][$i][0])) $item = $v[0]." "; else $item = $v[0]; $pos = $v[1] + $k * $par['hitoklen']; $row = substr_replace($row, $KeepToken."01-TE".$KeepToken.$item.$KeepToken."02-TE".$KeepToken, $pos, strlen($item)); $row = rtrim($row,'% '); $k++; $mpos[] = $pos; } if ($opt['shorten']>0 && $opt['markup']!='source') $row = TEShortenRow($row, $par, $opt); } return $row; } //}}} function TEVSpace($row, $par, $opt) { global $HTMLPNewline; if ($opt['markup']=='source') return trim($row); if($HTMLPNewline !='') return $row; if($opt['shorten']>0) { $HTMLPNewline = ''; return $row.$par['vspace']; } else return $row.$par['br-tag']; } //}}} //shorten row function TEShortenRow($row, $par, $opt) { global $KeepToken; //number of words left and right of highlight $a = ($opt['shorten']>1) ? $opt['shorten'] : $opt['lwords']; $b = ($opt['shorten']>1) ? 
2*$opt['shorten'] : $opt['rwords']; $hi = $new = array(); $words = explode(' ', $row); foreach ($words as $i => $wd) if (strpos($wd, $KeepToken)!==false) $hi[] = $i; for ($i=0; $i < count($words); $i++) { foreach ($hi as $k => $n) { if (($n-$a) > $i) { if (($n-$a) == $i+1) if (!isset($new[$i])) $new[$i] = $opt['ellipsis']; if (isset($new[$i-1]) && $new[$i-1]!=$opt['ellipsis']) $new[$i] = $opt['ellipsis']; continue 2; } if ($i == end($hi)+$b+1) $new[$i] = $opt['ellipsis']; if ($i > $n+$b) continue; if(isset($hi[$k+1]) && $i==($hi[$k+1]-$a)) continue; if (isset($new[$i])) continue 2; $new[$i] = $words[$i]; continue 2; } } $row = implode(' ', $new); return $row; } //}}} //make header function TEHeader(&$opt, $par) { $cnt = $par['matchnum']; $out = ""; if ($opt['header']) $out .= "(:div001 class='te-header':)\n"; switch($opt['header']) { default: $out .= TEVarReplace($opt['header'], $par); break; case 'count': case 'counter': $out .= "'''$[Results:] $cnt'''"; break; case 'all': case 'full': $time = ($opt['timer']) ? 'in '.$par['time'] : ''; $pgs = ($par['listcnt']>1) ? '$[pages]' : '$[page]'; $from = "$[from] {$par['listcnt']} $pgs $[searched]"; if ($par['pagecnt']>1) $from = "$[on] {$par['pagecnt']} $[pages] ".$from; $out .= "[[#extracttop]]%lfloat%[+ '''{$opt['title']}   %green%{$par['pattern']}%%''' +] %right%''{$cnt} $[results] {$from} {$time}''"; $opt['footer'] = "%center% '''$[End of] {$opt['title']}'''    [[#extracttop|$[(start)]]]"; break; } if ($opt['header']) $out .= "\n(:div001end:)"; return $out; } //}}} //make footer function TEFooter($opt, $par) { $out = ''; if ($opt['footer'] && $par['pagecnt']>0) { $out .= "\n(:div002 class='te-footer':)".TEVarReplace($opt['footer'], $par)."\n(:div002end:)"; } if($opt['error']==1) { $error = ($par['pagecnt']==0) ? "\n%red%$[Found no matches!]%%" : ''; $error = ($par['listcnt']==0) ? 
"\n%red%$[Error: no pages to be searched!]%%" : $error; //fix: fall back to the message chosen just before (was '' which always erased "Found no matches!")
    $out .= $error;
  }
  return $out;
} //}}}

/**
 * Make the page header: wiki markup placed above the result rows of one source page.
 *
 * The markup is wrapped in a >>te-pageheader<< ... >><< division.
 * $opt['phead'] selects the layout:
 *   'link'    - bold, enlarged link to $source; prefixed with the coloured page
 *               number only when pagenum==1 and a pagenum-color is set
 *   'linkmod' - as above plus a right-floated "last modified by ... on ..." note
 *               built from the $LastModifiedBy / $LastModified page variables
 *   other     - $opt['phead'] is treated as a template and passed through TEVarReplace()
 *
 * @param string $pagename  current page (not used by this function)
 * @param string $source    name of the page the rows were extracted from
 * @param array  $opt       options; reads 'pagenum', 'pagenum-color', 'phead'
 * @param array  $par       internal parameters; reads 'pagenum'
 * @return string wiki markup for the header
 */
function TEPageHeader($pagename, $source, $opt, &$par) {
  //page number prefix, empty unless numbering of pages is switched on
  $pnum = ($opt['pagenum']==1) ? ($par['pagenum']).". " : '';
  $out = "\n>>te-pageheader<<\n";
  if($opt['phead']=='link') {
    if($opt['pagenum']==1 && $opt['pagenum-color']!='')
      $out .= "'''%color={$opt['pagenum-color']}%{$pnum}%% [+ [[$source]] +]'''";
    else
      $out .= "'''[+ [[$source]] +]'''";
  }
  elseif($opt['phead']=='linkmod' ) {
    $lmod = PageVar($source,'$LastModified');
    $lmby = PageVar($source,'$LastModifiedBy');
    $out .= "%rfloat%''$[last modified by] [[~{$lmby}]] $[on] {$lmod}'' %left%'''%color={$opt['pagenum-color']}%{$pnum}%%[+ [[$source]] +]'''";
  }
  else {
    //custom header text with {$$name} placeholders
    $out .= TEVarReplace($opt['phead'], $par);
  }
  $out .= "\n>><<\n";
  return $out;
} //}}

/**
 * Make the page footer: the text of $opt['pfoot'] on a new line.
 *
 * @param string $pagename  current page (not used by this function)
 * @param string $source    source page name (not used by this function)
 * @param array  $opt       options; reads 'pfoot'
 * @param array  $par       internal parameters (not used by this function)
 * @return string wiki markup for the footer
 */
function TEPageFooter($pagename, $source, $opt, &$par) {
  $out = "\n".$opt['pfoot'];
  return $out;
} //}}

//make results (line) numbers
//NOTE: this function continues beyond this span; the part below is reproduced unchanged
function TERowNumbers($opt, $par) {
  #show($par,'PAR');
  $new = '';
  if ($opt['linenum']==1) {
    if ($opt['pagenum']==1) {
      $new = Keep("{$par['pagenum']}.{$par['linenum']}. ",'T');
    }
    else $new = Keep("{$par['linenum']}. ",'T');
  }
  else if ($opt['matchnum']==1 && $par['pat']!=".") {
    if ($opt['pagenum']==1) {
      if ($par['rowmatchcnt']>1) $num = ($par['prevpmnum']+1)."-".$par['pmatchnum'];
      else $num = $par['pmatchnum'];
      $new = Keep("{$par['pagenum']}.$num. ",'T');
    }
    else {
      if ($par['rowmatchcnt']>1) $num = ($par['prevmnum']+1)."-".$par['matchnum'];
      else $num = $par['matchnum'];
      $new = Keep("$num. 
",'T'); } } return $new; } //}}} //substitution of pseudo template variables function TEVarReplace ($text, $par) { foreach($par as $k => $v) { if (is_array($v)) continue; $text = str_replace('{$$'.$k.'}' , $v, $text); } return $text; } //}}} //Link function to suppress links function TELinkText($pagename,$imap,$path,$title,$txt,$fmt=NULL) { return "".$txt."".$title.""; } //}}} //timer function TEStopwatch(&$par) { $wtime = strtok(microtime(), ' ') + strtok('') - $par['stime']; $xtime = sprintf("%04.2f %s", $wtime, ''); //time in secs $par['time'] = $xtime." $[seconds]"; } //}}} // markup (:extract ....:) search form Markup('extractform', 'directives','/\\(:extract\\s*(.*?)\\s*:\\)/', "TEFormMarkup"); // extractor search form function TEFormMarkup($m) { global $ExtractFormOpt, $InputValues, $EnablePathInfo,$ExtractFormInputType; extract($GLOBALS['MarkupToHTML']); $opt = ParseArgs($m[1]); if (isset($opt['page'])) $hiddenpagefield = 1; $opt = array_merge((array)$ExtractFormOpt, @$_GET, (array)$opt); $opt['action'] = 'search'; $opt['fmt'] = 'extract'; $target = (isset($opt['target'])) ? MakePageName($pagename, $opt['target']) : $pagename; $opt['n'] = IsEnabled($EnablePathInfo, 0) ? '' : $target; foreach ($opt as $k => $v) { if ($v == '' || is_array($v)) continue; $v = str_replace("'", "'", $v); $opt[$k] = $v; if (!isset($InputValues[$k])) $InputValues[$k] = $v; } if(!isset($InputValues['q'])) $InputValues['q'] = ''; if (!isset($InputValues['q']) && isset($opt['pattern'])) $InputValues['q'] = $opt['pattern']; if (!isset($InputValues['name']) && isset($opt['defaultpage'])) $InputValues['name'] = $opt['defaultpage']; else $InputValues['name'] = ''; $checkword = (isset($InputValues['word']))? "checked=1" : ''; $checkcase = (isset($InputValues['case']))? "checked=1" : ''; $checkphrase = (isset($InputValues['phrase']))? "checked=1" : ''; $checkregex = (isset($InputValues['regex']))? "checked=1" : ''; SDV($ExtractFormInputType, 'text'); //form $out = FmtPageName("
", $target); $out .= "\n"; if (isset($opt['pattern'])) $out .= " \n"; else $out .= " \n"; if (!isset($hiddenpagefield)) $out .= " \n"; if (!isset($opt['pattern'])) { if (!isset($opt['case'])) $out .= ""; if (!isset($opt['phrase'])) $out .= ""; if (!isset($opt['word'])) $out .= ""; } if (isset($opt['regex'])) $out .= ""; $out .= "
{$opt['searchlabel']}
{$opt['pageslabel']}
{$opt['caselabel']}
{$opt['phraselabel']}
{$opt['wordlabel']}
{$opt['regexlabel']}
    
\n"; //set other optional parameters as hidden fields foreach ($opt as $k => $v) { if ($v == '' || is_array($v)) continue; if (in_array($k, array('pattern','name','defaultpage','q','label','value','size','searchlabel','pageslabel','wordlabel','caselabel','regexlabel','regex'))) continue; $k = str_replace("'", "'", $k); $v = str_replace("'", "'", $v); $out.= "\n"; } $out .= "
";
  return Keep($out);
} //}}}

## (extract ......) same as PowerTools (pagelist.... fmt=extract) [all pagelist parameters allowed]
$MarkupExpr['extract'] = 'MxTextExtract($pagename, $argp, $args)';

/**
 * Markup expression handler for {(extract ...)}.
 *
 * Copies the named arguments into a page list option array (fmt defaults to
 * 'extract'), appends every positional argument as a double-quoted term to
 * the 'q' option and returns the FmtPageList() output with runs of newlines
 * collapsed into one.
 *
 * @param string $pagename  current page
 * @param array  $argp      named arguments; the '#' entry is discarded
 * @param array  $args      positional arguments, used as search terms
 * @return string
 */
function MxTextExtract($pagename, $argp, $args) {
  StopWatch('extract start');
  unset($argp['#']);
  $opt = array('fmt' => 'extract');
  foreach($argp as $k => $v) $opt[$k] = $v;
  foreach($args as $v) {
    //start from '' when no q= was given, instead of appending to an undefined key
    $opt['q'] = (isset($opt['q']) ? $opt['q'] : '').' "'.$v.'"';
  }
  $out = FmtPageList('$MatchList', $pagename, $opt, 0);
  $out = preg_replace("/[\n]+/s","\n",$out);
  StopWatch('extract end');
  return $out;
} //}}}

//fmt=extract for (:extract:) and (:pagelist:) and (:searchbox:)
SDV($FPLFormatOpt['extract'], array('fn' => 'FPLTextExtract'));

/**
 * Page list format function for fmt=extract.
 *
 * Registers TEListTermsTargets in $PageListFilters, normalises the search
 * options, builds the list of pages with MakePageList() and passes it on
 * to TextExtract().
 *
 * @param string $pagename  current page
 * @param array  $matches   passed by reference by the caller; not used here
 * @param array  $opt       page list / extract options
 * @return string whatever TextExtract() returns
 */
function FPLTextExtract($pagename, &$matches, $opt) {
  ##DEBUG echo "OPT "; print_r($opt);
  global $FmtV, $EnableStopWatch, $KeepToken, $KPV, $PageListFilters;
  $PageListFilters['PageListTermsTargets'] = -10; //not used
  $PageListFilters['TEListTermsTargets'] = 160; //used as alternative
  $EnableStopWatch = 1;
  StopWatch('TextExtract pagelist begin');
  $opt['stime'] = strtok(microtime(), ' ') + strtok('');
  //q may be absent; treat it as an empty query rather than reading an undefined key
  $opt['q'] = isset($opt['q']) ? ltrim($opt['q']) : '';
  //if search term contains terms in double quotes switch on 'text' option to remove all inline markup when searching
  if (preg_match('/\\".*\\"/',$opt['q'])) $opt['text'] = 1;
  if (@$opt['']) foreach ($opt[''] as $k => $v) $opt[''][$k] = htmlspecialchars_decode($v);
  //treat single . search term as request for regex 'all characters'
  if(isset($opt[''][0]) && $opt[''][0]=='.') $opt['regex'] = 1;
  if(isset($opt['pattern']) && $opt['pattern']=='.') $opt['regex'] = 1;
  //MakePageList() does not evaluate terms as regular expressions, so we save them for later
  if (@$opt['regex']==1) {
    //only when there are terms to save: an explicit pattern= without terms
    //must survive (implode() on a missing list gave NULL, or a TypeError on PHP 8)
    if (isset($opt[''])) {
      $opt['pattern'] = implode(' ', (array)$opt['']);
      unset($opt['']);
    }
  }
  if (!isset($opt['name']) && isset($opt['page'])) $opt['name'] = $opt['page'];
  elseif (isset($opt['name']) && isset($opt['page'])) $opt['name'] .= ",".$opt['page'];
  if (isset($opt['name'])) unset($opt['page']);
  //allow search of anchor sections
  if (isset($opt['name'])) {
    if($sa=strpos($opt['name'],'#')) {
      $opt['section'] = strstr($opt['name'],'#');
      $opt['name'] = substr($opt['name'],0,$sa);
    }
  }
  //unset excludes for page matching, deal with them on unit basis later
  $excl = '';
  if(isset($opt['strict']) && $opt['strict']==1) {
    if (isset($opt['-']) && (!isset($opt['unit']) || $opt['unit']!='page')) {
      $excl = $opt['-'];
      unset($opt['-']);
    }
  }
  //create page list by searching pages for search terms
  $list = MakePageList($pagename, $opt, 0);
  if (!isset($opt['-'])) $opt['-'] = $excl; //add excludes again
  #DEBUG echo "list after MakePageList "; print_r($list);
  //extract page subset according to 'count=' parameter
  if (@$opt['count'] && empty($opt['section'])) TESliceList($list, $opt);
  return TextExtract($pagename, $list, $opt);
} //}}}

//alternative for PageListTermsTargets with hook to TERemoveInlineMarkup for option 'text'
//this allows page matches to a search phrase even if part of the phrase is enclosed with inline markup
/**
 * Page list filter, dispatched on $opt['=phase'].
 *
 * PAGELIST_PRE:  folds the include ('' and '+') and exclude ('-') terms,
 *                stores regexes for them in $opt['=inclp'] / $opt['=exclp'],
 *                optionally a link regex in $opt['=linkp'], and removes from
 *                $list the pages returned by PageIndexGrep().
 * PAGELIST_ITEM: reads the page if needed and tests link, exclude and include
 *                patterns against folded "name\ntargets\ntext"; with
 *                $opt['text']==1 inline markup is removed from that text first.
 * PAGELIST_POST: hands the collected page names to PageIndexQueueUpdate().
 *
 * @param array  $list  page names, narrowed in the PRE phase
 * @param array  $opt   page list options, extended with '=inclp', '=exclp', '=linkp'
 * @param string $pn    page name
 * @param mixed  $page  page array; loaded here when empty
 * @return int 0 or 1 per item, or PAGELIST_ITEM|PAGELIST_POST from the PRE phase
 */
function TEListTermsTargets(&$list, &$opt, $pn, &$page) {
  global $FmtV;
  static $reindex = array();
  $fold = $GLOBALS['StrFoldFunction'];

  switch ($opt['=phase']) {
    case PAGELIST_PRE:
      $FmtV['$MatchSearched'] = count($list);
      $incl = array(); $excl = array();
      foreach((array)@$opt[''] as $i) { $incl[] = $fold($i); }
      foreach((array)@$opt['+'] as $i) { $incl[] = $fold($i); }
      foreach((array)@$opt['-'] as $i) { $excl[] = $fold($i); }

      $indexterms = PageIndexTerms($incl);
      foreach($incl as $i) {
        //'$' delimiter marks terms made only of word/high-bit characters;
        //the ITEM phase checks for it before queueing a page for reindexing
        $delim = (!preg_match('/[^\\w\\x80-\\xff]/', $i)) ? '$' : '/';
        $opt['=inclp'][] = $delim . preg_quote($i,$delim) . $delim . 'i';
      }
      if ($excl)
        $opt['=exclp'][] = '$'.implode('|', array_map('preg_quote',$excl)).'$i';

      if (@$opt['link']) {
        $link = MakePageName($pn, $opt['link']);
        $opt['=linkp'] = "/(^|,)$link(,|$)/i";
        $indexterms[] = " $link ";
      }

      if (@$opt['=cached']) return 0;
      if ($indexterms) {
        StopWatch("PageListTermsTargets begin count=".count($list));
        $xlist = PageIndexGrep($indexterms, true);
        $list = array_diff($list, $xlist);
        StopWatch("PageListTermsTargets end count=".count($list));
      }

      if (@$opt['=inclp'] || @$opt['=exclp'] || @$opt['=linkp'])
        return PAGELIST_ITEM|PAGELIST_POST;
      return 0;

    case PAGELIST_ITEM:
      if (!$page) { $page = ReadPage($pn, READPAGE_CURRENT); $opt['=readc']++; }
      if (!$page) return 0;
      if (@$opt['=linkp'] && !preg_match($opt['=linkp'], @$page['targets'])) {
        $reindex[] = $pn;
        return 0;
      }
      if (@$opt['=inclp'] || @$opt['=exclp']) {
        $text = $fold($pn."\n".@$page['targets']."\n".@$page['text']);
        //TERemoveInlineMarkup() declares two required parameters; calling it
        //with one raises ArgumentCountError on PHP 7.1+, so pass an empty $par
        if (isset($opt['text']) && $opt['text']==1)
          $text = TERemoveInlineMarkup($text, array());
        foreach((array)@$opt['=exclp'] as $i)
          if (preg_match($i, $text)) return 0;
        foreach((array)@$opt['=inclp'] as $i)
          if (!preg_match($i, $text)) {
            if ($i[0] == '$') $reindex[] = $pn;
            return 0;
          }
      }
      return 1;

    case PAGELIST_POST:
      if ($reindex) PageIndexQueueUpdate($reindex);
      $reindex = array();
      return 0;
  }
} //}}}

/**
 * Slice list for count= option.
 *
 * CalcRange() turns $opt['count'] into a 1-based start/end pair; when the end
 * lies before the start the selected slice is returned in reverse order.
 *
 * @param array $list  list to slice, replaced in place
 * @param array $opt   options; reads 'count'
 */
function TESliceList(&$list, $opt) {
  list($r0, $r1) = CalcRange($opt['count'], count($list));
  if ($r1 < $r0)
    $list = array_reverse(array_slice($list, $r1-1, $r0-$r1+1));
  else
    $list = array_slice($list, $r0-1, $r1-$r0+1);
} //}}}

/**
 * Sort by match count (highest first) and subsort by name.
 *
 * One pass with a composite comparator gives the same order as sorting by
 * count and then name-sorting every run of equal counts, without reading
 * the element one past the end of the array as the run detection did.
 *
 * @param array $new  result entries with 'pmatchcnt' and 'name'; re-indexed from 0
 */
function TESort(&$new) {
  usort($new, function ($a, $b) {
    $bycount = TESortByMatchCnt($a, $b);
    return ($bycount != 0) ? $bycount : TESortByName($a, $b);
  });
} //}}}

//sort helper functions

/**
 * Comparator: descending by 'pmatchcnt'; a missing count is treated as 0.
 */
function TESortByMatchCnt($a, $b) {
  $ca = isset($a['pmatchcnt']) ? $a['pmatchcnt'] : 0;
  $cb = isset($b['pmatchcnt']) ? $b['pmatchcnt'] : 0;
  return $cb - $ca;
}

/**
 * Comparator: ascending by 'name', natural order, case-insensitive.
 */
function TESortByName($a, $b) {
  return strnatcasecmp($a['name'], $b['name']);
}
//EOF