<?php
## Refuse to run when this file is requested outside of PmWiki.
if (!defined('PmWiki')) {
	exit();
}

## Recipe version, published through $RecipeInfo.
$RecipeInfo['Excerpts']['Version'] = '20171110';

/*
	Excerpts for PmWiki	
	Copyright 2006-2007 Mateusz Czaplinski (mateusz@czaplinski.pl)

	This file is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published
	by the Free Software Foundation; either version 2 of the License, or
	(at your option) any later version.

	---- Usage Information: ----

	* Put this file in 'cookbook/' directory of your PmWiki installation
	* In your 'local/config.php' file add the following line:

		include_once("$FarmD/cookbook/excerpts.php");

	* This enables additional markup in your wiki - example usage:

		(:excerpt PmWiki.InitialSetupTasks "when you":)
		- finds words around the first occurrence of "when you"
		  phrase in the PmWiki.InitialSetupTasks page, which
		  shows as:

		...n file (''local/config.php'') When you first install PmWiki, the ''l...

		(:excerpts Group.Page word "longer phrase":)
		- creates a separate copy of (:excerpt:) for each phrase
		  given after the page name. It can also be used with
		  {$SearchQuery}, from PmWiki Cookbook recipe "SearchQueryVariable"


	---- Comments and More Information: ----

		http://pmwiki.org/wiki/Cookbook/Excerpts

	---- History: ----

	* 2017-11-10 Update for PHP 5.5-7.2, requires PmWiki 2.2.56+ (by Petko pmwiki.org/petko )
	* 2007-09-30 - v1.2 
	  - removed unused ExcertpsToupper() function
	  - added $ExcerptSideWidth
	* 2007-09-26 - v1.1 - (:excerpts:) with empty keywords doesn't produce (:excerpt '':)
	* 2006-12-21 - v0.9 - initial release

*/

############
## Excerpt
##
## A fragment of given page, surrounding given word in this page.
##
##-- TODO:
## - modify, so that it requires whitespace between 'excerpts' and parameters
##   (and update the (:excerpt:) recipe accordingly).
############

/**
=== TODO ideas ===
* Try to make the search case-insensitive for UTF-8 (international) characters.
  - possible using utf8toupper() (defined in scripts/xlpage-utf-8.php), then 
    PREG_OFFSET_CAPTURE in preg_match() to find the matched fragment,
    then strlen($match) & substr() to extract it with original lettering. However, 
    it seems quite an expensive algorithm.
  - or by somehow properly combining the /i and /u switches in preg_match() (they caused
    problems with non-UTF8 strings for me)

**/

## How much page text is shown on each side of the matched term.
## The value is a count of bytes, not characters, because the search
## pattern in ExcerptMarkup() is applied without the /u modifier.
## Default: 30 (registered via SDV(); presumably only applied when the
## variable has not already been set by the site configuration).
SDV($ExcerptSideWidth, 30);

## Register the (:excerpt ...:) directive; everything between the keyword
## and the closing ":)" is handed to ExcerptMarkup() as capture group 1.
## Note that this pattern also matches "(:excerpts ...:)", which is why
## the 'excerpts' rule further down has to be positioned ahead of it.
Markup(
	'excerpt',
	'directives',
	"/\\(:excerpt(.*?):\\)/i",
	"ExcerptMarkup"
);

## Markup handler for (:excerpt Page "term":).
##
## $m is the regex match array supplied by the markup engine; $m[1] holds
## the raw directive arguments. The first positional argument is the page
## to read, the second is the term to look for.
##
## Returns a Keep()-protected HTML fragment of the form
##   ...<before><strong><match></strong><after>...
## where <before>/<after> are at most $ExcerptSideWidth bytes long, or an
## empty string when no page was given, the page may not be read by the
## current visitor, or the term does not occur in the page.
## An empty term matches at the very start of the page text, so the result
## is then simply the beginning of the page.
function ExcerptMarkup($m) {
	global $pagename, $ExcerptSideWidth;

	## Positional (unnamed) arguments: [0] = page name, [1] = search term.
	## Both may be missing, e.g. for a bare "(:excerpt:)".
	$args = ParseArgs($m[1]);
	$args = isset($args['']) ? (array)$args[''] : array();
	if (!isset($args[0])) return '';
	$rawTerm = isset($args[1]) ? (string)$args[1] : '';

	## Retrieve the current page text. RetrieveAuthPage() is used instead
	## of ReadPage() so that the visitor's read permission is honoured
	## (ReadPage() performs no authorization check); $authprompt is false
	## so a protected page is silently skipped rather than triggering a
	## password prompt. READPAGE_CURRENT skips loading the page history.
	$target = MakePageName($pagename, $args[0]);
	if (!$target) return '';
	$page = RetrieveAuthPage($target, 'read', false, READPAGE_CURRENT);
	if (!$page) return '';
	$text = isset($page['text']) ? (string)$page['text'] : '';

	## Compress whitespace so the excerpt is a single line of text.
	$text = preg_replace('/\s+/', ' ', $text);

	## Build the search pattern: every run of characters that is neither a
	## word character nor a high (non-ASCII) byte becomes a separator that
	## matches any such run. Only word characters and high bytes survive
	## literally, so the term cannot inject regex syntax.
	$pattern = preg_replace(
		array( '/[^\\w\\x80-\\xff]+/', '/\s+/' ),
		array( ' ', '[^\\w\\x80-\\xff]+' ),
		html_entity_decode( trim( $rawTerm ) ) );

	## The width ends up inside a {0,n} quantifier: force a non-negative
	## integer and stay within PCRE's limit for repeat counts (65535).
	$width = min(65535, max(0, (int)$ExcerptSideWidth));

	## Case-insensitive search. Without /u only ASCII letters are folded,
	## so non-ASCII UTF-8 text is still matched case-sensitively.
	$matches = array();
	$found = preg_match(
		'/(.{0,' . $width . '})(' . $pattern . ')(.{0,' . $width . '})/i',
		$text, $matches );

	## preg_match() yields 0 for "no match" and false on error; in both
	## cases there is nothing to show.
	if (!$found) return '';

	return Keep( '...'.PHSC($matches[1]).'<strong>'.
		PHSC($matches[2]).'</strong>'.
		PHSC($matches[3]).'...');
}


###########
## A list of page excerpts.
###########

## Register the (:excerpts ...:) directive, handled by ExcerptsMarkup().
## The '<excerpt' position requests placement ahead of the 'excerpt' rule,
## whose pattern would otherwise consume "(:excerpts ...:)" as well.
Markup(
	'excerpts',
	'<excerpt',
	"/\\(:excerpts(.*?):\\)/i",
	"ExcerptsMarkup"
);

## Markup handler for (:excerpts Page term "another term" ...:).
##
## $m is the regex match array supplied by the markup engine; $m[1] holds
## the raw directive arguments. The first positional argument is the page
## name, every following one is a separate search term.
##
## Returns wiki markup consisting of one (:excerpt Page 'term':) directive
## per non-blank term, separated by Keep()-protected <br /> tags, or an
## empty string when no page or no usable term was given. The generated
## directives are expanded later by the 'excerpt' rule.
function ExcerptsMarkup($m) {
	## Positional (unnamed) arguments; the key is absent for a bare
	## "(:excerpts:)", which must not be passed on to array_slice().
	$args = ParseArgs($m[1]);
	$args = isset($args['']) ? (array)$args[''] : array();
	if (!isset($args[0])) return '';
	$page = $args[0];

	## Each term is emitted inside single quotes, so an apostrophe in the
	## term would end the quoted argument early. ExcerptMarkup() treats
	## every non-word character as a separator anyway, so swapping the
	## apostrophe for a space keeps the meaning of the search intact.
	$terms = array();
	foreach (array_slice($args, 1) as $term) {
		$terms[] = str_replace("'", ' ', (string)$term);
	}
	$terms = array_filter($terms, 'ExcerptsFilter');
	if (!$terms) return '';

	$directives = array();
	foreach ($terms as $term) {
		$directives[] = "(:excerpt $page '" . $term . "':)";
	}
	return implode( Keep('<br />'), $directives );
}
## array_filter() callback for ExcerptsMarkup(): keeps only search terms
## that contain at least one non-whitespace character.
## The explicit empty-string test is required because ctype_space('')
## returns false, which would otherwise let empty keywords through.
function ExcerptsFilter($x) {
	$x = (string)$x;
	return $x !== '' && !ctype_space($x);
}