<?php if (!defined('PmWiki')) exit(); // Stop immediately unless the 'PmWiki' constant has been defined.

/*  === ConvertHTML ===
 *  Copyright 2008-2010 Eemeli Aro <eemeli@gmail.com>
 *
 *  A set of replace-on-edit rules for automatically converting
 *  HTML to PmWiki markup
 *
 *  Requires PmWiki 2.2.58 or more recent.
 *
 *  To install, add the following line to your configuration file:
    include_once("$FarmD/cookbook/convert-html.php");
 *
 *  For more information, please see the online documentation at
 *    http://www.pmwiki.org/wiki/Cookbook/ConvertHTML
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License,
 *  Version 2, as published by the Free Software Foundation.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 *  Updated for PHP 5.5-8.0 by Petko Yotov www.pmwiki.org/petko
 */

// Version string recorded for this recipe in $RecipeInfo.
$RecipeInfo['ConvertHTML']['Version'] = '20210207';

// Conversion rules handed to SDVA() for $ROEPatterns (SDVA is defined outside
// this file). Each key is a PCRE pattern; each value is either a replacement
// string or the name of a function that receives the match array (the ch_*
// functions are defined below; cb_expandkpv is not defined in this file).
// The order is significant: several rules match text emitted by an earlier
// rule, e.g. the <tr> rule looks for "(:cell" produced by the <td> rule, and
// the "%apply=img" rules post-process the output of the <img> rule.
SDVA($ROEPatterns, array(
  // [=...=] and [@...@] spans are passed to Keep() by ch_keeph0.
  '#\[([=@]).*?\1\]#s' => 'ch_keeph0',
  // Document skeleton: html/head/body tags are dropped, title and
  // keywords/description meta tags become (: ... :) directives.
  '#[ \t]*</?(?:html|head|body)>(\n?)\n*#i' => '$1',
  '#<title>\s*(.*?)\s*</title>\n*#is' => "(: title $1:)\n",
  '#<meta name=([\'"])(keywords|description)\1 content=([\'"])(.*?)\3 */>\n*#i' => "(:$2 $4:)\n",
  // HTML comments (no "s" modifier, so only comments without newlines match).
  '#<!--(.*?)-->#' => "%comment% $1 %%",
  // Attributes on list containers and items are moved into %apply=...% text
  // so that the tags themselves are bare before the list rules run.
  '#<(ul|ol|dl)\s+([^>]+)>\s*(<(li|dt)\b[^>]*>)#i' => '<$1>$3%apply=list $2% ',
  '#<(li|dt)\s+([^>]+)>#i' => "<$1>%apply=item $2% ",
  // Lists: ul/ol go through ch_lists; dt/dd pairs become ":term:definition".
  '#\s*(<(ul|ol)\s*>\s*<li.*</\2>)\n?#is' => 'ch_lists',
  '#\s*<dt\s*>(.*?)(?:</dt>)?\s*<dd\s*>\s*(.*?)(?:</dd>)?\n#is' => "\n:$1:$2\n",
  '#\s*</?dl\s*>#i' => '',
  // Tables: <td> is rewritten first; the <tr> rule then turns the first
  // "(:cell" of each row into "(:cellnr".
  '#\s*<table( [^>]*)?>(.*?)\s*</table>#is' => "\n(:table$1:)$2\n(:tableend:)",
  '#\s*<td( [^>]*)?>(.*?)</td>#is' => "\n(:cell$1:)$2",
  '#\s*<tr( [^>]*)?>\s*\(:cell\b(.*?)</tr>#is' => "\n(:cellnr$1$2",
  // Block elements: attributes on p/hN are moved into %block ...% first,
  // then headings, paragraphs, divs, spans and blockquotes are converted.
  '#<(p|h\d)\s+([^>]+)>#i' => "<$1>%block $2% ",
  '#\s*<h(\d)\s*>(.*?)</h\1>\n*#is' => 'ch_headings',
  '#\s*<p\s*>\s*(.*?)</p>\n?#is' => "\n\n$1\n",
  '#\s*<p\s*>\s*#is' => "\n\n",
  '#\s*<div( [^>]*)?>\s*(.*?)\n?</div>\n*#is' => "\n(:div$1:)\n$2\n(:divend:)\n",
  '#<span\b\s*([^>]*)>(.*?)</span>#is' => 'ch_spans',
  '#\s*<blockquote>\s*(.*?)</blockquote>\n*#is' => "\n->$1\n",
  // Line breaks and horizontal rules. The "clear" variant must be tried
  // before the plain <br> rule.
  '#<br\s+clear=[\'"]?(all|left|right)[\'"]?\s*/?>\n*#i' => "[[<<]]\n",
  '#<br */?>\n*#i' => "\\\\\\\n",
  '#\s*<hr */?>\n*#i' => "\n----\n",
  // Inline formatting. The first three rules replace opening and closing
  // tags alike with the same marker.
  '#</?(i|em)>#i' => "''",
  '#</?(b|strong)>#i' => "'''",
  '#</?(code|tt)>#i' => "@@",
  '#<pre>(.*?)</pre>#is' => "[@$1@]",
  '#<big>(.*?)</big>#is' => "'+$1+'",
  '#<small>(.*?)</small>#is' => "'-$1-'",
  '#<sup>(.*?)</sup>#is' => "'^$1^'",
  '#<sub>(.*?)</sub>#is' => "'_$1_'",
  '#<(ins|u)>(.*?)</\1>#is' => "{+$2+}",
  '#<del>(.*?)</del>#is' => "{-$1-}",
  // href/src values starting with "." or "/" get a "Path:" prefix; values
  // containing a dot but no "/" or ":" get an "Attach:" prefix.
  '#(<(?:a|img)\b[^>]+\b(href|src)=)([\'"])([./][^\'"]*?)\3#i' => "$1$3Path:$4$3",
  '#(<(?:a|img)\b[^>]+\b(href|src)=)([\'"])([^/:\'"]+\.[^/:\'"]+?)\3#i' => "$1$3Attach:$4$3",
  // Named anchors are handled before generic links.
  '#<a\s[^>]*\bname=([\'"])([^\'"]*?)\1[^>]*>(.*?)</a>#is' => 'ch_anchors',
  '#<a\s+([^>]*)>(.*?)</a>#is' => 'ch_links',
  // Images: the tag becomes "%apply=img attrs%url%%"; the next rules then
  // turn align=left/right into a leading %lfloat%/%rfloat%, append a
  // non-empty alt/title as quoted text, and finally drop an %apply=img%
  // wrapper that carries nothing except empty alt/title attributes.
  '#<img\s([^>]*)\bsrc=([\'"])([^\'"]*?)\2\s([^>]*?)\s*(?:/?|></img)>\n?#i' => "%apply=img $1$4%$3%%\n",
  '#(.*%apply=img\b[^%]+)\balign=([\'"]?)(l|r)(?:eft|ight)\2([^%]*%[^%]+)%%#i' => '%$3float% $1$4%%',
  '#(%apply=img\b[^%]+)\b(?:alt|title)=([\'"])([^\'"]+?)\2([^%]*%[^%]+)%%#i' => '$1$4"$3"%%',
  '#%apply=img\b(?:\s+(?:alt|title)=([\'"])\s*\1)*\s*%([^%]+)%%#' => '$2',
  // Forms and form controls become (:input ...:) directives.
  '#<form\s*([^>]*)>#i' => '(:input form $1:)',
  '#</form>#i' => '(:input end:)',
  '#<input\b([^>]*)\stype=([\'"]?)(\w+)\2([^>]*?)/?>#i' => '(:input $3$1$4:)',
  '#<textarea\b([^>]*)>\s*(.*?)\s*</textarea>#i' => '(:input textarea$1 value=\'$2\':)',
  '#<select\b([^>]*)>(.*?)</select>#is' => 'ch_selectopt',
  // Text delimited by $KeepToken whose payload starts with a digit and ends
  // in "H" goes to cb_expandkpv. NOTE(review): presumably this restores the
  // spans kept by the first rule -- confirm against PmWiki's cb_expandkpv.
  "#$KeepToken(\d.*?H)$KeepToken#" => 'cb_expandkpv',
));

/**
 * Callback for the [=...=] / [@...@] rule: forwards the complete matched
 * text to Keep() with 'H' as the second argument.
 *
 * @param array $m  Regex match array; only $m[0] is read.
 * @return mixed    Whatever Keep() returns.
 */
function ch_keeph0($m) {
  $wholeMatch = $m[0];
  return Keep($wholeMatch, 'H');
}
/**
 * Callback for the ul/ol rule: converts the captured list HTML with
 * ConvertHtmlList().
 *
 * @param array $m  Regex match array; $m[1] holds the list markup.
 * @return string   Result of ConvertHtmlList().
 */
function ch_lists($m) {
  $listHtml = $m[1];
  return ConvertHtmlList($listHtml);
}
/**
 * Callback for the heading rule: emits a blank line, one "!" per heading
 * level, a space, the heading text and a newline.
 *
 * @param array $m  Regex match array; $m[1] is the level digit, $m[2] the text.
 * @return string   The heading line, preceded by two newlines.
 */
function ch_headings($m) {
  $level = $m[1];
  $title = $m[2];
  $marker = str_repeat('!', $level);
  return "\n\n{$marker} {$title}\n";
}
/**
 * Callback for the span rule: wraps the span content in "%attrs% ... %%",
 * where attrs is the attribute text after ConvertHtmlSpan().
 *
 * @param array $m  Regex match array; $m[1] is the attribute text, $m[2] the content.
 * @return string   The content wrapped in the percent markers.
 */
function ch_spans($m) {
  $style = ConvertHtmlSpan($m[1]);
  $content = $m[2];
  return "%{$style}% {$content} %%";
}
/**
 * Callback for the named-anchor rule: emits "[[#name]] " followed by the
 * anchor's content. Each run of whitespace in the name becomes one "_".
 *
 * @param array $m  Regex match array; $m[2] is the name, $m[3] the content.
 * @return string   The anchor markup followed by the content.
 */
function ch_anchors($m) {
  $anchorName = preg_replace("/\\s+/", "_", $m[2]);
  $content = $m[3];
  return "[[#{$anchorName}]] {$content}";
}
/**
 * Callback for the generic link rule: passes the tag's attribute text and
 * its content to ConvertHtmlLink().
 *
 * @param array $m  Regex match array; $m[1] is the attribute text, $m[2] the content.
 * @return string   Result of ConvertHtmlLink().
 */
function ch_links($m) {
  $attributes = $m[1];
  $label = $m[2];
  return ConvertHtmlLink($attributes, $label);
}
/**
 * Callback for the select rule: rewrites every <option> inside the matched
 * <select> as "(:input select<select-attrs><option-attrs> label="text":)".
 * Text between the options is left untouched.
 *
 * The output is built in a callback rather than a replacement string, so
 * that "$" or "\" sequences in the <select> attributes are copied verbatim
 * instead of being expanded as backreferences.
 *
 * @param array $m  Regex match array; $m[1] is the select's attribute text,
 *                  $m[2] the markup between the select tags.
 * @return string|null  The converted markup (null if the regex engine fails).
 */
function ch_selectopt($m) {
  $selectAttrs = $m[1];
  return preg_replace_callback(
    "!<option\\b([^>]*)>\\s*(.*?)\\s*</option>!is",
    function ($opt) use ($selectAttrs) {
      // $opt[1]: the option's attribute text, $opt[2]: its trimmed label.
      return "(:input select" . $selectAttrs . $opt[1] . " label=\"" . $opt[2] . "\":)";
    },
    $m[2]
  );
}


/**
 * Builds link markup from the attribute text and content of an <a> tag.
 *
 * The attributes are parsed with ParseArgs() and their keys lower-cased.
 * Without a non-empty href the original tag is rebuilt unchanged. Otherwise
 * the result is "[[href|text]]", prefixed by "%...%" holding any non-empty
 * target, rel and accesskey attributes; target="_blank" is written as
 * "newwin".
 *
 * @param string $param  Raw attribute text of the <a> tag.
 * @param string $txt    Content between the opening and closing tag.
 * @return string        The link markup, or the rebuilt <a> tag.
 */
function ConvertHtmlLink($param, $txt) {
  $opt = array_change_key_case(ParseArgs($param), CASE_LOWER);
  if (empty($opt['href'])) {
    return "<a $param>$txt</a>";
  }

  $link = '[[' . $opt['href'] . '|' . $txt . ']]';

  // Collect the attributes that are carried over, in a fixed order.
  $kept = array();
  foreach (array('target', 'rel', 'accesskey') as $attr) {
    if (!empty($opt[$attr])) {
      $kept[] = $attr . '="' . $opt[$attr] . '"';
    }
  }
  if (!$kept) {
    return $link;
  }

  $ws = str_replace('target="_blank"', 'newwin', implode(' ', $kept));
  return '%' . $ws . '%' . $link;
}

/**
 * Prepares the attribute text of a <span> for use between "%" markers.
 *
 * Every "%" is spelled out as "pct" first; then each quoted class="..." or
 * style="..." attribute is reduced to its bare value. Other text is kept.
 *
 * @param string $param  Raw attribute text of the <span> tag.
 * @return string        The rewritten attribute text.
 */
function ConvertHtmlSpan($param) {
  $withoutPercent = preg_replace('/%/', 'pct', $param);
  return preg_replace('/(?:class|style)=([\'"])(.*?)\1/', '$2', $withoutPercent);
}

/**
 * Converts nested <ul>/<ol>/<li> markup into list lines made of a newline,
 * one "*" (ul) or "#" (ol) per nesting level, a space and the item text.
 *
 * Attributes on the list tags are discarded. Tags are recognised in any
 * letter case, matching the case-insensitive split pattern; previously only
 * all-lower and all-upper spellings were handled, so e.g. "<Li>" leaked
 * into the output as raw HTML.
 *
 * @param string $html  HTML fragment containing the list.
 * @return string       The converted text; text outside list items is
 *                      passed through unchanged.
 */
function ConvertHtmlList($html) {
  $out = '';
  $lit = array();   // stack of item prefixes, one entry per open list
  $strip = FALSE;   // TRUE right after <li>: collapse whitespace in the next text chunk

  // Reduce every list tag to its bare form so that it matches a case label below.
  $html = preg_replace('#(</?(?:ol|ul|li))\b([^>]+)>#i','$1>',$html);
  // Split into tags and text chunks, dropping the whitespace around tags.
  $lia = preg_split( '#\s*(</?(?:ol|ul|li)\s*>)\s*#i', $html, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );

  foreach ($lia as $n) {
    // Compare case-insensitively. A text chunk can never equal a tag,
    // because every tag was split off as its own token.
    switch (strtolower($n)) {
      case '<ul>':
        $lit[] = "\n".str_repeat( '*', count($lit)+1 ).' ';
        break;
      case '<ol>':
        $lit[] = "\n".str_repeat( '#', count($lit)+1 ).' ';
        break;
      case '</ul>':
      case '</ol>':
        array_pop($lit);
        $strip = FALSE;
        break;
      case '<li>':
        // Start a new item with the prefix of the innermost open list.
        if ($lit) $out .= end($lit);
        $strip = TRUE;
        break;
      case '</li>':
        $strip = FALSE;
        break;
      default:
        if ($strip) {
          // First text chunk of an item: put it on a single line.
          $out .= preg_replace('/\s+/',' ',$n);
          $strip = FALSE;
        } else {
          $out .= $n;
        }
    }
  }
  return $out;
}