<?php if (!defined('PmWiki')) exit();
/* dash-pagenames.php, an extension for PmWiki 2.3, copyright Hans Bracker 2023. 

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This script changes how spaces are treated in links to pages or to create pages.
   Pagenames and URLs are created with words separated by dashes/hyphens.
   In links spaces get converted to dashes/hyphens instead of WikiWords.
   Lower and upper case letters are preserved; if utf-8 is enabled, utf-8 characters are preserved.
   Links in groups PmWiki, Site and SiteAdmin, as well as WikiWord links, keep their functionality.
   Cross-linking pages between groups needs no special attention.
*/
$RecipeInfo['Dash-Pagenames']['Version'] = '2023-02-15';

// UTF-8 characters (apart from a few special ones) are allowed in page names
// and URLs by default. To force unaccented ASCII characters only, set
// $EnableUTF8PageNames = 0; in config.
SDV($EnableUTF8PageNames, 1);

// Characters allowed in page names: dash/hyphen and alphanumerics, extended
// with the high byte range when the wiki runs in UTF-8.
$PageNameChars = '-[:alnum:]';
if ($Charset == 'UTF-8') {
    // The UTF-8 charset is set by scripts/xlpage-utf-8.php, which also sets a
    // $GroupPattern and $NamePattern that allow a lower case first letter.
    $PageNameChars .= '\\x80-\\xfe';
} else {
    // Without UTF-8 the patterns are set here, so that group and page names
    // may start with a lower case letter.
    $GroupPattern = '[\\w]*(?:-\\w+)*';
    $NamePattern = '[\\w]*(?:-\\w+)*';
}


// Custom replacement patterns. MakePageNameAlt() applies them to the link text
// before the MakePageName patterns are run.

// UTF-8 substitutions: swap a character for a UTF-8 one instead of stripping it.
SDVA($MPN_UTF8_ReplacePatterns, array(
  "/'/" => "\xe2\x80\x99", // apostrophe ' becomes right single quotation mark (U+2019)
  "/\:/" => "\xc2\xb7",    // colon ':' becomes middle dot (U+00B7)
));

// ASCII text substitutions: spell a special character out instead of stripping it.
SDVA($MPN_ReplacePatterns, array(
  "/l\'(?=\w+)/" => 'le ',  // French: "l'mot" becomes "le mot"
  "/\&/" => ' and ',        // English: '&' becomes 'and'
  "/\@/" => ' at ',         // English: '@' becomes 'at'
  "/\=/" => ' equals ',     // English: '=' becomes 'equals'
  "/\+/" => ' plus ',       // English: '+' becomes 'plus'
));

// With UTF-8 page names enabled, the UTF-8 substitutions are placed in front
// of the ASCII ones. The two arrays must not share a key: for a shared key
// array_merge keeps only the value from the second array.
if ($EnableUTF8PageNames==1 && $Charset == 'UTF-8') {
    $MPN_ReplacePatterns = array_merge($MPN_UTF8_ReplacePatterns, $MPN_ReplacePatterns);
}


// PmWiki's standard MakePageNamePatterns. Kept here so that they can be
// switched back in for groups that keep CamelCase page names.
$Pm_PNP = array(
   '/[?#].*$/' => '',                # strip everything after ? or #
   "/'/" => '',                      # strip single-quotes
   "/[^$PageNameChars]+/" => ' ',    # convert everything else to space
   '/((^|[^-\\w])\\w)/' => 'cb_toupper', # CamelCase, first letter to upper
   '/ /' => ''
);
// Alternative MakePageNamePatterns: no CamelCase, dash/hyphen as word
// separator, letter case left unchanged.
// The patterns are applied in array order. Runs of dashes are collapsed
// BEFORE the leading and trailing dash are stripped: those two patterns each
// remove a single dash only, so with the collapse step last an input such as
// "--foo--" would end up as "-foo-".
$Dash_PNP = array(
    '/[?#].*$/' => '',                # strip everything after ? or #
    "/'/" => '',                      # strip single-quotes
    "/[^$PageNameChars]+/" => '-',    # convert everything else to hyphen
    '/ /' => '',                      # strip spaces
    '/-+/' => '-',                    # collapse runs of dashes into one
    '/^-/' => '',                     # strip the dash from the start
    '/-$/' => '',                     # strip the dash from the end
);

// Groups exempt from the dashed name patterns; they keep the standard
// PmWiki patterns.
SDVA($CamelCaseGroups, array('PmWiki'=>1,'Site'=>1,'SiteAdmin'=>1));

// Choose $MakePageNamePatterns from the group of the current page and, for
// dashed groups, also set the $Title page variable and $AsSpacedFunction.
$group = PageVar($pagename,'$Group');
if (!array_key_exists($group, $CamelCaseGroups)) {
    // every group that is not listed in $CamelCaseGroups
    $MakePageNamePatterns = $Dash_PNP;
    // same as $Titlespaced (dashes to spaces)
    $FmtPV['$Title'] = 'FmtPageTitle(@$page["title"], $name, 1)';
    $AsSpacedFunction = 'DashSpaced';
} else {
    $MakePageNamePatterns = $Pm_PNP;
}

// Turn a page name into spaced text: every dash becomes a space, then the
// result is passed through AsSpacedUTF8() when the charset is UTF-8 and
// through AsSpaced() otherwise, to space WikiWords.
function DashSpaced($x) {
  global $Charset;
  $spaced = str_replace('-', ' ', $x);
  if ($Charset == 'UTF-8') return AsSpacedUTF8($spaced);
  return AsSpaced($spaced);
}

// Helper: pick the MakePageName patterns for a group. Groups listed as keys
// of $CamelCaseGroups get $Pm_PNP, every other group gets $Dash_PNP.
function SetPNP($group) {
    global $Pm_PNP, $Dash_PNP, $CamelCaseGroups;
    return array_key_exists($group, $CamelCaseGroups) ? $Pm_PNP : $Dash_PNP;
}

 // Replaces function MakePageName (hooked in through $MakePageNameFunction) so
 // that the page name patterns can be switched according to the target group.
 $MakePageNameFunction = 'MakePageNameAlt';
 // Builds a page name from link text.
 //   $basepage  page name the link is relative to
 //   $str       link target text
 // Returns the page name, or '' when $str has no leading part or splits into
 // more than two parts on '.' or '/'.
 function MakePageNameAlt($basepage, $str) {
    global $PagePathFmt, $MPN_ReplacePatterns, $EnableUTF8PageNames;
    $str = preg_replace('~[#?].*$~', '', $str);    // strip anything from # or ?
    // Convert accented characters to ASCII when UTF-8 page names are switched
    // off. Any falsy setting (0, '0', false) counts as "off", not only int 0.
    if (!$EnableUTF8PageNames && preg_match('~[\\x80-\\xff]~', $str))
        $str = Dash_UnaccentUTF8($str);
    $str = htmlspecialchars_decode($str);
    $str = preg_replace("/&#?[a-z0-9]+;/i", "", $str); // strip html entities that are still encoded
    $str = PPRA($MPN_ReplacePatterns, $str);     // special language friendly replacements
    $m = preg_split('/[.\\/]/', $str);           // split by / or . into group and name parts
    if (count($m)<1 || count($m)>2 || $m[0]=='') return '';
    ##  "Group.Name": both parts are converted with the patterns of the
    ##  link's target group
    if (isset($m[1]) && $m[1] > '') {
      $pat = SetPNP($m[0]);
      $group = PPRA($pat, $m[0]);
      $name =  PPRA($pat, $m[1]);
      return "$group.$name";
    }
    ##  "Name" or "Group." (empty name part): converted with the patterns of
    ##  the base page's group
    ##  NOTE(review): for "Group." the target group is $m[0], not the base
    ##  page's group - confirm that using the base page's patterns is intended.
    $bp = preg_split('/[.\\/]/', $basepage);
    $name = PPRA(SetPNP($bp[0]), $m[0]);
    $isgrouphome = count($m) > 1;
    ##  return the first existing page among the $PagePathFmt candidates;
    ##  for "Group." only formats starting with '$1.' are considered
    foreach((array)$PagePathFmt as $pg) {
      if ($isgrouphome && strncmp($pg, '$1.', 3) !== 0) continue;
      $pn = FmtPageName(str_replace('$1', $name, $pg), $basepage);
      if (PageExists($pn)) return $pn;
    }
    ##  no existing page found
    if ($isgrouphome) {
      foreach((array)$PagePathFmt as $pg)
        if (strncmp($pg, '$1.', 3) == 0)
          return FmtPageName(str_replace('$1', $name, $pg), $basepage);
      return "$name.$name";
    }
    ##  replace the name part of the base page with the new name
    return preg_replace('/[^\\/.]+$/', $name, $basepage);
 }

 // Converts accented characters in a UTF-8 string to ASCII.
 // The final transliteration step needs the PHP Intl extension. The
 // transliterator is created on first use and cached in the global
 // $DashTransliterator, so a missing Intl extension no longer stops the
 // script from loading when the conversion is never called.
 // Without Intl only the explicit substitutions below are applied and any
 // remaining accented characters are returned unchanged.
 // All special characters are written as UTF-8 byte escapes so that the
 // patterns do not depend on the encoding this file is saved in.
  function Dash_UnaccentUTF8($str) {
    global $DashTransliterator;
    // a-umlaut, o-umlaut, u-umlaut, o-slash (lower and upper case): append 'e'
    $str = preg_replace(
        "/\xc3\xa4|\xc3\xb6|\xc3\xbc|\xc3\xb8|\xc3\x84|\xc3\x96|\xc3\x9c|\xc3\x98/",
        '$0e', $str);
    // a-ring (lower and upper case): append 'o'
    $str = preg_replace("/\xc3\xa5|\xc3\x85/", '$0o', $str);
    $str = str_replace("\xc3\x86", 'Ae', $str);   // AE ligature (U+00C6)
    $str = str_replace("\xc5\x92", 'Oe', $str);   // OE ligature (U+0152)
    $str = str_replace("\xc3\x9e", 'Th', $str);   // capital thorn (U+00DE)
    $str = preg_replace("/\xcc\x88/", 'e', $str); // combining diaeresis (U+0308)
    if (!is_object($DashTransliterator) && class_exists('Transliterator')) {
        $DashTransliterator = Transliterator::createFromRules(
            ':: Latin-ASCII ; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC;',
            Transliterator::FORWARD);
    }
    if (is_object($DashTransliterator)) {
        $ascii = $DashTransliterator->transliterate($str);
        // transliterate() returns false on failure; keep the string then
        if ($ascii !== false) $str = $ascii;
    }
    return $str;
  }

 //EOF