Some-Group/Some-other-Name
Spaces in links get converted to dashes/hyphens in the target name, instead of WikiWords.
Links in groups PmWiki, Site and SiteAdmin, as well as WikiWord links keep their functionality.
Cross-linking pages between groups needs no special attention.
UTF-8 encoded characters are allowed, lower and upper case letters are preserved.
*/
$RecipeInfo['Dash-Pagenames']['Version'] = '2023-02-17';

// UTF-8 encoded characters in pagenames are allowed and preserved by default.
// Set to 1 to restrict pagenames to ASCII characters; accented UTF-8
// characters are then substituted with ASCII characters.
SDV($ForcePageNamesToASCII, 0);

// Upper and lower case letters in pagenames are allowed and preserved by default.
// Set to 1 to force all characters in pagenames to lower case.
SDV($ForcePageNamesToLowerCase, 0);

// Groups exempt from the dashed-name patterns, so that they keep their
// CamelCase names and link functionality.
SDVA($CamelCaseGroups, array('PmWiki'=>1, 'Site'=>1, 'SiteAdmin'=>1));

if ($Charset == 'UTF-8') {
  // UTF-8 charset is set by /scripts/xlpage-utf-8.php, which needs to be
  // included before this script. It also sets new $GroupPattern and
  // $NamePattern which allow lower case letters at start of group or name.
  $PageNameChars = '-[:alnum:]\\x80-\\xfe';
} else {
  // $GroupPattern and $NamePattern need setting to allow lower case letters at start.
  $PageNameChars = '-[:alnum:]';
  $GroupPattern = '[\\w]*(?:-\\w+)*';
  $NamePattern = '[\\w]*(?:-\\w+)*';
}

// Replacement patterns for special URL characters: substitute UTF-8 or ASCII
// text for them rather than just stripping them. SDVA only fills in keys that
// are not already set, so a local configuration may predefine any of these.
if ($Charset == 'UTF-8' && $ForcePageNamesToASCII == 0) {
  SDVA($MPN_ReplacePatterns, array(
    "/'/"  => "\xe2\x80\x99", // apostrophe ' => right single quotation mark (U+2019)
    "/\:/" => "\xc2\xb7",     // colon ':' => middle dot (U+00B7)
    "/\&/" => ' and ',        // '&' => 'and', for English
    "/\@/" => ' at ',         // '@' => 'at', for English
    "/\=/" => ' equals ',     // '=' => 'equals', for English
    "/\+/" => ' plus ',       // '+' => 'plus', for English
  ));
} else { // ASCII
  SDVA($MPN_ReplacePatterns, array(
    "/(l)\'(?=\w+)/i" => '$1e ', // "l'mot" => "le mot", for French
    "/\&/" => ' and ',           // '&' => 'and', for English
    "/\@/" => ' at ',            // '@' => 'at', for English
    "/\=/" => ' equals ',        // '=' => 'equals', for English
    "/\+/" => ' plus ',          // '+' => 'plus', for English
  ));
}

// PmWiki standard MakePageNamePatterns.
// Used for groups exempt from the dashed-pagename patterns.
$Pm_PNP = array(
  '/[?#].*$/' => '',                    # strip everything after ? or #
  "/'/" => '',                          # strip single-quotes
  "/[^$PageNameChars]+/" => ' ',        # convert everything else to space
  '/((^|[^-\\w])\\w)/' => 'cb_toupper', # CamelCase, first letter to upper
  '/ /' => ''                           # strip spaces
);

// Alternative MakePageNamePatterns: dash/hyphen as word separator, no CamelCase.
// Letter case is not changed, unless $ForcePageNamesToLowerCase = 1 (see SetPNP).
// The patterns are applied in array order, so runs of dashes are collapsed
// BEFORE the leading/trailing dash is removed; otherwise an input such as
// "--foo" would keep one leading dash.
$Dash_PNP = array(
  '/[?#].*$/' => '',              # strip everything after ? or #
  "/'/" => '',                    # strip single-quotes
  "/[^$PageNameChars]+/" => '-',  # convert everything else to hyphen
  '/ /' => '',                    # strip spaces
  '/-+/' => '-',                  # collapse runs of dashes into one
  '/^-/' => '',                   # strip dash from start
  '/-$/' => '',                   # strip dash from end
);

// Set $MakePageNamePatterns and $AsSpacedFunction according to the group of
// the current page.
$group = PageVar($pagename, '$Group');
if (array_key_exists($group, $CamelCaseGroups)) {
  $MakePageNamePatterns = $Pm_PNP;
} else {
  // All groups except PmWiki, Site, SiteAdmin, and any added to $CamelCaseGroups.
  $MakePageNamePatterns = $Dash_PNP;
  // Same as $Titlespaced (dashes to spaces).
  $FmtPV['$Title'] = 'FmtPageTitle(@$page["title"], $name, 1)';
  $AsSpacedFunction = 'DashSpaced';
}

// Replace dashes in a pagename with spaces, then space WikiWords using the
// charset-appropriate AsSpaced function.
//   $x  pagename (or name part) to convert
// Returns the spaced string.
function DashSpaced($x) {
  global $Charset;
  $spaced = str_replace('-', ' ', $x);
  if ($Charset == 'UTF-8') return AsSpacedUTF8($spaced);
  return AsSpaced($spaced);
}

// Helper: select the MakePageName pattern set for a target group.
//   $group  group name as written in the link (not yet normalised)
// Returns $Pm_PNP for groups listed in $CamelCaseGroups, otherwise $Dash_PNP.
// When $ForcePageNamesToLowerCase is 1 a lower-casing pattern is appended to
// the global $Dash_PNP (re-assigning the same key is harmless on repeat calls).
function SetPNP($group) {
  global $Pm_PNP, $Dash_PNP, $CamelCaseGroups, $ForcePageNamesToLowerCase;
  if (array_key_exists($group, $CamelCaseGroups)) return $Pm_PNP;
  if ($ForcePageNamesToLowerCase == 1) $Dash_PNP['/(.*$)/'] = 'cb_tolower';
  return $Dash_PNP;
}

// Replaces function MakePageName so the page name patterns can be switched
// according to the target group.
$MakePageNameFunction = 'MakePageNameAlt';

// Build a full page name from a link target.
//   $basepage  name of the page containing the link ("Group.Name")
//   $str       link target as written by the author
// Returns:
//   - '' if $str does not split into one or two parts on '.' or '/', or if
//     its first part is empty;
//   - "Group.Name" (each part normalised with the target group's patterns)
//     when both a group and a name are given;
//   - otherwise the first existing page found via $PagePathFmt, falling back
//     to the first '$1.' entry of $PagePathFmt (for "Group." / "Group/" links)
//     or to $basepage with its last component replaced by the new name.
function MakePageNameAlt($basepage, $str) {
  global $PagePathFmt, $MPN_ReplacePatterns, $ForcePageNamesToASCII;

  $str = preg_replace('~[#?].*$~', '', $str); // strip anything from # or ?
  if ($ForcePageNamesToASCII == 1 && preg_match('~[\\x80-\\xff]~', $str))
    $str = Dash_UnaccentUTF8($str);
  $str = htmlspecialchars_decode($str);
  // strip any html entities that htmlspecialchars_decode() left in place
  $str = preg_replace("/&#?[a-z0-9]+;/i", "", $str);
  $str = PPRA($MPN_ReplacePatterns, $str); // special language friendly replacements

  $m = preg_split('/[.\\/]/', $str); // split by / or . into group and name parts
  if (count($m) < 1 || count($m) > 2 || $m[0] == '') return '';

  ## handle "Group.Name" conversions according to link target group
  if (isset($m[1]) && $m[1] !== '') {
    $pat = SetPNP($m[0]);
    $group = PPRA($pat, $m[0]);
    $name = PPRA($pat, $m[1]);
    return "$group.$name";
  }

  ## single part: normalise it with the patterns of the base page's group
  $bp = preg_split('/[.\\/]/', $basepage);
  $name = PPRA(SetPNP($bp[0]), $m[0]);
  // two parts with an empty second one ("Group." or "Group/") is a group home link
  $isgrouphome = count($m) > 1;

  foreach ((array)$PagePathFmt as $pg) {
    if ($isgrouphome && strncmp($pg, '$1.', 3) !== 0) continue;
    $pn = FmtPageName(str_replace('$1', $name, $pg), $basepage);
    if (PageExists($pn)) return $pn;
  }

  if ($isgrouphome) {
    foreach ((array)$PagePathFmt as $pg)
      if (strncmp($pg, '$1.', 3) == 0)
        return FmtPageName(str_replace('$1', $name, $pg), $basepage);
    return "$name.$name";
  }

  return preg_replace('/[^\\/.]+$/', $name, $basepage);
}

// Transliterator used to convert accented characters to ASCII.
// It needs the PHP intl extension. The class_exists() guard keeps the recipe
// loadable on hosts without intl, where $DashTransliterator is null.
$DashTransliterator = class_exists('Transliterator')
  ? Transliterator::createFromRules(
      ':: Latin-ASCII ; :: NFD; :: [:Nonspacing Mark:] Remove; :: NFC;',
      Transliterator::FORWARD)
  : null;

// Convert accented characters in a string to ASCII substitutes.
//   $x  string in the wiki's $Charset
// Returns the converted string. The result is UTF-8 encoded; any character
// the transliterator does not convert stays as UTF-8. If no transliterator is
// available (intl missing, or rule compilation failed) or transliteration
// fails, only the explicit substitutions below are applied.
function Dash_UnaccentUTF8($x) {
  global $DashTransliterator, $Charset;
  if ($Charset != 'UTF-8') $x = mb_convert_encoding($x, "UTF-8", $Charset);

  // Append a letter after selected accented characters; the accented letter
  // itself is reduced to its base letter by the transliterator afterwards.
  $x = preg_replace("/ä|ö|ü|ø|Ä|Ö|Ü|Ø/", '$0e', $x);
  $x = preg_replace("/å|Å/", '$0o', $x);
  $x = str_replace("Æ", 'Ae', $x);
  $x = str_replace("Œ", 'Oe', $x);
  $x = str_replace("Þ", 'Th', $x);
  $x = preg_replace("/\xcc\x88/", 'e', $x); // combining diaeresis (U+0308) => 'e'

  if (is_object($DashTransliterator)) {
    $ascii = $DashTransliterator->transliterate($x);
    // transliterate() returns false on failure; keep the partial result then
    if ($ascii !== false) $x = $ascii;
  }
  return $x;
}
//EOF