PITS /
01076: optimization for case folding and strtoupper
Summary: optimization for case folding and strtoupper
Created: 2009-02-25 06:16
Status: Discussion
Category: Feature
From: CarlosAB
Assigned:
Priority: 1
Version: latest
OS: php/apache/freebsd
Description: Just a small optimization to utf8toupper() and utf8fold() for when PmWiki goes UTF-8 only.
// Report whether $str consists solely of 7-bit ASCII bytes.
// Searches for any byte outside the range 0x00-0x7F; the pattern has no
// "u" modifier, so matching is byte-wise.
// Returns true when no such byte is found. Because the result is compared
// against 1 only, an empty string and a preg_match() failure (false) also
// yield true.
function u8isascii($str) {
  return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1);
}
// Convert the UTF-8 string $x to upper case.
// Strategy, cheapest first:
//   1. pure-ASCII input is handled by strtoupper();
//   2. otherwise mb_strtoupper() is used when mbstring is available;
//   3. otherwise the global $CaseConversions table (lower => upper) is
//      applied with str_replace().
// Returns the converted string.
function utf8toupper($x) {
  global $CaseConversions;
  // Key/value lists for str_replace(), built on first use of the table path.
  static $lower, $upper;

  // The checks below must read the parameter $x (not an undefined $str),
  // otherwise the fast paths never see the real input.
  if (u8isascii($x)) return strtoupper($x);

  // Pass the encoding explicitly so the result does not depend on the
  // current mb_internal_encoding() setting.
  if (function_exists('mb_strtoupper')) return mb_strtoupper($x, 'UTF-8');

  // A string of at most two bytes may itself be a table key: direct lookup.
  if (strlen($x) <= 2 && !empty($CaseConversions[$x]))
    return $CaseConversions[$x];

  // Still empty means "not built yet" (or the table was empty so far), so
  // a table populated after an earlier call is picked up here.
  if (!$lower) {
    $lower = array_keys((array)$CaseConversions);
    $upper = array_values((array)$CaseConversions);
  }
  return str_replace($lower, $upper, $x);
}
// Case-fold the UTF-8 string $x.
// Strategy, cheapest first:
//   1. pure-ASCII input is handled by strtolower();
//   2. otherwise mb_strtolower() is used when mbstring is available;
//   3. otherwise the global $StringFolding table (source => target) is
//      applied with str_replace().
// Returns the folded string.
// NOTE(review): mb_strtolower() and the $StringFolding table may not map
// every character identically -- confirm the two paths agree for the
// characters that matter before relying on this for comparisons.
function utf8fold($x) {
  global $StringFolding;
  // Key/value lists for str_replace(), built on first use of the table path.
  static $source, $target;

  // The checks below must read the parameter $x (not an undefined $str),
  // otherwise the fast paths never see the real input.
  if (u8isascii($x)) return strtolower($x);

  // Pass the encoding explicitly so the result does not depend on the
  // current mb_internal_encoding() setting.
  if (function_exists('mb_strtolower')) return mb_strtolower($x, 'UTF-8');

  // Still empty means "not built yet" (or the table was empty so far), so
  // a table populated after an earlier call is picked up here.
  if (!$source) {
    $source = array_keys((array)$StringFolding);
    $target = array_values((array)$StringFolding);
  }
  return str_replace($source, $target, $x);
}
Hi. Pm and I discussed these functions when utf8fold() was implemented (mid-April 2007). Reviews from various other people, and my own benchmarks on 6 different systems (CPUs/PHP versions), found that the mb_* functions were about as fast as the str_replace() approach that we use now. Considering that the mb_* functions were not widely available, Pm decided to keep it this way. This may change, though. --Petko May 07, 2009, at 09:45 PM