01076: optimization for case folding and strtoupper

Summary: optimization for case folding and strtoupper
Created: 2009-02-25 06:16
Status: Discussion
Category: Feature
From: CarlosAB
Assigned:
Priority: 1
Version: latest
OS: php/apache/freebsd

Description: Just a small optimization to utf8toupper and utf8fold for when pmwiki goes utf-8 only.

// Search for any bytes outside the ASCII range...
// Returns true when $str contains no byte above 0x7F (an empty string
// counts as ASCII), false as soon as one such byte is found.
function u8isascii($str) {

  // preg_match() returns 1 only when a non-ASCII byte was matched; both
  // "no match" (0) and a PCRE failure (false) are reported as true here.
  return (preg_match('/[^\x00-\x7F]/', $str) !== 1);

}

// Convert the UTF-8 string $x to upper case and return the result.
// Three strategies are tried, cheapest first:
//   1. pure ASCII input  -> native strtoupper()
//   2. mbstring present  -> mb_strtoupper() with an explicit UTF-8 encoding
//   3. otherwise         -> table lookup through the global $CaseConversions,
//      whose keys are treated as the lower-case forms and whose values are
//      treated as the matching upper-case forms.
function utf8toupper($x) {

  global $CaseConversions;
  static $lower, $upper;

  // The argument is $x; the earlier draft tested an undefined $str here,
  // so every call took the ASCII branch and returned an empty string.
  if (u8isascii($x)) return strtoupper($x);

  // Pass the encoding explicitly so the result does not depend on the
  // current mb_internal_encoding() setting.
  if (function_exists('mb_strtoupper')) return mb_strtoupper($x, 'UTF-8');

  // Short input (at most two bytes) that is itself a table key can be
  // answered with a single lookup.
  if (strlen($x) <= 2 && !empty($CaseConversions[$x]))
    return $CaseConversions[$x];

  // Split the table into search/replace lists once and keep them in the
  // static variables for later calls.
  if (!$lower) {
    $lower = array_keys((array)$CaseConversions);
    $upper = array_values((array)$CaseConversions);
  }
  return str_replace($lower, $upper, $x);

}

// Fold the UTF-8 string $x to lower case and return the result.
// Three strategies are tried, cheapest first:
//   1. pure ASCII input  -> native strtolower()
//   2. mbstring present  -> mb_strtolower() with an explicit UTF-8 encoding
//   3. otherwise         -> str_replace() using the global $StringFolding
//      table (keys are the search strings, values their replacements).
// NOTE(review): mb_strtolower() may not give exactly the same output as the
// $StringFolding table for every character -- confirm before relying on
// both paths being interchangeable.
function utf8fold($x) {

  global $StringFolding;
  static $source, $target;

  // The argument is $x; the earlier draft tested an undefined $str here,
  // so every call took the ASCII branch and returned an empty string.
  if (u8isascii($x)) return strtolower($x);

  // Pass the encoding explicitly so the result does not depend on the
  // current mb_internal_encoding() setting.
  if (function_exists('mb_strtolower')) return mb_strtolower($x, 'UTF-8');

  // Split the table into search/replace lists once and keep them in the
  // static variables for later calls.
  if (!$source) {
    $source = array_keys((array)$StringFolding);
    $target = array_values((array)$StringFolding);
  }
  return str_replace($source, $target, $x);

}

Hi. Pm and I discussed these functions when utf8fold() was implemented (mid-April 2007). Various reviews from other people, and my own benchmarks on 6 different systems (various CPUs and PHP versions), found that the mb_* functions were about as fast as the str_replace() approach that we use now. Considering that the mb_* functions were not widely available, Pm decided to keep it this way. This may change though. --Petko May 07, 2009, at 09:45 PM