00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
/*!
  Converts between UTF-8 encoded byte strings and arrays of integer
  character codes, and provides a few byte-length helpers driven by a lookup
  table keyed on the lead byte of a sequence.

  The encoder and decoder handle the historical 1 to 6 byte UTF-8 forms
  (values up to 31 bits). Methods refer to each other with the
  eZUTF8Codec::method() form.
*/
class eZUTF8Codec
{
    /*!
      Constructor; the codec keeps no per-instance state.
    */
    function eZUTF8Codec()
    {
    }

    /*!
      Decodes the UTF-8 string \a $str into an array of integer character codes.

      Sequences that fromUtf8() reports as invalid are skipped: nothing is
      appended for them and decoding resumes after the number of bytes that
      fromUtf8() reported through its length parameter.

      \return array of integers, empty for an empty string.
    */
    function convertStringToUnicode( $str )
    {
        $codes = array();
        $byteCount = strlen( $str );
        $offset = 0;
        while ( $offset < $byteCount )
        {
            // fromUtf8() leaves the length untouched for bytes that cannot start
            // a sequence, so preset it to 1 to guarantee forward progress.
            $charLen = 1;
            $code = eZUTF8Codec::fromUTF8( $str, $offset, $charLen );
            if ( $code !== false )
                $codes[] = $code;
            $offset += $charLen;
        }
        return $codes;
    }

    /*!
      Encodes the array of integer character codes \a $unicodeValues as one
      UTF-8 string.

      \return the encoded string, or \c false if \a $unicodeValues is not an array.
    */
    function convertUnicodeToString( $unicodeValues )
    {
        if ( !is_array( $unicodeValues ) )
            return false;
        $text = '';
        foreach ( $unicodeValues as $unicodeValue )
        {
            $text .= eZUTF8Codec::toUTF8( $unicodeValue );
        }
        return $text;
    }

    /*!
      Encodes the single character code \a $char_code as a UTF-8 byte sequence.

      The sequence length is chosen by testing which high bits are set:
      7 bits -> 1 byte, 11 -> 2, 16 -> 3, 21 -> 4, 26 -> 5, anything else -> 6.

      \return string holding the 1 to 6 encoded bytes.
    */
    function &toUTF8( $char_code )
    {
        if ( !( $char_code & 0xffffff80 ) )
        {
            // Plain 7-bit value, this includes 0.
            $char = chr( $char_code );
        }
        else if ( !( $char_code & 0xfffff800 ) )
        {
            $char = ( chr( 0xc0 | ( ( $char_code >> 6 ) & 0x1f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        else if ( !( $char_code & 0xffff0000 ) )
        {
            $char = ( chr( 0xe0 | ( ( $char_code >> 12 ) & 0x0f ) ) .
                      chr( 0x80 | ( ( $char_code >> 6 ) & 0x3f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        else if ( !( $char_code & 0xffe00000 ) )
        {
            $char = ( chr( 0xf0 | ( ( $char_code >> 18 ) & 0x07 ) ) .
                      chr( 0x80 | ( ( $char_code >> 12 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 6 ) & 0x3f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        else if ( !( $char_code & 0xfc000000 ) )
        {
            // This branch used to fall through into the 6-byte form because of
            // a missing break, producing six bytes for 5-byte values.
            $char = ( chr( 0xf8 | ( ( $char_code >> 24 ) & 0x03 ) ) .
                      chr( 0x80 | ( ( $char_code >> 18 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 12 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 6 ) & 0x3f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        else
        {
            $char = ( chr( 0xfc | ( ( $char_code >> 30 ) & 0x01 ) ) .
                      chr( 0x80 | ( ( $char_code >> 24 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 18 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 12 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 6 ) & 0x3f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        return $char;
    }

    /*!
      Decodes the UTF-8 sequence that starts at byte offset \a $offs of
      \a $multi_char.

      \param $multi_char string containing the encoded bytes.
      \param $offs byte offset of the first byte of the sequence.
      \param $len receives the byte length announced by the lead byte. It is
                  set even when the sequence turns out to be invalid, and is
                  left untouched when the byte at \a $offs cannot start a
                  sequence (a continuation byte, 0xfe or 0xff).
      \return the integer character code, or \c false when the offset is
              outside the string, the sequence is truncated, a continuation
              byte is malformed, or the value is an overlong encoding (it
              would fit in a shorter sequence).
    */
    function &fromUtf8( $multi_char, $offs, &$len )
    {
        $char_code = false;

        // Nothing to decode past the end of the string.
        if ( !isset( $multi_char[$offs] ) )
        {
            $len = 1;
            return $char_code;
        }

        $lead = ord( $multi_char[$offs] );
        if ( ( $lead & 0x80 ) == 0x00 )
        {
            $char_code = $lead;
            $len = 1;
            return $char_code;
        }

        // Work out the sequence length, the payload bits carried by the lead
        // byte and the smallest value that legitimately needs this length.
        if ( ( $lead & 0xe0 ) == 0xc0 )
        {
            $sequenceLength = 2;
            $leadMask = 0x1f;
            $minimum = 0x80;
        }
        else if ( ( $lead & 0xf0 ) == 0xe0 )
        {
            $sequenceLength = 3;
            $leadMask = 0x0f;
            $minimum = 0x800;
        }
        else if ( ( $lead & 0xf8 ) == 0xf0 )
        {
            $sequenceLength = 4;
            $leadMask = 0x07;
            $minimum = 0x10000;
        }
        else if ( ( $lead & 0xfc ) == 0xf8 )
        {
            $sequenceLength = 5;
            $leadMask = 0x03;
            $minimum = 0x200000;
        }
        else if ( ( $lead & 0xfe ) == 0xfc )
        {
            $sequenceLength = 6;
            $leadMask = 0x01;
            $minimum = 0x4000000;
        }
        else
        {
            // Not a valid lead byte; $len is deliberately left as passed in.
            return $char_code;
        }

        $len = $sequenceLength;
        $value = $lead & $leadMask;
        for ( $i = 1; $i < $sequenceLength; ++$i )
        {
            // A truncated sequence is invalid; do not read past the string.
            if ( !isset( $multi_char[$offs + $i] ) )
                return $char_code;
            $byte = ord( $multi_char[$offs + $i] );
            // Every trailing byte must look like 10xxxxxx.
            if ( ( $byte & 0xc0 ) != 0x80 )
                return $char_code;
            $value = ( $value << 6 ) + ( $byte & 0x3f );
        }

        // Reject overlong encodings. The previous code compared with "=="
        // instead of assigning, so this check never took effect.
        if ( $value >= $minimum )
            $char_code = $value;
        return $char_code;
    }

    /*!
      Returns the sequence-length lookup table, creating it on first use in
      $GLOBALS["eZUTF8LengthTable"].

      The table has 64 entries indexed by the top six bits of a byte
      (byte >> 2). An entry is the byte length of a sequence starting with
      that byte, or 0 when the byte is a continuation byte (0x80-0xbf).
    */
    function &utf8LengthTable()
    {
        $table =& $GLOBALS["eZUTF8LengthTable"];
        if ( !is_array( $table ) )
        {
            $table = array( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x00-0x3f
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x40-0x7f
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 0x80-0xbf
                            2, 2, 2, 2, 2, 2, 2, 2,                           // 0xc0-0xdf
                            3, 3, 3, 3,                                       // 0xe0-0xef
                            4, 4,                                             // 0xf0-0xf7
                            5,                                                // 0xf8-0xfb
                            6 );                                              // 0xfc-0xff
        }
        return $table;
    }

    /*!
      Returns the byte length announced by the byte at offset \a $pos of
      \a $str, looked up in utf8LengthTable(). The result is 0 when that byte
      is a continuation byte.
    */
    function characterByteLength( &$str, $pos )
    {
        $table =& eZUTF8Codec::utf8LengthTable();
        $leadByte = ord( $str[$pos] );
        return $table[( $leadByte >> 2 ) & 0x3f];
    }

    /*!
      Counts the characters in the UTF-8 string \a $str by hopping from lead
      byte to lead byte using utf8LengthTable(). Trailing bytes are not
      validated.

      A stray continuation byte where a lead byte is expected is counted as
      one character and skipped as a single byte; previously the zero table
      entry made the loop spin forever on such input.
    */
    function strlen( &$str )
    {
        $table =& eZUTF8Codec::utf8LengthTable();
        $byteCount = strlen( $str );
        $charCount = 0;
        $i = 0;
        while ( $i < $byteCount )
        {
            $step = $table[( ord( $str[$i] ) >> 2 ) & 0x3f];
            // Always advance by at least one byte so the loop terminates.
            if ( $step < 1 )
                $step = 1;
            $i += $step;
            ++$charCount;
        }
        return $charCount;
    }

    /*!
      Returns the shared codec object stored in
      $GLOBALS["eZUTF8CodecInstance"], creating it on the first call.
    */
    function &instance()
    {
        $instance =& $GLOBALS["eZUTF8CodecInstance"];
        // is_a() is used rather than comparing get_class() with a lowercased
        // name: get_class() returns the declared-case name on PHP 5 and later,
        // so the old comparison never matched and the object was rebuilt on
        // every call. is_a() also copes with the not-yet-set (null) case.
        if ( !is_a( $instance, 'eZUTF8Codec' ) )
        {
            $instance = new eZUTF8Codec();
        }
        return $instance;
    }
}
00270
00271 ?>