eZ Publish  [4.0]
ezconvertdbcharset.php
Go to the documentation of this file.
00001 #!/usr/bin/env php
00002 <?php
00003 //
00004 // Created on: <03-Dec-2007 09:51:56 dl>
00005 //
00006 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00007 // SOFTWARE NAME: eZ Publish
00008 // SOFTWARE RELEASE: 4.0.x
00009 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00010 // SOFTWARE LICENSE: GNU General Public License v2.0
00011 // NOTICE: >
00012 //   This program is free software; you can redistribute it and/or
00013 //   modify it under the terms of version 2.0  of the GNU General
00014 //   Public License as published by the Free Software Foundation.
00015 //
00016 //   This program is distributed in the hope that it will be useful,
00017 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00018 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019 //   GNU General Public License for more details.
00020 //
00021 //   You should have received a copy of version 2.0 of the GNU General
00022 //   Public License along with this program; if not, write to the Free
00023 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00024 //   MA 02110-1301, USA.
00025 //
00026 //
00027 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00028 //
00029 
00030 require 'autoload.php';
00031 
00032 
00033 
00034 define( 'EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME', 'ezcontentclass_attribute_tmp' );
00035 
00036 define( 'EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_MYSQL',
00037     "
00038     CREATE TEMPORARY TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " (
00039       id int(11) NOT NULL default '0',
00040       version int(11) NOT NULL default '0',
00041       is_always_available int(11) NOT NULL default '0',
00042       language_locale varchar(20) NOT NULL default '',
00043       name varchar(255) NOT NULL default '',
00044       PRIMARY KEY  (id,version,language_locale)
00045     )" );
00046 
00047 // create persistent tmp table since this table is needed between connect-sessions.
00048 define( 'EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_POSTGRESQL',
00049     "
00050     CREATE TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " (
00051         id integer DEFAULT 0 NOT NULL,
00052         version integer DEFAULT 0 NOT NULL,
00053         is_always_available integer DEFAULT 0 NOT NULL,
00054         language_locale character varying(20) DEFAULT ''::character varying NOT NULL,
00055         name character varying(255) DEFAULT ''::character varying NOT NULL
00056     )" );
00057 
00058 define( 'EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_ORACLE',
00059     "
00060     CREATE GLOBAL TEMPORARY TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " (
00061         id number(11) DEFAULT 0 NOT NULL,
00062         version number(11) DEFAULT 0 NOT NULL,
00063         is_always_available number(11) DEFAULT 0 NOT NULL,
00064         language_locale varchar2(20) NOT NULL,
00065         name varchar2(255) NOT NULL
00066     ) ON COMMIT PRESERVE ROWS;" );
00067 
00068 define( 'EZ_DROP_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL',
00069     "DROP TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME );
00070 
00071 
00072 
00073 /**
00074  * Class used to store some of the command line arguments
00075  **/
00076 class CommandLineArguments
00077 {
00078     protected static $iconvCharacterSet = false;
00079     protected static $logFilename = false;
00080 
00081     function iconvCharacterSet()
00082     {
00083         return CommandLineArguments::$iconvCharacterSet;
00084     }
00085 
00086     function setIconvCharacterSet( $iconvCharacterSet )
00087     {
00088         CommandLineArguments::$iconvCharacterSet = $iconvCharacterSet;
00089     }
00090 
00091     function logFilename()
00092     {
00093         return CommandLineArguments::$logFilename;
00094     }
00095 
00096     function setLogFilename( $logFilename )
00097     {
00098         CommandLineArguments::$logFilename = $logFilename;
00099     }
00100 
00101 }
00102 
00103 
00104 /**************************************************************
00105 * 'cli->output' wrappers                                      *
00106 ***************************************************************/
00107 function showError( $message, $addEOL = true, $bailOut = true )
00108 {
00109     global $cli, $script, $eZDir;
00110 
00111     $cli->output( $cli->stylize( 'error', "Error: " .  $message ), $addEOL );
00112 
00113     if( $bailOut )
00114     {
00115         chdir( $eZDir ); // since it might have changed while running...
00116         $script->shutdown( 1 );
00117     }
00118 }
00119 
00120 function showWarning( $message, $addEOL = true )
00121 {
00122     global $cli;
00123     $cli->output( $cli->stylize( 'warning', "Warning: " . $message ), $addEOL );
00124 }
00125 
00126 function showNotice( $message, $addEOL = true )
00127 {
00128     global $cli;
00129     $cli->output( $cli->stylize( 'notice', "Notice: " ) .  $message, $addEOL );
00130 }
00131 
00132 function showMessage( $message, $addEOL = true )
00133 {
00134     global $cli;
00135     $cli->output( $cli->stylize( 'blue', $message ), $addEOL );
00136 }
00137 
00138 function showMessage2( $message, $addEOL = true )
00139 {
00140     global $cli;
00141     $cli->output( $cli->stylize( 'red', $message ), $addEOL );
00142 }
00143 
00144 function showMessage3( $message, $addEOL = true )
00145 {
00146     global $cli;
00147     $cli->output( $message, $addEOL );
00148 }
00149 
00150 
00151 /*!
00152  prompt user to choose what to do next
00153 */
00154 function eZGetUserInput( $prompt )
00155 {
00156     echo $prompt;
00157 
00158     $userInput = fgets( STDIN );
00159     $userInput = trim( $userInput, "\n\r" );
00160 
00161     return $userInput;
00162 }
00163 
00164 function eZExecuteShellCommand( $command, $errMessage = '', $retry = true, $ignore = false )
00165 {
00166     $err = 0;
00167     do
00168     {
00169         $out = system( $command, $err );
00170         if ( $err )
00171         {
00172             if ( $errMessage )
00173             {
00174                 showMessage2( $errMessage );
00175             }
00176 
00177             if ( $retry )
00178             {
00179                 do
00180                 {
00181                     $action = $ignore ? eZGetUserInput( "Retry? [y/n/Ignore]: " ) : eZGetUserInput( "Retry? [y/n]: " );
00182                     if ( strpos( $action, 'y' ) === 0 )
00183                     {
00184                         $continue = false;
00185                     }
00186                     elseif ( $ignore && strpos( $action, 'I' ) === 0 )
00187                     {
00188                         $continue = true;
00189                         $retry = false;
00190                     }
00191                     else
00192                     {
00193                         // default action is not to retry but to abort
00194                         showError( "Aborting..." );
00195                     }
00196                 }
00197                 while ( !$continue );
00198             }
00199             else
00200             {
00201                 showError( "Aborting..." );
00202             }
00203         }
00204         else
00205         {
00206             $retry = false;
00207         }
00208     }
00209     while ( $retry );
00210 
00211     return $err;
00212 }
00213 
00214 /**************************************************************
00215 * helper functions                                            *
00216 ***************************************************************/
00217 /*!
00218  process xml attributes info
00219  \return \c false or an array of table infos.
00220 */
00221 function parseXMLAttributesOption( $xmlAttributesOption )
00222 {
00223     if ( !$xmlAttributesOption )
00224     {
00225         return false;
00226     }
00227 
00228     $xmlAttributesInfo = array();
00229 
00230     $xmlAttributesOption = split( ',', $xmlAttributesOption );
00231     foreach ( $xmlAttributesOption as $attributeTableInfoOption )
00232     {
00233         $attributeTableInfo = split( '\.', $attributeTableInfoOption );
00234         switch ( count( $attributeTableInfo ) )
00235         {
00236             case 1:
00237                 {
00238                     $attributeTableInfo = array( 'datatype' => trim( $attributeTableInfo[0] ),
00239                                                  'table' => 'ezcontentobject_attribute',
00240                                                  'data_field' => 'data_text' );
00241                 } break;
00242             case 3:
00243                 {
00244                     $attributeTableInfo = array( 'datatype' => trim( $attributeTableInfo[0] ),
00245                                                  'table' => trim( $attributeTableInfo[1] ),
00246                                                  'data_field' => trim( $attributeTableInfo[2] ) );
00247                 } break;
00248             default:
00249                 {
00250                     showError( "invalid 'extra-xml-attributes' '$attributeTableInfoOption' option" );
00251                 } break;
00252         }
00253 
00254         $xmlAttributesInfo[] = $attributeTableInfo;
00255     }
00256 
00257     return $xmlAttributesInfo;
00258 }
00259 
00260 /*!
00261   process custom xml data info
00262   \retruns \c false of an array of table infos.
00263 */
00264 function parseCustomXMLDataOption( $xmlCustomDataOption )
00265 {
00266     if ( !$xmlCustomDataOption )
00267     {
00268         return false;
00269     }
00270 
00271     $xmlCustomDataInfo = array();
00272 
00273     $xmlCustomDataOption = split( ',', $xmlCustomDataOption );
00274     foreach ( $xmlCustomDataOption as $tableInfoOption )
00275     {
00276         $tableInfo = split( '\.', $tableInfoOption );
00277         switch ( count( $tableInfo ) )
00278         {
00279             case 2:
00280                 {
00281                     $tableInfo = array( 'table' => trim( $tableInfo[0] ),
00282                                         'data_field' => trim( $tableInfo[1] ) );
00283                 } break;
00284             default:
00285                 {
00286                     showError( "invalid 'extra-xml-data' '$tableInfoOption' option" );
00287                 } break;
00288         }
00289 
00290         $xmlCustomDataInfo[] = $tableInfo;
00291     }
00292 
00293     return $xmlCustomDataInfo;
00294 }
00295 
00296 /*!
00297  process custom xml data info
00298  \returns \c false or an array of table infos.
00299 */
00300 function parseCustomSerializedDataOption( $serializedCustomDataOption )
00301 {
00302     if ( !$serializedCustomDataOption )
00303     {
00304         return false;
00305     }
00306 
00307     $db = eZDB::instance();
00308 
00309     $serializedDataInfo = array();
00310 
00311     $serializedCustomDataOption = split( ',', $serializedCustomDataOption );
00312     foreach ( $serializedCustomDataOption as $tableInfoOption )
00313     {
00314         $tableInfo = split( '\;', $tableInfoOption );
00315         if ( count( $tableInfo ) != 2 )
00316         {
00317             showError( "invalid 'extra-serialized-data' '$tableInfoOption' option" );
00318         }
00319 
00320         $dataInfo = split( '\.', $tableInfo[0] );
00321         $keyInfo = split( '\.', $tableInfo[1] );
00322 
00323         switch ( count( $dataInfo ) )
00324         {
00325             case 2:
00326                 {
00327                     $dataInfo = array( 'table' => trim( $dataInfo[0] ),
00328                                        'data_field' => trim( $dataInfo[1] ) );
00329                 } break;
00330             default:
00331                 {
00332                     showError( "invalid 'extra-serialized-data' '$tableInfoOption' option" );
00333                 } break;
00334         }
00335 
00336         foreach ( array_keys( $keyInfo ) as $key => $value )
00337         {
00338             trim( $keyInfo[$key] );
00339             // check column exists
00340             $result = $db->query( "SELECT " . $keyInfo[$key] . " from " . $dataInfo['table'] . " limit 1" );
00341             if ( $result === false )
00342             {
00343                 showError( "invalid 'extra-serialized-data' '$tableInfoOption' option" );
00344             }
00345         }
00346 
00347         $serializedDataInfo[] = array( 'table' => $dataInfo['table'],
00348                                        'data_field' => $dataInfo['data_field'],
00349                                        'blob_field' => $dataInfo['data_field'] . "_blob",
00350                                        'keys' => $keyInfo );
00351     }
00352 
00353     return $serializedDataInfo;
00354 }
00355 
00356 
00357 
00358 /*!
00359  Check db driver
00360 */
00361 function checkDBDriver()
00362 {
00363     $db = eZDB::instance();
00364     $dbType = $db->databaseName();
00365     switch( strtolower( $dbType ) )
00366     {
00367         case 'mysql':
00368         case 'postgresql':
00369         case 'oracle':
00370             break;
00371         default:
00372             {
00373                 return false;
00374             } break;
00375     }
00376 
00377     return true;
00378 }
00379 
00380 /*!
00381  Check db charset
00382 */
00383 function checkDBCharset( $iconvCharacterSet )
00384 {
00385     $db = eZDB::instance();
00386     
00387     if ( $iconvCharacterSet !== false )
00388     {
00389         $dbCharset = $iconvCharacterSet;
00390         showMessage3( 'Overriding iconv character set. Configured to be "' . $db->charset() . "\" in settings but will be using \"$dbCharset\" for iconv() conversion instead." );
00391     } else
00392     {
00393         $dbCharset = $db->charset();
00394     }
00395 
00396     switch( strtolower( $dbCharset ) )
00397     {
00398         case 'utf8':
00399         case 'utf-8':
00400             {
00401                 return false;
00402             } break;
00403         default:
00404             break;
00405     }
00406 
00407     return true;
00408 }
00409 
00410 /*!
00411  DB specific checks. A string is returned for error conditions (script halts after printing it)
00412 */
00413 function checkDBExtraConditions()
00414 {
00415     $db = eZDB::instance();
00416 
00417     $function = "checkDBExtraConditions" . strtoupper( $db->databaseName() );
00418     if ( function_exists( $function ) )
00419     {
00420         return $function();
00421     }
00422 
00423     return true;
00424 }
00425 
00426 /*!
00427  @todo We should really check for exp_full, imp_full, (sysdba) privileges rather than DBA role...
00428  @todo add check for RAC - do not try anything in such a case
00429 */
00430 function checkDBExtraConditionsORACLE()
00431 {
00432     global $oracleDbaAccount, $oracleHome;
00433 
00434     //$db = eZDB::instance();
00435     global $db;
00436 
00437     showMessage3( '' );
00438     showWarning( "The procedure of upgrading the character set of an Oracle database needs manual intervention.\n".
00439                  "This upgrade script will only create a database export, and later reimport it, leaving to\n".
00440                  "the end user (you) the responsibility of changing database character set.\n".
00441                  "Please refer to Oracle documentation for instructions on altering a database character set\n".
00442                  "(it usually involves creating a new database from scratch, and it always has effect on all\n".
00443                  "schemas contained in the database)." );
00444     showMessage3( '' );
00445     $continue = eZGetUserInput( "Do you want to continue?  (y to accept) " );
00446 
00447     if ( $continue != 'y' && $continue != 'Y' )
00448     {
00449         return "Aborting";
00450     }
00451 
00452     $oracleDbaAccount = false;
00453     while( $oracleDbaAccount === false )
00454     {
00455         // NB: we need export_full privileges for full export.
00456         // We could need SYSDBA for connecting AS SYSDBA and altering db - IF IT WORKED...
00457         // In that case we should try to support OSOPER connections, too...
00458         if ( $db->isConnected() )
00459         {
00460             $dba = $db->arrayQuery("select default_role from user_role_privs where granted_role in ('DBA')");
00461             //$dba = $db->arrayQuery("select sysdba from V\$PWFILE_USERS where username='".strtoupper($db->User)."'");
00462         }
00463         else
00464         {
00465             showWarning( "Supplied username/password are incorrect" );
00466             $dba = false;
00467         }
00468         if (count($dba) && $dba[0]['default_role'] == 'YES')
00469         //if (count($dba) && $dba[0]['sysdba'] == 'TRUE')
00470         {
00471             $oracleDbaAccount = array( 'user' => $db->User, 'password' => $db->Password );
00472             // reconnect using standard credentials, in case we have logged in as admin
00473             $db->close();
00474             $db = eZDB::instance( false, false, true );
00475         }
00476         else
00477         {
00478             //$account = eZGetUserInput( "Please enter username/password of a valid SYSDBA account (return to abort): ");
00479             $account = eZGetUserInput( "Please enter username/password of a valid DBA account (return to abort): ");
00480             if ( $account == "" )
00481             {
00482                 //return "The charset conversion script needs to run with an oracle user account that has SYSDBA privileges. Aborting.";
00483                 return "The charset conversion script needs to run with an oracle user account that has DBA privileges. Aborting.";
00484             }
00485             else
00486             {
00487                 $account = explode( '/', $account );
00488                 if ( count( $account ) > 1 )
00489                 {
00490                     $db->close();
00491                     $db = eZDB::instance( false, array( 'user' => $account[0], 'password' => $account[1] ), true );
00492                 }
00493             }
00494         }
00495     } // while
00496 
00497     $oracleHome = getenv( 'ORACLE_HOME' );
00498     if ( $oracleHome != "" )
00499     {
00500         $ok = eZGetUserInput( "Found ORACLE_HOME $oracleHome, is this the correct directory? (y to accept) ");
00501         if ( $ok != "y" && $ok != "Y" )
00502         {
00503             $oracleHome = '';
00504         }
00505     }
00506     $exe = eZSys::osType() == 'win32' ? '.exe' : '';
00507     //while( !is_dir( $oracleHome ) || !file_exists( $oracleHome . '/bin/sqlplus' . $exe ) || !file_exists( $oracleHome . '/bin/imp' . $exe ) || !file_exists( $oracleHome . '/bin/exp' . $exe ) )
00508     while( !is_dir( $oracleHome ) || !file_exists( $oracleHome . '/bin/imp' . $exe ) || !file_exists( $oracleHome . '/bin/exp' . $exe ) )
00509     {
00510         if ( $oracleHome != "" )
00511         {
00512             showWarning( "imp or exp tools not found in ORACLE_HOME $oracleHome" );
00513         }
00514         $oracleHome = eZGetUserInput( "Please enter the path to ORACLE_HOME, where the imp and exp tools are (return to abort): ");
00515         if ( $oracleHome == "" )
00516         {
00517             return "The charset conversion script needs to run with an oracle home where the imp and exp tools are available. Aborting.";
00518         }
00519     }
00520 
00521     return true;
00522 }
00523 
00524 /**************************************************************
00525 * handle content class / content class attributes             *
00526 ***************************************************************/
00527 function createContentClassAttributeTempTable()
00528 {
00529     $db = eZDB::instance();
00530     $sql = 'EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_' . strtoupper( $db->databaseName() );
00531     $sql = constant( $sql );
00532     $db->query( $sql );
00533 }
00534 
00535 function dropContentClassAttributeTempTable()
00536 {
00537     $db = eZDB::instance();
00538     $sql = EZ_DROP_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL;
00539     $db->query( $sql );
00540 }
00541 
00542 function unserializeContentClassAttributeNames()
00543 {
00544     $db = eZDB::instance();
00545 
00546     $attributeName = new eZContentClassAttributeNameList();
00547 
00548     $limit = 100;
00549     $offset = 0;
00550     $selectSQL = "SELECT id, version, serialized_name_list FROM ezcontentclass_attribute ORDER BY id, version";
00551 
00552     while ( $result = $db->arrayQuery( $selectSQL , array( 'limit' => $limit, 'offset' => $offset ) ) )
00553     {
00554         foreach ( $result as $row )
00555         {
00556             //showMessage3( "id: '" . $row['id'] . "' version: '" . $row['version'] . "'" );
00557 
00558             $attributeName->initFromSerializedList( $row['serialized_name_list'] );
00559 
00560             $nameList = $attributeName->cleanNameList();
00561             $alwaysAvailableLocale = $attributeName->alwaysAvailableLanguageLocale();
00562 
00563             $insertSQL = 'INSERT INTO ' . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . '(id, version, is_always_available, language_locale, name) VALUES (' .
00564                 "{$row['id']}, {$row['version']}";
00565 
00566             foreach ( $nameList as $locale => $name )
00567             {
00568                 $isAlwaysAvailable = ( $locale == $alwaysAvailableLocale ) ? 1 : 0;
00569 
00570                 $sql = $insertSQL . ", $isAlwaysAvailable, '" . $db->escapeString( $locale) . "', '" . $db->escapeString( $name ) . "')";
00571                 $db->query( $sql );
00572             }
00573         }
00574         $offset += $limit;
00575     }
00576 }
00577 
00578 
00579 function serializeContentClassNames()
00580 {
00581     $selectSQL = "SELECT contentclass_id as id,\n" .
00582         "     contentclass_version as version,\n" .
00583         "     language_id as is_always_available,\n" .
00584         "     language_locale, name\n" .
00585         "FROM ezcontentclass_name \n" .
00586         "ORDER BY id, version";
00587 
00588     $table = 'ezcontentclass';
00589 
00590     serializeNames( $selectSQL, $table );
00591 }
00592 
00593 function serializeContentClassAttributeNames()
00594 {
00595     $selectSQL = "SELECT * FROM " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " ORDER BY id, version";
00596     $table = 'ezcontentclass_attribute';
00597 
00598     serializeNames( $selectSQL, $table );
00599 }
00600 
00601 
00602 function serializeNames( $selectSQL, $storeToTable )
00603 {
00604     $db = eZDB::instance();
00605 
00606     $limit = 100;
00607     $offset = 0;
00608 
00609     //$selectSQL .= "\nLIMIT $limit";
00610 
00611     while ( $result = $db->arrayQuery( $selectSQL , array( 'limit' => $limit, 'offset' => $offset ) ) )
00612     {
00613         // since name data is splitted between rows,
00614         // need to adjust selected data:
00615         // exclude the last id/version and process it during next 'select' iteration
00616         // 1. get last id/version pair
00617         $lastIdx = count( $result ) - 1;
00618         if ( $lastIdx > 0 )
00619         {
00620             $lastID = $result[$lastIdx]['id'];
00621             $lastVersion = $result[$lastIdx]['version'];
00622 
00623             // 2. check remained data
00624             for ( $lastIdx = $lastIdx - 1; $lastIdx >= 0; $lastIdx-- )
00625             {
00626                 $row = $result[$lastIdx];
00627                 if ( $lastID != $row['id'] || $lastVersion != $row['version'] )
00628                 {
00629                     break;
00630                 }
00631             }
00632 
00633             // 3. check whether $lastIdx is valid
00634             if ( $lastIdx < 0 )
00635             {
00636                 // all selected data belongs to the same id/version
00637                 $lastIdx = count( $result ) - 1;
00638             }
00639         }
00640 
00641         // 4. adjust offset to include excluded data to the next 'select'
00642         $offset += $lastIdx + 1;
00643 
00644         // process selected data
00645         $serializedName = false;
00646         $prevId = false;
00647         $prevVersion = false;
00648         for ( $idx = 0; $idx <= $lastIdx; $idx++ )
00649         {
00650             $row = $result[$idx];
00651 
00652             // check whether serialized name is completely assembled
00653             if ( $prevId != $row['id'] || $prevVersion != $row['version'] )
00654             {
00655                 if ( $serializedName !== false )
00656                 {
00657                     // store serialized name
00658                     storeSerializedName( $serializedName, $prevId, $prevVersion, $storeToTable );
00659                 }
00660 
00661                 // create new $serializedName to collect data
00662                 $serializedName = new eZSerializedObjectNameList();
00663                 $serializedName->resetNameList();
00664             }
00665 
00666             $prevId = $row['id'];
00667             $prevVersion = $row['version'];
00668 
00669             $serializedName->setNameByLanguageLocale( $row['name'], $row['language_locale'] );
00670 
00671             if ( $row['is_always_available'] & 1 )
00672             {
00673                 $serializedName->setAlwaysAvailableLanguage( $row['language_locale'] );
00674             }
00675 
00676             if ( $idx == $lastIdx )
00677             {
00678                 // no more date => store serialized name
00679                 storeSerializedName( $serializedName, $prevId, $prevVersion, $storeToTable );
00680             }
00681         }
00682     }
00683 }
00684 
00685 function storeSerializedName( $serializedName, $id, $version, $table )
00686 {
00687     if ( $serializedName instanceof eZSerializedObjectNameList )
00688     {
00689         $serializedNameString = $serializedName->serializeNames();
00690 
00691         $db = eZDB::instance();
00692 
00693         $updateSQL = "UPDATE $table\n" .
00694             "SET serialized_name_list = '" . $db->escapeString( $serializedNameString ) . "'\n" .
00695             "WHERE id = $id AND version = $version";
00696 
00697         $db->query( $updateSQL );
00698     }
00699 }
00700 
00701 /**************************************************************
00702 * handle custom serialized data                               *
00703 ***************************************************************/
00704 
00705 /*!
00706  Logic:
00707   - create binary column as temp storage to not loose data when table will be converted
00708   - get original data
00709   - unseiazlize
00710   - convert data to utf-8
00711   - serialize
00712   - store data in binary column
00713  */
00714 function convertSerializedData( $serializedDataInfo )
00715 {
00716     if ( !is_array( $serializedDataInfo ) )
00717     {
00718         return;
00719     }
00720 
00721     $db = eZDB::instance();
00722 
00723     // create blob column
00724     $function = "createBLOBColumn" . strtoupper( $db->databaseName() );
00725     if ( function_exists( $function ) )
00726     {
00727         foreach ( $serializedDataInfo as $tableInfo )
00728         {
00729             $function( $tableInfo );
00730         }
00731     }
00732     else
00733     {
00734         showError( "no function to create BLOB column" );
00735     }
00736 
00737     // convert data
00738     $dbEncoding = strtolower( CommandLineArguments::iconvCharacterSet() );
00739 
00740     foreach ( $serializedDataInfo as $tableInfo )
00741     {
00742         showMessage3( $tableInfo['table'] . '.' . $tableInfo['data_field'] );
00743 
00744         $limit = 100;
00745         $offset = 0;
00746 
00747         $keysString = implode( ', ', $tableInfo['keys'] );
00748         $dataFieldName = $tableInfo['data_field'];
00749         $selectSQL = "SELECT " . $keysString . ', ' . $dataFieldName .
00750                      " FROM " . $tableInfo['table'];
00751                      //" LIMIT $limit";
00752 
00753         while ( $result = $db->arrayQuery( $selectSQL, array( 'limit' => $limit, 'offset' => $offset ) ) )
00754         {
00755             foreach ( $result as $row )
00756             {
00757                 $data = unserialize( $row[$dataFieldName] );
00758                 if ( !$data )
00759                 {
00760                     // nothing to do
00761                     continue;
00762                 }
00763 
00764                 $data = convertArray( $data, $dbEncoding, 'utf8' );
00765                 $data = serialize( $data );
00766 
00767                 $whereSql = '';
00768                 foreach ( $tableInfo['keys'] as $key )
00769                 {
00770                     if ( $whereSql != '' )
00771                     {
00772                         $whereSql .= " AND ";
00773                     }
00774                     $whereSql .= "$key = " . $row[$key];
00775                 }
00776 
00777                 $updateSql = "UPDATE " . $tableInfo['table'] .
00778                              " SET " . $tableInfo['blob_field'] . " = '" . $data . "'" .
00779                              " WHERE $whereSql";
00780 
00781                 $db->query( $updateSql );
00782             }
00783             $offset += $limit;
00784         }
00785 
00786     }
00787 }
00788 
00789 /*!
00790  Restore data from binary column
00791  */
00792 function restoreSerializedData( $serializedDataInfo )
00793 {
00794     if ( !is_array( $serializedDataInfo ) )
00795     {
00796         return;
00797     }
00798 
00799     $db = eZDB::instance();
00800 
00801     foreach ( $serializedDataInfo as $tableInfo )
00802     {
00803         $sql = "UPDATE " . $tableInfo['table'] .
00804                " SET " . $tableInfo['data_field'] . ' = ' . $tableInfo['blob_field'];
00805 
00806         $db->query( $sql );
00807     }
00808 }
00809 
00810 function dropBLOBColumns( $serializedDataInfo )
00811 {
00812     if ( !is_array( $serializedDataInfo ) )
00813     {
00814         return;
00815     }
00816 
00817     foreach ( $serializedDataInfo as $tableInfo )
00818     {
00819         dropBLOBColumn( $tableInfo );
00820     }
00821 }
00822 
00823 function convertArray( $array, $inCharset, $outCharset )
00824 {
00825     if ( !is_array( $array ) )
00826     {
00827         showError( "convertArray: not an array was passed" );
00828         var_dump( $array );
00829         return;
00830     }
00831 
00832     foreach ( array_keys( $array ) as $key )
00833     {
00834         $value = $array[$key];
00835 
00836         if ( is_string( $value ) )
00837         {
00838             // First check if $inCharset is correct
00839             $valueTest = iconv( $inCharset, $inCharset, $value );
00840             if( strlen( $valueTest ) <> strlen( $value ) )
00841             {
00842                 if( CommandLineArguments::logFilename() !== false )
00843                 {
00844                     $logString = "ERROR: Unable to predict correct character set while converting array. Value is $value, inCharset : $inCharset, outCharSet : $outCharSet\n";
00845                     file_put_contents( CommandLineArguments::logFilename(), $logString, FILE_APPEND );
00846                 }
00847             }
00848             $array[$key] = iconv( $inCharset, $outCharset . '//TRANSLIT', $value );
00849         }
00850         else if ( is_array( $value ) )
00851         {
00852             $array[$key] = convertArray( $value, $inCharset, $outCharset );
00853         }
00854         else
00855         {
00856             //nothing to do
00857         }
00858     }
00859 
00860     return $array;
00861 }
00862 
00863 function createBLOBColumnMYSQL( $tableInfo )
00864 {
00865     $db = eZDB::instance();
00866     $query = "ALTER TABLE " . $tableInfo['table'] . " ADD COLUMN " . $tableInfo['blob_field'] . " BLOB";
00867     $db->query( $query );
00868 }
00869 
00870 function createBLOBColumnPOSTGRESQL( $tableInfo )
00871 {
00872 
00873 }
00874 
00875 
00876 function createBLOBColumnORACLE( $tableInfo )
00877 {
00878     $db = eZDB::instance();
00879     $query = "ALTER TABLE " . $tableInfo['table'] . " ADD " . $tableInfo['blob_field'] . " BLOB";
00880     $db->query( $query );
00881 }
00882 
00883 function dropBLOBColumn( $tableInfo )
00884 {
00885     $db = eZDB::instance();
00886     $query = "ALTER TABLE " . $tableInfo['table'] . " DROP COLUMN " . $tableInfo['blob_field'];
00887     $db->query( $query );
00888 }
00889 
00890 /**************************************************************
00891 * handle xml data                                             *
00892 ***************************************************************/
00893 function convertXMLDatatypes( $tableInfoList )
00894 {
00895     foreach ( $tableInfoList as $tableInfo )
00896     {
00897         showMessage3( "  converting '" . $tableInfo['datatype'] . "': " . $tableInfo['table'] . "." . $tableInfo['data_field'] );
00898 
00899         convertXMLData( $tableInfo, "xmlDatatypeSelectSQL", "xmlDatatypeUpdateSQL", "convertXMLDatatypeProgress" );
00900     }
00901 }
00902 
00903 function convertCustomXMLData( $tableInfoList )
00904 {
00905     foreach ( $tableInfoList as $tableInfo )
00906     {
00907         showMessage3( "  converting: '" . $tableInfo['table'] . "." . $tableInfo['data_field'] . "' table" );
00908 
00909         convertXMLData( $tableInfo, "xmlCustomDataSelectSQL", "xmlCustomDataUpdateSQL", "convertXMLCustomDataProgress" );
00910     }
00911 }
00912 
00913 function xmlDatatypeSelectSQL( $dataTableInfo )
00914 {
00915     $table = $dataTableInfo['table'];
00916     $data_field = $dataTableInfo['data_field'];
00917     $datatype = $dataTableInfo['datatype'];
00918 
00919     $selectSQL = "SELECT id, version, $data_field as xml_data\n" .
00920                  "FROM $table\n" .
00921                  "WHERE $data_field LIKE '<?xml%'\n" .
00922                     "AND data_type_string = '$datatype'\n" .
00923                     "ORDER BY id, version";
00924 
00925     return $selectSQL;
00926 }
00927 
00928 function xmlDatatypeUpdateSQL( $dataTableInfo, $row )
00929 {
00930     $db = eZDB::instance();
00931 
00932     $table = $dataTableInfo['table'];
00933     $data_field = $dataTableInfo['data_field'];
00934 
00935     $updateSQL = "UPDATE $table\n" .
00936                  "SET $data_field = '" . $db->escapeString( $row['xml_data'] ) . "'\n" .
00937                  "WHERE id = " . $row['id'] . "\n" .
00938                     "AND version = " . $row['version'];
00939 
00940     return $updateSQL;
00941 }
00942 
00943 function convertXMLDatatypeProgress( $row )
00944 {
00945     //showMessage3( "id: '" . $row['id'] . "' version: '" . $row['version'] . "'" );
00946 }
00947 
00948 function xmlCustomDataSelectSQL( $dataTableInfo )
00949 {
00950     $table = $dataTableInfo['table'];
00951     $data_field = $dataTableInfo['data_field'];
00952 
00953     $selectSQL = "SELECT id, $data_field as xml_data\n" .
00954                  "FROM $table\n" .
00955                  "WHERE $data_field LIKE '<?xml%'\n" .
00956                  "ORDER BY id";
00957 
00958     return $selectSQL;
00959 }
00960 
00961 function xmlCustomDataUpdateSQL( $dataTableInfo, $row )
00962 {
00963     $db = eZDB::instance();
00964 
00965     $table = $dataTableInfo['table'];
00966     $data_field = $dataTableInfo['data_field'];
00967 
00968     $updateSQL = "UPDATE $table\n" .
00969                  "SET $data_field = '" . $db->escapeString( $row['xml_data'] ) . "'\n" .
00970                  "WHERE id = " . $row['id'];
00971 
00972     return $updateSQL;
00973 }
00974 
00975 function convertXMLCustomDataProgress( $row )
00976 {
00977     showMessage3( "id: '" . $row['id'] );
00978 }
00979 
00980 /**
00981  * For some reason, some utf8 encoded text stored in the db might contain
00982  * illegal utf8 characters.
00983  * This function will strip/replace such known characters
00984  **/
00985 function removeIllegalUTF8Characters( $text )
00986 {
00987     // 0xE2 0x80 0x3F seems to be some kind of quote, replacing it with '
00988     // 0x3F is acutally a "?" and needs to be escaped
00989     return ereg_replace( "\xE2\x80\\\x3F", "'", $text );
00990 }
00991 
00992 /*!
00993  Convert xml text to db's charset. However for optimization the xml processing instruction 'encoding' will be set
00994  to utf-8.
00995  */
00996 function convertXMLData( $tableInfo, $xmlDataSelectSQLFunction, $xmlDataUpdateSQLFunction, $convertXMLProgressFunction )
00997 {
00998     $db = eZDB::instance();
00999 
01000     $dbEncoding = strtolower( CommandLineArguments::iconvCharacterSet() );
01001 
01002     $limit = 500;
01003     $offset = 0;
01004 
01005     $selectSQL = $xmlDataSelectSQLFunction( $tableInfo );
01006     //$selectSQL .= "\nLIMIT $limit";
01007 
01008     while ( $result = $db->arrayQuery( $selectSQL, array( 'limit' => $limit, 'offset' => $offset ) ) )
01009     {
01010         foreach ( $result as $row )
01011         {
01012             $convertXMLProgressFunction( $row );
01013 
01014             $xmlString = $row['xml_data'];
01015 
01016             $xmlEncoding = false;
01017             if ( ereg( '^<\?xml[^>]+encoding="([^"]+)"', $xmlString, $match ) )
01018             {
01019                 $xmlEncoding = strtolower( $match[1] );
01020             }
01021 
01022             // The big problem with iconv() is that it rearly returns false on errors
01023             // This is the reason why the dection methods are as they are below.....
01024             if ( !$xmlEncoding )
01025             {
01026                 // Encoding for xml was not found. Let's now check if it is in utf8
01027                 // We do this by converting the document from utf8 to utf8
01028                 // If the size is *not* the same, then it is not in utf8
01029                 
01030                 $xmlStringTest = removeIllegalUTF8Characters( $xmlStringTest );
01031                 $encodedXMLTest = iconv( 'utf8', 'utf8', $xmlStringTest );
01032                 if ( strlen( $encodedXMLTest ) <> strlen( $xmlStringTest ) )
01033                 {
01034                     // Now, lets do a final attempt and see if text is in $dbEncoding
01035                     $encodedXMLTest = iconv( $dbEncoding, $dbEncoding, $xmlString );
01036                     if ( strlen( $encodedXMLTest ) === strlen( $xmlString ) )
01037                     {
01038                         // data seems to be in $dbEncoding
01039                         if( CommandLineArguments::logFilename() !== false )
01040                         {
01041                             $logString = "WARNING: Encoding for xml was not defined but was autodetected to be in $dbEncoding. AttributeID : " . $row['id'] . ', version : ' . $row['version'] .  "\n";
01042                             file_put_contents( CommandLineArguments::logFilename(), $logString, FILE_APPEND );
01043                          }
01044                     }
01045                     else
01046                     {
01047                         // data is not in $dbEncoding either
01048                         if( CommandLineArguments::logFilename() !== false )
01049                         {
01050                             $logString = 'ERROR: Encoding for xml was not defined and not autodetected either. Tried both utf8 and ' . $dbEncoding . '. AttributeID : ' . $row['id'] . ', version : ' . $row['version'] .  "\n";
01051                             file_put_contents( CommandLineArguments::logFilename(), $logString, FILE_APPEND );
01052                          }
01053                     }
01054                 } else
01055                 {
01056                     $xmlString = $xmlStringTest;
01057                     // data is in utf8
01058                     if( CommandLineArguments::logFilename() !== false )
01059                     {
01060                         $logString = 'WARNING: Encoding in xml was not defined in xml header. However, character set was autodetected to be utf8. AttributeID : ' . $row['id'] . ', version : ' . $row['version'] .  "\n";
01061                         file_put_contents( CommandLineArguments::logFilename(), $logString, FILE_APPEND );
01062                     }
01063                     // Let's convert it back to $dbEncoding
01064                     $xmlString = iconv( 'utf8', $dbEncoding . '//IGNORE', $xmlString );
01065                 }
01066                 $row['xml_data'] = ereg_replace( '(^<\?xml[^>]+)( \?>)', "\\1 encoding=\"utf-8\" ?>", $xmlString );
01067             }
01068             else if ( $xmlEncoding != $dbEncoding )
01069             {
01070                 //Verify that character set defined in xml header is correct
01071 
01072                 //first, remove illegal characters before doing utf8 test
01073                 if( strtolower( str_replace( '-', '', $xmlEncoding ) )  === 'utf8' )
01074                 {
01075                     $xmlStringTest = removeIllegalUTF8Characters( $xmlString );
01076                 }
01077                 else
01078                 {
01079                     $xmlStringTest = $xmlString;
01080                 }
01081                 $encodedXMLTest = iconv( $xmlEncoding, $xmlEncoding, $xmlStringTest );
01082                 if ( strlen( $encodedXMLTest ) <> strlen( $xmlStringTest ) )
01083                 {
01084                     //Ups, characterset is not in $xmlEncoding after all.
01085                     //Let's check if it is in $dbEncoding
01086                     $encodedXMLTest = iconv( $dbEncoding, $dbEncoding, $xmlString );
01087                     if ( strlen( $encodedXMLTest ) === strlen( $xmlString ) )
01088                     {
01089                         //It is in db's charset. Perform no char set conversion.
01090                         $convertedXMLString = $xmlString;
01091                         if( CommandLineArguments::logFilename() !== false )
01092                         {
01093                             $logString = "WARNING: Encoding for document doesn't match definition in xml header. Characterset was autodetected to be in database's encoding instead... AttributeID : " . $row['id'] . ', version : ' . $row['version'] . ", xmlEncoding : $xmlEncoding, dbEncoding, $dbEncoding \n";
01094                             file_put_contents( CommandLineArguments::logFilename(), $logString, FILE_APPEND );
01095                         }
01096                     }
01097                     else
01098                     {
01099                         //not in $xmlEncoding or in $dbEncoding. error.
01100                         // We don't know the charset but let's try to convert it anyway, and ignore unknown characters.
01101                         $convertedXMLString = iconv( $xmlEncoding, $dbEncoding . '//IGNORE', $xmlString );
01102                         if( CommandLineArguments::logFilename() !== false )
01103                         {
01104                             $logString = "ERROR: Encoding for document not found.... AttributeID : " . $row['id'] . ', version : ' . $row['version'] . ", xmlEncoding : $xmlEncoding, dbEncoding, $dbEncoding \n";
01105                             file_put_contents( CommandLineArguments::logFilename(), $logString, FILE_APPEND );
01106                         }
01107                     }
01108                 }
01109                 else
01110                 {
01111                     $xmlString = $xmlStringTest;
01112                     if( CommandLineArguments::logFilename() !== false )
01113                     {
01114                         $logString = 'NOTICE: xml Document converted by iconv. AttributeID : ' . $row['id'] . ', version : ' . $row['version'] . ", xmlEncoding : $xmlEncoding, dbEncoding, $dbEncoding \n";
01115                         file_put_contents( CommandLineArguments::logFilename(), $logString, FILE_APPEND );
01116                     }
01117                     $convertedXMLString = iconv( $xmlEncoding, $dbEncoding, $xmlString );
01118                 }
01119 
01120                 if ( $convertedXMLString === false )
01121                 {
01122                     showWarning( "iconv failed to convert xml from '$xmlEncoding' to '$dbEncoding'" );
01123                     if( CommandLineArguments::logFilename() !== false )
01124                     {
01125                         $logString = 'ERROR: iconv returned false. AttributeID : ' . $row['id'] . ', version : ' . $row['version'] . ", xmlEncoding : $xmlEncoding, dbEncoding, $dbEncoding \n";
01126                         file_put_contents( CommandLineArguments::logFilename(), $logString, FILE_APPEND );
01127                     }
01128                     continue;
01129                 }
01130                 else
01131                 {
01132                     $row['xml_data'] = ereg_replace( '^(<\?xml[^>]+encoding)="([^"]+)"', "\\1=\"utf-8\"", $convertedXMLString );
01133                 }
01134             }
01135             else
01136             {
01137                 //showMessage3( "xml's and db's encodings are equal" );
01138                 $row['xml_data'] = ereg_replace( '^(<\?xml[^>]+encoding)="([^"]+)"', "\\1=\"utf-8\"", $xmlString );
01139             }
01140 
01141             $updateSQL = $xmlDataUpdateSQLFunction( $tableInfo, $row );
01142             $db->query( $updateSQL );
01143         }
01144 
01145         $offset += $limit;
01146     }
01147 
01148 }
01149 
01150 
01151 /**************************************************************
01152 * handle tables conversion                                    *
01153 ***************************************************************/
01154 function changeDBCharset( $charset, $collation )
01155 {
01156     $db = eZDB::instance();
01157 
01158     $function = "changeDBCharset" . strtoupper( $db->databaseName() );
01159     if ( function_exists( $function ) )
01160     {
01161         $function( $charset, $collation );
01162     }
01163     else
01164     {
01165         showError( "no function to change DB charset defined" );
01166     }
01167 
01168 }
01169 
01170 function changeDBCharsetMYSQL( $charset, $collation )
01171 {
01172     $db = eZDB::instance();
01173 
01174     $db->query( "ALTER DATABASE " . $db->DB . " CHARACTER SET $charset COLLATE $collation" );
01175 
01176     $tables = $db->arrayQuery( 'SHOW tables' );
01177     foreach ( $tables as $table )
01178     {
01179        $tableName = reset( $table ); // get first element of $table
01180        if ( $tableName )
01181        {
01182            showMessage3( '  changing table: ' . $tableName );
01183            $db->query( 'ALTER TABLE ' . $db->escapeString( $tableName ) . " CONVERT TO CHARACTER SET $charset COLLATE $collation" );
01184            showMessage3( '  optimizing table: ' . $tableName );
01185            $db->query( 'OPTIMIZE TABLE ' . $db->escapeString( $tableName ) );
01186        }
01187     }
01188 
01189     $db->query( "SET NAMES $charset" );
01190 }
01191 
01192 function changeDBCharsetPOSTGRESQL( $charset, $collation )
01193 {
01194     $db = eZDB::instance();
01195 
01196     // get database name
01197     $dbName = $db->DB;
01198 
01199     // get connection params
01200     $host = $db->Server;
01201     $port = $db->Port;
01202     $user = $db->User;
01203     // password is not allowed for command-line tools
01204     // $pass = $db->Password;
01205 
01206     $connectionParams = "--host=$host --port=$port --username=$user";
01207 
01208     // prepare utility commands
01209     $pgDump = "pg_dump $connectionParams";
01210     $psql = "psql $connectionParams";
01211     $dropdb = "dropdb $connectionParams";
01212     $createdb = "createdb $connectionParams";
01213 
01214     // get temporary dir to store dump
01215     $ini = eZINI::instance();
01216     $dumpDir = $ini->variable( 'FileSettings', 'TemporaryDir' ) . basename( __FILE__,  '.php');
01217     $dumpFile = $dbName . ".psql";
01218     $dumpPath = "$dumpDir/$dumpFile";
01219 
01220     // note: is this necessary? since we close the connection asap, and we are not in a transaction,
01221     // there is no need to commit or alter encoding, really...
01222     showMessage3( '  finalizing current changes' );
01223     // set output encoding
01224     $db->query( "ALTER DATABASE " . $dbName . " SET client_encoding = $charset" );
01225     // finalizing changes
01226     $db->commit();
01227     // close current connection
01228     $db->close();
01229 
01230 
01231     showMessage3( '  sleeping..' );
01232     sleep( 5 );
01233 
01234     // dump db
01235     showMessage3( "  taking the db dump, tmp storage is '$dumpPath'" );
01236     eZDir::mkdir( $dumpDir, false, true );
01237     $command = "$pgDump $dbName > '$dumpPath'";
01238     eZExecuteShellCommand( $command, "failed to dump db. tried command '$command'");
01239 
01240     showMessage3( "  re-creating db with charset '$charset'" );
01241     // drop db
01242     $command = "$dropdb $dbName";
01243     eZExecuteShellCommand( $command, "failed to drop db. tried command '$command'");
01244     // create new db in $charset
01245     $command = "$createdb $dbName --encoding=utf8";
01246     eZExecuteShellCommand( $command, "failed to create db. tried command '$command'");
01247 
01248     // restore dump into newly created db
01249     showMessage3( "  restoring db dump" );
01250     $command = "$psql $dbName < '$dumpPath'";
01251     eZExecuteShellCommand( $command, "failed to restore db dump. tried command '$command'");
01252 
01253     showMessage3( "  clean up" );
01254     // clean up
01255     eZDir::recursiveDelete( $dumpPath );
01256 
01257     // re-initialize db interface
01258     $db = eZDB::instance( false, false, true );
01259     //$db->begin();
01260 
01261     $db->query( "SET NAMES $charset" );
01262 }
01263 
01264 /**
01265 * NOTE: What if other data is also in the db? Either we do not convert it and
01266 *       have it most likely corrupted, or we convert it - and leave to the client
01267 *       for the other apps to set up NLS_LANG correctly to keep working.
01268 *
01269 *       We could use the csalter script iff we where sure that db version was > 9...
01270 *
01271 *       Oracle 9 exp exports data using the DB charset
01272 *
01273 * From http://www.experts-exchange.com/Database/Oracle/Q_22836430.html
01274 
01275 May be the best procedure is the following one:
01276 On PROD Database:
01277 1. export full=y rows=n file=export_db_structure.dmp
01278 2. export full=y file=export_db_date.dmp
01279 On TEST Database:
01280 1. create tablespaces with the same name on PROD
01281 2. import full=y file=export_db_structure.dmp ignore=y
01282 Now we have users of PROD, on TEST database. SYSTEM user is not imported, because already exists.
01283 3. import fromuser=user1,user2,user3 touser=user1,user2,user3  file=export_db_data.dmp ignore=y
01284 now we have user1..3 data on their tables...
01285 
01286 *       Unfortunately even if we do that, Oracle will nto let us convert a db from
01287 *       latin1 to utf8 charsets. The only way is to drop the db and creater it
01288 *       from scratch. Since we have no clue about db storage, we will let the admin
01289 *       take care of that part, and only do the export/import parts.
01290 *
01291 * @todo oracle servers might use UTF8 charset instead of AL32UTF8: check before executing!
01292 *
01293 * @todo using dbname as file name does not work with easy conection naming
01294 *
01295 * @todo log somewhere results of imp, exp commands for better understanding of errors
01296 */
01297 function changeDBCharsetORACLE( $charset, $collation )
01298 {
01299     global $db, $oracleDbaAccount, $oracleHome, $eZDir;
01300 
01301     //$db = eZDB::instance( false, array( 'user' => $oracleDbaAccount['user'], 'password' => $oracleDbaAccount['password'] ), true );
01302 
01303     // since we are here, we should be connected with a dba account (extra conditions check did it)
01304     $oracleCharset = $db->arrayQuery("select value from nls_database_parameters where parameter = 'NLS_CHARACTERSET'");
01305     $oracleCharset = $oracleCharset[0]['value'];
01306     //$oracleLocale = $db->arrayQuery("select language||'_'||territory as locale from (select value as language from nls_database_parameters where parameter = 'NLS_LANGUAGE'), (select value as territory from nls_database_parameters where parameter = 'NLS_TERRITORY')");
01307     //$oracleLocale = $oracleLocale[0]['locale'];
01308     /*$users = $db->arrayQuery("select username from all_users where username not in ('SYS', 'SYSTEM')");
01309     $oracleUsers = array();
01310     foreach( $users as $row )
01311     {
01312         $oracleUsers[] = $row['username'];
01313     }*/
01314 
01315     if ( $oracleCharset == 'AL32UTF8' )
01316     {
01317         // lucky case: client was configued to use another charset, but db was internally utf8 already!
01318         showMessage3( "  database charset is already UTF8: skipping." );
01319     }
01320     /*else if ( count( $oracleUsers  ) == 0 )
01321     {
01322         // this can only mean eZ Publish is installed using SYS or SYSTEM accounts. Brr...
01323         /// @todo find out best way to die here...
01324     }*/
01325     else
01326     {
01327 
01328     // get database name
01329     $dbName = $db->DB;
01330     //$dbversion = $db->databaseServerVersion();
01331 
01332     // get connection params
01333     //$host = $db->Server;
01334     //$port = $db->Port;
01335     //$user = $db->User;
01336     //$pass = $db->Password;
01337 
01338     $connectionParams = $oracleDbaAccount['user'].'/'.$oracleDbaAccount['password']."@$dbName";
01339 
01340     // get temporary dir to store dumps
01341     $ini = eZINI::instance();
01342     $dumpDir = $ini->variable( 'FileSettings', 'TemporaryDir' ) . basename( __FILE__, '.php' );
01343     $dumpFile1 = $dbName . "_structure_export.dmp";
01344     $dumpPath1 = "$dumpDir/$dumpFile1";
01345     $dumpFile2 = $dbName . "_full_export.dmp";
01346     $dumpPath2 = "$dumpDir/$dumpFile2";
01347     $commandPath = "$dumpDir/$dbName" . "_cmd.sql";
01348     $logPath = "$dumpDir/$dbName" . "_cmd.log";
01349     $users = implode( ',', $oracleUsers );
01350 
01351     //$ora_charset = "";
01352 
01353     // prepare utility commands
01354     $exe = eZSys::osType() == 'win32' ? '.exe' : '';
01355     $exec = eZSys::osType() == 'win32' ? '' : './';
01356 
01357     $expdb1 = "{$exec}exp{$exe} $connectionParams CONSISTENT=Y FULL=Y ROWS=N  FILE=$dumpPath1";
01358     $expdb2 = "{$exec}exp{$exe} $connectionParams CONSISTENT=Y FULL=Y FILE=$dumpPath2";
01359     $impdb1 = "{$exec}imp{$exe} $connectionParams FULL=Y IGNORE=Y BUFFER=30960 FILE=$dumpPath1";
01360     $impdb2 = "{$exec}imp{$exe} $connectionParams FROMUSER=$users TOUSER=$users IGNORE=Y BUFFER=30960 FILE=$dumpPath2";
01361 
01362     /*
01363     $alterdb = "SET ECHO ON
01364 SPOOL $logPath;
01365 WHENEVER SQLERROR EXIT FAILURE;
01366 WHENEVER OSERROR EXIT FAILURE;";
01367     if ( $dbversion['string'][0] == '8' )
01368     {
01369         /// @todo: check if using oracle < 8.1.5: we have touse svrmgrl then, as sqlplus was not good enough yet
01370         $sqlplus = "{$exec}sqlplus{$exe} $connectionParams";
01371         $alterdb .= "
01372 CONNECT $connectionParams AS SYSDBA";
01373     }
01374     else
01375     {
01376         $sqlplus = "{$exec}sqlplus{$exe} -L $connectionParams as sysdba";
01377     }
01378 
01379     //$dropdb = "dropdb $connectionParams";
01380     //$createdb = "createdb $connectionParams";
01381     foreach( $oracleUsers as $user )
01382     {
01383         $alterdb .= "
01384 DROP USER $user CASCADE;";
01385     }
01386     $alterdb .= "
01387 SHUTDOWN IMMEDIATE;
01388 STARTUP MOUNT;
01389 ALTER SYSTEM ENABLE RESTRICTED SESSION;
01390 ALTER SYSTEM SET JOB_QUEUE_PROCESSES=0;
01391 ALTER SYSTEM SET AQ_TM_PROCESSES=0;
01392 ALTER DATABASE OPEN;
01393 ALTER DATABASE CHARACTER SET AL32UTF8;
01394 SHUTDOWN IMMEDIATE; -- or SHUTDOWN NORMAL;
01395 STARTUP;
01396 EXIT;
01397 ";
01398     */
01399 
01400     // finalizing changes. Note that since we asked for admin connection before,
01401     // disconnecting and reconnecting, this is surely a NOOP
01402     //showMessage3( 'finalizing current changes' );
01403     //$db->commit();
01404 
01405     /// @todo add a database logfile switch or checkpoint here? it would be nice...
01406 
01407     // close current connection
01408     $db->close();
01409 
01410     showMessage3( '  sleeping..' );
01411     sleep( 5 );
01412 
01413     // dump db
01414     showMessage3( "  taking the db dump, tmp storage is '$dumpPath1', '$dumpPath2'" );
01415     eZDir::mkdir( $dumpDir, false, true );
01416     chdir( $oracleHome . '/bin' );
01417     eZExecuteShellCommand( $expdb1, "failed to dump db schema. tried command '$expdb1'", true, true );
01418     eZExecuteShellCommand( $expdb2, "failed to dump db data. tried command '$expdb2'", true, true );
01419     chdir( $eZDir );
01420 
01421     // verify that dump files exist
01422     if ( !file_exists( $dumpPath1 ) || !file_exists( $dumpPath2 ) )
01423     {
01424         showError( "DB Dump files cannot be found. Aborting..." );
01425     }
01426 
01427     /*
01428     showMessage3( "altering db with charset '$charset'" );
01429     $command = $sqlplus . " @$commandPath";
01430     file_put_contents( $commandPath, $alterdb );
01431     eZExecuteShellCommand( $command, "failed to alter db. tried command '$command'");
01432     */
01433     showMessage3( '' );
01434     showMessage3( "Now you will have to alter the database character set." );
01435     showMessage3( "The recommended way is to create a new database from scratch" );
01436     showMessage3( "using AL32UTF8 as character set (THIS IS VERY IMPORTANT)," );
01437     showMessage3( "and delete the existing one." );
01438     showMessage3( "The new database should be empty (all schemas will be recreated by this script)" );
01439     showMessage3( "and have the same DBA account as the old one." );
01440     showMessage3( "It should also use the same connect identifier as the old one." );
01441     showMessage3( '' );
01442     showMessage3( "PLEASE do not terminate this php script while doing that," );
01443     showMessage3( "use a different command line shell." );
01444     showMessage3( '' );
01445     $continue = eZGetUserInput( "Press Y when you are ready to continue... " );
01446     if ( $continue != 'y' && $continue != 'Y' )
01447     {
01448         showError( "Aborting" );
01449     }
01450 
01451     // connect to new db with dba account, check that charset is OK
01452     while( true )
01453     {
01454         $db = eZDB::instance( false, array( 'user' => $oracleDbaAccount['user'], 'password' => $oracleDbaAccount['password'] ), true );
01455         if ( !$db->isConnected() )
01456         {
01457             showWarning( "Cannot connect to the new database.\n".
01458                          "Please check that it is up and running before continuing" );
01459         }
01460         else
01461         {
01462             $oracleCharset = $db->arrayQuery("select value from nls_database_parameters where parameter = 'NLS_CHARACTERSET'");
01463             $oracleCharset = $oracleCharset[0]['value'];
01464             $db->close();
01465             if ( $oracleCharset == 'AL32UTF8' )
01466             {
01467                 break;
01468             }
01469             else
01470             {
01471                 showWarning( "The new database uses the $oracleCharset character set instead of AL32UTF8.\n".
01472                              "Please recreate the database using AL32UTF8 before continuing" );
01473             }
01474         }
01475         $continue = eZGetUserInput( "Press Y when you are ready to continue. Any other letter to abort " );
01476         if ( $continue != 'y' && $continue != 'Y' )
01477         {
01478             showError( "Aborting" );
01479         }
01480     }
01481 
01482     // restore dump into newly created db
01483     showMessage3( "  restoring db dump" );
01484     chdir( $oracleHome . '/bin' );
01485     eZExecuteShellCommand( $impdb1, "failed to restore db dump. tried command '$impdb1'");
01486     eZExecuteShellCommand( $impdb2, "failed to restore db dump. tried command '$impdb2'");
01487     chdir( $eZDir );
01488 
01489     showMessage3( "  cleaning up" );
01490     // clean up
01491     eZDir::recursiveDelete( $dumpPath1 );
01492     eZDir::recursiveDelete( $dumpPath2 );
01493 
01494     }
01495 
01496     // re-initialize db interface, *** this time in UTF8 - with the standard user ***
01497     $db = eZDB::instance( false, array( 'charset' => 'utf8' ), true );
01498     if ( !$db->isConnected() )
01499     {
01500         showError( "Cannot reconnect to DB. Aborting..." );
01501     }
01502     //$db->begin();
01503 }
01504 
01505 /**************************************************************
01506 * start script                                                *
01507 ***************************************************************/
01508 
01509 // work around a bug in eZSys that prevents it from telling us eZ Publish base dir
01510 $eZDir = getcwd();
01511 
01512 $cli = eZCLI::instance();
01513 
01514 $script = eZScript::instance( array( 'description' => ( "Changes your eZ Publish database tables to use UTF8" ),
01515                                       'use-session' => false,
01516                                       'use-modules' => false,
01517                                       'use-extensions' => true ) );
01518 
01519 $script->startup();
01520 
01521 $options = $script->getOptions( "[extra-xml-attributes:][extra-xml-data:][extra-serialized-data:][collation:][skip-class-translations][iconv-character-set:][log-filename:]",
01522                                 "",
01523                                 array( 'extra-xml-attributes' => "specify custom attributes which store its data in xml.\n" .
01524                                                                  "usage: <datatype_string>[.<table>.<field>][,<datatype_string>.<table>.<field>...].\n" .
01525                                                                  "default table is 'ezcontentobject_attribute', default data field is 'data_text'\n" .
01526                                                                  "note: your custom table must have 'id', 'version' and 'data_type_string' fields.",
01527                                        'extra-xml-data' => "specify custom xml data.\n" .
01528                                                             "usage: <table>.<field>[,<table>.<field>...].\n" .
01529                                                             "note: your custom table must have 'id' field.",
01530                                        'extra-serialized-data' => "specify custom serialized data.\n" .
01531                                                                   "usage: <table>.<field>;<key_field1>[.<key_field2>....][,<table>.<field>...].\n" .
01532                                                                   "ex: mytable.data_text;id.version,mytable2.data;id",
01533 
01534                                        'collation' => "specify collation for converted db. default is 'utf8_general_ci'",
01535                                        'skip-class-translations' => "Content class translations were added in eZ Publish 3.9. Use this options if upgrading from early version.",
01536                                        'iconv-character-set' => 'This setting is used when characters are converted by iconv(). This settings is typically needed when a character set is not called the same by the database and iconv. An example is "windows-1252" (iconv) and "iso-8859-1" (mysql 5)',
01537                                        'log-filename' => 'Specify a file where iconv conversions will be logged to' ),
01538                                 false,
01539                                 array( 'user' => true ) );
01540 
01541 
01542 $script->initialize();
01543 // workaround for bug #013661
01544 $db->OutputTextCodec = null;
01545 $db->InputTextCodec = null;
01546 
01547 $db = eZDB::instance();
01548 $db->OutputTextCodec = null;
01549 $db->InputTextCodec = null;
01550 
01551 if ( !$db->isConnected() )
01552 {
01553     showError( "Cannot connect to the database");
01554 }
01555 
01556 if ( !checkDBDriver() )
01557 {
01558     showError( "Unsupported db type '$dbType'");
01559 }
01560 
01561 $logFilename = $options['log-filename'] ? $options['log-filename'] : false;
01562 CommandLineArguments::setLogFilename( $logFilename );
01563 if( CommandLineArguments::logFilename() !== false )
01564 {
01565     //Let's create an empty logfile
01566     file_put_contents( CommandLineArguments::logFilename(), '' );
01567 }
01568 
01569 
01570 $iconvCharacterSet = $options["iconv-character-set"] ? $options["iconv-character-set"] : false;
01571 CommandLineArguments::setIconvCharacterSet( $iconvCharacterSet );
01572 
01573 if ( !checkDBCharset( $iconvCharacterSet ) )
01574 {
01575     showMessage( "The database is already in utf8." );
01576     $script->shutdown( 2 );
01577 }
01578 
01579 if ( $iconvCharacterSet === false )
01580 {
01581     CommandLineArguments::setIconvCharacterSet( $db->charset() );
01582 }
01583 
01584 // Display big fat warning that this script it might leave your database in an
01585 // inconsistent state
01586 showMessage2( "WARNING: BACK UP YOUR DATABASE!" );
01587 showMessage3( "Please make sure you have backed up your database before proceeding!");
01588 showMessage3( "If this script, for some reasons fails, your database may be left in an inconsistent state.\n" );
01589 showMessage3( "This script will continue in 25 seconds. Press ctrl+c to abort." );
01590 sleep( 10 );
01591 echo "Continuing in: ";
01592 for ( $i = 0; $i > 0; $i-- )
01593 {
01594     echo "$i ";
01595     sleep(1);
01596 }
01597 echo "\n";
01598 
01599 
01600 $skipClassTranslations = $options["skip-class-translations"];
01601 $collation = $options['collation'] ? $options['collation'] : 'utf8_general_ci';
01602 
01603 //
01604 // get info about extra xml attributes
01605 //
01606 $xmlAttributesOption = $options['extra-xml-attributes'] ? $options['extra-xml-attributes'] : '';
01607 
01608 //
01609 // add info about standard xml attributes
01610 //
01611 $xmlAttributesOption = $xmlAttributesOption ? $xmlAttributesOption . ',' : $xmlAttributesOption;
01612 
01613 $xmlAttributesOption .= 'ezxmltext';
01614 $xmlAttributesOption .= ', ezimage';
01615 $xmlAttributesOption .= ', ezmatrix';
01616 $xmlAttributesOption .= ', ezauthor';
01617 $xmlAttributesOption .= ', ezmultioption';
01618 $xmlAttributesOption .= ', ezmultioption2';
01619 $xmlAttributesOption .= ', ezoption';
01620 $xmlAttributesOption .= ', ezrangeoption';
01621 $xmlAttributesOption .= ', ezobjectrelationlist';
01622 $xmlAttributesOption .= ', ezselection.ezcontentclass_attribute.data_text5';
01623 $xmlAttributesOption .= ', ezmatrix.ezcontentclass_attribute.data_text5';
01624 $xmlAttributesOption .= ', ezobjectrelationlist.ezcontentclass_attribute.data_text5';
01625 
01626 
01627 $xmlAttributesInfo = parseXMLAttributesOption( $xmlAttributesOption );
01628 if ( $xmlAttributesInfo && count( $xmlAttributesInfo ) == 0 )
01629 {
01630     showWarning( "no xml attributes specified" );
01631 }
01632 
01633 
01634 //
01635 // get info about custom xml data
01636 //
01637 $xmlCustomDataOption = $options['extra-xml-data'] ? $options['extra-xml-data'] : '';
01638 $xmlCustomDataInfo = parseCustomXMLDataOption( $xmlCustomDataOption );
01639 
01640 
01641 //
01642 // get info about custom serialized data
01643 //
01644 $serializedCustomDataOption = $options['extra-serialized-data'] ? $options['extra-serialized-data'] : '';
01645 $serializedDataInfo = parseCustomSerializedDataOption( $serializedCustomDataOption );
01646 
01647 
01648 //
01649 // extra prerequisite checking that can be Db specific
01650 //
01651 if ( $msg = checkDBExtraConditions() !== true )
01652 {
01653     showError( $msg );
01654 }
01655 
01656 
01657 $db->begin();
01658 
01659 
01660 /**************************************************************
01661 * convert extra serialized data                               *
01662 ***************************************************************/
01663 if ( is_array( $serializedDataInfo ) )
01664 {
01665     showMessage( "Converting extra serialized data" );
01666     convertSerializedData( $serializedDataInfo );
01667 }
01668 
01669 
01670 /**************************************************************
01671 * backup content class serialized names                       *
01672 ***************************************************************/
01673 
01674 // do nothing, cause class names are stored in ezcontentclass_name table,
01675 // so, it's possible to restore converted names from this table.
01676 
01677 
01678 /**************************************************************
01679 * backup content class attribute serialized names             *
01680 ***************************************************************/
01681 if ( !$skipClassTranslations )
01682 {
01683     showMessage( "Unserializing content class attributes names..." );
01684     createContentClassAttributeTempTable();
01685     unserializeContentClassAttributeNames();
01686 }
01687 
01688 
01689 
01690 /**************************************************************
01691 * convert xml datatypes to db's charset                       *
01692 ***************************************************************/
01693 showMessage( "Converting xml datatypes..." );
01694 convertXMLDatatypes( $xmlAttributesInfo );
01695 
01696 
01697 /**************************************************************
01698 * convert custom xml data                                     *
01699 ***************************************************************/
01700 if ( is_array( $xmlCustomDataInfo ) )
01701 {
01702     showMessage( "Converting custom xml data..." );
01703     convertCustomXMLData( $xmlCustomDataInfo );
01704 }
01705 
01706 showMessage( "Commiting..." );
01707 $db->commit();
01708 
01709 /**************************************************************
01710 * convert tables                                              *
01711 ***************************************************************/
01712 showMessage( "Changing DB charset..." );
01713 changeDBCharset( 'utf8', $collation );
01714 
01715 $db->begin();
01716 
01717 /**************************************************************
01718 * restore class serialized names                              *
01719 ***************************************************************/
01720 if ( !$skipClassTranslations )
01721 {
01722     showMessage( "Serializing content class names..." );
01723     serializeContentClassNames();
01724 }
01725 
01726 
01727 /**************************************************************
01728 * restore class_attributes serialized names                   *
01729 ***************************************************************/
01730 if ( !$skipClassTranslations )
01731 {
01732     showMessage( "Serializing content class attributes names..." );
01733     serializeContentClassAttributeNames();
01734 }
01735 
01736 
01737 /**************************************************************
01738 * restore extra serialized data                               *
01739 ***************************************************************/
01740 if ( is_array( $serializedDataInfo ) )
01741 {
01742     showMessage( "Restoring extra serialized data" );
01743     restoreSerializedData( $serializedDataInfo );
01744 }
01745 
01746 showMessage( "Commiting..." );
01747 $db->commit();
01748 
01749 /**************************************************************
01750 * clean up                                                    *
01751 ***************************************************************/
01752 showMessage( "Cleaning up..." );
01753 if ( !$skipClassTranslations )
01754 {
01755     dropContentClassAttributeTempTable();
01756 }
01757 dropBLOBColumns( $serializedDataInfo );
01758 
01759 
01760 /**************************************************************
01761 * finalize                                                    *
01762 ***************************************************************/
01763 
01764 showMessage( "DB has been converted successfully." );
01765 showMessage( "PLEASE REMEMBER to alter the database connection definition in site.ini with charset=utf8" );
01766 $script->shutdown();
01767 
01768 ?>