eZPublish  4.4
ezconvertdbcharset.php File Reference

Classes

class  CommandLineArguments
 Class used to store some of the command line arguments. More...
 

Functions

 changeDBCharset ($charset, $collation)
 
 changeDBCharsetMYSQL ($charset, $collation)
 
 changeDBCharsetORACLE ($charset, $collation)
 NOTE: What if other data is also in the db? Either we do not convert it and have it most likely corrupted, or we convert it - and leave to the client for the other apps to set up NLS_LANG correctly to keep working. More...
 
 changeDBCharsetPOSTGRESQL ($charset, $collation)
 
 checkDBCharset ($iconvCharacterSet)
 
 checkDBDriver ()
 
 checkDBExtraConditions ()
 
 checkDBExtraConditionsORACLE ()
 
 convertArray ($array, $inCharset, $outCharset)
 
 convertCustomXMLData ($tableInfoList)
 
 convertSerializedData ($serializedDataInfo)
 
 convertXMLCustomDataProgress ($row)
 
 convertXMLData ($tableInfo, $xmlDataSelectSQLFunction, $xmlDataUpdateSQLFunction, $convertXMLProgressFunction)
 
 convertXMLDatatypeProgress ($row)
 
 convertXMLDatatypes ($tableInfoList)
 
 createBLOBColumnMYSQL ($tableInfo)
 
 createBLOBColumnORACLE ($tableInfo)
 
 createBLOBColumnPOSTGRESQL ($tableInfo)
 
 createContentClassAttributeTempTable ()
 
 dropBLOBColumn ($tableInfo)
 
 dropBLOBColumns ($serializedDataInfo)
 
 dropContentClassAttributeTempTable ()
 
 eZExecuteShellCommand ($command, $errMessage= '', $retry=true, $ignore=false)
 
 eZGetUserInput ($prompt)
 
 parseCustomSerializedDataOption ($serializedCustomDataOption)
 
 parseCustomXMLDataOption ($xmlCustomDataOption)
 
 parseXMLAttributesOption ($xmlAttributesOption)
 
 removeIllegalUTF8Characters ($text)
 For some reason, some utf8 encoded text stored in the db might contain illegal utf8 characters. More...
 
 restoreSerializedData ($serializedDataInfo)
 
 serializeContentClassAttributeNames ()
 
 serializeContentClassNames ()
 
 serializeNames ($selectSQL, $storeToTable)
 
 showError ($message, $addEOL=true, $bailOut=true)
 
 showMessage ($message, $addEOL=true)
 
 showMessage2 ($message, $addEOL=true)
 
 showMessage3 ($message, $addEOL=true)
 
 showNotice ($message, $addEOL=true)
 
 showWarning ($message, $addEOL=true)
 
 storeSerializedName ($serializedName, $id, $version, $table)
 
 unserializeContentClassAttributeNames ()
 
 xmlCustomDataSelectSQL ($dataTableInfo)
 
 xmlCustomDataUpdateSQL ($dataTableInfo, $row)
 
 xmlDatatypeSelectSQL ($dataTableInfo)
 
 xmlDatatypeUpdateSQL ($dataTableInfo, $row)
 

Variables

 $cli = eZCLI::instance()
 
 $collation = $options['collation'] ? $options['collation'] : 'utf8_general_ci'
 
 $db = eZDB::instance()
 
 $eZDir = getcwd()
 
if(CommandLineArguments::logFilename()!==false$iconvCharacterSet = $options["iconv-character-set"] ? $options["iconv-character-set"] : false
 
if(!$db->isConnected()) if(!checkDBDriver()) $logFilename = $options['log-filename'] ? $options['log-filename'] : false
 
 $options
 
 $script
 
 $serializedCustomDataOption = $options['extra-serialized-data'] ? $options['extra-serialized-data'] : ''
 
 $serializedDataInfo = parseCustomSerializedDataOption( $serializedCustomDataOption )
 
 $skipClassTranslations = $options["skip-class-translations"]
 
 $xmlAttributesInfo = parseXMLAttributesOption( $xmlAttributesOption )
 
 $xmlAttributesOption = $options['extra-xml-attributes'] ? $options['extra-xml-attributes'] : ''
 
 $xmlCustomDataInfo = parseCustomXMLDataOption( $xmlCustomDataOption )
 
if($xmlAttributesInfo &&count($xmlAttributesInfo)==0) $xmlCustomDataOption = $options['extra-xml-data'] ? $options['extra-xml-data'] : ''
 
const EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME 'ezcontentclass_attribute_tmp'
 
const EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_MYSQL " CREATE TEMPORARY TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " ( id int(11) NOT NULL default '0', version int(11) NOT NULL default '0', is_always_available int(11) NOT NULL default '0', language_locale varchar(20) NOT NULL default '', name varchar(255) NOT NULL default '', PRIMARY KEY (id,version,language_locale) )"
 
const EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_ORACLE " CREATE GLOBAL TEMPORARY TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " ( id number(11) DEFAULT 0 NOT NULL, version number(11) DEFAULT 0 NOT NULL, is_always_available number(11) DEFAULT 0 NOT NULL, language_locale varchar2(20) NOT NULL, name varchar2(255) NOT NULL ) ON COMMIT PRESERVE ROWS;"
 
const EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_POSTGRESQL " CREATE TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " ( id integer DEFAULT 0 NOT NULL, version integer DEFAULT 0 NOT NULL, is_always_available integer DEFAULT 0 NOT NULL, language_locale character varying(20) DEFAULT ''::character varying NOT NULL, name character varying(255) DEFAULT ''::character varying NOT NULL )"
 
const EZ_DROP_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL "DROP TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME
 
 for ($i=0;$i > 0;$i--)
 
$db InputTextCodec = null
 
$db OutputTextCodec = null
 

Function Documentation

changeDBCharset (   $charset,
  $collation 
)
changeDBCharsetMYSQL (   $charset,
  $collation 
)
changeDBCharsetORACLE (   $charset,
  $collation 
)

NOTE: What if other data is also in the db? Either we do not convert it and have it most likely corrupted, or we convert it - and leave to the client for the other apps to set up NLS_LANG correctly to keep working.

    We could use the csalter script iff we where sure that db version was > 9...

    Oracle 9 exp exports data using the DB charset

From http://www.experts-exchange.com/Database/Oracle/Q_22836430.html

May be the best procedure is the following one: On PROD Database:

  1. export full=y rows=n file=export_db_structure.dmp
  2. export full=y file=export_db_date.dmp On TEST Database:
  1. create tablespaces with the same name on PROD
  2. import full=y file=export_db_structure.dmp ignore=y Now we have users of PROD, on TEST database. SYSTEM user is not imported, because already exists.
  3. import fromuser=user1,user2,user3 touser=user1,user2,user3 file=export_db_data.dmp ignore=y now we have user1..3 data on their tables...
        Unfortunately even if we do that, Oracle will nto let us convert a db from
        latin1 to utf8 charsets. The only way is to drop the db and creater it
        from scratch. Since we have no clue about db storage, we will let the admin
        take care of that part, and only do the export/import parts.
    
Todo:
oracle servers might use UTF8 charset instead of AL32UTF8: check before executing!
Todo:
using dbname as file name does not work with easy conection naming
Todo:
log somewhere results of imp, exp commands for better understanding of errors
Todo:
add a database logfile switch or checkpoint here? it would be nice...
changeDBCharsetPOSTGRESQL (   $charset,
  $collation 
)
checkDBCharset (   $iconvCharacterSet)

Check db charset

checkDBDriver ( )

Check db driver

checkDBExtraConditions ( )

DB specific checks. A string is returned for error conditions (script halts after printing it)

checkDBExtraConditionsORACLE ( )
Todo:

We should really check for exp_full, imp_full, (sysdba) privileges rather than DBA role...

add check for RAC - do not try anything in such a case

convertArray (   $array,
  $inCharset,
  $outCharset 
)

Referenced by convertSerializedData().

convertCustomXMLData (   $tableInfoList)
convertSerializedData (   $serializedDataInfo)

Logic:

  • create binary column as temp storage to not loose data when table will be converted
  • get original data
  • unseiazlize
  • convert data to utf-8
  • serialize
  • store data in binary column
convertXMLCustomDataProgress (   $row)
convertXMLData (   $tableInfo,
  $xmlDataSelectSQLFunction,
  $xmlDataUpdateSQLFunction,
  $convertXMLProgressFunction 
)

Convert xml text to db's charset. However for optimization the xml processing instruction 'encoding' will be set to utf-8.

convertXMLDatatypeProgress (   $row)
convertXMLDatatypes (   $tableInfoList)
createBLOBColumnMYSQL (   $tableInfo)
createBLOBColumnORACLE (   $tableInfo)
createBLOBColumnPOSTGRESQL (   $tableInfo)
createContentClassAttributeTempTable ( )
dropBLOBColumn (   $tableInfo)
dropBLOBColumns (   $serializedDataInfo)
dropContentClassAttributeTempTable ( )
eZExecuteShellCommand (   $command,
  $errMessage = '',
  $retry = true,
  $ignore = false 
)

Referenced by changeDBCharsetORACLE().

eZGetUserInput (   $prompt)

prompt user to choose what to do next

Referenced by changeDBCharsetORACLE(), and checkDBExtraConditionsORACLE().

parseCustomSerializedDataOption (   $serializedCustomDataOption)

process custom xml data info

Returns
false or an array of table infos.
parseCustomXMLDataOption (   $xmlCustomDataOption)

process custom xml data info false of an array of table infos.

parseXMLAttributesOption (   $xmlAttributesOption)

process xml attributes info

Returns
false or an array of table infos.
removeIllegalUTF8Characters (   $text)

For some reason, some utf8 encoded text stored in the db might contain illegal utf8 characters.

This function will strip/replace such known characters

Referenced by convertXMLData().

restoreSerializedData (   $serializedDataInfo)

Restore data from binary column

serializeContentClassAttributeNames ( )
serializeContentClassNames ( )
serializeNames (   $selectSQL,
  $storeToTable 
)
showMessage (   $message,
  $addEOL = true 
)
showMessage2 (   $message,
  $addEOL = true 
)
showMessage3 (   $message,
  $addEOL = true 
)
showNotice (   $message,
  $addEOL = true 
)
showWarning (   $message,
  $addEOL = true 
)
storeSerializedName (   $serializedName,
  $id,
  $version,
  $table 
)
unserializeContentClassAttributeNames ( )
xmlCustomDataSelectSQL (   $dataTableInfo)
xmlCustomDataUpdateSQL (   $dataTableInfo,
  $row 
)
xmlDatatypeSelectSQL (   $dataTableInfo)
xmlDatatypeUpdateSQL (   $dataTableInfo,
  $row 
)

Variable Documentation

$cli = eZCLI::instance()
$collation = $options['collation'] ? $options['collation'] : 'utf8_general_ci'

Referenced by changeDBCharset().

$db = eZDB::instance()
$eZDir = getcwd()

Referenced by changeDBCharsetORACLE(), and showError().

if (CommandLineArguments::logFilename()!==false) $iconvCharacterSet = $options["iconv-character-set"] ? $options["iconv-character-set"] : false

Referenced by checkDBCharset().

if (!$db->isConnected()) if (!checkDBDriver()) $logFilename = $options['log-filename'] ? $options['log-filename'] : false
$options
Initial value:
= $script->getOptions( "[extra-xml-attributes:][extra-xml-data:][extra-serialized-data:][collation:][skip-class-translations][iconv-character-set:][log-filename:]",
"",
array( 'extra-xml-attributes' => "specify custom attributes which store its data in xml.\n" .
"usage: <datatype_string>[.<table>.<field>][,<datatype_string>.<table>.<field>...].\n" .
"default table is 'ezcontentobject_attribute', default data field is 'data_text'\n" .
"note: your custom table must have 'id', 'version' and 'data_type_string' fields.",
'extra-xml-data' => "specify custom xml data.\n" .
"usage: <table>.<field>[,<table>.<field>...].\n" .
"note: your custom table must have 'id' field.",
'extra-serialized-data' => "specify custom serialized data.\n" .
"usage: <table>.<field>;<key_field1>[.<key_field2>....][,<table>.<field>...].\n" .
"ex: mytable.data_text;id.version,mytable2.data;id",
'collation' => "specify collation for converted db. default is 'utf8_general_ci'",
'skip-class-translations' => "Content class translations were added in eZ Publish 3.9. Use this options if upgrading from early version.",
'iconv-character-set' => 'This setting is used when characters are converted by iconv(). This settings is typically needed when a character set is not called the same by the database and iconv. An example is "windows-1252" (iconv) and "iso-8859-1" (mysql 5)',
'log-filename' => 'Specify a file where iconv conversions will be logged to' ),
false,
array( 'user' => true ) )
$script
Definition: adddefaultstates.php:14
$script
Initial value:
= eZScript::instance( array( 'description' => ( "Changes your eZ Publish database tables to use UTF8" ),
'use-session' => false,
'use-modules' => false,
'use-extensions' => true ) )
static instance($settings=array())
Returns a shared instance of the eZScript class.
Definition: ezscript.php:1056
$serializedCustomDataOption = $options['extra-serialized-data'] ? $options['extra-serialized-data'] : ''
$serializedDataInfo = parseCustomSerializedDataOption( $serializedCustomDataOption )
$skipClassTranslations = $options["skip-class-translations"]
$xmlAttributesInfo = parseXMLAttributesOption( $xmlAttributesOption )
$xmlAttributesOption = $options['extra-xml-attributes'] ? $options['extra-xml-attributes'] : ''
$xmlCustomDataInfo = parseCustomXMLDataOption( $xmlCustomDataOption )
if ($xmlAttributesInfo &&count($xmlAttributesInfo)==0) $xmlCustomDataOption = $options['extra-xml-data'] ? $options['extra-xml-data'] : ''
const EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME 'ezcontentclass_attribute_tmp'
const EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_MYSQL " CREATE TEMPORARY TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " ( id int(11) NOT NULL default '0', version int(11) NOT NULL default '0', is_always_available int(11) NOT NULL default '0', language_locale varchar(20) NOT NULL default '', name varchar(255) NOT NULL default '', PRIMARY KEY (id,version,language_locale) )"
const EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_ORACLE " CREATE GLOBAL TEMPORARY TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " ( id number(11) DEFAULT 0 NOT NULL, version number(11) DEFAULT 0 NOT NULL, is_always_available number(11) DEFAULT 0 NOT NULL, language_locale varchar2(20) NOT NULL, name varchar2(255) NOT NULL ) ON COMMIT PRESERVE ROWS;"
const EZ_CREATE_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL_POSTGRESQL " CREATE TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME . " ( id integer DEFAULT 0 NOT NULL, version integer DEFAULT 0 NOT NULL, is_always_available integer DEFAULT 0 NOT NULL, language_locale character varying(20) DEFAULT ''::character varying NOT NULL, name character varying(255) DEFAULT ''::character varying NOT NULL )"
const EZ_DROP_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_SQL "DROP TABLE " . EZ_CONTENTCLASS_ATTRIBUTE_TMP_TABLE_NAME
for($i=0;$i > 0;$i--)