From d54d0c95750e2693da495b75ac5fa0280253972d Mon Sep 17 00:00:00 2001 From: Yasuo Honda Date: Wed, 12 Sep 2018 05:03:34 +0900 Subject: Use utf8mb4 character set by default for MySQL database (#33608) * Use utf8mb4 character set by default `utf8mb4` character set supports supplementary characters including emoji. `utf8` character set with 3-Byte encoding is not enough to support them. There was a downside of 4-Byte length character set with MySQL 5.5 and 5.6: "ERROR 1071 (42000): Specified key was too long; max key length is 767 bytes" for Rails string data type which is mapped to varchar(255) type. MySQL 5.7 supports 3072 byte key prefix length by default. * Remove `DEFAULT COLLATE` from Active Record unit test databases There should be no "one size fits all" collation in MySQL 5.7. Let MySQL server choose the default collation for Active Record unit test databases. Users can choose their best collation for their databases by setting `options[:collation]` based on their requirements. * InnoDB FULLTEXT indexes support since MySQL 5.6 it does not have to use MyISAM storage engine whose maximum key length is 1000 bytes. Using MyISAM storag engine with utf8mb4 character set would cause "Specified key was too long; max key length is 1000 bytes" https://dev.mysql.com/doc/refman/5.6/en/innodb-fulltext-index.html * References "10.9.1 The utf8mb4 Character Set (4-Byte UTF-8 Unicode Encoding)" https://dev.mysql.com/doc/refman/5.7/en/charset-unicode-utf8mb4.html "10.9.2 The utf8mb3 Character Set (3-Byte UTF-8 Unicode Encoding)" https://dev.mysql.com/doc/refman/5.7/en/charset-unicode-utf8.html "14.8.1.7 Limits on InnoDB Tables" https://dev.mysql.com/doc/refman/5.7/en/innodb-restrictions.html > If innodb_large_prefix is enabled (the default), the index key prefix limit is 3072 bytes > for InnoDB tables that use DYNAMIC or COMPRESSED row format. * CI against MySQL 5.7 Followed this instruction and changed root password to empty string. https://docs.travis-ci.com/user/database-setup/#MySQL-57 * The recommended minimum version of MySQL is 5.7.9 to support utf8mb4 character set and `innodb_default_row_format` MySQL 5.7.9 introduces `innodb_default_row_format` to support 3072 byte length index by default. Users do not have to change MySQL database configuration to support Rails string type. https://dev.mysql.com/doc/refman/5.7/en/innodb-parameters.html#sysvar_innodb_default_row_format https://dev.mysql.com/doc/refman/5.7/en/innodb-restrictions.html > If innodb_large_prefix is enabled (the default), > the index key prefix limit is 3072 bytes for InnoDB tables that use DYNAMIC or COMPRESSED row format. * The recommended minimum version of MariaDB is 10.2.2 MariaDB 10.2.2 is the first version of MariaDB supporting `innodb_default_row_format` Also MariaDB says "MySQL 5.7 is compatible with MariaDB 10.2". - innodb_default_row_format https://mariadb.com/kb/en/library/xtradbinnodb-server-system-variables/#innodb_default_row_format - "MariaDB versus MySQL - Compatibility" https://mariadb.com/kb/en/library/mariadb-vs-mysql-compatibility/ > MySQL 5.7 is compatible with MariaDB 10.2 - "Supported Character Sets and Collations" https://mariadb.com/kb/en/library/supported-character-sets-and-collations/ --- .travis.yml | 6 ++++++ activerecord/CHANGELOG.md | 7 +++++++ activerecord/Rakefile | 4 ++-- .../active_record/connection_adapters/abstract_mysql_adapter.rb | 4 ++-- activerecord/test/cases/adapters/mysql2/active_schema_test.rb | 2 +- activerecord/test/cases/adapters/mysql2/connection_test.rb | 4 ++-- activerecord/test/config.example.yml | 6 +++--- activerecord/test/schema/mysql2_specific_schema.rb | 2 +- .../generators/rails/app/templates/config/databases/mysql.yml.tt | 2 +- 9 files changed, 25 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index d7544fecb6..c18a24bea7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,11 +20,14 @@ addons: sources: - sourceline: "ppa:mc3man/trusty-media" - sourceline: "ppa:ubuntuhandbook1/apps" + - mysql-5.7-trusty packages: - ffmpeg - mupdf - mupdf-tools - poppler-utils + - mysql-server + - mysql-client bundler_args: --without test --jobs 3 --retry 3 before_install: @@ -37,6 +40,9 @@ before_install: - "[[ $GEM != 'av:ujs' ]] || nvm install node" - "[[ $GEM != 'av:ujs' ]] || node --version" - "[[ $GEM != 'av:ujs' ]] || (cd actionview && npm install)" + - "[[ $GEM != 'ar:mysql2' ]] || [[ $MYSQL == 'mariadb' ]] || sudo mysql -e \"use mysql; update user set authentication_string='' where User='root'; update user set plugin='mysql_native_password';FLUSH PRIVILEGES;\"" + - "[[ $GEM != 'ar:mysql2' ]] || [[ $MYSQL == 'mariadb' ]] || sudo mysql_upgrade" + - "[[ $GEM != 'ar:mysql2' ]] || [[ $MYSQL == 'mariadb' ]] || sudo service mysql restart" before_script: # Set Sauce Labs username and access key. Obfuscated, purposefully not encrypted. diff --git a/activerecord/CHANGELOG.md b/activerecord/CHANGELOG.md index 4487d70ce9..336946b756 100644 --- a/activerecord/CHANGELOG.md +++ b/activerecord/CHANGELOG.md @@ -1,3 +1,10 @@ +* Use MySQL utf8mb4 character set by default. + + `utf8mb4` character set with 4-Byte encoding supports supplementary characters including emoji. + The previous default 3-Byte encoding character set `utf8` is not enough to support them. + + *Yasuo Honda* + * Fix duplicated record creation when using nested attributes with `create_with`. *Darwin Wu* diff --git a/activerecord/Rakefile b/activerecord/Rakefile index 170c95b827..fae56a51bb 100644 --- a/activerecord/Rakefile +++ b/activerecord/Rakefile @@ -94,8 +94,8 @@ namespace :db do desc "Build the MySQL test databases" task :build do config = ARTest.config["connections"]["mysql2"] - %x( mysql --user=#{config["arunit"]["username"]} --password=#{config["arunit"]["password"]} -e "create DATABASE #{config["arunit"]["database"]} DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_unicode_ci ") - %x( mysql --user=#{config["arunit2"]["username"]} --password=#{config["arunit2"]["password"]} -e "create DATABASE #{config["arunit2"]["database"]} DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_unicode_ci ") + %x( mysql --user=#{config["arunit"]["username"]} --password=#{config["arunit"]["password"]} -e "create DATABASE #{config["arunit"]["database"]} DEFAULT CHARACTER SET utf8mb4" ) + %x( mysql --user=#{config["arunit2"]["username"]} --password=#{config["arunit2"]["password"]} -e "create DATABASE #{config["arunit2"]["database"]} DEFAULT CHARACTER SET utf8mb4" ) end desc "Drop the MySQL test databases" diff --git a/activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb b/activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb index 88fff83a9e..2d287d56e8 100644 --- a/activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb +++ b/activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb @@ -241,7 +241,7 @@ module ActiveRecord end # Create a new MySQL database with optional :charset and :collation. - # Charset defaults to utf8. + # Charset defaults to utf8mb4. # # Example: # create_database 'charset_test', charset: 'latin1', collation: 'latin1_bin' @@ -251,7 +251,7 @@ module ActiveRecord if options[:collation] execute "CREATE DATABASE #{quote_table_name(name)} DEFAULT COLLATE #{quote_table_name(options[:collation])}" else - execute "CREATE DATABASE #{quote_table_name(name)} DEFAULT CHARACTER SET #{quote_table_name(options[:charset] || 'utf8')}" + execute "CREATE DATABASE #{quote_table_name(name)} DEFAULT CHARACTER SET #{quote_table_name(options[:charset] || 'utf8mb4')}" end end diff --git a/activerecord/test/cases/adapters/mysql2/active_schema_test.rb b/activerecord/test/cases/adapters/mysql2/active_schema_test.rb index 6fc9df5083..261fee13eb 100644 --- a/activerecord/test/cases/adapters/mysql2/active_schema_test.rb +++ b/activerecord/test/cases/adapters/mysql2/active_schema_test.rb @@ -106,7 +106,7 @@ class Mysql2ActiveSchemaTest < ActiveRecord::Mysql2TestCase end def test_create_mysql_database_with_encoding - assert_equal "CREATE DATABASE `matt` DEFAULT CHARACTER SET `utf8`", create_database(:matt) + assert_equal "CREATE DATABASE `matt` DEFAULT CHARACTER SET `utf8mb4`", create_database(:matt) assert_equal "CREATE DATABASE `aimonetti` DEFAULT CHARACTER SET `latin1`", create_database(:aimonetti, charset: "latin1") assert_equal "CREATE DATABASE `matt_aimonetti` DEFAULT COLLATE `utf8mb4_bin`", create_database(:matt_aimonetti, collation: "utf8mb4_bin") end diff --git a/activerecord/test/cases/adapters/mysql2/connection_test.rb b/activerecord/test/cases/adapters/mysql2/connection_test.rb index 0c0e2a116e..3103589186 100644 --- a/activerecord/test/cases/adapters/mysql2/connection_test.rb +++ b/activerecord/test/cases/adapters/mysql2/connection_test.rb @@ -104,8 +104,8 @@ class Mysql2ConnectionTest < ActiveRecord::Mysql2TestCase end def test_mysql_connection_collation_is_configured - assert_equal "utf8_unicode_ci", @connection.show_variable("collation_connection") - assert_equal "utf8_general_ci", ARUnit2Model.connection.show_variable("collation_connection") + assert_equal "utf8mb4_unicode_ci", @connection.show_variable("collation_connection") + assert_equal "utf8mb4_general_ci", ARUnit2Model.connection.show_variable("collation_connection") end def test_mysql_default_in_strict_mode diff --git a/activerecord/test/config.example.yml b/activerecord/test/config.example.yml index 4bcb2aeea6..be337ddcd8 100644 --- a/activerecord/test/config.example.yml +++ b/activerecord/test/config.example.yml @@ -54,11 +54,11 @@ connections: mysql2: arunit: username: rails - encoding: utf8 - collation: utf8_unicode_ci + encoding: utf8mb4 + collation: utf8mb4_unicode_ci arunit2: username: rails - encoding: utf8 + encoding: utf8mb4 oracle: arunit: diff --git a/activerecord/test/schema/mysql2_specific_schema.rb b/activerecord/test/schema/mysql2_specific_schema.rb index 0f2f6ddd68..c384297658 100644 --- a/activerecord/test/schema/mysql2_specific_schema.rb +++ b/activerecord/test/schema/mysql2_specific_schema.rb @@ -36,7 +36,7 @@ ActiveRecord::Schema.define do t.index :var_binary end - create_table :key_tests, force: true, options: "ENGINE=MyISAM" do |t| + create_table :key_tests, force: true do |t| t.string :awesome t.string :pizza t.string :snacks diff --git a/railties/lib/rails/generators/rails/app/templates/config/databases/mysql.yml.tt b/railties/lib/rails/generators/rails/app/templates/config/databases/mysql.yml.tt index 1dc508b14f..dda979555a 100644 --- a/railties/lib/rails/generators/rails/app/templates/config/databases/mysql.yml.tt +++ b/railties/lib/rails/generators/rails/app/templates/config/databases/mysql.yml.tt @@ -11,7 +11,7 @@ # default: &default adapter: mysql2 - encoding: utf8 + encoding: utf8mb4 pool: <%%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> username: root password: -- cgit v1.2.3