about | summary | refs | log | tree | commit | diff | stats
path: root/activesupport/lib/active_support/multibyte/unicode_database.rb
diff options
context:
space:
mode:
author Manfred Stienstra <manfred@fngtps.com> 2008-09-21 17:21:30 +0200
committer Manfred Stienstra <manfred@fngtps.com> 2008-09-21 17:21:30 +0200
commit 22f75d539dca7b6f33cbf86e4e9d1944bb22731f (patch)
tree f3c775cda7f82f5b527864adc363deb3c5eee354 /activesupport/lib/active_support/multibyte/unicode_database.rb
parent 5f83e1844c83c19cf97c6415b943c6ec3cb4bb06 (diff)
download rails-22f75d539dca7b6f33cbf86e4e9d1944bb22731f.tar.gz
rails-22f75d539dca7b6f33cbf86e4e9d1944bb22731f.tar.bz2
rails-22f75d539dca7b6f33cbf86e4e9d1944bb22731f.zip
Simplify ActiveSupport::Multibyte and make it run on Ruby 1.9.
* Unicode methods are now defined directly on Chars instead of a handler
* Updated Unicode database to Unicode 5.1.0
* Improved documentation
Diffstat (limited to 'activesupport/lib/active_support/multibyte/unicode_database.rb')
-rw-r--r-- activesupport/lib/active_support/multibyte/unicode_database.rb 71
1 files changed, 71 insertions, 0 deletions
diff --git a/activesupport/lib/active_support/multibyte/unicode_database.rb b/activesupport/lib/active_support/multibyte/unicode_database.rb
new file mode 100644
index 0000000000..3b8cf8f9eb
--- /dev/null
+++ b/activesupport/lib/active_support/multibyte/unicode_database.rb
@@ -0,0 +1,71 @@
+# encoding: utf-8
+
+module ActiveSupport #:nodoc:
+ module Multibyte #:nodoc:
+ # Holds data about a codepoint in the Unicode database
+ class Codepoint
+ attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
+ end
+
+ # Holds static data from the Unicode database
+ class UnicodeDatabase
+ ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
+
+ attr_writer(*ATTRIBUTES)
+
+ def initialize
+ @codepoints = Hash.new(Codepoint.new)
+ @composition_exclusion = []
+ @composition_map = {}
+ @boundary = {}
+ @cp1252 = {}
+ end
+
+ # Lazy load the Unicode database so it's only loaded when it's actually used
+ ATTRIBUTES.each do |attr_name|
+ class_eval(<<-EOS, __FILE__, __LINE__)
+ def #{attr_name}
+ load
+ @#{attr_name}
+ end
+ EOS
+ end
+
+ # Loads the Unicode database and returns all the internal objects of UnicodeDatabase.
+ def load
+ begin
+ @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
+ rescue Exception => e
+ raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
+ end
+
+ # Redefine the === method so we can write shorter rules for grapheme cluster breaks
+ @boundary.each do |k,_|
+ @boundary[k].instance_eval do
+ def ===(other)
+ detect { |i| i === other } ? true : false
+ end
+ end if @boundary[k].kind_of?(Array)
+ end
+
+ # define attr_reader methods for the instance variables
+ class << self
+ attr_reader(*ATTRIBUTES)
+ end
+ end
+
+ # Returns the directory in which the data files are stored
+ def self.dirname
+ File.dirname(__FILE__) + '/../values/'
+ end
+
+ # Returns the filename for the data file for this version
+ def self.filename
+ File.expand_path File.join(dirname, "unicode_tables.dat")
+ end
+ end
+
+ # UniCode Database
+ UCD = UnicodeDatabase.new
+ end
+end \ No newline at end of file