diff options
author | Manfred Stienstra <manfred@fngtps.com> | 2008-09-21 17:21:30 +0200 |
---|---|---|
committer | Manfred Stienstra <manfred@fngtps.com> | 2008-09-21 17:21:30 +0200 |
commit | 22f75d539dca7b6f33cbf86e4e9d1944bb22731f (patch) | |
tree | f3c775cda7f82f5b527864adc363deb3c5eee354 /activesupport/lib/active_support/multibyte/unicode_database.rb | |
parent | 5f83e1844c83c19cf97c6415b943c6ec3cb4bb06 (diff) | |
download | rails-22f75d539dca7b6f33cbf86e4e9d1944bb22731f.tar.gz rails-22f75d539dca7b6f33cbf86e4e9d1944bb22731f.tar.bz2 rails-22f75d539dca7b6f33cbf86e4e9d1944bb22731f.zip |
Simplify ActiveSupport::Multibyte and make it run on Ruby 1.9.
* Unicode methods are now defined directly on Chars instead of a handler
* Updated Unicode database to Unicode 5.1.0
* Improved documentation
Diffstat (limited to 'activesupport/lib/active_support/multibyte/unicode_database.rb')
-rw-r--r-- | activesupport/lib/active_support/multibyte/unicode_database.rb | 71 |
1 files changed, 71 insertions, 0 deletions
```diff
diff --git a/activesupport/lib/active_support/multibyte/unicode_database.rb b/activesupport/lib/active_support/multibyte/unicode_database.rb
new file mode 100644
index 0000000000..3b8cf8f9eb
--- /dev/null
+++ b/activesupport/lib/active_support/multibyte/unicode_database.rb
@@ -0,0 +1,71 @@
+# encoding: utf-8
+
+module ActiveSupport #:nodoc:
+  module Multibyte #:nodoc:
+    # Holds data about a codepoint in the Unicode database
+    class Codepoint
+      attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
+    end
+
+    # Holds static data from the Unicode database
+    class UnicodeDatabase
+      ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
+
+      attr_writer(*ATTRIBUTES)
+
+      def initialize
+        @codepoints = Hash.new(Codepoint.new)
+        @composition_exclusion = []
+        @composition_map = {}
+        @boundary = {}
+        @cp1252 = {}
+      end
+
+      # Lazy load the Unicode database so it's only loaded when it's actually used
+      ATTRIBUTES.each do |attr_name|
+        class_eval(<<-EOS, __FILE__, __LINE__)
+          def #{attr_name}
+            load
+            @#{attr_name}
+          end
+        EOS
+      end
+
+      # Loads the Unicode database and returns all the internal objects of UnicodeDatabase.
+      def load
+        begin
+          @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
+        rescue Exception => e
+          raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable")
+        end
+
+        # Redefine the === method so we can write shorter rules for grapheme cluster breaks
+        @boundary.each do |k,_|
+          @boundary[k].instance_eval do
+            def ===(other)
+              detect { |i| i === other } ? true : false
+            end
+          end if @boundary[k].kind_of?(Array)
+        end
+
+        # define attr_reader methods for the instance variables
+        class << self
+          attr_reader(*ATTRIBUTES)
+        end
+      end
+
+      # Returns the directory in which the data files are stored
+      def self.dirname
+        File.dirname(__FILE__) + '/../values/'
+      end
+
+      # Returns the filename for the data file for this version
+      def self.filename
+        File.expand_path File.join(dirname, "unicode_tables.dat")
+      end
+    end
+
+    # UniCode Database
+    UCD = UnicodeDatabase.new
+  end
+end
\ No newline at end of file
```