aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/lib/active_support/multibyte/unicode_database.rb
blob: 3b8cf8f9eb9a4947acf09270c8ea0dac98313818 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# encoding: utf-8

module ActiveSupport #:nodoc:
  module Multibyte #:nodoc:
    # Plain value holder for the properties recorded for a single codepoint
    # in the Unicode database. Every attribute is readable and writable and
    # is nil until assigned.
    class Codepoint
      # The codepoint itself and its combining class
      attr_accessor :code
      attr_accessor :combining_class

      # Decomposition data
      attr_accessor :decomp_type, :decomp_mapping

      # Case mappings
      attr_accessor :uppercase_mapping, :lowercase_mapping
    end

    # Holds static data from the Unicode database.
    #
    # Creating an instance does not touch the disk: the tables start out as
    # empty placeholders and are read from the marshalled data file the first
    # time one of the ATTRIBUTES readers is called (or when #load is called
    # directly).
    class UnicodeDatabase
      ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252

      attr_writer(*ATTRIBUTES)

      # Sets up empty placeholder tables; #load replaces all of them.
      def initialize
        # Hash.new(obj) hands back this one shared blank Codepoint for every
        # missing key.
        @codepoints = Hash.new(Codepoint.new)
        @composition_exclusion = []
        @composition_map = {}
        @boundary = {}
        @cp1252 = {}
      end

      # Lazy load the Unicode database so it's only loaded when it's actually used.
      # Each generated reader triggers #load and then returns its instance variable.
      ATTRIBUTES.each do |attr_name|
        # The heredoc body starts on the line after this call, hence __LINE__ + 1,
        # so backtraces from the generated readers point at the right line.
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          def #{attr_name}
            load
            @#{attr_name}
          end
        EOS
      end

      # Reads the marshalled Unicode tables from self.class.filename into the
      # instance variables named by ATTRIBUTES, then replaces the lazy readers
      # on this object with plain attr_readers so the file is not read again on
      # later attribute access.
      #
      # The return value is not meaningful; use the attribute readers to get at
      # the loaded tables.
      #
      # Raises IOError when the data file cannot be opened or unmarshalled.
      def load
        begin
          @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 =
            File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
        rescue StandardError => e
          # Only ordinary errors are wrapped. Interrupt, SystemExit, NoMemoryError
          # and friends must propagate untouched instead of being reported as a
          # problem with the Unicode tables.
          raise IOError, "Couldn't load the Unicode tables for UTF8Handler (#{e.message}), ActiveSupport::Multibyte is unusable"
        end

        # Redefine the === method so we can write shorter rules for grapheme cluster breaks:
        # an Array entry matches when any of its members === the tested value.
        # Non-Array entries keep their own ===.
        @boundary.each_value do |members|
          next unless members.kind_of?(Array)

          members.instance_eval do
            def ===(other)
              any? { |member| member === other }
            end
          end
        end

        # define attr_reader methods for the instance variables; being singleton
        # methods they take precedence over the lazy readers defined on the class
        class << self
          attr_reader(*ATTRIBUTES)
        end
      end

      # Returns the directory in which the data files are stored
      def self.dirname
        File.dirname(__FILE__) + '/../values/'
      end

      # Returns the filename for the data file for this version
      def self.filename
        File.expand_path File.join(dirname, "unicode_tables.dat")
      end
    end

    # Unicode database instance exposed as a constant of this module. Creating
    # it only sets empty placeholder tables; the data file is read the first
    # time one of its attribute readers is called.
    UCD = UnicodeDatabase.new
  end
end