aboutsummaryrefslogtreecommitdiffstats
path: root/activesupport/bin/generate_tables
diff options
context:
space:
mode:
Diffstat (limited to 'activesupport/bin/generate_tables')
-rwxr-xr-xactivesupport/bin/generate_tables141
1 files changed, 0 insertions, 141 deletions
diff --git a/activesupport/bin/generate_tables b/activesupport/bin/generate_tables
deleted file mode 100755
index 18199b2171..0000000000
--- a/activesupport/bin/generate_tables
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env ruby
-# frozen_string_literal: true
-
-begin
- $:.unshift(File.expand_path("../lib", __dir__))
- require "active_support"
-rescue IOError
-end
-
-require "open-uri"
-require "tmpdir"
-require "fileutils"
-
-module ActiveSupport
- module Multibyte
- module Unicode
- class UnicodeDatabase
- def load; end
- end
-
- class DatabaseGenerator
- BASE_URI = "http://www.unicode.org/Public/#{UNICODE_VERSION}/ucd/"
- SOURCES = {
- codepoints: BASE_URI + "UnicodeData.txt",
- composition_exclusion: BASE_URI + "CompositionExclusions.txt",
- grapheme_break_property: BASE_URI + "auxiliary/GraphemeBreakProperty.txt",
- cp1252: "http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT"
- }
-
- def initialize
- @ucd = Unicode::UnicodeDatabase.new
- end
-
- def parse_codepoints(line)
- codepoint = Codepoint.new
- raise "Could not parse input." unless line =~ /^
- ([0-9A-F]+); # code
- ([^;]+); # name
- ([A-Z]+); # general category
- ([0-9]+); # canonical combining class
- ([A-Z]+); # bidi class
- (<([A-Z]*)>)? # decomposition type
- ((\ ?[0-9A-F]+)*); # decomposition mapping
- ([0-9]*); # decimal digit
- ([0-9]*); # digit
- ([^;]*); # numeric
- ([YN]*); # bidi mirrored
- ([^;]*); # unicode 1.0 name
- ([^;]*); # iso comment
- ([0-9A-F]*); # simple uppercase mapping
- ([0-9A-F]*); # simple lowercase mapping
- ([0-9A-F]*)$/ix # simple titlecase mapping
- codepoint.code = $1.hex
- codepoint.combining_class = Integer($4)
- codepoint.decomp_type = $7
- codepoint.decomp_mapping = ($8 == "") ? nil : $8.split.collect(&:hex)
- codepoint.uppercase_mapping = ($16 == "") ? 0 : $16.hex
- codepoint.lowercase_mapping = ($17 == "") ? 0 : $17.hex
- @ucd.codepoints[codepoint.code] = codepoint
- end
-
- def parse_grapheme_break_property(line)
- if line =~ /^([0-9A-F.]+)\s*;\s*([\w]+)\s*#/
- type = $2.downcase.intern
- @ucd.boundary[type] ||= []
- if $1.include? ".."
- parts = $1.split ".."
- @ucd.boundary[type] << (parts[0].hex..parts[1].hex)
- else
- @ucd.boundary[type] << $1.hex
- end
- end
- end
-
- def parse_composition_exclusion(line)
- if line =~ /^([0-9A-F]+)/i
- @ucd.composition_exclusion << $1.hex
- end
- end
-
- def parse_cp1252(line)
- if line =~ /^([0-9A-Fx]+)\s([0-9A-Fx]+)/i
- @ucd.cp1252[$1.hex] = $2.hex
- end
- end
-
- def create_composition_map
- @ucd.codepoints.each do |_, cp|
- if !cp.nil? && cp.combining_class == 0 && cp.decomp_type.nil? && !cp.decomp_mapping.nil? && cp.decomp_mapping.length == 2 && @ucd.codepoints[cp.decomp_mapping[0]].combining_class == 0 && !@ucd.composition_exclusion.include?(cp.code)
- @ucd.composition_map[cp.decomp_mapping[0]] ||= {}
- @ucd.composition_map[cp.decomp_mapping[0]][cp.decomp_mapping[1]] = cp.code
- end
- end
- end
-
- def normalize_boundary_map
- @ucd.boundary.each do |k, v|
- if [:lf, :cr].include? k
- @ucd.boundary[k] = v[0]
- end
- end
- end
-
- def parse
- SOURCES.each do |type, url|
- filename = File.join(Dir.tmpdir, UNICODE_VERSION, "#{url.split('/').last}")
- unless File.exist?(filename)
- $stderr.puts "Downloading #{url.split('/').last}"
- FileUtils.mkdir_p(File.dirname(filename))
- File.open(filename, "wb") do |target|
- open(url) do |source|
- source.each_line { |line| target.write line }
- end
- end
- end
- File.open(filename) do |file|
- file.each_line { |line| send "parse_#{type}".intern, line }
- end
- end
- create_composition_map
- normalize_boundary_map
- end
-
- def dump_to(filename)
- File.open(filename, "wb") do |f|
- f.write Marshal.dump([@ucd.codepoints, @ucd.composition_exclusion, @ucd.composition_map, @ucd.boundary, @ucd.cp1252])
- end
- end
- end
- end
- end
-end
-
-if __FILE__ == $0
- filename = ActiveSupport::Multibyte::Unicode::UnicodeDatabase.filename
- generator = ActiveSupport::Multibyte::Unicode::DatabaseGenerator.new
- generator.parse
- print "Writing to: #{filename}"
- generator.dump_to filename
- puts " (#{File.size(filename)} bytes)"
-end