From c1d00d6dbf2837be2ac690d1462f2577e93d617a Mon Sep 17 00:00:00 2001 From: Fumiaki MATSUSHIMA Date: Sun, 9 Oct 2016 17:40:07 +0900 Subject: Remove `AS::Multibyte`'s unicode table --- activesupport/bin/generate_tables | 141 -------------------------------------- 1 file changed, 141 deletions(-) delete mode 100755 activesupport/bin/generate_tables (limited to 'activesupport/bin') diff --git a/activesupport/bin/generate_tables b/activesupport/bin/generate_tables deleted file mode 100755 index 18199b2171..0000000000 --- a/activesupport/bin/generate_tables +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env ruby -# frozen_string_literal: true - -begin - $:.unshift(File.expand_path("../lib", __dir__)) - require "active_support" -rescue IOError -end - -require "open-uri" -require "tmpdir" -require "fileutils" - -module ActiveSupport - module Multibyte - module Unicode - class UnicodeDatabase - def load; end - end - - class DatabaseGenerator - BASE_URI = "http://www.unicode.org/Public/#{UNICODE_VERSION}/ucd/" - SOURCES = { - codepoints: BASE_URI + "UnicodeData.txt", - composition_exclusion: BASE_URI + "CompositionExclusions.txt", - grapheme_break_property: BASE_URI + "auxiliary/GraphemeBreakProperty.txt", - cp1252: "http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT" - } - - def initialize - @ucd = Unicode::UnicodeDatabase.new - end - - def parse_codepoints(line) - codepoint = Codepoint.new - raise "Could not parse input." unless line =~ /^ - ([0-9A-F]+); # code - ([^;]+); # name - ([A-Z]+); # general category - ([0-9]+); # canonical combining class - ([A-Z]+); # bidi class - (<([A-Z]*)>)? # decomposition type - ((\ ?[0-9A-F]+)*); # decomposition mapping - ([0-9]*); # decimal digit - ([0-9]*); # digit - ([^;]*); # numeric - ([YN]*); # bidi mirrored - ([^;]*); # unicode 1.0 name - ([^;]*); # iso comment - ([0-9A-F]*); # simple uppercase mapping - ([0-9A-F]*); # simple lowercase mapping - ([0-9A-F]*)$/ix # simple titlecase mapping - codepoint.code = $1.hex - codepoint.combining_class = Integer($4) - codepoint.decomp_type = $7 - codepoint.decomp_mapping = ($8 == "") ? nil : $8.split.collect(&:hex) - codepoint.uppercase_mapping = ($16 == "") ? 0 : $16.hex - codepoint.lowercase_mapping = ($17 == "") ? 0 : $17.hex - @ucd.codepoints[codepoint.code] = codepoint - end - - def parse_grapheme_break_property(line) - if line =~ /^([0-9A-F.]+)\s*;\s*([\w]+)\s*#/ - type = $2.downcase.intern - @ucd.boundary[type] ||= [] - if $1.include? ".." - parts = $1.split ".." - @ucd.boundary[type] << (parts[0].hex..parts[1].hex) - else - @ucd.boundary[type] << $1.hex - end - end - end - - def parse_composition_exclusion(line) - if line =~ /^([0-9A-F]+)/i - @ucd.composition_exclusion << $1.hex - end - end - - def parse_cp1252(line) - if line =~ /^([0-9A-Fx]+)\s([0-9A-Fx]+)/i - @ucd.cp1252[$1.hex] = $2.hex - end - end - - def create_composition_map - @ucd.codepoints.each do |_, cp| - if !cp.nil? && cp.combining_class == 0 && cp.decomp_type.nil? && !cp.decomp_mapping.nil? && cp.decomp_mapping.length == 2 && @ucd.codepoints[cp.decomp_mapping[0]].combining_class == 0 && !@ucd.composition_exclusion.include?(cp.code) - @ucd.composition_map[cp.decomp_mapping[0]] ||= {} - @ucd.composition_map[cp.decomp_mapping[0]][cp.decomp_mapping[1]] = cp.code - end - end - end - - def normalize_boundary_map - @ucd.boundary.each do |k, v| - if [:lf, :cr].include? k - @ucd.boundary[k] = v[0] - end - end - end - - def parse - SOURCES.each do |type, url| - filename = File.join(Dir.tmpdir, UNICODE_VERSION, "#{url.split('/').last}") - unless File.exist?(filename) - $stderr.puts "Downloading #{url.split('/').last}" - FileUtils.mkdir_p(File.dirname(filename)) - File.open(filename, "wb") do |target| - open(url) do |source| - source.each_line { |line| target.write line } - end - end - end - File.open(filename) do |file| - file.each_line { |line| send "parse_#{type}".intern, line } - end - end - create_composition_map - normalize_boundary_map - end - - def dump_to(filename) - File.open(filename, "wb") do |f| - f.write Marshal.dump([@ucd.codepoints, @ucd.composition_exclusion, @ucd.composition_map, @ucd.boundary, @ucd.cp1252]) - end - end - end - end - end -end - -if __FILE__ == $0 - filename = ActiveSupport::Multibyte::Unicode::UnicodeDatabase.filename - generator = ActiveSupport::Multibyte::Unicode::DatabaseGenerator.new - generator.parse - print "Writing to: #{filename}" - generator.dump_to filename - puts " (#{File.size(filename)} bytes)" -end -- cgit v1.2.3