diff options
| -rw-r--r-- | railties/guides/rails_guides.rb | 1 | ||||
| -rw-r--r-- | railties/guides/rails_guides/generator.rb | 5 | ||||
| -rw-r--r-- | railties/guides/rails_guides/levenshtein.rb | 112 | 
3 files changed, 117 insertions, 1 deletions
| diff --git a/railties/guides/rails_guides.rb b/railties/guides/rails_guides.rb index 6da7de890e..725f4cd886 100644 --- a/railties/guides/rails_guides.rb +++ b/railties/guides/rails_guides.rb @@ -22,6 +22,7 @@ module RailsGuides    autoload :Indexer, "rails_guides/indexer"    autoload :Helpers, "rails_guides/helpers"    autoload :TextileExtensions, "rails_guides/textile_extensions" +  autoload :Levenshtein, "rails_guides/levenshtein"  end  RedCloth.send(:include, RailsGuides::TextileExtensions) diff --git a/railties/guides/rails_guides/generator.rb b/railties/guides/rails_guides/generator.rb index dd147c4d5f..3dffe372e3 100644 --- a/railties/guides/rails_guides/generator.rb +++ b/railties/guides/rails_guides/generator.rb @@ -146,7 +146,10 @@ module RailsGuides        html.scan(/<a\s+href="#([^"]+)/).flatten.each do |fragment_identifier|          next if fragment_identifier == 'mainCol' # in layout, jumps to some DIV          unless anchors.member?(fragment_identifier) -          puts "BROKEN LINK: ##{fragment_identifier}" +          guess = anchors.min { |a, b| +            Levenshtein.distance(fragment_identifier, a) <=> Levenshtein.distance(fragment_identifier, b) +          } +          puts "*** BROKEN LINK: ##{fragment_identifier}, perhaps you meant ##{guess}."          end        end      end diff --git a/railties/guides/rails_guides/levenshtein.rb b/railties/guides/rails_guides/levenshtein.rb new file mode 100644 index 0000000000..02e35f60d2 --- /dev/null +++ b/railties/guides/rails_guides/levenshtein.rb @@ -0,0 +1,112 @@ +# +# Levenshtein distance algorithm implementation for Ruby, with UTF-8 support +# +# Author::  Paul BATTLEY (pbattley @ gmail.com) +# Version:: 1.3 +# Date::    2005-04-19 +# +# == About +# +# The Levenshtein distance is a measure of how similar two strings s and t are, +# calculated as the number of deletions/insertions/substitutions needed to +# transform s into t.  The greater the distance, the more the strings differ. +# +# The Levenshtein distance is also sometimes referred to as the +# easier-to-pronounce-and-spell 'edit distance'. +# +# == Revision history +# +# * 2005-05-19 1.3 Repairing an oversight, distance can now be called via +#   Levenshtein.distance(s, t) +# * 2005-05-04 1.2 Now uses just one 1-dimensional array.  I think this is as +#   far as optimisation can go. +# * 2005-05-04 1.1 Now storing only the current and previous rows of the matrix +#   instead of the whole lot. +# +# == Licence +# +# Copyright (c) 2005 Paul Battley +# +# Usage of the works is permitted provided that this instrument is retained +# with the works, so that any entity that uses the works is notified of this +# instrument. +# +# DISCLAIMER: THE WORKS ARE WITHOUT WARRANTY.   +# + +module Levenshtein + +    # +    # Calculate the Levenshtein distance between two strings +str1+ and +str2+. +    # +str1+ and +str2+ should be ASCII or UTF-8. +    # +    def distance(str1, str2) +        s = str1.unpack('U*') +        t = str2.unpack('U*') +        n = s.length +        m = t.length +        return m if (0 == n) +        return n if (0 == m) +         +        d = (0..m).to_a +        x = nil + +        (0...n).each do |i| +            e = i+1 +            (0...m).each do |j| +                cost = (s[i] == t[j]) ? 0 : 1 +                x = [ +                    d[j+1] + 1, # insertion +                    e + 1,      # deletion +                    d[j] + cost # substitution +                ].min +                d[j] = e +                e = x +            end +            d[m] = x +        end + +        return x +    end + +    extend self +end + +if (__FILE__ == $0) +    require 'test/unit' + +    class LevenshteinTest < Test::Unit::TestCase +        include Levenshtein + +        EXPECTED = [ +            # Easy ones +            ['test', 'test', 0], +            ['test', 'tent', 1], +            ['gumbo', 'gambol', 2], +            ['kitten', 'sitting', 3], +            # Empty strings +            ['foo', '', 3], +            ['', '', 0], +            ['a', '', 1], +            # UTF-8 +            ["f\303\266o", 'foo', 1], +            ["fran\303\247ais", 'francais', 1], +            ["fran\303\247ais", "fran\303\246ais", 1], +            ["\347\247\201\343\201\256\345\220\215\345\211\215\343\201\257"<< +             "\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231", +             "\343\201\274\343\201\217\343\201\256\345\220\215\345\211\215\343\201"<< +             "\257\343\203\235\343\203\274\343\203\253\343\201\247\343\201\231",  +             2], # Japanese +            # Edge cases +            ['a', 'a', 0], +            ['0123456789', 'abcdefghijklmnopqrstuvwxyz', 26] +        ] + +        def test_known_distances +            EXPECTED.each do |a,b,x| +                assert_equal(x, distance(a, b)) +                assert_equal(x, distance(b, a)) +            end +        end +    end +end | 
