Browse | Submit A New Snippet | Create A Package

 

Mispel

Type:
Class
Category:
Other
License:
GNU General Public License
Language:
Ruby
 
Description:
Searches through all defined symbols for near misses in the namespace. Hopefully, some of those listed will turn out to be misspelling bugs.

Usage :-
ruby -w Mispel.rb

Will run its unit tests.

ruby -rmodule1 -rmodule2 -rmodule... Mispel.rb --cache

Will create ~/.mispel_ignore, a list of known near misses in the
system and in those modules.

If you add into your script, after all require statements...

require 'Mispel'
Mispel::problem_cases
exit(1)

It will (after a long time) print out all near misses in namespace
that are not in ~/.mispel_ignore

Versions Of This Snippet:

John Carter
Snippet ID Download Version Date Posted Author Delete
58 | 1.0 | 2004-11-02 10:56 | John Carter

Download a raw-text version of this code by clicking on "Download Version"

 


Latest Snippet Version: 1.0

# Run
#  ruby Mispel.rb --help
# for help.
#
# Fuzzy-matching helpers added directly to the core String class.
#
# Two strings are treated as a "near miss" when each one's fuzzy regexp
# (see #fuzzy_regex) matches the other string.
class String
  # Compiled fuzzy regexps, keyed by the string they were built from.
  @@regex_memo = {}

  # Builds (and memoizes) the fuzzy regexp for this string.
  #
  # The pattern is an alternation of:
  # * every character of the string, in order, each repeated one or more
  #   times and each optionally followed by one arbitrary character; and
  # * for every position, the string with that one character removed.
  #
  # Characters are passed through Regexp.escape, so regexp metacharacters in
  # the receiver are matched literally instead of breaking (or silently
  # changing) the pattern.
  #
  # The pattern carries no anchors, so it can match anywhere inside the
  # candidate string.
  # NOTE(review): for a one-character receiver the "one character removed"
  # alternative is empty and therefore matches every string.
  #
  # Returns a Regexp. Re-raises RegexpError (after printing the offending
  # pattern) if the pattern cannot be compiled.
  def fuzzy_regex
    return @@regex_memo[self] if @@regex_memo.has_key? self

    escaped = chars.map { |c| Regexp.escape(c) }

    # First alternative: tolerate doubled letters and single inserted characters.
    result = escaped.map { |c| c + '+.?' }.join

    # Remaining alternatives: tolerate exactly one dropped character.
    escaped.each_index do |i|
      result << '|'
      escaped.each_with_index { |c, j| result << c unless i == j }
    end

    @@regex_memo[self] = Regexp.new(result)
  rescue RegexpError
    puts "Converting /#{result}/ to regexp failed."
    raise
  end

  # Returns the lower-cased string with everything except a-z and 0-9 removed.
  def fuzzy_canonicalize
    downcase.gsub(/[^a-z0-9]/, '')
  end

  # Canonicalizes both strings and compares them with
  # #fuzzy_compare_canonicalized.
  #
  # Returns 0 for a near miss, otherwise the <=> result of the canonical forms.
  def fuzzy_compare(other)
    fuzzy_canonicalize.fuzzy_compare_canonicalized(other.fuzzy_canonicalize)
  end

  # Compares two strings that are already in canonical form.
  #
  # Returns 0 when the strings are equal, or when this string's fuzzy regexp
  # matches +other+ AND +other+'s fuzzy regexp matches this string. Otherwise
  # returns the ordinary <=> result.
  def fuzzy_compare_canonicalized(other)
    return 0 if self == other
    # The second regexp is only built when the first direction already matched.
    return 0 if fuzzy_regex.match(other) && other.fuzzy_regex.match(self)

    self <=> other
  end
end

# Looks through the interpreter's symbol table for names that are almost, but
# not quite, the same, and reports the pairs that are not listed in
# ~/.mispel_ignore.
module Mispel
  # Groups every symbol returned by Symbol.all_symbols by its canonical form
  # (String#fuzzy_canonicalize).
  #
  # Canonical forms shorter than three characters are skipped.
  #
  # Returns a Hash mapping canonical form => Array of original symbol names.
  def Mispel::full_list
    all = Hash.new do |hash,key|
      hash[key] = []
    end

    Symbol.all_symbols.each do |s|
      sym = s.to_s
      canon = sym.fuzzy_canonicalize
      next if canon.length < 3
      all[canon] << sym
    end

    all
  end

  # Compares every element of +all+ with every element before it.
  #
  # all      - Array of canonicalized names.
  # progress - IO that receives each name as it is processed. Defaults to
  #            $stdout; pass nil to run silently.
  #
  # Returns a Hash whose keys are "later#earlier" for each pair on which
  # String#fuzzy_compare_canonicalized returns 0; every value is 1.
  def Mispel::almost_same(all, progress = $stdout)
    result = {}
    all.each_with_index do |sym1, i|
      progress.puts sym1 if progress
      (0...i).each do |j|
        sym2 = all[j]
        result["#{sym1}\##{sym2}"] = 1 if sym1.fuzzy_compare_canonicalized(sym2) == 0
      end
    end
    result
  end

  # Reads the list of accepted word pairs, one pair per line.
  #
  # path - file to read; defaults to .mispel_ignore under ENV['HOME'].
  #
  # A missing file is treated as an empty list (with a warning on stderr), so
  # the tool stays usable before the cache has been generated.
  #
  # Returns a Hash of word pair => 1.
  def Mispel.ignore_list(path = "#{ENV['HOME']}/.mispel_ignore")
    is_good_list = {}
    # File.foreach never treats the path as a command, unlike Kernel#open.
    File.foreach(path) do |wordpair|
      is_good_list[wordpair.chomp] = 1
    end
    is_good_list
  rescue Errno::ENOENT
    warn "Mispel: #{path} not found, so nothing is ignored (run Mispel.rb --cache to create it)."
    {}
  end

  # Prints every near-miss pair among the currently defined symbols that is
  # not listed in the ignore file.
  #
  # The progress lines written by almost_same are printed first, because the
  # whole comparison finishes before any pair is reported.
  #
  # Returns the Hash produced by almost_same.
  def Mispel.problem_cases
    is_good_list = ignore_list
    almost_same(full_list.keys.sort).each_key do |key|
      next if is_good_list.has_key? key
      puts key
    end
  end
end

# Command-line entry point: this branch only runs when the file is executed
# directly, not when it is loaded with require.
#
# With at least one argument it either prints the usage text (help flags) or
# rewrites ~/.mispel_ignore; with no arguments it defines and runs the unit
# tests below.
if $0 == __FILE__ then
  if ARGV.size > 0
    # Help flags accepted: -?, -h, -he, -hel, -help and the same with two dashes.
    if ARGV[0] =~ %r{ ^--?(\?|h(e(lp?)?)?) $  }x
      puts "
Usage :-
  ruby -w Mispel.rb 

  Will run its unit tests.

  ruby -rmodule1 -rmodule2 -rmodule... Mispel.rb --cache 

  Will create ~/.mispel_ignore, a list of known near misses in the
  system and in those modules.

  If you add into your script, after all require statements...

    require 'Mispel'
    Mispel::problem_cases
    exit(1)

  It will (after a long time) print out all near misses in namespace
  that are not in ~/.mispel_ignore

"
      # NOTE(review): the help path exits with status 1, not 0.
      exit(1)
    end

    # Any first argument that is not a help flag lands here; the code does not
    # check for --cache specifically.
    puts "Creating a fresh  ignore word pair list"
    # Overwrites the ignore file with every near-miss pair currently found,
    # one sorted "name#name" pair per line.
    open( "#{ENV['HOME']}/.mispel_ignore", 'w') do |outf|
      Mispel::almost_same(Mispel::full_list.keys.sort).keys.sort.each do |wordpair|
        outf.puts wordpair
      end
    end
  else
    require 'test/unit'

    # Unit tests for the String fuzzy helpers and for Mispel.
    class TC_Mispel < Test::Unit::TestCase
      # Placeholder: the body is only commented-out code that would dump the
      # near-miss list as a Ruby hash literal.
      def test_same
        #      puts "IS_GOODLIST={"
        #      Mispel::almost_same(Mispel::full_list).keys.sort.each do |k|
        #        puts "\t'#{k}'=>1,"
        #      end
        #      puts "}"
      end

      # Helper: prints both strings, their fuzzy regexps and their canonical
      # forms, then asserts that fuzzy_compare reports a near miss (eq true)
      # or no near miss (eq false).
      def fuzzy(a,b,eq)
        puts '-'*70
        p a
        p b
        p a.fuzzy_regex
        p b.fuzzy_regex
        p a.fuzzy_canonicalize
        p b.fuzzy_canonicalize
        if eq
          assert(0 == a.fuzzy_compare(b))
        else
          assert(0 != a.fuzzy_compare(b))
        end
      end

      # Underscores, letter case, a doubled letter and an extra trailing letter
      # all count as near misses; swapping the two words does not.
      def test_fuzzy_compare
        fuzzy( 'do_stuff', 'dostuff', true)
        fuzzy( 'do_stuff', 'doStuff', true)
        fuzzy( 'do_stuff', 'Doostuff', true)
        fuzzy( 'do_stufff', 'dostuff', true)
        fuzzy( 'stuffdo', 'dostuff', false)
      end
      
      # Makes no assertions; the real call is commented out.
      # NOTE(review): the otherwise unused local presumably exists to put a
      # name into the symbol table that canonicalizes the same as this
      # method's name - confirm before removing it.
      def test_problem
        #      Mispel.problem_cases
        testproblem = 2
      end
      
    
      
      # 'testfulllist' is the canonical form of this method's own name, so the
      # key is expected to be present once this class has been defined.
      def test_full_list
        a = Mispel::full_list
        assert( a.has_key?( 'testfulllist'))
        #      puts a.join("\n")
      end

      # Smoke test: only prints the generated regexp, asserts nothing.
      def test_fuzzy_regex
        p "fuzzy_regex".fuzzy_regex
      end
    end
  end
end

		

Submit a new version

You can submit a new version of this snippet if you have modified it and you feel it is appropriate to share with others.