|
Versions Of This Snippet::
Download a raw-text version of this code by clicking on "Download Version"
Latest Snippet Version: 1.0
# Run
# ruby Mispel.rb --help
# for help.
#
# Fuzzy-matching extensions to String, used by Mispel to decide whether two
# identifiers are probably misspellings of one another.
class String
  # Cache of compiled fuzzy regexps, keyed by the string they were built from.
  @@regex_memo = {}

  # Builds (and memoizes) a Regexp that loosely matches this string.
  #
  # The pattern is an alternation of:
  # * every character in order, each allowed to repeat ("+") and to be
  #   followed by one optional extra character (".?"), and
  # * for every position, the string with that single character left out.
  #
  # Each character is passed through Regexp.escape, so regexp metacharacters
  # in the receiver (e.g. "(" or ".") are matched literally instead of
  # breaking or widening the pattern. The pattern is not anchored, so it can
  # match anywhere inside a candidate string.
  #
  # Returns the Regexp. If compilation fails, the pattern is printed and the
  # original error is re-raised.
  def fuzzy_regex
    return @@regex_memo[self] if @@regex_memo.has_key?(self)

    escaped = chars.map { |char| Regexp.escape(char) }

    # First alternative: all characters, repeats and one insertion tolerated.
    pattern = escaped.map { |char| "#{char}+.?" }.join

    # One further alternative per position, with that character dropped.
    escaped.each_index do |skipped|
      pattern << '|'
      escaped.each_with_index { |char, j| pattern << char unless j == skipped }
    end

    regex = Regexp.new(pattern)
    @@regex_memo[self] = regex
    regex
  rescue StandardError
    puts "Converting /#{pattern}/ to regexp failed."
    raise
  end

  # Returns the lower-cased receiver with everything except a-z and 0-9
  # removed.
  def fuzzy_canonicalize
    downcase.gsub(/[^a-z0-9]/, '')
  end

  # Canonicalizes both strings and compares them fuzzily.
  #
  # Returns 0 when they are considered the same, otherwise the result of <=>
  # on the canonical forms.
  def fuzzy_compare(other)
    fuzzy_canonicalize.fuzzy_compare_canonicalized(other.fuzzy_canonicalize)
  end

  # Fuzzy comparison of two already-canonicalized strings.
  #
  # Returns 0 when the strings are equal, or when each one's fuzzy regexp
  # matches the other; otherwise returns self <=> other.
  def fuzzy_compare_canonicalized(other)
    return 0 if self == other
    # The match must hold in both directions; the second regexp is only built
    # when the first one already matched.
    return 0 if fuzzy_regex.match(other) && other.fuzzy_regex.match(self)

    self <=> other
  end
end
# Scans the interpreter's symbol table for names that are near misses of one
# another (likely misspellings). Relies on String#fuzzy_canonicalize and
# String#fuzzy_compare_canonicalized.
module Mispel
  # Groups every symbol currently known to the interpreter by its canonical
  # form.
  #
  # Returns a Hash mapping canonical name => Array of original symbol names.
  # Symbols whose canonical form is shorter than 3 characters are skipped.
  def self.full_list
    all = Hash.new { |hash, key| hash[key] = [] }
    Symbol.all_symbols.each do |s|
      sym = s.to_s
      canon = sym.fuzzy_canonicalize
      next if canon.length < 3

      all[canon] << sym
    end
    all
  end

  # Compares every pair of entries in +all+ (an Array of canonicalized names).
  #
  # Returns a Hash whose keys are "later#earlier" for each pair that compares
  # as fuzzily equal; every value is 1. This is quadratic in the size of
  # +all+.
  def self.almost_same(all)
    result = {}
    all.each_with_index do |sym1, i|
      # Progress trace goes to stderr, and only in debug mode, so it never
      # mixes with the near-miss report that problem_cases writes to stdout.
      $stderr.puts sym1 if $DEBUG
      (0...i).each do |j|
        sym2 = all[j]
        result["#{sym1}\##{sym2}"] = 1 if sym1.fuzzy_compare_canonicalized(sym2) == 0
      end
    end
    result
  end

  # Prints every near-miss pair in the current symbol table that is not
  # listed (one pair per line) in ~/.mispel_ignore.
  #
  # A missing ignore file is treated as an empty ignore list, so every near
  # miss is reported instead of raising Errno::ENOENT.
  def self.problem_cases
    ignore_path = "#{ENV['HOME']}/.mispel_ignore"
    is_good_list = {}
    if File.exist?(ignore_path)
      # File.open rather than Kernel#open: the path must only ever be treated
      # as a file name.
      File.open(ignore_path) do |inf|
        inf.each_line { |wordpair| is_good_list[wordpair.chomp] = 1 }
      end
    end
    almost_same(full_list.keys.sort).each_key do |key|
      puts key unless is_good_list.has_key?(key)
    end
  end
end
# Command-line entry point; only active when this file is executed directly.
#
# * no arguments            - loads test/unit and defines the TC_Mispel test
#                             case (the usage text describes this as running
#                             the unit tests)
# * -?, -h ... --help       - prints the usage text and exits with status 1
# * any other first argument - rewrites ~/.mispel_ignore with the current
#                             list of near-miss pairs
if $0 == __FILE__
  if ARGV.empty?
    require 'test/unit'

    class TC_Mispel < Test::Unit::TestCase
      # Placeholder; the disabled code below dumps the near-miss list as a
      # Ruby hash literal.
      def test_same
        # puts "IS_GOODLIST={"
        # Mispel::almost_same(Mispel::full_list).keys.sort.each do |k|
        # puts "\t'#{k}'=>1,"
        # end
        # puts "}"
      end

      # Helper: prints both strings, their fuzzy regexps and canonical forms,
      # then asserts that fuzzy_compare reports equality (expect_same truthy)
      # or inequality (expect_same falsy).
      def fuzzy(lhs, rhs, expect_same)
        puts '-'*70
        p lhs
        p rhs
        p lhs.fuzzy_regex
        p rhs.fuzzy_regex
        p lhs.fuzzy_canonicalize
        p rhs.fuzzy_canonicalize
        outcome = lhs.fuzzy_compare(rhs)
        assert(expect_same ? 0 == outcome : 0 != outcome)
      end

      def test_fuzzy_compare
        [
          ['do_stuff',  'dostuff',  true],
          ['do_stuff',  'doStuff',  true],
          ['do_stuff',  'Doostuff', true],
          ['do_stufff', 'dostuff',  true],
          ['stuffdo',   'dostuff',  false]
        ].each { |lhs, rhs, expect_same| fuzzy(lhs, rhs, expect_same) }
      end

      def test_problem
        # Mispel.problem_cases
        # NOTE(review): presumably this local exists so the symbol table
        # holds a near miss of "test_problem" - confirm before removing.
        testproblem = 2
      end

      # The name of this very method canonicalizes to 'testfulllist'.
      def test_full_list
        listing = Mispel::full_list
        assert(listing.has_key?('testfulllist'))
        # puts a.join("\n")
      end

      def test_fuzzy_regex
        p "fuzzy_regex".fuzzy_regex
      end
    end
  else
    # Matches -?, --?, and any prefix of -help / --help starting at "h".
    if ARGV[0] =~ %r{ ^--?(\?|h(e(lp?)?)?) $ }x
      puts <<-'USAGE'

Usage :-
ruby -w Mispel.rb
Will run its unit tests.
ruby -rmodule1 -rmodule2 -rmodule... Mispel.rb --cache
Will create ~/.mispel_ignore, a list of known near misses in the
system and in those modules.
If you add into your script, after all require statements...
require 'Mispel'
Mispel::problem_cases
exit(1)
It will (after a long time) print out all near misses in namespace
that are not in ~/.mispel_ignore
      USAGE
      exit(1)
    end

    puts "Creating a fresh ignore word pair list"
    open("#{ENV['HOME']}/.mispel_ignore", 'w') do |ignore_file|
      # Computed inside the block, i.e. after the file has been opened for
      # writing, exactly as before.
      near_misses = Mispel::almost_same(Mispel::full_list.keys.sort)
      near_misses.keys.sort.each { |pair| ignore_file.puts pair }
    end
  end
end
Submit a new version: You can submit a new version of this snippet if you have modified it and you feel it is appropriate to share with others.
|
||||||||||||||||||||||||||
