# frozen_string_literal: true

# passgen.rb - prints random words whose letter sequences follow the letter
# trigram statistics of a dictionary file.
#
# Usage: passgen.rb LEN [NUM_TO_GENERATE]
#   LEN              length of each generated word (must be >= 3)
#   NUM_TO_GENERATE  how many words to print (default 1, must be >= 1)
#
# Three tables of trigram counts are gathered from the dictionary:
#   * probs      - transitions in the body of a word
#   * pen_probs  - the transition that produces the second-to-last letter
#   * last_probs - the transition that produces the last letter
# A word is generated by drawing LEN - 2 letters from probs, then one from
# pen_probs and one from last_probs, each conditioned on the two letters
# generated before it.

require 'securerandom'

DICTIONARY_FILE = 'C:\dict.txt' # list of words, one per line

ALPHABET_SIZE = 26
# Pseudo-letter index used as context for positions before the first letter.
START_MARKER = ALPHABET_SIZE
TABLE_SIZE = ALPHABET_SIZE + 1
# Dictionary words shorter than this are ignored.
MIN_DICT_WORD_LENGTH = 4
# Byte value of "a"; letters are stored as offsets from it (0..25).
# String#ord is used because "a"[0] returns a String, not an Integer,
# on Ruby 1.9 and later.
BASE = 'a'.ord

# Prints the command-line synopsis and terminates the script.
def usage
  puts "passgen.rb LEN [NUM_TO_GENERATE]"
  exit
end

# Returns a TABLE_SIZE x TABLE_SIZE x TABLE_SIZE array of zeros, indexed as
# table[letter two back][previous letter][next letter].
def new_trigram_table
  Array.new(TABLE_SIZE) { Array.new(TABLE_SIZE) { Array.new(TABLE_SIZE, 0) } }
end

# Reads the word list at +path+ and returns the raw trigram counts as
# [probs, pen_probs, last_probs]. Only lines made up entirely of ASCII
# letters and at least MIN_DICT_WORD_LENGTH long are counted.
#
# If you need performance, hack this code to compute probabilities once and
# store for reading.
def count_trigrams(path)
  probs = new_trigram_table
  pen_probs = new_trigram_table
  last_probs = new_trigram_table

  # Binary mode: bytes that are invalid in the default encoding must not make
  # the regexp match raise; such lines are simply rejected by the pattern.
  File.open(path, 'rb') do |file|
    file.each_line do |line|
      # Strip the line terminator first so that CRLF files and a final line
      # without a newline are treated like any other line.
      word = line.chomp
      next if word.length < MIN_DICT_WORD_LENGTH
      next if word !~ /\A[a-zA-Z]+\z/

      prev = [START_MARKER, START_MARKER]
      word.downcase.each_byte do |byte|
        letter = byte - BASE
        probs[prev[-2]][prev[-1]][letter] += 1
        prev << letter
      end

      # The last two transitions belong to the dedicated end-of-word tables,
      # so move them out of the general table.
      pen_probs[prev[-4]][prev[-3]][prev[-2]] += 1
      last_probs[prev[-3]][prev[-2]][prev[-1]] += 1
      probs[prev[-4]][prev[-3]][prev[-2]] -= 1
      probs[prev[-3]][prev[-2]][prev[-1]] -= 1
    end
  end

  [probs, pen_probs, last_probs]
end

# Converts every innermost row of +table+ from counts to probabilities, in
# place. Rows with no observations are left as zeros (dividing would turn
# them into NaN).
def normalize_rows(table)
  table.each do |second_letter|
    second_letter.each do |third_letter|
      sum = third_letter.inject(0) { |acc, count| acc + count }
      next if sum.zero?

      third_letter.map! { |count| count.to_f / sum }
    end
  end
end

# Draws a letter index from +row+ (a row of probabilities) using a
# cryptographically secure random number. Returns nil when the row has no
# non-zero entry.
def pick_letter(row)
  target = SecureRandom.random_number # Float in [0, 1)
  total = 0.0
  fallback = nil
  row.each_with_index do |prob, letter|
    next unless prob > 0

    total += prob
    return letter if total > target

    fallback = letter
  end
  # Reached only if rounding left the cumulative sum just below target, or if
  # the row is empty (fallback is nil in that case).
  fallback
end

# Builds one word of +word_length+ letters from the three normalized tables
# and returns it as a String.
def generate_word(word_length, probs, pen_probs, last_probs)
  chars = []
  stages = Array.new(word_length - 2, probs) << pen_probs << last_probs
  stages.each do |table|
    row = table[chars[-2] || START_MARKER][chars[-1] || START_MARKER]
    letter = pick_letter(row)
    chars << letter if letter
  end

  # probs occasionally lead to shorter words; pad with uniformly random letters
  (word_length - chars.length).times { chars << SecureRandom.random_number(ALPHABET_SIZE) }

  chars.map { |c| (c + BASE).chr }.join
end

usage unless [1, 2].include?(ARGV.length)

word_length, word_count = ARGV[0].to_i, (ARGV[1] || 1).to_i
usage if word_length < 3 || word_count < 1

tables = count_trigrams(DICTIONARY_FILE)
tables.each { |table| normalize_rows(table) }
probs, pen_probs, last_probs = tables

# No srand call: SecureRandom draws from the operating system's secure source,
# unlike a generator seeded with the current time.
word_count.times { puts generate_word(word_length, probs, pen_probs, last_probs) }