[groonga-commit:2862] ranguba/ranguba [scoring] use OptionParser
null+ranguba at clear-code.com
null+ranguba at clear-code.com
Tue Dec 21 18:06:51 EST 2010
Kenji Okimoto 2010-11-25 04:36:36 +0000 (Thu, 25 Nov 2010)
New Revision: 66914696f0f5764e182cc1436290f91bff2604a9
Log:
use OptionParser
Modified files:
bin/ranguba_indexer
lib/ranguba/customize.rb
lib/ranguba/indexer.rb
Modified: bin/ranguba_indexer (+1 -1)
===================================================================
--- bin/ranguba_indexer 2010-11-25 03:20:35 +0000 (0d0a467)
+++ bin/ranguba_indexer 2010-11-25 04:36:36 +0000 (63aee5f)
@@ -7,6 +7,6 @@ $LOAD_PATH.unshift(File.join(base_dir, "lib"))
require 'ranguba/indexer'
-indexer = Ranguba::Indexer.new
+indexer = Ranguba::Indexer.new(ARGV)
process = indexer.prepare(ARGV)
process.call unless process.nil?
Modified: lib/ranguba/customize.rb (+19 -19)
===================================================================
--- lib/ranguba/customize.rb 2010-11-25 03:20:35 +0000 (cddde86)
+++ lib/ranguba/customize.rb 2010-11-25 04:36:36 +0000 (66852f7)
@@ -55,11 +55,29 @@ module Ranguba
type.blank? ? "unknown" : type
end
- private
def base
Ranguba::Application.config.customize_base_path
end
+ def category_definitions
+ @@category_definitions ||= read_hash("#{base}/master/categories.txt")
+ end
+
+ def type_definitions
+ @@type_definitions ||= read_hash("#{base}/master/types.txt")
+ end
+
+ def type_for_mime(source)
+ source = source.sub(/\s*;\s*.*\z/, "").strip
+ mime, type = type_definitions.select do |mime, type|
+ source == mime
+ end.max_by do |mime, type|
+ mime.length
+ end
+ type
+ end
+
+ private
def read(path)
File.exists?(path) ? File.read(path) : ""
end
@@ -84,24 +102,6 @@ module Ranguba
end
contents
end
-
- def category_definitions
- @@category_definitions ||= read_hash("#{base}/master/categories.txt")
- end
-
- def type_definitions
- @@type_definitions ||= read_hash("#{base}/master/types.txt")
- end
-
- def type_for_mime(source)
- source = source.sub(/\s*;\s*.*\z/, "").strip
- mime, type = type_definitions.select do |mime, type|
- source == mime
- end.max_by do |mime, type|
- mime.length
- end
- type
- end
end
end
end
Modified: lib/ranguba/indexer.rb (+43 -44)
===================================================================
--- lib/ranguba/indexer.rb 2010-11-25 03:20:35 +0000 (8c1c99a)
+++ lib/ranguba/indexer.rb 2010-11-25 04:36:36 +0000 (1fc5b2f)
@@ -20,7 +20,7 @@ class Ranguba::Indexer
val
end
- def initialize(options={})
+ def initialize(argv)
@wget = %w[wget]
@log_file = nil
@url_prefix = nil
@@ -33,58 +33,56 @@ class Ranguba::Indexer
@debug = false
@oldest = nil
- options.each do |key, value|
- send("#{key}=", value)
- end
- end
-
- def set_options(opts)
- banner = opts.banner
- opts.banner = <<EOS
+ parser = OptionParser.new
+ banner = parser.banner
+ parser.banner = <<EOS
#{banner} [URL...]
#{banner} --from-log=LOG base-directory
#{banner} --url-prefix=PREFIX files...
EOS
- opts.define("-w", "--wget[=WGET-PATH]", Shellwords) do |v|
+ parser.on("-w", "--wget[=WGET-PATH]", Shellwords) do |v|
@wget = v
end
- opts.define("-f", "--from-log=FILE") do |v|
+ parser.on("-f", "--from-log=FILE") do |v|
@log_file = v
end
- opts.define("-p", "--url-prefix=URL_PREFIX") do |v|
+ parser.on("-p", "--url-prefix=URL_PREFIX") do |v|
@url_prefix = v
end
- opts.define("-l", "--level=NUMBER", Integer) do |v|
+ parser.on("-l", "--level=NUMBER", Integer) do |v|
@level = v
end
- opts.define("-A", "--accept=LIST", Array) do |v|
+ parser.on("-A", "--accept=LIST", Array) do |v|
@accept.concat(v)
end
- opts.define("-R", "--reject=LIST", Array) do |v|
+ parser.on("-R", "--reject=LIST", Array) do |v|
@reject.concat(v)
end
- opts.define("-d", "--tmpdir=TMPDIR") do |v|
+ parser.on("-d", "--tmpdir=TMPDIR") do |v|
@tmpdir = v
end
- opts.define("-d", "--[no-]auto-delete") do |v|
+ parser.on("-D", "--[no-]auto-delete") do |v|
@auto_delete = v
end
- opts.define("-i", "--[no-]ignore-errors") do |v|
+ parser.on("-i", "--[no-]ignore-errors") do |v|
@ignore_erros = v
end
- opts.define("--[no-]debug") do |v|
+ parser.on("--[no-]debug") do |v|
@debug = v
end
- opts
+ begin
+ parser.parse!(argv)
+ rescue OptionParser::ParseError => ex
+ $stderr.puts ex.message
+ exit 1
+ end
end
def prepare(args)
- if @log_file
- if @url_prefix
- raise OptionParser::InvalidOption, "--url-prefix and --from-log options are exclusive"
- end
+ if @log_file and @url_prefix
+ raise OptionParser::InvalidOption, "--url-prefix and --from-log options are exclusive"
end
case
when @log_file
@@ -110,31 +108,12 @@ EOS
}
else
# crawl
- end
-
- unless process
if args.empty? and (args = Ranguba::Customize.category_definitions.keys).empty?
raise OptionParser::MissingArgument, "no URL"
return
end
process = proc {
- @auto_delete = true
- base = Dir.mktmpdir("ranguba", @tmpdir)
- wget = [{"LC_ALL"=>"C"}, *@wget, "-r", "-l#{@level}", "-np", "-S"]
- wget << "--accept=#{@accept.join(',')}" unless @accept.empty?
- wget << "--reject=#{@reject.join(',')}" unless @reject.empty?
- wget.concat(args)
- wget << {chdir: base, err: [:child, :out]}
- begin
- IO.popen(wget, "r", encoding: "utf-8") {|input|
- process_from_log(base, input)
- }
- ensure
- FileUtils.rm_rf(base)
- end
- if @oldest
- purge_old_records(@oldest)
- end
+ process_crawl(args)
}
end
@@ -190,6 +169,26 @@ EOS
result
end
+ def process_crawl(args)
+ @auto_delete = true
+ base = Dir.mktmpdir("ranguba", @tmpdir)
+ wget = [{"LC_ALL"=>"C"}, *@wget, "-r", "-l#{@level}", "-np", "-S"]
+ wget << "--accept=#{@accept.join(',')}" unless @accept.empty?
+ wget << "--reject=#{@reject.join(',')}" unless @reject.empty?
+ wget.concat(args)
+ wget << {chdir: base, err: [:child, :out]}
+ begin
+ IO.popen(wget, "r", encoding: "utf-8") {|input|
+ process_from_log(base, input)
+ }
+ ensure
+ FileUtils.rm_rf(base)
+ end
+ if @oldest
+ purge_old_records(@oldest)
+ end
+ end
+
def add_entry(url, path, response = {})
begin
metadata, body = decompose_file(path, response)
More information about the groonga-commit
mailing list