[groonga-commit:1677] ranguba/ranguba [master] Add --accept, --reject and --level options.
null+ranguba at clear-code.com
null+ranguba at clear-code.com
Tue Oct 19 03:59:49 EDT 2010
Nobuyoshi Nakada 2010-10-19 07:59:49 +0000 (Tue, 19 Oct 2010)
New Revision: 41cd26969c5fd7ebe0aecc61dff22e9dc1ad8095
Log:
Add --accept, --reject and --level options.
Modified files:
indexer/ranguba-indexer
Modified: indexer/ranguba-indexer (+17 -1)
===================================================================
--- indexer/ranguba-indexer 2010-10-19 07:58:59 +0000 (96af8bb)
+++ indexer/ranguba-indexer 2010-10-19 07:59:49 +0000 (bd91c6e)
@@ -14,6 +14,9 @@ class ChupaDatabase
@database = nil
@log_file = nil
@url_prefix = nil
+ @level = 5
+ @accept = %w[html doc xls ppt pdf]
+ @reject = []
@category_file = nil
@category_table = {}
@tmpdir = nil
@@ -40,6 +43,15 @@ EOS
opts.define("-p", "--url-prefix=URL_PREFIX") do |v|
@url_prefix = v
end
+ opts.define("-l", "--level=NUMBER", Integer) do |v|
+ @level = v
+ end
+ opts.define("-A", "--accept=LIST", Array) do |v|
+ @accept.concat(v)
+ end
+ opts.define("-R", "--reject=LIST", Array) do |v|
+ @reject.concat(v)
+ end
opts.define("-c", "--category-file=CATEGORY_FILE") do |v|
@category_file = v
end
@@ -107,7 +119,11 @@ EOS
process = proc {
@auto_delete = true
base = Dir.mktmpdir("ranguba", @tmpdir)
- wget = [{"LC_ALL"=>"C"}, *@wget, "-S", *args, chdir: base, err: [:child, :out]]
+ wget = [{"LC_ALL"=>"C"}, *@wget, "-r", "-l#{@level}", "-np", "-S"]
+ wget << "--accept=#{@accept.join(',')}" unless @accept.empty?
+ wget << "--reject=#{@reject.join(',')}" unless @reject.empty?
+ wget.concat(args)
+ wget << {chdir: base, err: [:child, :out]}
begin
IO.popen(wget, "r", encoding: "us-ascii") {|input|
process_from_log(db, base, input)
More information about the groonga-commit
mailing list