[groonga-commit:1677] ranguba/ranguba [master] Add --accept, --reject and --level options.

null+ranguba at clear-code.com null+ranguba at clear-code.com
Tue Oct 19 03:59:49 EDT 2010


Nobuyoshi Nakada	2010-10-19 07:59:49 +0000 (Tue, 19 Oct 2010)

  New Revision: 41cd26969c5fd7ebe0aecc61dff22e9dc1ad8095

  Log:
    Add --accept, --reject and --level options.

  Modified files:
    indexer/ranguba-indexer

  Modified: indexer/ranguba-indexer (+17 -1)
===================================================================
--- indexer/ranguba-indexer    2010-10-19 07:58:59 +0000 (96af8bb)
+++ indexer/ranguba-indexer    2010-10-19 07:59:49 +0000 (bd91c6e)
@@ -14,6 +14,9 @@ class ChupaDatabase
     @database = nil
     @log_file = nil
     @url_prefix = nil
+    @level = 5
+    @accept = %w[html doc xls ppt pdf]
+    @reject = []
     @category_file = nil
     @category_table = {}
     @tmpdir = nil
@@ -40,6 +43,15 @@ EOS
     opts.define("-p", "--url-prefix=URL_PREFIX") do |v|
       @url_prefix = v
     end
+    opts.define("-l", "--level=NUMBER", Integer) do |v|
+      @level = v
+    end
+    opts.define("-A", "--accept=LIST", Array) do |v|
+      @accept.concat(v)
+    end
+    opts.define("-R", "--reject=LIST", Array) do |v|
+      @reject.concat(v)
+    end
     opts.define("-c", "--category-file=CATEGORY_FILE") do |v|
       @category_file = v
     end
@@ -107,7 +119,11 @@ EOS
       process = proc {
         @auto_delete = true
         base = Dir.mktmpdir("ranguba", @tmpdir)
-        wget = [{"LC_ALL"=>"C"}, *@wget, "-S", *args, chdir: base, err: [:child, :out]]
+        wget = [{"LC_ALL"=>"C"}, *@wget, "-r", "-l#{@level}", "-np", "-S"]
+        wget << "--accept=#{@accept.join(',')}" unless @accept.empty?
+        wget << "--reject=#{@reject.join(',')}" unless @reject.empty?
+        wget.concat(args)
+        wget << {chdir: base, err: [:child, :out]}
         begin
           IO.popen(wget, "r", encoding: "us-ascii") {|input|
             process_from_log(db, base, input)



More information about the groonga-commit mailing list