[groonga-commit:2573] ranguba/chupatext [master] detect encrypted data.

null+ranguba at clear-code.com null+ranguba at clear-code.com
Sat Dec 18 22:55:33 EST 2010


Kouhei Sutou	2010-12-19 03:55:33 +0000 (Sun, 19 Dec 2010)

  New Revision: 50648c2d7cd5cfaa69ac6aa3cd0d66c69675ccd6

  Log:
    detect encrypted data.

  Added files:
    modules/ruby/lib/chupatext/decomposers/encrypted.rb
  Modified files:
    chupatext/chupa_data.c
    modules/excel.c
    modules/ruby/lib/chupatext/decomposers/Makefile.am

  Modified: chupatext/chupa_data.c (+29 -1)
===================================================================
--- chupatext/chupa_data.c    2010-12-19 03:54:03 +0000 (f304095)
+++ chupatext/chupa_data.c    2010-12-19 03:55:33 +0000 (9225fb6)
@@ -20,6 +20,12 @@
  *  MA  02110-1301  USA
  */
 
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include <string.h>
+
 #include "chupa_logger.h"
 #include "chupa_data.h"
 #include "chupa_gsf_input_stream.h"
@@ -103,6 +109,19 @@ read_head_data(GInputStream *stream, guchar *buffer, gsize buffer_size,
     return TRUE;
 }
 
+const gchar encrypted_magic[] = "SCDSA002";
+const gchar encrypted_mime_type[] = "application/x-chupatext-encrypted";
+
+/* TRUE if data starts with encrypted_magic; its trailing NUL is not compared. */
+static gboolean
+encrypted_data_p(const guchar *data, gsize data_length)
+{
+    const gsize magic_length = sizeof(encrypted_magic) - 1;
+
+    return (data_length >= magic_length &&
+            memcmp(data, encrypted_magic, magic_length) == 0);
+}
+
 static gchar *
 guess_mime_type(const char *name, GInputStream *stream, gboolean *uncertain)
 {
@@ -110,6 +129,7 @@ guess_mime_type(const char *name, GInputStream *stream, gboolean *uncertain)
     gchar *mime_type = NULL;
     guchar data[1024];
     gsize data_length;
+    gboolean text_p;
 
     if (read_head_data(stream, data, sizeof(data), &data_length))
         content_type = g_content_type_guess(name, data, data_length, uncertain);
@@ -121,6 +141,14 @@ guess_mime_type(const char *name, GInputStream *stream, gboolean *uncertain)
 
     g_free(content_type);
 
+    text_p = (mime_type && g_str_has_prefix(mime_type, "text/"));
+    if (!text_p && data_length > 0 && encrypted_data_p(data, data_length)) {
+        g_free(mime_type);
+        if (uncertain)
+            *uncertain = FALSE;
+        return g_strdup(encrypted_mime_type);
+    }
+
     return mime_type;
 }
 
@@ -151,7 +179,7 @@ constructed(GObject *object)
     }
     filename = chupa_metadata_get_string(priv->metadata, meta_filename, NULL);
     mime_type = guess_mime_type(filename, priv->stream, NULL);
-    chupa_metadata_set_string(priv->metadata, "mime-type", mime_type);
+    chupa_metadata_set_mime_type(priv->metadata, mime_type);
     g_free(mime_type);
 }
 

  Modified: modules/excel.c (+0 -42)
===================================================================
--- modules/excel.c    2010-12-19 03:54:03 +0000 (a8f5c26)
+++ modules/excel.c    2010-12-19 03:55:33 +0000 (34eb1fb)
@@ -251,26 +251,6 @@ printerr_to_log_delegator (const gchar *string)
     }
 }
 
-static const gchar excel_magic[8] = "\320\317\021\340\241\261\032\341";
-
-static gboolean
-chupa_excel_plain_file_p(GsfInput *source)
-{
-    guint8 header[sizeof(excel_magic)];
-    if (!gsf_input_read(source, sizeof(header), header)) return FALSE;
-    return memcmp(header, excel_magic, sizeof(header)) == 0;
-}
-
-static gboolean
-chupa_excel_encrypted_file_p(ChupaDecomposer *decomposer, GsfInput *source)
-{
-    if (!chupa_utils_string_equal(chupa_decomposer_get_mime_type(decomposer),
-                                  EXCEL_MIME_TYPE))
-        return FALSE;
-
-    return !chupa_excel_plain_file_p(source);
-}
-
 static gboolean
 get_time_value(const gchar *name, const GValue *value, GTimeVal *time_value)
 {
@@ -395,28 +375,6 @@ feed(ChupaDecomposer *decomposer, ChupaFeeder *feeder,
     filename = chupa_data_get_filename(data);
 
     source = chupa_data_input_new(data);
-    if (chupa_excel_encrypted_file_p(decomposer, source)) {
-        g_set_error(error,
-                    CHUPA_DECOMPOSER_ERROR,
-                    CHUPA_DECOMPOSER_ERROR_FEED,
-                    "[decomposer][excel][feed][%s][unsupported]: "
-                    "encrypted Excel file isn't supported",
-                    filename);
-        g_object_unref(source);
-        return FALSE;
-    }
-
-    if (gsf_input_seek(source, 0, G_SEEK_SET)) {
-        g_set_error(error,
-                    CHUPA_DECOMPOSER_ERROR,
-                    CHUPA_DECOMPOSER_ERROR_FEED,
-                    "[decomposer][excel][feed][%s][error]"
-                    ": failed to seek input to head",
-                    filename);
-        g_object_unref(source);
-        return FALSE;
-    }
-
     io_context = go_io_context_new(command_context);
     old_print_error_func = g_set_printerr_handler(printerr_to_log_delegator);
     view = wb_view_new_from_input(source, filename, opener, io_context, NULL);

  Modified: modules/ruby/lib/chupatext/decomposers/Makefile.am (+2 -1)
===================================================================
--- modules/ruby/lib/chupatext/decomposers/Makefile.am    2010-12-19 03:54:03 +0000 (813dc41)
+++ modules/ruby/lib/chupatext/decomposers/Makefile.am    2010-12-19 03:55:33 +0000 (4217d8b)
@@ -1,4 +1,5 @@
 dist_ruby_decomposers_DATA =			\
 	html.rb					\
 	powerpoint.rb				\
-	mail.rb
+	mail.rb					\
+	encrypted.rb

  Added: modules/ruby/lib/chupatext/decomposers/encrypted.rb (+25 -0) 100644
===================================================================
--- /dev/null
+++ modules/ruby/lib/chupatext/decomposers/encrypted.rb    2010-12-19 03:55:33 +0000 (e804dd4)
@@ -0,0 +1,25 @@
+# Copyright (C) 2010  Kouhei Sutou <kou at clear-code.com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301  USA
+
+class Chupa::Encrypted < Chupa::BaseDecomposer
+  mime_types "application/x-chupatext-encrypted"
+
+  def decompose
+    tag = "[unsupported]"
+    raise EncryptedError, "#{tag}: encrypted data isn't supported."
+  end
+end



More information about the groonga-commit mailing list