[Archipelago-submits] [104] trunk/archipelago: added a bitstring class to do fast xoring of strings.

nobody at rubyforge.org nobody at rubyforge.org
Sun Dec 10 09:22:29 EST 2006


Revision: 104
Author:   zond
Date:     2006-12-10 09:22:29 -0500 (Sun, 10 Dec 2006)

Log Message:
-----------
added a bitstring class to do fast xoring of strings. added a raider module that splits and joins stuff in a raid-like manner. added tests and some kind of benchmarks

Modified Paths:
--------------
    trunk/archipelago/README
    trunk/archipelago/lib/archipelago.rb

Added Paths:
-----------
    trunk/archipelago/lib/archipelago/bitstring.rb
    trunk/archipelago/lib/archipelago/raider.rb
    trunk/archipelago/tests/raider_benchmark.rb
    trunk/archipelago/tests/raider_test.rb
    trunk/archipelago/tests/string_benchmark.rb
    trunk/archipelago/tests/string_test.rb

Modified: trunk/archipelago/README
===================================================================
--- trunk/archipelago/README	2006-12-04 17:19:49 UTC (rev 103)
+++ trunk/archipelago/README	2006-12-10 14:22:29 UTC (rev 104)
@@ -4,6 +4,7 @@
 
 == Dependencies:
 Archipelago::Hashish::BerkeleyHashishProvider:: ruby bdb: http://moulon.inra.fr/ruby/bdb.html
+String:: ruby inline: http://www.zenspider.com/ZSS/Products/RubyInline/
 
 == Sub packages:
 Archipelago::Disco:: A UDP multicast discovery service useful to find services in your network with a minimum of configuration.

Added: trunk/archipelago/lib/archipelago/bitstring.rb
===================================================================
--- trunk/archipelago/lib/archipelago/bitstring.rb	                        (rev 0)
+++ trunk/archipelago/lib/archipelago/bitstring.rb	2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,88 @@
+
+require 'rubygems'
+require 'inline'
+
+#
+# Adds a few methods to String, some using the inline gem.
+#
+class String
+
+  inline do |builder|
+    builder.c_raw <<EOC
+static VALUE
+xor1(int argc, VALUE* argv, VALUE self) 
+{
+  VALUE return_value;
+  VALUE other;
+  int i;
+
+  if (argc != 1)
+    rb_raise(rb_eRuntimeError, "xor takes one argument");
+
+  other = argv[0];
+
+  if (TYPE(other) != T_STRING)
+    rb_raise(rb_eRuntimeError, "xor takes a String argument");
+
+  if (RSTRING(self)->len != RSTRING(other)->len)
+    rb_raise(rb_eRuntimeError, "xor argument must be of same size");
+
+  return_value = rb_str_new(0, RSTRING(self)->len);
+
+  for (i = 0; i < RSTRING(return_value)->len; i++)
+    RSTRING(return_value)->ptr[i] = RSTRING(self)->ptr[i] ^ RSTRING(other)->ptr[i];
+
+  return return_value;
+}
+EOC
+    
+    #
+    # Returns a String that is self ^ +s+.
+    #
+    def xor(s)
+      self.xor1(s)
+    end
+
+    alias :^ :xor
+
+    builder.c_raw <<EOC
+static VALUE
+xor2(int argc, VALUE* argv, VALUE self) 
+{
+  VALUE other;
+  int i;
+
+  if (argc != 1)
+    rb_raise(rb_eRuntimeError, "xor takes one argument");
+
+  other = argv[0];
+
+  if (TYPE(other) != T_STRING)
+    rb_raise(rb_eRuntimeError, "xor takes a String argument");
+
+  if (RSTRING(self)->len != RSTRING(other)->len)
+    rb_raise(rb_eRuntimeError, "xor argument must be of same size");
+
+  for (i = 0; i < RSTRING(self)->len; i++)
+    RSTRING(self)->ptr[i] = RSTRING(self)->ptr[i] ^ RSTRING(other)->ptr[i];
+
+  return self;
+}
+EOC
+  end
+
+  #
+  # Returns whether self begins with +s+.
+  #
+  def begins?(s)
+    self[0...(s.size)] == s
+  end
+
+  #
+  # Modifies self so that it becomes self ^ +s+.
+  #
+  def xor!(s)
+    self.xor2(s)
+  end
+
+end

Added: trunk/archipelago/lib/archipelago/raider.rb
===================================================================
--- trunk/archipelago/lib/archipelago/raider.rb	                        (rev 0)
+++ trunk/archipelago/lib/archipelago/raider.rb	2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,183 @@
+
+require 'pp'
+
+module Archipelago
+
+  #
+  # A module providing methods to split any String into parts that can be rejoined to provide
+  # the original String again.
+  #
+  module Raider
+
+    #
+    # Raised when the given parts are not enough to recreate the original data.
+    #
+    class NotEnoughBulkError < RuntimeError
+      def initialize(parts)
+        super("#{parts.inspect} is not enough to recreate the original data")
+      end
+    end
+
+    #
+    # A part of the original data.
+    #
+    class Bulk
+      attr_accessor :data, :size, :length
+      def initialize(size, length)
+        @size = size
+        @length = length
+        @data = nil
+      end
+      def <<(data)
+        if @data
+          @data.xor!(data)
+        else
+          @data = data.clone
+        end
+      end
+      def <=>(other)
+        @data <=> other.data
+      end
+    end
+
+    #
+    # Join +parts+ of Bulk together to form the String they were once created from.
+    #
+    def self.join(*parts)
+      #
+      # We know that this is not enough if the parts are empty or if they are fewer than the
+      # minimum size needed to rejoin this data.
+      #
+      raise NotEnoughBulkError.new(parts) if parts.empty? or parts.size < parts.first.size
+      
+      #
+      # For each bit of the data, create a String that is the beginning of
+      # all parts having this bit as the first set bit, and another String that is the beginning
+      # of all parts having ONLY this bit set.
+      #
+      # Then go through all parts and find one that has this bit as the first one
+      # and all the others having at least this bit set.
+      #
+      # Then xor the one having this bit as the first one into each other part having at least
+      # this bit set.
+      #
+      covered_bits = 0
+      has_only = []
+      has_only_markers = []
+      0.upto(parts.first.size - 1) do |n|
+        having_this_bit = []
+        beginning_with_this_bit = nil
+        beginning_marker = ""
+        0.upto(n - 1) do
+          beginning_marker << "\000"
+        end
+        beginning_marker << "\001"
+        has_only_marker = beginning_marker.clone
+        while has_only_marker.size < parts.first.size
+          has_only_marker << "\000"
+        end
+        has_only_markers[n] = has_only_marker
+
+        parts.each do |part|
+          if part.data.begins?(beginning_marker) && beginning_with_this_bit.nil?
+            beginning_with_this_bit = part
+          elsif part.data[n] == 1
+            having_this_bit << part
+          end
+        end
+        unless beginning_with_this_bit.nil?
+          covered_bits += 1
+          having_this_bit.each do |having|
+            having << beginning_with_this_bit.data
+          end
+        end
+      end
+
+      #
+      # Reject all parts having only 0 bits to get the ones
+      # having one 1 bit each.
+      #
+      parts.reject! do |part|
+        part.data.begins?("\000" * parts.first.size)
+      end
+      
+      #
+      # Raise an error if we didnt find all bits needed.
+      #
+      raise NotEnoughBulkError.new(parts) if parts.empty? || parts.size != parts.first.size
+
+      #
+      # Sort the bits having only one bit set in reverse order and collect their actual
+      # data (throw away the bit padding), join them and truncate them at the wanted size.
+      #
+      return parts.sort do |a,b|
+        b <=> a
+      end.collect do |having|
+        having.data[parts.first.size..-1]
+      end.join[0...parts.first.length]
+    end
+
+    #
+    # Split a String of +data+ into +parts+.
+    #
+    # You will get 2 ** +parts+ - 1 parts from the String,
+    # and ( 2 ** +parts+ ) / 2 + 1 will always be enough to 
+    # recreate it using <b>join</b>.
+    #
+    def self.split(original_data, parts = 3)
+      data = original_data.clone
+
+      #
+      # Remember the original size of data.
+      #
+      size = data.size
+
+      #
+      # Make sure that the data is divisible by +parts+.
+      #
+      if (rest = (data.size % parts)) != 0
+        data << " " * (parts - rest)
+      end
+
+      #
+      # Split the data into +parts+ bits and append a positional marker at the start of each.
+      #
+      data_parts = []
+      0.upto(parts - 1) do |n|
+        pos_marker = ""
+        0.upto(parts - 1) do |m|
+          if m == n
+            pos_marker << "\001"
+          else
+            pos_marker << "\000"
+          end
+        end
+        data_parts << (pos_marker + data[(n * (data.size / parts))...( (n + 1) * (data.size / parts) )])
+      end
+
+      return_parts = []
+
+      #
+      # Create (1 << +parts+) - 1 parts to return
+      #
+      1.upto((1 << parts) - 1) do |n|
+        this_part = Bulk.new(parts, size)
+        #
+        # For each of our data parts, if our number in the return part order
+        # has the bit ouf our order number in the data part array set,
+        # xor us into this return part.
+        #
+        0.upto(data_parts.size - 1) do |m|
+          if n & (1 << m) > 0
+            this_part << data_parts[m]
+          end
+        end
+        return_parts << this_part
+      end
+
+      return return_parts
+    end
+
+  end
+
+end

Modified: trunk/archipelago/lib/archipelago.rb
===================================================================
--- trunk/archipelago/lib/archipelago.rb	2006-12-04 17:19:49 UTC (rev 103)
+++ trunk/archipelago/lib/archipelago.rb	2006-12-10 14:22:29 UTC (rev 104)
@@ -22,3 +22,5 @@
 require 'archipelago/tranny'
 require 'archipelago/treasure'
 require 'archipelago/pirate'
+require 'archipelago/bitstring'
+require 'archipelago/raider'

Added: trunk/archipelago/tests/raider_benchmark.rb
===================================================================
--- trunk/archipelago/tests/raider_benchmark.rb	                        (rev 0)
+++ trunk/archipelago/tests/raider_benchmark.rb	2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,44 @@
+
+require File.join(File.dirname(__FILE__), 'test_helper')
+
+class RaiderBenchmark < Test::Unit::TestCase
+
+  def test_split_join
+    split_and_join(random_string(1000), 1000, 3)
+    split_and_join(random_string(1000), 1000, 4)
+    split_and_join(random_string(1000), 1000, 5)
+    split_and_join(random_string(1000), 1000, 6)
+    split_and_join(random_string(1000), 1000, 7)
+    split_and_join(random_string(1000), 1000, 10)
+  end
+
+  private
+
+  def random_string(n)
+    s = ""
+    rand(n).times do
+      s << (rand(27) + 65).chr
+    end
+    return s
+  end
+
+  def split_and_join(s, n, size)
+    enough = []
+    n.times do
+      p = Archipelago::Raider.split(s, size)
+      assert_equal((1 << size) - 1,p.size)
+      parts_to_use = []
+      begin
+        parts_to_use << p.delete_at(rand(p.size))
+        assert_equal(s, Archipelago::Raider.join(*parts_to_use))
+        enough[parts_to_use.size] ||= 0
+        enough[parts_to_use.size] += 1
+      rescue Archipelago::Raider::NotEnoughBulkError => e
+        retry
+      end
+    end
+    puts "enough bits for #{size} parts"
+    puts enough.inspect
+  end
+
+end

Added: trunk/archipelago/tests/raider_test.rb
===================================================================
--- trunk/archipelago/tests/raider_test.rb	                        (rev 0)
+++ trunk/archipelago/tests/raider_test.rb	2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,47 @@
+
+require File.join(File.dirname(__FILE__), 'test_helper')
+
+class RaiderTest < Test::Unit::TestCase
+
+  def test_split_join
+    3.times do
+      split_and_join(random_string(1000), 100, 3)
+    end
+    3.times do
+      split_and_join(random_string(1000), 100, 4)
+    end
+    3.times do
+      split_and_join(random_string(1000), 100, 5)
+    end
+    3.times do
+      split_and_join(random_string(1000), 100, 6)
+    end
+    3.times do
+      split_and_join(random_string(1000), 100, 7)
+    end
+  end
+
+  private
+
+  def random_string(n)
+    s = ""
+    rand(n).times do
+      s << (rand(27) + 65).chr
+    end
+    return s
+  end
+
+  def split_and_join(s, n, size)
+    n.times do
+      p = Archipelago::Raider.split(s, size)
+      assert_equal((1 << size) - 1,p.size)
+      parts_to_use = []
+      ((1 << (size - 1)) + 1).times do
+        parts_to_use << p.delete_at(rand(p.size))
+      end
+      
+      assert_equal(s, Archipelago::Raider.join(*parts_to_use))
+    end
+  end
+
+end

Added: trunk/archipelago/tests/string_benchmark.rb
===================================================================
--- trunk/archipelago/tests/string_benchmark.rb	                        (rev 0)
+++ trunk/archipelago/tests/string_benchmark.rb	2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,17 @@
+
+require File.join(File.dirname(__FILE__), 'test_helper')
+
+class StringBenchmark < Test::Unit::TestCase
+
+  def test_xor
+    s = " " * (1 << 16)
+    s2 = " " * (1 << 16)
+    bm("String#xor!") do
+      s.xor!(s2)
+    end
+    bm("String#^") do
+      s ^ s2
+    end
+  end
+
+end

Added: trunk/archipelago/tests/string_test.rb
===================================================================
--- trunk/archipelago/tests/string_test.rb	                        (rev 0)
+++ trunk/archipelago/tests/string_test.rb	2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,23 @@
+
+require File.join(File.dirname(__FILE__), 'test_helper')
+
+class StringTest < Test::Unit::TestCase
+
+  def test_begins
+    assert("hej".begins?("he"))
+    assert("hejsan".begins?("hejsan"))
+    assert(!"epa".begins?("pa"))
+    assert(!"epa".begins?("gnu"))
+  end
+
+  def test_xor
+    assert("hej" ^ "hej" == "\000\000\000")
+    assert("\003\003" ^ "\001\001" == "\002\002")
+    s = "\002"
+    assert(s ^ "\001" == "\003")
+    assert_equal("\002", s)
+    s.xor!("\001")
+    assert_equal("\003", s)
+  end
+
+end




More information about the Archipelago-submits mailing list