[Archipelago-submits] [104] trunk/archipelago: added a bitstring class to do fast xoring of strings.
nobody at rubyforge.org
nobody at rubyforge.org
Sun Dec 10 09:22:29 EST 2006
Revision: 104
Author: zond
Date: 2006-12-10 09:22:29 -0500 (Sun, 10 Dec 2006)
Log Message:
-----------
added a bitstring class to do fast xoring of strings. added a raider module that splits and joins stuff in a raid-like manner. added tests and some kind of benchmarks
Modified Paths:
--------------
trunk/archipelago/README
trunk/archipelago/lib/archipelago.rb
Added Paths:
-----------
trunk/archipelago/lib/archipelago/bitstring.rb
trunk/archipelago/lib/archipelago/raider.rb
trunk/archipelago/tests/raider_benchmark.rb
trunk/archipelago/tests/raider_test.rb
trunk/archipelago/tests/string_benchmark.rb
trunk/archipelago/tests/string_test.rb
Modified: trunk/archipelago/README
===================================================================
--- trunk/archipelago/README 2006-12-04 17:19:49 UTC (rev 103)
+++ trunk/archipelago/README 2006-12-10 14:22:29 UTC (rev 104)
@@ -4,6 +4,7 @@
== Dependencies:
Archipelago::Hashish::BerkeleyHashishProvider:: ruby bdb: http://moulon.inra.fr/ruby/bdb.html
+String:: ruby inline: http://www.zenspider.com/ZSS/Products/RubyInline/
== Sub packages:
Archipelago::Disco:: A UDP multicast discovery service useful to find services in your network with a minimum of configuration.
Added: trunk/archipelago/lib/archipelago/bitstring.rb
===================================================================
--- trunk/archipelago/lib/archipelago/bitstring.rb (rev 0)
+++ trunk/archipelago/lib/archipelago/bitstring.rb 2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,88 @@
+
+require 'rubygems'
+require 'inline'
+
+#
+# Adds a few methods to String, some using the inline gem.
+#
+class String
+
+ inline do |builder|
+ builder.c_raw <<EOC
+static VALUE
+xor1(int argc, VALUE* argv, VALUE self)
+{
+ VALUE return_value;
+ VALUE other;
+ int i;
+
+ if (argc != 1)
+ rb_raise(rb_eRuntimeError, "xor takes one argument");
+
+ other = argv[0];
+
+ if (TYPE(other) != T_STRING)
+ rb_raise(rb_eRuntimeError, "xor takes a String argument");
+
+ if (RSTRING(self)->len != RSTRING(other)->len)
+ rb_raise(rb_eRuntimeError, "xor argument must be of same size");
+
+ return_value = rb_str_new(0, RSTRING(self)->len);
+
+ for (i = 0; i < RSTRING(return_value)->len; i++)
+ RSTRING(return_value)->ptr[i] = RSTRING(self)->ptr[i] ^ RSTRING(other)->ptr[i];
+
+ return return_value;
+}
+EOC
+
+ #
+ # Returns a String that is self ^ +s+.
+ #
+ def xor(s)
+ self.xor1(s)
+ end
+
+ alias :^ :xor
+
+ builder.c_raw <<EOC
+static VALUE
+xor2(int argc, VALUE* argv, VALUE self)
+{
+ VALUE other;
+ int i;
+
+ if (argc != 1)
+ rb_raise(rb_eRuntimeError, "xor takes one argument");
+
+ other = argv[0];
+
+ if (TYPE(other) != T_STRING)
+ rb_raise(rb_eRuntimeError, "xor takes a String argument");
+
+ if (RSTRING(self)->len != RSTRING(other)->len)
+ rb_raise(rb_eRuntimeError, "xor argument must be of same size");
+
+ for (i = 0; i < RSTRING(self)->len; i++)
+ RSTRING(self)->ptr[i] = RSTRING(self)->ptr[i] ^ RSTRING(other)->ptr[i];
+
+ return self;
+}
+EOC
+ end
+
+ #
+ # Returns whether self begins with +s+.
+ #
+ def begins?(s)
+ self[0...(s.size)] == s
+ end
+
+ #
+ # Modifies self so that it becomes self ^ +s+.
+ #
+ def xor!(s)
+ self.xor2(s)
+ end
+
+end
Added: trunk/archipelago/lib/archipelago/raider.rb
===================================================================
--- trunk/archipelago/lib/archipelago/raider.rb (rev 0)
+++ trunk/archipelago/lib/archipelago/raider.rb 2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,183 @@
+
+require 'pp'
+
+module Archipelago
+
+ #
+ # A module providing methods to split any String into parts that can be rejoined to provide
+ # the original String again.
+ #
+ module Raider
+
+ #
+ # Raised when the given parts are not enough to recreate the original data.
+ #
+ class NotEnoughBulkError < RuntimeError
+ def initialize(parts)
+ super("#{parts.inspect} is not enough to recreate the original data")
+ end
+ end
+
+ #
+ # A part of the original data.
+ #
+ class Bulk
+ attr_accessor :data, :size, :length
+ def initialize(size, length)
+ @size = size
+ @length = length
+ @data = nil
+ end
+ def <<(data)
+ if @data
+ @data.xor!(data)
+ else
+ @data = data.clone
+ end
+ end
+ def <=>(other)
+ @data <=> other.data
+ end
+ end
+
+ #
+ # Join +parts+ of Bulk together to form the String they were once created from.
+ #
+ def self.join(*parts)
+ #
+ # We know that this is not enough if the parts are empty or if they are fewer than the
+ # minimum size needed to rejoin this data.
+ #
+ raise NotEnoughBulkError.new(parts) if parts.empty? or parts.size < parts.first.size
+
+ #
+ # For each bit of the data, create a String that is the beginning of
+ # all parts having this bit as the first set bit, and another String that is the beginning
+ # of all parts having ONLY this bit set.
+ #
+ # Then go through all parts and find one that has this bit as the first one
+ # and all the others having at least this bit set.
+ #
+ # Then xor the one having this bit as the first one into each other part having at least
+ # this bit set.
+ #
+ covered_bits = 0
+ has_only = []
+ has_only_markers = []
+ 0.upto(parts.first.size - 1) do |n|
+ having_this_bit = []
+ beginning_with_this_bit = nil
+ beginning_marker = ""
+ 0.upto(n - 1) do
+ beginning_marker << "\000"
+ end
+ beginning_marker << "\001"
+ has_only_marker = beginning_marker.clone
+ while has_only_marker.size < parts.first.size
+ has_only_marker << "\000"
+ end
+ has_only_markers[n] = has_only_marker
+
+ parts.each do |part|
+ if part.data.begins?(beginning_marker) && beginning_with_this_bit.nil?
+ beginning_with_this_bit = part
+ elsif part.data[n] == 1
+ having_this_bit << part
+ end
+ end
+ unless beginning_with_this_bit.nil?
+ covered_bits += 1
+ having_this_bit.each do |having|
+ having << beginning_with_this_bit.data
+ end
+ end
+ end
+
+ #
+ # Reject all parts having only 0 bits to get the ones
+ # having one 1 bit each.
+ #
+ parts.reject! do |part|
+ part.data.begins?("\000" * parts.first.size)
+ end
+
+ #
+ # Raise an error if we didnt find all bits needed.
+ #
+ raise NotEnoughBulkError.new(parts) if parts.empty? || parts.size != parts.first.size
+
+ #
+ # Sort the bits having only one bit set in reverse order and collect their actual
+ # data (throw away the bit padding), join them and truncate them at the wanted size.
+ #
+ return parts.sort do |a,b|
+ b <=> a
+ end.collect do |having|
+ having.data[parts.first.size..-1]
+ end.join[0...parts.first.length]
+ end
+
+ #
+ # Split a String of +data+ into +parts+.
+ #
+ # You will get 2 ** +parts+ - 1 parts from the String,
+ # and ( 2 ** +parts+ ) / 2 + 1 will always be enough to
+ # recreate it using <b>join</b>.
+ #
+ def self.split(original_data, parts = 3)
+ data = original_data.clone
+
+ #
+ # Remember the original size of data.
+ #
+ size = data.size
+
+ #
+ # Make sure that the data is divisible by +parts+.
+ #
+ if (rest = (data.size % parts)) != 0
+ data << " " * (parts - rest)
+ end
+
+ #
+ # Split the data into +parts+ bits and append a positional marker at the start of each.
+ #
+ data_parts = []
+ 0.upto(parts - 1) do |n|
+ pos_marker = ""
+ 0.upto(parts - 1) do |m|
+ if m == n
+ pos_marker << "\001"
+ else
+ pos_marker << "\000"
+ end
+ end
+ data_parts << (pos_marker + data[(n * (data.size / parts))...( (n + 1) * (data.size / parts) )])
+ end
+
+ return_parts = []
+
+ #
+ # Create (1 << +parts+) - 1 parts to return
+ #
+ 1.upto((1 << parts) - 1) do |n|
+ this_part = Bulk.new(parts, size)
+ #
+ # For each of our data parts, if our number in the return part order
+ # has the bit ouf our order number in the data part array set,
+ # xor us into this return part.
+ #
+ 0.upto(data_parts.size - 1) do |m|
+ if n & (1 << m) > 0
+ this_part << data_parts[m]
+ end
+ end
+ return_parts << this_part
+ end
+
+ return return_parts
+ end
+
+ end
+
+end
Modified: trunk/archipelago/lib/archipelago.rb
===================================================================
--- trunk/archipelago/lib/archipelago.rb 2006-12-04 17:19:49 UTC (rev 103)
+++ trunk/archipelago/lib/archipelago.rb 2006-12-10 14:22:29 UTC (rev 104)
@@ -22,3 +22,5 @@
require 'archipelago/tranny'
require 'archipelago/treasure'
require 'archipelago/pirate'
+require 'archipelago/bitstring'
+require 'archipelago/raider'
Added: trunk/archipelago/tests/raider_benchmark.rb
===================================================================
--- trunk/archipelago/tests/raider_benchmark.rb (rev 0)
+++ trunk/archipelago/tests/raider_benchmark.rb 2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,44 @@
+
+require File.join(File.dirname(__FILE__), 'test_helper')
+
+class RaiderBenchmark < Test::Unit::TestCase
+
+ def test_split_join
+ split_and_join(random_string(1000), 1000, 3)
+ split_and_join(random_string(1000), 1000, 4)
+ split_and_join(random_string(1000), 1000, 5)
+ split_and_join(random_string(1000), 1000, 6)
+ split_and_join(random_string(1000), 1000, 7)
+ split_and_join(random_string(1000), 1000, 10)
+ end
+
+ private
+
+ def random_string(n)
+ s = ""
+ rand(n).times do
+ s << (rand(27) + 65).chr
+ end
+ return s
+ end
+
+ def split_and_join(s, n, size)
+ enough = []
+ n.times do
+ p = Archipelago::Raider.split(s, size)
+ assert_equal((1 << size) - 1,p.size)
+ parts_to_use = []
+ begin
+ parts_to_use << p.delete_at(rand(p.size))
+ assert_equal(s, Archipelago::Raider.join(*parts_to_use))
+ enough[parts_to_use.size] ||= 0
+ enough[parts_to_use.size] += 1
+ rescue Archipelago::Raider::NotEnoughBulkError => e
+ retry
+ end
+ end
+ puts "enough bits for #{size} parts"
+ puts enough.inspect
+ end
+
+end
Added: trunk/archipelago/tests/raider_test.rb
===================================================================
--- trunk/archipelago/tests/raider_test.rb (rev 0)
+++ trunk/archipelago/tests/raider_test.rb 2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,47 @@
+
+require File.join(File.dirname(__FILE__), 'test_helper')
+
+class RaiderTest < Test::Unit::TestCase
+
+ def test_split_join
+ 3.times do
+ split_and_join(random_string(1000), 100, 3)
+ end
+ 3.times do
+ split_and_join(random_string(1000), 100, 4)
+ end
+ 3.times do
+ split_and_join(random_string(1000), 100, 5)
+ end
+ 3.times do
+ split_and_join(random_string(1000), 100, 6)
+ end
+ 3.times do
+ split_and_join(random_string(1000), 100, 7)
+ end
+ end
+
+ private
+
+ def random_string(n)
+ s = ""
+ rand(n).times do
+ s << (rand(27) + 65).chr
+ end
+ return s
+ end
+
+ def split_and_join(s, n, size)
+ n.times do
+ p = Archipelago::Raider.split(s, size)
+ assert_equal((1 << size) - 1,p.size)
+ parts_to_use = []
+ ((1 << (size - 1)) + 1).times do
+ parts_to_use << p.delete_at(rand(p.size))
+ end
+
+ assert_equal(s, Archipelago::Raider.join(*parts_to_use))
+ end
+ end
+
+end
Added: trunk/archipelago/tests/string_benchmark.rb
===================================================================
--- trunk/archipelago/tests/string_benchmark.rb (rev 0)
+++ trunk/archipelago/tests/string_benchmark.rb 2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,17 @@
+
+require File.join(File.dirname(__FILE__), 'test_helper')
+
+class StringBenchmark < Test::Unit::TestCase
+
+ def test_xor
+ s = " " * (1 << 16)
+ s2 = " " * (1 << 16)
+ bm("String#xor!") do
+ s.xor!(s2)
+ end
+ bm("String#^") do
+ s ^ s2
+ end
+ end
+
+end
Added: trunk/archipelago/tests/string_test.rb
===================================================================
--- trunk/archipelago/tests/string_test.rb (rev 0)
+++ trunk/archipelago/tests/string_test.rb 2006-12-10 14:22:29 UTC (rev 104)
@@ -0,0 +1,23 @@
+
+require File.join(File.dirname(__FILE__), 'test_helper')
+
+class StringTest < Test::Unit::TestCase
+
+ def test_begins
+ assert("hej".begins?("he"))
+ assert("hejsan".begins?("hejsan"))
+ assert(!"epa".begins?("pa"))
+ assert(!"epa".begins?("gnu"))
+ end
+
+ def test_xor
+ assert("hej" ^ "hej" == "\000\000\000")
+ assert("\003\003" ^ "\001\001" == "\002\002")
+ s = "\002"
+ assert(s ^ "\001" == "\003")
+ assert_equal("\002", s)
+ s.xor!("\001")
+ assert_equal("\003", s)
+ end
+
+end
More information about the Archipelago-submits
mailing list