[Rubinius-devel] String and MatchData Patch

Alan Hurdle alan_hurdle at msn.com
Fri Jan 5 14:01:20 EST 2007


Index: kernel/regexp.rb
===================================================================
--- kernel/regexp.rb	(revision 309)
+++ kernel/regexp.rb	(working copy)
@@ -47,7 +47,28 @@
   def string
     @source
   end
+
+  def begin(idx)
+    return full.at(0) if idx == 0
+    return @region.at(idx - 1).at(0)
+  end
   
+  def end(idx)
+    return full.at(1) if idx == 0
+    @region.at(idx - 1).at(1)
+  end
+
+  def offset(idx)
+    out = []
+    out << self.begin(idx)
+    out << self.end(idx)
+    return out
+  end
+
+  def length
+    @region.length + 1
+  end
+
   def captures
     out = []
     @region.each do |tup|
Index: kernel/string.rb
===================================================================
--- kernel/string.rb	(revision 309)
+++ kernel/string.rb	(working copy)
@@ -162,15 +162,34 @@
       i = 0
       @data.each { |b| return i if b == arg; i += 1 }
     elsif arg.is_a? String
+      idx = 0
+      if offset
+        if offset >= 0
+          return nil if offset >= self.size
+          idx = offset
+        else
+          return nil if (1-offset) >= self.size
+          idx = self.size + offset
+        end
+      end
       argsize = arg.size
       max = self.size - argsize
       if max >= 0 and argsize > 0
-        0.upto(max) do |i|
+        idx.upto(max) do |i|
           if @data.get_byte(i) == arg.data.get_byte(0)
             return i if substring(i,argsize) == arg
           end
         end
       end
+    elsif arg.is_a? Regexp
+      idx = offset ? offset : 0
+      mstr = self[idx..-1]
+      offset = self.size - mstr.size
+      m = arg.match(mstr)
+      if m
+        return offset + m.begin(0)
+      end
+      return nil
     else
       raise ArgumentError.new("String#index cannot accept #{arg.class} objects")
     end
@@ -190,7 +209,9 @@
       return (self.include?(arg) ? arg.dup : nil)
     elsif arg.respond_to? :match
       m = arg.match(self)
-      m[len.to_i] if m
+      return m[len.to_i] if m && len
+      return m[0] if m
+      return nil
     elsif arg.respond_to?(:first) and arg.respond_to?(:last)
       from = arg.first
       to = arg.last
@@ -252,32 +273,77 @@
     
     return ret
   end
-  
+
+  # TODO: verify that a value exceeding the maximum integer range cannot
+  #       make this method raise; to_i is not supposed to raise an exception.
   def to_i(radix=10)
-    if self[0] == ?-
+    i = 0
+    # char has to be defined outside the block for this to compile
+    char = 0
+
+    # leading whitespace removal
+    loop do
+      return 0 if i >= @bytes
+      char = @data.get_byte(i) 
+      if (char != 32 && char != ?\t && char != ?\n && char != ?\r && char != ?\f)
+        break
+      end
+      i += 1
+    end
+
+    # Sign determination
+    if self[i] == ?-
       neg = true
-      i = 1
+      i += 1
     else
       neg = false
-      i = 0
+      if self[i] == ?+
+        i += 1
+      end
     end
     
+    # Determine the radix from the string for radix = 0
+    # 0b = 2, 0o = 8, 0x = 16, defaults to radix = 10
+    if radix == 0
+      radix = 10
+      if self[i] == ?0
+        if self[i+1] == ?b
+          radix = 2
+          i += 2
+        elsif self[i+1] == ?o
+          radix = 8
+          i += 2
+        elsif self[i+1] == ?x
+          radix = 16
+          i += 2
+        end
+      end
+    end
+  
     ret = 0
     i.upto(@bytes - 1) do |idx|
       char = @data.get_byte(idx)
+      value = 0
       if char >= ?0 and char <= ?9
-        ret *= radix
-        ret += (char - ?0)
+        value = (char - ?0)
+      elsif char >= ?A and char <= ?Z
+        value = (char - ?A + 10)
+      elsif char >= ?a and char <= ?z
+        value = (char - ?a + 10)
       # An invalid character.
       elsif char != ?_
-        return ret
+        return neg ? -ret : ret
       end
-      
+
+      if value >= radix
+       return neg ? -ret : ret
+      end
+
+      ret *= radix
+      ret += value
     end
     
-    ret = -ret if neg
-    
-    return ret
+    return neg ? -ret : ret
   end
 end
 
Index: spec/core/string_spec.rb
===================================================================
--- spec/core/string_spec.rb	(revision 309)
+++ spec/core/string_spec.rb	(working copy)
@@ -32,15 +32,25 @@
   specify "to_i should convert the string to an integer base (2, 8, 10, or 16)" do
     example do
       p [ "12345".to_i,
+          " 12345".to_i,
+          "+12345".to_i,
+          "-12345".to_i,
+          " ".to_i,
+          "hello".to_i,
           "99 red balloons".to_i,
           "0a".to_i,
           "0a".to_i(16),
+          "0b1100101".to_i(0),
+          "0o1100101".to_i(0),
+          "0x1100101".to_i(0),
+          "01100101".to_i(0),
+          "1100101".to_i(0),
           "hello".to_i,
           "1100101".to_i(2),
           "1100101".to_i(8),
           "1100101".to_i(10),
           "1100101".to_i(16) ]
-    end.should == '[12345, 99, 0, 10, 0, 101, 294977, 1100101, 17826049]'
+    end.should == '[12345, 12345, 12345, -12345, 0, 0, 99, 0, 10, 101, 294977, 17826049, 1100101, 1100101, 0, 101, 294977, 1100101, 17826049]'
   end
   
   specify "[] with index should return the code of the character at index" do

_________________________________________________________________
Be one of the first to try Windows Live Mail.
http://ideas.live.com/programpage.aspx?versionId=5d21c51a-b161-4314-9b0e-4911fb2b2e6d
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://rubyforge.org/pipermail/rubinius-devel/attachments/20070106/9f9746c5/attachment-0001.html 


More information about the Rubinius-devel mailing list