[Rubinius-devel] String and MatchData Patch
Alan Hurdle
alan_hurdle at msn.com
Fri Jan 5 14:01:20 EST 2007
Index: kernel/regexp.rb
===================================================================
--- kernel/regexp.rb (revision 309)
+++ kernel/regexp.rb (working copy)
@@ -47,7 +47,28 @@
def string
@source
end
+
+ def begin(idx)
+ return full.at(0) if idx == 0
+ return @region.at(idx - 1).at(0)
+ end
+ def end(idx)
+ return full.at(1) if idx == 0
+ @region.at(idx - 1).at(1)
+ end
+
+ def offset(idx)
+ out = []
+ out << self.begin(idx)
+ out << self.end(idx)
+ return out
+ end
+
+ def length
+ @region.length + 1
+ end
+
def captures
out = []
@region.each do |tup|
Index: kernel/string.rb
===================================================================
--- kernel/string.rb (revision 309)
+++ kernel/string.rb (working copy)
@@ -162,15 +162,34 @@
i = 0
@data.each { |b| return i if b == arg; i += 1 }
elsif arg.is_a? String
+ idx = 0
+ if offset
+ if offset >= 0
+ return nil if offset >= self.size
+ idx = offset
+ else
+ return nil if (1-offset) >= self.size
+ idx = self.size + offset
+ end
+ end
argsize = arg.size
max = self.size - argsize
if max >= 0 and argsize > 0
- 0.upto(max) do |i|
+ idx.upto(max) do |i|
if @data.get_byte(i) == arg.data.get_byte(0)
return i if substring(i,argsize) == arg
end
end
end
+ elsif arg.is_a? Regexp
+ idx = offset ? offset : 0
+ mstr = self[idx..-1]
+ offset = self.size - mstr.size
+ m = arg.match(mstr)
+ if m
+ return offset + m.begin(0)
+ end
+ return nil
else
raise ArgumentError.new("String#index cannot accept #{arg.class} objects")
end
@@ -190,7 +209,9 @@
return (self.include?(arg) ? arg.dup : nil)
elsif arg.respond_to? :match
m = arg.match(self)
- m[len.to_i] if m
+ return m[len.to_i] if m && len
+ return m[0] if m
+ return nil
elsif arg.respond_to?(:first) and arg.respond_to?(:last)
from = arg.first
to = arg.last
@@ -252,32 +273,77 @@
return ret
end
-
+
+ # TODO: check that the string will never go over the maximum range
+ # as the function is not supposed to raise an exception.
def to_i(radix=10)
- if self[0] == ?-
+ i = 0
+ # had to move the char definition out of the block to compile
+ char = 0
+
+ # leading whitespace removal
+ loop do
+ return 0 if i >= @bytes
+ char = @data.get_byte(i)
+ if (char != 32 && char != ?\t && char != ?\n && char != ?\r && char != ?\f)
+ break
+ end
+ i += 1
+ end
+
+ # Sign determination
+ if self[i] == ?-
neg = true
- i = 1
+ i += 1
else
neg = false
- i = 0
+ if self[i] == ?+
+ i += 1
+ end
end
+ # Determine the radix from the string for radix = 0
+ # 0b = 2, 0o = 8, 0x = 16, defaults to radix = 10
+ if radix == 0
+ radix = 10
+ if self[i] == ?0
+ if self[i+1] == ?b
+ radix = 2
+ i += 2
+ elsif self[i+1] == ?o
+ radix = 8
+ i += 2
+ elsif self[i+1] == ?x
+ radix = 16
+ i += 2
+ end
+ end
+ end
+
ret = 0
i.upto(@bytes - 1) do |idx|
char = @data.get_byte(idx)
+ value = 0
if char >= ?0 and char <= ?9
- ret *= radix
- ret += (char - ?0)
+ value = (char - ?0)
+ elsif char >= ?A and char <= ?Z
+ value = (char - ?A + 10)
+ elsif char >= ?a and char <= ?z
+ value = (char - ?a + 10)
# An invalid character.
elsif char != ?_
- return ret
+ return neg ? -ret : ret
end
-
+
+ if value >= radix
+ return neg ? -ret : ret
+ end
+
+ ret *= radix
+ ret += value
end
- ret = -ret if neg
-
- return ret
+ return neg ? -ret : ret
end
end
Index: spec/core/string_spec.rb
===================================================================
--- spec/core/string_spec.rb (revision 309)
+++ spec/core/string_spec.rb (working copy)
@@ -32,15 +32,25 @@
specify "to_i should convert the string to an integer base (2, 8, 10, or 16)" do
example do
p [ "12345".to_i,
+ " 12345".to_i,
+ "+12345".to_i,
+ "-12345".to_i,
+ " ".to_i,
+ "hello".to_i,
"99 red balloons".to_i,
"0a".to_i,
"0a".to_i(16),
+ "0b1100101".to_i(0),
+ "0o1100101".to_i(0),
+ "0x1100101".to_i(0),
+ "01100101".to_i(0),
+ "1100101".to_i(0),
"hello".to_i,
"1100101".to_i(2),
"1100101".to_i(8),
"1100101".to_i(10),
"1100101".to_i(16) ]
- end.should == '[12345, 99, 0, 10, 0, 101, 294977, 1100101, 17826049]'
+ end.should == '[12345, 12345, 12345, -12345, 0, 0, 99, 0, 10, 101, 294977, 17826049, 1100101, 1100101, 0, 101, 294977, 1100101, 17826049]'
end
specify "[] with index should return the code of the character at index" do
_________________________________________________________________
Be one of the first to try Windows Live Mail.
http://ideas.live.com/programpage.aspx?versionId=5d21c51a-b161-4314-9b0e-4911fb2b2e6d
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://rubyforge.org/pipermail/rubinius-devel/attachments/20070106/9f9746c5/attachment-0001.html
More information about the Rubinius-devel mailing list