We work by substituting non-printing characters into the text. For now I'm assuming that I can substitute a character in the range 0..8 for a 7-bit character without damaging the encoded string, but this might be optimistic.
This maps HTML tags to the corresponding attribute char
This maps delimiters that occur around words (such as bold
or tt) where the start and end delimiters are the same. This
lets us optimize the regexp.
A \ in front of a character that would normally be processed turns off processing. We do this by turning \< into <#{PROTECT_ATTR}
And this maps special sequences to a name. A special sequence is something like a WikiWord
And this is used when the delimiters aren't the same. In this case the hash maps a pattern to the attribute character
# File rdoc/markup/simple_markup/inline.rb, line 208
def initialize
add_word_pair("*", "*", :BOLD)
add_word_pair("_", "_", :EM)
add_word_pair("+", "+", :TT)
add_html("em", :EM)
add_html("i", :EM)
add_html("b", :BOLD)
add_html("tt", :TT)
add_html("code", :TT)
add_special(/<!--(.*?)-->/, :COMMENT)
end
# File rdoc/markup/simple_markup/inline.rb, line 238
def add_html(tag, name)
HTML_TAGS[tag.downcase] = Attribute.bitmap_for(name)
end
# File rdoc/markup/simple_markup/inline.rb, line 242
def add_special(pattern, name)
SPECIAL[pattern] = Attribute.bitmap_for(name)
end
# File rdoc/markup/simple_markup/inline.rb, line 222
def add_word_pair(start, stop, name)
raise "Word flags may not start '<'" if start[0] == ?<
bitmap = Attribute.bitmap_for(name)
if start == stop
MATCHING_WORD_PAIRS[start] = bitmap
else
pattern = Regexp.new("(" + Regexp.escape(start) + ")" +
# "([A-Za-z]+)" +
"(\\S+)" +
"(" + Regexp.escape(stop) +")")
WORD_PAIR_MAP[pattern] = bitmap
end
PROTECTABLE << start[0,1]
PROTECTABLE.uniq!
end
Return an attribute object with the given turn_on and turn_off bits set
# File rdoc/markup/simple_markup/inline.rb, line 122
def attribute(turn_on, turn_off)
AttrChanger.new(turn_on, turn_off)
end
# File rdoc/markup/simple_markup/inline.rb, line 127
def change_attribute(current, new)
diff = current ^ new
attribute(new & diff, current & diff)
end
# File rdoc/markup/simple_markup/inline.rb, line 132
def changed_attribute_by_name(current_set, new_set)
current = new = 0
current_set.each {|name| current |= Attribute.bitmap_for(name) }
new_set.each {|name| new |= Attribute.bitmap_for(name) }
change_attribute(current, new)
end
Map attributes like <b>text</b> to the sequence \001\002<char>\001\003<char>, where <char> is a per-attribute specific character
# File rdoc/markup/simple_markup/inline.rb, line 148
def convert_attrs(str, attrs)
# first do matching ones
tags = MATCHING_WORD_PAIRS.keys.join("")
re = "(^|\\W)([#{tags}])([A-Za-z_]+?)\\2(\\W|\$)"
# re = "(^|\\W)([#{tags}])(\\S+?)\\2(\\W|\$)"
1 while str.gsub!(Regexp.new(re)) {
attr = MATCHING_WORD_PAIRS[$2];
attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr)
$1 + NULL*$2.length + $3 + NULL*$2.length + $4
}
# then non-matching
unless WORD_PAIR_MAP.empty?
WORD_PAIR_MAP.each do |regexp, attr|
str.gsub!(regexp) {
attrs.set_attrs($`.length + $1.length, $2.length, attr)
NULL*$1.length + $2 + NULL*$3.length
}
end
end
end
# File rdoc/markup/simple_markup/inline.rb, line 170
def convert_html(str, attrs)
tags = HTML_TAGS.keys.join("|")
re = "<(#{tags})>(.*?)</\\1>"
1 while str.gsub!(Regexp.new(re, Regexp::IGNORECASE)) {
attr = HTML_TAGS[$1.downcase]
html_length = $1.length + 2
seq = NULL * html_length
attrs.set_attrs($`.length + html_length, $2.length, attr)
seq + $2 + seq + NULL
}
end
# File rdoc/markup/simple_markup/inline.rb, line 182
def convert_specials(str, attrs)
unless SPECIAL.empty?
SPECIAL.each do |regexp, attr|
str.scan(regexp) do
attrs.set_attrs($`.length, $&.length, attr | Attribute::SPECIAL)
end
end
end
end
# File rdoc/markup/simple_markup/inline.rb, line 139
def copy_string(start_pos, end_pos)
res = @str[start_pos...end_pos]
res.gsub!(/\000/, '')
res
end
# File rdoc/markup/simple_markup/inline.rb, line 263
def display_attributes
puts
puts @str.tr(NULL, "!")
bit = 1
16.times do |bno|
line = ""
@str.length.times do |i|
if (@attrs[i] & bit) == 0
line << " "
else
if bno.zero?
line << "S"
else
line << ("%d" % (bno+1))
end
end
end
puts(line) unless line =~ /^ *$/
bit <<= 1
end
end
# File rdoc/markup/simple_markup/inline.rb, line 246
def flow(str)
@str = str
puts("Before flow, str='#{@str.dump}'") if $DEBUG
mask_protected_sequences
@attrs = AttrSpan.new(@str.length)
puts("After protecting, str='#{@str.dump}'") if $DEBUG
convert_attrs(@str, @attrs)
convert_html(@str, @attrs)
convert_specials(str, @attrs)
unmask_protected_sequences
puts("After flow, str='#{@str.dump}'") if $DEBUG
return split_into_flow
end
# File rdoc/markup/simple_markup/inline.rb, line 199
def mask_protected_sequences
protect_pattern = Regexp.new("\\\\([#{Regexp.escape(PROTECTABLE.join(''))}])")
@str.gsub!(protect_pattern, "\\1#{PROTECT_ATTR}")
end
# File rdoc/markup/simple_markup/inline.rb, line 285
def split_into_flow
display_attributes if $DEBUG
res = []
current_attr = 0
str = ""
str_len = @str.length
# skip leading invisible text
i = 0
i += 1 while i < str_len and @str[i].zero?
start_pos = i
# then scan the string, chunking it on attribute changes
while i < str_len
new_attr = @attrs[i]
if new_attr != current_attr
if i > start_pos
res << copy_string(start_pos, i)
start_pos = i
end
res << change_attribute(current_attr, new_attr)
current_attr = new_attr
if (current_attr & Attribute::SPECIAL) != 0
i += 1 while i < str_len and (@attrs[i] & Attribute::SPECIAL) != 0
res << Special.new(current_attr, copy_string(start_pos, i))
start_pos = i
next
end
end
# move on, skipping any invisible characters
begin
i += 1
end while i < str_len and @str[i].zero?
end
# tidy up trailing text
if start_pos < str_len
res << copy_string(start_pos, str_len)
end
# and reset to all attributes off
res << change_attribute(current_attr, 0) if current_attr != 0
return res
end