summaryrefslogtreecommitdiffstats
path: root/vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby.rb
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby.rb')
-rw-r--r--vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby.rb444
1 files changed, 444 insertions, 0 deletions
diff --git a/vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby.rb b/vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby.rb
new file mode 100644
index 000000000..3cadc64d1
--- /dev/null
+++ b/vendor/gems/coderay-0.9.7/lib/coderay/scanners/ruby.rb
@@ -0,0 +1,444 @@
+# encoding: utf-8
+module CodeRay
+module Scanners
+
+ # This scanner is really complex, since Ruby _is_ a complex language!
+ #
+ # It tries to highlight 100% of all common code,
+ # and 90% of strange code.
+ #
+ # It is optimized for HTML highlighting, and is not very useful for
+ # parsing or pretty printing.
+ #
+ # For now, I think it's better than the scanners in VIM or Syntax, or
+ # any highlighter I was able to find, except Caleb's RubyLexer.
+ #
+ # I hope it's also better than the rdoc/irb lexer.
+ class Ruby < Scanner
+
+ include Streamable
+
+ register_for :ruby
+ file_extension 'rb'
+
+ helper :patterns
+
+ # Define a placeholder EncodingError when the constant is not already
+ # available (presumably on Ruby versions that predate it -- TODO confirm),
+ # so that code referring to it elsewhere still resolves.
+ if not defined? EncodingError
+ EncodingError = Class.new Exception
+ end
+
+ private
+ # Scans the input from the current position to the end and appends the
+ # resulting tokens to +tokens+.
+ #
+ # Two token shapes are pushed: [text, kind] for scanned text, and
+ # [:open, kind] / [:close, kind] pairs that bracket string-like literals
+ # (strings, regexps, symbols, heredocs, shell commands, %-literals) and
+ # #{...} inline blocks.
+ #
+ # tokens::  token collection; only << is called on it here.
+ # options:: not used by this method.
+ #
+ # Returns +tokens+.
+ #
+ # Local state carried across loop iterations:
+ # unicode::            whether to use the /u variants of the patterns.
+ # last_token_dot::     truthy when the previous token was "." or "::".
+ # value_expected::     truthy when a value (not an operator) may follow;
+ #                      decides e.g. whether "/" opens a regexp. The
+ #                      special value :expect_colon disables "key:" detection.
+ # heredocs::           heredoc states opened on the current line, entered
+ #                      at the next newline; nil when there are none.
+ # last_state::         string state to return to after one token of #$x / #@x.
+ # state::              a Symbol (code mode) or a StringState (inside a literal).
+ # depth::              brace depth inside the current #{...} inline block.
+ # inline_block_stack:: saved [state, depth, heredocs] of interrupted strings.
+ def scan_tokens tokens, options
+ if string.respond_to?(:encoding)
+ # Encoding-aware strings are normalized to UTF-8 up front, so the
+ # explicit /u pattern variants are not needed.
+ unless string.encoding == Encoding::UTF_8
+ self.string = string.encode Encoding::UTF_8,
+ :invalid => :replace, :undef => :replace, :replace => '?'
+ end
+ unicode = false
+ else
+ # Without encoding support, switch to /u patterns if any non-ASCII
+ # byte is present.
+ unicode = exist?(/[^\x00-\x7f]/)
+ end
+
+ last_token_dot = false
+ value_expected = true
+ heredocs = nil
+ last_state = nil
+ state = :initial
+ depth = nil
+ inline_block_stack = []
+
+
+ patterns = Patterns # avoid constant lookup
+
+ until eos?
+ match = nil
+ kind = nil
+
+ if state.instance_of? patterns::StringState
+# {{{
+ # Inside a string-like literal: consume plain content up to the next
+ # character of interest (or to the end of input if there is none).
+ match = scan_until(state.pattern) || scan_until(/\z/)
+ tokens << [match, :content] unless match.empty?
+ break if eos?
+
+ if state.heredoc and self[1] # end of heredoc
+ match = getch.to_s
+ match << scan_until(/$/) unless eos?
+ tokens << [match, :delimiter]
+ tokens << [:close, state.type]
+ state = state.next_state
+ next
+ end
+
+ case match = getch
+
+ when state.delim
+ # For paired delimiters, only the outermost closer ends the literal.
+ if state.paren
+ state.paren_depth -= 1
+ if state.paren_depth > 0
+ tokens << [match, :nesting_delimiter]
+ next
+ end
+ end
+ tokens << [match, :delimiter]
+ if state.type == :regexp and not eos?
+ modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
+ tokens << [modifiers, :modifier] unless modifiers.empty?
+ end
+ tokens << [:close, state.type]
+ value_expected = false
+ state = state.next_state
+
+ when '\\'
+ if state.interpreted
+ if esc = scan(/ #{patterns::ESCAPE} /ox)
+ tokens << [match + esc, :char]
+ else
+ tokens << [match, :error]
+ end
+ else
+ # Non-interpreted literal: only the delimiter and the backslash
+ # itself are treated as escaped characters.
+ case m = getch
+ when state.delim, '\\'
+ tokens << [match + m, :char]
+ when nil
+ tokens << [match, :error]
+ else
+ tokens << [match + m, :content]
+ end
+ end
+
+ when '#'
+ case peek(1)
+ when '{'
+ # Suspend the current literal and scan the #{...} body as code.
+ inline_block_stack << [state, depth, heredocs]
+ value_expected = true
+ state = :initial
+ depth = 1
+ tokens << [:open, :inline]
+ tokens << [match + getch, :inline_delimiter]
+ when '$', '@'
+ tokens << [match, :escape]
+ last_state = state # scan one token as normal code, then return here
+ state = :initial
+ else
+ raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
+ end
+
+ when state.paren
+ state.paren_depth += 1
+ tokens << [match, :nesting_delimiter]
+
+ when /#{patterns::REGEXP_SYMBOLS}/ox
+ tokens << [match, :function]
+
+ else
+ raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens
+
+ end
+ next
+# }}}
+ else
+# {{{
+ if match = scan(/[ \t\f]+/)
+ kind = :space
+ # Do not swallow the newline while heredocs are pending: the
+ # newline branch below has to see it to enter the heredoc body.
+ match << scan(/\s*/) unless eos? || heredocs
+ value_expected = true if match.index(?\n)
+ tokens << [match, kind]
+ next
+
+ elsif match = scan(/\\?\n/)
+ kind = :space
+ if match == "\n"
+ value_expected = true
+ state = :initial if state == :undef_comma_expected
+ end
+ if heredocs
+ unscan # heredoc scanning needs \n at start
+ state = heredocs.shift
+ tokens << [:open, state.type]
+ heredocs = nil if heredocs.empty?
+ next
+ else
+ match << scan(/\s*/) unless eos?
+ end
+ tokens << [match, kind]
+ next
+
+ elsif bol? && match = scan(/\#!.*/)
+ tokens << [match, :doctype]
+ next
+
+ elsif match = scan(/\#.*/) or
+ ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
+ kind = :comment
+ tokens << [match, kind]
+ next
+
+ elsif state == :initial
+
+ # IDENTS #
+ if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
+ /#{patterns::METHOD_NAME}/o)
+ if last_token_dot
+ # After "." or "::" a name is a method call or a constant, never
+ # a keyword.
+ kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
+ else
+ if value_expected != :expect_colon && scan(/:(?= )/)
+ # "name: " is emitted as a key followed by a ":" operator.
+ tokens << [match, :key]
+ match = ':'
+ kind = :operator
+ else
+ kind = patterns::IDENT_KIND[match]
+ if kind == :ident
+ if match[/\A[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
+ kind = :constant
+ end
+ elsif kind == :reserved
+ state = patterns::DEF_NEW_STATE[match]
+ value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
+ end
+ end
+ end
+ value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
+
+ elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
+ kind = :ident
+ value_expected = :set if check(unicode ? /#{patterns::VALUE_FOLLOWS}/uo :
+ /#{patterns::VALUE_FOLLOWS}/o)
+
+ # OPERATORS #
+ elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
+ if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
+ value_expected = :set
+ end
+ # The empty capture group only participates for "." and "::".
+ last_token_dot = :set if self[1]
+ kind = :operator
+ unless inline_block_stack.empty?
+ case match
+ when '{'
+ depth += 1
+ when '}'
+ depth -= 1
+ if depth == 0 # closing brace of inline block reached
+ state, depth, heredocs = inline_block_stack.pop
+ heredocs = nil if heredocs && heredocs.empty?
+ tokens << [match, :inline_delimiter]
+ kind = :inline
+ match = :close
+ end
+ end
+ end
+
+ elsif match = scan(/ ['"] /mx)
+ tokens << [:open, :string]
+ kind = :delimiter
+ state = patterns::StringState.new :string, match == '"', match # important for streaming
+
+ elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
+ /#{patterns::INSTANCE_VARIABLE}/o)
+ kind = :instance_variable
+
+ elsif value_expected and match = scan(/\//)
+ tokens << [:open, :regexp]
+ kind = :delimiter
+ interpreted = true
+ state = patterns::StringState.new :regexp, interpreted, match
+
+ # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
+ # A leading sign belongs to the number only where a value is expected.
+ elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
+ kind = self[1] ? :float : :integer
+
+ elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
+ /#{patterns::SYMBOL}/o)
+ case delim = match[1]
+ when ?', ?"
+ # Quoted symbol: emit the colon, then scan the rest like a string.
+ tokens << [:open, :symbol]
+ tokens << [':', :symbol]
+ match = delim.chr
+ kind = :delimiter
+ state = patterns::StringState.new :symbol, delim == ?", match
+ else
+ kind = :symbol
+ end
+
+ elsif match = scan(/ -[>=]? | [+!~^]=? | [*|&]{1,2}=? | >>? /x)
+ value_expected = :set
+ kind = :operator
+
+ elsif value_expected and match = scan(unicode ? /#{patterns::HEREDOC_OPEN}/uo :
+ /#{patterns::HEREDOC_OPEN}/o)
+ indented = self[1] == '-'
+ quote = self[3]
+ delim = self[quote ? 4 : 2]
+ kind = patterns::QUOTE_TO_TYPE[quote]
+ tokens << [:open, kind]
+ tokens << [match, :delimiter]
+ match = :close
+ # The heredoc body starts after the current line, so only queue it here.
+ heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
+ heredocs ||= [] # create heredocs if empty
+ heredocs << heredoc
+
+ elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
+ # fetch passes the missing key to the block; it is used in the
+ # error message.
+ kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
+ raise_inspect 'Unknown fancy string: %%%p' % k, tokens
+ end
+ tokens << [:open, kind]
+ state = patterns::StringState.new kind, interpreted, self[2]
+ kind = :delimiter
+
+ elsif value_expected and match = scan(unicode ? /#{patterns::CHARACTER}/uo :
+ /#{patterns::CHARACTER}/o)
+ kind = :integer
+
+ elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
+ value_expected = :set
+ kind = :operator
+
+ elsif match = scan(/`/)
+ if last_token_dot
+ kind = :operator
+ else
+ tokens << [:open, :shell]
+ kind = :delimiter
+ state = patterns::StringState.new :shell, true, match
+ end
+
+ elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
+ /#{patterns::GLOBAL_VARIABLE}/o)
+ kind = :global_variable
+
+ elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
+ /#{patterns::CLASS_VARIABLE}/o)
+ kind = :class_variable
+
+ else
+ if !unicode && !string.respond_to?(:encoding)
+ # check for unicode
+ debug, $DEBUG = $DEBUG, false
+ begin
+ if check(/./mu).size > 1
+ # seems like we should try again with unicode
+ unicode = true
+ end
+ rescue
+ # bad unicode char; use getch
+ ensure
+ $DEBUG = debug
+ end
+ next if unicode
+ end
+ kind = :error
+ match = scan(unicode ? /./mu : /./m)
+
+ end
+
+ elsif state == :def_expected
+ state = :initial
+ if scan(/self\./)
+ tokens << ['self', :pre_constant]
+ tokens << ['.', :operator]
+ end
+ if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
+ /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
+ kind = :method
+ else
+ next
+ end
+
+ elsif state == :module_expected
+ if match = scan(/<</)
+ kind = :operator
+ else
+ state = :initial
+ if match = scan(unicode ? /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/uo :
+ /(?:#{patterns::IDENT}::)*#{patterns::IDENT}/o)
+ kind = :class
+ else
+ next
+ end
+ end
+
+ elsif state == :undef_expected
+ state = :undef_comma_expected
+ if match = scan(unicode ? /#{patterns::METHOD_NAME_EX}/uo :
+ /#{patterns::METHOD_NAME_EX}/o)
+ kind = :method
+ elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
+ /#{patterns::SYMBOL}/o)
+ case delim = match[1]
+ when ?', ?"
+ tokens << [:open, :symbol]
+ tokens << [':', :symbol]
+ match = delim.chr
+ kind = :delimiter
+ state = patterns::StringState.new :symbol, delim == ?", match
+ # Resume the undef argument list once the quoted symbol is closed.
+ state.next_state = :undef_comma_expected
+ else
+ kind = :symbol
+ end
+ else
+ state = :initial
+ next
+ end
+
+ elsif state == :alias_expected
+ match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
+ /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
+
+ if match
+ tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
+ tokens << [self[2], :space]
+ tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
+ end
+ state = :initial
+ next
+
+ elsif state == :undef_comma_expected
+ if match = scan(/,/)
+ kind = :operator
+ state = :undef_expected
+ else
+ state = :initial
+ next
+ end
+
+ end
+# }}}
+
+ # :set marks a flag raised while scanning this token. Collapse both
+ # flags to true/false so each one only affects the following token.
+ unless kind == :error
+ if value_expected = value_expected == :set
+ value_expected = :expect_colon if match == '?' || match == 'when'
+ end
+ last_token_dot = last_token_dot == :set
+ end
+
+ if $CODERAY_DEBUG and not kind
+ raise_inspect 'Error token %p in line %d' %
+ [[match, kind], line], tokens, state
+ end
+ raise_inspect 'Empty token', tokens unless match
+
+ tokens << [match, kind]
+
+ if last_state
+ state = last_state
+ last_state = nil
+ end
+ end
+ end
+
+ # End of input: close whatever is still open so that every :open token
+ # has a matching :close. Stack entries with more than one element were
+ # pushed for an inline block and need an extra [:close, :inline].
+ inline_block_stack << [state] if state.is_a? patterns::StringState
+ until inline_block_stack.empty?
+ this_block = inline_block_stack.pop
+ tokens << [:close, :inline] if this_block.size > 1
+ state = this_block.first
+ tokens << [:close, state.type]
+ end
+
+ tokens
+ end
+
+ end
+
+end
+end
+
+# vim:fdm=marker