Diffstat (limited to 'vendor/gems/coderay-0.9.7/lib/coderay/tokens.rb')
-rw-r--r--  vendor/gems/coderay-0.9.7/lib/coderay/tokens.rb  |  390
1 file changed, 390 insertions, 0 deletions
diff --git a/vendor/gems/coderay-0.9.7/lib/coderay/tokens.rb b/vendor/gems/coderay-0.9.7/lib/coderay/tokens.rb
new file mode 100644
index 000000000..6ac5f4405
--- /dev/null
+++ b/vendor/gems/coderay-0.9.7/lib/coderay/tokens.rb
@@ -0,0 +1,390 @@
+module CodeRay
+
+  # = Tokens
+  #
+  # The Tokens class represents a list of tokens returned from
+  # a Scanner.
+  #
+  # A token is not a special object, just a two-element Array
+  # consisting of
+  # * the _token_ _text_ (the original source of the token in a String) or
+  #   a _token_ _action_ (:open, :close, :begin_line, :end_line)
+  # * the _token_ _kind_ (a Symbol representing the type of the token)
+  #
+  # A token looks like this:
+  #
+  #   ['# It looks like this', :comment]
+  #   ['3.1415926', :float]
+  #   ['$^', :error]
+  #
+  # Some scanners also yield sub-tokens, represented by special
+  # token actions, namely :open and :close.
+  #
+  # The Ruby scanner, for example, splits "a string" into:
+  #
+  #   [
+  #    [:open, :string],
+  #    ['"', :delimiter],
+  #    ['a string', :content],
+  #    ['"', :delimiter],
+  #    [:close, :string]
+  #   ]
+  #
+  # Tokens is the interface between Scanners and Encoders:
+  # The input is split and saved into a Tokens object. The Encoder
+  # then builds the output from this object.
+  #
+  # Thus, the syntax below becomes clear:
+  #
+  #   CodeRay.scan('price = 2.59', :ruby).html
+  #   # the Tokens object is here -------^
+  #
+  # See how small it is? ;)
+  #
+  # Tokens gives you the power to handle pre-scanned code very easily:
+  # You can convert it to a webpage, a YAML file, or dump it into a
+  # gzipped string that you put in your DB.
+  #
+  # It also allows you to generate tokens directly (without using a scanner),
+  # to load them from a file, and still use any Encoder that CodeRay provides.
+  #
+  # Tokens' subclass TokenStream allows streaming to save memory.
+  class Tokens < Array
+
+    # The Scanner instance that created the tokens.
+    attr_accessor :scanner
+
+    # Whether the object is a TokenStream.
+    #
+    # Returns false.
+    def stream?
+      false
+    end
+
+    # Iterates over all tokens.
+    #
+    # If a filter is given, only tokens of that kind are yielded.
+    def each kind_filter = nil, &block
+      unless kind_filter
+        super(&block)
+      else
+        super() do |text, kind|
+          next unless kind == kind_filter
+          yield text, kind
+        end
+      end
+    end
+
+    # Iterates over all text tokens.
+    # Range tokens like [:open, :string] are left out.
+    #
+    # Example:
+    #   tokens.each_text_token { |text, kind| text.replace html_escape(text) }
+    def each_text_token
+      each do |text, kind|
+        next unless text.is_a? ::String
+        yield text, kind
+      end
+    end
+
+    # Encode the tokens using encoder.
+    #
+    # encoder can be
+    # * a symbol like :html or :statistic
+    # * an Encoder class
+    # * an Encoder object
+    #
+    # options are passed to the encoder.
+    def encode encoder, options = {}
+      unless encoder.is_a? Encoders::Encoder
+        encoder_class = encoder.is_a?(Class) ? encoder : Encoders[encoder]
+        encoder = encoder_class.new options
+      end
+      encoder.encode_tokens self, options
+    end
+
+    # Turn into a string using Encoders::Text.
+    #
+    # +options+ are passed to the encoder if given.
+    def to_s options = {}
+      encode :text, options
+    end
+
+    # Redirects unknown methods to encoder calls.
+    #
+    # For example, if you call +tokens.html+, the HTML encoder
+    # is used to highlight the tokens.
+    def method_missing meth, options = {}
+      Encoders[meth].new(options).encode_tokens self
+    end
+
+    # Returns the tokens compressed by joining consecutive
+    # tokens of the same kind.
+    #
+    # This cannot be undone, but should yield the same output
+    # in most Encoders. It basically makes the output smaller.
+    #
+    # Combined with dump, it saves space at the cost of time.
+    #
+    # If the scanner is written carefully, this is not required -
+    # for example, consecutive //-comment lines could already be
+    # joined in one comment token by the Scanner.
+    def optimize
+      last_kind = last_text = nil
+      new = self.class.new
+      for text, kind in self
+        if text.is_a? String
+          if kind == last_kind
+            last_text << text
+          else
+            new << [last_text, last_kind] if last_kind
+            last_text = text
+            last_kind = kind
+          end
+        else
+          new << [last_text, last_kind] if last_kind
+          last_kind = last_text = nil
+          new << [text, kind]
+        end
+      end
+      new << [last_text, last_kind] if last_kind
+      new
+    end
+
+    # Compact the object itself; see optimize.
+    def optimize!
+      replace optimize
+    end
+
+    # Ensure that all :open tokens have a corresponding :close one.
+    #
+    # TODO: Test this!
+    def fix
+      tokens = self.class.new
+      # Check token nesting using a stack of kinds.
+      opened = []
+      for type, kind in self
+        case type
+        when :open
+          opened.push [:close, kind]
+        when :begin_line
+          opened.push [:end_line, kind]
+        when :close, :end_line
+          expected = opened.pop
+          if [type, kind] != expected
+            # Unexpected :close; decide what to do based on the kind:
+            # - token was never opened: delete the :close (just skip it)
+            next unless opened.rindex expected
+            # - token was opened earlier: also close tokens in between
+            tokens << token until (token = opened.pop) == expected
+          end
+        end
+        tokens << [type, kind]
+      end
+      # Close remaining opened tokens
+      tokens << token while token = opened.pop
+      tokens
+    end
+
+    def fix!
+      replace fix
+    end
+
+    # TODO: Scanner#split_into_lines
+    #
+    # Makes sure that:
+    # - newlines are single tokens
+    #   (which means all other tokens are single-line)
+    # - there are no open tokens at the end of the line
+    #
+    # This makes it simple for encoders that work line-oriented,
+    # like HTML with list-style numbering.
+    def split_into_lines
+      raise NotImplementedError
+    end
+
+    def split_into_lines!
+      replace split_into_lines
+    end
+
+    # Dumps the object into a String that can be saved
+    # in files or databases.
+    #
+    # The dump is created with Marshal.dump;
+    # in addition, it is gzipped using GZip.gzip.
+    #
+    # The returned String object includes Undumping,
+    # so it has an #undump method. See Tokens.load.
+    #
+    # You can configure the level of compression,
+    # but the default value 7 should be what you want
+    # in most cases, as it is a good compromise between
+    # speed and compression rate.
+    #
+    # See the GZip module.
+    def dump gzip_level = 7
+      require 'coderay/helpers/gzip_simple'
+      dump = Marshal.dump self
+      dump = dump.gzip gzip_level
+      dump.extend Undumping
+    end
+
+    # The total size of the tokens.
+    # Should be equal to the input size before
+    # scanning.
+    def text_size
+      size = 0
+      each_text_token do |t, k|
+        size += t.size
+      end
+      size
+    end
+
+    # Return all text tokens joined into a single string.
+    def text
+      map { |t, k| t if t.is_a? ::String }.join
+    end
+
+    # Include this module to give an object an #undump
+    # method.
+    #
+    # The string returned by Tokens.dump includes Undumping.
+    module Undumping
+      # Calls Tokens.load with itself.
+      def undump
+        Tokens.load self
+      end
+    end
+
+    # Unzip the dump using GZip.gunzip, then
+    # undump it using Marshal.load.
+    #
+    # The result is commonly a Tokens object, but
+    # this is not guaranteed.
+    def Tokens.load dump
+      require 'coderay/helpers/gzip_simple'
+      dump = dump.gunzip
+      Marshal.load dump
+    end
+
+  end
+
+
+  # = TokenStream
+  #
+  # The TokenStream class is a fake Array without elements.
+  #
+  # It redirects the method << to a block given at creation.
+  #
+  # This allows Scanners and Encoders to use streaming (no
+  # tokens are saved; the input is highlighted at the same time it
+  # is scanned) with the same code.
+  #
+  # See CodeRay.encode_stream and CodeRay.scan_stream.
+  class TokenStream < Tokens
+
+    # Whether the object is a TokenStream.
+    #
+    # Returns true.
+    def stream?
+      true
+    end
+
+    # The Array is empty, but size counts the tokens given by <<.
+    attr_reader :size
+
+    # Creates a new TokenStream that calls +block+ whenever
+    # its << method is called.
+    #
+    # Example:
+    #
+    #   require 'coderay'
+    #
+    #   token_stream = CodeRay::TokenStream.new do |text, kind|
+    #     puts 'kind: %s, text size: %d.' % [kind, text.size]
+    #   end
+    #
+    #   token_stream << ['/\d+/', :regexp]
+    #   #-> kind: regexp, text size: 5.
+    #
+    def initialize &block
+      raise ArgumentError, 'Block expected for streaming.' unless block
+      @callback = block
+      @size = 0
+    end
+
+    # Calls +block+ with +token+ and increments size.
+    #
+    # Returns self.
+    def << token
+      @callback.call(*token)
+      @size += 1
+      self
+    end
+
+    # This method is not implemented due to speed reasons. Use Tokens.
+    def text_size
+      raise NotImplementedError,
+        'This method is not implemented due to speed reasons.'
+    end
+
+    # A TokenStream cannot be dumped. Use Tokens.
+    def dump
+      raise NotImplementedError, 'A TokenStream cannot be dumped.'
+    end
+
+    # A TokenStream cannot be optimized. Use Tokens.
+    def optimize
+      raise NotImplementedError, 'A TokenStream cannot be optimized.'
+    end
+
+  end
+
+end
+
+if $0 == __FILE__
+  $VERBOSE = true
+  $: << File.join(File.dirname(__FILE__), '..')
+  eval DATA.read, nil, $0, __LINE__ + 4
+end
+
+__END__
+require 'test/unit'
+
+class TokensTest < Test::Unit::TestCase
+
+  def test_creation
+    assert CodeRay::Tokens < Array
+    tokens = nil
+    assert_nothing_raised do
+      tokens = CodeRay::Tokens.new
+    end
+    assert_kind_of Array, tokens
+  end
+
+  def test_adding_tokens
+    tokens = CodeRay::Tokens.new
+    assert_nothing_raised do
+      tokens << ['string', :type]
+      tokens << ['()', :operator]
+    end
+    assert_equal tokens.size, 2
+  end
+
+  def test_dump_undump
+    tokens = CodeRay::Tokens.new
+    assert_nothing_raised do
+      tokens << ['string', :type]
+      tokens << ['()', :operator]
+    end
+    tokens2 = nil
+    assert_nothing_raised do
+      tokens2 = tokens.dump.undump
+    end
+    assert_equal tokens, tokens2
+  end
+
+end
\ No newline at end of file
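
To see how the Tokens pipeline in this file fits together, here is a minimal usage sketch; the only assumption is that the vendored gem is loadable with require 'coderay':

  require 'coderay'

  # CodeRay.scan returns a Tokens object; encoders turn it into output.
  tokens = CodeRay.scan 'price = 2.59', :ruby
  tokens.class               # => CodeRay::Tokens
  puts tokens.to_s           # plain text, via Encoders::Text
  puts tokens.encode(:html)  # same encoder the tokens.html shortcut resolves to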
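The optimize/dump/undump methods compose as sketched below; the token values are made up for illustration:

  require 'coderay'

  tokens = CodeRay::Tokens.new
  tokens << ['# a ', :comment]
  tokens << ['comment', :comment]  # same kind as the previous token
  tokens << ["\n", :space]

  # optimize joins the two consecutive :comment tokens into one.
  tokens.optimize.size             # => 2

  # dump yields a gzipped Marshal string that can restore itself.
  tokens.dump.undump == tokens     # => true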
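For the streaming side, the contract of TokenStream#<< can be exercised directly with a counting callback; the tokens here are again invented for the example:

  require 'coderay'

  text_tokens = 0
  stream = CodeRay::TokenStream.new do |text, kind|
    text_tokens += 1 if text.is_a? ::String
  end

  stream << ['x = 1', :plain]
  stream << [:open, :string]   # token actions reach the callback too
  stream << [:close, :string]

  stream.size   # => 3; every << increments size
  text_tokens   # => 1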