module CodeRay
# GZip library for writing and reading token dumps.
autoload :GZip, 'coderay/helpers/gzip'
  # = Tokens
#
  # The Tokens class represents a list of tokens returned by
  # a Scanner.
#
# A token is not a special object, just a two-element Array
# consisting of
# * the _token_ _text_ (the original source of the token in a String) or
# a _token_ _action_ (begin_group, end_group, begin_line, end_line)
# * the _token_ _kind_ (a Symbol representing the type of the token)
#
# A token looks like this:
#
# ['# It looks like this', :comment]
# ['3.1415926', :float]
# ['$^', :error]
#
# Some scanners also yield sub-tokens, represented by special
# token actions, namely begin_group and end_group.
#
# The Ruby scanner, for example, splits "a string" into:
#
# [
# [:begin_group, :string],
# ['"', :delimiter],
# ['a string', :content],
# ['"', :delimiter],
# [:end_group, :string]
# ]
#
  # Tokens is the interface between Scanners and Encoders: the input
  # is split and saved into a Tokens object, and the Encoder then
  # builds the output from this object.
#
# Thus, the syntax below becomes clear:
#
# CodeRay.scan('price = 2.59', :ruby).html
# # the Tokens object is here -------^
#
# See how small it is? ;)
#
  # Tokens gives you the power to handle pre-scanned code very easily:
  # you can convert it to a webpage, a YAML file, or dump it into a
  # gzipped string that you can store in your database.
#
# It also allows you to generate tokens directly (without using a scanner),
# to load them from a file, and still use any Encoder that CodeRay provides.
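  #
  # A quick sketch of those uses (:html and :yaml are standard
  # encoders; #dump is defined below):
  #
  #   tokens = CodeRay.scan('price = 2.59', :ruby)
  #   tokens.html  # an HTML fragment
  #   tokens.yaml  # a YAML serialization
  #   tokens.dump  # a gzipped string for your database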
class Tokens < Array
# The Scanner instance that created the tokens.
attr_accessor :scanner
# Encode the tokens using encoder.
#
# encoder can be
    # * a symbol like :html or :statistic
# * an Encoder class
# * an Encoder object
#
# options are passed to the encoder.
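    #
    # For example (a minimal sketch; +tokens+ is any Tokens object):
    #
    #   tokens.encode :html                        # encoder by symbol
    #   tokens.encode CodeRay::Encoders::Text.new  # encoder by object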
def encode encoder, options = {}
encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
encoder.encode_tokens self, options
end
# Turn tokens into a string by concatenating them.
def to_s
encode CodeRay::Encoders::Encoder.new
end
# Redirects unknown methods to encoder calls.
#
# For example, if you call +tokens.html+, the HTML encoder
# is used to highlight the tokens.
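    #
    # So, as a sketch, these two calls do the same:
    #
    #   tokens.html
    #   tokens.encode :html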
def method_missing meth, options = {}
encode meth, options
rescue PluginHost::PluginNotFound
super
end
# Split the tokens into parts of the given +sizes+.
#
    # The result is an Array of Tokens objects. Each part has the text
    # size given by the corresponding parameter. In addition, each part
    # closes all groups and lines that are still open, and the next
    # part reopens them. This is useful for inserting tokens between
    # the parts.
#
    # This method is used by Scanner#tokenize when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
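    #
    # A sketch (sizes are arbitrary; here the scanned text is 6
    # characters long, so it splits into a part of 4 and a part of 2):
    #
    #   parts = tokens.split_into_parts 4, 2
    #   parts.size  # => 2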
def split_into_parts *sizes
parts = []
opened = []
content = nil
part = Tokens.new
part_size = 0
size = sizes.first
i = 0
for item in self
case content
when nil
content = item
when String
if size && part_size + content.size > size # token must be cut
if part_size < size # some part of the token goes into this part
              content = content.dup # content may not be safe to change
part << content.slice!(0, size - part_size) << item
end
# close all open groups and lines...
closing = opened.reverse.flatten.map do |content_or_kind|
case content_or_kind
when :begin_group
:end_group
when :begin_line
:end_line
else
content_or_kind
end
end
part.concat closing
begin
parts << part
part = Tokens.new
size = sizes[i += 1]
end until size.nil? || size > 0
# ...and open them again.
part.concat opened.flatten
part_size = 0
redo unless content.empty?
else
part << content << item
part_size += content.size
end
content = nil
when Symbol
case content
when :begin_group, :begin_line
opened << [content, item]
when :end_group, :end_line
opened.pop
else
raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
end
part << content << item
content = nil
else
raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
end
end
parts << part
parts << Tokens.new while parts.size < sizes.size
parts
end
# Dumps the object into a String that can be saved
# in files or databases.
#
    # The dump is created with Marshal.dump and then
    # gzipped using GZip.gzip.
#
# The returned String object includes Undumping
# so it has an #undump method. See Tokens.load.
#
# You can configure the level of compression,
# but the default value 7 should be what you want
# in most cases as it is a good compromise between
# speed and compression rate.
#
# See GZip module.
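    #
    # A sketch of typical use (levels follow zlib, 1 to 9):
    #
    #   dump = tokens.dump     # default level 7
    #   dump = tokens.dump 9   # best compression, but slower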
def dump gzip_level = 7
dump = Marshal.dump self
dump = GZip.gzip dump, gzip_level
dump.extend Undumping
end
    # Return the actual number of tokens.
    #
    # Each token occupies two Array elements (its text or action, plus
    # its kind), so this is half of +size+.
def count
size / 2
end
# Include this module to give an object an #undump
# method.
#
# The string returned by Tokens.dump includes Undumping.
module Undumping
# Calls Tokens.load with itself.
def undump
Tokens.load self
end
end
# Undump the object using Marshal.load, then
# unzip it using GZip.gunzip.
#
# The result is commonly a Tokens object, but
# this is not guaranteed.
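    #
    # A round-trip sketch (+dump+ is a String as returned by
    # Tokens#dump):
    #
    #   tokens = CodeRay::Tokens.load dump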
def Tokens.load dump
dump = GZip.gunzip dump
      Marshal.load dump
end
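
    # Tokens can also be built by hand, without a Scanner. The methods
    # below push raw token data onto the Array. A minimal sketch that
    # rebuilds the "a string" example from above and encodes it:
    #
    #   tokens = CodeRay::Tokens.new
    #   tokens.begin_group :string
    #   tokens.text_token '"', :delimiter
    #   tokens.text_token 'a string', :content
    #   tokens.text_token '"', :delimiter
    #   tokens.end_group :string
    #   tokens.encode :html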
alias text_token push
def begin_group kind; push :begin_group, kind end
def end_group kind; push :end_group, kind end
def begin_line kind; push :begin_line, kind end
def end_line kind; push :end_line, kind end
alias tokens concat
end
end