summaryrefslogtreecommitdiffstats
path: root/vendor/plugins/coderay-0.9.0/lib/coderay/scanners/cpp.rb
blob: 0b92ef9e1401291a8021b72da5119fbb2b1c53df (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
module CodeRay
module Scanners

  class CPlusPlus < Scanner

    include Streamable
    
    register_for :cpp
    file_extension 'cpp'
    title 'C++'
    
    # http://www.cppreference.com/wiki/keywords/start
    RESERVED_WORDS = [
      'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
      'case', 'catch', 'class', 'compl', 'const_cast',
      'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
      'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
      'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
      'sizeof', 'static_cast', 'struct', 'switch', 'template',
      'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
      'while', 'xor', 'xor_eq'
    ]

    PREDEFINED_TYPES = [
      'bool', 'char', 'double', 'float', 'int', 'long',
      'short', 'signed', 'unsigned', 'wchar_t', 'string'
    ]
    PREDEFINED_CONSTANTS = [
      'false', 'true',
      'EOF', 'NULL',
    ]
    PREDEFINED_VARIABLES = [
      'this'
    ]
    DIRECTIVES = [
      'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
      'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
      'volatile'
    ]

    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_TYPES, :pre_type).
      add(PREDEFINED_VARIABLES, :local_variable).
      add(DIRECTIVES, :directive).
      add(PREDEFINED_CONSTANTS, :pre_constant)

    ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    def scan_tokens tokens, options

      state = :initial

      until eos?

        kind = nil
        match = nil
        
        case state

        when :initial

          if scan(/ \s+ | \\\n /x)
            kind = :space

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            kind = :comment

          elsif match = scan(/ \# \s* if \s* 0 /x)
            match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
            kind = :comment

          elsif scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            kind = :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            if kind == :ident and check(/:(?!:)/)
              # FIXME: don't match a?b:c
              kind = :label
            elsif match == 'class'
              state = :class_name_expected
            end

          elsif scan(/\$/)
            kind = :ident
          
          elsif match = scan(/L?"/)
            tokens << [:open, :string]
            if match[0] == ?L
              tokens << ['L', :modifier]
              match = '"'
            end
            state = :string
            kind = :delimiter

          elsif scan(/#\s*(\w*)/)
            kind = :preprocessor
            state = :include_expected if self[1] == 'include'

          elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
            kind = :char

          elsif scan(/0[xX][0-9A-Fa-f]+/)
            kind = :hex

          elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
            kind = :oct

          elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
            kind = :integer

          elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
            kind = :float

          else
            getch
            kind = :error

          end

        when :string
          if scan(/[^\\"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/ \\ | $ /x)
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        when :include_expected
          if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
            kind = :include
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space
            state = :initial if match.index ?\n

          else
            state = :initial
            next

          end
        
        when :class_name_expected
          if scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = :class
            state = :initial

          elsif match = scan(/\s+/)
            kind = :space

          else
            getch
            kind = :error
            state = :initial

          end
          
        else
          raise_inspect 'Unknown state', tokens

        end

        match ||= matched
        if $DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end