You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lpegre.lua 6.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. -- $Id: re.lua $
  2. -- imported functions and modules
  3. local tonumber, type, print, error = tonumber, type, print, error
  4. local setmetatable = setmetatable
  5. local m = require"lpeg"
  6. -- 'm' will be used to parse expressions, and 'mm' will be used to
  7. -- create expressions; that is, 're' runs on 'm', creating patterns
  8. -- on 'mm'
  9. local mm = m
  10. -- pattern's metatable
  11. local mt = getmetatable(mm.P(0))
  12. -- No more global accesses after this point
  13. local version = _VERSION
  14. if version == "Lua 5.2" then _ENV = nil end
  -- Pattern that matches exactly one character of any kind.
  local any = m.P(1)

  -- Pre-defined names; starts with only 'nl' (a newline character).
  local Predef = { nl = m.P("\n") }

  -- Memoization tables; they are (re)created by 'updatelocale'.
  local mem, fmem, gmem
  -- Refresh the pre-defined character classes and restart memoization.
  --
  -- Asks 'mm.locale' to fill 'Predef', then adds for each listed class a
  -- one-letter alias (lower case) and its complement 'any - class' (upper
  -- case). Finally replaces 'mem', 'fmem' and 'gmem' with fresh tables
  -- that hold their values weakly.
  local function updatelocale ()
    mm.locale(Predef)

    -- { alias, complement alias, class name as stored in 'Predef' }
    local classes = {
      { "a", "A", "alpha"  },
      { "c", "C", "cntrl"  },
      { "d", "D", "digit"  },
      { "g", "G", "graph"  },
      { "l", "L", "lower"  },
      { "p", "P", "punct"  },
      { "s", "S", "space"  },
      { "u", "U", "upper"  },
      { "w", "W", "alnum"  },
      { "x", "X", "xdigit" },
    }
    for idx = 1, #classes do
      local entry = classes[idx]
      local class = Predef[entry[3]]
      Predef[entry[1]] = class
      Predef[entry[2]] = any - class
    end

    -- restart memoization; all three caches share one weak-value metatable
    local weak_values = {__mode = "v"}
    mem = setmetatable({}, weak_values)
    fmem = setmetatable({}, weak_values)
    gmem = setmetatable({}, weak_values)
  end
  -- Build the initial character classes and memoization tables.
  updatelocale()

  -- Debugging aid: a pattern that prints the current position and the
  -- subject text before it, then succeeds without consuming input.
  local I = m.P(function (s, i)
    print(i, s:sub(1, i - 1))
    return i
  end)
  -- Raise a "pattern error" showing the text around the failure point.
  --
  -- s: the pattern source being parsed
  -- i: position in 's' where parsing failed
  --
  -- At most 21 characters starting at 'i' are quoted. The "..." marker is
  -- appended only when text was actually cut off (the previous test
  -- '#s < i + 20' also added it when exactly 21 characters remained).
  -- The error is raised with level 2, so it is attributed to the caller.
  local function patt_error (s, i)
    local last = i + 20
    local near = s:sub(i, last)
    if #s > last then near = near .. "..." end
    local msg = ("pattern error near '%s'"):format(near)
    error(msg, 2)
  end
  -- Build the sequence of 'n' copies of pattern 'p' by repeated squaring.
  --
  -- 'count' is halved with plain division, so it may become fractional;
  -- the 'count % 2 >= 1' test still picks out each binary digit of 'n'.
  -- Returns mm.P(true) when n < 1.
  local function mult (p, n)
    local result = mm.P(true)
    local power, count = p, n
    while count >= 1 do
      if count % 2 >= 1 then
        result = result * power
      end
      power = power * power
      count = count / 2
    end
    return result
  end
  -- Check that subject 's' contains string 'c' starting at position 'i'.
  -- Returns the position right after the matched text, or nil when 'c'
  -- is not a string or the text differs.
  local function equalcap (s, i, c)
    if type(c) == "string" then
      local stop = i + #c
      if s:sub(i, stop - 1) == c then
        return stop
      end
    end
    return nil
  end
  -- Skippable text: any run of spaces and "--" comments (a comment runs
  -- up to, but not including, the next newline).
  local comment = "--" * (any - Predef.nl)^0
  local S = (Predef.space + comment)^0

  -- Identifiers: a letter or underscore, then letters, underscores, digits.
  local name_first = m.R("AZ", "az", "__")
  local name_rest = m.R("AZ", "az", "__", "09")
  local name = name_first * name_rest^0

  -- The arrow that introduces a rule body, with optional leading space.
  local arrow = S * "<-"

  -- What may legally follow a sequence: a closing/alternative symbol,
  -- the start of a new rule ("name <-"), or the end of the input.
  local seq_follow = m.P("/") + ")" + "}" + ":}" + "~}" + "|}"
                   + (name * arrow) + -1

  -- From here on 'name' also captures the identifier it matches.
  name = m.C(name)

  -- a defined name only has meaning in a given environment, so the
  -- first extra argument of the match is captured alongside it
  local Def = name * m.Carg(1)
  -- Look up 'id' in the definitions table 'defs'.
  -- Returns the stored value; raises "undefined name: <id>" when 'defs'
  -- is missing or holds no true value under 'id'.
  local function getdef (id, defs)
    local c = defs and defs[id]
    if c then
      return c
    end
    error("undefined name: " .. id)
  end
  -- Build a pattern that matches a name and yields, as one group, the
  -- definition found for that name followed by the constant 'f'
  -- (the pair is later folded in 'Suffix').
  local function defwithfunc (f)
    local definition = Def / getdef
    return m.Cg(definition * m.Cc(f))
  end
  -- A run of digits (then skippable text), converted with 'tonumber'.
  local num = m.C(m.R("09")^1) * S / tonumber

  -- A quoted string, single or double quotes; captures the text inside.
  local single_quoted = "'" * m.C((any - "'")^0) * "'"
  local double_quoted = '"' * m.C((any - '"')^0) * '"'
  local String = single_quoted + double_quoted

  -- Resolve "%name": user definitions take precedence over 'Predef'.
  -- Raises an error when the name is found in neither table.
  local function resolve_defined (c, Defs)
    local cat = Defs and Defs[c]
    if not cat then
      cat = Predef[c]
    end
    if not cat then error ("name '" .. c .. "' undefined") end
    return cat
  end
  local defined = "%" * Def / resolve_defined

  -- "x-y" inside a class: the dash is dropped by the substitution
  -- capture and the remaining two characters are handed to mm.R.
  local Range = m.Cs(any * (m.P("-") / "") * (any - "]")) / mm.R

  -- One class item: a defined name, a range, or a single character.
  local item = (defined + Range + m.C(any)) / m.P
  -- Finish a character class: 'c' is the captured complement symbol
  -- ("^" or the empty string) and 'p' the union of the class items.
  local function finish_class (c, p)
    if c == "^" then
      return any - p
    end
    return p
  end

  -- "[" optional "^", one or more items joined with mt.__add, "]".
  -- The first item may be anything; later items stop at "]".
  local Class =
      "["
    * m.C(m.P("^")^-1)
    * m.Cf(item * (item - "]")^0, mt.__add) / finish_class
    * "]"
  -- Store rule 'exp' under name 'k' in grammar table 't' and return 't'.
  -- Raises an error when 't' already has a true value under 'k'.
  local function adddef (t, k, exp)
    if t[k] then
      error("'"..k.."' already defined as a rule")
    end
    t[k] = exp
    return t
  end
  -- Start a new grammar table: 'n' is stored at index 1 and rule 'r' is
  -- registered under that same name through 'adddef'.
  local function firstdef (n, r)
    local grammar = { n }
    return adddef(grammar, n, r)
  end
  -- Turn rule name 'n' into a grammar variable, mm.V(n).
  -- 'b' tells whether the name appears inside a grammar; when it is
  -- false or nil an error is raised instead.
  local function NT (n, b)
    if b then
      return mm.V(n)
    end
    error("rule '"..n.."' used outside a grammar")
  end
  -- Fold step used by 'Suffix': every suffix produces an (argument,
  -- function) pair, and the pattern built so far, 'a', becomes f(a, b).
  local function apply_suffix (a, b, f) return f(a, b) end

  -- "{: name: exp :}" -> group capture of 'p' labelled 'n'
  -- ('n' is nil when the name part is absent).
  local function named_group (n, p) return mm.Cg(p, n) end

  -- "=name" -> match-time capture that runs 'equalcap' on the back
  -- capture called 'n'.
  local function back_reference (n) return mm.Cmt(mm.Cb(n), equalcap) end

  -- The grammar that parses pattern source text and, through its
  -- captures, builds the corresponding pattern. "Exp" is the start rule.
  local exp = m.P{ "Exp",

    -- a whole grammar, or alternatives separated by "/" joined by mt.__add
    Exp = S * ( m.V("Grammar")
              + m.Cf(m.V("Seq") * ("/" * S * m.V("Seq"))^0, mt.__add) );

    -- zero or more prefixed items joined by mt.__mul, starting from the
    -- empty pattern; whatever follows must be in 'seq_follow', otherwise
    -- 'patt_error' is run
    Seq = m.Cf(m.Cc(m.P("")) * m.V("Prefix")^0, mt.__mul)
          * (#seq_follow + patt_error);

    -- "&p" maps to mt.__len, "!p" to mt.__unm
    Prefix = "&" * S * m.V("Prefix") / mt.__len
           + "!" * S * m.V("Prefix") / mt.__unm
           + m.V("Suffix");

    -- a primary followed by any number of suffixes, folded left to right
    Suffix = m.Cf(m.V("Primary") * S *
            ( ( m.P("+") * m.Cc(1, mt.__pow)
              + m.P("*") * m.Cc(0, mt.__pow)
              + m.P("?") * m.Cc(-1, mt.__pow)
              + "^" * ( m.Cg(num * m.Cc(mult))
                      + m.Cg(m.C(m.S("+-") * m.R("09")^1) * m.Cc(mt.__pow))
                      )
              + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div))
                           + m.P("{}") * m.Cc(nil, m.Ct)
                           + defwithfunc(mt.__div)
                           )
              + "=>" * S * defwithfunc(m.Cmt)
              + "~>" * S * defwithfunc(m.Cf)
              ) * S
            )^0, apply_suffix );

    Primary = "(" * m.V("Exp") * ")"
            + String / mm.P
            + Class
            + defined
            + "{:" * (name * ":" + m.Cc(nil)) * m.V("Exp") * ":}" /
                     named_group
            + "=" * name / back_reference
            + m.P("{}") / mm.Cp
            + "{~" * m.V("Exp") * "~}" / mm.Cs
            + "{|" * m.V("Exp") * "|}" / mm.Ct
            + "{" * m.V("Exp") * "}" / mm.C
            + m.P(".") * m.Cc(any)
            + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT;

    Definition = name * arrow * m.V("Exp");

    -- the group "G" is set to true while inside a grammar ('NT' reads
    -- it); the definitions are collected into one table via
    -- 'firstdef'/'adddef' and that table is handed to mm.P
    Grammar = m.Cg(m.Cc(true), "G") *
              m.Cf(m.V("Definition") / firstdef * m.Cg(m.V("Definition"))^0,
                adddef) / mm.P
  }
  -- Top-level parser: skip leading space, mark group "G" as false (not
  -- inside a grammar), parse an expression and pass it through mm.P.
  -- The whole input must be consumed; otherwise 'patt_error' is run.
  local pattern = S
    * m.Cg(m.Cc(false), "G")
    * exp / mm.P
    * (-any + patt_error)
  -- Compile pattern source 'p' into a pattern object.
  --
  -- p:    pattern source, or an already compiled pattern (returned as is)
  -- defs: optional value passed as the extra match argument, where
  --       named definitions are looked up
  --
  -- Raises "incorrect pattern" (error level 3) when parsing yields no
  -- result.
  local function compile (p, defs)
    if mm.type(p) ~= "pattern" then
      local compiled = pattern:match(p, 1, defs)
      if not compiled then error("incorrect pattern", 3) end
      return compiled
    end
    return p   -- already compiled
  end
  -- Match pattern 'p' against subject 's', starting at position 'i'
  -- (default 1). Compiled patterns are memoized in 'mem', keyed by 'p'.
  -- Returns whatever the compiled pattern's 'match' returns.
  local function match (s, p, i)
    local compiled = mem[p]
    if not compiled then
      compiled = compile(p)
      mem[p] = compiled
    end
    return compiled:match(s, i or 1)
  end
  -- Search subject 's' for pattern 'p', starting at position 'i'
  -- (default 1). Returns the start and end positions of the match found,
  -- or a single false/nil value when the search fails.
  -- Search patterns are memoized in 'fmem', keyed by 'p'.
  local function find (s, p, i)
    local searcher = fmem[p]
    if not searcher then
      -- '/ 0' drops the captures of 'p'; the one-rule grammar tries the
      -- pattern between two position captures, else skips one character
      -- and tries again
      local body = compile(p) / 0
      searcher = mm.P{ mm.Cp() * body * mm.Cp() + 1 * mm.V(1) }
      fmem[p] = searcher
    end
    local first, after = searcher:match(s, i or 1)
    if not first then
      return first
    end
    return first, after - 1
  end
  -- Replace the matches of pattern 'p' in subject 's' using 'rep'.
  -- Substitution patterns are memoized per pattern in 'gmem' and, within
  -- that entry, per replacement value.
  local function gsub (s, p, rep)
    -- holding the per-pattern table in a local ensures gmem[p] is not
    -- collected while we are here
    local by_rep = gmem[p]
    if not by_rep then
      by_rep = {}
    end
    gmem[p] = by_rep

    local subst = by_rep[rep]
    if not subst then
      -- at each position: either a match (replaced through 'rep') or one
      -- character kept as is
      subst = mm.Cs((compile(p) / rep + 1)^0)
      by_rep[rep] = subst
    end
    return subst:match(s)
  end
  -- exported names
  local re = {
    compile      = compile,
    find         = find,
    gsub         = gsub,
    match        = match,
    updatelocale = updatelocale,
  }

  -- under Lua 5.1 the module is also published as the global 're'
  if version == "Lua 5.1" then
    _G.re = re
  end

  return re