You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parser.go 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. // TOML Parser.
  2. package toml
  3. import (
  4. "errors"
  5. "fmt"
  6. "math"
  7. "reflect"
  8. "strconv"
  9. "strings"
  10. "time"
  11. )
// tomlParser holds the state of one pass over a token stream while the
// resulting tree is being built.
type tomlParser struct {
	// flowIdx is the index in flow of the next token to be read.
	flowIdx int
	// flow is the token stream being parsed.
	flow []token
	// tree is the root tree that the parser populates.
	tree *Tree
	// currentTable is the key path set by the most recent table or table
	// array header; assignments are resolved relative to it.
	currentTable []string
	// seenTableKeys records the raw key text of table headers already
	// encountered, so that a repeated table header can be rejected.
	seenTableKeys []string
}
// tomlParserStateFn is one state of the parser: calling it performs the work
// of that state and returns the next state, or nil when parsing is finished.
type tomlParserStateFn func() tomlParserStateFn
  20. // Formats and panics an error message based on a token
  21. func (p *tomlParser) raiseError(tok *token, msg string, args ...interface{}) {
  22. panic(tok.Position.String() + ": " + fmt.Sprintf(msg, args...))
  23. }
  24. func (p *tomlParser) run() {
  25. for state := p.parseStart; state != nil; {
  26. state = state()
  27. }
  28. }
  29. func (p *tomlParser) peek() *token {
  30. if p.flowIdx >= len(p.flow) {
  31. return nil
  32. }
  33. return &p.flow[p.flowIdx]
  34. }
  35. func (p *tomlParser) assume(typ tokenType) {
  36. tok := p.getToken()
  37. if tok == nil {
  38. p.raiseError(tok, "was expecting token %s, but token stream is empty", tok)
  39. }
  40. if tok.typ != typ {
  41. p.raiseError(tok, "was expecting token %s, but got %s instead", typ, tok)
  42. }
  43. }
  44. func (p *tomlParser) getToken() *token {
  45. tok := p.peek()
  46. if tok == nil {
  47. return nil
  48. }
  49. p.flowIdx++
  50. return tok
  51. }
  52. func (p *tomlParser) parseStart() tomlParserStateFn {
  53. tok := p.peek()
  54. // end of stream, parsing is finished
  55. if tok == nil {
  56. return nil
  57. }
  58. switch tok.typ {
  59. case tokenDoubleLeftBracket:
  60. return p.parseGroupArray
  61. case tokenLeftBracket:
  62. return p.parseGroup
  63. case tokenKey:
  64. return p.parseAssign
  65. case tokenEOF:
  66. return nil
  67. case tokenError:
  68. p.raiseError(tok, "parsing error: %s", tok.String())
  69. default:
  70. p.raiseError(tok, "unexpected token %s", tok.typ)
  71. }
  72. return nil
  73. }
  74. func (p *tomlParser) parseGroupArray() tomlParserStateFn {
  75. startToken := p.getToken() // discard the [[
  76. key := p.getToken()
  77. if key.typ != tokenKeyGroupArray {
  78. p.raiseError(key, "unexpected token %s, was expecting a table array key", key)
  79. }
  80. // get or create table array element at the indicated part in the path
  81. keys, err := parseKey(key.val)
  82. if err != nil {
  83. p.raiseError(key, "invalid table array key: %s", err)
  84. }
  85. p.tree.createSubTree(keys[:len(keys)-1], startToken.Position) // create parent entries
  86. destTree := p.tree.GetPath(keys)
  87. var array []*Tree
  88. if destTree == nil {
  89. array = make([]*Tree, 0)
  90. } else if target, ok := destTree.([]*Tree); ok && target != nil {
  91. array = destTree.([]*Tree)
  92. } else {
  93. p.raiseError(key, "key %s is already assigned and not of type table array", key)
  94. }
  95. p.currentTable = keys
  96. // add a new tree to the end of the table array
  97. newTree := newTree()
  98. newTree.position = startToken.Position
  99. array = append(array, newTree)
  100. p.tree.SetPath(p.currentTable, array)
  101. // remove all keys that were children of this table array
  102. prefix := key.val + "."
  103. found := false
  104. for ii := 0; ii < len(p.seenTableKeys); {
  105. tableKey := p.seenTableKeys[ii]
  106. if strings.HasPrefix(tableKey, prefix) {
  107. p.seenTableKeys = append(p.seenTableKeys[:ii], p.seenTableKeys[ii+1:]...)
  108. } else {
  109. found = (tableKey == key.val)
  110. ii++
  111. }
  112. }
  113. // keep this key name from use by other kinds of assignments
  114. if !found {
  115. p.seenTableKeys = append(p.seenTableKeys, key.val)
  116. }
  117. // move to next parser state
  118. p.assume(tokenDoubleRightBracket)
  119. return p.parseStart
  120. }
  121. func (p *tomlParser) parseGroup() tomlParserStateFn {
  122. startToken := p.getToken() // discard the [
  123. key := p.getToken()
  124. if key.typ != tokenKeyGroup {
  125. p.raiseError(key, "unexpected token %s, was expecting a table key", key)
  126. }
  127. for _, item := range p.seenTableKeys {
  128. if item == key.val {
  129. p.raiseError(key, "duplicated tables")
  130. }
  131. }
  132. p.seenTableKeys = append(p.seenTableKeys, key.val)
  133. keys, err := parseKey(key.val)
  134. if err != nil {
  135. p.raiseError(key, "invalid table array key: %s", err)
  136. }
  137. if err := p.tree.createSubTree(keys, startToken.Position); err != nil {
  138. p.raiseError(key, "%s", err)
  139. }
  140. destTree := p.tree.GetPath(keys)
  141. if target, ok := destTree.(*Tree); ok && target != nil && target.inline {
  142. p.raiseError(key, "could not re-define exist inline table or its sub-table : %s",
  143. strings.Join(keys, "."))
  144. }
  145. p.assume(tokenRightBracket)
  146. p.currentTable = keys
  147. return p.parseStart
  148. }
  149. func (p *tomlParser) parseAssign() tomlParserStateFn {
  150. key := p.getToken()
  151. p.assume(tokenEqual)
  152. parsedKey, err := parseKey(key.val)
  153. if err != nil {
  154. p.raiseError(key, "invalid key: %s", err.Error())
  155. }
  156. value := p.parseRvalue()
  157. var tableKey []string
  158. if len(p.currentTable) > 0 {
  159. tableKey = p.currentTable
  160. } else {
  161. tableKey = []string{}
  162. }
  163. prefixKey := parsedKey[0 : len(parsedKey)-1]
  164. tableKey = append(tableKey, prefixKey...)
  165. // find the table to assign, looking out for arrays of tables
  166. var targetNode *Tree
  167. switch node := p.tree.GetPath(tableKey).(type) {
  168. case []*Tree:
  169. targetNode = node[len(node)-1]
  170. case *Tree:
  171. targetNode = node
  172. case nil:
  173. // create intermediate
  174. if err := p.tree.createSubTree(tableKey, key.Position); err != nil {
  175. p.raiseError(key, "could not create intermediate group: %s", err)
  176. }
  177. targetNode = p.tree.GetPath(tableKey).(*Tree)
  178. default:
  179. p.raiseError(key, "Unknown table type for path: %s",
  180. strings.Join(tableKey, "."))
  181. }
  182. if targetNode.inline {
  183. p.raiseError(key, "could not add key or sub-table to exist inline table or its sub-table : %s",
  184. strings.Join(tableKey, "."))
  185. }
  186. // assign value to the found table
  187. keyVal := parsedKey[len(parsedKey)-1]
  188. localKey := []string{keyVal}
  189. finalKey := append(tableKey, keyVal)
  190. if targetNode.GetPath(localKey) != nil {
  191. p.raiseError(key, "The following key was defined twice: %s",
  192. strings.Join(finalKey, "."))
  193. }
  194. var toInsert interface{}
  195. switch value.(type) {
  196. case *Tree, []*Tree:
  197. toInsert = value
  198. default:
  199. toInsert = &tomlValue{value: value, position: key.Position}
  200. }
  201. targetNode.values[keyVal] = toInsert
  202. return p.parseStart
  203. }
  204. var errInvalidUnderscore = errors.New("invalid use of _ in number")
  205. func numberContainsInvalidUnderscore(value string) error {
  206. // For large numbers, you may use underscores between digits to enhance
  207. // readability. Each underscore must be surrounded by at least one digit on
  208. // each side.
  209. hasBefore := false
  210. for idx, r := range value {
  211. if r == '_' {
  212. if !hasBefore || idx+1 >= len(value) {
  213. // can't end with an underscore
  214. return errInvalidUnderscore
  215. }
  216. }
  217. hasBefore = isDigit(r)
  218. }
  219. return nil
  220. }
  221. var errInvalidUnderscoreHex = errors.New("invalid use of _ in hex number")
  222. func hexNumberContainsInvalidUnderscore(value string) error {
  223. hasBefore := false
  224. for idx, r := range value {
  225. if r == '_' {
  226. if !hasBefore || idx+1 >= len(value) {
  227. // can't end with an underscore
  228. return errInvalidUnderscoreHex
  229. }
  230. }
  231. hasBefore = isHexDigit(r)
  232. }
  233. return nil
  234. }
  235. func cleanupNumberToken(value string) string {
  236. cleanedVal := strings.Replace(value, "_", "", -1)
  237. return cleanedVal
  238. }
  239. func (p *tomlParser) parseRvalue() interface{} {
  240. tok := p.getToken()
  241. if tok == nil || tok.typ == tokenEOF {
  242. p.raiseError(tok, "expecting a value")
  243. }
  244. switch tok.typ {
  245. case tokenString:
  246. return tok.val
  247. case tokenTrue:
  248. return true
  249. case tokenFalse:
  250. return false
  251. case tokenInf:
  252. if tok.val[0] == '-' {
  253. return math.Inf(-1)
  254. }
  255. return math.Inf(1)
  256. case tokenNan:
  257. return math.NaN()
  258. case tokenInteger:
  259. cleanedVal := cleanupNumberToken(tok.val)
  260. var err error
  261. var val int64
  262. if len(cleanedVal) >= 3 && cleanedVal[0] == '0' {
  263. switch cleanedVal[1] {
  264. case 'x':
  265. err = hexNumberContainsInvalidUnderscore(tok.val)
  266. if err != nil {
  267. p.raiseError(tok, "%s", err)
  268. }
  269. val, err = strconv.ParseInt(cleanedVal[2:], 16, 64)
  270. case 'o':
  271. err = numberContainsInvalidUnderscore(tok.val)
  272. if err != nil {
  273. p.raiseError(tok, "%s", err)
  274. }
  275. val, err = strconv.ParseInt(cleanedVal[2:], 8, 64)
  276. case 'b':
  277. err = numberContainsInvalidUnderscore(tok.val)
  278. if err != nil {
  279. p.raiseError(tok, "%s", err)
  280. }
  281. val, err = strconv.ParseInt(cleanedVal[2:], 2, 64)
  282. default:
  283. panic("invalid base") // the lexer should catch this first
  284. }
  285. } else {
  286. err = numberContainsInvalidUnderscore(tok.val)
  287. if err != nil {
  288. p.raiseError(tok, "%s", err)
  289. }
  290. val, err = strconv.ParseInt(cleanedVal, 10, 64)
  291. }
  292. if err != nil {
  293. p.raiseError(tok, "%s", err)
  294. }
  295. return val
  296. case tokenFloat:
  297. err := numberContainsInvalidUnderscore(tok.val)
  298. if err != nil {
  299. p.raiseError(tok, "%s", err)
  300. }
  301. cleanedVal := cleanupNumberToken(tok.val)
  302. val, err := strconv.ParseFloat(cleanedVal, 64)
  303. if err != nil {
  304. p.raiseError(tok, "%s", err)
  305. }
  306. return val
  307. case tokenLocalTime:
  308. val, err := ParseLocalTime(tok.val)
  309. if err != nil {
  310. p.raiseError(tok, "%s", err)
  311. }
  312. return val
  313. case tokenLocalDate:
  314. // a local date may be followed by:
  315. // * nothing: this is a local date
  316. // * a local time: this is a local date-time
  317. next := p.peek()
  318. if next == nil || next.typ != tokenLocalTime {
  319. val, err := ParseLocalDate(tok.val)
  320. if err != nil {
  321. p.raiseError(tok, "%s", err)
  322. }
  323. return val
  324. }
  325. localDate := tok
  326. localTime := p.getToken()
  327. next = p.peek()
  328. if next == nil || next.typ != tokenTimeOffset {
  329. v := localDate.val + "T" + localTime.val
  330. val, err := ParseLocalDateTime(v)
  331. if err != nil {
  332. p.raiseError(tok, "%s", err)
  333. }
  334. return val
  335. }
  336. offset := p.getToken()
  337. layout := time.RFC3339Nano
  338. v := localDate.val + "T" + localTime.val + offset.val
  339. val, err := time.ParseInLocation(layout, v, time.UTC)
  340. if err != nil {
  341. p.raiseError(tok, "%s", err)
  342. }
  343. return val
  344. case tokenLeftBracket:
  345. return p.parseArray()
  346. case tokenLeftCurlyBrace:
  347. return p.parseInlineTable()
  348. case tokenEqual:
  349. p.raiseError(tok, "cannot have multiple equals for the same key")
  350. case tokenError:
  351. p.raiseError(tok, "%s", tok)
  352. default:
  353. panic(fmt.Errorf("unhandled token: %v", tok))
  354. }
  355. return nil
  356. }
  357. func tokenIsComma(t *token) bool {
  358. return t != nil && t.typ == tokenComma
  359. }
  360. func (p *tomlParser) parseInlineTable() *Tree {
  361. tree := newTree()
  362. var previous *token
  363. Loop:
  364. for {
  365. follow := p.peek()
  366. if follow == nil || follow.typ == tokenEOF {
  367. p.raiseError(follow, "unterminated inline table")
  368. }
  369. switch follow.typ {
  370. case tokenRightCurlyBrace:
  371. p.getToken()
  372. break Loop
  373. case tokenKey, tokenInteger, tokenString:
  374. if !tokenIsComma(previous) && previous != nil {
  375. p.raiseError(follow, "comma expected between fields in inline table")
  376. }
  377. key := p.getToken()
  378. p.assume(tokenEqual)
  379. parsedKey, err := parseKey(key.val)
  380. if err != nil {
  381. p.raiseError(key, "invalid key: %s", err)
  382. }
  383. value := p.parseRvalue()
  384. tree.SetPath(parsedKey, value)
  385. case tokenComma:
  386. if tokenIsComma(previous) {
  387. p.raiseError(follow, "need field between two commas in inline table")
  388. }
  389. p.getToken()
  390. default:
  391. p.raiseError(follow, "unexpected token type in inline table: %s", follow.String())
  392. }
  393. previous = follow
  394. }
  395. if tokenIsComma(previous) {
  396. p.raiseError(previous, "trailing comma at the end of inline table")
  397. }
  398. tree.inline = true
  399. return tree
  400. }
  401. func (p *tomlParser) parseArray() interface{} {
  402. var array []interface{}
  403. arrayType := reflect.TypeOf(newTree())
  404. for {
  405. follow := p.peek()
  406. if follow == nil || follow.typ == tokenEOF {
  407. p.raiseError(follow, "unterminated array")
  408. }
  409. if follow.typ == tokenRightBracket {
  410. p.getToken()
  411. break
  412. }
  413. val := p.parseRvalue()
  414. if reflect.TypeOf(val) != arrayType {
  415. arrayType = nil
  416. }
  417. array = append(array, val)
  418. follow = p.peek()
  419. if follow == nil || follow.typ == tokenEOF {
  420. p.raiseError(follow, "unterminated array")
  421. }
  422. if follow.typ != tokenRightBracket && follow.typ != tokenComma {
  423. p.raiseError(follow, "missing comma")
  424. }
  425. if follow.typ == tokenComma {
  426. p.getToken()
  427. }
  428. }
  429. // if the array is a mixed-type array or its length is 0,
  430. // don't convert it to a table array
  431. if len(array) <= 0 {
  432. arrayType = nil
  433. }
  434. // An array of Trees is actually an array of inline
  435. // tables, which is a shorthand for a table array. If the
  436. // array was not converted from []interface{} to []*Tree,
  437. // the two notations would not be equivalent.
  438. if arrayType == reflect.TypeOf(newTree()) {
  439. tomlArray := make([]*Tree, len(array))
  440. for i, v := range array {
  441. tomlArray[i] = v.(*Tree)
  442. }
  443. return tomlArray
  444. }
  445. return array
  446. }
  447. func parseToml(flow []token) *Tree {
  448. result := newTree()
  449. result.position = Position{1, 1}
  450. parser := &tomlParser{
  451. flowIdx: 0,
  452. flow: flow,
  453. tree: result,
  454. currentTable: make([]string, 0),
  455. seenTableKeys: make([]string, 0),
  456. }
  457. parser.run()
  458. return result
  459. }